Commit 7e99f07935f9f6faf761804e0460c57be3a0ffa5

Authored by Jean-François Rey
1 parent f37e72eafc
Exists in master

up

Showing 3 changed files with 12 additions and 8 deletions Inline Diff

main_tools/ExploitConfidencePass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : ExploitConfidencePass.sh # 4 # File : ExploitConfidencePass.sh #
5 # Brief : Exploit the ASR confidence pass to : # 5 # Brief : Exploit the ASR confidence pass to : #
6 # -> boost the confident zone # 6 # -> boost the confident zone #
7 # -> find alternatives in non-confident zones # 7 # -> find alternatives in non-confident zones #
8 # -> dynamically extend the lexicon # 8 # -> dynamically extend the lexicon #
9 # Author : Jean-François Rey # 9 # Author : Jean-François Rey #
10 # (based on the work of Emmanuel Ferreira # 10 # (based on the work of Emmanuel Ferreira #
11 # and Hugo Mauchrétien) # 11 # and Hugo Mauchrétien) #
12 # Version : 1.0 # 12 # Version : 1.0 #
13 # Date : 25/06/13 # 13 # Date : 25/06/13 #
14 ##################################################### 14 #####################################################
15 15
16 echo "### ExploitConfidencePass.sh ###" 16 echo "### ExploitConfidencePass.sh ###"
17 17
18
19 # Check OTMEDIA_HOME env var 18 # Check OTMEDIA_HOME env var
20 if [ -z ${OTMEDIA_HOME} ] 19 if [ -z ${OTMEDIA_HOME} ]
21 then 20 then
22 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 21 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
23 export OTMEDIA_HOME=$OTMEDIA_HOME 22 export OTMEDIA_HOME=$OTMEDIA_HOME
24 fi 23 fi
25 24
26 # where is ExploitConfidencePass.sh 25 # where is ExploitConfidencePass.sh
27 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 26 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
28 27
29 if [ -z ${SCRIPT_PATH} ] 28 if [ -z ${SCRIPT_PATH} ]
30 then 29 then
31 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 30 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
32 fi 31 fi
33 32
34 # Include scripts 33 # Include scripts
35 . $SCRIPT_PATH"/Tools.sh" 34 . $SCRIPT_PATH"/Tools.sh"
36 . $SCRIPT_PATH"/CheckExploitConfPass.sh" 35 . $SCRIPT_PATH"/CheckExploitConfPass.sh"
37 36
38 # where is ExploitConfidencePass.cfg 37 # where is ExploitConfidencePass.cfg
39 EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" 38 EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg"
40 if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] 39 if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ]
41 then 40 then
42 . $EXPLOITCONFIDENCEPASS_CONFIG_FILE 41 . $EXPLOITCONFIDENCEPASS_CONFIG_FILE
43 else 42 else
44 echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 43 echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
45 exit 1 44 exit 1
46 fi 45 fi
47 46
48 #---------------# 47 #---------------#
49 # Parse Options # 48 # Parse Options #
50 #---------------# 49 #---------------#
51 while getopts ":hDv:cf:r" opt 50 while getopts ":hDv:cf:r" opt
52 do 51 do
53 case $opt in 52 case $opt in
54 h) 53 h)
55 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" 54 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
56 echo -e "\t Options:" 55 echo -e "\t Options:"
57 echo -e "\t\t-h :\tprint this message" 56 echo -e "\t\t-h :\tprint this message"
58 echo -e "\t\t-D :\tDEBUG mode on" 57 echo -e "\t\t-D :\tDEBUG mode on"
59 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 58 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
60 echo -e "\t\t-c :\tCheck process, stop if error detected" 59 echo -e "\t\t-c :\tCheck process, stop if error detected"
61 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" 60 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
62 echo -e "\t\t-r n :\tforce rerun without deleting files" 61 echo -e "\t\t-r n :\tforce rerun without deleting files"
63 exit 1 62 exit 1
64 ;; 63 ;;
65 D) 64 D)
66 DEBUG=1 65 DEBUG=1
67 ;; 66 ;;
68 v) 67 v)
69 VERBOSE=$OPTARG 68 VERBOSE=$OPTARG
70 ;; 69 ;;
71 c) 70 c)
72 CHECK=1 71 CHECK=1
73 ;; 72 ;;
74 f) 73 f)
75 FORKS="--forks $OPTARG" 74 FORKS="--forks $OPTARG"
76 ;; 75 ;;
77 r) 76 r)
78 RERUN=1 77 RERUN=1
79 ;; 78 ;;
80 :) 79 :)
81 echo "Option -$OPTARG requires an argument." >&2 80 echo "Option -$OPTARG requires an argument." > /dev/stderr
82 exit 1 81 exit 1
83 ;; 82 ;;
84 \?) 83 \?)
85 echo "BAD USAGE : unknow opton -$OPTARG" 84 echo "BAD USAGE : unknow opton -$OPTARG"
86 #exit 1 85 #exit 1
87 ;; 86 ;;
88 esac 87 esac
89 done 88 done
90 89
91 # mode debug enable 90 # mode debug enable
92 if [ $DEBUG -eq 1 ] 91 if [ $DEBUG -eq 1 ]
93 then 92 then
94 set -x 93 set -x
95 echo -e "## Mode DEBUG ON ##" 94 echo -e "## Mode DEBUG ON ##"
96 REDIRECTION_OUTPUT="" 95 REDIRECTION_OUTPUT=""
97 else 96 else
98 REDIRECTION_OUTPUT=" 2> /dev/null" 97 REDIRECTION_OUTPUT=" 2> /dev/null"
99 fi 98 fi
100 99
101 # mode verbose enable 100 # mode verbose enable
102 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi 101 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
103 102
104 # Check USAGE by arguments number 103 # Check USAGE by arguments number
105 if [ $(($#-($OPTIND-1))) -ne 1 ] 104 if [ $(($#-($OPTIND-1))) -ne 1 ]
106 then 105 then
107 echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" 106 echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
108 echo "$0 -h for more info" 107 echo "$0 -h for more info"
109 exit 1 108 exit 1
110 fi 109 fi
111 110
112 shift $((OPTIND-1)) 111 shift $((OPTIND-1))
113 # check input directory - first argument 112 # check input directory - first argument
114 if [ ! -e $1 ] 113 if [ ! -e $1 ]
115 then 114 then
116 print_error "can't open $1" 115 print_error "can't open $1"
117 exit 1 116 exit 1
118 fi 117 fi
119 118
120 #-------------# 119 #-------------#
121 # GLOBAL VARS # 120 # GLOBAL VARS #
122 #-------------# 121 #-------------#
123 INPUT_DIR=$(readlink -e $1) 122 INPUT_DIR=$(readlink -e $1)
124 OUTPUT_DIR=$INPUT_DIR 123 OUTPUT_DIR=$INPUT_DIR
125 BASENAME=$(basename $OUTPUT_DIR) 124 BASENAME=$(basename $OUTPUT_DIR)
126 SHOW_DIR="$OUTPUT_DIR/shows/" 125 SHOW_DIR="$OUTPUT_DIR/shows/"
127 SOLR_RES="$OUTPUT_DIR/solr/" 126 SOLR_RES="$OUTPUT_DIR/solr/"
128 EXT_LEX="$OUTPUT_DIR/LEX/" 127 EXT_LEX="$OUTPUT_DIR/LEX/"
129 TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" 128 TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
130 LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" 129 LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log"
131 ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" 130 ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log"
132 131
133 CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" 132 CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg"
134 if [ -e $CONFPASS_CONFIG_FILE ] 133 if [ -e $CONFPASS_CONFIG_FILE ]
135 then 134 then
136 { 135 {
137 RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") 136 RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=")
138 RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") 137 RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=")
139 print_warn "[${BASENAME}] Use confidence measure from : $RES_CONF" 1 138 print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2
140 } 139 }
141 else 140 else
142 { 141 {
143 print_error "Can't find $CONFPASS_CONFIG_FILE" 1 142 print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE"
143 print_error "[${BASENAME}] -> use res_p2"
144 RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" 144 RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
145 RES_CONF="$INPUT_DIR/conf/res_p2" 145 RES_CONF="$INPUT_DIR/conf/res_p2"
146 } 146 }
147 fi 147 fi
148 148
149 mkdir -p $SHOW_DIR > /dev/null 2>&1 149 mkdir -p $SHOW_DIR > /dev/null 2>&1
150 mkdir -p $SOLR_RES > /dev/null 2>&1 150 mkdir -p $SOLR_RES > /dev/null 2>&1
151 mkdir -p $EXT_LEX > /dev/null 2>&1 151 mkdir -p $EXT_LEX > /dev/null 2>&1
152 mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 152 mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1
153 153
154 #------------------# 154 #------------------#
155 # Create Workspace # 155 # Create Workspace #
156 #------------------# 156 #------------------#
157 # Lock directory 157 # Lock directory
158 if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]; then exit 1; fi 158 if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]
159 then
160 print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2
161 exit 1
162 fi
159 rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 163 rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
160 touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 164 touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1
161 165
162 #------# 166 #------#
163 # Save # 167 # Save #
164 #------# 168 #------#
165 cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg 169 cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg
166 echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg 170 echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg
167 echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg 171 echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg
168 echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg 172 echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg
169 echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg 173 echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg
170 174 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1
171 175
172 #-----------------------# 176 #-----------------------#
173 # Segmentation by show # 177 # Segmentation by show #
174 #-----------------------# 178 #-----------------------#
175 # create txt file from scored res 179 # create txt file from scored res
176 # tag pos and lemmatization of the txt file 180 # tag pos and lemmatization of the txt file
177 # merge the scored res and taglem file 181 # merge the scored res and taglem file
178 # segment using the last generated file 182 # segment using the last generated file
179 # and create a ctm file by show 183 # and create a ctm file by show
180 184
181 print_info "Segmentation by show" 1 185 print_info "Segmentation by show" 1
182 186
183 # -> to txt 187 # -> to txt
184 print_info "Create txt from scored res" 2 188 print_info "Create txt from scored res" 2
185 cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm 189 cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
186 cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp 190 cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp
187 cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt 191 cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt
188 192
189 # -> to tagger + lemme 193 # -> to tagger + lemme
190 print_info "Tag pos and lem in txt file" 2 194 print_info "Tag pos and lem in txt file" 2
191 iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp 195 iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
192 $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem 196 $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem
193 197
194 # merge sctm and taglem 198 # merge sctm and taglem
195 print_info "Merge scored ctm with tag pos and lem file" 2 199 print_info "Merge scored ctm with tag pos and lem file" 2
196 cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl 200 cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl
197 201
198 # -> new seg 202 # -> new seg
199 print_info "Create xml file and run Topic Seg" 2 203 print_info "Create xml file and run Topic Seg" 2
200 $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml 204 $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
201 rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem 205 rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem
202 206
203 # Lia_topic_seg : bring together sentences into show 207 # Lia_topic_seg : bring together sentences into show
204 cp $INPUT_DIR/$BASENAME.doc.xml 0.xml 208 cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
205 java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg 209 java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
206 cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg 210 cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg
207 rm 0.xml $INPUT_DIR/show.seg 211 rm 0.xml $INPUT_DIR/show.seg
208 212
209 if [ $CHECK -eq 1 ] 213 if [ $CHECK -eq 1 ]
210 then 214 then
211 if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "ERROR : no Topic segmentation" >> $ERRORFILE; fi 215 if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "ERROR : no Topic segmentation" >> $ERRORFILE; fi
212 fi 216 fi
213 217
214 # Segment ctm into several show files and create a seg list by show 218 # Segment ctm into several show files and create a seg list by show
215 print_info "Segment ctm into show files and a seg list by show" 2 219 print_info "Segment ctm into show files and a seg list by show" 2
216 $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR $REDIRECTION_OUTPUT 220 $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR $REDIRECTION_OUTPUT
217 221
218 #-----------------------------------------------------------# 222 #-----------------------------------------------------------#
219 # SOLR QUERIES # 223 # SOLR QUERIES #
220 # -> Create confident words # 224 # -> Create confident words #
221 # Keep conf words and use Tags # 225 # Keep conf words and use Tags #
222 # -> Query SOLR (document & multimedia) # 226 # -> Query SOLR (document & multimedia) #
223 # concat words + add dates 2 days before and after the show # 227 # concat words + add dates 2 days before and after the show #
224 # query document & multimedia # 228 # query document & multimedia #
225 #-----------------------------------------------------------# 229 #-----------------------------------------------------------#
226 print_info "Create SOLR queries and ASK SOLR" 1 230 print_info "Create SOLR queries and ASK SOLR" 1
227 for show in $(ls $SHOW_DIR/*.ctm) 231 for show in $(ls $SHOW_DIR/*.ctm)
228 do 232 do
229 bn=$(basename $show .ctm) 233 bn=$(basename $show .ctm)
230 # Remove words with low confidence and keep useful tagger words 234 # Remove words with low confidence and keep useful tagger words
231 cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone" 235 cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
232 # Get the dates 2 days before and after the show 236 # Get the dates 2 days before and after the show
233 datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)` 237 datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
234 # Create SOLR queries 238 # Create SOLR queries
235 cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries" 239 cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
236 # Ask SOLR DB 240 # Ask SOLR DB
237 if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then 241 if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
238 query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]" 242 query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
239 echo $query > $SHOW_DIR/$bn.queries 243 echo $query > $SHOW_DIR/$bn.queries
240 python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp $REDIRECTION_OUTPUT 244 python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp $REDIRECTION_OUTPUT
241 cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords 245 cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
242 cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt 246 cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
243 rm $SOLR_RES/*.tmp > /dev/null 2>&1 247 rm $SOLR_RES/*.tmp > /dev/null 2>&1
244 fi 248 fi
245 249
246 if [ $CHECK -eq 1 ] 250 if [ $CHECK -eq 1 ]
247 then 251 then
248 if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ] 252 if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
249 then 253 then
250 print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1 254 print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1
251 fi 255 fi
252 fi 256 fi
253 257
254 done 258 done
255 259
256 #----------------------------------------------------------------------------------------------- 260 #-----------------------------------------------------------------------------------------------
257 # Build trigger file 261 # Build trigger file
258 # 1) keywords are automatically boosted in the non-confident zones of the current res 262 # 1) keywords are automatically boosted in the non-confident zones of the current res
259 # confident zones are boosted 263 # confident zones are boosted
260 # preceding words in sensitive zones are penalized 264 # preceding words in sensitive zones are penalized
261 # 2) OOVs are extracted + phonetized 265 # 2) OOVs are extracted + phonetized
262 # 3) Try to find OOVs acoustically in the current segment 266 # 3) Try to find OOVs acoustically in the current segment
263 # 4) Generate the .trigg file 267 # 4) Generate the .trigg file
264 #------------------------------------------------------------------------------------------------ 268 #------------------------------------------------------------------------------------------------
265 print_info "Build trigger files" 1 269 print_info "Build trigger files" 1
266 for i in `ls $SOLR_RES/*.keywords` 270 for i in `ls $SOLR_RES/*.keywords`
267 do 271 do
268 basename=`basename $i .keywords` 272 basename=`basename $i .keywords`
269 273
270 # 274 #
271 # Tokenize & produce coverage report 275 # Tokenize & produce coverage report
272 # Use filter you need 276 # Use filter you need
273 # 277 #
274 print_info "keywords filtering and produce coverage report" 2 278 print_info "keywords filtering and produce coverage report" 2
275 # Default filter 279 # Default filter
276 cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\ 280 cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
277 $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok 281 $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
278 # do less filter 282 # do less filter
279 #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok 283 #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
280 284
281 285
282 # 286 #
283 # Extract "real" OOV and phonetize them 287 # Extract "real" OOV and phonetize them
284 # -> small custom filtering to avoid too much noise 288 # -> small custom filtering to avoid too much noise
285 # 289 #
286 print_info "Extract OOV and phonetize them" 2 290 print_info "Extract OOV and phonetize them" 2
287 ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov 291 ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov
288 292
289 # 293 #
290 # Search INVOC & OOV in the current lattice 294 # Search INVOC & OOV in the current lattice
291 # 295 #
292 print_info "Search INVOC and OOV in the current lattice" 2 296 print_info "Search INVOC and OOV in the current lattice" 2
293 cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch 297 cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
294 cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch 298 cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch
295 299
296 # For each lattice (one per segment listed for the show) 300 # For each lattice (one per segment listed for the show)
297 for baseseg in $(cat "$SHOW_DIR/$basename.lst") 301 for baseseg in $(cat "$SHOW_DIR/$basename.lst")
298 do 302 do
299 $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION 303 $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
300 # 304 #
301 # Produce the boost file for the next decoding pass 305 # Produce the boost file for the next decoding pass
302 # 306 #
303 print_info "Produce trigg file : $baseseg " 3 307 print_info "Produce trigg file : $baseseg " 3
304 cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg 308 cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
305 done 309 done
306 310
307 done 311 done
308 312
309 #----------------------------------------------------------------------------------------------- 313 #-----------------------------------------------------------------------------------------------
310 # Build the extended SPEERAL Lexicon 314 # Build the extended SPEERAL Lexicon
311 # 1) Merge OOVs + LEXICON 315 # 1) Merge OOVs + LEXICON
312 # 2) Related texts are collected in order to find the in-vocabulary word that maximizes the ppl (LM probability) 316 # 2) Related texts are collected in order to find the in-vocabulary word that maximizes the ppl (LM probability)
313 # 3) The current lexicon is extended with all the valid OOVs 317 # 3) The current lexicon is extended with all the valid OOVs
314 #----------------------------------------------------------------------------------------------- 318 #-----------------------------------------------------------------------------------------------
315 print_info "Build extended Speeral Lexicon" 1 319 print_info "Build extended Speeral Lexicon" 1
316 mkdir -p $EXT_LEX/final 320 mkdir -p $EXT_LEX/final
317 mkdir -p $EXT_LEX/tmp 321 mkdir -p $EXT_LEX/tmp
318 mkdir -p $EXT_LEX/tmp/txt 322 mkdir -p $EXT_LEX/tmp/txt
319 # 323 #
320 # Collect the acoustically found OOVs and their phonetisations 324 # Collect the acoustically found OOVs and their phonetisations
321 # 325 #
322 print_info "Get all OOV and retrieve all phonetisation" 2 326 print_info "Get all OOV and retrieve all phonetisation" 2
323 for i in `ls $SOLR_RES/*.phon_oov` 327 for i in `ls $SOLR_RES/*.phon_oov`
324 do 328 do
325 basename=`basename $i .phon_oov` 329 basename=`basename $i .phon_oov`
326 330
327 rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null 331 rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
328 # list the words acoustically found for the show 332 # list the words acoustically found for the show
329 for baseseg in $(cat "$SHOW_DIR/$basename.lst") 333 for baseseg in $(cat "$SHOW_DIR/$basename.lst")
330 do 334 do
331 cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound 335 cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
332 done 336 done
333 cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp 337 cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
334 mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound 338 mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound
335 339
336 # 340 #
337 # Extract OOV really added 341 # Extract OOV really added
338 # 342 #
339 cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov 343 cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
340 $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound $REDIRECTION_OUTPUT 344 $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound $REDIRECTION_OUTPUT
341 # 345 #
342 # Retrieve all phonetisation 346 # Retrieve all phonetisation
343 # 347 #
344 cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon 348 cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
345 done 349 done
346 350
347 # 351 #
348 # Merge OOVs and their phonetisation 352 # Merge OOVs and their phonetisation
349 # 353 #
350 print_info "Merge OOV and their phonetisation" 2 354 print_info "Merge OOV and their phonetisation" 2
351 lexname=$(basename $LEXICON) 355 lexname=$(basename $LEXICON)
352 cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon 356 cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
353 cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound 357 cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
354 $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon $REDIRECTION_OUTPUT 358 $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon $REDIRECTION_OUTPUT
355 359
356 # 360 #
357 # Collect + clean retrieved txt 361 # Collect + clean retrieved txt
358 # 362 #
359 print_info "Collect and clean SOLR txt answers" 2 363 print_info "Collect and clean SOLR txt answers" 2
360 # choose filter 364 # choose filter
361 # default 365 # default
362 cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt 366 cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
363 # low filter 367 # low filter
364 #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt 368 #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
365 369
366 # 370 #
367 # Construct the map file 371 # Construct the map file
368 # 372 #
369 # Notes: 373 # Notes:
370 # - Expected format : 374 # - Expected format :
371 # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1> 375 # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
372 # 376 #
373 print_info "Construct map file" 2 377 print_info "Construct map file" 2
374 rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null 378 rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
375 rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null 379 rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null
376 380
377 while read oov 381 while read oov
378 do 382 do
379 oov=`echo $oov | sed "s/\n//g"` 383 oov=`echo $oov | sed "s/\n//g"`
380 # 384 #
381 # Obtain the oov's tag 385 # Obtain the oov's tag
382 # 386 #
383 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` 387 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
384 # 388 #
385 # Try to collect text containing the oov word 389 # Try to collect text containing the oov word
386 # 390 #
387 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt 391 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
388 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then 392 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
389 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 393 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
390 if [ $nbWords -eq 0 ]; then 394 if [ $nbWords -eq 0 ]; then
391 echo "UNVALID OOV: $oov => $nbWords occurrences" 395 echo "UNVALID OOV: $oov => $nbWords occurrences"
392 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 396 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
393 else 397 else
394 # 398 #
395 # Find a candidate in a filtered in-vocabulary lexicon => the candidate which maximizes the ppl over all the collected text 399 # Find a candidate in a filtered in-vocabulary lexicon => the candidate which maximizes the ppl over all the collected text
396 # 400 #
397 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" 401 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
398 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 402 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
399 if [ ! x$candidate = "x" ]; then 403 if [ ! x$candidate = "x" ]; then
400 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon 404 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
401 while read phonLine 405 while read phonLine
402 do 406 do
403 #<word> <phon> => <word> <candidate> <phon> 407 #<word> <phon> => <word> <candidate> <phon>
404 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map 408 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
405 done < $EXT_LEX/tmp/$oov.phon 409 done < $EXT_LEX/tmp/$oov.phon
406 else 410 else
407 echo "UNVALID OOV: $oov => no availaible Candidate word in LM" 411 echo "UNVALID OOV: $oov => no availaible Candidate word in LM"
408 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 412 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
409 fi 413 fi
410 fi 414 fi
411 else 415 else
412 echo "UNVALID OOV: $oov" 416 echo "UNVALID OOV: $oov"
413 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 417 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
414 fi 418 fi
415 done < $EXT_LEX/final/all.oov_acousticlyfound 419 done < $EXT_LEX/final/all.oov_acousticlyfound
416 420
417 # 421 #
418 ### Speeral 422 ### Speeral
419 # 423 #
420 424
# Base name of the lexicon, used to name the generated files below.
lexname=$(basename "$LEXICON")
#
# Build the final trigger file
#
print_info "Clean trigg files" 2
mkdir -p "$TRIGGER_CONFZONE/speeral/" 2> /dev/null
mkdir -p "$EXT_LEX/speeral/" 2> /dev/null
# Glob directly instead of parsing `ls` output, so file names containing
# whitespace are not split into several words.
for trigg_file in "$TRIGGER_CONFZONE"/*.trigg
do
    # When nothing matches, the pattern is left as a literal string: skip it.
    [ -e "$trigg_file" ] || continue
    trigg_name=$(basename "$trigg_file" .trigg)
    # NOTE(review): the OOV loop above appends rejected words to
    # $EXT_LEX/final/${lexname}.unvalid_oov, while this filter reads
    # $EXT_LEX/$lexname.unvalid_oov - confirm which path is intended.
    "$SCRIPT_PATH/RemoveLineContaining.pl" "$EXT_LEX/$lexname.unvalid_oov" \
        < "$trigg_file" > "$TRIGGER_CONFZONE/speeral/$trigg_name.trigg"
done
#
# Compile the speeral extended lexicon
#
print_info "Compile Speeral extended lexicon" 2
# $AM_SKL and $REDIRECTION_OUTPUT are defined outside this section; they are
# left unquoted because it cannot be seen here whether they hold several words.
"$SPEERAL_PATH/bin/buildmappedbinode" "$LEXICON.bdlex_phon" \
    "$EXT_LEX/final/${lexname}_ext.map" $AM_SKL \
    "$EXT_LEX/speeral/${lexname}_ext" $REDIRECTION_OUTPUT

# Optional sanity check of the generated lexicon (only when CHECK is 1).
if [ "$CHECK" -eq 1 ]
then
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    # A status of exactly 1 from the check is treated as a build failure.
    if [ $? -eq 1 ]
    then
        echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> "$ERRORFILE"
        exit 1;
    fi
fi
448 452
449 453
#-------#
# CLOSE #
#-------#
# Seem OK
print_info "<= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1
# Record the step in the log file (quoted so a path with spaces still works).
echo -e "#Solr $BASENAME " >> "$LOGFILE"

# unlock directory
mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
459 463
460 464
main_tools/OneScriptToRuleThemAll.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################### 3 #####################################
4 # File: OneScriptToRuleThemAll.sh # 4 # File: OneScriptToRuleThemAll.sh #
5 # Brief : Script to launch OTMEDIA # 5 # Brief : Script to launch OTMEDIA #
6 # Version : 1.0 # 6 # Version : 1.0 #
7 # Date : 23/07/2013 # 7 # Date : 23/07/2013 #
8 # Author : Jean-François Rey # 8 # Author : Jean-François Rey #
9 ##################################### 9 #####################################
10 10
11 echo -e "OneScriptToRuleThemAll :" 11 echo -e "#### OneScriptToRuleThemAll ####"
12 LORD=" Three::rings 12 LORD=" Three::rings
13 for:::the::Elven-King 13 for:::the::Elven-King
14 under:the:sky,:Seven:for:the 14 under:the:sky,:Seven:for:the
15 Dwarf-Lords::in::their::halls:of 15 Dwarf-Lords::in::their::halls:of
16 stone,:Nine for:Mortal 16 stone,:Nine for:Mortal
17 :::Men::: ________ doomed::to 17 :::Men::: ________ doomed::to
18 die.:One _,-'...:... \`-. for:::the 18 die.:One _,-'...:... \`-. for:::the
19 ::Dark:: ,- .:::::::::::. \`. Lord::on 19 ::Dark:: ,- .:::::::::::. \`. Lord::on
20 his:dark ,' .:::::zzz:::::. \`. :throne: 20 his:dark ,' .:::::zzz:::::. \`. :throne:
21 In:::the/ ::::OTMEDIA:::: \ Land::of 21 In:::the/ ::::OTMEDIA:::: \ Land::of
22 :Mordor:\ ::::SCRIPTS:::: / :where:: 22 :Mordor:\ ::::SCRIPTS:::: / :where::
23 ::the::: '. '::::YEEEP::::' ,' Shadows: 23 ::the::: '. '::::YEEEP::::' ,' Shadows:
24 lie.::One \`. \`\`:::::::::'' ,' Ring::to 24 lie.::One \`. \`\`:::::::::'' ,' Ring::to
25 ::rule:: \`-._\`\`\`:'''_,-' ::them:: 25 ::rule:: \`-._\`\`\`:'''_,-' ::them::
26 all,::One \`-----' ring::to 26 all,::One \`-----' ring::to
27 ::find::: them,:One 27 ::find::: them,:One
28 Ring:::::to bring::them 28 Ring:::::to bring::them
29 all::and::in:the:darkness:bind 29 all::and::in:the:darkness:bind
30 them:In:the:Land:of:Mordor 30 them:In:the:Land:of:Mordor
31 where:::the::Shadows 31 where:::the::Shadows
32 :::lie.:::" 32 :::lie.:::"
33 33
34 34
# Check OTMEDIA_HOME env var
# When it is empty or unset, default it to the parent of the directory that
# holds this script, and export it.
if [ -z "${OTMEDIA_HOME}" ]
then
    # Every expansion is quoted so an install path with whitespace survives.
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is OneScriptToRuleThemAll.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")
44 44
# Options collected here are passed on to each pass script.
RING=""
# Pass selection flags: 1 means the pass is run.
p1=0
p2=0
conf=0
exploitconf=0
p3=0
recompose=0
scoring=0

#---------------#
# Parse Options #
#---------------#
# The leading ':' puts getopts in silent mode: a missing argument is reported
# through the ':' arm and an unknown option through the '?' arm.
while getopts ":hDv:cf:r123CeRsa" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-f n :\tSpecify a speeral forks number (default 1)"
            echo -e "\t\t-r :\tForce to rerun without deleting all files"
            echo -e "\t\t-a :\tDo every pass"
            echo -e "\t\t-1 :\tDo 1rt pass"
            echo -e "\t\t-2 :\tDo 2nd pass"
            echo -e "\t\t-3 :\tDo 3rd pass"
            echo -e "\t\t-C :\tDo confidence pass"
            echo -e "\t\t-e :\tDo exploit confidence pass (SOLR)"
            echo -e "\t\t-R :\tDo recompose res"
            echo -e "\t\t-s :\tDo scoring"
            exit 1
            ;;
        # Options forwarded to the pass scripts through RING.
        D)
            RING=$RING" -D"
            ;;
        v)
            RING=$RING" -v $OPTARG"
            ;;
        c)
            RING=$RING" -c"
            ;;
        f)
            RING=$RING" -f $OPTARG"
            ;;
        r)
            RING=$RING" -r"
            ;;
        # Pass selection.
        a)
            p1=1
            p2=1
            p3=1
            conf=1
            exploitconf=1
            recompose=1
            scoring=1
            ;;
        1)
            p1=1
            ;;
        2)
            p2=1
            ;;
        3)
            p3=1
            ;;
        C)
            conf=1
            ;;
        e)
            exploitconf=1
            ;;
        # This arm was a second, unreachable 'a)': -R never enabled recompose.
        R)
            recompose=1
            ;;
        s)
            scoring=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknow opton -$OPTARG"
            exit 1
            ;;
    esac
done
134 134
# Check USAGE by arguments number
# Exactly two positional arguments must remain after the options.
if [ $(($#-($OPTIND-1))) -ne 2 ]
then
    echo "BAD USAGE : $0 [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# check audio file - First argument
# The file must exist and be non-empty; quoting keeps the test meaningful
# for names containing whitespace.
if [ -e "$1" ] && [ -s "$1" ]
then
    echo -e "$LORD\n"
    # Working directory handed to the later passes:
    # <OUTPUT_DIR>/<WAV_FILE without its last extension>
    REP_OUT=$2/${1%.*}
    # ${RING} stays unquoted on purpose: it is a space-separated option list
    # that must be split into separate words.
    if [ "$p1" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/FirstPass.sh" ${RING} "$1" "$2"; fi
    if [ "$p2" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/SecondPass.sh" ${RING} "${REP_OUT}"; fi
    # Confidence pass on the second pass results (a stray "$p" prefix was
    # removed from this command path).
    if [ "$conf" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/ConfPass.sh" ${RING} "${REP_OUT}" "res_p2"; fi
    if [ "$exploitconf" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/ExploitConfidencePass.sh" ${RING} "${REP_OUT}"; fi
    if [ "$p3" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/ThirdPass.sh" ${RING} "${REP_OUT}"; fi
    # Confidence pass again, this time on the third pass results.
    if [ "$conf" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/ConfPass.sh" ${RING} "${REP_OUT}" "res_p3"; fi
    if [ "$recompose" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/RecomposePass.sh" ${RING} "${REP_OUT}"; fi
    if [ "$scoring" -eq 1 ]; then "${MAIN_SCRIPT_PATH}/ScoringRes.sh" ${RING} "${REP_OUT}"; fi
    echo "done"
else
    echo "can't find $1 OR file is empty"
    exit 1
fi
162 162
163 163
164 164
165 165
tools/scripts/Tools.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ################################################# 3 #################################################
4 # File : Tools.sh # 4 # File : Tools.sh #
5 # Brief : Tool functions # 5 # Brief : Tool functions #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # Version : 1.0 # 7 # Version : 1.0 #
8 # Date : 13/06/13 # 8 # Date : 13/06/13 #
9 ################################################# 9 #################################################
10 10
11 # Type 11 # Type
12 ERROR=2 12 ERROR=2
13 WARN=1 13 WARN=1
14 INFO=0 14 INFO=0
15 15
16 # Global Verbose variable (have to be define in scripts) 16 # Global Verbose variable (have to be define in scripts)
17 # 0 : mute 17 # 0 : mute
18 # 1 : basic info (start,config,end) 18 # 1 : basic info (start,config,end)
19 # 2 : add process 19 # 2 : add process
20 # 3 : all 20 # 3 : all
21 VERBOSE=0 21 VERBOSE=0
22 22
# print_message(message_type, verbose_level, message)
# - message_type : ERROR, WARN, INFO
# - verbose_level : level to print message
# - message : the message to print
# Calls that do not pass exactly three arguments are ignored.
# An ERROR is always written to stderr and ends the current shell with
# status 1. WARN and INFO messages go to stdout, only when verbose_level
# is lower than or equal to $VERBOSE, and the function then returns 0.
function print_message()
{
    if [ $# -ne 3 ]; then return 0; fi

    # print error
    # Exit with a failure status: the previous "exit 0" reported success.
    if [ "$1" -eq "$ERROR" ]
    then
        printf '###ERROR : %b\n' "$3" >&2
        exit 1
    fi

    # check verbose level
    if [ "$2" -gt "$VERBOSE" ]; then return 0; fi

    # print warn
    # Return instead of exit: a warning must not end the calling script.
    if [ "$1" -eq "$WARN" ]
    then
        printf '##WARNING : %b\n' "$3"
        return 0
    fi

    # default info
    printf '#INFO : %b\n' "$3"
    return 0
}
44 44
# print_error(message)
# - message : error message to print
# Forwards the message to print_message with type $ERROR and level 3.
print_error()
{
    local error_text="$1"
    print_message $ERROR 3 "$error_text"
}
51 51
# print_warn(message,[level])
# - message : message to print
# - level : optional option - verbose level to print
# With exactly two arguments the given level is used; in every other case
# the current $VERBOSE value is passed as the level.
print_warn()
{
    case $# in
        2) print_message $WARN $2 "$1" ;;
        *) print_message $WARN $VERBOSE "$1" ;;
    esac
}
64 64
# print_info(message,[level])
# - message : message to print
# - level : optional option - verbose level to print
# With exactly two arguments the given level is used; in every other case
# the current $VERBOSE value is passed as the level.
print_info()
{
    case $# in
        2) print_message $INFO $2 "$1" ;;
        *) print_message $INFO $VERBOSE "$1" ;;
    esac
}
77 77
# print_log_file(filename,message)
# - filename : full path to log file
# - message : message to log
# Appends the message (backslash escapes interpreted) and a newline to the
# log file. Calls that do not pass exactly two arguments are ignored.
print_log_file()
{
    if [ $# -eq 2 ]
    then
        # The target is quoted so a path containing whitespace is not an
        # "ambiguous redirect"; printf %b keeps the escape handling of
        # "echo -e" without treating a message such as "-n" as an option.
        printf '%b\n' "$2" >> "$1"
    fi
}
88 88