Commit 7c52739538a4527d4e71336e7d451c9f58c11eab

Authored by Jean-François Rey
1 parent 1fa99e8a2b
Exists in master

up

Showing 1 changed file with 2 additions and 0 deletions Inline Diff

main_tools/ExploitConfidencePass.sh
#!/bin/bash

#####################################################
# File    : ExploitConfidencePass.sh                #
# Brief   : Exploit the ASR confidence pass to :    #
#           -> boost the confident zone             #
#           -> find alternative in non confident    #
#              zone                                 #
#           -> dynamically extend the lexicon       #
# Author  : Jean-François Rey                       #
#           (based on Emmanuel Ferreira             #
#           and Hugo Mauchrétien works)             #
# Version : 1.0                                     #
# Date    : 25/06/13                                #
#####################################################

echo "### ExploitConfidencePass.sh ###"
17 17
# Check OTMEDIA_HOME env var; when unset, fall back to the
# grandparent directory of this script (resolved via readlink -e).
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi
24 24
# Directory containing ExploitConfidencePass.sh itself
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Default location of the helper scripts unless already provided
if [ -z "${SCRIPT_PATH}" ]
then
    SCRIPT_PATH="$OTMEDIA_HOME/tools/scripts"
fi
32 32
# Include helper scripts (print_* logging helpers, pass checks)
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckExploitConfPass.sh"

# Load ExploitConfidencePass.cfg or abort: the rest of the pass
# depends on the variables it defines.
EXPLOITCONFIDENCEPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ExploitConfidencePass.cfg"
if [ -e "$EXPLOITCONFIDENCEPASS_CONFIG_FILE" ]
then
    . "$EXPLOITCONFIDENCEPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
    exit 1
fi
46 46
#---------------#
# Parse Options #
#---------------#
# Defaults for the option flags: they are tested with -eq later,
# so they must hold a numeric value even when no option is given.
# (Environment-provided values are preserved.)
DEBUG=${DEBUG:-0}
VERBOSE=${VERBOSE:-0}
CHECK=${CHECK:-0}
RERUN=${RERUN:-0}
while getopts ":hDv:cr" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-r n :\tforce rerun without deleting files"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknown option -$OPTARG"
            #exit 1
            ;;
    esac
done
85 85
# mode debug enable (trace every command with set -x)
if [ "${DEBUG:-0}" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable
if [ "${VERBOSE:-0}" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi

# Check USAGE by arguments number: exactly one positional argument
# (the input directory) must remain after the parsed options.
if [ $(($#-($OPTIND-1))) -ne 1 ]
then
    echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
    echo "$0 -h for more info"
    exit 1
fi
103 103
shift $((OPTIND-1))
# check input directory - first argument
# Quoting "$1" also makes an empty/missing argument fail here
# instead of slipping through the test.
if [ ! -e "$1" ]
then
    print_error "can't open $1"
    exit 1
fi
111 111
# NOTE(review): BASENAME is referenced here before being assigned
# below, so this first banner is tagged with an empty name — confirm
# whether a caller exports BASENAME beforehand.
print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1

#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e "$1")
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename "$OUTPUT_DIR")
SHOW_DIR="$OUTPUT_DIR/shows/"                       # per-show ctm/seg files
SOLR_RES="$OUTPUT_DIR/solr/"                        # SOLR answers (keywords/txt)
EXT_LEX="$OUTPUT_DIR/LEX/"                          # extended lexicon workspace
TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"               # trigger/boost files
LOGFILE="$OUTPUT_DIR/info_exploitconf.log"
ERRORFILE="$OUTPUT_DIR/error_exploitconf.log"
126 126
# Read back the configuration written by the previous confidence pass;
# when it is missing, fall back to the res_p2 result directories.
CONFPASS_CONFIG_FILE="$(readlink -e "$1")/ConfPass.cfg"
if [ -e "$CONFPASS_CONFIG_FILE" ]
then
    RES_CONF_DIR=$(grep "^RES_CONF_DIR=" "$CONFPASS_CONFIG_FILE" | cut -f2 -d"=")
    RES_CONF=$(grep "^CONF_DIR=" "$CONFPASS_CONFIG_FILE" | cut -f2 -d"=")
    print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2
else
    print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE"
    print_error "[${BASENAME}] -> use res_p2"
    RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
    RES_CONF="$INPUT_DIR/conf/res_p2"
fi
143 143
# Create the working sub-directories (errors such as "already exists"
# are deliberately ignored).
mkdir -p "$SHOW_DIR" > /dev/null 2>&1
mkdir -p "$SOLR_RES" > /dev/null 2>&1
mkdir -p "$EXT_LEX" > /dev/null 2>&1
mkdir -p "$TRIGGER_CONFZONE" > /dev/null 2>&1
148 148
#------------------#
# Create Workspace #
#------------------#
# Lock directory: abort when a previous run is still locked, unless a
# rerun was forced with -r.
# FIX(review): the original tested "$OUTPUT_DIR_BASENAME/…", a variable
# never set in this script, while the lock file is created in
# $OUTPUT_DIR below — so the lock was never detected. Test $OUTPUT_DIR.
if [ -e "$OUTPUT_DIR/EXPLOITCONFPASS.lock" ] && [ "${RERUN:-0}" -eq 0 ]
then
    print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1

# Start this run with fresh log files
rm "$LOGFILE" "$ERRORFILE" 2>/dev/null
162
#------#
# Save #
#------#
# Snapshot the configuration used by this run, augmented with the
# paths computed above (read back by the next decoding pass).
# NOTE(review): ${lexname} is only assigned later in the script, so
# these two LEX_* entries are written with an empty basename — confirm
# intended ordering.
cp "$EXPLOITCONFIDENCEPASS_CONFIG_FILE" "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
# FIX(review): message printed the unset $OUTPUT_DIR_BASENAME; report
# the real destination directory instead.
print_info "[${BASENAME}] Save config in $OUTPUT_DIR/ExploitConfPass.cfg" 1
170 172
#---------------#
# Check Pass    #
#---------------#
# Abort when the confidence pass produced no .res files at all.
if [ "$(ls ${RES_CONF_DIR}/*.res 2> /dev/null | wc -l)" -eq 0 ]
then
    print_error "[${BASENAME}] No Conf Pass res -> exit ExploitConfPass"
    if [ "${CHECK:-0}" -eq 1 ]; then print_log_file "$ERRORFILE" "No ConfPass res in ${RES_CONF_DIR}" ;fi
    exit 1
fi
180 182
#-----------------------#
# Segmentation by show  #
#-----------------------#
# create txt file from scored res
# tag pos and lemmatization of the txt file
# merge the scored res and taglem file
# segment using the last generated file
# and create a ctm file by show

print_info "[${BASENAME}] Segmentation by show" 1

# -> to txt
print_info "[${BASENAME}] Create txt from scored res" 3
cat ${RES_CONF_DIR}/*.res > "$INPUT_DIR/$BASENAME.sctm"
cat "$INPUT_DIR/$BASENAME.seg" | $SIGMUND_BIN/myConvert.pl "$INPUT_DIR/$BASENAME.sctm" "$INPUT_DIR/$BASENAME.tmp"
cat "$INPUT_DIR/$BASENAME.tmp" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > "$INPUT_DIR/$BASENAME.txt"

# -> to tagger + lemme
print_info "[${BASENAME}] Tag pos and lem in txt file" 3
iconv -t ISO_8859-1 "$INPUT_DIR/$BASENAME.txt" > "$INPUT_DIR/$BASENAME.tmp"
$SIGMUND_BIN/txt2lem.sh "$INPUT_DIR/$BASENAME.tmp" "$INPUT_DIR/$BASENAME.taglem"

# merge sctm and taglem
print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 3
cat "$INPUT_DIR/$BASENAME.sctm" | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl "$INPUT_DIR/$BASENAME.taglem" > "$INPUT_DIR/$BASENAME.ctl"

# -> new seg
print_info "[${BASENAME}] Create xml file and run Topic Seg" 3
$SIGMUND_BIN/tagLem2xml.pl "$INPUT_DIR/$BASENAME.taglem" "$INPUT_DIR/$BASENAME.doc.xml"
rm "$INPUT_DIR/$BASENAME.tmp" #$INPUT_DIR/$BASENAME.taglem

# Lia_topic_seg : bring together sentences into show
# NOTE(review): the Test class reads its input as ./0.xml in the
# current working directory — confirm cwd is writable.
cp "$INPUT_DIR/$BASENAME.doc.xml" 0.xml
java -cp $LIATOPICSEG/bin Test > "$INPUT_DIR/show.seg"
cat "$INPUT_DIR/show.seg" | $SIGMUND_BIN/toSegEmiss.pl "$INPUT_DIR/$BASENAME.show.seg"
rm 0.xml "$INPUT_DIR/show.seg"
217 219
# In check mode, warn when the topic segmentation produced no output.
if [ "${CHECK:-0}" -eq 1 ]
then
    if [ ! -s "$INPUT_DIR/$BASENAME.show.seg" ]
    then
        print_error "[${BASENAME}] No Topic segmentation ! "
        print_error "[${BASENAME}] Check $ERRORFILE "
        print_log_file "$ERRORFILE" "No Topic segmentation in ${BASENAME}.show.seg"
    fi
fi

# Segment ctm into several show files and create a seg list by show
print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 1
$SCRIPT_PATH/ctm2show.pl "$INPUT_DIR/$BASENAME.ctl" "$INPUT_DIR/$BASENAME.show.seg" "$SHOW_DIR"
231 233
#-----------------------------------------------------------#
# SOLR QUERIES                                              #
# -> Create Confidente Word                                 #
#    Keep conf words and use Tags                           #
# -> Query SOLR (document & multimedia)                     #
#    concat word + add date 2 day before and after the show #
#    query document & multimedia                            #
#-----------------------------------------------------------#
print_info "[${BASENAME}] Create SOLR queries and ask SOLR" 1
for show in $(ls $SHOW_DIR/*.ctm)
do
    bn=$(basename "$show" .ctm)
    # Remove words with low confidence and keep useful tagger words
    cat "$show" | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
    # Get date 2 day before and after the show
    # (assumes the first 6 chars of BASENAME encode the date — TODO confirm)
    datePattern=$($SCRIPT_PATH/daybefore2after.sh "$(echo $BASENAME | cut -c1-6)")
    # Create SOLR queries
    cat "$SHOW_DIR/$bn.confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
    # Ask SOLR DB only when the query file is non-empty
    if [ "$(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ')" -gt 0 ]; then
        query=$(cat "$SHOW_DIR/$bn.queries")"&fq=docDate:[$datePattern]"
        echo $query > "$SHOW_DIR/$bn.queries"
        print_info "python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp" 3
        python $SCRIPT_PATH/ProcessSOLRQueries.py "$SHOW_DIR/$bn.queries" "$SOLR_RES/$bn.keywords.tmp" "$SOLR_RES/$bn.txt.tmp"
        cat "$SOLR_RES/$bn.keywords.tmp" | sort -u > "$SOLR_RES/$bn.keywords"
        cat "$SOLR_RES/$bn.txt.tmp" | sort -u > "$SOLR_RES/$bn.txt"
        rm $SOLR_RES/*.tmp > /dev/null 2>&1
    fi

    if [ "${CHECK:-0}" -eq 1 ]
    then
        if [ ! -e "$SOLR_RES/$bn.keywords" ] || [ ! -e "$SOLR_RES/$bn.txt" ]
        then
            print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 2
            print_log_file "$LOGFILE" "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !"
        fi
    fi

done
271 273
#-----------------------------------------------------------------------------------------------
# Build trigger file
#   1) keywords are automatically boosted in the non confident zone of the current res
#      confident zone are boosted
#      previous words in sensible zone are penalized
#   2) OOVs are extracted + phonetized
#   3) Try to find OOVs acousticly in the current segment
#   4) Generate the .trigg file
#------------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build trigger files" 1
for i in $(ls $SOLR_RES/*.keywords)
do
    basename=$(basename "$i" .keywords)

    #
    # Tokenize & produce coverage report
    # Use filter you need
    #
    print_info "[${BASENAME}] keywords filtering and produce coverage report" 3
    # Default filter
    cat "$i" | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
        $SCRIPT_PATH/CoverageReportMaker.pl --out "$SOLR_RES/${basename}_tmp_report" $LEXICON.bdlex_tok
    # do less filter
    #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok


    #
    # Extract "real" OOV and phonetize them
    # -> small ad-hoc filtering to avoid too much noise
    #
    print_info "[${BASENAME}] Extract OOV and phonetize them" 3
    ${SCRIPT_PATH}/FindNormRules.pl "$SOLR_RES/${basename}_tmp_report/report.oov" $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > "$SOLR_RES/${basename}.phon_oov"

    #
    # Search INVOC & OOV in the current lattice
    #
    print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 3
    cat "$SOLR_RES/${basename}_tmp_report/report.invoc" | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > "$TRIGGER_CONFZONE/$basename.tosearch"
    cat "$SOLR_RES/${basename}.phon_oov" | cut -f1 >> "$TRIGGER_CONFZONE/$basename.tosearch"

    # For each treil (lattice) of the show
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon "$RES_CONF/wlat/$baseseg.wlat" "$TRIGGER_CONFZONE/${basename}.tosearch" "$SOLR_RES/$basename.phon_oov" > "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" $OUTPUT_REDIRECTION
        #
        # Produce the boost file for the next decoding pass
        #
        print_info "[${BASENAME}] Produce trigg file : $baseseg " 3
        cat "$RES_CONF_DIR/$baseseg.res" | $SCRIPT_PATH/ScoreCtm2trigg.pl "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" > "$TRIGGER_CONFZONE/$baseseg.trigg"
    done

done
324 326
#-----------------------------------------------------------------------------------------------
# Build the extended SPEERAL Lexicon
#   1) Merge OOVs + LEXICON
#   1) Related text are collected in order to find the invoc word with maximizing the ppl (LM proba)
#   2) The current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build extended Speeral Lexicon" 1
mkdir -p "$EXT_LEX/final"
mkdir -p "$EXT_LEX/tmp"
mkdir -p "$EXT_LEX/tmp/txt"
#
# Collect the acousticly found oov and their phonetisation
#
print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 3
for i in $(ls $SOLR_RES/*.phon_oov)
do
    basename=$(basename "$i" .phon_oov)

    rm "$EXT_LEX/$basename.acousticlyfound" 2> /dev/null
    # list acousticly found for the show
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        cat "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" | cut -f1 | cut -f2 -d"=" >> "$EXT_LEX/$basename.acousticlyfound"
    done
    # de-duplicate the per-show list
    cat "$EXT_LEX/$basename.acousticlyfound" | sort -u > "$EXT_LEX/.tmp"
    mv "$EXT_LEX/.tmp" "$EXT_LEX/$basename.acousticlyfound"

    #
    # Extract OOV really added
    #
    cat "$SOLR_RES/$basename.phon_oov" | cut -f1 | sort -u > "$EXT_LEX/$basename.oov"
    $SCRIPT_PATH/intersec.pl "$EXT_LEX/$basename.oov" "$EXT_LEX/$basename.acousticlyfound" > "$EXT_LEX/$basename.oov_acousticlyfound"
    #
    # Retrieve all phonetisation
    #
    cat "$SOLR_RES/${basename}.phon_oov" | $SCRIPT_PATH/LexPhonFilter.pl "$EXT_LEX/$basename.oov_acousticlyfound" > "$EXT_LEX/$basename.oov_acousticlyfound_phon"
done
362 364
#
# Merge OOVs and their phonetisation
#
print_info "[${BASENAME}] Merge OOV and their phonetisation" 3
lexname=$(basename "$LEXICON")
cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > "$EXT_LEX/final/all.oov_acousticlyfound_phon"
cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > "$EXT_LEX/final/all.oov_acousticlyfound"
$SCRIPT_PATH/MergeLexicon.pl "$EXT_LEX/final/all.oov_acousticlyfound_phon" > "$EXT_LEX/final/${lexname}_ext.phon"
371 373
#
# Collect + clean retrieved txt
#
print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2
# choose filter
# default
cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > "$EXT_LEX/final/all.bdlex_txt"
# low filter
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
381 383
#
# Construct the map file
#
# Notes:
#   - Expected format :
#     <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
#
print_info "[${BASENAME}] Construct map file" 3
# start from clean output files for this run
rm -f "$EXT_LEX/final/${lexname}_ext.map" 2>/dev/null
rm -f "$EXT_LEX/final/${lexname}.unvalid_oov" 2>/dev/null
392 394
393 while read oov 395 while read oov
394 do 396 do
395 oov=`echo $oov | sed "s/\n//g"` 397 oov=`echo $oov | sed "s/\n//g"`
396 # 398 #
397 # Obtain the oov's tag 399 # Obtain the oov's tag
398 # 400 #
399 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` 401 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
400 # 402 #
401 # Try to collect text containing the oov word 403 # Try to collect text containing the oov word
402 # 404 #
403 print_info "[${BASENAME}] Collect text containing the oov" 3 405 print_info "[${BASENAME}] Collect text containing the oov" 3
404 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt 406 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
405 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then 407 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
406 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 408 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
407 if [ $nbWords -eq 0 ]; then 409 if [ $nbWords -eq 0 ]; then
408 print_warn "[${BASENAME}] UNVALID OOV: $oov => $nbWords occurrences" 2 410 print_warn "[${BASENAME}] UNVALID OOV: $oov => $nbWords occurrences" 2
409 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 411 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
410 else 412 else
411 # 413 #
412 # Find a candidate in a filtred invoc lexicon => a candidate which maximize the ppl in the overall txt collected 414 # Find a candidate in a filtred invoc lexicon => a candidate which maximize the ppl in the overall txt collected
413 # 415 #
414 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" 416 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
415 print_info `$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 3 417 print_info `$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 3
416 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 418 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
417 if [ ! "$candidate" == "" ]; then 419 if [ ! "$candidate" == "" ]; then
418 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon 420 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
419 while read phonLine 421 while read phonLine
420 do 422 do
421 #<word> <phon> => <word> <candidate> <phon> 423 #<word> <phon> => <word> <candidate> <phon>
422 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map 424 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
423 done < $EXT_LEX/tmp/$oov.phon 425 done < $EXT_LEX/tmp/$oov.phon
424 else 426 else
425 print_warn "[${BASENAME}] UNVALID OOV: $oov => no availaible Candidate word in LM" 2 427 print_warn "[${BASENAME}] UNVALID OOV: $oov => no availaible Candidate word in LM" 2
426 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 428 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
427 fi 429 fi
428 fi 430 fi
429 else 431 else
430 print_warn "[${BASENAME}] UNVALID OOV: $oov" 2 432 print_warn "[${BASENAME}] UNVALID OOV: $oov" 2
431 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 433 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
432 fi 434 fi
433 done < $EXT_LEX/final/all.oov_acousticlyfound 435 done < $EXT_LEX/final/all.oov_acousticlyfound
434 436
#
### Speeral
#

lexname=$(basename "$LEXICON")
#
# Build the final trigger file: strip from every .trigg file the lines
# containing OOV words that were rejected during map construction.
#
print_info "[${BASENAME}] Clean trigg files" 3
mkdir -p "$TRIGGER_CONFZONE/speeral/" 2> /dev/null
mkdir -p "$EXT_LEX/speeral/" 2> /dev/null
# Iterate with a glob instead of parsing `ls` output (safe with spaces,
# no word-splitting of filenames).
for i in "$TRIGGER_CONFZONE"/*.trigg
do
    [ -e "$i" ] || continue   # unmatched glob => no .trigg files; skip
    trigg_base=$(basename "$i" .trigg)
    # FIX: the reject list is written to $EXT_LEX/final/ by the map
    # construction loop above; the old path ($EXT_LEX/$lexname.unvalid_oov)
    # pointed to a file that is never created there.
    "$SCRIPT_PATH/RemoveLineContaining.pl" "$EXT_LEX/final/${lexname}.unvalid_oov" < "$i" \
        > "$TRIGGER_CONFZONE/speeral/$trigg_base.trigg"
done
451 # 453 #
452 # Compile the speeral extended lexicon 454 # Compile the speeral extended lexicon
453 # 455 #
454 print_info "[${BASENAME}] Compile Speeral extended lexicon" 3 456 print_info "[${BASENAME}] Compile Speeral extended lexicon" 3
455 print_info "$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext" 3 457 print_info "$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext" 3
456 $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext 458 $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext
457 459
458 if [ $CHECK -eq 1 ] 460 if [ $CHECK -eq 1 ]
459 then 461 then
460 check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext" 462 check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
461 if [ $? -eq 1 ] 463 if [ $? -eq 1 ]
462 then 464 then
463 print_error "[${BASENAME}] Building Speeral Lexicon $INPUT_DIR -> exit" 465 print_error "[${BASENAME}] Building Speeral Lexicon $INPUT_DIR -> exit"
464 print_error "[${BASENAME}] Check $ERRORFILE" 466 print_error "[${BASENAME}] Check $ERRORFILE"
465 print_log_file $ERRORFILE "ERROR : Building Speeral Lexicon $INPUT_DIR" 467 print_log_file $ERRORFILE "ERROR : Building Speeral Lexicon $INPUT_DIR"
466 print_log_file $ERRORFILE "ERROR : ${EXT_LEX}/speeral/${lexname}_ext Empty after buildmappedbinode ?" 468 print_log_file $ERRORFILE "ERROR : ${EXT_LEX}/speeral/${lexname}_ext Empty after buildmappedbinode ?"
467 exit 1; 469 exit 1;
468 fi 470 fi
469 fi 471 fi
470 472
471 473
472 #-------# 474 #-------#
473 # CLOSE # 475 # CLOSE #
474 #-------# 476 #-------#
475 # Seem OK 477 # Seem OK
476 print_info "[${BASENAME}] <= ExploitConfidencePass End | $(date +'%d/%m/%y %H:%M:%S')" 1 478 print_info "[${BASENAME}] <= ExploitConfidencePass End | $(date +'%d/%m/%y %H:%M:%S')" 1
477 479
478 # unlok directory 480 # unlok directory
479 mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" 481 mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
480 482
481 483
482 484