Commit 4188f35cd874216184dc7cd30737873852b25808

Authored by Jean-François Rey
1 parent c388b40c7c
Exists in master

update

Showing 2 changed files with 2 additions and 1 deletion

  - Check and add Verbose messages
+ - Modify option -r behaviour
  - Modify SOLR request
main_tools/ExploitConfidencePass.sh
#!/bin/bash

#####################################################
# File : ExploitConfidencePass.sh #
# Brief : Exploit the ASR confidence pass to : #
# -> boost the confident zone #
# -> find alternatives in the non confident zone #
# -> dynamically extend the lexicon #
# Author : Jean-François Rey #
# (based on Emmanuel Ferreira #
# and Hugo Mauchrétien's work) #
# Version : 1.0 #
# Date : 25/06/13 #
#####################################################

echo "### ExploitConfidencePass.sh ###"

# Check OTMEDIA_HOME env var
if [ -z ${OTMEDIA_HOME} ]
then
    OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
    export OTMEDIA_HOME=$OTMEDIA_HOME
fi
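# Note: the auto-detection above can be bypassed by exporting the variable
# before calling the script, e.g. (illustrative path only):
#   export OTMEDIA_HOME=/opt/otmedia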

# where is ExploitConfidencePass.sh
MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))

if [ -z ${SCRIPT_PATH} ]
then
    SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
fi

# Include scripts
. $SCRIPT_PATH"/Tools.sh"
. $SCRIPT_PATH"/CheckExploitConfPass.sh"

# where is ExploitConfidencePass.cfg
EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg"
if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ]
then
    . $EXPLOITCONFIDENCEPASS_CONFIG_FILE
else
    echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
    exit 1
fi

#---------------#
# Parse Options #
#---------------#
while getopts ":hDv:cf:r" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
            echo -e "\t\t-r :\tforce rerun without deleting files"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknown option -$OPTARG"
            #exit 1
            ;;
    esac
done
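# Typical invocation (illustrative, hypothetical paths):
#   ./main_tools/ExploitConfidencePass.sh -v 2 -c -f 4 /path/to/INPUT_DIRECTORY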

# debug mode enabled
if [ $DEBUG -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# verbose mode enabled
if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi

# Check USAGE by arguments number
if [ $(($#-($OPTIND-1))) -ne 1 ]
then
    echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# check input directory - first argument
if [ ! -e $1 ]
then
    print_error "can't open $1"
    exit 1
fi

print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1

#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e $1)
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename $OUTPUT_DIR)
SHOW_DIR="$OUTPUT_DIR/shows/"
SOLR_RES="$OUTPUT_DIR/solr/"
EXT_LEX="$OUTPUT_DIR/LEX/"
TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
LOGFILE="$OUTPUT_DIR/info_exploitconf.log"
ERRORFILE="$OUTPUT_DIR/error_exploitconf.log"

CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg"
if [ -e $CONFPASS_CONFIG_FILE ]
then
    {
        RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=")
        RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=")
        print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2
    }
else
    {
        print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE"
        print_error "[${BASENAME}] -> use res_p2"
        RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
        RES_CONF="$INPUT_DIR/conf/res_p2"
    }
fi

mkdir -p $SHOW_DIR > /dev/null 2>&1
mkdir -p $SOLR_RES > /dev/null 2>&1
mkdir -p $EXT_LEX > /dev/null 2>&1
mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1

#------------------#
# Create Workspace #
#------------------#
# Lock directory
if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]
then
    print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1
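# Note: the lock file is turned back into an EXPLOITCONFPASS.unlock file in the
# CLOSE section at the end of this script, presumably so that later passes can
# tell that this pass has completed.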

#------#
# Save #
#------#
cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg
echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg
echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg
echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg
echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg
print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1

#---------------#
# Check Pass #
#---------------#
if [ $( ls ${RES_CONF_DIR}/*.res 2> /dev/null | wc -l) -eq 0 ]
then
    print_error "[${BASENAME}] No Conf Pass res -> exit ExploitConfPass"
    if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ConfPass res in ${RES_CONF_DIR}" ;fi
    exit 1
fi

#-----------------------#
# Segmentation by show #
#-----------------------#
# create txt file from scored res
# tag pos and lemmatization of the txt file
# merge the scored res and taglem file
# segment using the last generated file
# and create a ctm file by show

print_info "[${BASENAME}] Segmentation by show" 1

# -> to txt
print_info "[${BASENAME}] Create txt from scored res" 3
cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp
cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt

# -> to tagger + lemma
print_info "[${BASENAME}] Tag pos and lem in txt file" 3
iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
$SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem

# merge sctm and taglem
print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 3
cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl

# -> new seg
print_info "[${BASENAME}] Create xml file and run Topic Seg" 3
$SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem

# Lia_topic_seg : group sentences into shows
cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg
rm 0.xml $INPUT_DIR/show.seg

if [ $CHECK -eq 1 ]
then
    if [ ! -s $INPUT_DIR/$BASENAME.show.seg ]
    then
        print_error "[${BASENAME}] No Topic segmentation ! "
        print_error "[${BASENAME}] Check $ERRORFILE "
        print_log_file "$ERRORFILE" "No Topic segmentation in ${BASENAME}.show.seg"
    fi
fi

# Segment ctm into several show files and create a seg list by show
print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 1
$SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR

#-----------------------------------------------------------#
# SOLR QUERIES #
# -> Create confident words #
# Keep conf words and use Tags #
# -> Query SOLR (document & multimedia) #
# concat words + add dates 2 days before and after the show #
# query document & multimedia #
#-----------------------------------------------------------#
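# For reference (illustrative only): each per-show query built below ends up
# roughly of the form
#   <query produced by GenerateSOLRQueries.pl>&fq=docDate:[<date range>]
# where the date range is whatever daybefore2after.sh returns for the show date.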
print_info "[${BASENAME}] Create SOLR queries and ask SOLR" 1
for show in $(ls $SHOW_DIR/*.ctm)
do
    bn=$(basename $show .ctm)
    # Remove words with low confidence and keep useful tagger words
    cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
    # Get the dates 2 days before and after the show
    datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
    # Create SOLR queries
    cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
    # Query the SOLR DB
    if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
        query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
        echo $query > $SHOW_DIR/$bn.queries
-       prnt_info "python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp" 3
+       print_info "python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp" 3
        python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp
        cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
        cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
        rm $SOLR_RES/*.tmp > /dev/null 2>&1
    fi

    if [ $CHECK -eq 1 ]
    then
        if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
        then
            print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 2
            print_log_file "$LOGFILE" "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !"
        fi
    fi

done

#-----------------------------------------------------------------------------------------------
# Build trigger file
# 1) keywords are automatically boosted in the non confident zone of the current res
#    confident zones are boosted
#    previous words in sensitive zones are penalized
# 2) OOVs are extracted + phonetized
# 3) Try to find OOVs acoustically in the current segment
# 4) Generate the .trigg file
#------------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build trigger files" 1
for i in `ls $SOLR_RES/*.keywords`
do
    basename=`basename $i .keywords`

    #
    # Tokenize & produce coverage report
    # Use the filter you need
    #
    print_info "[${BASENAME}] keywords filtering and produce coverage report" 3
    # Default filter
    cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
        $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
    # lighter filter
    #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok


    #
    # Extract "real" OOVs and phonetize them
    # -> small custom filtering to avoid too much noise
    #
    print_info "[${BASENAME}] Extract OOV and phonetize them" 3
    ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov

    #
    # Search INVOC & OOV in the current lattice
    #
    print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 3
    cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
    cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch

    # For each lattice
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
        #
        # Produce the boost file for the next decoding pass
        #
        print_info "[${BASENAME}] Produce trigg file : $baseseg " 3
        cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
    done

done

#-----------------------------------------------------------------------------------------------
# Build the extended SPEERAL Lexicon
# 1) Merge OOVs + LEXICON
# 2) Related texts are collected in order to find the invoc word maximizing the ppl (LM proba)
# 3) The current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build extended Speeral Lexicon" 1
mkdir -p $EXT_LEX/final
mkdir -p $EXT_LEX/tmp
mkdir -p $EXT_LEX/tmp/txt
#
# Collect the acoustically found OOVs and their phonetisations
#
print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 3
for i in `ls $SOLR_RES/*.phon_oov`
do
    basename=`basename $i .phon_oov`

    rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
    # list acoustically found words for the show
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
    done
    cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
    mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound

    #
    # Extract the OOVs actually added
    #
    cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
    $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound
    #
    # Retrieve all phonetisations
    #
    cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
done

#
# Merge OOVs and their phonetisations
#
print_info "[${BASENAME}] Merge OOV and their phonetisation" 3
lexname=$(basename $LEXICON)
cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
$SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon

#
# Collect + clean retrieved txt
#
print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2
# choose filter
# default
cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
# low filter
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt

#
# Construct the map file
#
# Notes:
# - Expected format :
#   <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
#
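# (Illustrative only: each map entry pairs an OOV with the in-vocabulary
#  candidate chosen by getCandidate below, plus the phonetisation kept from
#  lia_phon, i.e. a tab-separated line "<oov>\t<candidate>\t<phon>".)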
print_info "[${BASENAME}] Construct map file" 3
rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null

while read oov
do
    oov=`echo $oov | sed "s/\n//g"`
    #
    # Obtain the oov's tag
    #
    #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
    #
    # Try to collect text containing the oov word
    #
    print_info "[${BASENAME}] Collect text containing the oov" 3
    cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 | uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
    if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
        nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
        if [ $nbWords -eq 0 ]; then
            print_warn "[${BASENAME}] INVALID OOV: $oov => $nbWords occurrences" 2
            echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
        else
            #
            # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl over the collected text
            #
            #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
            print_info `$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 3
            candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
            if [ ! "$candidate" == "" ]; then
                grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
                while read phonLine
                do
                    #<word> <phon> => <word> <candidate> <phon>
                    echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
                done < $EXT_LEX/tmp/$oov.phon
            else
                print_warn "[${BASENAME}] INVALID OOV: $oov => no available candidate word in LM" 2
                echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
            fi
        fi
    else
        print_warn "[${BASENAME}] INVALID OOV: $oov" 2
        echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
    fi
done < $EXT_LEX/final/all.oov_acousticlyfound

#
### Speeral
#

lexname=`basename $LEXICON`
#
# Build the final trigger file
#
print_info "[${BASENAME}] Clean trigg files" 3
mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null
mkdir -p $EXT_LEX/speeral/ 2> /dev/null
for i in `ls $TRIGGER_CONFZONE/*.trigg`
do
    basename=`basename $i .trigg`
    cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg
done
#
# Compile the speeral extended lexicon
#
print_info "[${BASENAME}] Compile Speeral extended lexicon" 3
print_info "$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext" 3
$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext

if [ $CHECK -eq 1 ]
then
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    if [ $? -eq 1 ]
    then
        print_error "[${BASENAME}] Building Speeral Lexicon $INPUT_DIR -> exit"
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file $ERRORFILE "ERROR : Building Speeral Lexicon $INPUT_DIR"
        print_log_file $ERRORFILE "ERROR : ${EXT_LEX}/speeral/${lexname}_ext Empty after buildmappedbinode ?"
        exit 1;
    fi
fi


#-------#
# CLOSE #
#-------#
# Seems OK
print_info "[${BASENAME}] <= ExploitConfidencePass End | $(date +'%d/%m/%y %H:%M:%S')" 1

# unlock directory
mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"