Commit f3043a8b949ebc79be54053c83fe110e9fedc59c

Authored by Jean-François Rey
1 parent 48b8af9ef5
Exists in master

bugfix

Showing 2 changed files with 4 additions and 4 deletions

main_tools/ConfPass.sh
#!/bin/bash

#####################################################
# File : ConfPass.sh #
# Brief : Process the ASR Confidence pass #
# Author : Jean-François Rey #
# (based on Emmanuel Ferreira #
# and Hugo Mauchrétien's work) #
# Version : 1.0 #
# Date : 17/06/13 #
#####################################################

echo "### ConfPass.sh ###"

# Check OTMEDIA_HOME env var
if [ -z ${OTMEDIA_HOME} ]
then
    OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
    export OTMEDIA_HOME=$OTMEDIA_HOME
fi


# where is ConfPass.sh
MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))

# Scripts Path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts
. $SCRIPT_PATH"/Tools.sh"
. $SCRIPT_PATH"/CheckConfPass.sh"

# where is ConfPass.cfg
CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg"
if [ -e $CONFPASS_CONFIG_FILE ]
then
    . $CONFPASS_CONFIG_FILE
else
    echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" >&2
    exit 1
fi
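
# Note (editorial addition, not part of the original file): ConfPass.cfg is not shown in
# this commit; it is presumably where variables used further down in this script, such as
# AUTHOR and RULES, are defined.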

#---------------#
# Parse Options #
#---------------#
while getopts ":hDv:cr" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-r :\tForce rerun of ConfPass without deleting existing files"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknown option -$OPTARG"
            #exit 1
            ;;
    esac
done

# mode debug enable
if [ $DEBUG -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable
if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi

# Check USAGE by arguments number
if [ $(($#-($OPTIND-1))) -ne 2 ]
then
    echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# check input directory - first argument
if [ ! -e $1 ]
then
    print_error "can't open $1"
    exit 1
fi
# check treil input directory - second argument
if [ ! -e $1/$2 ]
then
    print_error "can't open $1/$2"
    exit 1
fi
#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e $1)
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename $OUTPUT_DIR)
RES_NAME=$2
RES_P="${INPUT_DIR}/${RES_NAME}"
USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf
CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME"
RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm"
LOGFILE="${OUTPUT_DIR}/info_conf.log"
ERRORFILE="${OUTPUT_DIR}/error_conf.log"

print_info "[${BASENAME}] => Conf Pass start | $(date +'%d/%m/%y %H:%M:%S')" 1
#------------------#
# Create Workspace #
#------------------#
# Lock directory
if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ]
then
    print_warn "[${BASENAME}] Confpass is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1
if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi
if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi
mkdir -p $CONF_DIR > /dev/null 2>&1
mkdir -p $RES_CONF_DIR > /dev/null 2>&1
rm $LOGFILE $ERRORFILE > /dev/null 2>&1
#---------------#
#  Check Pass   #
#---------------#
print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1
# if the usf contains more than 49% of 0.600 confidence values -> usf error
if [ -s $USF_FILE ]
then
    conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}")
    confall=$(grep -c -E 'confidence=' "${USF_FILE}")
    if [ $confall -gt 0 ]
    then
        pourcentageofzerosix=$((($conftozerosix*100)/$confall))
        if [ $pourcentageofzerosix -gt 49 ]
        then
            print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2
            print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1
            mv "${USF_FILE}" "${USF_FILE}.back"
            rm -r $CONF_DIR > /dev/null 2>&1
        else
            print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1
            exit 0
        fi
    fi
else
    print_info "[${BASENAME}] No USF file done yet, continue..." 1
fi
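# Illustrative check (editorial addition, not part of the original script): the same ratio
# test can be run by hand on an existing usf file, e.g.
#   zerosix=$(grep -c 'confidence="0.600"' 20130617_FRINTER.res_p2.usf)   # hypothetical file
#   all=$(grep -c 'confidence=' 20130617_FRINTER.res_p2.usf)
#   echo $(( (zerosix*100) / all ))
# A result above 49 means the usf is considered unreliable, moved aside to .back and redone.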

# Check if the treils are here
nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l)
nbtreil_p=$(ls ${RES_P}/*.treil 2> /dev/null | wc -l)
if [ $nbtreil_p -eq 0 ]
then
    print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass"
    if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi
    exit 1
else
    # Check if more than 89% of the treils are done
    if [ $nbres_p1 -gt 0 ]
    then
        pourcentage=$((($nbtreil_p*100)/$nbres_p1))
        if [ $pourcentage -gt 89 ]
        then
            print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1
        else
            print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2
            if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil" ;fi
        fi
    fi
fi

#------#
# Save #
#------#
cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg
echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg
echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg
print_info "[${BASENAME}] Save config in $OUTPUT_DIR/ConfPass.cfg" 1
#--------------------#
# CONFIDENCE MEASURE #
#--------------------#

# Check the percentage of scored_ctm already done; if < 85%, do the confidence measure
-nbres_p=$(ls ${RES_P}/*.treil | wc -l)
+nbres_p=$(ls ${RES_P}/*.treil 2> /dev/null | wc -l)
-nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l)
+nbconf=$(ls ${RES_CONF_DIR}/*.res 2> /dev/null | wc -l)
if [ $nbres_p -gt 0 ]
then
    pourcentageres=$((($nbconf*100)/$nbres_p))
    if [ $pourcentageres -lt 85 ]
    then
        print_info "[${BASENAME}] Compute Confidence $INPUT_DIR $RES_NAME" 1
        $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME

    else
        print_info "[${BASENAME}] Skipping Confidence computation $INPUT_DIR/$RES_NAME" 1
    fi
fi

### Check the number of scored_ctm res files !
if [ $CHECK -eq 1 ]
then
-    nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l)
+    nbconf=$(ls ${RES_CONF_DIR}/*.res 2> /dev/null | wc -l)
    if [ $nbres_p -ne $nbconf ]
    then
        print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2
        print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ"
    fi
fi

#---------------------------#
# FROM RES WITH CONF => USF #
#---------------------------#
print_info "[${BASENAME}] Create USF file for $RES_P" 1
for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done
# create USF configuration file
echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg
# create USF file
print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg" 3
$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg
rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1
cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE
cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf
rm $USF_FILE.tmp > /dev/null 2>&1
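# For reference (editorial addition, values are hypothetical): the temporary usf_cfg
# written by the echo -e above and consumed by res2out.pl looks like
#   name <AUTHOR>
#   fileName 20130617_FRINTER
#   fileExt wav
#   segFile /data/otmedia/20130617_FRINTER/20130617_FRINTER.seg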

#----------------#
# Check USF file #
#----------------#
if [ $CHECK -eq 1 ]
then
    check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf"
    if [ $? -eq 1 ]
    then
        print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf"
        print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf"
        exit 1
    fi
fi

#-------#
# CLOSE #
#-------#
# Seems OK
print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1

# unlock directory
mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock"

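For context, a hypothetical invocation of the script above (paths and pass name are examples, not taken from the commit):

./main_tools/ConfPass.sh -v 2 -c /data/otmedia/20130617_FRINTER res_p2

A crashed run leaves CONFPASS.lock in the input directory; rerunning with -r bypasses the lock and keeps the existing conf/ files, removing only the previous usf so it can be rebuilt.
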
main_tools/ExploitConfidencePass.sh
#!/bin/bash

#####################################################
# File : ExploitConfidencePass.sh #
# Brief : Exploit the ASR confidence pass to : #
# -> boost the confident zones #
# -> find alternatives in non confident zones #
# -> dynamically extend the lexicon #
# Author : Jean-François Rey #
# (based on Emmanuel Ferreira #
# and Hugo Mauchrétien's work) #
# Version : 1.0 #
# Date : 25/06/13 #
#####################################################

echo "### ExploitConfidencePass.sh ###"

# Check OTMEDIA_HOME env var
if [ -z ${OTMEDIA_HOME} ]
then
    OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
    export OTMEDIA_HOME=$OTMEDIA_HOME
fi

# where is ExploitConfidencePass.sh
MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))

if [ -z ${SCRIPT_PATH} ]
then
    SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
fi

# Include scripts
. $SCRIPT_PATH"/Tools.sh"
. $SCRIPT_PATH"/CheckExploitConfPass.sh"

# where is ExploitConfidencePass.cfg
EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg"
if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ]
then
    . $EXPLOITCONFIDENCEPASS_CONFIG_FILE
else
    echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
    exit 1
fi
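
# Note (editorial addition): ExploitConfidencePass.cfg is not shown in this commit; it is
# presumably where the variables used below but never set in this script are defined, e.g.
# SIGMUND_BIN, LIATOPICSEG, LEXICON, RULES, SPEERAL_PATH, SPEER_LM_PATH, SPEER_LM_BASENAME,
# CANDIDATE_LEXICON, AM_SKL and OUTPUT_REDIRECTION.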

#---------------#
# Parse Options #
#---------------#
while getopts ":hDv:cf:r" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
            echo -e "\t\t-r :\tforce rerun without deleting files"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknown option -$OPTARG"
            #exit 1
            ;;
    esac
done

# mode debug enable
if [ $DEBUG -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable
if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi

# Check USAGE by arguments number
if [ $(($#-($OPTIND-1))) -ne 1 ]
then
    echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# check input directory - first argument
if [ ! -e $1 ]
then
    print_error "can't open $1"
    exit 1
fi

print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1
#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e $1)
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename $OUTPUT_DIR)
SHOW_DIR="$OUTPUT_DIR/shows/"
SOLR_RES="$OUTPUT_DIR/solr/"
EXT_LEX="$OUTPUT_DIR/LEX/"
TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
LOGFILE="$OUTPUT_DIR/info_exploitconf.log"
ERRORFILE="$OUTPUT_DIR/error_exploitconf.log"

CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg"
if [ -e $CONFPASS_CONFIG_FILE ]
then
    {
    RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=")
    RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=")
    print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2
    }
else
    {
    print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE"
    print_error "[${BASENAME}] -> use res_p2"
    RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
    RES_CONF="$INPUT_DIR/conf/res_p2"
    }
fi
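# For reference (editorial addition): ConfPass.sh appends exactly the two keys read above
# to ConfPass.cfg, so the file is expected to end with lines such as (path illustrative):
#   RES_CONF_DIR=/data/otmedia/20130617_FRINTER/conf/res_p2/scored_ctm
#   CONF_DIR=/data/otmedia/20130617_FRINTER/conf/res_p2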

mkdir -p $SHOW_DIR > /dev/null 2>&1
mkdir -p $SOLR_RES > /dev/null 2>&1
mkdir -p $EXT_LEX > /dev/null 2>&1
mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1

#------------------#
# Create Workspace #
#------------------#
# Lock directory
if [ -e "$OUTPUT_DIR/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]
then
    print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1

#------#
# Save #
#------#
cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg
echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg
echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg
echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg
echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg
print_info "[${BASENAME}] Save config in $OUTPUT_DIR/ExploitConfPass.cfg" 1

#---------------#
#  Check Pass   #
#---------------#
if [ $(ls ${RES_CONf_DIR}/*.res 2> /dev/null | wc -l) -eq 0 ]
then
    print_error "[${BASENAME}] No Conf Pass res -> exit ExploitConfPass"
-    if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ConfPass res in $[RES_CONf_DIR}" ;fi
+    if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ConfPass res in ${RES_CONf_DIR}" ;fi
    exit 1
fi

#-----------------------#
#  Segmentation by show #
#-----------------------#
# create a txt file from the scored res
# tag pos and lemmatize the txt file
# merge the scored res and the taglem file
# segment using the last generated file
# and create a ctm file by show

print_info "[${BASENAME}] Segmentation by show" 1

# -> to txt
print_info "[${BASENAME}] Create txt from scored res" 3
cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp
cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt

# -> to tagger + lemma
print_info "[${BASENAME}] Tag pos and lem in txt file" 3
iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
$SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem

# merge sctm and taglem
print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 3
cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl

# -> new seg
print_info "[${BASENAME}] Create xml file and run Topic Seg" 3
$SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem

# Lia_topic_seg : group sentences into shows
cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg
rm 0.xml $INPUT_DIR/show.seg

if [ $CHECK -eq 1 ]
then
    if [ ! -s $INPUT_DIR/$BASENAME.show.seg ]
    then
        print_error "[${BASENAME}] No Topic segmentation !"
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file "$ERRORFILE" "No Topic segmentation in ${BASENAME}.show.seg"
    fi
fi

# Segment the ctm into one file per show and create a seg list by show
print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 1
$SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR

#-----------------------------------------------------------#
# SOLR QUERIES #
# -> Create confident word lists #
#    Keep conf words and use Tags #
# -> Query SOLR (document & multimedia) #
#    concat words + add dates 2 days before and after the show #
#    query document & multimedia #
#-----------------------------------------------------------#
print_info "[${BASENAME}] Create SOLR queries and ask SOLR" 1
for show in $(ls $SHOW_DIR/*.ctm)
do
    bn=$(basename $show .ctm)
    # Remove words with low confidence and keep useful tagged words
    cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
    # Get the dates 2 days before and after the show
    datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
    # Create SOLR queries
    cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
    # Ask the SOLR DB
    if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
        query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
        echo $query > $SHOW_DIR/$bn.queries
        print_info "python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp" 3
        python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp
        cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
        cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
        rm $SOLR_RES/*.tmp > /dev/null 2>&1
    fi

    if [ $CHECK -eq 1 ]
    then
        if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
        then
            print_warn "$bn.keywords and $bn.txt are empty !\nMaybe the SOLR server is down !" 2
            print_log_file "$LOGFILE" "$bn.keywords and $bn.txt are empty !\nMaybe the SOLR server is down !"
        fi
    fi

done
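# Illustrative note (editorial addition; the exact syntax depends on GenerateSOLRQueries.pl
# and daybefore2after.sh): after the loop above, each <show>.queries file holds a single
# query string of the form
#   <keyword query>&fq=docDate:[$datePattern]
# where datePattern covers the two days before and after the show date taken from the
# first six characters of $BASENAME; ProcessSOLRQueries.py sends it to SOLR and fills
# <show>.keywords and <show>.txt.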

#-----------------------------------------------------------------------------------------------
# Build trigger file
# 1) keywords are automatically boosted in the non confident zones of the current res
#    confident zones are boosted
#    previous words in sensitive zones are penalized
# 2) OOVs are extracted + phonetized
# 3) Try to find the OOVs acoustically in the current segment
# 4) Generate the .trigg file
#------------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build trigger files" 1
for i in `ls $SOLR_RES/*.keywords`
do
    basename=`basename $i .keywords`

    #
    # Tokenize & produce a coverage report
    # Use the filter you need
    #
    print_info "[${BASENAME}] keywords filtering and produce coverage report" 3
    # Default filter
    cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
        $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
    # lighter filter
    #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok


    #
    # Extract "real" OOVs and phonetize them
    # -> light custom filtering to avoid too much noise
    #
    print_info "[${BASENAME}] Extract OOV and phonetize them" 3
    ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov

    #
    # Search INVOC & OOV in the current lattice
    #
    print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 3
    cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
    cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch

    # For each treil
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
        #
        # Produce the boost file for the next decoding pass
        #
        print_info "[${BASENAME}] Produce trigg file : $baseseg" 3
        cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
    done

done

#-----------------------------------------------------------------------------------------------
# Build the extended SPEERAL Lexicon
# 1) Merge OOVs + LEXICON
# 2) Related texts are collected in order to find the invoc word maximizing the ppl (LM proba)
# 3) The current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build extended Speeral Lexicon" 1
mkdir -p $EXT_LEX/final
mkdir -p $EXT_LEX/tmp
mkdir -p $EXT_LEX/tmp/txt
#
# Collect the acoustically found OOVs and their phonetisation
#
print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 3
for i in `ls $SOLR_RES/*.phon_oov`
do
    basename=`basename $i .phon_oov`

    rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
    # list the acoustically found words for the show
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
    done
    cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
    mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound

    #
    # Extract the OOVs really added
    #
    cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
    $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound
    #
    # Retrieve all phonetisations
    #
    cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
done

#
# Merge OOVs and their phonetisation
#
print_info "[${BASENAME}] Merge OOV and their phonetisation" 3
lexname=$(basename $LEXICON)
cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
$SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon

#
# Collect + clean retrieved txt
#
print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2
# choose filter
# default
cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
# low filter
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt

#
# Construct the map file
#
# Notes:
# - Expected format :
#   <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
#
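# Illustrative note (editorial addition): in the loop below, each "<oov>\t<phon>" line
# selected from all.oov_acousticlyfound_phon is rewritten by sed into
#   <oov>\t<candidate>\t<phon>
# i.e. the in-vocabulary candidate chosen by getCandidate is inserted between the OOV word
# and its phonetisation, which gives the map format expected by buildmappedbinode.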
print_info "[${BASENAME}] Construct map file" 3
rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null

while read oov
do
    oov=`echo $oov | sed "s/\n//g"`
    #
    # Obtain the oov's tag
    #
    #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
    #
    # Try to collect text containing the oov word
    #
    print_info "[${BASENAME}] Collect text containing the oov" 3
    cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 | uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
    if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
        nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
        if [ $nbWords -eq 0 ]; then
            print_warn "[${BASENAME}] INVALID OOV: $oov => $nbWords occurrences" 2
            echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
        else
            #
            # Find a candidate in a filtered invoc lexicon => the candidate which maximizes the ppl over the collected txt
            #
            #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
            print_info `$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 3
            candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
            if [ ! "$candidate" == "" ]; then
                grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
                while read phonLine
                do
                    #<word> <phon> => <word> <candidate> <phon>
                    echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
                done < $EXT_LEX/tmp/$oov.phon
            else
                print_warn "[${BASENAME}] INVALID OOV: $oov => no available candidate word in the LM" 2
                echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
            fi
        fi
    else
        print_warn "[${BASENAME}] INVALID OOV: $oov" 2
        echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
    fi
done < $EXT_LEX/final/all.oov_acousticlyfound
#
### Speeral
#

lexname=`basename $LEXICON`
#
# Build the final trigger files
#
print_info "[${BASENAME}] Clean trigg files" 3
mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null
mkdir -p $EXT_LEX/speeral/ 2> /dev/null
for i in `ls $TRIGGER_CONFZONE/*.trigg`
do
    basename=`basename $i .trigg`
    cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg
done
#
# Compile the speeral extended lexicon
#
print_info "[${BASENAME}] Compile Speeral extended lexicon" 3
print_info "$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext" 3
$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext

if [ $CHECK -eq 1 ]
then
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    if [ $? -eq 1 ]
    then
        print_error "[${BASENAME}] Building Speeral Lexicon $INPUT_DIR -> exit"
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file $ERRORFILE "ERROR : Building Speeral Lexicon $INPUT_DIR"
        print_log_file $ERRORFILE "ERROR : ${EXT_LEX}/speeral/${lexname}_ext empty after buildmappedbinode ?"
        exit 1;
    fi
fi


#-------#
# CLOSE #
#-------#
# Seems OK
print_info "[${BASENAME}] <= ExploitConfidencePass End | $(date +'%d/%m/%y %H:%M:%S')" 1

# unlock directory
mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"

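Similarly, a hypothetical run of the second script (paths are examples, not taken from the commit), launched on the same directory once ConfPass.sh has written ConfPass.cfg there:

./main_tools/ExploitConfidencePass.sh -v 2 -c -f 4 /data/otmedia/20130617_FRINTER

The -f value is only stored in FORKS ("--forks 4") and is not used directly by this script; the boost files written to trigg/speeral/ and the extended lexicon compiled under LEX/speeral/ are intended for the next decoding pass.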