ExploitConfidencePass.sh
18.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
#!/bin/bash
#####################################################
# File : ExploitConfidencePass.sh #
# Brief : Exploit the ASR confidence pass to : #
# -> boost the confident zones #
# -> find alternatives in non confident zones #
# -> dynamically extend the lexicon #
# Author : Jean-François Rey #
# (based on Emmanuel Ferreira #
# and Hugo Mauchrétien's works) #
# Version : 1.0 #
# Date : 25/06/13 #
#####################################################
echo "### ExploitConfidencePass.sh ###"

# Resolve OTMEDIA_HOME from this script's location when the environment
# does not provide it (script lives two levels below the project root).
# Quoting fixes failures on paths containing spaces.
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# Directory containing this script (symlinks resolved).
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Default location of the helper scripts sourced below.
if [ -z "${SCRIPT_PATH}" ]
then
    SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
fi

# Include helpers (print_info/print_error/... and the check functions).
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckExploitConfPass.sh"

# Load the pass configuration; abort when it is missing.
EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg"
if [ -e "$EXPLOITCONFIDENCEPASS_CONFIG_FILE" ]
then
    . "$EXPLOITCONFIDENCEPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
    exit 1
fi
#---------------#
# Parse Options #
#---------------#
# Defaults keep the later numeric tests ([ $DEBUG -eq 1 ], etc.) valid
# even when the sourced config did not define these flags.  ${VAR:-0}
# preserves any value already set by the configuration file.
DEBUG=${DEBUG:-0}
VERBOSE=${VERBOSE:-0}
CHECK=${CHECK:-0}
RERUN=${RERUN:-0}
while getopts ":hDv:cr" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-r n :\tforce rerun without deleting files"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            # Typo fixed in the user-facing message ("unknow opton").
            echo "BAD USAGE : unknown option -$OPTARG"
            #exit 1
            ;;
    esac
done
# Enable shell tracing in debug mode.  ${DEBUG:-0} keeps the test valid
# even if the defaulting block above was skipped (unset var previously
# produced a "[: -eq: unary operator expected" error here).
if [ "${DEBUG:-0}" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi
# Report the verbosity level when any verbosity was requested.
if [ "${VERBOSE:-0}" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
# Exactly one positional argument (the input directory) is required.
if [ $(($#-($OPTIND-1))) -ne 1 ]
then
    echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
    echo "$0 -h for more info"
    exit 1
fi
shift $((OPTIND-1))
# The input directory must exist (quoted: paths may contain spaces).
if [ ! -e "$1" ]
then
    print_error "can't open $1"
    exit 1
fi
#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e "$1")
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename "$OUTPUT_DIR")
SHOW_DIR="$OUTPUT_DIR/shows/"
SOLR_RES="$OUTPUT_DIR/solr/"
EXT_LEX="$OUTPUT_DIR/LEX/"
TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
LOGFILE="$OUTPUT_DIR/info_exploitconf.log"
ERRORFILE="$OUTPUT_DIR/error_exploitconf.log"

# Start banner: moved after the BASENAME assignment — the original
# logged before defining BASENAME, so the banner printed "[]".
print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1

# Read the locations written by the previous (confidence) pass;
# fall back to the res_p2 defaults when the file is absent.
# $INPUT_DIR is already the resolved $1, no second readlink needed.
CONFPASS_CONFIG_FILE="$INPUT_DIR/ConfPass.cfg"
if [ -e "$CONFPASS_CONFIG_FILE" ]
then
    RES_CONF_DIR=$(grep "^RES_CONF_DIR=" "$CONFPASS_CONFIG_FILE" | cut -f2 -d"=")
    # NOTE(review): the CONF_DIR key is read into RES_CONF — matches the
    # res_p2 fallback below, but confirm against the ConfPass writer.
    RES_CONF=$(grep "^CONF_DIR=" "$CONFPASS_CONFIG_FILE" | cut -f2 -d"=")
    print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2
else
    print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE"
    print_error "[${BASENAME}] -> use res_p2"
    RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
    RES_CONF="$INPUT_DIR/conf/res_p2"
fi
# Working directories for this pass (ignore "already exists").
mkdir -p "$SHOW_DIR" > /dev/null 2>&1
mkdir -p "$SOLR_RES" > /dev/null 2>&1
mkdir -p "$EXT_LEX" > /dev/null 2>&1
mkdir -p "$TRIGGER_CONFZONE" > /dev/null 2>&1
#------------------#
# Create Workspace #
#------------------#
# Lock directory.  The original tested "$OUTPUT_DIR_BASENAME/...", an
# undefined variable, so the lock check could never match the file
# touched below — fixed to the path actually created/moved.
if [ -e "$OUTPUT_DIR/EXPLOITCONFPASS.lock" ] && [ "${RERUN:-0}" -eq 0 ]
then
    print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1
#------#
# Save #
#------#
# Snapshot the configuration used for this run so later passes can
# source it.  lexname is normally derived from LEXICON much later
# (see the Speeral section); derive it here too, otherwise the
# LEX_SPEERAL paths were saved with an empty basename.
lexname=${lexname:-$(basename "$LEXICON")}
cp "$EXPLOITCONFIDENCEPASS_CONFIG_FILE" "$OUTPUT_DIR/ExploitConfPass.cfg"
{
    echo "TRIGGER_DIR=$TRIGGER_CONFZONE"
    echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/"
    echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext"
    echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin"
} >> "$OUTPUT_DIR/ExploitConfPass.cfg"
print_info "[${BASENAME}] Save config in $OUTPUT_DIR/ExploitConfPass.cfg" 1
#---------------#
# Check Pass #
#---------------#
# Abort when the confidence pass produced no .res file at all: there
# is nothing to exploit.
if [ "$(ls "${RES_CONF_DIR}"/*.res 2> /dev/null | wc -l)" -eq 0 ]
then
    print_error "[${BASENAME}] No Conf Pass res -> exit ExploitConfPass"
    if [ "${CHECK:-0}" -eq 1 ]; then print_log_file "$ERRORFILE" "No ConfPass res in ${RES_CONF_DIR}" ;fi
    exit 1
fi
#-----------------------#
# Segmentation by show #
#-----------------------#
# create txt file from scored res
# tag pos and lemmatization of the txt file
# merge the scored res and taglem file
# segment using the last generated file
# and create a ctm file by show
print_info "[${BASENAME}] Segmentation by show" 1
# -> to txt
print_info "[${BASENAME}] Create txt from scored res" 3
# Concatenate every scored res into a single scored-ctm file.
cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
# Align the segmentation (.seg) with the scored ctm.
cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp
# BdlexUC -f restores standard casing; '_' becomes a space.
# NOTE(review): sort -nt 'n' -k '2' sorts numerically on the 2nd
# 'n'-separated field — presumably a segment id embedded in the line;
# confirm against myConvert.pl's output format.
cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt
# -> to tagger + lemme
print_info "[${BASENAME}] Tag pos and lem in txt file" 3
# txt2lem.sh expects Latin-1 input.
iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
$SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem
# merge sctm and taglem
print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 3
cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl
# -> new seg
print_info "[${BASENAME}] Create xml file and run Topic Seg" 3
$SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem
# Lia_topic_seg : bring together sentences into show
# NOTE(review): the Java tool appears to read a hard-coded ./0.xml,
# hence the copy into the current directory — not parallel-safe;
# confirm before running several instances in one cwd.
cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg
rm 0.xml $INPUT_DIR/show.seg
# Optional sanity check: an empty .show.seg means topic segmentation
# failed; log the problem but continue.
if [ $CHECK -eq 1 ]
then
if [ ! -s $INPUT_DIR/$BASENAME.show.seg ]
then
print_error "[${BASENAME}] No Topic segmentation ! "
print_error "[${BASENAME}] Check $ERRORFILE "
print_log_file "$ERRORFILE" "No Topic segmentation in ${BASENAME}.show.seg"
fi
fi
# Segment ctm into several show files and create a seg list by show
print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 1
$SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR
#-----------------------------------------------------------#
# SOLR QUERIES #
# -> Create Confidente Word #
# Keep conf words and use Tags #
# -> Query SOLR (document & multimedia) #
# concat word + add date 2 day before and after the show #
# query document & multimedia #
#-----------------------------------------------------------#
print_info "[${BASENAME}] Create SOLR queries and ask SOLR" 1
# One query set per show ctm produced by ctm2show.pl above.
for show in $(ls $SHOW_DIR/*.ctm)
do
bn=$(basename $show .ctm)
# Remove words with low confidence and keep useful tagger words
# (noun tags and the unknown-word tag MOTINC); keep the word column.
# NOTE(review): in grep's default BRE, {3,5} is literal (an interval
# needs \{3,5\}), so "X[A-Z]{3,5}" only matches that literal text —
# confirm whether X-tags are actually expected here.
cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
# Get date 2 day before and after the show
# (first 6 chars of BASENAME are assumed to be a YYMMDD date — TODO
# confirm the directory-naming convention).
datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
# Create SOLR queries
cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
# Ask SOLR DB only when the query file contains at least one word.
if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
# Restrict the query to documents dated around the show.
query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
echo $query > $SHOW_DIR/$bn.queries
print_info "python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp" 3
python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp
# Deduplicate the keyword and text answers.
cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
rm $SOLR_RES/*.tmp > /dev/null 2>&1
fi
# Missing answer files usually mean the SOLR server was unreachable.
if [ $CHECK -eq 1 ]
then
if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
then
print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 2
print_log_file "$LOGFILE" "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !"
fi
fi
done
#-----------------------------------------------------------------------------------------------
# Build trigger file
# 1) keywords are automatically boosted in the non confident zone of the current res
# confident zone are boosted
# previous words in sensible zone are penalized
# 2) OOVs are extracted + phonetized
# 3) Try to find OOVs acousticly in the current segment
# 4) Generate the .trigg file
#------------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build trigger files" 1
for i in `ls $SOLR_RES/*.keywords`
do
basename=`basename $i .keywords`
#
# Tokenize & produce coverage report
# Use filter you need
#
# NOTE(review): the loop variable shadows the `basename` command for
# the rest of the script; works because later calls use backticks
# before reassignment, but fragile.
print_info "[${BASENAME}] keywords filtering and produce coverage report" 3
# Default filter
cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
$SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
# do less filter
#cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
#
# Extract "real" OOV and phonetize them
# -> small custom filtering to avoid keeping too much noise
# (drops all-uppercase tokens, tokens starting with a digit and
# very short words before phonetizing with lia_phon)
#
print_info "[${BASENAME}] Extract OOV and phonetize them" 3
${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov
#
# Search INVOC & OOV in the current lattice
#
# NOTE(review): "-v --perl-regex -v" repeats the -v flag — harmless
# but probably a typo for a single -v; confirm the intended filter.
print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 3
cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch
# For each lattice ("treillis") segment listed for this show
for baseseg in $(cat "$SHOW_DIR/$basename.lst")
do
$OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
#
# Produce the boost file for the next decoding pass
#
print_info "[${BASENAME}] Produce trigg file : $baseseg " 3
cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
done
done
#-----------------------------------------------------------------------------------------------
# Build the extended SPEERAL Lexicon
# 1) Merge OOVs + LEXICON
# 1) Related text are collected in order to find the invoc word with maximizing the ppl (LM proba)
# 2) The current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------------------------
print_info "[${BASENAME}] Build extended Speeral Lexicon" 1
mkdir -p $EXT_LEX/final
mkdir -p $EXT_LEX/tmp
mkdir -p $EXT_LEX/tmp/txt
#
# Collect the acousticly found oov and their phonetisation
#
print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 3
for i in `ls $SOLR_RES/*.phon_oov`
do
basename=`basename $i .phon_oov`
# Start fresh for this show (ignore "no such file" on first run).
rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
# list acousticly found for the show
# (acousticFinder lines look like key=value; keep the value part —
# TODO confirm the exact output format of acousticFinder)
for baseseg in $(cat "$SHOW_DIR/$basename.lst")
do
cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
done
# Deduplicate via a temp file, then replace the original in place.
cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound
#
# Extract OOV really added
# (intersection of SOLR-suggested OOVs and acoustically found words)
#
cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
$SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound
#
# Retrieve all phonetisation
#
cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
done
#
# Merge OOVs and their phonetisation
#
print_info "[${BASENAME}] Merge OOV and their phonetisation" 3
# Basename of the main lexicon, used to name all extended-lexicon files.
lexname=$(basename $LEXICON)
cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
# Drop 3-letter lowercase words.
# NOTE(review): this uses {3} while earlier filters use {1,3} —
# confirm whether 1- and 2-letter words should also be dropped here.
cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
$SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon
#
# Collect + clean retrieved txt
#
print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2
# choose filter
# default
cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
# low filter
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
#
# Construct the map file
#
# Notes:
# - Expected format :
# <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
#
# For each validated OOV: collect sentences containing it, ask the LM
# for the in-vocabulary candidate that maximizes perplexity over that
# text, then emit one map line per known phonetisation.  OOVs with no
# supporting text or no candidate are logged to ${lexname}.unvalid_oov.
print_info "[${BASENAME}] Construct map file" 3
rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null
while read oov
do
# Strip any stray newline characters from the word.
oov=`echo $oov | sed "s/\n//g"`
#
# Obtain the oov's tag
#
#oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
#
# Try to collect text containing the oov word
# (whole-word match via surrounding spaces; at most 40 words per line)
#
print_info "[${BASENAME}] Collect text containing the oov" 3
cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
if [ $nbWords -eq 0 ]; then
# No sentence mentions this OOV: cannot pick a candidate.
print_warn "[${BASENAME}] UNVALID OOV: $oov => $nbWords occurrences" 2
echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
else
#
# Find a candidate in a filtred invoc lexicon => a candidate which maximize the ppl in the overall txt collected
#
#echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
print_info `$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 3
candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
if [ ! "$candidate" == "" ]; then
# One map entry per phonetisation of this OOV.
grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
while read phonLine
do
#<word> <phon> => <word> <candidate> <phon>
echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
done < $EXT_LEX/tmp/$oov.phon
else
print_warn "[${BASENAME}] UNVALID OOV: $oov => no availaible Candidate word in LM" 2
echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
fi
fi
else
# grep produced no file at all (no match): reject the OOV.
print_warn "[${BASENAME}] UNVALID OOV: $oov" 2
echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
fi
done < $EXT_LEX/final/all.oov_acousticlyfound
#
### Speeral
#
# Basename of the lexicon (recomputed defensively; also set earlier).
lexname=$(basename "$LEXICON")
#
# Build the final trigger file
#
print_info "[${BASENAME}] Clean trigg files" 3
mkdir -p "$TRIGGER_CONFZONE/speeral/" 2> /dev/null
mkdir -p "$EXT_LEX/speeral/" 2> /dev/null
# Remove triggers containing OOVs rejected during map construction.
# Fixed path: the unvalid_oov list is written to $EXT_LEX/final/ by
# the map-construction loop; the original filtered against the
# non-existent $EXT_LEX/$lexname.unvalid_oov, making this a no-op.
for i in "$TRIGGER_CONFZONE"/*.trigg
do
    [ -e "$i" ] || continue  # no .trigg produced at all
    trigg_base=$(basename "$i" .trigg)
    cat "$i" | $SCRIPT_PATH/RemoveLineContaining.pl "$EXT_LEX/final/$lexname.unvalid_oov" > "$TRIGGER_CONFZONE/speeral/$trigg_base.trigg"
done
#
# Compile the speeral extended lexicon
#
print_info "[${BASENAME}] Compile Speeral extended lexicon" 3
print_info "$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext" 3
$SPEERAL_PATH/bin/buildmappedbinode "$LEXICON.bdlex_phon" "$EXT_LEX/final/${lexname}_ext.map" "$AM_SKL" "$EXT_LEX/speeral/${lexname}_ext"
if [ "${CHECK:-0}" -eq 1 ]
then
    # check_exploitconfpass_lex_check (from CheckExploitConfPass.sh)
    # returns 1 when the compiled lexicon looks empty/invalid.
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    if [ $? -eq 1 ]
    then
        print_error "[${BASENAME}] Building Speeral Lexicon $INPUT_DIR -> exit"
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file "$ERRORFILE" "ERROR : Building Speeral Lexicon $INPUT_DIR"
        print_log_file "$ERRORFILE" "ERROR : ${EXT_LEX}/speeral/${lexname}_ext Empty after buildmappedbinode ?"
        exit 1
    fi
fi
#-------#
# CLOSE #
#-------#
# Everything ran: log completion and flip the lock marker to "unlock"
# so downstream passes know this directory is done.
print_info "[${BASENAME}] <= ExploitConfidencePass End | $(date +'%d/%m/%y %H:%M:%S')" 1
# unlock directory (guard: on a forced rerun the lock may be absent,
# and the unguarded mv printed a spurious error)
if [ -e "$OUTPUT_DIR/EXPLOITCONFPASS.lock" ]
then
    mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
else
    touch "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
fi