Commit 668cac4d1b992742db942dc3a086d8089a1da97d
1 parent
1e7751e7ca
Exists in
master
check if solr queries is empty
Showing 1 changed file with 2 additions and 2 deletions Inline Diff
main_tools/ExploitConfidencePass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ExploitConfidencePass.sh # | 4 | # File : ExploitConfidencePass.sh # |
5 | # Brief : Exploit the ASR confidence pass to : # | 5 | # Brief : Exploit the ASR confidence pass to : # |
6 | # -> boost the confident zone # | 6 | # -> boost the confident zone # |
7 | # -> find alternative in non confident zone | 7 | # -> find alternative in non confident zone |
8 | # -> dynamicly extend the lexicon # | 8 | # -> dynamicly extend the lexicon # |
9 | # Author : Jean-François Rey # | 9 | # Author : Jean-François Rey # |
10 | # (base on Emmanuel Ferreira # | 10 | # (base on Emmanuel Ferreira # |
11 | # and Hugo Mauchrétien works) # | 11 | # and Hugo Mauchrétien works) # |
12 | # Version : 1.0 # | 12 | # Version : 1.0 # |
13 | # Date : 25/06/13 # | 13 | # Date : 25/06/13 # |
14 | ##################################################### | 14 | ##################################################### |
15 | 15 | ||
# Check OTMEDIA_HOME env var: if unset, derive it from this script's
# resolved location (two directory levels up).
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is ExploitConfidencePass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Helper-scripts directory; honour a caller-provided SCRIPT_PATH override.
if [ -z "${SCRIPT_PATH}" ]
then
    SCRIPT_PATH="$OTMEDIA_HOME/tools/scripts"
fi
30 | 30 | ||
# Include shared helper scripts (provide print_info/print_warn/print_error
# and the check routines used below).
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckExploitConfPass.sh"

# Load the ExploitConfidencePass configuration; abort if it is missing.
EXPLOITCONFIDENCEPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ExploitConfidencePass.cfg"
if [ -e "$EXPLOITCONFIDENCEPASS_CONFIG_FILE" ]
then
    . "$EXPLOITCONFIDENCEPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
    exit 1
fi
44 | 44 | ||
#---------------#
# Parse Options #
#---------------#
# Defaults: the flags are tested numerically later ([ $DEBUG -eq 1 ], etc.),
# which errors out if an option was never given — initialize them all.
DEBUG=0
VERBOSE=0
CHECK=0
RERUN=0
FORKS=""

while getopts ":hDv:cf:r" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
            echo -e "\t\t-r n :\tforce rerun without deleting files"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            # Typo fixed in the message ("unknow opton"); unknown options
            # are reported but deliberately not fatal (exit was commented out).
            echo "BAD USAGE : unknown option -$OPTARG"
            #exit 1
            ;;
    esac
done
87 | 87 | ||
# mode debug enable (trace every command)
if [ "${DEBUG:-0}" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable
if [ "${VERBOSE:-0}" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi

# Check USAGE by arguments number: exactly one non-option argument expected.
if [ $(($#-($OPTIND-1))) -ne 1 ]
then
    echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# check input directory - first argument
# (print_error comes from the sourced Tools.sh)
if [ ! -e "$1" ]
then
    print_error "can't open $1"
    exit 1
fi
113 | 113 | ||
#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e "$1")
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename "$OUTPUT_DIR")
SHOW_DIR="$OUTPUT_DIR/shows/"
SOLR_RES="$OUTPUT_DIR/solr/"
EXT_LEX="$OUTPUT_DIR/LEX/"
TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
LOGFILE="$(dirname "$OUTPUT_DIR")/info_exploitconf.log"
ERRORFILE="$(dirname "$OUTPUT_DIR")/error_exploitconf.log"

# Confidence-pass settings written by the previous pass, if present;
# otherwise fall back to the default res_p2 layout under the input dir.
CONFPASS_CONFIG_FILE="$(readlink -e "$1")/ConfPass.cfg"
if [ -e "$CONFPASS_CONFIG_FILE" ]
then
    RES_CONF_DIR=$(grep "^RES_CONF_DIR=" "$CONFPASS_CONFIG_FILE" | cut -f2 -d"=")
    RES_CONF=$(grep "^CONF_DIR=" "$CONFPASS_CONFIG_FILE" | cut -f2 -d"=")
    print_warn "Use confidence measure from : $RES_CONF" 1
else
    print_error "Can't find $CONFPASS_CONFIG_FILE" 1
    RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
    RES_CONF="$INPUT_DIR/conf/res_p2"
fi

mkdir -p "$SHOW_DIR"
mkdir -p "$SOLR_RES"
mkdir -p "$EXT_LEX"
mkdir -p "$TRIGGER_CONFZONE"
147 | 147 | ||
#------------------#
# Create Workspace #
#------------------#
# Lock directory: bail out if a previous run left a lock, unless rerun (-r).
# BUG FIX: the lock was tested under $OUTPUT_DIR_BASENAME, which is never
# assigned anywhere in this script, so the guard never fired; test it where
# the lock file is actually created, in $OUTPUT_DIR.
if [ -e "$OUTPUT_DIR/EXPLOITCONFPASS.lock" ] && [ "${RERUN:-0}" -eq 0 ]; then exit 1; fi
rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1

#------#
# Save #
#------#
# Keep a copy of the configuration used, enriched with run-specific paths.
cp "$EXPLOITCONFIDENCEPASS_CONFIG_FILE" "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
# NOTE(review): ${lexname} is only assigned much later (lexname=$(basename
# $LEXICON)); here it expands empty unless a sourced config sets it — verify.
echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> "$OUTPUT_DIR/ExploitConfPass.cfg"
165 | 165 | ||
#-----------------------#
# Segmentation by show  #
#-----------------------#
# create txt file from scored res
# tag pos and lemmatization of the txt file
# merge the scored res and taglem file
# segment using the last generated file and create a ctm file by show

print_info "Segmentation by show" 1

# -> to txt
print_info "Create txt from scored res" 2
cat "${RES_CONF_DIR}"/*.res > "$INPUT_DIR/$BASENAME.sctm"
cat "$INPUT_DIR/$BASENAME.seg" | "$SIGMUND_BIN/myConvert.pl" "$INPUT_DIR/$BASENAME.sctm" "$INPUT_DIR/$BASENAME.tmp"
# NOTE(review): sort -nt 'n' -k '2' splits fields on the letter "n";
# looks intentional for these segment ids but worth confirming.
cat "$INPUT_DIR/$BASENAME.tmp" | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > "$INPUT_DIR/$BASENAME.txt"

# -> to tagger + lemme
print_info "Tag pos and lem in txt file" 2
iconv -t ISO_8859-1 "$INPUT_DIR/$BASENAME.txt" > "$INPUT_DIR/$BASENAME.tmp"
"$SIGMUND_BIN/txt2lem.sh" "$INPUT_DIR/$BASENAME.tmp" "$INPUT_DIR/$BASENAME.taglem"

# merge sctm and taglem
print_info "Merge scored ctm with tag pos and lem file" 2
cat "$INPUT_DIR/$BASENAME.sctm" | "$SCRIPT_PATH/BdlexUC.pl" "${RULES}/basic" -f | iconv -t ISO_8859-1 | "$SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl" "$INPUT_DIR/$BASENAME.taglem" > "$INPUT_DIR/$BASENAME.ctl"

# -> new seg
print_info "Create xml file and run Topic Seg" 2
"$SIGMUND_BIN/tagLem2xml.pl" "$INPUT_DIR/$BASENAME.taglem" "$INPUT_DIR/$BASENAME.doc.xml"
rm "$INPUT_DIR/$BASENAME.tmp" #$INPUT_DIR/$BASENAME.taglem

# Lia_topic_seg : bring together sentences into show
# NOTE(review): the Java segmenter appears to read 0.xml from the CWD.
cp "$INPUT_DIR/$BASENAME.doc.xml" 0.xml
java -cp "$LIATOPICSEG/bin" Test > "$INPUT_DIR/show.seg"
cat "$INPUT_DIR/show.seg" | "$SIGMUND_BIN/toSegEmiss.pl" "$INPUT_DIR/$BASENAME.show.seg"
rm 0.xml "$INPUT_DIR/show.seg"

if [ "${CHECK:-0}" -eq 1 ]
then
    if [ ! -s "$INPUT_DIR/$BASENAME.show.seg" ]; then echo -e "ERROR : no Topic segmentation" >> "$ERRORFILE"; fi
fi

# Segment ctm into several show files and create a seg list by show
print_info "Segment ctm into show files and a seg list by show" 2
"$SCRIPT_PATH/ctm2show.pl" "$INPUT_DIR/$BASENAME.ctl" "$INPUT_DIR/$BASENAME.show.seg" "$SHOW_DIR"
211 | 211 | ||
#-----------------------------------------------------------#
# SOLR QUERIES                                              #
# -> Create Confidente Word                                 #
#    Keep conf words and use Tags                           #
# -> Query SOLR (document & multimedia)                     #
#    concat word + add date 2 day before and after the show #
#    query document & multimedia                            #
#-----------------------------------------------------------#
print_info "Create SOLR queries and ASK SOLR" 1
# Glob instead of parsing ls; paths with spaces stay intact.
for show in "$SHOW_DIR"/*.ctm
do
    bn=$(basename "$show" .ctm)
    # Remove words with low confidence and keep useful tagger words
    cat "$show" | "$SCRIPT_PATH/KeepConfZone.pl" | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
    # Get date 2 day before and after the show (show date is BASENAME[0:6])
    datePattern=$("$SCRIPT_PATH/daybefore2after.sh" "$(echo "$BASENAME" | cut -c1-6)")
    # Create SOLR queries
    cat "$SHOW_DIR/$bn.confzone" | "$SCRIPT_PATH/GenerateSOLRQueries.pl" | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
    # Ask SOLR DB only when the generated query file is non-empty
    # (guard introduced by this commit: don't append the date filter or
    # hit SOLR for an empty query).
    if [ "$(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ')" -gt 0 ]; then
        query="$(cat "$SHOW_DIR/$bn.queries")&fq=docDate:[$datePattern]"
        echo "$query" > "$SHOW_DIR/$bn.queries"
        python "$SCRIPT_PATH/ProcessSOLRQueries.py" "$SHOW_DIR/$bn.queries" "$SOLR_RES/$bn.keywords.tmp" "$SOLR_RES/$bn.txt.tmp"
        cat "$SOLR_RES/$bn.keywords.tmp" | sort -u > "$SOLR_RES/$bn.keywords"
        cat "$SOLR_RES/$bn.txt.tmp" | sort -u > "$SOLR_RES/$bn.txt"
        rm "$SOLR_RES"/*.tmp
    fi

    if [ "${CHECK:-0}" -eq 1 ]
    then
        if [ ! -e "$SOLR_RES/$bn.keywords" ] || [ ! -e "$SOLR_RES/$bn.txt" ]
        then
            print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1
        fi
    fi

done
249 | 249 | ||
#-----------------------------------------------------------------------------------------------
# Build trigger file
# 1) keywords are automatically boosted in the non confident zone of the current res
#    confident zone are boosted
#    previous words in sensible zone are penalized
# 2) OOVs are extracted + phonetized
# 3) Try to find OOVs acousticly in the current segment
# 4) Generate the .trigg file
#------------------------------------------------------------------------------------------------
print_info "Build trigger files" 1
for i in "$SOLR_RES"/*.keywords
do
    basename=$(basename "$i" .keywords)

    #
    # Tokenize & produce coverage report
    # Use filter you need
    #
    print_info "keywords filtering and produce coverage report" 2
    # Default filter
    cat "$i" | "$SCRIPT_PATH/CleanFilter.sh" | "${SCRIPT_PATH}/ApplyCorrectionRules.pl" "${LEXICON}.regex" | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t |\
    "$SCRIPT_PATH/CoverageReportMaker.pl" --out "$SOLR_RES/${basename}_tmp_report" "$LEXICON.bdlex_tok"
    # do less filter
    #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok


    #
    # Extract "real" OOV and phonetize them
    # -> small ad-hoc filtering to avoid keeping too much noise
    #
    print_info "Extract OOV and phonetize them" 2
    "${SCRIPT_PATH}/FindNormRules.pl" "$SOLR_RES/${basename}_tmp_report/report.oov" "$LEXICON.bdlex_tok" | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -f | iconv -t ISO_8859-1 -f UTF-8 | "${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante" | grep -v "core dumped" | cut -d"[" -f1 | sort -u | "${SCRIPT_PATH}/PhonFormatter.pl" | iconv -f ISO_8859-1 -t UTF-8 | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t > "$SOLR_RES/${basename}.phon_oov"

    #
    # Search INVOC & OOV in the current lattice
    #
    print_info "Search INVOC and OOV in the current lattice" 2
    # NOTE(review): original had a duplicated -v flag in the second grep
    # (grep -v --perl-regex -v …); collapsed to a single -v, same effect.
    cat "$SOLR_RES/${basename}_tmp_report/report.invoc" | grep -v "\b0" | cut -f1 | grep --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t > "$TRIGGER_CONFZONE/$basename.tosearch"
    cat "$SOLR_RES/${basename}.phon_oov" | cut -f1 >> "$TRIGGER_CONFZONE/$basename.tosearch"

    # For each treil (lattice) listed for this show
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        "$OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder" "${LEXICON}.speer_phon" "$RES_CONF/wlat/$baseseg.wlat" "$TRIGGER_CONFZONE/${basename}.tosearch" "$SOLR_RES/$basename.phon_oov" > "$TRIGGER_CONFZONE/$baseseg.acousticlyfound"
        #
        # Produce the boost file for the next decoding pass
        #
        print_info "Produce trigg file : $baseseg " 3
        cat "$RES_CONF_DIR/$baseseg.res" | "$SCRIPT_PATH/ScoreCtm2trigg.pl" "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" > "$TRIGGER_CONFZONE/$baseseg.trigg"
    done

done
302 | 302 | ||
#-----------------------------------------------------------------------------------------------
# Build the extended SPEERAL Lexicon
# 1) Merge OOVs + LEXICON
# 2) Related text are collected in order to find the invoc word maximizing the ppl (LM proba)
# 3) The current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------------------------
print_info "Build extended Speeral Lexicon" 1
mkdir -p "$EXT_LEX/final"
mkdir -p "$EXT_LEX/tmp"
mkdir -p "$EXT_LEX/tmp/txt"
#
# Collect the acousticly found oov and their phonetisation
#
print_info "Get all OOV and retrieve all phonetisation" 2
for i in "$SOLR_RES"/*.phon_oov
do
    basename=$(basename "$i" .phon_oov)

    rm "$EXT_LEX/$basename.acousticlyfound" 2> /dev/null
    # list acousticly found for the show (one entry per segment in the .lst)
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        cat "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" | cut -f1 | cut -f2 -d"=" >> "$EXT_LEX/$basename.acousticlyfound"
    done
    # de-duplicate in place
    sort -u "$EXT_LEX/$basename.acousticlyfound" > "$EXT_LEX/.tmp"
    mv "$EXT_LEX/.tmp" "$EXT_LEX/$basename.acousticlyfound"

    #
    # Extract OOV really added
    #
    cat "$SOLR_RES/$basename.phon_oov" | cut -f1 | sort -u > "$EXT_LEX/$basename.oov"
    "$SCRIPT_PATH/intersec.pl" "$EXT_LEX/$basename.oov" "$EXT_LEX/$basename.acousticlyfound" > "$EXT_LEX/$basename.oov_acousticlyfound"
    #
    # Retrieve all phonetisation
    #
    cat "$SOLR_RES/${basename}.phon_oov" | "$SCRIPT_PATH/LexPhonFilter.pl" "$EXT_LEX/$basename.oov_acousticlyfound" > "$EXT_LEX/$basename.oov_acousticlyfound_phon"
done
340 | 340 | ||
#
# Merge OOVs and their phonetisation
#
print_info "Merge OOV and their phonetisation" 2
lexname=$(basename "$LEXICON")
cat "$EXT_LEX"/*.oov_acousticlyfound_phon | sort -u > "$EXT_LEX/final/all.oov_acousticlyfound_phon"
# drop 3-letter-ish noise words before merging
cat "$EXT_LEX"/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > "$EXT_LEX/final/all.oov_acousticlyfound"
"$SCRIPT_PATH/MergeLexicon.pl" "$EXT_LEX/final/all.oov_acousticlyfound_phon" > "$EXT_LEX/final/${lexname}_ext.phon"

#
# Collect + clean retrieved txt
#
print_info "Collect and clean SOLR txt answers" 2
# choose filter
# default
cat "$SOLR_RES"/*.txt | "$SCRIPT_PATH/CleanFilter.sh" | "$SCRIPT_PATH/ApplyCorrectionRules.pl" "${LEXICON}.regex" | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t > "$EXT_LEX/final/all.bdlex_txt"
# low filter
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
359 | 359 | ||
#
# Construct the map file
#
# Notes:
# - Expected format :
#   <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
#
print_info "Construct map file" 2
# start from a clean slate for this run's map and rejected-OOV list
rm -f "$EXT_LEX/final/${lexname}_ext.map" 2>/dev/null
rm -f "$EXT_LEX/final/${lexname}.unvalid_oov" 2>/dev/null
370 | 370 | ||
371 | while read oov | 371 | while read oov |
372 | do | 372 | do |
373 | oov=`echo $oov | sed "s/\n//g"` | 373 | oov=`echo $oov | sed "s/\n//g"` |
374 | # | 374 | # |
375 | # Obtain the oov's tag | 375 | # Obtain the oov's tag |
376 | # | 376 | # |
377 | #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` | 377 | #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` |
378 | # | 378 | # |
379 | # Try to collect text containing the oov word | 379 | # Try to collect text containing the oov word |
380 | # | 380 | # |
381 | cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt | 381 | cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt |
382 | if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then | 382 | if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then |
383 | nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` | 383 | nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` |
384 | if [ $nbWords -eq 0 ]; then | 384 | if [ $nbWords -eq 0 ]; then |
385 | echo "UNVALID OOV: $oov => $nbWords occurrences" | 385 | echo "UNVALID OOV: $oov => $nbWords occurrences" |
386 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov | 386 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov |
387 | else | 387 | else |
388 | # | 388 | # |
389 | # Find a candidate in a filtred invoc lexicon => a candidate which maximize the ppl in the overall txt collected | 389 | # Find a candidate in a filtred invoc lexicon => a candidate which maximize the ppl in the overall txt collected |
390 | # | 390 | # |
391 | #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" | 391 | #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" |
392 | candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` | 392 | candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` |
393 | if [ ! x$candidate = "x" ]; then | 393 | if [ ! x$candidate = "x" ]; then |
394 | grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon | 394 | grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon |
395 | while read phonLine | 395 | while read phonLine |
396 | do | 396 | do |
397 | #<word> <phon> => <word> <candidate> <phon> | 397 | #<word> <phon> => <word> <candidate> <phon> |
398 | echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map | 398 | echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map |
399 | done < $EXT_LEX/tmp/$oov.phon | 399 | done < $EXT_LEX/tmp/$oov.phon |
400 | else | 400 | else |
401 | echo "UNVALID OOV: $oov => no availaible Candidate word in LM" | 401 | echo "UNVALID OOV: $oov => no availaible Candidate word in LM" |
402 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov | 402 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov |
403 | fi | 403 | fi |
404 | fi | 404 | fi |
405 | else | 405 | else |
406 | echo "UNVALID OOV: $oov" | 406 | echo "UNVALID OOV: $oov" |
407 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov | 407 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov |
408 | fi | 408 | fi |
409 | done < $EXT_LEX/final/all.oov_acousticlyfound | 409 | done < $EXT_LEX/final/all.oov_acousticlyfound |
410 | 410 | ||
#
### Speeral
#

lexname=$(basename "$LEXICON")
#
# Build the final trigger file
#
print_info "Clean trigg files" 2
mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null
mkdir -p $EXT_LEX/speeral/ 2> /dev/null
# Iterate with a glob instead of parsing `ls` output (robust to unusual
# file names, and to the no-match case which would otherwise feed the
# loop one bogus literal value).
for i in "$TRIGGER_CONFZONE"/*.trigg
do
    [ -e "$i" ] || continue
    basename=$(basename "$i" .trigg)
    # NOTE(review): the unvalid_oov list is written to $EXT_LEX/final/ above,
    # but read from $EXT_LEX/ here — confirm which path is intended.
    $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov < "$i" > "$TRIGGER_CONFZONE/speeral/$basename.trigg"
done
#
# Compile the speeral extended lexicon
#
print_info "Compile Speeral extended lexicon" 2
$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext
432 | 432 | ||
# Optional sanity check of the freshly built extended lexicon
# (enabled when CHECK=1); on failure, log and abort.
if [ "${CHECK}" -eq 1 ]; then
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    check_status=$?
    if [ "$check_status" -eq 1 ]; then
        echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE
        exit 1
    fi
fi
442 | 442 | ||
443 | 443 | ||
#-------#
# CLOSE #
#-------#
# Seem OK
# Log successful completion with a timestamp, record the pass in the run
# logfile, then release the directory lock so downstream passes can proceed.
print_info "<= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1
echo -e "#Solr $BASENAME " >> $LOGFILE

# unlock directory
# NOTE(review): lock release is a rename, presumably matched by a
# .lock/.unlock convention elsewhere in the pipeline — confirm with callers.
mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
453 | 453 |