Commit 561670accdc5aa799e7d2ba3ce3fa2f825a656fa

Authored by Jean-François Rey
1 parent 7e99f07935
Exists in master

remove output redirections

Showing 5 changed files with 66 additions and 73 deletions Inline Diff

main_tools/ConfPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : ConfPass.sh # 4 # File : ConfPass.sh #
5 # Brief : Process the ASR Confidence pass # 5 # Brief : Process the ASR Confidence pass #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # (based on Emmanuel Ferreira # 7 # (based on Emmanuel Ferreira #
8 # and Hugo Mauchrétien works) # 8 # and Hugo Mauchrétien works) #
9 # Version : 1.0 # 9 # Version : 1.0 #
10 # Date : 17/06/13 # 10 # Date : 17/06/13 #
11 ##################################################### 11 #####################################################
12 12
13 echo "### ConfPass.sh ###" 13 echo "### ConfPass.sh ###"
14 14
15 #Check OTMEDIA_HOME env var 15 #Check OTMEDIA_HOME env var
16 if [ -z ${OTMEDIA_HOME} ] 16 if [ -z ${OTMEDIA_HOME} ]
17 then 17 then
18 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 18 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
19 export OTMEDIA_HOME=$OTMEDIA_HOME 19 export OTMEDIA_HOME=$OTMEDIA_HOME
20 fi 20 fi
21 21
22 22
23 # where is ConfPass.sh 23 # where is ConfPass.sh
24 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 24 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
25 25
26 # Scripts Path 26 # Scripts Path
27 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 27 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
28 28
29 # Include scripts 29 # Include scripts
30 . $SCRIPT_PATH"/Tools.sh" 30 . $SCRIPT_PATH"/Tools.sh"
31 . $SCRIPT_PATH"/CheckConfPass.sh" 31 . $SCRIPT_PATH"/CheckConfPass.sh"
32 32
33 # where is ConfPass.cfg 33 # where is ConfPass.cfg
34 CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" 34 CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg"
35 if [ -e $CONFPASS_CONFIG_FILE ] 35 if [ -e $CONFPASS_CONFIG_FILE ]
36 then 36 then
37 . $CONFPASS_CONFIG_FILE 37 . $CONFPASS_CONFIG_FILE
38 else 38 else
39 echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" > /dev/stderr 39 echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" > /dev/stderr
40 exit 1 40 exit 1
41 fi 41 fi
42 42
43 #---------------# 43 #---------------#
44 # Parse Options # 44 # Parse Options #
45 #---------------# 45 #---------------#
46 while getopts ":hDv:cr" opt 46 while getopts ":hDv:cr" opt
47 do 47 do
48 case $opt in 48 case $opt in
49 h) 49 h)
50 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" 50 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n"
51 echo -e "\t Options:" 51 echo -e "\t Options:"
52 echo -e "\t\t-h :\tprint this message" 52 echo -e "\t\t-h :\tprint this message"
53 echo -e "\t\t-D :\tDEBUG mode on" 53 echo -e "\t\t-D :\tDEBUG mode on"
54 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 54 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
55 echo -e "\t\t-c :\t Check process, stop if error detected" 55 echo -e "\t\t-c :\t Check process, stop if error detected"
56 echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" 56 echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files"
57 exit 1 57 exit 1
58 ;; 58 ;;
59 D) 59 D)
60 DEBUG=1 60 DEBUG=1
61 ;; 61 ;;
62 v) 62 v)
63 VERBOSE=$OPTARG 63 VERBOSE=$OPTARG
64 ;; 64 ;;
65 c) 65 c)
66 CHECK=1 66 CHECK=1
67 ;; 67 ;;
68 r) 68 r)
69 RERUN=1 69 RERUN=1
70 ;; 70 ;;
71 :) 71 :)
72 echo "Option -$OPTARG requires an argument." > /dev/stderr 72 echo "Option -$OPTARG requires an argument." > /dev/stderr
73 exit 1 73 exit 1
74 ;; 74 ;;
75 \?) 75 \?)
76 echo "BAD USAGE : unknow opton -$OPTARG" 76 echo "BAD USAGE : unknow opton -$OPTARG"
77 #exit 1 77 #exit 1
78 ;; 78 ;;
79 esac 79 esac
80 done 80 done
81 81
82 # mode debug enable 82 # mode debug enable
83 if [ $DEBUG -eq 1 ] 83 if [ $DEBUG -eq 1 ]
84 then 84 then
85 set -x 85 set -x
86 echo -e "## Mode DEBUG ON ##" 86 echo -e "## Mode DEBUG ON ##"
87 REDIRECTION_OUTPUT=""
88 else
89 REDIRECTION_OUTPUT=" 2> /dev/null"
90 fi 87 fi
91 88
92 # mode verbose enable 89 # mode verbose enable
93 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi 90 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
94 91
95 # Check USAGE by arguments number 92 # Check USAGE by arguments number
96 if [ $(($#-($OPTIND-1))) -ne 2 ] 93 if [ $(($#-($OPTIND-1))) -ne 2 ]
97 then 94 then
98 echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" 95 echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>"
99 echo "$0 -h for more info" 96 echo "$0 -h for more info"
100 exit 1 97 exit 1
101 fi 98 fi
102 99
103 shift $((OPTIND-1)) 100 shift $((OPTIND-1))
104 # check input directory - first argument 101 # check input directory - first argument
105 if [ ! -e $1 ] 102 if [ ! -e $1 ]
106 then 103 then
107 print_error "can't open $1" 104 print_error "can't open $1"
108 exit 1 105 exit 1
109 fi 106 fi
110 # check treil input directory - second argument 107 # check treil input directory - second argument
111 if [ ! -e $1/$2 ] 108 if [ ! -e $1/$2 ]
112 then 109 then
113 print_error "can't open $1/$2" 110 print_error "can't open $1/$2"
114 exit 1 111 exit 1
115 fi 112 fi
116 113
114 print_info "[${BASENAME}] => Conf Pass start | $(date +'%d/%m/%y %H:%M:%S')" 1
115
117 #-------------# 116 #-------------#
118 # GLOBAL VARS # 117 # GLOBAL VARS #
119 #-------------# 118 #-------------#
120 INPUT_DIR=$(readlink -e $1) 119 INPUT_DIR=$(readlink -e $1)
121 OUTPUT_DIR=$INPUT_DIR 120 OUTPUT_DIR=$INPUT_DIR
122 BASENAME=$(basename $OUTPUT_DIR) 121 BASENAME=$(basename $OUTPUT_DIR)
123 RES_NAME=$2 122 RES_NAME=$2
124 RES_P="${INPUT_DIR}/${RES_NAME}" 123 RES_P="${INPUT_DIR}/${RES_NAME}"
125 USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf 124 USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf
126 CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" 125 CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME"
127 RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" 126 RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm"
128 LOGFILE="${OUTPUT_DIR_BASENAME}/info_conf.log" 127 LOGFILE="${OUTPUT_DIR_BASENAME}/info_conf.log"
129 ERRORFILE="${OUTPUT_DIR_BASENAME}/error_conf.log" 128 ERRORFILE="${OUTPUT_DIR_BASENAME}/error_conf.log"
130 129
131 #------------------# 130 #------------------#
132 # Create Workspace # 131 # Create Workspace #
133 #------------------# 132 #------------------#
134 # Lock directory 133 # Lock directory
135 if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] 134 if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ]
136 then 135 then
137 print_warn "[${BASENAME}] Confpass is locked -> exit" 2 136 print_warn "[${BASENAME}] Confpass is locked -> exit" 2
138 exit 1 137 exit 1
139 fi 138 fi
140 rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 139 rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1
141 touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 140 touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1
142 if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi 141 if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi
143 if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi 142 if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi
144 mkdir -p $CONF_DIR > /dev/null 2>&1 143 mkdir -p $CONF_DIR > /dev/null 2>&1
145 mkdir -p $RES_CONF_DIR > /dev/null 2>&1 144 mkdir -p $RES_CONF_DIR > /dev/null 2>&1
146 rm $LOGFILE $ERRORFILE > /dev/null 2>&1 145 rm $LOGFILE $ERRORFILE > /dev/null 2>&1
147 146
148 #---------------# 147 #---------------#
149 # Check Pass # 148 # Check Pass #
150 #---------------# 149 #---------------#
151 print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1 150 print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1
152 # if usf contains more than 49% of 0.600 confidence -> usf error 151 # if usf contains more than 49% of 0.600 confidence -> usf error
153 if [ -s $USF_FILE ] 152 if [ -s $USF_FILE ]
154 then 153 then
155 conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") 154 conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}")
156 confall=$(grep -c -E 'confidence=' "${USF_FILE}") 155 confall=$(grep -c -E 'confidence=' "${USF_FILE}")
157 if [ $confall -gt 0 ] 156 if [ $confall -gt 0 ]
158 then 157 then
159 pourcentageofzerosix=$((($conftozerosix*100)/$confall)) 158 pourcentageofzerosix=$((($conftozerosix*100)/$confall))
160 if [ $pourcentageofzerosix -gt 49 ] 159 if [ $pourcentageofzerosix -gt 49 ]
161 then 160 then
162 print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2 161 print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2
163 print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1 162 print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1
164 mv "${USF_FILE}" "${USF_FILE}.back" 163 mv "${USF_FILE}" "${USF_FILE}.back"
165 rm -r $CONF_DIR > /dev/null 2>&1 164 rm -r $CONF_DIR > /dev/null 2>&1
166 else 165 else
167 print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1 166 print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1
168 exit 0 167 exit 0
169 fi 168 fi
170 fi 169 fi
171 else 170 else
172 print_info "[${BASENAME}] No USF file already done, continue..." 1 171 print_info "[${BASENAME}] No USF file already done, continue..." 1
173 fi 172 fi
174 173
175 # Check if treil are here 174 # Check if treil are here
176 nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l) 175 nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l)
177 nbtreil_p=$(ls $RES_P/*.treil 2> /dev/null | wc -l) 176 nbtreil_p=$(ls $RES_P/*.treil 2> /dev/null | wc -l)
178 if [ $nbtreil_p -eq 0 ] 177 if [ $nbtreil_p -eq 0 ]
179 then 178 then
180 print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass" 179 print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass"
181 if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi 180 if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi
182 exit 1 181 exit 1
183 else 182 else
184 # Check if more than 89% of treil are done 183 # Check if more than 89% of treil are done
185 if [ $nbres_p1 -gt 0 ] 184 if [ $nbres_p1 -gt 0 ]
186 then 185 then
187 pourcentage=$((($nbtreil_p*100)/$nbres_p1)) 186 pourcentage=$((($nbtreil_p*100)/$nbres_p1))
188 if [ $pourcentage -gt 89 ] 187 if [ $pourcentage -gt 89 ]
189 then 188 then
190 print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1 189 print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1
191 else 190 else
192 print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2 191 print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2
193 if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil " 192 if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil "
194 fi 193 fi
195 fi 194 fi
196 fi 195 fi
197 196
198 #------# 197 #------#
199 # Save # 198 # Save #
200 #------# 199 #------#
201 cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg 200 cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg
202 echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg 201 echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg
203 echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg 202 echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg
204 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ConfPass.cfg" 1 203 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ConfPass.cfg" 1
205 204
206 #--------------------# 205 #--------------------#
207 # CONFIDENCE MEASURE # 206 # CONFIDENCE MEASURE #
208 #--------------------# 207 #--------------------#
209 208
210 # Check percentage of scored_ctm already done, if < 85% do confidence measure 209 # Check percentage of scored_ctm already done, if < 85% do confidence measure
211 nbres_p=$(ls ${RES_P}/*.treil | wc -l) 210 nbres_p=$(ls ${RES_P}/*.treil | wc -l)
212 nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) 211 nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l)
213 if [ $nbres_p -gt 0 ] 212 if [ $nbres_p -gt 0 ]
214 then 213 then
215 pourcentageres=$((($nbconf*100)/$nbres_p)) 214 pourcentageres=$((($nbconf*100)/$nbres_p))
216 if [ $pourcentageres -lt 85 ] 215 if [ $pourcentageres -lt 85 ]
217 then 216 then
218 print_info "[${BASENAME}] Calcul Confidence $INPUT_DIR $RES_NAME" 1 217 print_info "[${BASENAME}] Calcul Confidence $INPUT_DIR $RES_NAME" 1
219 $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME $REDIRECTION_OUTPUT 218 $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME
220 219
221 else 220 else
222 print_info "[${BASENAME}] Skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 221 print_info "[${BASENAME}] Skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1
223 fi 222 fi
224 fi 223 fi
225 224
226 ### Check scored_ctm number res files ! 225 ### Check scored_ctm number res files !
227 if [ $CHECK -eq 1 ] 226 if [ $CHECK -eq 1 ]
228 then 227 then
229 nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) 228 nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l)
230 if [ $nbres_p -ne $nbconf ] 229 if [ $nbres_p -ne $nbconf ]
231 then 230 then
232 print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2 231 print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2
233 print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 232 print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ"
234 fi 233 fi
235 fi 234 fi
236 235
237 #---------------------------# 236 #---------------------------#
238 # FROM RES WITH CONF => USF # 237 # FROM RES WITH CONF => USF #
239 #---------------------------# 238 #---------------------------#
240 print_info "[${BASENAME}] Create USF file for $RES_P" 1 239 print_info "[${BASENAME}] Create USF file for $RES_P" 1
241 for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done 240 for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done
242 # create USF configuration file 241 # create USF configuration file
243 echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg 242 echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg
244 # create USF file 243 # create USF file
245 print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg $REDIRECTION_OUTPUT" 3 244 print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg" 3
246 $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg $REDIRECTION_OUTPUT 245 $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg
247 rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1 246 rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1
248 cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE 247 cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE
249 cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf 248 cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf
250 rm $USF_FILE.tmp > /dev/null 2>&1 249 rm $USF_FILE.tmp > /dev/null 2>&1
251 250
252 #----------------# 251 #----------------#
253 # Check USF file # 252 # Check USF file #
254 #----------------# 253 #----------------#
255 if [ $CHECK -eq 1 ] 254 if [ $CHECK -eq 1 ]
256 then 255 then
257 check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" 256 check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf"
258 if [ $? -eq 1 ] 257 if [ $? -eq 1 ]
259 then 258 then
260 print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" 259 print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf"
261 print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" 260 print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf"
262 exit 1 261 exit 1
263 fi 262 fi
264 fi 263 fi
265 264
266 #-------# 265 #-------#
267 # CLOSE # 266 # CLOSE #
268 #-------# 267 #-------#
269 # Seem OK 268 # Seem OK
270 print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1 269 print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1
271 270
272 # unlock directory 271 # unlock directory
273 mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" 272 mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock"
main_tools/ExploitConfidencePass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : ExploitConfidencePass.sh # 4 # File : ExploitConfidencePass.sh #
5 # Brief : Exploit the ASR confidence pass to : # 5 # Brief : Exploit the ASR confidence pass to : #
6 # -> boost the confident zone # 6 # -> boost the confident zone #
7 # -> find alternative in non confident zone 7 # -> find alternative in non confident zone
8 # -> dynamically extend the lexicon # 8 # -> dynamically extend the lexicon #
9 # Author : Jean-François Rey # 9 # Author : Jean-François Rey #
10 # (based on Emmanuel Ferreira # 10 # (based on Emmanuel Ferreira #
11 # and Hugo Mauchrétien works) # 11 # and Hugo Mauchrétien works) #
12 # Version : 1.0 # 12 # Version : 1.0 #
13 # Date : 25/06/13 # 13 # Date : 25/06/13 #
14 ##################################################### 14 #####################################################
15 15
16 echo "### ExploitConfidencePass.sh ###" 16 echo "### ExploitConfidencePass.sh ###"
17 17
18 # Check OTMEDIA_HOME env var 18 # Check OTMEDIA_HOME env var
19 if [ -z ${OTMEDIA_HOME} ] 19 if [ -z ${OTMEDIA_HOME} ]
20 then 20 then
21 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 21 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
22 export OTMEDIA_HOME=$OTMEDIA_HOME 22 export OTMEDIA_HOME=$OTMEDIA_HOME
23 fi 23 fi
24 24
25 # where is ExploitConfidencePass.sh 25 # where is ExploitConfidencePass.sh
26 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 26 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
27 27
28 if [ -z ${SCRIPT_PATH} ] 28 if [ -z ${SCRIPT_PATH} ]
29 then 29 then
30 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 30 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
31 fi 31 fi
32 32
33 # Include scripts 33 # Include scripts
34 . $SCRIPT_PATH"/Tools.sh" 34 . $SCRIPT_PATH"/Tools.sh"
35 . $SCRIPT_PATH"/CheckExploitConfPass.sh" 35 . $SCRIPT_PATH"/CheckExploitConfPass.sh"
36 36
37 # where is ExploitConfidencePass.cfg 37 # where is ExploitConfidencePass.cfg
38 EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" 38 EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg"
39 if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] 39 if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ]
40 then 40 then
41 . $EXPLOITCONFIDENCEPASS_CONFIG_FILE 41 . $EXPLOITCONFIDENCEPASS_CONFIG_FILE
42 else 42 else
43 echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 43 echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
44 exit 1 44 exit 1
45 fi 45 fi
46 46
47 #---------------# 47 #---------------#
48 # Parse Options # 48 # Parse Options #
49 #---------------# 49 #---------------#
50 while getopts ":hDv:cf:r" opt 50 while getopts ":hDv:cf:r" opt
51 do 51 do
52 case $opt in 52 case $opt in
53 h) 53 h)
54 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" 54 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
55 echo -e "\t Options:" 55 echo -e "\t Options:"
56 echo -e "\t\t-h :\tprint this message" 56 echo -e "\t\t-h :\tprint this message"
57 echo -e "\t\t-D :\tDEBUG mode on" 57 echo -e "\t\t-D :\tDEBUG mode on"
58 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 58 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
59 echo -e "\t\t-c :\tCheck process, stop if error detected" 59 echo -e "\t\t-c :\tCheck process, stop if error detected"
60 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" 60 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
61 echo -e "\t\t-r n :\tforce rerun without deleting files" 61 echo -e "\t\t-r n :\tforce rerun without deleting files"
62 exit 1 62 exit 1
63 ;; 63 ;;
64 D) 64 D)
65 DEBUG=1 65 DEBUG=1
66 ;; 66 ;;
67 v) 67 v)
68 VERBOSE=$OPTARG 68 VERBOSE=$OPTARG
69 ;; 69 ;;
70 c) 70 c)
71 CHECK=1 71 CHECK=1
72 ;; 72 ;;
73 f) 73 f)
74 FORKS="--forks $OPTARG" 74 FORKS="--forks $OPTARG"
75 ;; 75 ;;
76 r) 76 r)
77 RERUN=1 77 RERUN=1
78 ;; 78 ;;
79 :) 79 :)
80 echo "Option -$OPTARG requires an argument." > /dev/stderr 80 echo "Option -$OPTARG requires an argument." > /dev/stderr
81 exit 1 81 exit 1
82 ;; 82 ;;
83 \?) 83 \?)
84 echo "BAD USAGE : unknow opton -$OPTARG" 84 echo "BAD USAGE : unknow opton -$OPTARG"
85 #exit 1 85 #exit 1
86 ;; 86 ;;
87 esac 87 esac
88 done 88 done
89 89
90 # mode debug enable 90 # mode debug enable
91 if [ $DEBUG -eq 1 ] 91 if [ $DEBUG -eq 1 ]
92 then 92 then
93 set -x 93 set -x
94 echo -e "## Mode DEBUG ON ##" 94 echo -e "## Mode DEBUG ON ##"
95 REDIRECTION_OUTPUT=""
96 else
97 REDIRECTION_OUTPUT=" 2> /dev/null"
98 fi 95 fi
99 96
100 # mode verbose enable 97 # mode verbose enable
101 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi 98 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
102 99
103 # Check USAGE by arguments number 100 # Check USAGE by arguments number
104 if [ $(($#-($OPTIND-1))) -ne 1 ] 101 if [ $(($#-($OPTIND-1))) -ne 1 ]
105 then 102 then
106 echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" 103 echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
107 echo "$0 -h for more info" 104 echo "$0 -h for more info"
108 exit 1 105 exit 1
109 fi 106 fi
110 107
111 shift $((OPTIND-1)) 108 shift $((OPTIND-1))
112 # check input directory - first argument 109 # check input directory - first argument
113 if [ ! -e $1 ] 110 if [ ! -e $1 ]
114 then 111 then
115 print_error "can't open $1" 112 print_error "can't open $1"
116 exit 1 113 exit 1
117 fi 114 fi
118 115
116 print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1
117
119 #-------------# 118 #-------------#
120 # GLOBAL VARS # 119 # GLOBAL VARS #
121 #-------------# 120 #-------------#
122 INPUT_DIR=$(readlink -e $1) 121 INPUT_DIR=$(readlink -e $1)
123 OUTPUT_DIR=$INPUT_DIR 122 OUTPUT_DIR=$INPUT_DIR
124 BASENAME=$(basename $OUTPUT_DIR) 123 BASENAME=$(basename $OUTPUT_DIR)
125 SHOW_DIR="$OUTPUT_DIR/shows/" 124 SHOW_DIR="$OUTPUT_DIR/shows/"
126 SOLR_RES="$OUTPUT_DIR/solr/" 125 SOLR_RES="$OUTPUT_DIR/solr/"
127 EXT_LEX="$OUTPUT_DIR/LEX/" 126 EXT_LEX="$OUTPUT_DIR/LEX/"
128 TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" 127 TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
129 LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" 128 LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log"
130 ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" 129 ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log"
131 130
132 CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" 131 CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg"
133 if [ -e $CONFPASS_CONFIG_FILE ] 132 if [ -e $CONFPASS_CONFIG_FILE ]
134 then 133 then
135 { 134 {
136 RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") 135 RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=")
137 RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") 136 RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=")
138 print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2 137 print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2
139 } 138 }
140 else 139 else
141 { 140 {
142 print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE" 141 print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE"
143 print_error "[${BASENAME}] -> use res_p2" 142 print_error "[${BASENAME}] -> use res_p2"
144 RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" 143 RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
145 RES_CONF="$INPUT_DIR/conf/res_p2" 144 RES_CONF="$INPUT_DIR/conf/res_p2"
146 } 145 }
147 fi 146 fi
148 147
149 mkdir -p $SHOW_DIR > /dev/null 2>&1 148 mkdir -p $SHOW_DIR > /dev/null 2>&1
150 mkdir -p $SOLR_RES > /dev/null 2>&1 149 mkdir -p $SOLR_RES > /dev/null 2>&1
151 mkdir -p $EXT_LEX > /dev/null 2>&1 150 mkdir -p $EXT_LEX > /dev/null 2>&1
152 mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 151 mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1
153 152
154 #------------------# 153 #------------------#
155 # Create Workspace # 154 # Create Workspace #
156 #------------------# 155 #------------------#
157 # Lock directory 156 # Lock directory
158 if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ] 157 if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]
159 then 158 then
160 print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2 159 print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2
161 exit 1 160 exit 1
162 fi 161 fi
163 rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 162 rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
164 touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 163 touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1
165 164
166 #------# 165 #------#
167 # Save # 166 # Save #
168 #------# 167 #------#
169 cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg 168 cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg
170 echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg 169 echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg
171 echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg 170 echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg
172 echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg 171 echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg
173 echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg 172 echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg
174 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1 173 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1
175 174
175 #---------------#
176 # Check Pass #
177 #---------------#
178
176 #-----------------------# 179 #-----------------------#
177 # Segmentation by show # 180 # Segmentation by show #
178 #-----------------------# 181 #-----------------------#
179 # create txt file from scored res 182 # create txt file from scored res
180 # tag pos and lemmatization of the txt file 183 # tag pos and lemmatization of the txt file
181 # merge the scored res and taglem file 184 # merge the scored res and taglem file
182 # segment using the last generated file 185 # segment using the last generated file
183 # and create a ctm file by show 186 # and create a ctm file by show
184 187
185 print_info "Segmentation by show" 1 188 print_info "[${BASENAME}] Segmentation by show" 1
186 189
187 # -> to txt 190 # -> to txt
188 print_info "Create txt from scored res" 2 191 print_info "[${BASENAME}] Create txt from scored res" 2
189 cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm 192 cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
190 cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp 193 cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp
191 cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt 194 cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt
192 195
193 # -> to tagger + lemme 196 # -> to tagger + lemme
194 print_info "Tag pos and lem in txt file" 2 197 print_info "[${BASENAME}] Tag pos and lem in txt file" 2
195 iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp 198 iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
196 $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem 199 $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem
197 200
198 # merge sctm and taglem 201 # merge sctm and taglem
199 print_info "Merge scored ctm with tag pos and lem file" 2 202 print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 2
200 cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl 203 cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl
201 204
202 # -> new seg 205 # -> new seg
203 print_info "Create xml file and run Topic Seg" 2 206 print_info "[${BASENAME}] Create xml file and run Topic Seg" 2
204 $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml 207 $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
205 rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem 208 rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem
206 209
207 # Lia_topic_seg : bring together sentences into show 210 # Lia_topic_seg : bring together sentences into show
208 cp $INPUT_DIR/$BASENAME.doc.xml 0.xml 211 cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
209 java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg 212 java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
210 cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg 213 cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg
211 rm 0.xml $INPUT_DIR/show.seg 214 rm 0.xml $INPUT_DIR/show.seg
212 215
213 if [ $CHECK -eq 1 ] 216 if [ $CHECK -eq 1 ]
214 then 217 then
215 if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "ERROR : no Topic segmentation" >> $ERRORFILE; fi 218 if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "[${BASENAME}] ERROR : no Topic segmentation" >> $ERRORFILE; fi
216 fi 219 fi
217 220
218 # Segment ctm into several show files and create a seg list by show 221 # Segment ctm into several show files and create a seg list by show
219 print_info "Segment ctm into show files and a seg list by show" 2 222 print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 2
220 $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR $REDIRECTION_OUTPUT 223 $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR
221 224
222 #-----------------------------------------------------------# 225 #-----------------------------------------------------------#
223 # SOLR QUERIES # 226 # SOLR QUERIES #
224 # -> Create Confidente Word # 227 # -> Create Confidente Word #
225 # Keep conf words and use Tags # 228 # Keep conf words and use Tags #
226 # -> Query SOLR (document & multimedia) # 229 # -> Query SOLR (document & multimedia) #
227 # concat word + add date 2 days before and after the show # 230 # concat word + add date 2 days before and after the show #
228 # query document & multimedia # 231 # query document & multimedia #
229 #-----------------------------------------------------------# 232 #-----------------------------------------------------------#
230 print_info "Create SOLR queries and ASK SOLR" 1 233 print_info "[${BASENAME}] Create SOLR queries and ASK SOLR" 1
231 for show in $(ls $SHOW_DIR/*.ctm) 234 for show in $(ls $SHOW_DIR/*.ctm)
232 do 235 do
233 bn=$(basename $show .ctm) 236 bn=$(basename $show .ctm)
234 # Remove words with low confidence and keep useful tagger words 237 # Remove words with low confidence and keep useful tagger words
235 cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone" 238 cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
236 # Get date 2 days before and after the show 239 # Get date 2 days before and after the show
237 datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)` 240 datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
238 # Create SOLR queries 241 # Create SOLR queries
239 cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries" 242 cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
240 # Ask SOLR DB 243 # Ask SOLR DB
241 if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then 244 if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
242 query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]" 245 query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
243 echo $query > $SHOW_DIR/$bn.queries 246 echo $query > $SHOW_DIR/$bn.queries
244 python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp $REDIRECTION_OUTPUT 247 python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp
245 cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords 248 cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
246 cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt 249 cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
247 rm $SOLR_RES/*.tmp > /dev/null 2>&1 250 rm $SOLR_RES/*.tmp > /dev/null 2>&1
248 fi 251 fi
249 252
250 if [ $CHECK -eq 1 ] 253 if [ $CHECK -eq 1 ]
251 then 254 then
252 if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ] 255 if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
253 then 256 then
254 print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1 257 print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1
255 fi 258 fi
256 fi 259 fi
257 260
258 done 261 done
259 262
260 #----------------------------------------------------------------------------------------------- 263 #-----------------------------------------------------------------------------------------------
261 # Build trigger file 264 # Build trigger file
262 # 1) keywords are automatically boosted in the non confident zone of the current res 265 # 1) keywords are automatically boosted in the non confident zone of the current res
263 # confident zone are boosted 266 # confident zone are boosted
264 # previous words in sensible zone are penalized 267 # previous words in sensible zone are penalized
265 # 2) OOVs are extracted + phonetized 268 # 2) OOVs are extracted + phonetized
266 # 3) Try to find OOVs acoustically in the current segment 269 # 3) Try to find OOVs acoustically in the current segment
267 # 4) Generate the .trigg file 270 # 4) Generate the .trigg file
268 #------------------------------------------------------------------------------------------------ 271 #------------------------------------------------------------------------------------------------
269 print_info "Build trigger files" 1 272 print_info "[${BASENAME}] Build trigger files" 1
270 for i in `ls $SOLR_RES/*.keywords` 273 for i in `ls $SOLR_RES/*.keywords`
271 do 274 do
272 basename=`basename $i .keywords` 275 basename=`basename $i .keywords`
273 276
274 # 277 #
275 # Tokenize & produce coverage report 278 # Tokenize & produce coverage report
276 # Use filter you need 279 # Use filter you need
277 # 280 #
278 print_info "keywords filtering and produce coverage report" 2 281 print_info "[${BASENAME}] keywords filtering and produce coverage report" 2
279 # Default filter 282 # Default filter
280 cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\ 283 cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
281 $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok 284 $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
282 # do less filter 285 # do less filter
283 #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok 286 #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
284 287
285 288
286 # 289 #
287 # Extract "real" OOV and phonetize them 290 # Extract "real" OOV and phonetize them
288 # -> small custom filtering to avoid too much noise 291 # -> small custom filtering to avoid too much noise
289 # 292 #
290 print_info "Extract OOV and phonetize them" 2 293 print_info "[${BASENAME}] Extract OOV and phonetize them" 2
291 ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov 294 ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov
292 295
293 # 296 #
294 # Search INVOC & OOV in the current lattice 297 # Search INVOC & OOV in the current lattice
295 # 298 #
296 print_info "Search INVOC and OOV in the current lattice" 2 299 print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 2
297 cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch 300 cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
298 cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch 301 cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch
299 302
300 # For each lattice (one per segment of the show) 303 # For each lattice (one per segment of the show)
301 for baseseg in $(cat "$SHOW_DIR/$basename.lst") 304 for baseseg in $(cat "$SHOW_DIR/$basename.lst")
302 do 305 do
303 $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION 306 $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
304 # 307 #
305 # Produce the boost file for the next decoding pass 308 # Produce the boost file for the next decoding pass
306 # 309 #
307 print_info "Produce trigg file : $baseseg " 3 310 print_info "[${BASENAME}] Produce trigg file : $baseseg " 3
308 cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg 311 cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
309 done 312 done
310 313
311 done 314 done
312 315
313 #----------------------------------------------------------------------------------------------- 316 #-----------------------------------------------------------------------------------------------
314 # Build the extended SPEERAL Lexicon 317 # Build the extended SPEERAL Lexicon
315 # 1) Merge OOVs + LEXICON 318 # 1) Merge OOVs + LEXICON
316 # 1) Related texts are collected in order to find the invoc word maximizing the ppl (LM proba) 319 # 1) Related texts are collected in order to find the invoc word maximizing the ppl (LM proba)
317 # 2) The current lexicon is extended with all the valid OOVs 320 # 2) The current lexicon is extended with all the valid OOVs
318 #----------------------------------------------------------------------------------------------- 321 #-----------------------------------------------------------------------------------------------
319 print_info "Build extended Speeral Lexicon" 1 322 print_info "[${BASENAME}] Build extended Speeral Lexicon" 1
320 mkdir -p $EXT_LEX/final 323 mkdir -p $EXT_LEX/final
321 mkdir -p $EXT_LEX/tmp 324 mkdir -p $EXT_LEX/tmp
322 mkdir -p $EXT_LEX/tmp/txt 325 mkdir -p $EXT_LEX/tmp/txt
323 # 326 #
324 # Collect the acousticly found oov and their phonetisation 327 # Collect the acousticly found oov and their phonetisation
325 # 328 #
326 print_info "Get all OOV and retrieve all phonetisation" 2 329 print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 2
327 for i in `ls $SOLR_RES/*.phon_oov` 330 for i in `ls $SOLR_RES/*.phon_oov`
328 do 331 do
329 basename=`basename $i .phon_oov` 332 basename=`basename $i .phon_oov`
330 333
331 rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null 334 rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
332 # list acousticly found for the show 335 # list acousticly found for the show
333 for baseseg in $(cat "$SHOW_DIR/$basename.lst") 336 for baseseg in $(cat "$SHOW_DIR/$basename.lst")
334 do 337 do
335 cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound 338 cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
336 done 339 done
337 cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp 340 cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
338 mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound 341 mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound
339 342
340 # 343 #
341 # Extract OOV really added 344 # Extract OOV really added
342 # 345 #
343 cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov 346 cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
344 $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound $REDIRECTION_OUTPUT 347 $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound
345 # 348 #
346 # Retrieve all phonetisation 349 # Retrieve all phonetisation
347 # 350 #
348 cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon 351 cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
349 done 352 done
350 353
351 # 354 #
352 # Merge OOVs and their phonetisation 355 # Merge OOVs and their phonetisation
353 # 356 #
354 print_info "Merge OOV and their phonetisation" 2 357 print_info "[${BASENAME}] Merge OOV and their phonetisation" 2
355 lexname=$(basename $LEXICON) 358 lexname=$(basename $LEXICON)
356 cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon 359 cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
357 cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound 360 cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
358 $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon $REDIRECTION_OUTPUT 361 $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon
359 362
360 # 363 #
361 # Collect + clean retrieved txt 364 # Collect + clean retrieved txt
362 # 365 #
363 print_info "Collect and clean SOLR txt answers" 2 366 print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2
364 # choose filter 367 # choose filter
365 # default 368 # default
366 cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt 369 cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
367 # low filter 370 # low filter
368 #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt 371 #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
369 372
370 # 373 #
371 # Construct the map file 374 # Construct the map file
372 # 375 #
373 # Notes: 376 # Notes:
374 # - Expected format : 377 # - Expected format :
375 # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1> 378 # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
376 # 379 #
377 print_info "Construct map file" 2 380 print_info "[${BASENAME}] Construct map file" 2
378 rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null 381 rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
379 rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null 382 rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null
380 383
381 while read oov 384 while read oov
382 do 385 do
383 oov=`echo $oov | sed "s/\n//g"` 386 oov=`echo $oov | sed "s/\n//g"`
384 # 387 #
385 # Obtain the oov's tag 388 # Obtain the oov's tag
386 # 389 #
387 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` 390 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
388 # 391 #
389 # Try to collect text containing the oov word 392 # Try to collect text containing the oov word
390 # 393 #
391 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt 394 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
392 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then 395 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
393 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 396 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
394 if [ $nbWords -eq 0 ]; then 397 if [ $nbWords -eq 0 ]; then
395 echo "UNVALID OOV: $oov => $nbWords occurrences" 398 echo "[${BASENAME}] UNVALID OOV: $oov => $nbWords occurrences"
396 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 399 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
397 else 400 else
398 # 401 #
399 # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl over all the collected txt 402 # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl over all the collected txt
400 # 403 #
401 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" 404 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
402 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 405 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
403 if [ ! x$candidate = "x" ]; then 406 if [ ! x$candidate = "x" ]; then
404 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon 407 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
405 while read phonLine 408 while read phonLine
406 do 409 do
407 #<word> <phon> => <word> <candidate> <phon> 410 #<word> <phon> => <word> <candidate> <phon>
408 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map 411 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
409 done < $EXT_LEX/tmp/$oov.phon 412 done < $EXT_LEX/tmp/$oov.phon
410 else 413 else
411 echo "UNVALID OOV: $oov => no availaible Candidate word in LM" 414 echo "[${BASENAME}] UNVALID OOV: $oov => no availaible Candidate word in LM"
412 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 415 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
413 fi 416 fi
414 fi 417 fi
415 else 418 else
416 echo "UNVALID OOV: $oov" 419 echo "[${BASENAME}] UNVALID OOV: $oov"
417 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 420 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
418 fi 421 fi
419 done < $EXT_LEX/final/all.oov_acousticlyfound 422 done < $EXT_LEX/final/all.oov_acousticlyfound
420 423
421 # 424 #
422 ### Speeral 425 ### Speeral
423 # 426 #
424 427
425 lexname=`basename $LEXICON` 428 lexname=`basename $LEXICON`
426 # 429 #
427 # Build the final trigger file 430 # Build the final trigger file
428 # 431 #
429 print_info "Clean trigg files" 2 432 print_info "[${BASENAME}] Clean trigg files" 2
430 mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null 433 mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null
431 mkdir -p $EXT_LEX/speeral/ 2> /dev/null 434 mkdir -p $EXT_LEX/speeral/ 2> /dev/null
432 for i in `ls $TRIGGER_CONFZONE/*.trigg` 435 for i in `ls $TRIGGER_CONFZONE/*.trigg`
433 do 436 do
434 basename=`basename $i .trigg` 437 basename=`basename $i .trigg`
435 cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg 438 cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg
436 done 439 done
437 # 440 #
438 # Compile the speeral extended lexicon 441 # Compile the speeral extended lexicon
439 # 442 #
440 print_info "Compile Speeral extended lexicon" 2 443 print_info "[${BASENAME}] Compile Speeral extended lexicon" 2
441 $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext $REDIRECTION_OUTPUT 444 $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext
442 445
443 if [ $CHECK -eq 1 ] 446 if [ $CHECK -eq 1 ]
444 then 447 then
445 check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext" 448 check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
446 if [ $? -eq 1 ] 449 if [ $? -eq 1 ]
447 then 450 then
448 echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE 451 echo -e "[${BASENAME}] ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE
449 exit 1; 452 exit 1;
450 fi 453 fi
451 fi 454 fi
452 455
453 456
454 #-------# 457 #-------#
455 # CLOSE # 458 # CLOSE #
456 #-------# 459 #-------#
457 # Seem OK 460 # Seem OK
458 print_info "<= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1 461 print_info "[${BASENAME}] <= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1
459 echo -e "#Solr $BASENAME " >> $LOGFILE 462 echo -e "[${BASENAME}] #Solr $BASENAME " >> $LOGFILE
460 463
461 # unlock directory 464 # unlock directory
462 mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" 465 mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
main_tools/FirstPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : FirstPass.sh # 4 # File : FirstPass.sh #
5 # Brief : ASR first pass and speaker diarization # 5 # Brief : ASR first pass and speaker diarization #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # (base on Emmanuel Ferreira # 7 # (base on Emmanuel Ferreira #
8 # and Hugo Mauchrétien works) # 8 # and Hugo Mauchrétien works) #
9 # Version : 1.1 # 9 # Version : 1.1 #
10 # Date : 18/06/13 # 10 # Date : 18/06/13 #
11 ##################################################### 11 #####################################################
12 12
13 echo "### FirstPass.sh ###" 13 echo "### FirstPass.sh ###"
14 14
15 # Check OTMEDIA_HOME env var 15 # Check OTMEDIA_HOME env var
16 if [ -z ${OTMEDIA_HOME} ] 16 if [ -z ${OTMEDIA_HOME} ]
17 then 17 then
18 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 18 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
19 export OTMEDIA_HOME=$OTMEDIA_HOME 19 export OTMEDIA_HOME=$OTMEDIA_HOME
20 fi 20 fi
21 21
22 # where is FirstPass.sh 22 # where is FirstPass.sh
23 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 23 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
24 24
25 # scripts path 25 # scripts path
26 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 26 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
27 27
28 # Include scripts 28 # Include scripts
29 . $SCRIPT_PATH"/Tools.sh" 29 . $SCRIPT_PATH"/Tools.sh"
30 . $SCRIPT_PATH"/CheckFirstPass.sh" 30 . $SCRIPT_PATH"/CheckFirstPass.sh"
31 31
32 # where is FirstPass.cfg 32 # where is FirstPass.cfg
33 FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg" 33 FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg"
34 if [ -e $FIRSTPASS_CONFIG_FILE ] 34 if [ -e $FIRSTPASS_CONFIG_FILE ]
35 then 35 then
36 . $FIRSTPASS_CONFIG_FILE 36 . $FIRSTPASS_CONFIG_FILE
37 else 37 else
38 echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" > /dev/stderr 38 echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" > /dev/stderr
39 echo "exit" > /dev/stderr 39 echo "exit" > /dev/stderr
40 exit 1 40 exit 1
41 fi 41 fi
42 42
43 #---------------# 43 #---------------#
44 # Parse Options # 44 # Parse Options #
45 #---------------# 45 #---------------#
46 while getopts ":hDv:cf:r" opt 46 while getopts ":hDv:cf:r" opt
47 do 47 do
48 case $opt in 48 case $opt in
49 h) 49 h)
50 echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" 50 echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
51 echo -e "\t Options:" 51 echo -e "\t Options:"
52 echo -e "\t\t-h :\tprint this message" 52 echo -e "\t\t-h :\tprint this message"
53 echo -e "\t\t-D :\tDEBUG mode on" 53 echo -e "\t\t-D :\tDEBUG mode on"
54 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 54 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
55 echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected" 55 echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected"
56 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" 56 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
57 echo -e "\t\t-r :\tforce rerun the wav file" 57 echo -e "\t\t-r :\tforce rerun the wav file"
58 exit 1 58 exit 1
59 ;; 59 ;;
60 D) 60 D)
61 DEBUG=1 61 DEBUG=1
62 ;; 62 ;;
63 v) 63 v)
64 VERBOSE=$OPTARG 64 VERBOSE=$OPTARG
65 ;; 65 ;;
66 c) 66 c)
67 CHECK=1 67 CHECK=1
68 ;; 68 ;;
69 f) 69 f)
70 FORKS="--forks $OPTARG" 70 FORKS="--forks $OPTARG"
71 ;; 71 ;;
72 r) 72 r)
73 RERUN=1 73 RERUN=1
74 ;; 74 ;;
75 :) 75 :)
76 echo "Option -$OPTARG requires an argument." > /dev/stderr 76 echo "Option -$OPTARG requires an argument." > /dev/stderr
77 exit 1 77 exit 1
78 ;; 78 ;;
79 \?) 79 \?)
80 echo "BAD USAGE : unknow opton -$OPTARG" 80 echo "BAD USAGE : unknow opton -$OPTARG"
81 exit 1 81 exit 1
82 ;; 82 ;;
83 esac 83 esac
84 done 84 done
85 85
86 # mode debug enable 86 # mode debug enable
87 if [ $DEBUG -eq 1 ] 87 if [ $DEBUG -eq 1 ]
88 then 88 then
89 set -x 89 set -x
90 echo -e "## Mode DEBUG ON ##" 90 echo -e "## Mode DEBUG ON ##"
91 REDIRECTION_OUTPUT=""
92 else
93 REDIRECTION_OUTPUT=" 2> /dev/null"
94 fi 91 fi
95 92
96 # mode verbose enable 93 # mode verbose enable
97 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi 94 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
98 95
99 # Check USAGE by arguments number 96 # Check USAGE by arguments number
100 if [ $(($#-($OPTIND-1))) -ne 2 ] 97 if [ $(($#-($OPTIND-1))) -ne 2 ]
101 then 98 then
102 echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" 99 echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
103 echo "$0 -h for more info" 100 echo "$0 -h for more info"
104 exit 1 101 exit 1
105 fi 102 fi
106 103
107 shift $((OPTIND-1)) 104 shift $((OPTIND-1))
108 # check audio file - First argument 105 # check audio file - First argument
109 if [ -e $1 ] && [ -s $1 ] 106 if [ -e $1 ] && [ -s $1 ]
110 then 107 then
111 # absolute path to wav file 108 # absolute path to wav file
112 WAV_FILE=$(readlink -e $1) 109 WAV_FILE=$(readlink -e $1)
113 # wav filename 110 # wav filename
114 FILENAME=$(basename $WAV_FILE) 111 FILENAME=$(basename $WAV_FILE)
115 # wav filename without extension 112 # wav filename without extension
116 BASENAME=${FILENAME%.*} 113 BASENAME=${FILENAME%.*}
117 114
118 print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1 115 print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1
119 print_info "[${BASENAME}] $WAV_FILE OK" 2 116 print_info "[${BASENAME}] $WAV_FILE OK" 2
120 else 117 else
121 print_error "can't find $1 OR file is empty" 118 print_error "can't find $1 OR file is empty"
122 exit 1 119 exit 1
123 fi 120 fi
124 121
125 # check output directory - Second argument 122 # check output directory - Second argument
126 if [ ! -e $2 ] 123 if [ ! -e $2 ]
127 then 124 then
128 mkdir -p $2 125 mkdir -p $2
129 print_info "[${BASENAME}] Make directory $2" 2 126 print_info "[${BASENAME}] Make directory $2" 2
130 fi 127 fi
131 128
132 129
133 #-------------# 130 #-------------#
134 # GLOBAL VARS # 131 # GLOBAL VARS #
135 #-------------# 132 #-------------#
136 OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path 133 OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path
137 OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME 134 OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME
138 PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file 135 PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file
139 PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory 136 PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory
140 SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file 137 SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file
141 LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file 138 LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file
142 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1" 139 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1"
143 LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log" 140 LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log"
144 ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log" 141 ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log"
145 142
146 #------------------# 143 #------------------#
147 # Create WORKSPACE # 144 # Create WORKSPACE #
148 #------------------# 145 #------------------#
149 if [ ! -e $OUTPUT_DIR_BASENAME ] 146 if [ ! -e $OUTPUT_DIR_BASENAME ]
150 then 147 then
151 mkdir -p $OUTPUT_DIR_BASENAME 148 mkdir -p $OUTPUT_DIR_BASENAME
152 print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2 149 print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2
153 fi 150 fi
154 151
155 # Lock directory 152 # Lock directory
156 if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi 153 if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi
157 rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1 154 rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1
158 touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1 155 touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1
159 156
160 rm -r $PLP_DIR > /dev/null 2>&1; 157 rm -r $PLP_DIR > /dev/null 2>&1;
161 mkdir -p $PLP_DIR 158 mkdir -p $PLP_DIR
162 print_info "[${BASENAME}] Make directory $PLP_DIR" 2 159 print_info "[${BASENAME}] Make directory $PLP_DIR" 2
163 if [ $RERUN -eq 0 ]; 160 if [ $RERUN -eq 0 ];
164 then 161 then
165 rm -r $RES_DIR > /dev/null 2>&1; 162 rm -r $RES_DIR > /dev/null 2>&1;
166 else 163 else
167 rm $RES_DIR/*.lock > /dev/null 2>&1 164 rm $RES_DIR/*.lock > /dev/null 2>&1
168 fi 165 fi
169 mkdir -p $RES_DIR > /dev/null 2>&1 166 mkdir -p $RES_DIR > /dev/null 2>&1
170 print_info "[${BASENAME}] Make directory $RES_DIR" 2 167 print_info "[${BASENAME}] Make directory $RES_DIR" 2
171 rm $LOGFILE $ERRORFILE > /dev/null 2>&1 168 rm $LOGFILE $ERRORFILE > /dev/null 2>&1
172 169
173 #--------------------# 170 #--------------------#
174 # Save configuration # 171 # Save configuration #
175 #--------------------# 172 #--------------------#
176 cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg 173 cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg
177 echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 174 echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
178 echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 175 echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
179 echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 176 echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
180 echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 177 echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
181 echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 178 echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
182 echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 179 echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
183 echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 180 echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
184 echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 181 echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
185 echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 182 echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
186 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 183 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
187 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1 184 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1
188 185
189 #-------------------------# 186 #-------------------------#
190 # Check Audio File Format # 187 # Check Audio File Format #
191 #-------------------------# 188 #-------------------------#
192 error=0 189 error=0
193 temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz") 190 temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz")
194 if [ -z "$temp" ]; then error=1; fi 191 if [ -z "$temp" ]; then error=1; fi
195 temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels") 192 temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels")
196 if [ -z "$temp" ]; then error=1; fi 193 if [ -z "$temp" ]; then error=1; fi
197 temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16") 194 temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16")
198 if [ -z "$temp" ]; then error=1; fi 195 if [ -z "$temp" ]; then error=1; fi
199 196
200 if [ $error -eq 1 ] 197 if [ $error -eq 1 ]
201 then 198 then
202 print_message $WARNING 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" 199 print_message $WARNING 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert"
203 print_message $INFO 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" 200 print_message $INFO 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav"
204 avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav $REDIRECTION_OUTPUT 201 avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav
205 WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav 202 WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav
206 FILENAME=$BASENAME.wav 203 FILENAME=$BASENAME.wav
207 print_message $INFO 1 "[${BASENAME}] new wav file : $WAV_FILE" 204 print_message $INFO 1 "[${BASENAME}] new wav file : $WAV_FILE"
208 fi 205 fi
209 206
210 #---------------# 207 #---------------#
211 # Get SRT file # 208 # Get SRT file #
212 #---------------# 209 #---------------#
213 if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ] 210 if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ]
214 then 211 then
215 cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT 212 cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT
216 print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3 213 print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3
217 fi 214 fi
218 215
219 #------------# 216 #------------#
220 # WAV -> PLP # 217 # WAV -> PLP #
221 #------------# 218 #------------#
222 print_info "[${BASENAME}] convert WAV -> PLP" 1 219 print_info "[${BASENAME}] convert WAV -> PLP" 1
223 echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp 220 echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp
224 print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3 221 print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3
225 222
226 $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms $REDIRECTION_OUTPUT 223 $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms
227 224
228 if [ $CHECK -eq 1 ] 225 if [ $CHECK -eq 1 ]
229 then 226 then
230 check_first_pass_plp "$PLP_FILE" 227 check_first_pass_plp "$PLP_FILE"
231 if [ $? -eq 1 ] 228 if [ $? -eq 1 ]
232 then 229 then
233 print_log_file "$ERROFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE" 230 print_log_file "$ERROFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE"
234 echo "[${BASENAME}] ERROR : ${BASENAME} check $ERRORFILE file" > /dev/stderr 231 echo "[${BASENAME}] ERROR : ${BASENAME} check $ERRORFILE file" > /dev/stderr
235 exit 1 232 exit 1
236 fi 233 fi
237 fi 234 fi
238 235
239 rm $OUTPUT_DIR_BASENAME/list.tmp 236 rm $OUTPUT_DIR_BASENAME/list.tmp
240 237
241 #------------------------------# 238 #------------------------------#
242 # S/NS + SPEAKERS SEGMENTATION # 239 # S/NS + SPEAKERS SEGMENTATION #
243 #------------------------------# 240 #------------------------------#
244 print_info "[${BASENAME}] Launch speakers diarization" 1 241 print_info "[${BASENAME}] Launch speakers diarization" 1
245 # Compute seg file 242 # Compute seg file
246 print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3 243 print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3
247 #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME 244 #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME
248 java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME $REDIRECTION_OUTPUT #–doCEClustering 245 java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME #–doCEClustering
249 246
250 if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) 247 if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] )
251 then 248 then
252 print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE" 249 print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE"
253 print_error "[${BASENAME}] ERROR : check $ERRORFILE file" 250 print_error "[${BASENAME}] ERROR : check $ERRORFILE file"
254 exit 1 251 exit 1
255 fi 252 fi
256 253
257 254
258 # Create LBL file 255 # Create LBL file
259 print_info "Extract LBL file from SEG file" 1 256 print_info "Extract LBL file from SEG file" 1
260 257
261 cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE 258 cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE
262 259
263 if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] ) 260 if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] )
264 then 261 then
265 print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE" 262 print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE"
266 print_error "[${BASENAME}] ERROR : check $ERRORFILE file" 263 print_error "[${BASENAME}] ERROR : check $ERRORFILE file"
267 exit 1 264 exit 1
268 fi 265 fi
269 266
270 267
271 #----------------------------------------------------# 268 #----------------------------------------------------#
272 # Cut global PLP file depending to LBL segmentations # 269 # Cut global PLP file depending to LBL segmentations #
273 #----------------------------------------------------# 270 #----------------------------------------------------#
274 print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1 271 print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1
275 print_info "[${BASENAME}] $BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3 272 print_info "[${BASENAME}] $BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3
276 273
277 $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG $REDIRECTION_OUTPUT 274 $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG
278 275
279 if [ $CHECK -eq 1 ] 276 if [ $CHECK -eq 1 ]
280 then 277 then
281 check_first_pass_plps_lbl $PLP_DIR $LBL_FILE 278 check_first_pass_plps_lbl $PLP_DIR $LBL_FILE
282 if [ $? -eq 1 ] 279 if [ $? -eq 1 ]
283 then 280 then
284 print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong number of .plp files" 281 print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong number of .plp files"
285 print_error "[${BASENAME}] ERROR : check $ERRORFILE file" 282 print_error "[${BASENAME}] ERROR : check $ERRORFILE file"
286 exit 1 283 exit 1
287 fi 284 fi
288 fi 285 fi
289 286
290 # rename the plp files 287 # rename the plp files
291 cd $PLP_DIR; 288 cd $PLP_DIR;
292 rename -f s/_/#/g *plp 289 rename -f s/_/#/g *plp
293 rename -f s/#/_/ *plp 290 rename -f s/#/_/ *plp
294 cd $OLDPWD 291 cd $OLDPWD
295 292
296 #---------------------------------------------# 293 #---------------------------------------------#
297 # PLP files list depending to acoustic models # 294 # PLP files list depending to acoustic models #
298 #---------------------------------------------# 295 #---------------------------------------------#
299 print_info "[${BASENAME}] Create PLP list depending of the model" 1 296 print_info "[${BASENAME}] Create PLP list depending of the model" 1
300 # Create a list of plp files 297 # Create a list of plp files
301 find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $OUTPUT_DIR_BASENAME/plp.lst 298 find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $OUTPUT_DIR_BASENAME/plp.lst
302 299
303 rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1 300 rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1
304 for (( i=0; $i<${#MTAG[@]} ; i++ )) 301 for (( i=0; $i<${#MTAG[@]} ; i++ ))
305 do 302 do
306 a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst` 303 a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst`
307 if [ -n "$a" ]; then 304 if [ -n "$a" ]; then
308 print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3 305 print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3
309 grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst 306 grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
310 fi 307 fi
311 done 308 done
312 309
313 #-----------------------# 310 #-----------------------#
314 # First Pass (DECODING) # 311 # First Pass (DECODING) #
315 #-----------------------# 312 #-----------------------#
316 # 313 #
317 # For all AM do decoding 314 # For all AM do decoding
318 # if Check error -> iter on undone decoding (max 1 times) 315 # if Check error -> iter on undone decoding (max 1 times)
319 # 316 #
320 print_info "[${BASENAME}] Launch decoding" 1 317 print_info "[${BASENAME}] Launch decoding" 1
321 for (( i=0; $i<${#MTAG[@]} ; i++ )) 318 for (( i=0; $i<${#MTAG[@]} ; i++ ))
322 do 319 do
323 redo=1; # nb of try if not all segs is decoded 320 redo=1; # nb of try if not all segs is decoded
324 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ] 321 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]
325 then 322 then
326 todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst 323 todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
327 while [ $redo -gt 0 ]; do 324 while [ $redo -gt 0 ]; do
328 rm $RES_DIR/*.lock > /dev/null 2>&1 325 rm $RES_DIR/*.lock > /dev/null 2>&1
329 print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 326 print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3
330 # Run speeral 327 # Run speeral
331 $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT 328 $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock
332 329
333 # Check if error 330 # Check if error
334 if [ $CHECK -eq 1 ] 331 if [ $CHECK -eq 1 ]
335 then 332 then
336 check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR" 333 check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR"
337 # if error 334 # if error
338 if [ $? -eq 1 ] 335 if [ $? -eq 1 ]
339 then 336 then
340 # rerun 337 # rerun
341 redo=$(($redo - 1)); 338 redo=$(($redo - 1));
342 print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 339 print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2
343 print_log_file $LOGFILE "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" 340 print_log_file $LOGFILE "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst"
344 # new plp list 341 # new plp list
345 # list .seg done and compare to list of seg to do 342 # list .seg done and compare to list of seg to do
346 ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 343 ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
347 diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst 344 diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst
348 rm ${OUTPUT_DIR_BASENAME}/.tmp 345 rm ${OUTPUT_DIR_BASENAME}/.tmp
349 # log seg to do 346 # log seg to do
350 print_log_file $LOGFILE "Segs not done :" 347 print_log_file $LOGFILE "Segs not done :"
351 cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $LOGFILE 348 cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $LOGFILE
352 todo=${OUTPUT_DIR_BASENAME}/todo.lst 349 todo=${OUTPUT_DIR_BASENAME}/todo.lst
353 print_warn "[${BASENAME}] Try $redo" 3 350 print_warn "[${BASENAME}] Try $redo" 3
354 fi 351 fi
355 fi 352 fi
356 done 353 done
357 rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1 354 rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1
358 #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst 355 #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
359 rm $RES_DIR/*.lock > /dev/null 2>&1 356 rm $RES_DIR/*.lock > /dev/null 2>&1
360 fi 357 fi
361 done 358 done
362 359
363 ## Check missing seg and log it 360 ## Check missing seg and log it
364 if [ $CHECK -eq 1 ] 361 if [ $CHECK -eq 1 ]
365 then 362 then
366 ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 363 ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
367 todo=$(cat ${OUTPUT_DIR_BASENAME}/plp.lst | wc -l) 364 todo=$(cat ${OUTPUT_DIR_BASENAME}/plp.lst | wc -l)
368 if [ $todo -eq 0 ]; then todo=1;fi 365 if [ $todo -eq 0 ]; then todo=1;fi
369 notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) 366 notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l)))
370 pourcentage=$((($notdone*100)/$todo)) 367 pourcentage=$((($notdone*100)/$todo))
371 368
372 if [ $notdone -ne 0 ] 369 if [ $notdone -ne 0 ]
373 then 370 then
374 print_error "[${BASENAME}] ERROR : check $ERRORFILE" 371 print_error "[${BASENAME}] ERROR : check $ERRORFILE"
375 print_log_file "$ERRORFILE" "ERROR : Segs not done [" 372 print_log_file "$ERRORFILE" "ERROR : Segs not done ["
376 diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE 373 diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE
377 print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME" 374 print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME"
378 else 375 else
379 print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" 376 print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')"
380 fi 377 fi
381 rm ${OUTPUT_DIR_BASENAME}/.tmp 378 rm ${OUTPUT_DIR_BASENAME}/.tmp
382 fi 379 fi
383 380
384 #---------------# 381 #---------------#
385 # Convert res # 382 # Convert res #
386 #---------------# 383 #---------------#
387 print_info "[${BASENAME}] Convert .res into .ctm" 1 384 print_info "[${BASENAME}] Convert .res into .ctm" 1
388 # .res => .ctm 385 # .res => .ctm
389 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm $REDIRECTION_OUTPUT 386 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm
390 print_info "[${BASENAME}] Convert .res into .trs" 1 387 print_info "[${BASENAME}] Convert .res into .trs" 1
391 # .res => .trs 388 # .res => .trs
392 echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 389 echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg
393 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg $REDIRECTION_OUTPUT 390 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg
394 rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null 391 rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null
395 print_info "[${BASENAME}] Convert .res into .txt" 1 392 print_info "[${BASENAME}] Convert .res into .txt" 1
396 # .res => .txt 393 # .res => .txt
397 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt $REDIRECTION_OUTPUT 394 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt
398 395
399 print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1 396 print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1
400 # unlock directory 397 # unlock directory
401 mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" 398 mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock"
402 399
main_tools/SecondPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : SecondPass.sh # 4 # File : SecondPass.sh #
5 # Brief : Speaker adaptation + ASR second pass # 5 # Brief : Speaker adaptation + ASR second pass #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # (base on Emmanuel Ferreira # 7 # (base on Emmanuel Ferreira #
8 # and Hugo Mauchrétien works) # 8 # and Hugo Mauchrétien works) #
9 # Version : 1.1 # 9 # Version : 1.1 #
10 # Date : 18/06/13 # 10 # Date : 18/06/13 #
11 ##################################################### 11 #####################################################
12 12
13 echo "### SecondPass.sh ###" 13 echo "### SecondPass.sh ###"
14 14
15 # Check OTMEDIA_HOME env var 15 # Check OTMEDIA_HOME env var
16 if [ -z ${OTMEDIA_HOME} ] 16 if [ -z ${OTMEDIA_HOME} ]
17 then 17 then
18 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 18 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
19 export OTMEDIA_HOME=$OTMEDIA_HOME 19 export OTMEDIA_HOME=$OTMEDIA_HOME
20 fi 20 fi
21 21
22 # where is SecondPass.sh 22 # where is SecondPass.sh
23 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 23 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
24 24
25 # Scripts Path 25 # Scripts Path
26 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 26 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
27 27
28 # Include scripts 28 # Include scripts
29 . $SCRIPT_PATH"/Tools.sh" 29 . $SCRIPT_PATH"/Tools.sh"
30 . $SCRIPT_PATH"/CheckSecondPass.sh" 30 . $SCRIPT_PATH"/CheckSecondPass.sh"
31 31
32 # where is SecondPass.cfg 32 # where is SecondPass.cfg
33 SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg" 33 SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg"
34 if [ -e $SECONDPASS_CONFIG_FILE ] 34 if [ -e $SECONDPASS_CONFIG_FILE ]
35 then 35 then
36 . $SECONDPASS_CONFIG_FILE 36 . $SECONDPASS_CONFIG_FILE
37 else 37 else
38 echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" > /dev/stderr 38 echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" > /dev/stderr
39 echo "exit" > /dev/stderr 39 echo "exit" > /dev/stderr
40 exit 1 40 exit 1
41 fi 41 fi
42 42
43 #---------------# 43 #---------------#
44 # Parse Options # 44 # Parse Options #
45 #---------------# 45 #---------------#
46 while getopts ":hDv:crf:" opt 46 while getopts ":hDv:crf:" opt
47 do 47 do
48 case $opt in 48 case $opt in
49 h) 49 h)
50 echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n" 50 echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n"
51 echo -e "\t Options:" 51 echo -e "\t Options:"
52 echo -e "\t\t-h :\tprint this message" 52 echo -e "\t\t-h :\tprint this message"
53 echo -e "\t\t-D :\tDEBUG mode on" 53 echo -e "\t\t-D :\tDEBUG mode on"
54 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 54 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
55 echo -e "\t\t-c :\t Check process, stop if error detected" 55 echo -e "\t\t-c :\t Check process, stop if error detected"
56 echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" 56 echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
57 echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" 57 echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done"
58 exit 1 58 exit 1
59 ;; 59 ;;
60 D) 60 D)
61 DEBUG=1 61 DEBUG=1
62 ;; 62 ;;
63 v) 63 v)
64 VERBOSE=$OPTARG 64 VERBOSE=$OPTARG
65 ;; 65 ;;
66 c) 66 c)
67 CHECK=1 67 CHECK=1
68 ;; 68 ;;
69 f) 69 f)
70 FORKS="--forks $OPTARG" 70 FORKS="--forks $OPTARG"
71 ;; 71 ;;
72 r) 72 r)
73 RERUN=1 73 RERUN=1
74 ;; 74 ;;
75 :) 75 :)
76 echo "Option -$OPTARG requires an argument." > /dev/stderr 76 echo "Option -$OPTARG requires an argument." > /dev/stderr
77 exit 1 77 exit 1
78 ;; 78 ;;
79 \?) 79 \?)
80 echo "BAD USAGE : unknow opton -$OPTARG" > /dev/stderr 80 echo "BAD USAGE : unknow opton -$OPTARG" > /dev/stderr
81 exit 1 81 exit 1
82 ;; 82 ;;
83 esac 83 esac
84 done 84 done
85 85
86 # enable debug mode 86 # enable debug mode
87 if [ $DEBUG -eq 1 ] 87 if [ $DEBUG -eq 1 ]
88 then 88 then
89 set -x 89 set -x
90 echo -e "## Mode DEBUG ON ##" 90 echo -e "## Mode DEBUG ON ##"
91 REDIRECTION_OUTPUT=""
92 else
93 REDIRECTION_OUTPUT=" 2> /dev/null"
94 fi 91 fi
95 92
96 # enable verbose mode 93 # enable verbose mode
97 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi 94 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
98 95
99 # Check USAGE by arguments number 96 # Check USAGE by arguments number
100 if [ $(($#-($OPTIND-1))) -ne 1 ] 97 if [ $(($#-($OPTIND-1))) -ne 1 ]
101 then 98 then
102 echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" 99 echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>"
103 echo "$0 -h for more info" 100 echo "$0 -h for more info"
104 exit 1 101 exit 1
105 fi 102 fi
106 103
107 shift $((OPTIND-1)) 104 shift $((OPTIND-1))
108 # check FirstPass directory - First argument 105 # check FirstPass directory - First argument
109 if [ -e $1 ] && [ -d $1 ] 106 if [ -e $1 ] && [ -d $1 ]
110 then 107 then
111 FIRSTPASS_DIR=$(readlink -e $1) 108 FIRSTPASS_DIR=$(readlink -e $1)
112 else 109 else
113 print_error "can't find $1 directory" 110 print_error "can't find $1 directory"
114 exit 1 111 exit 1
115 fi 112 fi
116 113
117 print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1 114 print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1
118 115
119 #-------------# 116 #-------------#
120 # GLOBAL VARS # 117 # GLOBAL VARS #
121 #-------------# 118 #-------------#
122 FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg" 119 FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg"
123 if [ -e $FIRSTPASS_CONFIG_FILE ] 120 if [ -e $FIRSTPASS_CONFIG_FILE ]
124 then 121 then
125 WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=") 122 WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=")
126 BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=") 123 BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=")
127 OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=") 124 OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=")
128 OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR 125 OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR
129 PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=") 126 PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=")
130 PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=") 127 PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=")
131 SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=") 128 SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=")
132 LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=") 129 LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=")
133 RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=") 130 RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=")
134 else 131 else
135 print_error "can't find $FIRSTPASS_CONFIG_FILE file" 132 print_error "can't find $FIRSTPASS_CONFIG_FILE file"
136 print_error "exit" 133 print_error "exit"
137 exit 1 134 exit 1
138 fi 135 fi
139 LST=$OUTPUT_DIR_BASENAME"/lists" 136 LST=$OUTPUT_DIR_BASENAME"/lists"
140 HMM=$OUTPUT_DIR_BASENAME"/hmm/" 137 HMM=$OUTPUT_DIR_BASENAME"/hmm/"
141 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2" 138 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2"
142 LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log" 139 LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log"
143 ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log" 140 ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log"
144 141
145 #------------------# 142 #------------------#
146 # Create WORKSPACE # 143 # Create WORKSPACE #
147 #------------------# 144 #------------------#
148 145
149 # Lock directory 146 # Lock directory
150 if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2; exit 1;fi 147 if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2; exit 1;fi
151 rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1 148 rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1
152 touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1 149 touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1
153 150
154 rm -r $LST > /dev/null 2>&1 151 rm -r $LST > /dev/null 2>&1
155 mkdir -p $LST 152 mkdir -p $LST
156 print_info "[${BASENAME}] Make directory $LST" 2 153 print_info "[${BASENAME}] Make directory $LST" 2
157 if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi 154 if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi
158 mkdir -p $HMM 155 mkdir -p $HMM
159 print_info "[${BASENAME}] Make directory $HMM" 2 156 print_info "[${BASENAME}] Make directory $HMM" 2
160 if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi 157 if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi
161 mkdir -p $RES_DIR > /dev/null 2>&1 158 mkdir -p $RES_DIR > /dev/null 2>&1
162 print_info "[${BASENAME}] Make directory $RES_DIR" 2 159 print_info "[${BASENAME}] Make directory $RES_DIR" 2
163 rm $LOGFILE $ERRORFILE > /dev/null 2>&1 160 rm $LOGFILE $ERRORFILE > /dev/null 2>&1
164 161
165 #-------------------# 162 #-------------------#
166 # Check Pass # 163 # Check Pass #
167 #-------------------# 164 #-------------------#
168 print_info "[${BASENAME}] Check Pass 2 directory" 1 165 print_info "[${BASENAME}] Check Pass 2 directory" 1
169 for treil in $(ls $RES_DIR/ | grep treil) 166 for treil in $(ls $RES_DIR/ | grep treil)
170 do 167 do
171 if [ ! -s $RES_DIR/$treil ] 168 if [ ! -s $RES_DIR/$treil ]
172 then 169 then
173 bn = $(basename $treil ".treil") 170 bn = $(basename $treil ".treil")
174 rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null 171 rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null
175 print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2 172 print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2
176 fi 173 fi
177 done 174 done
178 175
179 # Check if more than 89% of treil are done 176 # Check if more than 89% of treil are done
180 nbres_p1=$(ls $RES_DIR_P1/*.res 2> /dev/null | wc -l) 177 nbres_p1=$(ls $RES_DIR_P1/*.res 2> /dev/null | wc -l)
181 nbtreil_p2=$(ls $RES_DIR/*.treil 2> /dev/null | wc -l) 178 nbtreil_p2=$(ls $RES_DIR/*.treil 2> /dev/null | wc -l)
182 if [ $nbres_p1 -gt 0 ] 179 if [ $nbres_p1 -gt 0 ]
183 then 180 then
184 pourcentage=$((($nbtreil_p2*100)/$nbres_p1)) 181 pourcentage=$((($nbtreil_p2*100)/$nbres_p1))
185 if [ $pourcentage -gt 89 ] 182 if [ $pourcentage -gt 89 ]
186 then 183 then
187 print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1 184 print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1
188 exit 0 185 exit 0
189 fi 186 fi
190 else 187 else
191 print_error "[${BASENAME}] No First Pass, No .res -> exit P2" 188 print_error "[${BASENAME}] No First Pass, No .res -> exit P2"
192 if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No First Pass, No .res -> exit P2" ;fi 189 if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No First Pass, No .res -> exit P2" ;fi
193 exit 1 190 exit 1
194 fi 191 fi
195 192
196 #--------------------# 193 #--------------------#
197 # Save configuration # 194 # Save configuration #
198 #--------------------# 195 #--------------------#
199 cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg 196 cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg
200 echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 197 echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
201 echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 198 echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
202 echo "FIRSTPASS_DIR=$FIRSTPASS_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 199 echo "FIRSTPASS_DIR=$FIRSTPASS_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
203 echo "PLP_DIR_P1=$PLP_DIR_P1" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 200 echo "PLP_DIR_P1=$PLP_DIR_P1" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
204 echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 201 echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
205 echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 202 echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
206 echo "LST=$LST" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 203 echo "LST=$LST" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
207 echo "HMM=$HMM" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 204 echo "HMM=$HMM" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
208 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg 205 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
209 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 206 print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1
210 207
211 208
212 #--------------------------------------------------# 209 #--------------------------------------------------#
213 # Speaker Adaptation (AM) + Second pass (DECODING) # 210 # Speaker Adaptation (AM) + Second pass (DECODING) #
214 #--------------------------------------------------# 211 #--------------------------------------------------#
215 print_info "[${BASENAME}] Launch Second Pass" 1 212 print_info "[${BASENAME}] Launch Second Pass" 1
216 213
217 # for all speaker 214 # for all speaker
218 for speaker in $(cat $LBL_FILE_P1 | cut -f4 -d" " | sort | uniq) 215 for speaker in $(cat $LBL_FILE_P1 | cut -f4 -d" " | sort | uniq)
219 do 216 do
220 ## get seg file from P1 containing the speaker 217 ## get seg file from P1 containing the speaker
221 find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst 218 find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst
222 print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3 219 print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3
223 if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi 220 if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi
224 221
225 222
226 # for all AM 223 # for all AM
227 for (( i=0; $i<${#MTAG[@]} ; i++ )) 224 for (( i=0; $i<${#MTAG[@]} ; i++ ))
228 do 225 do
229 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then 226 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then
230 type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") 227 type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst")
231 ## if is the good AM for the speaker 228 ## if is the good AM for the speaker
232 if [ -n "$type" ] 229 if [ -n "$type" ]
233 then 230 then
234 ## HMM adaptation 231 ## HMM adaptation
235 if [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) 232 if [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] )
236 then 233 then
237 print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3 234 print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3
238 $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/ $REDIRECTION_OUTPUT 235 $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/
239 fi 236 fi
240 237
241 if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) 238 if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] )
242 then 239 then
243 print_warn "[${BASENAME}] No hmm files created for $speaker" 2 240 print_warn "[${BASENAME}] No hmm files created for $speaker" 2
244 print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker" 241 print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker"
245 print_error "[${BASENAME}] Check $ERRORFILE" 242 print_error "[${BASENAME}] Check $ERRORFILE"
246 #exit 1 243 #exit 1
247 fi 244 fi
248 245
249 ## cp map files 246 ## cp map files
250 cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map 247 cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map
251 248
252 ## class clustering 249 ## class clustering
253 if [ -s $HMM/$speaker.hmm ] && ( [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] )) 250 if [ -s $HMM/$speaker.hmm ] && ( [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] ))
254 then 251 then
255 print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3 252 print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3
256 $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls $REDIRECTION_OUTPUT 253 $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls
257 fi 254 fi
258 if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] ) 255 if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] )
259 then 256 then
260 print_warn "[${BASENAME}] No cls file created for $speaker" 2 257 print_warn "[${BASENAME}] No cls file created for $speaker" 2
261 print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speakers" 258 print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speakers"
262 print_error "[${BASENAME}] Check $ERRORFILE" 259 print_error "[${BASENAME}] Check $ERRORFILE"
263 #exit 1 260 #exit 1
264 fi 261 fi
265 262
266 ## Speeral decoding 263 ## Speeral decoding
267 if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] 264 if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ]
268 then 265 then
269 print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 266 print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3
270 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT 267 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock
271 else 268 else
272 print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 269 print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
273 print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 270 print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3
274 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT 271 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock
275 fi 272 fi
276 273
277 if [ $CHECK -eq 1 ] 274 if [ $CHECK -eq 1 ]
278 then 275 then
279 check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" 276 check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR"
280 if [ $? -eq 1 ] 277 if [ $? -eq 1 ]
281 then 278 then
282 print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 279 print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2
283 print_log_file $LOGFILE "WARN : Speeral number of output ERROR $LST/$speaker.lst" 280 print_log_file $LOGFILE "WARN : Speeral number of output ERROR $LST/$speaker.lst"
284 ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 281 ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
285 print_log_file $LOGFILE "Segs (and treil) not done :\n[" 282 print_log_file $LOGFILE "Segs (and treil) not done :\n["
286 diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $LOGFILE 283 diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $LOGFILE
287 print_log_file $LOGFILE "] [$(date +'%d/%m/%y %H:%M:%S')]" 284 print_log_file $LOGFILE "] [$(date +'%d/%m/%y %H:%M:%S')]"
288 rm ${OUTPUT_DIR_BASENAME}/.tmp 285 rm ${OUTPUT_DIR_BASENAME}/.tmp
289 #exit 1 286 #exit 1
290 fi 287 fi
291 fi 288 fi
292 break 289 break
293 fi 290 fi
294 fi 291 fi
295 done 292 done
296 #rm "$HMM/$speaker.*" > /dev/null 2>&1 293 #rm "$HMM/$speaker.*" > /dev/null 2>&1
297 #rm "$LST/$speaker.lst" > /dev/null 2>&1 294 #rm "$LST/$speaker.lst" > /dev/null 2>&1
298 done 295 done
299 296
300 ## Check missing seg and log it 297 ## Check missing seg and log it
301 if [ $CHECK -eq 1 ] 298 if [ $CHECK -eq 1 ]
302 then 299 then
303 ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 300 ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
304 todo=$(cat ${PLP_FILE_P1} | wc -l) 301 todo=$(cat ${PLP_FILE_P1} | wc -l)
305 if [ $todo -eq 0 ]; then todo=1;fi 302 if [ $todo -eq 0 ]; then todo=1;fi
306 notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) 303 notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l)))
307 pourcentage=$((($notdone*100)/$todo)) 304 pourcentage=$((($notdone*100)/$todo))
308 if [ $notdone -ne 0 ] 305 if [ $notdone -ne 0 ]
309 then 306 then
310 print_error "[${BASENAME}] ERROR : check $ERRORFILE" 307 print_error "[${BASENAME}] ERROR : check $ERRORFILE"
311 print_log_file "$ERRORFILE" "ERROR : Treil not done [" 308 print_log_file "$ERRORFILE" "ERROR : Treil not done ["
312 diff ${PLP_FILE_P1} ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE 309 diff ${PLP_FILE_P1} ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE
313 print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME" 310 print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME"
314 else 311 else
315 print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" 312 print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')"
316 fi 313 fi
317 rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 314 rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1
318 fi 315 fi
319 316
320 #---------------# 317 #---------------#
321 # Convert res # 318 # Convert res #
322 #---------------# 319 #---------------#
323 320
324 print_info "[${BASENAME}] Convert .res into .ctm" 1 321 print_info "[${BASENAME}] Convert .res into .ctm" 1
325 # .res => .ctm 322 # .res => .ctm
326 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm $REDIRECTION_OUTPUT 323 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm
327 print_info "[${BASENAME}] Convert .res into .trs" 1 324 print_info "[${BASENAME}] Convert .res into .trs" 1
328 # .res => .trs 325 # .res => .trs
329 echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg 326 echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
330 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg $REDIRECTION_OUTPUT 327 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
331 rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 328 rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1
332 print_info "[${BASENAME}] Convert .res into .txt" 1 329 print_info "[${BASENAME}] Convert .res into .txt" 1
333 # .res => .txt 330 # .res => .txt
334 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt $REDIRECTION_OUTPUT 331 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt
335 332
336 print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1 333 print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1
337 # unlock directory 334 # unlock directory
338 mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" 335 mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock"
339 336
340 337
main_tools/ThirdPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : ThirdPass.sh # 4 # File : ThirdPass.sh #
5 # Brief : ASR third pass using trigg files # 5 # Brief : ASR third pass using trigg files #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # Version : 1.0 # 7 # Version : 1.0 #
8 # Date : 18/07/13 # 8 # Date : 18/07/13 #
9 ##################################################### 9 #####################################################
10 10
11 echo "### ThirdPass.sh ###" 11 echo "### ThirdPass.sh ###"
12 12
13 # Check OTMEDIA_HOME env var 13 # Check OTMEDIA_HOME env var
14 if [ -z ${OTMEDIA_HOME} ] 14 if [ -z ${OTMEDIA_HOME} ]
15 then 15 then
16 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 16 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
17 export OTMEDIA_HOME=$OTMEDIA_HOME 17 export OTMEDIA_HOME=$OTMEDIA_HOME
18 fi 18 fi
19 19
20 # where is SecondPass.sh 20 # where is SecondPass.sh
21 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 21 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
22 22
23 # Scripts Path 23 # Scripts Path
24 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 24 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
25 25
26 # Include scripts 26 # Include scripts
27 . $SCRIPT_PATH"/Tools.sh" 27 . $SCRIPT_PATH"/Tools.sh"
28 . $SCRIPT_PATH"/CheckThirdPass.sh" 28 . $SCRIPT_PATH"/CheckThirdPass.sh"
29 29
30 # where is ThirdPass.cfg 30 # where is ThirdPass.cfg
31 THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ThirdPass.cfg" 31 THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ThirdPass.cfg"
32 if [ -e $THIRDPASS_CONFIG_FILE ] 32 if [ -e $THIRDPASS_CONFIG_FILE ]
33 then 33 then
34 . $THIRDPASS_CONFIG_FILE 34 . $THIRDPASS_CONFIG_FILE
35 else 35 else
36 echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2 36 echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2
37 exit 1 37 exit 1
38 fi 38 fi
39 39
40 #---------------# 40 #---------------#
41 # Parse Options # 41 # Parse Options #
42 #---------------# 42 #---------------#
43 while getopts ":hDv:crf:" opt 43 while getopts ":hDv:crf:" opt
44 do 44 do
45 case $opt in 45 case $opt in
46 h) 46 h)
47 echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n" 47 echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n"
48 echo -e "\t Options:" 48 echo -e "\t Options:"
49 echo -e "\t\t-h :\tprint this message" 49 echo -e "\t\t-h :\tprint this message"
50 echo -e "\t\t-D :\tDEBUG mode on" 50 echo -e "\t\t-D :\tDEBUG mode on"
51 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 51 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
52 echo -e "\t\t-c :\t Check process, stop if error detected" 52 echo -e "\t\t-c :\t Check process, stop if error detected"
53 echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" 53 echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
54 echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" 54 echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done"
55 exit 1 55 exit 1
56 ;; 56 ;;
57 D) 57 D)
58 DEBUG=1 58 DEBUG=1
59 ;; 59 ;;
60 v) 60 v)
61 VERBOSE=$OPTARG 61 VERBOSE=$OPTARG
62 ;; 62 ;;
63 c) 63 c)
64 CHECK=1 64 CHECK=1
65 ;; 65 ;;
66 f) 66 f)
67 FORKS="--forks $OPTARG" 67 FORKS="--forks $OPTARG"
68 ;; 68 ;;
69 r) 69 r)
70 RERUN=1 70 RERUN=1
71 ;; 71 ;;
72 :) 72 :)
73 echo "Option -$OPTARG requires an argument." >&2 73 echo "Option -$OPTARG requires an argument." >&2
74 exit 1 74 exit 1
75 ;; 75 ;;
76 \?) 76 \?)
77 echo "BAD USAGE : unknow opton -$OPTARG" 77 echo "BAD USAGE : unknow opton -$OPTARG"
78 #exit 1 78 #exit 1
79 ;; 79 ;;
80 esac 80 esac
81 done 81 done
82 82
83 # mode debug enable 83 # mode debug enable
84 if [ $DEBUG -eq 1 ] 84 if [ $DEBUG -eq 1 ]
85 then 85 then
86 set -x 86 set -x
87 echo -e "## Mode DEBUG ON ##" 87 echo -e "## Mode DEBUG ON ##"
88 REDIRECTION_OUTPUT=""
89 else
90 REDIRECTION_OUTPUT=" 2> /dev/null"
91 fi 88 fi
92 89
93 # mode verbose enable 90 # mode verbose enable
94 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi 91 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi
95 92
96 # Check USAGE by arguments number 93 # Check USAGE by arguments number
97 if [ $(($#-($OPTIND-1))) -ne 1 ] 94 if [ $(($#-($OPTIND-1))) -ne 1 ]
98 then 95 then
99 echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>" 96 echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>"
100 echo "$0 -h for more info" 97 echo "$0 -h for more info"
101 exit 1 98 exit 1
102 fi 99 fi
103 100
104 shift $((OPTIND-1)) 101 shift $((OPTIND-1))
105 # check Pass directory - First argument 102 # check Pass directory - First argument
106 if [ -e $1 ] && [ -d $1 ] 103 if [ -e $1 ] && [ -d $1 ]
107 then 104 then
108 PASS_DIR=$(readlink -e $1) 105 PASS_DIR=$(readlink -e $1)
109 else 106 else
110 print_error "can't find $1 directory" 107 print_error "can't find $1 directory"
111 exit 1 108 exit 1
112 fi 109 fi
113 110
114 #-------------# 111 #-------------#
115 # GLOBAL VARS # 112 # GLOBAL VARS #
116 #-------------# 113 #-------------#
117 EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg" 114 EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg"
118 if [ -e $EXPLOITCONFPASS_CONFIG_FILE ] 115 if [ -e $EXPLOITCONFPASS_CONFIG_FILE ]
119 then 116 then
120 TRIGGER_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "TRIGGER_SPEERAL=" | cut -f2 -d"=") 117 TRIGGER_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "TRIGGER_SPEERAL=" | cut -f2 -d"=")
121 LEX_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_SPEERAL=" | cut -f2 -d"=") 118 LEX_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_SPEERAL=" | cut -f2 -d"=")
122 LEX_BINODE_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_BINODE_SPEERAL=" | cut -f2 -d"=") 119 LEX_BINODE_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_BINODE_SPEERAL=" | cut -f2 -d"=")
123 LST="" 120 LST=""
124 PLP_DIR_P1="" 121 PLP_DIR_P1=""
125 HMM="" 122 HMM=""
126 else 123 else
127 print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file" 124 print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file"
128 #exit 1 125 #exit 1
129 TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/ 126 TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/
130 LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext 127 LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext
131 LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin 128 LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin
132 fi 129 fi
133 SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg" 130 SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg"
134 if [ -e $SECONDPASS_CONFIG_FILE ] 131 if [ -e $SECONDPASS_CONFIG_FILE ]
135 then 132 then
136 LST=$(cat $SECONDPASS_CONFIG_FILE | grep "^LST=" | cut -f2 -d"=") 133 LST=$(cat $SECONDPASS_CONFIG_FILE | grep "^LST=" | cut -f2 -d"=")
137 HMM=$(cat $SECONDPASS_CONFIG_FILE | grep "^HMM=" | cut -f2 -d"=") 134 HMM=$(cat $SECONDPASS_CONFIG_FILE | grep "^HMM=" | cut -f2 -d"=")
138 PLP_DIR_P1=$(cat $SECONDPASS_CONFIG_FILE | grep "^PLP_DIR_P1=" | cut -f2 -d"=") 135 PLP_DIR_P1=$(cat $SECONDPASS_CONFIG_FILE | grep "^PLP_DIR_P1=" | cut -f2 -d"=")
139 else 136 else
140 print_error "can't find $SECONDPASS_CONFIG_FILE file" 137 print_error "can't find $SECONDPASS_CONFIG_FILE file"
141 #exit 1 138 #exit 1
142 LST=$PASS_DIR/lists 139 LST=$PASS_DIR/lists
143 HMM=$PASS_DIR/hmm 140 HMM=$PASS_DIR/hmm
144 PLP_DIR_P1=$PASS_DIR/PLP 141 PLP_DIR_P1=$PASS_DIR/PLP
145 fi 142 fi
146 143
147 BASENAME=$(basename $PASS_DIR) 144 BASENAME=$(basename $PASS_DIR)
148 OUTPUT_DIR_BASENAME=$PASS_DIR 145 OUTPUT_DIR_BASENAME=$PASS_DIR
149 RES_DIR="$PASS_DIR/res_p3" 146 RES_DIR="$PASS_DIR/res_p3"
150 LOGFILE=$(dirname $PASS_DIR)"/info_p3.log" 147 LOGFILE=$(dirname $PASS_DIR)"/info_p3.log"
151 ERRORFILE=$(dirname $PASS_DIR)"/error_p3.log" 148 ERRORFILE=$(dirname $PASS_DIR)"/error_p3.log"
152 149
153 #------------------# 150 #------------------#
154 # Create WORKSPACE # 151 # Create WORKSPACE #
155 #------------------# 152 #------------------#
156 153
157 # Lock directory 154 # Lock directory
158 if [ -e $OUTPUT_DIR_BASENAME/THIRDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi 155 if [ -e $OUTPUT_DIR_BASENAME/THIRDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi
159 rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1 156 rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1
160 touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1 157 touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1
161 158
162 if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi 159 if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi
163 mkdir -p $RES_DIR 160 mkdir -p $RES_DIR
164 print_info "Make directory $RES_DIR" 1 161 print_info "Make directory $RES_DIR" 1
165 162
166 #--------------------# 163 #--------------------#
167 # Save configuration # 164 # Save configuration #
168 #--------------------# 165 #--------------------#
169 cp $THIRDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/ThirdPass.cfg 166 cp $THIRDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/ThirdPass.cfg
170 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/ThirdPass.cfg 167 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/ThirdPass.cfg
171 print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 168 print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1
172 169
173 #--------------------------------------------------# 170 #--------------------------------------------------#
174 # Third Pass using trigger file (DECODING) # 171 # Third Pass using trigger file (DECODING) #
175 #--------------------------------------------------# 172 #--------------------------------------------------#
176 print_info "Launch Third Pass" 1 173 print_info "Launch Third Pass" 1
177 174
178 ## Generate speeral config file adding trigger rep 175 ## Generate speeral config file adding trigger rep
179 cat $SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE | sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \ 176 cat $SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE | sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \
180 | sed -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \ 177 | sed -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \
181 | sed -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" > $OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml 178 | sed -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" > $OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml
182 SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml 179 SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml
183 180
184 # for all speaker 181 # for all speaker
185 for lspeaker in $(ls $LST/*.lst) 182 for lspeaker in $(ls $LST/*.lst)
186 do 183 do
187 speaker=$(basename $lspeaker ".lst") 184 speaker=$(basename $lspeaker ".lst")
188 # for all AM 185 # for all AM
189 for (( i=0; $i<${#MTAG[@]} ; i++ )) 186 for (( i=0; $i<${#MTAG[@]} ; i++ ))
190 do 187 do
191 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then 188 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then
192 type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") 189 type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst")
193 ## if is the good AM for the speaker 190 ## if is the good AM for the speaker
194 if [ -n "$type" ] 191 if [ -n "$type" ]
195 then 192 then
196 ## Speeral decoding 193 ## Speeral decoding
197 if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] 194 if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ]
198 then 195 then
199 print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 196 print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3
200 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT 197 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock
201 else 198 else
202 print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 199 print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
203 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT 200 $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock
204 fi 201 fi
205 202
206 if [ $CHECK -eq 1 ] 203 if [ $CHECK -eq 1 ]
207 then 204 then
208 check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" 205 check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR"
209 if [ $? -eq 1 ] 206 if [ $? -eq 1 ]
210 then 207 then
211 echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE 208 echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE
212 ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 209 ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
213 diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE 210 diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE
214 echo -e "] " >> $ERRORFILE 211 echo -e "] " >> $ERRORFILE
215 rm ${OUTPUT_DIR_BASENAME}/.tmp 212 rm ${OUTPUT_DIR_BASENAME}/.tmp
216 #exit 1 213 #exit 1
217 fi 214 fi
218 fi 215 fi
219 break 216 break
220 fi 217 fi
221 fi 218 fi
222 done 219 done
223 done 220 done
224 221
225 ## Check missing seg and log it 222 ## Check missing seg and log it
226 ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 223 ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
227 echo -e "$BASENAME P3 END\n[" >> $LOGFILE 224 echo -e "$BASENAME P3 END\n[" >> $LOGFILE
228 diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE 225 diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE
229 echo -e "] $BASENAME" >> $LOGFILE 226 echo -e "] $BASENAME" >> $LOGFILE
230 rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 227 rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1
231 228
232 #---------------# 229 #---------------#
233 # Convert res # 230 # Convert res #
234 #---------------# 231 #---------------#
235 232
236 # .res => .ctm 233 # .res => .ctm
237 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm $REDIRECTION_OUTPUT 234 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm
238 # .res => .trs 235 # .res => .trs
239 echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg 236 echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
240 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg $REDIRECTION_OUTPUT 237 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
241 rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 238 rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1
242 # .res => .txt 239 # .res => .txt
243 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt $REDIRECTION_OUTPUT 240 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt
244 241
245 242
246 print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1 243 print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1
247 244
248 # unlock directory 245 # unlock directory
249 mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" 246 mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock"
250 247
251 248