Blame view
main_tools/SecondPass.sh
13 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 |
#!/bin/bash

#####################################################
# File    : SecondPass.sh                           #
# Brief   : Speaker adaptation + ASR second pass    #
# Author  : Jean-François Rey                       #
#           (based on Emmanuel Ferreira             #
#           and Hugo Mauchrétien works)             #
# Version : 1.1                                     #
# Date    : 18/06/13                                #
#####################################################
f37e72eaf up |
12 |
# Banner identifying this stage on standard output.
echo "### SecondPass.sh ###"

# Check OTMEDIA_HOME env var: when it is empty or unset, fall back to the
# grand-parent directory of this script (resolved through symlinks).
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is SecondPass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Scripts Path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts (helpers used below are presumably defined there, e.g.
# print_info / print_error - TODO confirm against Tools.sh)
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckSecondPass.sh"

# where is SecondPass.cfg ; it is sourced, so it can define any variable
# used later in this script.
SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg"
if [ -e "$SECONDPASS_CONFIG_FILE" ]
then
    . "$SECONDPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" >&2
    echo "exit" >&2
    exit 1
fi

#---------------#
# Parse Options #
#---------------#
# Leading ':' in the optstring selects silent error reporting, so missing
# arguments and unknown options are handled by the ':' and '?' arms.
while getopts ":hDv:crf:" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY> "
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\t Check process, stop if error detected"
            echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
            # -r is a plain flag in the optstring (no argument).
            echo -e "\t\t-r :\tforce rerun the show, without deleting works already done"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            # Kept as a single string; it is expanded unquoted on the
            # decoder command line so that it splits into two words.
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknow opton -$OPTARG" >&2
            exit 1
            ;;
    esac
done

# mode debug enable
# ${DEBUG:-0}: treat an unset/empty DEBUG as 0 instead of letting '[' fail
# with a syntax error when neither -D nor the sourced files define it.
if [ "${DEBUG:-0}" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable (same unset-safe comparison as for DEBUG)
if [ "${VERBOSE:-0}" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
e6be5137b reinitialized pro... |
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# Check USAGE by arguments number: exactly one positional argument must
# remain once the options consumed by getopts are discounted.
if [ $(($#-(OPTIND-1))) -ne 1 ]
then
    echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>"
    echo "$0 -h for more info"
    exit 1
fi
shift $((OPTIND-1))

# check FirstPass directory - First argument
# (quoted so that a path containing whitespace is tested as one word)
if [ -e "$1" ] && [ -d "$1" ]
then
    FIRSTPASS_DIR=$(readlink -e "$1")
else
    print_error "Can't find $1 directory"
    exit 1
fi

#-------------#
# GLOBAL VARS #
#-------------#
# Values are extracted from the KEY=VALUE lines of FirstPass.cfg.
# 'cut -f2-' keeps everything after the first '=' so that a value which
# itself contains '=' is not truncated.
FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg"
if [ -e "$FIRSTPASS_CONFIG_FILE" ]
then
    WAV_FILE=$(grep "WAV_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    # Anchored: an unanchored pattern would also match OUTPUT_DIR_BASENAME=
    BASENAME=$(grep "^BASENAME=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    OUTPUT_DIR=$(grep "OUTPUT_DIR=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    # The second pass works inside the first-pass directory itself.
    OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR
    PLP_FILE_P1=$(grep "PLP_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    PLP_DIR_P1=$(grep "PLP_DIR=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    SEG_FILE_P1=$(grep "SEG_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    LBL_FILE_P1=$(grep "LBL_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
    RES_DIR_P1=$(grep "RES_DIR=" "$FIRSTPASS_CONFIG_FILE" | cut -f2- -d"=")
else
    print_error "Can't find $FIRSTPASS_CONFIG_FILE file"
    print_error "exit"
    exit 1
fi

# Second-pass working paths, all located under the first-pass directory.
LST=$OUTPUT_DIR_BASENAME"/lists"
HMM=$OUTPUT_DIR_BASENAME"/hmm/"
RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2"
LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log"
ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log"

print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1
e6be5137b reinitialized pro... |
140 141 142 143 144 |
#------------------#
# Create WORKSPACE #
#------------------#
# Lock directory
# ${RERUN:-0}: an unset RERUN counts as "no rerun". Unquoted and unset, the
# test '[ -eq 0 ]' is a syntax error that evaluates false, which silently
# bypassed the lock and skipped the clean-up below.
if [ -e "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" ] && [ "${RERUN:-0}" -eq 0 ]; then
    print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1

# The lists directory is always rebuilt from scratch.
rm -r -- "$LST" > /dev/null 2>&1
mkdir -p -- "$LST"
print_info "[${BASENAME}] Make directory $LST" 2

# Adapted models and results are only wiped on a normal (non -r) run.
if [ "${RERUN:-0}" -eq 0 ]; then rm -r -- "$HMM" > /dev/null 2>&1; fi
mkdir -p -- "$HMM"
print_info "[${BASENAME}] Make directory $HMM" 2
if [ "${RERUN:-0}" -eq 0 ]; then rm -r -- "$RES_DIR" > /dev/null 2>&1; fi
mkdir -p -- "$RES_DIR" > /dev/null 2>&1
print_info "[${BASENAME}] Make directory $RES_DIR" 2

# Start from empty log files (errors ignored when they do not exist yet).
rm -- "$LOGFILE" "$ERRORFILE" > /dev/null 2>&1
e6be5137b reinitialized pro... |
159 160 161 162 |
#-------------------#
# Check Pass        #
#-------------------#
print_info "[${BASENAME}] Check Pass 2 directory" 1

# Remove every empty "treil" file of the second-pass result directory
# together with the .seg/.res/.pho files sharing its basename.
for treil in "$RES_DIR"/*treil*
do
    # An unmatched glob stays literal: skip it instead of "deleting" it.
    [ -e "$treil" ] || continue
    if [ ! -s "$treil" ]
    then
        # No spaces around '=': 'bn = ...' ran a command named "bn" and
        # left the variable empty, so the sibling files were never removed.
        bn=$(basename "$treil" ".treil")
        rm -- "$treil" "$RES_DIR/$bn.seg" "$RES_DIR/$bn.res" "$RES_DIR/$bn.pho" 2> /dev/null
        print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2
    fi
done

# Check if more than 89% of treil are done
135404bcf Modify verbose an... |
175 176 |
# Compare the number of second-pass .treil files with the number of
# first-pass .res files to decide whether this show still needs decoding.
nbres_p1=$(ls "$RES_DIR_P1"/*.res 2> /dev/null | wc -l)
nbtreil_p2=$(ls "$RES_DIR"/*.treil 2> /dev/null | wc -l)
if [ "$nbres_p1" -gt 0 ]
then
    pourcentage=$(( (nbtreil_p2 * 100) / nbres_p1 ))
    if [ "$pourcentage" -gt 89 ]
    then
        print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1
        # NOTE(review): this early exit leaves SECONDPASS.lock in place
        # (the lock is only renamed at the very end of the script) -
        # confirm that this is intended.
        exit 0
    fi
else
    print_error "[${BASENAME}] No First Pass, No .res -> exit P2"
    # ${CHECK:-0}: an unset CHECK (no -c given) counts as disabled.
    if [ "${CHECK:-0}" -eq 1 ]; then print_log_file "$ERRORFILE" "No First Pass, No .res -> exit P2" ;fi
    exit 1
fi

#--------------------#
# Save configuration #
#--------------------#
# Copy the global configuration next to the results, then append the
# run-specific values resolved above.
cp -- "$SECONDPASS_CONFIG_FILE" "$OUTPUT_DIR_BASENAME/SecondPass.cfg"
{
    echo "WAV_FILE=$WAV_FILE"
    echo "BASENAME=$BASENAME"
    echo "FIRSTPASS_DIR=$FIRSTPASS_DIR"
    echo "PLP_DIR_P1=$PLP_DIR_P1"
    echo "OUTPUT_DIR=$OUTPUT_DIR"
    echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME"
    echo "LST=$LST"
    echo "HMM=$HMM"
    echo "RES_DIR=$RES_DIR"
} >> "$OUTPUT_DIR_BASENAME/SecondPass.cfg"
print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1
e6be5137b reinitialized pro... |
205 206 207 208 209 |
#--------------------------------------------------#
# Speaker Adaptation (AM) + Second pass (DECODING) #
#--------------------------------------------------#

print_info "[${BASENAME}] Launch Second Pass" 1

# for all speaker
# Speaker ids are the 4th space-separated field of the first-pass label
# file; word splitting of the substitution is intended here.
for speaker in $(cut -f4 -d" " "$LBL_FILE_P1" | sort -u)
do
    # Per-speaker files (HMM already ends with '/', hence the '//' that
    # also appears in the logged command lines).
    spk_lst="$LST/$speaker.lst"
    spk_hmm="$HMM/$speaker.hmm"
    spk_cls="$HMM/$speaker.cls"

    ## get seg file from P1 containing the speaker
    find "$RES_DIR_P1" -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > "$spk_lst"
    print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3
    if [ ! -s "$spk_lst" ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi

    # for all AM
    # NOTE(review): the bound uses MTAG while the body indexes MODS and
    # SPEERAL_CFG - presumably parallel arrays from SecondPass.cfg; confirm.
    for (( i=0; i<${#MTAG[@]} ; i++ ))
    do
        if [ -e "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" ]; then
            type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst")
            ## if is the good AM for the speaker
            if [ -n "$type" ]
            then
                ## HMM adaptation
                # '! -s' = missing OR empty file. The former '-z <path>'
                # tested a never-empty string, so an empty model left by a
                # failed run was not rebuilt on rerun.
                if [ "${RERUN:-0}" -eq 0 ] || [ ! -s "$spk_hmm" ]
                then
                    print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3
                    "$SPEERAL_TOOLS/mllrmap" "$SPEERAL_AM/${MODS[$i]}.hmm" -LR "-b$spk_lst" "-d$PLP_DIR_P1/" -t.plp -i3 -c.seg "-o$spk_hmm" "-a$RES_DIR_P1/"
                fi
                if [ "${CHECK:-0}" -eq 1 ] && [ ! -s "$spk_hmm" ]
                then
                    print_warn "[${BASENAME}] No hmm files created for $speaker" 2
                    print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker"
                    print_error "[${BASENAME}] Check $ERRORFILE"
                    #exit 1
                fi

                ## cp map files
                cp -- "$SPEERAL_AM/${MODS[$i]}.map" "$HMM/$speaker.map"

                ## class clustering (only when an adapted model exists)
                if [ -s "$spk_hmm" ] && { [ "${RERUN:-0}" -eq 0 ] || [ ! -s "$spk_cls" ]; }
                then
                    print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3
                    "$SPEERAL_TOOLS/gclust_pdf" "$spk_hmm" "$spk_cls" -i "$SPEERAL_AM/${MODS[$i]}.cls"
                fi
                if [ "${CHECK:-0}" -eq 1 ] && [ ! -s "$spk_cls" ]
                then
                    print_warn "[${BASENAME}] No cls file created for $speaker" 2
                    # was "$speakers" (undefined), which logged an empty name
                    print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speaker"
                    print_error "[${BASENAME}] Check $ERRORFILE"
                    #exit 1
                fi

                ## Speeral decoding
                # SPEERAL_BIN, SPEERAL_CFG[i] and FORKS stay unquoted on
                # purpose: FORKS holds two words ("--forks N") and the other
                # two may carry several words as well (not visible here).
                if [ -s "$spk_hmm" ] && [ -s "$spk_cls" ]
                then
                    print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3
                    # shellcheck disable=SC2086
                    $SPEERAL_BIN "$spk_lst" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR_P1" -m "$spk_hmm" -c "$spk_cls" $FORKS --lock
                else
                    # Fall back to the unadapted acoustic model.
                    print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
                    print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3
                    # shellcheck disable=SC2086
                    $SPEERAL_BIN "$spk_lst" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR_P1" -m "$SPEERAL_AM/${MODS[$i]}.hmm" -c "$SPEERAL_AM/${MODS[$i]}.cls" $FORKS --lock
                fi

                if [ "${CHECK:-0}" -eq 1 ]
                then
                    check_second_pass_output_speeral "$spk_lst" "$RES_DIR"
                    # Only an exit status of exactly 1 is treated as an error.
                    if [ $? -eq 1 ]
                    then
                        print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2
                        print_log_file "$LOGFILE" "WARN : Speeral number of output ERROR $LST/$speaker.lst"
                        # List the segments actually produced for this
                        # speaker and log the difference with the request.
                        ls "$RES_DIR"/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
                        print_log_file "$LOGFILE" "Segs (and treil) not done : ["
                        diff "$spk_lst" "${OUTPUT_DIR_BASENAME}/.tmp" >> "$LOGFILE"
                        print_log_file "$LOGFILE" "] [$(date +'%d/%m/%y %H:%M:%S')]"
                        rm -- "${OUTPUT_DIR_BASENAME}/.tmp"
                        #exit 1
                    fi
                fi
                # The matching acoustic model was found: stop scanning AMs.
                break
            fi
        fi
    done
    #rm "$HMM/$speaker.*" > /dev/null 2>&1
    #rm "$LST/$speaker.lst" > /dev/null 2>&1
done
e6be5137b reinitialized pro... |
295 |
## Check missing seg and log it
# Only performed in check mode (-c); an unset CHECK counts as disabled.
if [ "${CHECK:-0}" -eq 1 ]
then
    # Basenames of every .treil produced by the second pass, sorted.
    ls "$RES_DIR"/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
    # Number of entries expected, one per line of the first-pass PLP list.
    todo=$(wc -l < "${PLP_FILE_P1}")
    # Avoid a division by zero when the list is empty.
    if [ "$todo" -eq 0 ]; then todo=1;fi
    notdone=$(( todo - $(wc -l < "${OUTPUT_DIR_BASENAME}/.tmp") ))
    pourcentage=$(( (notdone * 100) / todo ))
    if [ "$notdone" -ne 0 ]
    then
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file "$ERRORFILE" "ERROR : Treil not done ["
        # Lines only present in the PLP list are the missing entries.
        diff "${PLP_FILE_P1}" "${OUTPUT_DIR_BASENAME}/.tmp" | grep -e "^< " | sed -e "s/< //" >> "$ERRORFILE"
        print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME"
    else
        print_log_file "$LOGFILE" "P2 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')"
    fi
    rm -- "${OUTPUT_DIR_BASENAME}/.tmp" > /dev/null 2>&1
fi
e6be5137b reinitialized pro... |
314 315 316 317 |
#---------------#
# Convert res   #
#---------------#
# Each conversion runs res2out.pl over the second-pass result directory;
# paths are quoted so that directories containing whitespace still work.

print_info "[${BASENAME}] Convert .res into .ctm" 1
# .res => .ctm
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format CTM --ignore "$RULES/asupp" --out "${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm"

print_info "[${BASENAME}] Convert .res into .trs" 1
# .res => .trs
# Temporary configuration handed to res2out.pl through --trs_config,
# one "key value" pair per line.
echo -e "name $AUTHOR
fileName ${BASENAME}
fileExt wav
segFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > "${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg"
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TRS --ignore "$RULES/asupp" --out "${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs" --trs_config "${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg"
rm -- "${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg" > /dev/null 2>&1

print_info "[${BASENAME}] Convert .res into .txt" 1
# .res => .txt
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TXT --ignore "$RULES/asupp" --out "${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt"

print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1

# unlock directory: the lock marker is renamed rather than deleted
mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock"