Blame view
main_tools/SecondPass.sh
13.3 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 |
#!/bin/bash

#####################################################
# File    :   SecondPass.sh                         #
# Brief   :   Speaker adaptation + ASR second pass  #
# Author  :   Jean-François Rey                     #
#             (based on Emmanuel Ferreira           #
#              and Hugo Mauchrétien works)          #
# Version :   1.1                                   #
# Date    :   18/06/13                              #
#####################################################
f37e72eaf up |
12 |
echo "### SecondPass.sh ###"

# Bootstrap: locate the OTMEDIA installation, source the helper scripts and
# the SecondPass configuration, then parse the command-line options.

# Check OTMEDIA_HOME env var; when it is empty, derive it from the location
# of this script (two directory levels above the script file).
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is SecondPass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Scripts Path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckSecondPass.sh"

# where is SecondPass.cfg
SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg"
if [ -e "$SECONDPASS_CONFIG_FILE" ]
then
    . "$SECONDPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" >&2
    echo "exit" >&2
    exit 1
fi

#---------------#
# Parse Options #
#---------------#
# The configuration file is sourced first, so options given on the command
# line override any value it defines.
while getopts ":hDv:crf:" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY> "
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\t Check process, stop if error detected"
            echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
            # -r is a plain flag in the getopts spec: it takes no argument.
            echo -e "\t\t-r :\tforce rerun the show, without deleting works already done"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            # Kept as a two-word string on purpose: it is expanded unquoted
            # on the decoder command line.
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknown option -$OPTARG" >&2
            exit 1
            ;;
    esac
done

# Flags that neither the configuration file nor the command line set default
# to 0, so the numeric tests on them never see an empty operand.
DEBUG=${DEBUG:-0}
VERBOSE=${VERBOSE:-0}
CHECK=${CHECK:-0}
RERUN=${RERUN:-0}

# mode debug enable
if [ "$DEBUG" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable
if [ "$VERBOSE" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
e6be5137b reinitialized pro... |
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# Check USAGE by arguments number: exactly one positional argument (the
# first-pass directory) must remain once the parsed options are removed.
if [ $(( $# - (OPTIND - 1) )) -ne 1 ]
then
    # Usage errors go to stderr, like the other error messages of this script.
    echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" >&2
    echo "$0 -h for more info" >&2
    exit 1
fi
shift $((OPTIND-1))

# check FirstPass directory - First argument
if [ -d "$1" ]
then
    FIRSTPASS_DIR=$(readlink -e "$1")
else
    print_error "Can't find $1 directory"
    exit 1
fi

#-------------#
# GLOBAL VARS #
#-------------#
# Each value is the second '='-separated field of the matching line in
# FirstPass.cfg.
FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg"
if [ -e "$FIRSTPASS_CONFIG_FILE" ]
then
    WAV_FILE=$(grep "WAV_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    # Anchored pattern, presumably so that keys merely ending in "BASENAME="
    # (e.g. OUTPUT_DIR_BASENAME=) are not matched — confirm against FirstPass.cfg.
    BASENAME=$(grep "^BASENAME=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    OUTPUT_DIR=$(grep "OUTPUT_DIR=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR
    PLP_FILE_P1=$(grep "PLP_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    PLP_DIR_P1=$(grep "PLP_DIR=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    PLP_LIST_P1=$(grep "PLP_LIST=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    SEG_FILE_P1=$(grep "SEG_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    LBL_FILE_P1=$(grep "LBL_FILE=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
    RES_DIR_P1=$(grep "RES_DIR=" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"=")
else
    print_error "Can't find $FIRSTPASS_CONFIG_FILE file"
    print_error "exit"
    exit 1
fi

# Second-pass working locations, all inside the first-pass directory.
LST=$OUTPUT_DIR_BASENAME"/lists"
HMM=$OUTPUT_DIR_BASENAME"/hmm/"
RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2"
LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log"
ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log"

print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1
e6be5137b reinitialized pro... |
141 142 143 144 145 |
#------------------#
# Create WORKSPACE #
#------------------#
# Takes the SECONDPASS lock, then (re)creates the lists, hmm and result
# directories. With RERUN=1 the hmm and result directories are kept, so work
# already done is not deleted.

# RERUN defaults to 0 when nothing set it, so the numeric tests below never
# receive an empty operand.
RERUN=${RERUN:-0}

# Lock directory
if [ -e "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" ] && [ "$RERUN" -eq 0 ]; then
    print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1

# The lists directory is always rebuilt from scratch.
rm -r "$LST" > /dev/null 2>&1
mkdir -p "$LST"
print_info "[${BASENAME}] Make directory $LST" 2

if [ "$RERUN" -eq 0 ]; then rm -r "$HMM" > /dev/null 2>&1; fi
mkdir -p "$HMM"
print_info "[${BASENAME}] Make directory $HMM" 2

if [ "$RERUN" -eq 0 ]; then rm -r "$RES_DIR" > /dev/null 2>&1; fi
mkdir -p "$RES_DIR" > /dev/null 2>&1
print_info "[${BASENAME}] Make directory $RES_DIR" 2

# Start from fresh log files.
rm "$LOGFILE" "$ERRORFILE" > /dev/null 2>&1
e6be5137b reinitialized pro... |
160 161 162 163 |
#-------------------#
# Check Pass        #
#-------------------#

print_info "[${BASENAME}] Check Pass 2 directory" 1

# For every entry of the result directory whose name contains "treil" and
# which is empty (zero size), delete it together with the .seg, .res and .pho
# files sharing its base name — presumably so that these segments are decoded
# again.
for treil_file in "$RES_DIR"/*treil*
do
    # When nothing matches, the glob stays literal: skip that pseudo entry.
    [ -e "$treil_file" ] || continue
    if [ ! -s "$treil_file" ]
    then
        # No spaces around '=': with spaces the shell runs a command named
        # "bn" instead of assigning the variable.
        bn=$(basename "$treil_file" ".treil")
        rm "$treil_file" "$RES_DIR/$bn.seg" "$RES_DIR/$bn.res" "$RES_DIR/$bn.pho" 2> /dev/null
        print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2
    fi
done

# Check if more than 89% of treil are done
135404bcf Modify verbose an... |
176 177 |
# Compare the number of second-pass lattices (.treil) with the number of
# first-pass results (.res): above 89% (and without RERUN) the show is
# considered done and the script stops successfully; with no first-pass
# result at all the second pass cannot run.
# NOTE(review): both exits below leave SECONDPASS.lock in place when it was
# created earlier in the script — confirm this is intended.

# Flags default to 0 when nothing set them, so numeric tests stay valid.
RERUN=${RERUN:-0}
CHECK=${CHECK:-0}

nbres_p1=$(ls "$RES_DIR_P1"/*.res 2> /dev/null | wc -l)
nbtreil_p2=$(ls "$RES_DIR"/*.treil 2> /dev/null | wc -l)
if [ "$nbres_p1" -gt 0 ]
then
    pourcentage=$(( (nbtreil_p2 * 100) / nbres_p1 ))
    if [ "$pourcentage" -gt 89 ] && [ "$RERUN" -eq 0 ]
    then
        print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1
        exit 0
    fi
else
    print_error "[${BASENAME}] No First Pass, No .res -> exit P2"
    if [ "$CHECK" -eq 1 ]; then print_log_file "$ERRORFILE" "No First Pass, No .res -> exit P2" ;fi
    exit 1
fi

#--------------------#
# Save configuration #
#--------------------#
# Copy the reference configuration next to the results, then append the
# values resolved for this run (single redirection for all appended lines).
cp "$SECONDPASS_CONFIG_FILE" "$OUTPUT_DIR_BASENAME/SecondPass.cfg"
{
    echo "WAV_FILE=$WAV_FILE"
    echo "BASENAME=$BASENAME"
    echo "FIRSTPASS_DIR=$FIRSTPASS_DIR"
    echo "PLP_DIR_P1=$PLP_DIR_P1"
    echo "PLP_FILE_P1=$PLP_FILE_P1"
    echo "PLP_LIST_P1=$PLP_LIST_P1"
    echo "OUTPUT_DIR=$OUTPUT_DIR"
    echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME"
    echo "LST=$LST"
    echo "HMM=$HMM"
    echo "RES_DIR=$RES_DIR"
} >> "$OUTPUT_DIR_BASENAME/SecondPass.cfg"
print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1
e6be5137b reinitialized pro... |
208 209 210 211 212 |
#--------------------------------------------------#
# Speaker Adaptation (AM) + Second pass (DECODING) #
#--------------------------------------------------#
# For each speaker label found in the first-pass label file:
#   1. list the first-pass segments of that speaker,
#   2. find the acoustic model (AM) whose plp_<model>.lst mentions the speaker,
#   3. adapt that model (mllrmap) and cluster its classes (gclust_pdf),
#   4. decode with the adapted model, or with the unadapted one when the
#      adapted .hmm/.cls files are missing or empty.

print_info "[${BASENAME}] Launch Second Pass" 1

# Flags default to 0 when nothing set them, so numeric tests stay valid.
RERUN=${RERUN:-0}
CHECK=${CHECK:-0}

# for all speaker (4th space-separated field of the label file)
for speaker in $(cut -f4 -d" " "$LBL_FILE_P1" | sort -u)
do
    lst_file="$LST/$speaker.lst"
    hmm_file="$HMM/$speaker.hmm"
    cls_file="$HMM/$speaker.cls"

    ## get seg file from P1 containing the speaker
    find "$RES_DIR_P1" -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > "$lst_file"
    print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3
    if [ ! -s "$lst_file" ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi

    # for all AM
    # NOTE(review): the bound uses MTAG while the body indexes MODS and
    # SPEERAL_CFG — presumably parallel arrays from the configuration.
    for (( i=0; i<${#MTAG[@]}; i++ ))
    do
        plp_lst="$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst"
        if [ ! -e "$plp_lst" ]; then continue; fi

        ## if is the good AM for the speaker
        type=$(grep -e "${speaker}$" "$plp_lst")
        if [ -z "$type" ]; then continue; fi

        ## HMM adaptation
        # "! -s" means "missing or empty file". The former "-z <path>" tested
        # a non-empty string and was therefore always false.
        if [ "$RERUN" -eq 0 ] || [ ! -s "$hmm_file" ]
        then
            print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3
            "$SPEERAL_TOOLS/mllrmap" "$SPEERAL_AM/${MODS[$i]}.hmm" -LR "-b$lst_file" "-d$PLP_DIR_P1/" -t.plp -i3 -c.seg "-o$hmm_file" "-a$RES_DIR_P1/" 2> /dev/null
        fi

        if [ "$CHECK" -eq 1 ] && [ ! -s "$hmm_file" ]
        then
            print_warn "[${BASENAME}] No hmm files created for $speaker" 2
            print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker"
            print_error "[${BASENAME}] Check $ERRORFILE"
            #exit 1
        fi

        ## cp map files
        cp "$SPEERAL_AM/${MODS[$i]}.map" "$HMM/$speaker.map"

        ## class clustering
        if [ -s "$hmm_file" ] && { [ "$RERUN" -eq 0 ] || [ ! -s "$cls_file" ]; }
        then
            print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3
            "$SPEERAL_TOOLS/gclust_pdf" "$hmm_file" "$cls_file" -i "$SPEERAL_AM/${MODS[$i]}.cls" 2> /dev/null
        fi

        if [ "$CHECK" -eq 1 ] && [ ! -s "$cls_file" ]
        then
            print_warn "[${BASENAME}] No cls file created for $speaker" 2
            print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speaker"
            print_error "[${BASENAME}] Check $ERRORFILE"
            #exit 1
        fi

        ## Speeral decoding
        # $SPEERAL_BIN, ${SPEERAL_CFG[$i]} and $FORKS stay unquoted: $FORKS
        # holds two words ("--forks N") and the other two may carry several
        # words as well — TODO confirm against SecondPass.cfg.
        if [ -s "$hmm_file" ] && [ -s "$cls_file" ]
        then
            print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3
            $SPEERAL_BIN "$lst_file" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR_P1" -m "$hmm_file" -c "$cls_file" $FORKS --lock
        else
            print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
            print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3
            $SPEERAL_BIN "$lst_file" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR_P1" -m "$SPEERAL_AM/${MODS[$i]}.hmm" -c "$SPEERAL_AM/${MODS[$i]}.cls" $FORKS --lock
        fi

        if [ "$CHECK" -eq 1 ]
        then
            check_second_pass_output_speeral "$lst_file" "$RES_DIR"
            if [ $? -eq 1 ]
            then
                print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2
                print_log_file "$LOGFILE" "WARN : Speeral number of output ERROR $LST/$speaker.lst"
                # Segments present in the speaker list but without a .seg in
                # the result directory are appended to the log file.
                ls "$RES_DIR"/*.seg | grep -e "${speaker}" | sed -e "s|${RES_DIR}\/||" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
                print_log_file "$LOGFILE" "Segs (and treil) not done : ["
                diff "$lst_file" "${OUTPUT_DIR_BASENAME}/.tmp" | grep -e "^< " | sed -e "s/< //" >> "$LOGFILE"
                print_log_file "$LOGFILE" "] [$(date +'%d/%m/%y %H:%M:%S')]"
                rm "${OUTPUT_DIR_BASENAME}/.tmp"
                #exit 1
            fi
        fi

        # Only the first matching AM is used for a speaker.
        break
    done
    #rm "$HMM/$speaker.*" > /dev/null 2>&1
    #rm "$LST/$speaker.lst" > /dev/null 2>&1
done
e6be5137b reinitialized pro... |
298 |
## Check missing seg and log it
# With CHECK=1, compare the first-pass PLP list with the lattices (.treil)
# present in the result directory. Missing entries are written to the error
# log with the percentage NOT done; otherwise "P2 OK" goes to the info log.

# CHECK defaults to 0 when nothing set it, so the numeric test stays valid.
CHECK=${CHECK:-0}

if [ "$CHECK" -eq 1 ]
then
    treil_done_list="${OUTPUT_DIR_BASENAME}/.tmp"
    ls "$RES_DIR"/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > "$treil_done_list"

    # Read the list only when it is readable: a bare "cat" with an empty
    # variable would wait on standard input.
    todo=0
    if [ -r "$PLP_LIST_P1" ]; then todo=$(wc -l < "$PLP_LIST_P1"); fi
    # Avoid a division by zero below.
    if [ "$todo" -eq 0 ]; then todo=1;fi

    nb_done=$(wc -l < "$treil_done_list") || nb_done=0
    notdone=$(( todo - nb_done ))
    pourcentage=$(( (notdone * 100) / todo ))
    if [ "$notdone" -ne 0 ]
    then
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file "$ERRORFILE" "ERROR : Treil not done ["
        diff "${PLP_LIST_P1}" "$treil_done_list" | grep -e "^< " | sed -e "s/< //" >> "${ERRORFILE}"
        print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME"
    else
        print_log_file "$LOGFILE" "P2 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')"
    fi
    rm "$treil_done_list" > /dev/null 2>&1
fi
e6be5137b reinitialized pro... |
317 318 319 320 |
#---------------#
# Convert res   #
#---------------#
# Run res2out.pl over the second-pass result directory to produce the CTM,
# TRS and TXT outputs, then release the SECONDPASS lock.

print_info "[${BASENAME}] Convert .res into .ctm" 1
# .res => .ctm
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format CTM --ignore "$RULES/asupp" --out "${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm"

print_info "[${BASENAME}] Convert .res into .trs" 1
# .res => .trs
# The TRS conversion reads a small temporary configuration file.
# NOTE(review): this literal spans four lines in the original script (one
# "key value" pair per line); the line breaks are restored here — confirm
# the exact separators against the repository version.
echo -e "name $AUTHOR
fileName ${BASENAME}
fileExt wav
segFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > "${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg"
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TRS --ignore "$RULES/asupp" --out "${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs" --trs_config "${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg"
rm "${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg" > /dev/null 2>&1

print_info "[${BASENAME}] Convert .res into .txt" 1
# .res => .txt
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TXT --ignore "$RULES/asupp" --out "${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt"

print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1

# unlock directory
mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock"