Commit 0bf609bcceb3af008651888fa40b72c381245e37
1 parent: 87013ba29c
Exists in: master
update and add script to extract TV corpus
Showing 6 changed files with 24 additions and 17 deletions (side-by-side diff)
README
main_tools/ConfPass.sh
| ... | ... | @@ -204,9 +204,11 @@ |
| 204 | 204 | # create USF configuration file |
| 205 | 205 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg |
| 206 | 206 | # create USF file |
| 207 | -$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg | |
| 207 | +$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg | |
| 208 | 208 | rm $OUTPUT_DIR/$BASENAME.usf_cfg |
| 209 | +cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE | |
| 209 | 210 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf |
| 211 | +rm $USF_FILE.tmp | |
| 210 | 212 | |
| 211 | 213 | #----------------# |
| 212 | 214 | # Check USF file # |
main_tools/FirstPass.sh
| ... | ... | @@ -85,10 +85,13 @@ |
| 85 | 85 | then |
| 86 | 86 | set -x |
| 87 | 87 | echo -e "## Mode DEBUG ON ##" |
| 88 | + REDIRECTION_OUTPUT="" | |
| 89 | +else | |
| 90 | + REDIRECTION_OUTPUT=" > /dev/null 2>&1" | |
| 88 | 91 | fi |
| 89 | 92 | |
| 90 | 93 | # enable verbose mode |
| 91 | -if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | |
| 94 | +if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; REDIRECTION_OUTPUT=" 2> /dev/null"; fi | |
| 92 | 95 | |
| 93 | 96 | # Check usage by number of arguments |
| 94 | 97 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
| ... | ... | @@ -160,7 +163,7 @@ |
| 160 | 163 | else |
| 161 | 164 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
| 162 | 165 | fi |
| 163 | -mkdir -p $RES_DIR | |
| 166 | +mkdir -p $RES_DIR $REDIRECTION_OUTPUT | |
| 164 | 167 | print_info "Make directory $RES_DIR" 1 |
| 165 | 168 | |
| 166 | 169 | #--------------------# |
| ... | ... | @@ -194,7 +197,7 @@ |
| 194 | 197 | then |
| 195 | 198 | print_message $WARNING 2 "$WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" |
| 196 | 199 | print_message $INFO 3 "avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" |
| 197 | - avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav | |
| 200 | + avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav $REDIRECTION_OUTPUT | |
| 198 | 201 | WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav |
| 199 | 202 | FILENAME=$BASENAME.wav |
| 200 | 203 | print_message $INFO 1 "new wav file : $WAV_FILE" |
| ... | ... | @@ -217,7 +220,7 @@ |
| 217 | 220 | print_info "$BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms |
| 218 | 221 | " 2 |
| 219 | 222 | |
| 220 | -$BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms | |
| 223 | +$BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms $REDIRECTION_OUTPUT | |
| 221 | 224 | |
| 222 | 225 | if [ $CHECK -eq 1 ] |
| 223 | 226 | then |
| ... | ... | @@ -238,7 +241,7 @@ |
| 238 | 241 | # Compute seg file |
| 239 | 242 | print_info "java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 2 |
| 240 | 243 | #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME |
| 241 | -java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME #–doCEClustering | |
| 244 | +java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME $REDIRECTION_OUTPUT #–doCEClustering | |
| 242 | 245 | |
| 243 | 246 | if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) |
| 244 | 247 | then |
| ... | ... | @@ -265,7 +268,7 @@ |
| 265 | 268 | print_info "Cut PLP file depending to LBL segmentations" 1 |
| 266 | 269 | print_info "$BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 2 |
| 267 | 270 | |
| 268 | -$SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG | |
| 271 | +$SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG $REDIRECTION_OUTPUT | |
| 269 | 272 | |
| 270 | 273 | if [ $CHECK -eq 1 ] |
| 271 | 274 | then |
| 272 | 275 | |
| ... | ... | @@ -316,9 +319,9 @@ |
| 316 | 319 | todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
| 317 | 320 | while [ $redo -gt 0 ]; do |
| 318 | 321 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
| 319 | - print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 2 | |
| 322 | + print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 2 | |
| 320 | 323 | # Run speeral |
| 321 | - $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock | |
| 324 | + $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT | |
| 322 | 325 | |
| 323 | 326 | # Check if error |
| 324 | 327 | if [ $CHECK -eq 1 ] |
| 325 | 328 | |
| 326 | 329 | |
| ... | ... | @@ -374,13 +377,13 @@ |
| 374 | 377 | #---------------# |
| 375 | 378 | |
| 376 | 379 | # .res => .ctm |
| 377 | -$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm | |
| 380 | +$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm $REDIRECTION_OUTPUT | |
| 378 | 381 | # .res => .trs |
| 379 | 382 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg |
| 380 | -$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | |
| 381 | -rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | |
| 383 | +$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg $REDIRECTION_OUTPUT | |
| 384 | +rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null | |
| 382 | 385 | # .res => .txt |
| 383 | -$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt | |
| 386 | +$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt $REDIRECTION_OUTPUT | |
| 384 | 387 | |
| 385 | 388 | # unlock directory |
| 386 | 389 | mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" |
main_tools/ScoringRes.sh
| ... | ... | @@ -138,8 +138,8 @@ |
| 138 | 138 | fi |
| 139 | 139 | done < $SRT_FILE > $SCORING_DIR/$BASENAME.tmp.txt |
| 140 | 140 | |
| 141 | -#cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
| 142 | -cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
| 141 | +cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
| 142 | +#cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
| 143 | 143 | |
| 144 | 144 | $SCRIPT_PATH/srt2stm.pl $SCORING_DIR/$BASENAME.tmp2.txt > "$SCORING_DIR/$BASENAME.stm" |
| 145 | 145 | rm $SCORING_DIR/$BASENAME.tmp.txt $SCORING_DIR/$BASENAME.tmp2.txt |
main_tools/ThirdPass.sh