Commit 0bf609bcceb3af008651888fa40b72c381245e37
1 parent
87013ba29c
Exists in
master
update and add script to extract TV corpus
Showing 6 changed files with 24 additions and 17 deletions Side-by-side Diff
README
main_tools/ConfPass.sh
... | ... | @@ -204,9 +204,11 @@ |
204 | 204 | # create USF configuration file |
205 | 205 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg |
206 | 206 | # create USF file |
207 | -$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg | |
207 | +$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg | |
208 | 208 | rm $OUTPUT_DIR/$BASENAME.usf_cfg |
209 | +cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE | |
209 | 210 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf |
211 | +rm $USF_FILE.tmp | |
210 | 212 | |
211 | 213 | #----------------# |
212 | 214 | # Check USF file # |
main_tools/FirstPass.sh
... | ... | @@ -85,10 +85,13 @@ |
85 | 85 | then |
86 | 86 | set -x |
87 | 87 | echo -e "## Mode DEBUG ON ##" |
88 | + REDIRECTION_OUTPUT="" | |
89 | +else | |
90 | + REDIRECTION_OUTPUT=" > /dev/null 2>&1" | |
88 | 91 | fi |
89 | 92 | |
90 | 93 | # mode verbose enable |
91 | -if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | |
94 | +if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; REDIRECTION_OUTPUT=" 2> /dev/null"; fi | |
92 | 95 | |
93 | 96 | # Check USAGE by arguments number |
94 | 97 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
... | ... | @@ -160,7 +163,7 @@ |
160 | 163 | else |
161 | 164 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
162 | 165 | fi |
163 | -mkdir -p $RES_DIR | |
166 | +mkdir -p $RES_DIR $REDIRECTION_OUTPUT | |
164 | 167 | print_info "Make directory $RES_DIR" 1 |
165 | 168 | |
166 | 169 | #--------------------# |
... | ... | @@ -194,7 +197,7 @@ |
194 | 197 | then |
195 | 198 | print_message $WARNING 2 "$WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" |
196 | 199 | print_message $INFO 3 "avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" |
197 | - avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav | |
200 | + avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav $REDIRECTION_OUTPUT | |
198 | 201 | WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav |
199 | 202 | FILENAME=$BASENAME.wav |
200 | 203 | print_message $INFO 1 "new wav file : $WAV_FILE" |
... | ... | @@ -217,7 +220,7 @@ |
217 | 220 | print_info "$BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms |
218 | 221 | " 2 |
219 | 222 | |
220 | -$BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms | |
223 | +$BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms $REDIRECTION_OUTPUT | |
221 | 224 | |
222 | 225 | if [ $CHECK -eq 1 ] |
223 | 226 | then |
... | ... | @@ -238,7 +241,7 @@ |
238 | 241 | # Calcul seg file |
239 | 242 | print_info "java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 2 |
240 | 243 | #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME |
241 | -java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME #–doCEClustering | |
244 | +java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME $REDIRECTION_OUTPUT #–doCEClustering | |
242 | 245 | |
243 | 246 | if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) |
244 | 247 | then |
... | ... | @@ -265,7 +268,7 @@ |
265 | 268 | print_info "Cut PLP file depending to LBL segmentations" 1 |
266 | 269 | print_info "$BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 2 |
267 | 270 | |
268 | -$SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG | |
271 | +$SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG $REDIRECTION_OUTPUT | |
269 | 272 | |
270 | 273 | if [ $CHECK -eq 1 ] |
271 | 274 | then |
272 | 275 | |
... | ... | @@ -316,9 +319,9 @@ |
316 | 319 | todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
317 | 320 | while [ $redo -gt 0 ]; do |
318 | 321 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
319 | - print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 2 | |
322 | + print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 2 | |
320 | 323 | # Run speeral |
321 | - $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock | |
324 | + $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT | |
322 | 325 | |
323 | 326 | # Check if error |
324 | 327 | if [ $CHECK -eq 1 ] |
325 | 328 | |
326 | 329 | |
... | ... | @@ -374,13 +377,13 @@ |
374 | 377 | #---------------# |
375 | 378 | |
376 | 379 | # .res => .ctm |
377 | -$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm | |
380 | +$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm $REDIRECTION_OUTPUT | |
378 | 381 | # .res => .trs |
379 | 382 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg |
380 | -$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | |
381 | -rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | |
383 | +$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg $REDIRECTION_OUTPUT | |
384 | +rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null | |
382 | 385 | # .res => .txt |
383 | -$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt | |
386 | +$SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt $REDIRECTION_OUTPUT | |
384 | 387 | |
385 | 388 | # unlock directory |
386 | 389 | mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" |
main_tools/ScoringRes.sh
... | ... | @@ -138,8 +138,8 @@ |
138 | 138 | fi |
139 | 139 | done < $SRT_FILE > $SCORING_DIR/$BASENAME.tmp.txt |
140 | 140 | |
141 | -#cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
142 | -cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
141 | +cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
142 | +#cat $SCORING_DIR/$BASENAME.tmp.txt | sed -e "s|\n| |g" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | sed -e "s|<s>||g" | sed -e "s|</s>||g" | sed -e "s|\n+| |g" > $SCORING_DIR/$BASENAME.tmp2.txt | |
143 | 143 | |
144 | 144 | $SCRIPT_PATH/srt2stm.pl $SCORING_DIR/$BASENAME.tmp2.txt > "$SCORING_DIR/$BASENAME.stm" |
145 | 145 | rm $SCORING_DIR/$BASENAME.tmp.txt $SCORING_DIR/$BASENAME.tmp2.txt |
main_tools/ThirdPass.sh