Blame view
Scripts/01_init_TEST_data_features_LIA.sh
1.95 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
#!/bin/bash
#==============#
# for test data #
#==============#
#
# Builds the TEST data directory of an experiment from .trs transcriptions
# and audio files, then runs MFCC extraction and CMVN statistics on it.
#
# Usage (must be run from a directory where ../LIA_kaldiUtils exists, since
# path.sh and cmd.sh are sourced relative to the current directory):
#   bash 01_init_TEST_data_features_LIA.sh EXPE_DIR TRS_DIR WAV_DIR
# Example:
#   bash 01_init_TEST_data_features_LIA.sh /local2/PERCOL/bigot/KALDI/EXP_GRNBL/ /local2/PERCOL/bigot/KALDI/MICRO_TEST/TRS/ /local2/PERCOL/bigot/KALDI/MICRO_TEST/WAV/
#
# Arguments:
#   $1  EXPE_DIR  experiment root; all outputs go under $EXPE_DIR/TEST
#   $2  TRS_DIR   directory searched recursively for *.trs files
#   $3  WAV_DIR   directory searched recursively for the audio files
#
# Files written:
#   $EXPE_DIR/TEST/ling_Data/text
#   $EXPE_DIR/TEST/ac_Data/{text,segments,wav.scp,utt2spk,spk2utt}
#   $EXPE_DIR/TEST/MFCC and $EXPE_DIR/TEST/MFCC/log are handed to
#   make_mfcc.sh / compute_cmvn_stats.sh as feature and log directories.
#
# Expected from the sourced files (not defined here — TODO confirm):
#   CONF_DIR (directory holding mfcc.conf) and train_cmd.

# Print an error message on stderr and abort.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

if [[ $# -lt 3 ]]; then
  echo "Usage: ${0##*/} EXPE_DIR TRS_DIR WAV_DIR" >&2
  exit 1
fi

FORK=8 # number of parallel jobs given to make_mfcc.sh (--nj)
EXPE_DIR=$1
TRS_DIR=$2
WAV_DIR=$3

# An empty EXPE_DIR would make every output path start at the filesystem root.
[[ -n "$EXPE_DIR" ]] || die "EXPE_DIR must not be empty"
[[ -d "$TRS_DIR" ]] || die "TRS_DIR is not a directory: $TRS_DIR"
[[ -d "$WAV_DIR" ]] || die "WAV_DIR is not a directory: $WAV_DIR"

# Only existence is checked: the exit status of the sourced files themselves
# is deliberately ignored, exactly as before.
[[ -r ../LIA_kaldiUtils/path.sh ]] || die "cannot read ../LIA_kaldiUtils/path.sh (wrong working directory?)"
[[ -r ../LIA_kaldiUtils/cmd.sh ]] || die "cannot read ../LIA_kaldiUtils/cmd.sh (wrong working directory?)"
. ../LIA_kaldiUtils/path.sh
. ../LIA_kaldiUtils/cmd.sh

# NOTE(review): LM_DIR and AC_DIR are not used anywhere in this script; kept
# in case they are meant as documentation of the experiment layout.
LM_DIR=$EXPE_DIR/TEST/LANGUAGE_MODEL
AC_DIR=$EXPE_DIR/TEST/ACOUSTIC_MODEL
AC_DATA=$EXPE_DIR/TEST/ac_Data/
LM_DATA=$EXPE_DIR/TEST/ling_Data/
MFCC_DIR=$EXPE_DIR/TEST/MFCC
MFCC_DIR_LOG=$EXPE_DIR/TEST/MFCC/log/

mkdir -p -- "$LM_DATA" "$AC_DATA" "$MFCC_DIR" "$MFCC_DIR_LOG" \
  || die "cannot create output directories under $EXPE_DIR/TEST"

TEXT=$LM_DATA/text
# -f: the file does not exist on a first run; it is rebuilt by appending below.
rm -f -- "$TEXT"

# preparing speech turns file data and features
# File names are read NUL-delimited on fd 3 so that paths containing
# whitespace survive and the converters keep the script's own stdin.
while IFS= read -r -d '' -u 3 trs_file; do
  # Earlier variant of the first stage, without -l:
  #trs2stm.pl $file -rmt "noise,hes,pi,pibe,pers" -e "pronounce" -k | \
  trs2stm.pl "$trs_file" -rmt "noise,hes,pi,pibe,pers" -e "pronounce" -l -k \
    | reacc_win2bdlex \
    | stm_2_kaldi_txt.pl \
    | lia_map_words.pl >> "$TEXT"
done 3< <(find "$TRS_DIR" -name "*.trs" -print0)

# Without any transcription every later step would work on missing/empty data.
[[ -s "$TEXT" ]] || die "no transcription produced from $TRS_DIR (no *.trs file, or conversion failed)"

# NOTE(review): the sort order depends on the current locale, presumably set
# by path.sh — confirm it is the one the downstream tools expect.
sort -o "$TEXT" -- "$TEXT"
cp -- "$TEXT" "$AC_DATA/text"

# The first field of each text line is the segment id, split on '#':
# field 1 = audio name, field 3 = start, field 4 = end. Start and end are
# divided by 100 (presumably centiseconds -> seconds; TODO confirm).
awk '{
  segment = $1
  split(segment, S, "[#]")
  audioname = S[1]
  startf = S[3]
  endf = S[4]
  print segment " " audioname " " startf/100 " " endf/100
}' < "$TEXT" > "$AC_DATA/segments"

# __ preparing audio file list ____ #
# One line per distinct audio name: "<name> <path(s) found under WAV_DIR>".
# Several matches are joined with single spaces on the same line.
cut -d" " -f2 "$AC_DATA/segments" | sort -u \
  | while IFS= read -r audio; do
      wav_paths=$(find "$WAV_DIR" -name "*${audio}*" | sort -u | paste -sd' ' -)
      if [[ -n "$wav_paths" ]]; then
        printf '%s %s\n' "$audio" "$wav_paths"
      else
        # Same output line as before (name only), but no longer silent.
        echo "WARNING: no audio file matching '*${audio}*' under $WAV_DIR" >&2
        printf '%s\n' "$audio"
      fi
    done \
  | sort -u > "$AC_DATA/wav.scp"

# utt2spk: "<segment id> <2nd '#'-separated field of the segment id>".
awk '{ split($1, b, "#"); print $1 " " b[2] }' "$AC_DATA/segments" \
  > "$AC_DATA/utt2spk" || exit 1
sort -k 2 "$AC_DATA/utt2spk" | utt2spk_to_spk2utt.pl > "$AC_DATA/spk2utt" || exit 1

# --- feature extraction ---- #
# CONF_DIR is never assigned in this script; an empty value would silently
# point --mfcc-config at /mfcc.conf.
[[ -n "${CONF_DIR:-}" ]] || die "CONF_DIR is not set (expected from ../LIA_kaldiUtils/path.sh or cmd.sh)"

echo "====> make_mfcc.sh --nj $FORK --mfcc-config $CONF_DIR/mfcc.conf --cmd $train_cmd $AC_DATA $MFCC_DIR_LOG $MFCC_DIR"
make_mfcc.sh --nj "$FORK" --mfcc-config "$CONF_DIR/mfcc.conf" --cmd "$train_cmd" \
  "$AC_DATA" "$MFCC_DIR_LOG" "$MFCC_DIR" || die "make_mfcc.sh failed"

echo "======> compute_cmvn_stats.sh $AC_DATA $MFCC_DIR_LOG $MFCC_DIR"
# Last command: its exit status is the exit status of the script.
compute_cmvn_stats.sh "$AC_DATA" "$MFCC_DIR_LOG" "$MFCC_DIR"