Blame view
egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
3.44 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
#!/bin/bash

# Copyright 2016 Pegah Ghahremani

# Prepares the training features of one language for neural-network training.
#
# Usage: $0 [options] <language-id>
#   e.g. $0 102-assamese
#
# Options (settable as --name value through utils/parse_options.sh, or in
# local.conf): stage, train_stage, generate_alignments, speed_perturb,
# use_pitch, pitch_conf, feat_suffix.
#
# Stages (a stage runs only when $stage is <= its number):
#   1. (speed_perturb=true only) build data/<lang>/train_sp with
#      utils/data/perturb_data_dir_speed_3way.sh, then extract PLP
#      (or PLP+pitch when use_pitch=true) features and CMVN stats for it.
#   2. (speed_perturb=true and generate_alignments=true only) align the
#      perturbed data with exp/<lang>/tri5 into exp/<lang>/tri5_ali_sp.
#   3. Copy the training data dir to data/<lang>/<train_set><feat_suffix>,
#      volume-perturb it, and extract high-resolution MFCC features
#      (with online pitch when use_pitch=true) plus CMVN stats.
#
# Stages 2 and 3 leave a ".done" marker file behind and are skipped on later
# runs while that marker exists.

. ./cmd.sh
set -e

stage=1
train_stage=-10
generate_alignments=true    # If true, it regenerates alignments.
speed_perturb=true
use_pitch=true              # If true, it generates pitch features and combines them with the MFCCs.
pitch_conf=conf/pitch.conf  # Configuration used for pitch extraction.
feat_suffix=_hires          # Feature suffix for training data.

if [ ! -f ./conf/common_vars.sh ]; then
  echo 'the file conf/common_vars.sh does not exist!' >&2
  exit 1
fi
. conf/common_vars.sh || exit 1

if [ -f local.conf ]; then
  . ./local.conf
fi

. ./utils/parse_options.sh

# Validate the positional arguments before reading them.
if [ $# -ne 1 ]; then
  echo "Usage:$0 [options] <language-id>" >&2
  echo "e.g. $0 102-assamese" >&2
  exit 1
fi

lang=$1

# Name of the training data dir; switched to the perturbed copy below.
train_set=train

if [ "$speed_perturb" == "true" ]; then
  if [ "$stage" -le 1 ]; then
    # Although the nnet model will be trained on high-resolution data, we
    # still have to perturb the normal data to get the alignments.
    # _sp stands for speed-perturbed.
    for datadir in train; do
      sp_dir=data/$lang/${datadir}_sp
      # An existing perturbed dir is taken as "already prepared".
      if [ ! -d "$sp_dir" ]; then
        ./utils/data/perturb_data_dir_speed_3way.sh \
          "data/$lang/$datadir" "$sp_dir"

        # Extract PLP(+pitch) features for the perturbed data.
        featdir=plp_perturbed/$lang
        if [ "$use_pitch" == "true" ]; then
          steps/make_plp_pitch.sh --cmd "$train_cmd" --nj 70 \
            "$sp_dir" "exp/$lang/make_plp_pitch/${datadir}_sp" "$featdir"
        else
          steps/make_plp.sh --cmd "$train_cmd" --nj 70 \
            "$sp_dir" "exp/$lang/make_plp/${datadir}_sp" "$featdir"
        fi
        steps/compute_cmvn_stats.sh \
          "$sp_dir" "exp/$lang/make_plp/${datadir}_sp" "$featdir" || exit 1
        utils/fix_data_dir.sh "$sp_dir"
      fi
    done
  fi

  train_set=train_sp
  if [ "$stage" -le 2 ] && [ "$generate_alignments" == "true" ] &&
     [ ! -f "exp/$lang/tri5_ali_sp/.done" ]; then
    # Obtain the alignment of the perturbed data.
    # boost_sil is not set in this file; it is expected to come from one of
    # the sourced configuration files.
    steps/align_fmllr.sh \
      --nj 70 --cmd "$train_cmd" \
      --boost-silence "$boost_sil" \
      "data/$lang/$train_set" "data/$lang/lang" \
      "exp/$lang/tri5" "exp/$lang/tri5_ali_sp" || exit 1
    touch "exp/$lang/tri5_ali_sp/.done"
  fi
fi

# Options, output dir and script-name affix for high-resolution extraction.
# An array keeps every option a separate word without relying on splitting.
hires_opts=(--mfcc-config conf/mfcc_hires.conf)
mfccdir=mfcc_hires/$lang
mfcc_affix=""
if [ "$use_pitch" == "true" ]; then
  hires_opts+=(--online-pitch-config "$pitch_conf")
  mfccdir=mfcc_hires_pitch/$lang
  mfcc_affix=_pitch_online
fi

# Marker recording that stage 3 finished for the current training set.
hires_done=data/$lang/${train_set}${feat_suffix}/.done

if [ "$stage" -le 3 ] && [ ! -f "$hires_done" ]; then
  # On *.clsp.jhu.edu hosts, spread the feature storage over several disks.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d "$mfccdir/storage" ]; then
    date=$(date +'%m_%d_%H_%M')
    utils/create_split_dir.pl \
      /export/b0{1,2,3,4}/"$USER"/kaldi-data/egs/"$lang-$date"/s5c/"$mfccdir"/storage \
      "$mfccdir/storage"
  fi

  for dataset in "$train_set"; do
    data_dir=data/$lang/${dataset}${feat_suffix}
    log_dir=exp/$lang/make${feat_suffix}/$dataset

    utils/copy_data_dir.sh "data/$lang/$dataset" "$data_dir" || exit 1

    # Scale the waveforms, this is useful as we don't use CMVN.
    utils/data/perturb_data_dir_volume.sh "$data_dir" || exit 1

    steps/make_mfcc${mfcc_affix}.sh --nj 70 "${hires_opts[@]}" \
      --cmd "$train_cmd" "$data_dir" "$log_dir" "$mfccdir"

    steps/compute_cmvn_stats.sh "$data_dir" "$log_dir" "$mfccdir"

    # Remove the small number of utterances that couldn't be extracted for
    # some reason (e.g. too short; no such file).
    utils/fix_data_dir.sh "$data_dir"
  done

  touch "$hires_done"
fi

exit 0