egs/tedlium/s5/local/online/run_nnet2_ms_perturbed.sh
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
#           2014  Tom Ko
#           2014  Vijay Peddinti
# Apache 2.0

# This example script demonstrates how speed perturbation of the data helps
# the nnet training in this TED-LIUM setup.

. ./cmd.sh

set -e
stage=0
train_stage=-10
use_gpu=true
splice_indexes="layer0/-2:-1:0:1:2 layer1/-1:2 layer3/-3:3 layer4/-7:2"
common_egs_dir=
dir=exp/nnet2_online/nnet_ms_sp
has_fisher=true

. ./path.sh
. ./utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.  Otherwise, call this script with --use-gpu false.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
else
  # Run on CPU with multiple threads; this should give similar results to the
  # GPU run, but training will be slower.
  num_threads=16
  minibatch_size=128
  parallel_opts="--num-threads $num_threads"
fi

# Run the common stages of training, including training the iVector extractor.
local/online/run_nnet2_common.sh --stage $stage || exit 1;

if [ $stage -le 6 ]; then
  # Although the nnet will be trained on high-resolution data, we still have to
  # perturb the normal-resolution data to get the alignments.
  # _sp stands for speed-perturbed.
  utils/perturb_data_dir_speed.sh 0.9 data/train data/temp1
  utils/perturb_data_dir_speed.sh 1.0 data/train data/temp2
  utils/perturb_data_dir_speed.sh 1.1 data/train data/temp3
  utils/combine_data.sh --extra-files utt2uniq data/train_sp data/temp1 data/temp2 data/temp3
  rm -r data/temp1 data/temp2 data/temp3
  mfccdir=mfcc_perturbed
  for x in train_sp; do
    steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
      data/$x exp/make_mfcc/$x $mfccdir || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  done
  utils/fix_data_dir.sh data/train_sp
fi

if [ $stage -le 7 ]; then
  # Obtain the alignment of the perturbed data.
  steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
    data/train_sp data/lang exp/tri3 exp/tri3_ali_sp || exit 1
fi

if [ $stage -le 8 ]; then
  # Now perturb the high-resolution data.
  utils/perturb_data_dir_speed.sh 0.9 data/train_hires data/temp1
  utils/perturb_data_dir_speed.sh 1.0 data/train_hires data/temp2
  utils/perturb_data_dir_speed.sh 1.1 data/train_hires data/temp3
  utils/combine_data.sh --extra-files utt2uniq data/train_hires_sp data/temp1 data/temp2 data/temp3
  rm -r data/temp1 data/temp2 data/temp3
  mfccdir=mfcc_perturbed
  for x in train_hires_sp; do
    steps/make_mfcc.sh --cmd "$train_cmd" --nj 70 --mfcc-config conf/mfcc_hires.conf \
      data/$x exp/make_hires/$x $mfccdir || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_hires/$x $mfccdir || exit 1;
  done
  utils/fix_data_dir.sh data/train_hires_sp
fi

if [ $stage -le 9 ]; then
  # We extract iVectors on all the train data, which will be what we train the
  # system on.  Having a larger number of speakers is helpful for
  # generalization, and to handle per-utterance decoding well (the iVector
  # starts at zero).
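  # The --utts-per-spk-max 2 option below splits each speaker into
  # pseudo-speakers of at most two utterances, so each training iVector is
  # estimated from little data, closer to the per-utterance test condition.
  # Illustrative sketch of the effect on spk2utt (not actual output; the exact
  # pseudo-speaker naming is an implementation detail of copy_data_dir.sh):
  #   spkA utt1 utt2 utt3 utt4   ->   spkA-1 utt1 utt2
  #                                   spkA-2 utt3 utt4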
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_hires_sp data/train_hires_sp_max2
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
    data/train_hires_sp_max2 exp/nnet2_online/extractor exp/nnet2_online/ivectors_train_hires_sp2 || exit 1;
fi

if [ $stage -le 10 ]; then
  steps/nnet2/train_multisplice_accel2.sh --stage $train_stage \
    --num-epochs 3 --num-jobs-initial 2 --num-jobs-final 12 \
    --num-hidden-layers 6 --splice-indexes "$splice_indexes" \
    --feat-type raw \
    --online-ivector-dir exp/nnet2_online/ivectors_train_hires_sp2 \
    --cmvn-opts "--norm-means=false --norm-vars=false" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --parallel-opts "$parallel_opts" \
    --io-opts "--max-jobs-run 12" \
    --add-layers-period 1 \
    --mix-up 6000 \
    --initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \
    --cmd "$decode_cmd" \
    --egs-dir "$common_egs_dir" \
    --pnorm-input-dim 3500 \
    --pnorm-output-dim 350 \
    data/train_hires_sp data/lang exp/tri3_ali_sp $dir || exit 1;
fi

if [ $stage -le 11 ]; then
  # Dump iVectors for the testing data.
  for decode_set in dev test; do
    num_jobs=`cat data/${decode_set}_hires/utt2spk | cut -d' ' -f2 | sort -u | wc -l`
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $num_jobs \
      data/${decode_set}_hires exp/nnet2_online/extractor exp/nnet2_online/ivectors_${decode_set}_hires || exit 1;
  done
fi

if [ $stage -le 12 ]; then
  # This does offline decoding that should give about the same results as the
  # real online decoding (the one with --per-utt true).
  for decode_set in dev test; do
    num_jobs=`cat data/${decode_set}_hires/utt2spk | cut -d' ' -f2 | sort -u | wc -l`
    decode_dir=$dir/decode_${decode_set}
    steps/nnet2/decode.sh --nj $num_jobs --cmd "$decode_cmd" --config conf/decode.config \
      --online-ivector-dir exp/nnet2_online/ivectors_${decode_set}_hires \
      exp/tri3/graph data/${decode_set}_hires $decode_dir || exit 1;
    steps/lmrescore_const_arpa.sh data/lang_test data/lang_rescore data/${decode_set}_hires \
      $decode_dir $decode_dir.rescore || exit 1
  done
fi

if [ $stage -le 13 ]; then
  # If this setup used PLP features, we'd have to give the option
  # --feature-type plp to the script below.
  steps/online/nnet2/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \
    data/lang exp/nnet2_online/extractor "$dir" ${dir}_online || exit 1;
fi

wait;

if [ $stage -le 14 ]; then
  # Do the actual online decoding with iVectors, carrying info forward from
  # previous utterances of the same speaker.
  for decode_set in dev test; do
    num_jobs=`cat data/${decode_set}_hires/utt2spk | cut -d' ' -f2 | sort -u | wc -l`
    decode_dir=${dir}_online/decode_${decode_set}
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $num_jobs \
      exp/tri3/graph data/${decode_set}_hires $decode_dir || exit 1;
    steps/lmrescore_const_arpa.sh data/lang_test data/lang_rescore data/${decode_set}_hires \
      $decode_dir $decode_dir.rescore || exit 1
  done
fi

if [ $stage -le 15 ]; then
  # This version of the decoding treats each utterance separately, without
  # carrying forward speaker information.
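  # With --per-utt true, the decoder treats every utterance as if it came from
  # a new speaker, so the online iVector estimate is restarted (from zero) at
  # each utterance instead of accumulating over the speaker's earlier
  # utterances, as it does in stage 14 above.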
  for decode_set in dev test; do
    num_jobs=`cat data/${decode_set}_hires/utt2spk | cut -d' ' -f2 | sort -u | wc -l`
    decode_dir=${dir}_online/decode_${decode_set}_utt
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $num_jobs \
      --per-utt true exp/tri3/graph data/${decode_set}_hires $decode_dir || exit 1;
    steps/lmrescore_const_arpa.sh data/lang_test data/lang_rescore data/${decode_set}_hires \
      $decode_dir $decode_dir.rescore || exit 1
  done
fi

if [ $stage -le 16 ]; then
  # This version of the decoding treats each utterance separately, without
  # carrying forward speaker information, but looks to the end of the
  # utterance while computing the iVector (--online false).
  for decode_set in dev test; do
    num_jobs=`cat data/${decode_set}_hires/utt2spk | cut -d' ' -f2 | sort -u | wc -l`
    decode_dir=${dir}_online/decode_${decode_set}_utt_offline
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $num_jobs \
      --per-utt true --online false exp/tri3/graph data/${decode_set}_hires \
      $decode_dir || exit 1;
    steps/lmrescore_const_arpa.sh data/lang_test data/lang_rescore data/${decode_set}_hires \
      $decode_dir $decode_dir.rescore || exit 1
  done
fi

wait;

if [ $stage -le 17 ]; then
  # Prepare the build for distribution.
  cat <<EOF >${dir}_online/sample_decode.sh
. ./cmd.sh
data_dir=\$1   # e.g. data/dev_hires (to be prepared by the user; see egs/tedlium/run.sh for examples)
model_dir=\$2  # e.g. exp/nnet2_online/nnet_ms_sp_online (provided in the distribution)
decode_dir=\$model_dir/\`basename \$data_dir\`
num_jobs=\`cat \$data_dir/spk2utt | wc -l\`
# Note that the graph directory (exp/tri3/graph) is not provided in the distribution.
steps/online/nnet2/decode.sh --cmd "\$decode_cmd" --nj \$num_jobs \
  exp/tri3/graph \$data_dir \$decode_dir ;
EOF
  chmod +x ${dir}_online/sample_decode.sh
  dist_file=tedlium_`basename $dir`.tgz
  utils/prepare_online_nnet_dist_build.sh --other-files ${dir}_online/sample_decode.sh \
    data/lang ${dir}_online $dist_file
  echo "NOTE: If you would like to upload this build ($dist_file) to kaldi-asr.org please check the process at http://kaldi-asr.org/uploads.html"
fi

exit 0;
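# Example invocations (a sketch; the option names come from the variables
# defined near the top of this script, which utils/parse_options.sh exposes
# as command-line options):
#   local/online/run_nnet2_ms_perturbed.sh                  # run all stages
#   local/online/run_nnet2_ms_perturbed.sh --stage 10       # resume at nnet training
#   local/online/run_nnet2_ms_perturbed.sh --use-gpu false  # train on CPU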