#!/bin/bash
# egs/gale_arabic/s5/local/online/run_nnet2.sh

# Copyright 2014  Vimal Manohar

# This is our online neural-net build for the GALE system.

. ./cmd.sh

stage=-1
train_stage=-10
use_gpu=true
mfccdir=mfcc
train_nj=120
decode_nj=30

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi
with CUDA.  If you want to use GPUs (and have them), go to src/, and configure
and make on a machine where "nvcc" is installed.  Otherwise, call this script
with --use-gpu false.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  # the _a is in case I want to change the parameters.
  dir=exp/nnet2_online/nnet_a_gpu
else
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slower.
  num_threads=16
  minibatch_size=128
  parallel_opts="--num-threads $num_threads"
  dir=exp/nnet2_online/nnet_a
fi

if [ $stage -le 0 ]; then
  # This shows how you can split data across multiple file-systems; we'll split
  # the MFCC dir across multiple locations.  You might want to be careful here,
  # if you have multiple copies of Kaldi checked out and run the same recipe,
  # not to let them overwrite each other.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
    date=$(date +'%m_%d_%H_%M')
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/gale-$date/s5/$mfccdir/storage $mfccdir/storage || exit 1
  fi

  utils/copy_data_dir.sh data/train data/train_hires || exit 1
  steps/make_mfcc_pitch_online.sh --nj $train_nj --mfcc-config conf/mfcc_hires.conf \
    --cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train $mfccdir || exit 1;
fi

if [ $stage -le 1 ]; then
  # We'll use the features with just MFCCs, no pitch, to train the iVector
  # extractor on.  Check that we're using 40-dim features so the command line
  # below is correct.
  ! grep 'num-ceps=40' conf/mfcc_hires.conf >/dev/null && \
    echo "Change the script if you change conf/mfcc_hires.conf" && exit 1;
  steps/select_feats.sh --nj 5 --cmd "$train_cmd" 0-39 data/train_hires \
    data/train_hires_mfcconly exp/nnet2_online/select_hires_train $mfccdir || exit 1
  steps/compute_cmvn_stats.sh data/train_hires_mfcconly exp/nnet2_online/select_hires_train $mfccdir || exit 1

  # Make a subset of about 1/3 of the data.
  utils/subset_data_dir.sh data/train_hires_mfcconly 100000 \
    data/train_hires_mfcconly_100k || exit 1
  # Make a corresponding subset of the normal-dimensional-MFCC training data.
  utils/subset_data_dir.sh --utt-list <(awk '{print $1}' data/train_hires_mfcconly_100k/utt2spk) \
    data/train data/train_100k || exit 1
fi

if [ $stage -le 2 ]; then
  # We need to build a small system just because we need the LDA+MLLT transform
  # to train the diag-UBM on top of.  First align the 100k subset using the
  # tri3b system and normal MFCC features, so we have alignments to build our
  # hires-MFCC system on top of.
  steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
    data/train_100k data/lang exp/tri3b exp/tri3b_ali_100k || exit 1;

  # Build a small LDA+MLLT system on top of the hires MFCC features, just
  # because we need the transform.  We use --num-iters 13 because after we get
  # the transform (the 12th iter is the last), any further training is pointless.
  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 --realign-iters "" \
    --splice-opts "--left-context=3 --right-context=3" \
    5000 10000 data/train_hires_mfcconly_100k data/lang exp/tri3b_ali_100k exp/nnet2_online/tri4a || exit 1
fi
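# Optional sanity check (example only, not run by this script): as the stage-3
# comment below says, only the splice options and the LDA+MLLT transform from
# exp/nnet2_online/tri4a are used later on.  Assuming the Kaldi binaries are on
# your PATH via path.sh, you could dump the transform in text form with e.g.:
#
#   copy-matrix --binary=false exp/nnet2_online/tri4a/final.mat -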
if [ $stage -le 3 ]; then
  # Train a diagonal UBM.  The input directory exp/nnet2_online/tri4a is only
  # needed for the splice-opts and the LDA+MLLT transform.
  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj $train_nj --num-frames 400000 \
    data/train_hires_mfcconly_100k 512 exp/nnet2_online/tri4a exp/nnet2_online/diag_ubm || exit 1
fi

if [ $stage -le 4 ]; then
  # Train an iVector extractor on all the mfcconly data.  Note: although we use
  # only 10 jobs, each job uses 16 processes in total.
  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
    data/train_hires_mfcconly exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1;
fi

if [ $stage -le 5 ]; then
  # Extract iVectors for the training data.
  ivectordir=exp/nnet2_online/ivectors_train
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
    # This shows how you can split across multiple file-systems.
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/gale/s5/$ivectordir/storage $ivectordir/storage || exit 1
  fi
  # Having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (the iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_hires_mfcconly data/train_hires_mfcconly_max2 || exit 1
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $train_nj \
    data/train_hires_mfcconly_max2 exp/nnet2_online/extractor $ivectordir || exit 1;
fi

if [ $stage -le 6 ]; then
  # This shows how you can split across multiple file-systems.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-online/egs/bolt/s5/$dir/egs $dir/egs/storage || exit 1
  fi
  # Because we have a lot of data here and we don't want the training to take
  # too long, we reduce the number of epochs from the default (15) to 8.
  # The option "--io-opts '--max-jobs-run 12'" gives us more than the default
  # number (5) of jobs dumping the egs to disk; this is OK since we're splitting
  # our data across four filesystems for speed.
  steps/nnet2/train_pnorm_simple.sh --stage $train_stage \
    --num-epochs 8 \
    --samples-per-iter 400000 \
    --splice-width 7 --feat-type raw \
    --online-ivector-dir exp/nnet2_online/ivectors_train \
    --cmvn-opts "--norm-means=false --norm-vars=false" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --parallel-opts "$parallel_opts" \
    --io-opts "--max-jobs-run 12" \
    --num-jobs-nnet 6 \
    --num-hidden-layers 4 \
    --mix-up 12000 \
    --initial-learning-rate 0.06 --final-learning-rate 0.006 \
    --cmd "$decode_cmd" \
    --pnorm-input-dim 3000 \
    --pnorm-output-dim 300 \
    data/train_hires data/lang exp/tri3b $dir || exit 1;
fi

if [ $stage -le 7 ]; then
  steps/online/nnet2/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \
    --add-pitch true data/lang exp/nnet2_online/extractor "$dir" ${dir}_online || exit 1;
fi
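# Example only (not run by this script): once ${dir}_online exists, you can
# also decode a single recording by hand with the online2 binaries.  Here
# "my_test.wav" is a placeholder for a wav file at the sample rate expected by
# conf/mfcc_hires.conf, and the graph is the same one used for decoding below:
#
#   online2-wav-nnet2-latgen-faster --online=true --do-endpointing=false \
#     --config=${dir}_online/conf/online_nnet2_decoding.conf \
#     --word-symbol-table=exp/tri3b/graph/words.txt \
#     ${dir}_online/final.mdl exp/tri3b/graph/HCLG.fst \
#     'ark:echo utt1 utt1|' 'scp:echo utt1 my_test.wav|' ark:/dev/null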
if [ $stage -le 8 ]; then
  # Do the actual online decoding with iVectors, carrying info forward from
  # previous utterances of the same speaker.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $decode_nj \
    exp/tri3b/graph data/test ${dir}_online/decode_test || exit 1;
fi

if [ $stage -le 9 ]; then
  # This version of the decoding treats each utterance separately,
  # without carrying forward speaker information.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $decode_nj \
    --per-utt true \
    exp/tri3b/graph data/test ${dir}_online/decode_test_utt || exit 1;
fi

if [ $stage -le 10 ]; then
  # This version of the decoding treats each utterance separately,
  # without carrying forward speaker information, but looks to the end
  # of the utterance while computing the iVector.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $decode_nj \
    --per-utt true --online false \
    exp/tri3b/graph data/test ${dir}_online/decode_test_utt_offline || exit 1;
fi

exit 0;
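# Example only (assuming the default scoring inside steps/online/nnet2/decode.sh
# has produced wer_* files): to compare the three decoding conditions above,
# something like this prints the best WER from each decode directory:
#
#   for d in ${dir}_online/decode_test{,_utt,_utt_offline}; do
#     grep WER $d/wer_* | utils/best_wer.sh
#   done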