Blame view
egs/heroico/s5/local/nnet3/run_ivector_common.sh
5.25 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
#!/bin/bash

set -euo pipefail

# This script is called from local/nnet3/run_tdnn.sh and
# local/chain/run_tdnn.sh (and may eventually be called by more
# scripts).  It contains the common feature preparation and
# iVector-related parts of the script.  See those scripts for examples
# of usage.
#
# Stages:
#   1  speed-perturb the training data and extract low-resolution MFCCs
#   2  align the speed-perturbed data with the GMM system
#   3  extract high-resolution MFCCs for training and test data
#   4  train the PCA transform and the diagonal UBM
#   5  train the iVector extractor
#   6  extract iVectors for training and test data

# Defaults.  utils/parse_options.sh is sourced below; by Kaldi convention it
# lets the caller override these with "--name value" (e.g. --stage 3).
stage=0
# NOTE: nj is accepted as an option but is not referenced by any stage below;
# each stage passes its own fixed --nj.  Kept for command-line compatibility.
nj=56
# Threads used when training the diagonal UBM.  The default is 8 because that
# is the value this script has always passed to train_diag_ubm.sh.
num_threads_ubm=8
train_set=train
test_sets="native nonnative devtest test"
gmm=tri3b
nnet3_affix=

. ./cmd.sh
. ./path.sh
. utils/parse_options.sh

gmm_dir="exp/${gmm}"
ali_dir="exp/${gmm}_ali_${train_set}_sp"

# Fail early if the inputs this script depends on are missing.
for f in "data/${train_set}/feats.scp" "${gmm_dir}/final.mdl"; do
  if [ ! -f "$f" ]; then
    echo "$0: expected file $f to exist" >&2
    exit 1
  fi
done

if [ "$stage" -le 1 ]; then
  # perturb data to get alignments
  # nnet will be trained by high resolution data
  # _sp stands for speed-perturbed
  echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
  utils/data/perturb_data_dir_speed_3way.sh \
    "data/${train_set}" \
    "data/${train_set}_sp"

  echo "$0: making mfcc features for low-resolution speed-perturbed data"
  steps/make_mfcc.sh \
    --cmd "$train_cmd" \
    --nj 10 \
    "data/${train_set}_sp" || exit 1
  steps/compute_cmvn_stats.sh "data/${train_set}_sp" || exit 1
  utils/fix_data_dir.sh "data/${train_set}_sp"
fi

if [ "$stage" -le 2 ]; then
  echo "$0: aligning with the perturbed low-resolution data"
  steps/align_fmllr.sh \
    --nj 20 \
    --cmd "$train_cmd" \
    "data/${train_set}_sp" \
    data/lang \
    "$gmm_dir" \
    "$ali_dir" || exit 1
fi

if [ "$stage" -le 3 ]; then
  # Create high-resolution MFCC features (with 40 cepstra instead of 13).
  echo "$0: creating high-resolution MFCC features"

  # test_sets is a space-separated list, so it is deliberately left unquoted
  # to be split into one word per data set.
  # shellcheck disable=SC2086
  for datadir in "${train_set}_sp" ${test_sets}; do
    utils/copy_data_dir.sh \
      "data/${datadir}" \
      "data/${datadir}_hires"
  done

  # do volume-perturbation on the training data prior to extracting hires
  # features; this helps make trained nnets more invariant to test data volume.
  utils/data/perturb_data_dir_volume.sh \
    "data/${train_set}_sp_hires" || exit 1

  # shellcheck disable=SC2086
  for datadir in "${train_set}_sp" ${test_sets}; do
    steps/make_mfcc.sh \
      --nj 10 \
      --mfcc-config conf/mfcc_hires.conf \
      --cmd "$train_cmd" \
      "data/${datadir}_hires" || exit 1
    steps/compute_cmvn_stats.sh \
      "data/${datadir}_hires" || exit 1
    utils/fix_data_dir.sh \
      "data/${datadir}_hires" || exit 1
  done
fi

if [ "$stage" -le 4 ]; then
  echo "$0: computing a subset of data to train the diagonal UBM."
  # We'll use about a quarter of the data.
  mkdir -p "exp/nnet3${nnet3_affix}/diag_ubm"
  temp_data_root="exp/nnet3${nnet3_affix}/diag_ubm"

  num_utts_total=$(wc -l <"data/${train_set}_sp_hires/utt2spk")
  num_utts=$((num_utts_total / 4))
  utils/data/subset_data_dir.sh \
    "data/${train_set}_sp_hires" \
    "$num_utts" \
    "${temp_data_root}/${train_set}_sp_hires_subset"

  echo "$0: computing a PCA transform from the hires data."
  steps/online/nnet2/get_pca_transform.sh \
    --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" \
    --max-utts 10000 \
    --subsample 2 \
    "${temp_data_root}/${train_set}_sp_hires_subset" \
    "exp/nnet3${nnet3_affix}/pca_transform"

  echo "$0: training the diagonal UBM."
  # Use 512 Gaussians in the UBM.
  steps/online/nnet2/train_diag_ubm.sh \
    --cmd "$train_cmd" \
    --nj 20 \
    --num-frames 700000 \
    --num-threads "$num_threads_ubm" \
    "${temp_data_root}/${train_set}_sp_hires_subset" \
    512 \
    "exp/nnet3${nnet3_affix}/pca_transform" \
    "exp/nnet3${nnet3_affix}/diag_ubm"
fi

if [ "$stage" -le 5 ]; then
  # Train the iVector extractor on all of the speed-perturbed data;
  # iVector extractors can be sensitive to the amount of data.
  # The script defaults to an iVector dimension of 100.
  echo "$0: training the iVector extractor"
  steps/online/nnet2/train_ivector_extractor.sh \
    --cmd "$train_cmd" \
    --nj 10 \
    "data/${train_set}_sp_hires" \
    "exp/nnet3${nnet3_affix}/diag_ubm" \
    "exp/nnet3${nnet3_affix}/extractor" || exit 1
fi

if [ "$stage" -le 6 ]; then
  # Extract iVectors on the speed-perturbed training data.  With
  # --utts-per-spk-max 2, the script pairs the utterances into twos and
  # treats each pair as one speaker; this gives more diversity in iVectors.
  # Note that these are extracted 'online'.
  # NOTE(review): the earlier comment here also mentioned combining short
  # segments, but no such step appears in this script.

  # We don't encode 'max2' in the name of the ivectordir even though that is
  # the data the iVectors are extracted from: the result is still valid for
  # the non-'max2' data, since the utterance list is the same.
  ivectordir="exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires"

  # having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (iVector starts at zero).
  temp_data_root="${ivectordir}"
  utils/data/modify_speaker_info.sh \
    --utts-per-spk-max 2 \
    "data/${train_set}_sp_hires" \
    "${temp_data_root}/${train_set}_sp_hires_max2"

  steps/online/nnet2/extract_ivectors_online.sh \
    --cmd "$train_cmd" \
    --nj 20 \
    "${temp_data_root}/${train_set}_sp_hires_max2" \
    "exp/nnet3${nnet3_affix}/extractor" \
    "$ivectordir"

  # Also extract iVectors for test data.
  # No need for speed perturbation (sp).
  # shellcheck disable=SC2086
  for data in $test_sets; do
    steps/online/nnet2/extract_ivectors_online.sh \
      --cmd "$train_cmd" \
      --nj 8 \
      "data/${data}_hires" \
      "exp/nnet3${nnet3_affix}/extractor" \
      "exp/nnet3${nnet3_affix}/ivectors_${data}_hires"
  done
fi

exit 0