egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
#!/bin/bash

. ./cmd.sh
set -e
stage=1
train_stage=-10
generate_alignments=false # false if doing chain training
speed_perturb=true

. ./path.sh
. ./utils/parse_options.sh

# perturbed data preparation
train_set=train_nodup

if [ "$speed_perturb" == "true" ]; then
  if [ $stage -le 1 ]; then
    # Although the nnet will be trained on high-resolution data, we still have to
    # perturb the normal-resolution data to get the alignments.
    # _sp stands for speed-perturbed.
    echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
    utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp

    echo "$0: making MFCC features for low-resolution speed-perturbed data"
    steps/make_mfcc.sh --nj 70 --cmd "$train_cmd" \
      data/${train_set}_sp || exit 1
    steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1
    utils/fix_data_dir.sh data/${train_set}_sp || exit 1
  fi

  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
    # obtain the alignments of the perturbed data
    steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
      data/${train_set}_sp data/lang exp/tri5a exp/tri5a_ali_nodup_sp || exit 1
  fi
  train_set=${train_set}_sp
fi

if [ $stage -le 3 ]; then
  # Create high-resolution MFCC features (with 40 cepstra instead of 13).
  # This also shows how you can split the features across multiple file-systems.
  echo "$0: creating high-resolution MFCC features"
  mfccdir=mfcc_hires
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
    date=$(date +'%m_%d_%H_%M')
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/mfcc/fisher_swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
  fi

  # The 100k_nodup directory is copied separately, as we want to use
  # exp/tri1b_ali_100k_nodup for ivector extractor training; the main train
  # directory might be speed-perturbed.
  for dataset in $train_set train_100k_nodup; do
    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires

    # Do volume perturbation on the training data prior to extracting hires
    # features; this helps make trained nnets more invariant to test data volume.
    utils/data/perturb_data_dir_volume.sh data/${dataset}_hires

    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
      --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;

    # Remove the small number of utterances that couldn't be extracted for some
    # reason (e.g. too short; no such file).
    utils/fix_data_dir.sh data/${dataset}_hires;
  done

  for dataset in eval2000 rt03; do
    # Create MFCCs for the eval sets
    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
      data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
    utils/fix_data_dir.sh data/${dataset}_hires  # remove segments with problems
  done

  # Take the first 30k utterances (about 1/8th of the data); this will be used
  # for the diag-UBM training.
  utils/subset_data_dir.sh --first data/${train_set}_hires 30000 data/${train_set}_30k_hires
  utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires  # 33hr
fi

if [ $stage -le 5 ]; then
  echo "$0: computing a PCA transform from the hires data."
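  # (Roughly speaking, get_pca_transform.sh estimates a decorrelating,
  # dimension-reducing transform on spliced hires-MFCC frames; the resulting
  # transform in exp/nnet3/pca is consumed by the diag-UBM and iVector-extractor
  # stages below.)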
  steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" \
    --max-utts 10000 --subsample 2 \
    data/${train_set}_30k_nodup_hires exp/nnet3/pca
fi

if [ $stage -le 6 ]; then
  # To train a diagonal UBM we don't need very much data, so use the smallest subset.
  echo "$0: training the diagonal UBM."
  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
    data/${train_set}_30k_nodup_hires 512 exp/nnet3/pca exp/nnet3/diag_ubm
fi

if [ $stage -le 7 ]; then
  # iVector extractors can be sensitive to the amount of data, but this one has a
  # fairly small dim (defaults to 100), so we don't use all of the data; we use
  # just the 100k subset (just under half the data).
  echo "$0: training the iVector extractor"
  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
    data/train_100k_nodup_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
fi

if [ $stage -le 8 ]; then
  # We extract iVectors on the speed-perturbed training data after combining
  # short segments, which will be what we train the system on.  With
  # --utts-per-spk-max 2, the script pairs the utterances into twos, and treats
  # each of these pairs as one speaker; this gives more diversity in iVectors.
  # Note that these are extracted 'online'.

  # Note: we don't encode the 'max2' in the name of the ivectordir even though
  # that's the data we extract the ivectors from, as it's still going to be
  # valid for the non-'max2' data; the utterance list is the same.
  ivectordir=exp/nnet3/ivectors_${train_set}
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
    utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/ivectors/fisher_swbd-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage
  fi

  # Having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (the iVector starts at zero).
  temp_data_root=${ivectordir}
  utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \
    data/${train_set}_hires ${temp_data_root}/${train_set}_hires_max2

  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
    ${temp_data_root}/${train_set}_hires_max2 \
    exp/nnet3/extractor $ivectordir

  # Also extract iVectors for the test data.
  for data_set in eval2000 rt03; do
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
      data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_${data_set} || exit 1;
  done
fi

exit 0;
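Typical usage (a sketch; the exact invocation depends on the calling recipe, e.g. a
local/chain or local/nnet3 run_tdnn script, and the option values shown below are
just the script's own defaults restated):

  # run from egs/fisher_swbd/s5, after the GMM stages (train_100k_nodup, tri5a, etc.)
  local/nnet3/run_ivector_common.sh --stage 1 \
    --speed-perturb true \
    --generate-alignments false

On completion it leaves high-resolution MFCC data directories (data/*_hires), the
trained iVector extractor in exp/nnet3/extractor, and online iVectors under
exp/nnet3/ivectors_*.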