Blame view
egs/hkust/s5/local/nnet3/run_ivector_common.sh
6.15 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
#!/bin/bash

# This script is modified based on swbd/s5c/local/nnet3/run_ivector_common.sh
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
#
# It: (1) makes high-resolution MFCC+pitch features for train/dev (with random
# volume perturbation of the train wavs), (2) trains a PCA transform, a diagonal
# UBM and an iVector extractor on the no-pitch hires train data, (3) speed-perturbs
# the train data and aligns it with the GMM system, and (4) extracts online
# iVectors for the (speed-perturbed) train set and the dev set.
#
# Options (parsed from the command line by utils/parse_options.sh):
#   --stage N                resume from stage N (default: 0)
#   --num-threads-ubm N      kept for interface compatibility (currently unused here)
#   --ivector-extractor DIR  use a pre-trained extractor; skips stages 1-4

stage=0
num_threads_ubm=32
ivector_extractor=

set -e
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

gmm_dir=exp/tri5a
align_script=steps/align_fmllr.sh

if [ $stage -le 1 ] && [ -z "$ivector_extractor" ]; then
  # Create high-resolution MFCC features (with 40 cepstra instead of 13) with pitch.
  # this shows how you can split across multiple file-systems.  we'll split the
  # MFCC dir across multiple locations.  You might want to be careful here, if you
  # have multiple copies of Kaldi checked out and run the same recipe, not to let
  # them overwrite each other.
  mfccdir=mfcc_hires
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
    utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/hkust-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
  fi

  for datadir in train dev; do
    utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
    if [ "$datadir" == "train" ]; then
      # Apply a random per-utterance volume scaling (via sox) to the training
      # wavs, to make the system more invariant to signal level.
      dir=data/train_hires
      cat $dir/wav.scp | python -c "
import sys, random
scale_low = 1.0/8
scale_high = 2.0
for line in sys.stdin.readlines():
  if len(line.strip()) == 0:
    continue
  # single-argument print() is valid under both python2 and python3.
  print('{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)))
" | sort -k1,1 -u > $dir/wav.scp_scaled || exit 1;
      mv $dir/wav.scp $dir/wav.scp_nonorm
      mv $dir/wav.scp_scaled $dir/wav.scp
    fi

    steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
      --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;
    steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;

    # make MFCC data dir without pitch to extract iVector
    utils/data/limit_feature_dim.sh 0:39 data/${datadir}_hires data/${datadir}_hires_nopitch || exit 1;
    steps/compute_cmvn_stats.sh data/${datadir}_hires_nopitch exp/make_hires/$datadir $mfccdir || exit 1;
  done
fi

if [ $stage -le 2 ] && [ -z "$ivector_extractor" ]; then
  # perform PCA on the data
  echo "$0: computing a PCA transform from the no-pitch hires data."
  # NOTE: this used to read data/${train_set}_hires_nopitch, but train_set is
  # only assigned after stage 5, so it expanded empty here; stages 3 and 4
  # train on data/train_hires_nopitch, so the PCA must use the same data.
  steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" \
    --max-utts 10000 --subsample 2 \
    data/train_hires_nopitch \
    exp/nnet3/tri5_pca
fi

if [ $stage -le 3 ] && [ -z "$ivector_extractor" ]; then
  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
    --num-frames 700000 \
    data/train_hires_nopitch 512 exp/nnet3/tri5_pca exp/nnet3/diag_ubm
fi

if [ $stage -le 4 ] && [ -z "$ivector_extractor" ]; then
  # iVector extractors can in general be sensitive to the amount of data, but
  # this one has a fairly small dim (defaults to 100)
  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
    data/train_hires_nopitch exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
  ivector_extractor=exp/nnet3/extractor
fi

if [ $stage -le 5 ]; then
  # Although the nnet will be trained by high resolution data,
  # we still have to perturb the normal data to get the alignment
  # _sp stands for speed-perturbed
  utils/perturb_data_dir_speed.sh 0.9 data/train data/temp1
  utils/perturb_data_dir_speed.sh 1.0 data/train data/temp2
  utils/perturb_data_dir_speed.sh 1.1 data/train data/temp3
  utils/combine_data.sh --extra-files utt2uniq data/train_sp data/temp1 data/temp2 data/temp3
  rm -r data/temp1 data/temp2 data/temp3

  mfccdir=mfcc_perturbed
  for x in train_sp; do
    steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 70 \
      data/$x exp/make_mfcc/$x $mfccdir || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  done
  utils/fix_data_dir.sh data/train_sp

  # Align the speed-perturbed low-resolution data with the GMM system; the
  # alignments are what the nnet training will use as supervision.
  $align_script --nj 30 --cmd "$train_cmd" \
    data/train_sp data/lang $gmm_dir ${gmm_dir}_sp_ali || exit 1

  # Now perturb the high resolution data
  utils/copy_data_dir.sh data/train_sp data/train_sp_hires
  mfccdir=mfcc_perturbed_hires
  for x in train_sp_hires; do
    steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 70 --mfcc-config conf/mfcc_hires.conf \
      data/$x exp/make_hires/$x $mfccdir || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_hires/$x $mfccdir || exit 1;
    # create MFCC data dir without pitch to extract iVector
    utils/data/limit_feature_dim.sh 0:39 data/$x data/${x}_nopitch || exit 1;
    steps/compute_cmvn_stats.sh data/${x}_nopitch exp/make_hires/$x $mfccdir || exit 1;
  done
  utils/fix_data_dir.sh data/train_sp_hires
fi

train_set=train_sp
if [ -z "$ivector_extractor" ]; then
  # We get here with --stage > 4 and no --ivector-extractor supplied.
  echo "iVector extractor is not found!"
  exit 1;
fi

if [ $stage -le 6 ]; then
  rm -f exp/nnet3/.error 2>/dev/null
  ivectordir=exp/nnet3/ivectors_${train_set}
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
    utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/ivectors/hkust-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage
  fi
  # We extract iVectors on all the train data, which will be what we train the
  # system on.  With --utts-per-spk-max 2, the script pairs the utterances
  # into twos, and treats each of these pairs as one speaker.  Note that these
  # are extracted 'online'.
  # having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires_nopitch data/${train_set}_hires_nopitch_max2
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
    data/${train_set}_hires_nopitch_max2 \
    $ivector_extractor $ivectordir \
    || touch exp/nnet3/.error
  [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1;
fi

if [ $stage -le 7 ]; then
  rm -f exp/nnet3/.error 2>/dev/null
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \
    data/dev_hires_nopitch $ivector_extractor exp/nnet3/ivectors_dev || touch exp/nnet3/.error &
  wait
  [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1;
fi

exit 0;