# egs/tunisian_msa/s5/local/nnet3/run_ivector_common.sh
# (commit 8dcb6dfcb, Yannick Estève — first commit; web blame-view gutter removed)
  #!/bin/bash
  
  set -euo pipefail
  
  # This script is called from local/nnet3/run_tdnn.sh and
  # local/chain/run_tdnn.sh (and may eventually be called by more
  # scripts).  It contains the common feature preparation and
  # iVector-related parts of the script.  See those scripts for examples
  # of usage.
  
  stage=0
  train_set=train
  test_sets="devtest test"
  gmm=tri3b
  
  nnet3_affix=
  
  . ./cmd.sh
  . ./path.sh
  . utils/parse_options.sh
  
  gmm_dir=exp/${gmm}
  ali_dir=exp/${gmm}_ali_${train_set}_sp
  
  for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
    if [ ! -f $f ]; then
      echo "$0: expected file $f to exist"
      exit 1
    fi
  done
  
  if [ $stage -le 1 ]; then
      # perturb data to get alignments
      # nnet will be trained by high resolution data
      # _sp stands for speed-perturbed
      echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
      utils/data/perturb_data_dir_speed_3way.sh \
  	data/${train_set} \
  	data/${train_set}_sp
      echo "$0: making mfcc features for low-resolution speed-perturbed data"
      steps/make_mfcc.sh \
  	--cmd "$train_cmd" \
  	--nj 10 \
  	data/${train_set}_sp
      steps/compute_cmvn_stats.sh \
  	data/${train_set}_sp
      utils/fix_data_dir.sh \
  	data/${train_set}_sp
  fi
  
  if [ $stage -le 2 ]; then
      echo "$0: aligning with the perturbed low-resolution data"
      steps/align_fmllr.sh \
  	--nj 20 \
  	--cmd "$train_cmd" \
  	data/${train_set}_sp \
  	data/lang \
  	$gmm_dir \
  	$ali_dir
  fi
  
  if [ $stage -le 3 ]; then
      # Create high-resolution MFCC features (with 40 cepstra instead of 13).
  
      echo "$0: creating high-resolution MFCC features"
      mfccdir=data/${train_set}_sp_hires/data
      for datadir in ${train_set}_sp ${test_sets}; do
  	utils/copy_data_dir.sh \
  	    data/$datadir \
  	    data/${datadir}_hires
      done
  
      # do volume-perturbation on the training data prior to extracting hires
      # features; this helps make trained nnets more invariant to test data volume.
      utils/data/perturb_data_dir_volume.sh \
  	data/${train_set}_sp_hires
  
      for datadir in ${train_set}_sp ${test_sets}; do
  	steps/make_mfcc.sh \
  	    --nj 10 \
  	    --mfcc-config conf/mfcc_hires.conf \
  	    --cmd "$train_cmd" \
  	    data/${datadir}_hires
  	steps/compute_cmvn_stats.sh \
  	    data/${datadir}_hires
  	utils/fix_data_dir.sh \
  	    data/${datadir}_hires
      done
  fi
  
  if [ $stage -le 4 ]; then
      echo "$0: computing a subset of data to train the diagonal UBM."
      # We'll use about a quarter of the data.
      mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
      temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
  
      num_utts_total=$(wc -l <data/${train_set}_sp_hires/utt2spk)
      num_utts=$[$num_utts_total/4]
      utils/data/subset_data_dir.sh \
  	data/${train_set}_sp_hires \
  	$num_utts \
  	${temp_data_root}/${train_set}_sp_hires_subset
  
      echo "$0: computing a PCA transform from the hires data."
      steps/online/nnet2/get_pca_transform.sh \
  	--cmd "$train_cmd" \
  	--splice-opts "--left-context=3 --right-context=3" \
  	--max-utts 10000 \
  	--subsample 2 \
  	${temp_data_root}/${train_set}_sp_hires_subset \
  	exp/nnet3${nnet3_affix}/pca_transform
  
      echo "$0: training the diagonal UBM."
      # Use 512 Gaussians in the UBM.
      steps/online/nnet2/train_diag_ubm.sh \
  	--cmd "$train_cmd" \
  	--nj 20 \
  	--num-frames 700000 \
  	--num-threads 8 \
  	${temp_data_root}/${train_set}_sp_hires_subset \
  	512 \
  	exp/nnet3${nnet3_affix}/pca_transform \
  	exp/nnet3${nnet3_affix}/diag_ubm
  fi
  
  if [ $stage -le 5 ]; then
      # Train the iVector extractor.
      # Use all the speed-perturbed data .
      # iVector extractors can be sensitive to the amount of data.
      # The script defaults to an iVector dimension of 100.
      echo "$0: training the iVector extractor"
      steps/online/nnet2/train_ivector_extractor.sh \
  	--cmd "$train_cmd" \
  	--nj 10 \
  	data/${train_set}_sp_hires \
  	exp/nnet3${nnet3_affix}/diag_ubm \
  	exp/nnet3${nnet3_affix}/extractor
  fi
  
  # combine   and train system on short segments.
  # extract iVectors on speed-perturbed training data
  # With --utts-per-spk-max 2, script pairs  utterances into twos.
  # Treats each  pair as one speaker.
  # Gives more diversity in iVectors.
  # Extracted online.
  
  # note: extract  ivectors from max2 data
  # Why is max2 not encoded in ivectordir name?
  # valid for non-max2 data
  #  utterance list is the same.
  
  # having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (iVector starts at zero).
  
  if [ $stage -le 6 ]; then
      ivectordir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires
      temp_data_root=${ivectordir}
      utils/data/modify_speaker_info.sh \
  	--utts-per-spk-max 2 \
  	data/${train_set}_sp_hires \
  	${temp_data_root}/${train_set}_sp_hires_max2
  
      steps/online/nnet2/extract_ivectors_online.sh \
  	--cmd "$train_cmd" \
  	--nj 20 \
  	${temp_data_root}/${train_set}_sp_hires_max2 \
  	exp/nnet3${nnet3_affix}/extractor \
  	$ivectordir
  fi
  
  # Also extract iVectors for test data.
  # No need for speed perturbation (sp).
  
  if [ $stage -le 7 ]; then
      for data in $test_sets; do
  	steps/online/nnet2/extract_ivectors_online.sh \
  	    --cmd "$train_cmd" \
  	    --nj 1 \
  	    data/${data}_hires \
  	    exp/nnet3${nnet3_affix}/extractor \
  	    exp/nnet3${nnet3_affix}/ivectors_${data}_hires
      done
  fi
  
  exit 0