Blame view

egs/chime4/s5_1ch/local/run_gmm.sh 9.81 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
  #!/bin/bash
  
  # Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
  #                Inria (Emmanuel Vincent)
  #                Mitsubishi Electric Research Labs (Shinji Watanabe)
  #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
  
  # This script is made from the kaldi recipe of the 2nd CHiME Challenge Track 2
  # made by Chao Weng
  
  # Load the recipe environment. $train_cmd and $decode_cmd are used by the
  # stages below but never assigned in this script, so they presumably come
  # from cmd.sh -- confirm against your copy.
  . ./path.sh
  . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
             ## This relates to the queue.
  
  # Config: default values of the tunable options. They are assigned before
  # utils/parse_options.sh is sourced; presumably that script lets
  # command-line options such as --stage=N override them -- TODO confirm.
  nj=30 # upper bound on the job count for GMM training/alignment (stage 5 caps it at the number of speakers)
  stage=0 # resume training with --stage=N
  train=noisy # noisy data multi-condition training; also the suffix of the data/ and exp/ directory names
  eval_flag=true # make it true when the evaluation data are released; enables the et05_* sets
  add_enhanced_data=true # make it true when you want to add enhanced data into training set
  decode_only=false # if true, it wouldn't train a model again and will only do decoding
  
  . utils/parse_options.sh || exit 1;
  
  # This is a shell script, but it's recommended that you run the commands one by
  # one by copying and pasting into the shell.
  
  # Exactly three positional arguments must remain after option parsing.
  # With any other count the usage text is written to stderr and the script
  # stops with status 1.
  if [ "$#" -ne 3 ]; then
    {
      printf "
  USAGE: %s <enhancement method> <enhanced speech directory> <chime4 root directory>
  
  " "$(basename "$0")"
      echo "First argument specifies a unique name for different enhancement method"
      echo "Second argument specifies the directory of enhanced wav files"
      echo "Third argument specifies the CHiME4 root directory"
    } >&2
    exit 1
  fi
  
  # set enhanced data
  enhan=$1        # unique name of the enhancement method (used in data/ and exp/ names)
  enhan_data=$2   # directory of enhanced wav files
  # set chime4 data
  chime4_data=$3  # CHiME4 root directory
  
  # Strict mode for everything below: abort on a failing command (-e), on use
  # of an unset variable (-u), and when any stage of a pipeline fails
  # (-o pipefail). Command tracing (-x) is not enabled here.
  set -euo pipefail
  
  # data/lang has to exist before anything else runs; the message below names
  # local/run_init.sh as the step to execute first.
  [ -d data/lang ] || {
    echo "error, execute local/run_init.sh, first"
    exit 1
  }
  
  # Decode-only mode: skip training and reuse an existing tri3b model.
  # mdir is the directory expected to hold data/local/data and
  # exp/tri3b_tr05_multi_${train}. It defaults to the current directory, in
  # which case the two copy branches below can never trigger; edit mdir to
  # pull both from another location. All paths are quoted because mdir comes
  # from pwd and the enhanced-data directory from the command line.
  # Finally stage is forced to 6 so that every training stage is skipped.
  if $decode_only; then
    # check data/local/data
    mdir=$(pwd)
    if [ ! -d "$mdir/data/local/data" ]; then
      echo "error, $mdir/data/local/data does not exist, set mdir correctly" >&2
      exit 1
    elif [ ! -d data/local/data ]; then
      echo "copy $mdir/data/local/data"
      mkdir -p data/local
      cp -r "$mdir/data/local/data" data/local/
    fi
    # check gmm model
    gmm=exp/tri3b_tr05_multi_${train}
    if [ ! -d "$mdir/$gmm" ]; then
      echo "error, $mdir/$gmm does not exist, set mdir correctly" >&2
      exit 1
    elif [ ! -d "$gmm" ]; then
      echo "copy $mdir/$gmm"
      mkdir -p exp
      cp -r "$mdir/$gmm" exp/
    fi
    # process for enhanced data (only when the dt05 or et05 real enhanced
    # data directory is missing)
    if [ ! -d "data/dt05_real_$enhan" ] || [ ! -d "data/et05_real_$enhan" ]; then
      local/real_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
      local/simu_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
    fi
    stage=6
  fi
  #######################
  #### training #########
  # Stage 1: data preparation. The user-supplied directories are quoted so a
  # path containing whitespace reaches the prep scripts as a single argument.
  if [ "$stage" -le 1 ]; then
    # process for distant talking speech for real and simulation data
    local/real_noisy_chime4_data_prep.sh "$chime4_data"
    local/simu_noisy_chime4_data_prep.sh "$chime4_data"
    # process for enhanced data (skipped when both the dt05 and et05 real
    # enhanced data directories already exist)
    if [ ! -d "data/dt05_real_$enhan" ] || [ ! -d "data/et05_real_$enhan" ]; then
      local/real_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
      local/simu_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
    fi
  fi
  # Stage 2: copy enhanced training data for 1ch and 2ch experiments.
  # Skipped entirely when the working directory path contains "s5_6ch".
  # The tr05_{real,simu}_$enhan data directories are taken from ../s5_6ch/data;
  # if no source is found (and $enhan is not an isolated_1ch_track variant)
  # the script stops with status 1.
  if [ "$stage" -le 2 ] && [[ "$PWD" != *s5_6ch* ]]; then
    beamformed=0
    # First remove any tr05_{real,simu}_$enhan directory left by the previous
    # stage. Written as an explicit if so that a failing rm aborts the script
    # (inside an && chain its failure would be ignored by set -e).
    for d in tr05_{real,simu}_"$enhan"; do
      if [ -d "data/$d" ]; then
        rm -rf "data/$d"
        echo "remove empty directory $d"
      fi
    done
    if [[ "$enhan" == *beamformit_2mics* ]] && [ -d ../s5_6ch/data/tr05_real_beamformit_5mics ]; then
      # 2-mic beamformit reuses the 5-mic training data under the 2mics name
      echo "copy tr05_{real,simu}_beamformit_5mics from ../s5_6ch/data/"
      cp -r ../s5_6ch/data/tr05_real_beamformit_5mics data/tr05_real_beamformit_2mics
      cp -r ../s5_6ch/data/tr05_simu_beamformit_5mics data/tr05_simu_beamformit_2mics
      beamformed=1
    elif [ -d "../s5_6ch/data/tr05_real_$enhan" ]; then
      echo "copy enhanced training data tr05_{real,simu}_$enhan from ../s5_6ch/data/"
      cp -r "../s5_6ch/data/tr05_real_$enhan" data/
      cp -r "../s5_6ch/data/tr05_simu_$enhan" data/
      beamformed=1
    elif [[ "$enhan" == *isolated_1ch_track* ]]; then
      # NOTE(review): nothing is copied here although the loop above removed
      # data/tr05_{real,simu}_$enhan -- confirm the later stages do not need
      # those directories in this configuration.
      beamformed=1
    fi
    if [ "$beamformed" -eq 0 ]; then
      echo "no such directory tr05_{real,simu}_{beamformit_5mics,blstm_gev,single_BLSTMmask}" >&2
      echo "They are generated by run_beamform_6ch_track.sh in ../s5_6ch/run.sh, please execute it first" >&2
      exit 1
    fi
  fi
  
  # Stage 3: MFCC extraction and CMVN statistics for the data sets used in
  # training. Features are written to $mfccdir, which should sit on a disk
  # with plenty of free space.
  mfccdir=mfcc
  if [ "$stage" -le 3 ]; then
    # always: noisy training and development sets
    tasks="tr05_real_${train} dt05_real_${train} tr05_simu_${train} dt05_simu_${train}"
    # with eval_flag: noisy evaluation sets
    if $eval_flag; then
      tasks="$tasks et05_real_${train} et05_simu_${train}"
    fi
    # with add_enhanced_data: enhanced training sets
    if $add_enhanced_data; then
      tasks="$tasks tr05_real_$enhan tr05_simu_$enhan"
    fi
    for x in $tasks; do
      feat_log=exp/make_mfcc/$x
      steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" data/$x $feat_log $mfccdir
      steps/compute_cmvn_stats.sh data/$x $feat_log $mfccdir
    done
  fi
  
  # Stage 4: build the "multi" sets, multi = simu + real.
  # With add_enhanced_data the enhanced simu/real training data are appended
  # to the noisy training data.
  if [ "$stage" -le 4 ]; then
    multi_srcs="data/tr05_simu_${train} data/tr05_real_${train}"
    if $add_enhanced_data; then
      multi_srcs="$multi_srcs data/tr05_simu_$enhan data/tr05_real_$enhan"
    fi
    utils/combine_data.sh data/tr05_multi_${train} $multi_srcs

    # development set always, evaluation set only with eval_flag
    subsets=dt05
    if $eval_flag; then
      subsets="$subsets et05"
    fi
    for subset in $subsets; do
      utils/combine_data.sh data/${subset}_multi_${train} data/${subset}_simu_${train} data/${subset}_real_${train}
    done
  fi
  
  # Stage 5: GMM training on the multi-condition set, one model after another:
  # mono0a -> tri1 -> tri2b -> tri3b, each followed by an alignment pass that
  # feeds the next trainer, then the tgpr_5k decoding graph for tri3b.
  if [ "$stage" -le 5 ]; then
    tag=tr05_multi_${train}
    trdata=data/$tag
    lang_dir=data/lang

    # never request more jobs than there are lines in spk2utt
    nspk=$(wc -l $trdata/spk2utt | awk '{print $1}')
    nj2=$nj
    if [ $nj -gt $nspk ]; then
      nj2=$nspk
    fi

    # mono0a (train_mono.sh) and its alignments
    steps/train_mono.sh --boost-silence 1.25 --nj $nj2 --cmd "$train_cmd" \
      $trdata $lang_dir exp/mono0a_$tag
    steps/align_si.sh --boost-silence 1.25 --nj $nj2 --cmd "$train_cmd" \
      $trdata $lang_dir exp/mono0a_$tag exp/mono0a_ali_$tag

    # tri1 (train_deltas.sh) and its alignments
    steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
      2000 10000 $trdata $lang_dir exp/mono0a_ali_$tag exp/tri1_$tag
    steps/align_si.sh --nj $nj2 --cmd "$train_cmd" \
      $trdata $lang_dir exp/tri1_$tag exp/tri1_ali_$tag

    # tri2b (train_lda_mllt.sh) and its alignments
    steps/train_lda_mllt.sh --cmd "$train_cmd" \
      --splice-opts "--left-context=3 --right-context=3" \
      2500 15000 $trdata $lang_dir exp/tri1_ali_$tag exp/tri2b_$tag
    steps/align_si.sh  --nj $nj2 --cmd "$train_cmd" \
      --use-graphs true $trdata $lang_dir exp/tri2b_$tag exp/tri2b_ali_$tag

    # tri3b (train_sat.sh) and its decoding graph
    steps/train_sat.sh --cmd "$train_cmd" \
      2500 15000 $trdata $lang_dir exp/tri2b_ali_$tag exp/tri3b_$tag
    utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b_$tag exp/tri3b_$tag/graph_tgpr_5k
  fi
  #### training done ####
  #######################
  
  
  #####################
  #### testing ########
  # Stage 6: MFCC extraction and CMVN statistics for the enhanced development
  # sets (plus the evaluation sets with eval_flag). Features go to $mfccdir,
  # which should sit on a disk with plenty of free space.
  mfccdir=mfcc/$enhan
  if [ "$stage" -le 6 ]; then
    tasks="dt05_real_$enhan dt05_simu_$enhan"
    if $eval_flag; then
      tasks="$tasks et05_real_$enhan et05_simu_$enhan"
    fi
    for x in $tasks; do
      # feats.scp already present: nothing to do for this set
      if [ -e data/$x/feats.scp ]; then
        continue
      fi
      feat_log=exp/make_mfcc/$x
      steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" data/$x $feat_log $mfccdir
      steps/compute_cmvn_stats.sh data/$x $feat_log $mfccdir
    done
  fi
  
  # Stage 7: build the "multi" sets for the enhanced data, multi = simu + real.
  # Everything is skipped once data/dt05_multi_$enhan exists.
  if [ "$stage" -le 7 ] && [ ! -d data/dt05_multi_$enhan ]; then
    # development set always, evaluation set only with eval_flag
    subsets=dt05
    if $eval_flag; then
      subsets="$subsets et05"
    fi
    for subset in $subsets; do
      utils/combine_data.sh data/${subset}_multi_$enhan data/${subset}_simu_$enhan data/${subset}_real_$enhan
    done
  fi
  
  # Wait for each background job whose PID is given.
  # Arguments: PIDs of jobs started from this shell.
  # Returns:   0 if every job exited with status 0, 1 otherwise.
  wait_for_decode_jobs() {
    local pid failed=0
    for pid in "$@"; do
      wait "$pid" || failed=1
    done
    return "$failed"
  }

  # Stage 8: decode the enhanced speech with the tri3b model in
  # exp/tri3b_tr05_multi_${train}. One decode_fmllr.sh job per data set is
  # started in the background. A bare `wait` always returns 0, so each PID is
  # waited for individually and the script aborts if any decode failed
  # instead of moving on to scoring.
  if [ "$stage" -le 8 ]; then
    am_dir=exp/tri3b_tr05_multi_${train}
    decode_sets="dt05_real dt05_simu"
    if $eval_flag; then
      decode_sets="$decode_sets et05_real et05_simu"
    fi
    decode_pids=()
    for decode_set in $decode_sets; do
      steps/decode_fmllr.sh --nj 4 --num-threads 3 --cmd "$decode_cmd" \
        "$am_dir/graph_tgpr_5k" "data/${decode_set}_$enhan" \
        "$am_dir/decode_tgpr_5k_${decode_set}_$enhan" &
      decode_pids+=("$!")
    done
    if ! wait_for_decode_jobs "${decode_pids[@]}"; then
      echo "error, at least one decoding job failed" >&2
      exit 1
    fi
  fi
  
  # Stage 9: scoring. local/chime4_calc_wers.sh is run on the decoding
  # results of the enhanced speech; its output is stored in
  # best_wer_$enhan.result inside the model directory and the first 15 lines
  # are printed.
  if [ "$stage" -le 9 ]; then
    am_dir=exp/tri3b_tr05_multi_${train}
    wer_file=$am_dir/best_wer_$enhan.result
    local/chime4_calc_wers.sh $am_dir $enhan $am_dir/graph_tgpr_5k > $wer_file
    head -n 15 $wer_file
  fi
  #### testing done ###
  #####################
  
  # Final marker line: "<script name> Done."
  printf '%s Done.\n' "$(basename $0)"