Blame view

egs/reverb/s5/run.sh 6.34 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
  #!/bin/bash
  
  # Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe)
  #                     Johns Hopkins University (author: Szu-Jui Chen)
  #                     Johns Hopkins University (author: Aswin Shanmugam Subramanian)
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  # This is a shell script, but it's recommended that you run the commands one by
  # one by copying and pasting into the shell.
  # Caution: some of the graph creation steps use quite a bit of memory, so you
  # should run this on a machine that has sufficient memory.
  
  # Requirements: tcsh and matlab must both be found on PATH; abort early
  # otherwise.
  # `command -v` is used rather than `which` because it is a POSIX shell
  # builtin whose exit status reliably reports whether the tool was found;
  # the output format and exit status of `which` vary between systems.
  if ! command -v tcsh >/dev/null 2>&1; then
    echo "Install tcsh, which is used in some REVERB scripts" >&2
    exit 1
  fi
  if ! command -v matlab >/dev/null 2>&1; then
    echo "Install matlab, which is used to generate multi-condition data" >&2
    exit 1
  fi
  
  # Source the recipe-local configuration files from the current directory.
  # NOTE(review): presumably cmd.sh defines $train_cmd / $decode_cmd (used by
  # the stages below) and path.sh sets up PATH -- confirm against those files.
  . ./cmd.sh
  . ./path.sh
  
  # Default option values. They are assigned before utils/parse_options.sh is
  # sourced (presumably it overrides them from command-line options -- confirm).
  # stage: each step below is guarded by "stage -le N" and is skipped when
  # stage is greater than that step's number.
  stage=0
  # channel count passed as --nch to local/compute_se_scores.sh
  nch_se=8
  # flag for turning on computation of dereverberation measures
  compute_se=true
  # please make sure that you or your institution have the license to report PESQ before turning on the below flag
  enable_pesq=false
  
  . utils/parse_options.sh
  # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :
  # -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline',
  # NOTE(review): strict mode is enabled only after the files above have been
  # sourced; keep this ordering unless those files are known to be -u clean.
  set -euxo pipefail
  
  # Corpus locations (REVERB and WSJ0 data). Only hosts whose fully qualified
  # name ends in .clsp.jhu.edu are pre-configured; on any other host the script
  # stops here so that the paths can be filled in first.
  case "$(hostname -f)" in
    *.clsp.jhu.edu)
      reverb=/export/corpora5/REVERB_2014/REVERB
      export wsjcam0=/export/corpora3/LDC/LDC95S24/wsjcam0
      # The LDC WSJ0 directory is needed to obtain LMs: the REVERB data
      # directory only provides the bi-gram (bcb05cnp), but this recipe also
      # uses the 3-gram (tcb05cnp.z).
      # A 'wsj0' subdirectory is assumed to exist within the top-level corpus
      # directory.
      export wsj0=/export/corpora5/LDC/LDC93S6A/11-13.1 #LDC93S6A or LDC93S6B
      ;;
    *)
      echo "Set the data directory locations." && exit 1
      ;;
  esac
  
  # Training set, followed by the list of test sets (space-separated). The
  # list is assembled in three groups: the *_8ch_beamformit sets, the
  # *_1ch_wpe sets, and the clean (*_cln) sets.
  train_set="tr_simu_8ch"
  test_sets="dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit"
  test_sets+=" dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe"
  test_sets+=" dt_cln et_cln"
  
  # Language model used for decoding: either tg_5k or bg_5k.
  lm='tg_5k'
  
  # Job counts: nj for feature extraction and model training,
  # decode_nj for decoding.
  nj=92
  decode_nj=10
  
  # Both directories are resolved relative to the directory the script is
  # started from.
  wavdir="${PWD}/wav"
  pesqdir="${PWD}/local"
  # Stage 1: data preparation. Runs the three local data scripts, all writing
  # under ${wavdir}. The progress message is labelled with the same stage
  # number as the guard (it previously said "stage 0"), and the path arguments
  # are quoted so that directories containing spaces are passed intact.
  if [ ${stage} -le 1 ]; then
    echo "stage 1: Data preparation"
    local/generate_data.sh --wavdir "${wavdir}" "${wsjcam0}"
    local/prepare_simu_data.sh --wavdir "${wavdir}" "${reverb}" "${wsjcam0}"
    local/prepare_real_data.sh --wavdir "${wavdir}" "${reverb}"
  fi
  
  # Stage 2: run the WPE script, then the beamforming script on the WPE/
  # subdirectory of the wav directory.
  if test $stage -le 2; then
    local/run_wpe.sh --cmd "${train_cmd}"
    local/run_beamform.sh --cmd "${train_cmd}" $wavdir/WPE/
  fi
  
  # Stage 3 (only when $compute_se evaluates to true): compute the
  # dereverberation scores and print the SimData and RealData score files.
  if [ $stage -le 3 ] && $compute_se; then
    # The speech enhancement evaluation tools are downloaded and installed
    # unless all three expected paths are already present.
    if ! { [ -d local/REVERB_scores_source ] \
        && [ -d local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools/SRMRToolbox ] \
        && [ -f local/PESQ ]; }; then
      local/download_se_eval_tool.sh
    fi
    local/compute_se_scores.sh --nch ${nch_se} --enable_pesq ${enable_pesq} ${reverb} ${wavdir} ${pesqdir}
    for score_file in score_SimData score_RealData; do
      cat exp/compute_se_${nch_se}ch/scores/${score_file}
    done
  fi
  
  # Stage 4: clean data, language model, dictionary and lang directory.
  if test $stage -le 4; then
    # wsjcam0 clean data and the wsj0 language model
    local/wsjcam0_data_prep.sh ${wsjcam0} ${wsj0}
  
    # merged BEEP/CMU dictionary
    local/wsj_prepare_beep_dict.sh
  
    # wordlists, etc.
    utils/prepare_lang.sh data/local/dict "<NOISE>" data/local/lang_tmp data/lang
  
    # directory structure for the clean data, plus some language model fixes
    local/wsjcam0_format_data.sh
  fi
  
  # Stage 5: for the training set and every test set, keep a copy of the data
  # directory as data/<set>_nosplit and regenerate data/<set> from that copy
  # with modify_speaker_info.sh (--seconds-per-spk-max 180).
  if test $stage -le 5; then
    for dset in $train_set $test_sets; do
      nosplit_dir=data/${dset}_nosplit
      utils/copy_data_dir.sh data/$dset $nosplit_dir
      utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 $nosplit_dir data/$dset
    done
  fi
  
  # Stage 6: MFCC extraction followed by CMVN statistics for the training set
  # and every test set. Both scripts receive the same three directories:
  # data/<set>, exp/make_mfcc/<set> and $mfccdir.
  if test $stage -le 6; then
    mfccdir=mfcc
    for dset in $train_set $test_sets; do
      mfcc_exp=exp/make_mfcc/$dset
      steps/make_mfcc.sh --cmd "${train_cmd}" --nj 30 data/$dset $mfcc_exp $mfccdir
      steps/compute_cmvn_stats.sh data/$dset $mfcc_exp $mfccdir
    done
  fi
  
  # Stage 7: basic training on the MFCC features; the model goes to exp/mono.
  if test $stage -le 7; then
    steps/train_mono.sh --nj ${nj} --cmd "${train_cmd}" data/$train_set data/lang exp/mono
  fi
  
  # Stage 8: align the training data with exp/mono (into exp/mono_ali), then
  # run train_deltas.sh on those alignments to produce exp/tri1.
  if test $stage -le 8; then
    train_data=data/$train_set
    steps/align_si.sh --nj ${nj} --cmd "${train_cmd}" $train_data data/lang exp/mono exp/mono_ali
    steps/train_deltas.sh --cmd "${train_cmd}" 2500 30000 $train_data data/lang exp/mono_ali exp/tri1
  fi
  
  # Stage 9: align the training data with exp/tri1 (into exp/tri1_ali), then
  # run train_lda_mllt.sh on those alignments to produce exp/tri2.
  if test $stage -le 9; then
    train_data=data/$train_set
    steps/align_si.sh --nj ${nj} --cmd "${train_cmd}" $train_data data/lang exp/tri1 exp/tri1_ali
    steps/train_lda_mllt.sh --cmd "${train_cmd}" 4000 50000 $train_data data/lang exp/tri1_ali exp/tri2
  fi
  
  # Wait for each background job whose PID is given as an argument.
  # Arguments: PIDs of background jobs started by this shell.
  # Returns:   0 if every job exited with status 0, 1 otherwise.
  # A bare `wait` (no arguments) always returns 0, so the failure of a job
  # started with `&` would otherwise go unnoticed even under `set -e`.
  wait_for_jobs() {
    local pid failed=0
    for pid in "$@"; do
      wait "$pid" || failed=1
    done
    return "$failed"
  }
  
  # Stage 10: build the exp/tri2 decoding graph, then decode every test set in
  # parallel; abort if any of the decoding jobs fails.
  if [ $stage -le 10 ]; then
    utils/mkgraph.sh data/lang_test_$lm exp/tri2 exp/tri2/graph
    decode_pids=""
    for dset in ${test_sets}; do
      steps/decode.sh --nj $decode_nj --cmd "$decode_cmd"  --num-threads 4 \
        exp/tri2/graph data/${dset} exp/tri2/decode_${dset} &
      decode_pids="${decode_pids} $!"
    done
    # Intentionally unquoted: the space-separated PID list must be split.
    if ! wait_for_jobs ${decode_pids}; then
      echo "$0: decoding with exp/tri2 failed for at least one test set" >&2
      exit 1
    fi
  fi
  
  # Stage 11: align the training data with exp/tri2 (into exp/tri2_ali), then
  # run train_sat.sh on those alignments to produce exp/tri3.
  if [ $stage -le 11 ]; then
    steps/align_si.sh --nj $nj --cmd "$train_cmd" \
      data/${train_set} data/lang exp/tri2 exp/tri2_ali
  
    steps/train_sat.sh --cmd "$train_cmd" \
      5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3
  fi
  
  # Stage 12: build the exp/tri3 decoding graph, then decode every test set in
  # parallel with decode_fmllr.sh; abort if any of the decoding jobs fails.
  if [ $stage -le 12 ]; then
    utils/mkgraph.sh data/lang_test_$lm exp/tri3 exp/tri3/graph
    decode_pids=""
    for dset in ${test_sets}; do
      steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd"  --num-threads 4 \
        exp/tri3/graph data/${dset} exp/tri3/decode_${dset} &
      decode_pids="${decode_pids} $!"
    done
    # Intentionally unquoted: the space-separated PID list must be split.
    if ! wait_for_jobs ${decode_pids}; then
      echo "$0: decoding with exp/tri3 failed for at least one test set" >&2
      exit 1
    fi
  fi
  
  # Stage 13: chain TDNN recipe, started from the tri3 GMM system. The test-set
  # list is passed as one quoted argument.
  if test $stage -le 13; then
    local/chain/run_tdnn.sh \
      --nj $nj \
      --train-set $train_set \
      --test-sets "${test_sets}" \
      --gmm tri3 \
      --nnet3-affix _$train_set \
      --lm-suffix _test_${lm}
  fi
  
  # Stage 14: collect all WERs via the local results script.
  if test $stage -le 14; then
    local/get_results.sh
  fi