Blame view

egs/wsj/s5/steps/online/prepare_online_decoding.sh 10.3 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
  #!/bin/bash
  
  # Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
  # Apache 2.0
  
  # Begin configuration.
  # Every variable in this section is a default; the script sources
  # parse_options.sh after this section, and the usage message documents a
  # --some-name command-line option for most of them.

  # -- Job control --
  stage=0       # Stages below this number are skipped, which allows restarting
                # partway through after something went wrong.
  cmd=run.pl    # Job launcher (see --cmd in the usage message).
  cleanup=true  # If true, remove intermediate accumulator files once used.

  # -- Feature front end --
  feature_type=mfcc   # Base feature type (mfcc or plp); decides which feature
                      # config is written under <output-dir>/conf.
  add_pitch=false     # If true, pitch features are enabled in the output config.
  pitch_config=conf/pitch.conf
  pitch_process_config=conf/pitch_process.conf
  online_cmvn_config=conf/online_cmvn.conf
  per_utt_cmvn=false  # If true, apply online CMVN normalization per utterance
                      # rather than per speaker.

  # -- Basis fMLLR --
  per_utt_basis=true  # If true, treat each utterance as a separate speaker for
                      # purposes of basis training. Recommended when the number
                      # of actual speakers in the training set is less than
                      # (feature-dim) * (feature-dim+1).
  silence_weight=0.01 # Weight applied to silence when accumulating basis stats.
  # End configuration.
  
  # Record the exact invocation so that it shows up in the logs.
  echo "$0 $@"  # Print the command line for logging

  # Pick up the environment if the working directory provides one, then let
  # parse_options.sh process any --option arguments.
  if [ -f path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1

  # Exactly four or five positional arguments are accepted (the MMI model is
  # optional); anything else prints the usage message and aborts.
  case $# in
    4|5)
      ;;
    *)
      echo "Usage: $0 [options] <data-dir> <lang-dir> <sat-model-dir> [<MMI-model>] <output-dir>"
      echo "e.g.: $0 data/train data/lang exp/tri3b exp/tri3b_mmi/final.mdl exp/tri3b_online"
      echo "main options (for others, see top of script file)"
      echo "  --feature-type <mfcc|plp>                        # Type of the base features; "
      echo "                                                   # important to generate the correct"
      echo "                                                   # configs in <output-dir>/conf/"
      echo "  --online-cmvn-config <config>                    # config for online cmvn,"
      echo "                                                   # default conf/online_cmvn.conf"
      echo "  --add-pitch <true|false>                         # Append pitch features to cmvn"
      echo "                                                   # (default: false)"
      echo "  --per-utt-cmvn <true|false>                      # Apply online CMVN per utt, not"
      echo "                                                   # per speaker (default: false)"
      echo "  --per-utt-basis <true|false>                     # Do basis computation per utterance"
      echo "                                                   # (default: true)"
      echo "  --silence-weight <weight>                        # Weight on silence for basis fMLLR;"
      echo "                                                   # default 0.01."
      echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
      echo "  --config <config-file>                           # config containing options"
      echo "  --stage <stage>                                  # stage to do partial re-run from."
      exit 1
      ;;
  esac
  
  
  # The first three positional arguments always have the same meaning.
  data=$1
  lang=$2
  srcdir=$3
  # With five arguments the fourth names a separate rescoring (MMI) model;
  # otherwise the source directory's own final.mdl fills that role and the
  # fourth argument is the output directory.
  if [ $# -eq 5 ]; then
    mmi_model=$4
    dir=$5
  else
    mmi_model=$srcdir/final.mdl
    dir=$4
  fi
  
  
  # Abort early, naming the first missing input, rather than failing somewhere
  # in the middle of a later stage.
  for required in $srcdir/final.mdl $srcdir/ali.1.gz $data/feats.scp \
      $lang/phones.txt $mmi_model $online_cmvn_config; do
    if [ ! -f "$required" ]; then
      echo "$0: no such file $required"
      exit 1
    fi
  done
  
  # Reuse the job count of the source directory: its alignments (ali.JOB.gz)
  # and transforms (trans.JOB) are indexed by that job number.
  nj=$(cat $srcdir/num_jobs) || exit 1
  sdata=$data/split$nj
  # Split the data unless a split directory already exists and is newer than
  # feats.scp.
  if ! [[ -d $sdata && $data/feats.scp -ot $sdata ]]; then
    split_data.sh $data $nj || exit 1
  fi

  mkdir -p $dir/log
  echo $nj >$dir/num_jobs || exit 1

  # The lang directory and the source model must agree on the phone set before
  # phones.txt is copied into the output directory.
  if ! utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt; then
    exit 1
  fi
  cp $lang/phones.txt $dir || exit 1
  
  # splice_opts and cmvn_opts are optional: a missing file simply yields an
  # empty string. The silence phone list is mandatory.
  splice_opts=$(cat $srcdir/splice_opts 2>/dev/null)
  cmvn_opts=$(cat $srcdir/cmvn_opts 2>/dev/null)
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1

  # Best-effort copy: files that do not exist in the source directory are
  # silently skipped (errors are discarded and the status is not checked).
  for item in splice_opts cmvn_opts final.mat final.mdl; do
    cp $srcdir/$item $dir/ 2>/dev/null
  done

  # The rescoring model is stored under a fixed name in the output directory.
  cp $mmi_model $dir/final.rescore_mdl
  
  # Set up the unadapted features "$sifeats".
  # A final.mat in the output directory selects the lda feature type;
  # without one the delta feature type is used.
  feat_type=delta
  if [ -f $dir/final.mat ]; then
    feat_type=lda
  fi

  # Option handed to apply-cmvn-online further down.
  # NOTE(review): --spk2utt is passed only when per_utt_cmvn is true, which
  # looks inverted relative to the flag's description ("per utterance rather
  # than per speaker") -- confirm against apply-cmvn-online before changing.
  if $per_utt_cmvn; then
    online_cmvn_spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
  else
    online_cmvn_spk2utt_opt=
  fi
  
  
  # Sum the CMVN statistics listed in cmvn.scp into one text-format matrix,
  # $dir/global_cmvn.stats. The tool's stderr is discarded, so on failure only
  # the message below is shown.
  matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null || {
    echo "$0: Error summing cmvn stats"
    exit 1
  }
  
  # Extra option for apply-cmvn-online when pitch is appended. It is reset
  # first so that a skip_opt value inherited from the environment cannot leak
  # into the pipeline when add_pitch is false.
  skip_opt=
  if $add_pitch; then
    skip_opt="--skip-dims=13:14:15" # should make this more general.
  fi

  echo "$0: feature type is $feat_type";

  # Build the two speaker-independent feature pipelines:
  #   sifeats        - offline CMVN (apply-cmvn with per-speaker stats)
  #   online_sifeats - online CMVN (apply-cmvn-online with the global stats)
  # Both share the same tail, which is the only part that depends on the
  # feature type. "JOB" is left literal here; it is presumably substituted by
  # $cmd for each parallel job.
  case $feat_type in
    delta)
      feat_tail="add-deltas ark:- ark:- |"
      ;;
    lda)
      feat_tail="splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
      ;;
    *)
      echo "Invalid feature type $feat_type"
      exit 1
      ;;
  esac

  sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | $feat_tail"
  # All --options are kept ahead of the positional arguments. The delta
  # branch used to place $online_cmvn_spk2utt_opt after global_cmvn.stats,
  # unlike the lda branch; building the command once keeps them consistent.
  online_sifeats="ark,s,cs:apply-cmvn-online $skip_opt --config=$online_cmvn_config $online_cmvn_spk2utt_opt $dir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- | $feat_tail"
  
  # Set up the adapted features "$feats" for training set.
  # Start from the speaker-independent pipeline and, when the source directory
  # holds per-job transforms (trans.1 is used as the indicator), append a
  # transform-feats stage that applies them per speaker.
  feats="$sifeats"
  if [ -f $srcdir/trans.1 ]; then
    feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$srcdir/trans.JOB ark:- ark:- |"
  fi
  
  
  # Decide whether basis statistics are accumulated per utterance (no spk2utt
  # map is passed) or per speaker (the per-job spk2utt map is passed).
  if $per_utt_basis; then
    spk2utt_opt=  # treat each utterance as separate speaker when computing basis.
    echo "Doing per-utterance adaptation for purposes of computing the basis."
  else
    echo "Doing per-speaker adaptation for purposes of computing the basis."
    # Count speakers from the unsplit data directory: the split directory only
    # holds per-job subdirectories ($sdata/JOB/...), so $sdata/spk2utt is not
    # expected to exist. The threshold 41*40 corresponds to
    # (feature-dim) * (feature-dim+1) for 40-dimensional features.
    num_spk=$(wc -l <$data/spk2utt)
    if [ ${num_spk:-0} -lt $((41*40)) ]; then
      echo "Warning: number of speakers is small, might be better to use --per-utt-basis=true."
    fi
    spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
  fi
  
  # Stage 0: accumulate basis-fMLLR statistics, one accumulator file
  # ($dir/basis.acc.JOB) and one log file per job for JOB=1..$nj.
  # The pipes are written as \| so that they are passed through to $cmd as
  # part of the command instead of being interpreted by this script.
  if [ $stage -le 0 ]; then
    echo "$0: Accumulating statistics for basis-fMLLR computation"
    # Note: we get Gaussian level alignments with the "final.mdl" and the
    # speaker adapted features ($feats), while the basis statistics themselves
    # are accumulated on the speaker-independent features ($sifeats).
    # Silence phones are down-weighted by $silence_weight beforehand.
    $cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \
      ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:- \| \
      weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \
      gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \
      gmm-basis-fmllr-accs-gpost $spk2utt_opt \
      $dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1;
  fi
  
  # Stage 1: estimate the fMLLR basis ($dir/fmllr.basis) from every
  # basis.acc.* accumulator present in the output directory.
  if [ $stage -le 1 ]; then
    echo "$0: computing the basis matrices."
    if ! $cmd $dir/log/basis_training.log \
      gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.*; then
      exit 1
    fi
    # The accumulators are no longer needed once the basis exists; a failed
    # removal is ignored.
    $cleanup && rm $dir/basis.acc.* 2>/dev/null
  fi
  
  # Stage 2: build the "online alignment model" $dir/final.oalimdl.
  if [ $stage -le 2 ]; then
    echo "$0: accumulating stats for online alignment model."
  
    # Accumulate stats for "online alignment model"-- this model is computed with
    # the speaker-independent features and online CMVN, but matches
    # Gaussian-for-Gaussian with the final speaker-adapted model.
    # Two feature streams are supplied: "$feats" (adapted) and
    # "$online_sifeats" (online CMVN). One final.JOB.acc file is written per job.
  
    $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
      ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:-  \| \
      gmm-acc-stats-twofeats $dir/final.mdl "$feats" "$online_sifeats" \
      ark,s,cs:- $dir/final.JOB.acc || exit 1;
    # Sanity check: the number of accumulator files must equal the job count.
    [ `ls $dir/final.*.acc | wc -w` -ne "$nj" ] && echo "$0: Wrong #accs" && exit 1;
    # Update model.
    # The accumulators are summed on the fly and low-count Gaussians are kept
    # (--remove-low-count-gaussians=false).
    $cmd $dir/log/est_online_alimdl.log \
      gmm-est --remove-low-count-gaussians=false $dir/final.mdl \
      "gmm-sum-accs - $dir/final.*.acc|" $dir/final.oalimdl  || exit 1;
    # Accumulators are only needed for the estimation above.
    if $cleanup; then
      rm $dir/final.*.acc
    fi
  fi
  
  # Stage 3: write $dir/conf/online_decoding.conf together with the per-feature
  # config files it refers to. Any previous online_decoding.conf is kept as
  # online_decoding.conf.bak. Every config that the generated file points at
  # must be copied successfully, otherwise the script aborts.
  if [ $stage -le 3 ]; then
    mkdir -p $dir/conf
    # NOTE(review): this removes plp.conf/mfcc.conf from $dir itself, not from
    # $dir/conf -- presumably clean-up of an older layout; confirm.
    rm $dir/{plp,mfcc}.conf 2>/dev/null
    echo "$0: preparing configuration files in $dir/conf"
    if [ -f $dir/conf/online_decoding.conf ]; then
      echo "$0: moving $dir/conf/online_decoding.conf to $dir/conf/online_decoding.conf.bak"
      mv $dir/conf/online_decoding.conf $dir/conf/online_decoding.conf.bak
    fi
    conf=$dir/conf/online_decoding.conf
    : >$conf  # start from an empty file; everything below appends to it

    # Base feature configuration.
    case "$feature_type" in
      mfcc)
        echo "$0: creating $dir/conf/mfcc.conf"
        echo "--mfcc-config=$dir/conf/mfcc.conf" >>$conf
        if ! cp conf/mfcc.conf $dir/conf/; then
          echo "$0: error copying mfcc config to $dir/conf/"
          exit 1
        fi
        ;;
      plp)
        echo "$0: enabling plp features"
        echo "--feature-type=plp" >>$conf
        echo "$0: creating $dir/conf/plp.conf"
        echo "--plp-config=$dir/conf/plp.conf" >>$conf
        if ! cp conf/plp.conf $dir/conf/; then
          echo "$0: error copying plp config to $dir/conf/"
          exit 1
        fi
        ;;
      *)
        # Without a feature config the generated file would be unusable, so
        # stop here instead of continuing.
        echo "Unknown feature type $feature_type"
        exit 1
        ;;
    esac

    # Online CMVN configuration.
    if ! cp $online_cmvn_config $dir/conf/online_cmvn.conf; then
      echo "$0: error copying online cmvn config to $dir/conf/"
      exit 1;
    fi
    echo "--cmvn-config=$dir/conf/online_cmvn.conf" >>$conf

    # Splicing + LDA when a final.mat is present, otherwise deltas.
    if [ -f $dir/final.mat ]; then
      echo "$0: enabling feature splicing"
      echo "--splice-feats" >>$conf
      echo "$0: creating $dir/conf/splice.conf"
      # One option per line, as expected in a config file.
      for x in $(cat $dir/splice_opts); do echo $x; done > $dir/conf/splice.conf
      echo "--splice-config=$dir/conf/splice.conf" >>$conf
      echo "$0: enabling LDA"
      echo "--lda-matrix=$dir/final.mat" >>$conf
    else
      echo "$0: enabling deltas"
      echo "--add-deltas" >>$conf
    fi

    # Optional pitch features.
    if $add_pitch; then
      echo "$0: enabling pitch features"
      echo "--add-pitch" >>$conf
      echo "$0: creating $dir/conf/pitch.conf"
      echo "--pitch-config=$dir/conf/pitch.conf" >>$conf
      if ! cp $pitch_config $dir/conf/pitch.conf; then
        echo "$0: error copying pitch config to $dir/conf/"
        exit 1;
      fi;
      echo "$0: creating $dir/conf/pitch_process.conf"
      echo "--pitch-process-config=$dir/conf/pitch_process.conf" >>$conf
      if ! cp $pitch_process_config $dir/conf/pitch_process.conf; then
        echo "$0: error copying pitch process config to $dir/conf/"
        exit 1;
      fi;
      # Count the fields on the second line of the text-format stats matrix.
      # 17 is the expected width; presumably 16 feature dimensions plus one
      # count column -- confirm if the feature layout changes.
      nfields=$(sed -n '2,2p' $dir/global_cmvn.stats | \
        perl -e '$_ = <>; s/^\s+|\s+$//g; print scalar(split);');
      # Quoted so that an empty result is reported as a mismatch instead of
      # making the test itself fail and the check silently pass.
      if [ "$nfields" != 17 ]; then
        echo "$0: $dir/global_cmvn.stats has $nfields entries per row (expected 17)."
        echo "$0: Did you append pitch features?"
        exit 1;
      fi
      # Disabled: derive a pov-offset from the global stats.
      #offset=$(sed -n '2,2p' $dir/global_cmvn.stats | \
      #  perl -e '$_ = <>; s/^\s+|\s+$//g; ($t, $c) = (split)[13, 16]; print -$t/$c;');
      #echo "--pov-offset=$offset" >>$dir/conf/pitch_process.conf
    fi

    # Models and remaining options.
    echo "--fmllr-basis=$dir/fmllr.basis" >>$conf
    echo "--online-alignment-model=$dir/final.oalimdl" >>$conf
    echo "--model=$dir/final.mdl" >>$conf
    # Only mention a rescoring model when it differs from the main model.
    if ! cmp --quiet $dir/final.mdl $dir/final.rescore_mdl; then
      echo "--rescore-model=$dir/final.rescore_mdl" >>$conf
    fi
    echo "--silence-phones=$silphonelist" >>$conf
    echo "--endpoint.silence-phones=$silphonelist" >>$conf
    echo "--global-cmvn-stats=$dir/global_cmvn.stats" >>$conf
    echo "$0: created config file $conf"
  fi