  #!/bin/bash
  
  # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
  #           2015  Guoguo Chen
  #           2017  Hainan Xu
  #           2017  Szu-Jui Chen
  
  # This script trains LSTM-based RNN LMs on the CHiME-4 data.
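  #
  # Usage: local/rnnlm/tuning/run_lstm_1a.sh [options] <enhan>
  # e.g.:  local/rnnlm/tuning/run_lstm_1a.sh beamformit_5mics
  # where <enhan> is the enhancement-method suffix used in the decode-directory
  # names (beamformit_5mics is just an illustrative value).
  #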
  # rnnlm/train_rnnlm.sh: best iteration (out of 120) was 91, linking it to final iteration.
  # rnnlm/train_rnnlm.sh: train/dev perplexity was 23.2 / 25.6.
  # Train objf: -5.63 -4.52 -4.20 -4.05 -3.96 -3.89 -3.83 -3.79 -3.76 -3.73 -3.70 -3.67 -3.65
  # -3.63 -3.61 -3.59 -3.58 -3.56 -3.54 -3.53 -3.52 -3.50 -3.49 -3.48 -3.47 -3.46 -3.45 -3.44
  # -3.43 -3.42 -3.43 -3.41 -3.39 -3.38 -3.38 -3.37 -3.35 -3.34 -3.34 -3.33 -3.32 -3.31 -3.31
  # -3.30 -3.29 -3.28 -3.28 -3.27 -3.26 -3.25 -3.25 -3.25 -3.23 -3.22 -3.23 -3.22 -3.21 -3.20
  # -3.20 -3.19 -3.19 -3.18 -3.18 -3.17 -3.16 -3.15 -3.16 -3.15 -3.14 -3.13 -3.13 -3.13 -3.12
  # -3.11 -3.12 -3.11 -3.10 -3.09 -3.09 -3.09 -3.08 -3.07 -3.07 -3.07 -3.06 -3.05 -3.05 -3.05
  # -3.04 -3.04 -3.04 -3.03 -3.00 -3.02 -3.00 -2.99 -3.00 -2.99 -2.99 -2.98 -2.96 -2.97 -2.96
  # -2.95 -2.96 -2.95 -2.95 -2.94 -2.93 -2.93 -2.92 -2.91 -2.92 -2.91 -2.91 -2.91 -2.89 -2.90 -2.89 -2.88 
  # Dev objf:  -11.73 -5.17 -4.46 -4.21 -4.06 -3.96 -3.88 -3.82 -3.79 -3.73 -3.69 -3.68 -3.63
  # -3.61 -3.59 -3.58 -3.54 -3.54 -3.53 -3.51 -3.50 -3.47 -3.47 -3.46 -3.44 -3.44 -3.42 -3.42 
  # -3.42 -3.42 -3.40 -3.36 -3.35 -3.35 -3.34 -3.34 -3.34 -3.33 -3.32 -3.32 -3.31 -3.31 -3.31 
  # -3.30 -3.29 -3.29 -3.29 -3.28 -3.28 -3.28 -3.27 -3.27 -3.26 -3.27 -3.27 -3.26 -3.25 -3.26 
  # -3.26 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.26 -3.25 -3.24 -3.25 -3.25 -3.24 -3.24 
  # -3.25 -3.25 -3.24 -3.24 -3.25 -3.26 -3.25 -3.25 -3.24 -3.25 -3.25 -3.24 -3.25 -3.25 -3.25 
  # -3.24 -3.26 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.26 -3.26 -3.26 -3.26 -3.26 
  # -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.28 -3.28 -3.28 -3.28 -3.29 -3.29 -3.29 
  
  # Begin configuration section.
  affix=1a
  dir=exp/rnnlm_lstm_${affix}
  embedding_dim=2048
  lstm_rpd=512
  lstm_nrpd=512
  stage=-10
  train_stage=-10
  
  # variables for lattice rescoring
  run_lat_rescore=true
  run_nbest_rescore=true
  use_backward_model=true
  ac_model_dir=exp/chain/tdnn1a_sp
  decode_dir_suffix=rnnlm_lstm_${affix}
  ngram_order=4 # approximate lattice rescoring by limiting the max ngram order;
                # if set, histories in the lattice that share the same n-gram
                # history are merged, which keeps the rescored lattice from
                # growing exponentially
  
  
  . cmd.sh
  . utils/parse_options.sh

  # Read the positional argument only after option parsing, so that leading
  # "--option value" pairs are consumed by parse_options.sh first.
  enhan=$1
  
  srcdir=data/local/local_lm
  lexicon=data/local/dict/lexiconp.txt
  text_dir=data/rnnlm/text_nosp_${affix}
  mkdir -p $dir/config
  set -e
  
  for f in $lexicon; do
    [ ! -f $f ] && \
      echo "$0: expected file $f to exist; search for local/wsj_extend_dict.sh in run.sh" && exit 1
  done
  
  # Prepare training and dev data
  if [ $stage -le 0 ]; then
    mkdir -p $text_dir
    cp $srcdir/train.rnn $text_dir/chime4.txt.tmp
    sed -e "s/<RNN_UNK>/<UNK>/g" $text_dir/chime4.txt.tmp > $text_dir/chime4.txt
    cp $srcdir/valid.rnn $text_dir/dev.txt
  fi
  
  if [ $stage -le 1 ]; then
    cp data/lang_chain/words.txt $dir/config/words.txt
    n=$(wc -l < $dir/config/words.txt)
    echo "<brk> $n" >> $dir/config/words.txt
    # words that appear in the training or dev data but not in words.txt will
    # be mapped to <UNK> (the OOV word declared in oov.txt) during training.
    echo "<UNK>" >$dir/config/oov.txt
  
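    # Each line of data_weights.txt is "<corpus-name> <repeats-per-epoch> <weight>";
    # here the chime4 text is repeated 3 times per epoch with weight 1.0.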
    cat > $dir/config/data_weights.txt <<EOF
  chime4   3   1.0
  EOF
  
    rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \
                               --unk-word="<UNK>" \
                               --data-weights-file=$dir/config/data_weights.txt \
                               $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
  
    # choose features
    rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
                             --use-constant-feature=true \
                             --special-words='<s>,</s>,<UNK>,<brk>' \
                             $dir/config/words.txt > $dir/config/features.txt
  
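    # Network config: three relu-renorm (TDNN-style) layers interleaved with
    # two projected LSTM layers; tdnn1 splices the previous step
    # (IfDefined(-1)) and tdnn2/tdnn3 splice offset -3. As is standard in
    # kaldi-rnnlm xconfigs, the output stays in embedding space
    # (dim=$embedding_dim) with include-log-softmax=false, since the RNNLM
    # training code matches it against the word embeddings itself.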
    cat >$dir/config/xconfig <<EOF
  input dim=$embedding_dim name=input
  relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1))
  fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd
  relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3))
  fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd
  relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3))
  output-layer name=output include-log-softmax=false dim=$embedding_dim
  EOF
    rnnlm/validate_config_dir.sh $text_dir $dir/config
  fi
  
  if [ $stage -le 2 ]; then
    rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir
  fi
  
  # Train a model on the forward data (the forward model)
  if [ $stage -le 3 ]; then
    rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 3 \
                    --stage $train_stage --num-epochs 10 --cmd "$train_cmd" $dir
  fi
  
  # Train another model on reversed data (the backward model)
  if [ $stage -le 4 ] && $use_backward_model; then
    local/rnnlm/run_lstm_back.sh --embedding-dim $embedding_dim \
      --lstm-rpd $lstm_rpd --lstm-nrpd $lstm_nrpd \
      --affix $affix
  fi
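
  # The backward model is used in stage 6 below: rescoring the n-best lists in
  # both directions and combining the scores (the *_bi directories) typically
  # gives a small extra gain over the forward model alone.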
  
  # Lattice rescoring is faster than n-best rescoring but gives slightly worse
  # results, so we use it only as a quick check of how good the forward model is.
  LM=5gkn_5k # use the 5-gram LM from run_lmrescore_tdnn.sh
  tgtdir=${ac_model_dir}_smbr_lmrescore
  if [ $stage -le 5 ] && $run_lat_rescore; then
    echo "$0: Perform lattice-rescoring on $ac_model_dir"
    for decode_set in dt05_real dt05_simu et05_real et05_simu; do
      decode_dir=$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${LM}
  
      # Lattice rescoring
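      # (--weight 0.8 is the RNNLM interpolation weight, i.e. roughly
      # 0.8*RNNLM + 0.2*n-gram in the rescored lattice.)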
      rnnlm/lmrescore_pruned.sh \
        --cmd "$train_cmd --mem 2G" \
        --weight 0.8 --max-ngram-order $ngram_order \
        data/lang_test_$LM $dir \
        data/${decode_set}_${enhan}_chunked ${decode_dir} \
        $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix} &
    done
    
    wait
    
    # calc wers for lattice-rescoring results
    local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix} \
        $tgtdir/graph_tgpr_5k \
        > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}.result
    head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}.result
  fi
  
  nbest=100
  rnnweight=0.8
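  # N-best rescoring extracts the best $nbest hypotheses from each lattice and
  # re-scores each of them with the RNNLM at interpolation weight $rnnweight.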
  if [ $stage -le 6 ] && $run_nbest_rescore; then
    echo "$0: Perform nbest-rescoring on $ac_model_dir"
    for decode_set in dt05_real dt05_simu et05_real et05_simu; do
      decode_dir=$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${LM}
      (
    # N-best rescoring with the forward model
      rnnlm/lmrescore_nbest.sh \
        --cmd "$train_cmd --mem 2G" --N $nbest \
        $rnnweight data/lang_test_$LM $dir \
        data/${decode_set}_${enhan}_chunked ${decode_dir} \
        $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}
  
      if $use_backward_model; then
        rnnlm/lmrescore_nbest_back.sh \
          --cmd "$train_cmd --mem 2G" --N $nbest \
          $rnnweight data/lang_test_$LM ${dir}_back \
          data/${decode_set}_${enhan}_chunked \
          $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest} \
          $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi
      fi
      ) &
    done
    wait
    # calc wers for nbest-rescoring results
    if $use_backward_model; then
      local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi \
        $tgtdir/graph_tgpr_5k \
        > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi.result
      head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi.result
    else
      local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest} \
          $tgtdir/graph_tgpr_5k \
          > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}.result
      head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}.result
    fi
  fi
  
  exit 0