Blame view

egs/chime4/s5_1ch/local/run_lmrescore.sh 4.13 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
  #!/bin/bash

  # Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
  #                Inria (Emmanuel Vincent)
  #                Mitsubishi Electric Research Labs (Shinji Watanabe)
  #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

  # Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)

  # Rescores CHiME-4 DNN decoding lattices with a high-order n-gram LM
  # (stage 3) and then an RNNLM via n-best rescoring (stage 4).
  # All options below can be overridden on the command line thanks to
  # utils/parse_options.sh (e.g. --stage 3 --nbest 50).
  nj=12            # number of parallel jobs
  stage=1          # resume the recipe from this stage
  order=5          # n-gram order of the rescoring LM
  hidden=300       # RNNLM hidden-layer size (must match the trained model)
  rnnweight=0.5    # interpolation weight given to the RNNLM score
  nbest=100        # size of the n-best lists for RNNLM rescoring
  train=noisy      # training-condition tag used in exp/ directory names
  eval_flag=true   # make it true when the evaluation data are released

  . utils/parse_options.sh || exit 1;

  . ./path.sh
  . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
             ## This relates to the queue.

  # This is a shell script, but it's recommended that you run the commands one by
  # one by copying and pasting into the shell.

  if [ $# -ne 2 ]; then
    printf "\nUSAGE: %s <Chime4 root directory> <enhancement method>\n\n" "$(basename "$0")"
    echo "First argument specifies a root directory of Chime4 data"
    echo "Second argument specifies a unique name for different enhancement method"
    exit 1;
  fi
  
  # set language models: suffixes identify the rescoring LM directories
  # (data/lang_test_<lm_suffix>, data/lang_test_<rnnlm_suffix>)
  lm_suffix=${order}gkn_5k
  rnnlm_suffix=rnnlm_5k_h${hidden}

  # data root
  chime4_data=$1
  # enhan data: unique tag naming the enhancement method under test
  enhan=$2

  # check data (quote the path so directories with spaces are handled)
  if [ ! -d "$chime4_data" ]; then
    echo "$chime4_data does not exist. Please specify chime4 data root correctly"
    exit 1
  fi

  # check whether run_dnn is executed: the sMBR lattice dir it produces
  # is the input to all rescoring below
  srcdir=exp/tri4a_dnn_tr05_multi_${train}_smbr_i1lats
  if [ ! -d "$srcdir" ]; then
    echo "error, execute local/run_dnn.sh, first"
    exit 1;
  fi
  
  # train a high-order n-gram language model
  if [ $stage -le 1 ]; then
    local/chime4_train_lms.sh "$chime4_data" || exit 1;
  fi

  # train a RNN language model
  if [ $stage -le 2 ]; then
    local/chime4_train_rnnlms.sh "$chime4_data" || exit 1;
  fi

  # preparation
  dir=exp/tri4a_dnn_tr05_multi_${train}_smbr_lmrescore
  mkdir -p "$dir"
  # make a symbolic link to graph info; the link must be created from inside
  # $dir so the relative target resolves correctly
  if [ ! -e "$dir/graph_tgpr_5k" ]; then
    if [ ! -e "exp/tri4a_dnn_tr05_multi_${train}/graph_tgpr_5k" ]; then
      echo "graph is missing, execute local/run_dnn.sh, correctly"
      exit 1;
    fi
    pushd "$dir"
    ln -s "../tri4a_dnn_tr05_multi_${train}/graph_tgpr_5k" .
    popd
  fi
  
  # rescore lattices by a high-order N-gram
  if [ $stage -le 3 ]; then
    # check the best iteration: run_dnn.sh records the winning sMBR
    # iteration in $srcdir/log/best_wer_$enhan
    if [ ! -f "$srcdir/log/best_wer_$enhan" ]; then
      echo "error, execute local/run_dnn.sh, first"
      exit 1;
    fi
    # first field looks like "<it>_..."; keep the leading iteration number
    it=$(cut -f 1 -d" " "$srcdir/log/best_wer_$enhan" | awk -F'[_]' '{print $1}')
    # rescore lattices for the dev sets, plus the eval sets when released
    if $eval_flag; then
      tasks="dt05_simu dt05_real et05_simu et05_real"
    else
      tasks="dt05_simu dt05_real"
    fi
    for t in $tasks; do
      steps/lmrescore.sh --mode 3 \
        data/lang_test_tgpr_5k \
        data/lang_test_${lm_suffix} \
        data-fmllr-tri3b/${t}_$enhan \
        "$srcdir/decode_tgpr_5k_${t}_${enhan}_it$it" \
        "$dir/decode_tgpr_5k_${t}_${enhan}_${lm_suffix}"
    done
    # rescored results by high-order n-gram LM
    mkdir -p "$dir/log"
    local/chime4_calc_wers.sh "$dir" "${enhan}_${lm_suffix}" "$dir/graph_tgpr_5k" \
        > "$dir/best_wer_${enhan}_${lm_suffix}.result"
    head -n 15 "$dir/best_wer_${enhan}_${lm_suffix}.result"
  fi
  
  # N-best rescoring using a RNNLM
  if [ $stage -le 4 ]; then
    # check the best lmw: stage 3 (chime4_calc_wers.sh) must have written
    # the best-WER summary for the n-gram-rescored system
    if [ ! -f "$dir/log/best_wer_${enhan}_${lm_suffix}" ]; then
      echo "error, rescoring with a high-order n-gram seems to be failed"
      exit 1;
    fi
    # best LM weight is the last "_"-separated token of the first field
    lmw=$(cut -f 1 -d" " "$dir/log/best_wer_${enhan}_${lm_suffix}" | awk -F'[_]' '{print $NF}')
    # rescore n-best list for all sets
    if $eval_flag; then
      tasks="dt05_simu dt05_real et05_simu et05_real"
    else
      tasks="dt05_simu dt05_real"
    fi
    for t in $tasks; do
      steps/rnnlmrescore.sh --inv-acwt "$lmw" --N "$nbest" --use-phi true \
        "$rnnweight" \
        data/lang_test_${lm_suffix} \
        data/lang_test_${rnnlm_suffix} \
        data-fmllr-tri3b/${t}_$enhan \
        "$dir/decode_tgpr_5k_${t}_${enhan}_${lm_suffix}" \
        "$dir/decode_tgpr_5k_${t}_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}"
    done
    # calc wers for RNNLM results
    local/chime4_calc_wers.sh "$dir" "${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}" "$dir/graph_tgpr_5k" \
        > "$dir/best_wer_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result"
    head -n 15 "$dir/best_wer_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result"
  fi