Blame view
egs/chime4/s5_1ch/local/run_lmrescore.sh
4.13 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
#!/bin/bash

# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
#                Inria (Emmanuel Vincent)
#                Mitsubishi Electric Research Labs (Shinji Watanabe)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)

# Language-model rescoring for the CHiME-4 DNN system.
#
# Usage: local/run_lmrescore.sh [options] <Chime4 root directory> <enhancement method>
#
# Stages (a stage runs when its number is >= the value of "stage"):
#   1: train a high-order n-gram language model (local/chime4_train_lms.sh)
#   2: train a RNN language model (local/chime4_train_rnnlms.sh)
#   3: rescore the DNN decoding lattices with the high-order n-gram
#   4: N-best rescoring of the stage-3 output using the RNNLM
#
# Requires the output of local/run_dnn.sh (see the directory checks below).

# Options. NOTE(review): presumably overridable on the command line through
# utils/parse_options.sh, which is sourced right below - confirm there.
nj=12           # not referenced anywhere below; kept so existing callers still work
stage=1         # first stage to execute (see the list above)
order=5         # n-gram order, used to build the LM directory suffix
hidden=300      # RNNLM hidden size, used to build the RNNLM directory suffix
rnnweight=0.5   # weight argument handed to steps/rnnlmrescore.sh
nbest=100       # value passed as --N to steps/rnnlmrescore.sh
train=noisy     # selects the exp/tri4a_dnn_tr05_multi_${train}* directories
eval_flag=true  # make it true when the evaluation data are released

. utils/parse_options.sh || exit 1;

. ./path.sh
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

if [ $# -ne 2 ]; then
  printf "\nUSAGE: %s <Chime4 root directory> <enhancement method>\n\n" "$(basename "$0")"
  echo "First argument specifies a root directory of Chime4 data"
  echo "Second argument specifies a unique name for different enhancement method"
  exit 1;
fi

# set language models
lm_suffix=${order}gkn_5k
rnnlm_suffix=rnnlm_5k_h${hidden}

# data root
chime4_data=$1
# enhan data
enhan=$2

# check data
if [ ! -d "$chime4_data" ]; then
  echo "$chime4_data does not exist. Please specify chime4 data root correctly"
  exit 1
fi

# check whether run_dnn is executed
srcdir=exp/tri4a_dnn_tr05_multi_${train}_smbr_i1lats
if [ ! -d "$srcdir" ]; then
  echo "error, execute local/run_dnn.sh, first"
  exit 1;
fi

# Data sets to rescore; the evaluation sets are only included when eval_flag
# is true. eval_flag is executed as a command, so it must hold "true" or
# "false".
if $eval_flag; then
  tasks="dt05_simu dt05_real et05_simu et05_real"
else
  tasks="dt05_simu dt05_real"
fi

# train a high-order n-gram language model
if [ "$stage" -le 1 ]; then
  local/chime4_train_lms.sh "$chime4_data" || exit 1;
fi

# train a RNN language model
if [ "$stage" -le 2 ]; then
  local/chime4_train_rnnlms.sh "$chime4_data" || exit 1;
fi

# preparation
dir=exp/tri4a_dnn_tr05_multi_${train}_smbr_lmrescore
mkdir -p "$dir" || exit 1
# make a symbolic link to graph info
if [ ! -e "$dir/graph_tgpr_5k" ]; then
  if [ ! -e "exp/tri4a_dnn_tr05_multi_${train}/graph_tgpr_5k" ]; then
    echo "graph is missing, execute local/run_dnn.sh, correctly"
    exit 1;
  fi
  # The link target is stored verbatim and resolved relative to $dir, so this
  # creates the same link as "cd $dir; ln -s ../... ." without changing the
  # working directory.
  ln -s "../tri4a_dnn_tr05_multi_${train}/graph_tgpr_5k" "$dir/graph_tgpr_5k" || exit 1
fi

# rescore lattices by a high-order N-gram
if [ "$stage" -le 3 ]; then
  # check the best iteration
  best_it_file=$srcdir/log/best_wer_$enhan
  if [ ! -f "$best_it_file" ]; then
    echo "error, execute local/run_dnn.sh, first"
    exit 1;
  fi
  # First space-delimited field of the file, then the part before the first "_".
  it=$(cut -f 1 -d" " "$best_it_file" | awk -F'[_]' '{print $1}')
  if [ -z "$it" ]; then
    echo "error, could not read the best iteration from $best_it_file"
    exit 1;
  fi
  # rescore lattices
  # $tasks is intentionally unquoted: it is a space-separated list.
  for t in $tasks; do
    steps/lmrescore.sh --mode 3 \
      "data/lang_test_tgpr_5k" \
      "data/lang_test_${lm_suffix}" \
      "data-fmllr-tri3b/${t}_$enhan" \
      "$srcdir/decode_tgpr_5k_${t}_${enhan}_it$it" \
      "$dir/decode_tgpr_5k_${t}_${enhan}_${lm_suffix}" || exit 1
  done
  # rescored results by high-order n-gram LM
  mkdir -p "$dir/log"
  local/chime4_calc_wers.sh "$dir" "${enhan}_${lm_suffix}" "$dir/graph_tgpr_5k" \
      > "$dir/best_wer_${enhan}_${lm_suffix}.result"
  head -n 15 "$dir/best_wer_${enhan}_${lm_suffix}.result"
fi

# N-best rescoring using a RNNLM
if [ "$stage" -le 4 ]; then
  # check the best lmw
  best_lmw_file=$dir/log/best_wer_${enhan}_${lm_suffix}
  if [ ! -f "$best_lmw_file" ]; then
    echo "error, rescoring with a high-order n-gram seems to be failed"
    exit 1;
  fi
  # First space-delimited field of the file, then the part after the last "_".
  lmw=$(cut -f 1 -d" " "$best_lmw_file" | awk -F'[_]' '{print $NF}')
  if [ -z "$lmw" ]; then
    echo "error, could not read the best lmw from $best_lmw_file"
    exit 1;
  fi
  # rescore n-best list for all sets
  # $tasks is intentionally unquoted: it is a space-separated list.
  for t in $tasks; do
    steps/rnnlmrescore.sh --inv-acwt "$lmw" --N "$nbest" --use-phi true \
      "$rnnweight" \
      "data/lang_test_${lm_suffix}" \
      "data/lang_test_${rnnlm_suffix}" \
      "data-fmllr-tri3b/${t}_$enhan" \
      "$dir/decode_tgpr_5k_${t}_${enhan}_${lm_suffix}" \
      "$dir/decode_tgpr_5k_${t}_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}" || exit 1
  done
  # calc wers for RNNLM results
  local/chime4_calc_wers.sh "$dir" "${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}" "$dir/graph_tgpr_5k" \
      > "$dir/best_wer_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result"
  head -n 15 "$dir/best_wer_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result"
fi