Blame view
egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
3.45 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
#!/bin/bash

# Copyright 2015  Guoguo Chen
#           2017  Hainan Xu
# Apache 2.0

# This script rescores lattices with RNNLM.  See also rnnlmrescore.sh which is
# an older script using n-best lists.
#
# For every job it runs a two-stage pipeline: the first stage applies the old
# LM (G.fst or G.carpa) with scale -weight, the second stage applies the RNNLM
# with scale +weight, and the result is written to <output-decode-dir>.
#
# Positional arguments:
#   <old-lang-dir>       directory with words.txt and G.fst or G.carpa
#   <rnnlm-dir>          directory with rnnlm and unk.probs (and, for
#                        --rnnlm-ver cuedrnnlm, layer_string, unigram.counts
#                        and rnnlm.input.wlist.index)
#   <data-dir>           data directory handed to local/score.sh
#   <input-decode-dir>   directory with lat.*.gz and num_jobs
#   <output-decode-dir>  directory that receives lat.*.gz, num_jobs and log/
#
# Exits with status 1 on bad usage, missing inputs, a failing rescoring job,
# or a failing scoring run; exits 0 otherwise.

# Begin configuration section.
cmd=run.pl
skip_scoring=false
max_ngram_order=4   # Passed as --max-ngram-order to the rescoring binary.
acwt=0.1            # Not referenced below; kept so the option stays available
                    # (presumably consumed via utils/parse_options.sh).
weight=0.5          # Interpolation weight for RNNLM.
rnnlm_ver=          # Set to "cuedrnnlm" to use lattice-lmrescore-cuedrnnlm.
# End configuration section.

echo "$0 $@"  # Print the command line for logging

. ./utils/parse_options.sh

if [ $# != 5 ]; then
  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
  echo "with RNNLM."
  echo ""
  echo "Usage: $0 [options] <old-lang-dir> <rnnlm-dir> \\"
  echo "   <data-dir> <input-decode-dir> <output-decode-dir>"
  # The example follows the same argument order as the usage line above.
  echo " e.g.: $0 data/lang_tg ./rnnlm data/test \\"
  echo "   exp/tri3/test_tg exp/tri3/test_rnnlm"
  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
  exit 1;
fi

[ -f path.sh ] && . ./path.sh;

oldlang=$1
rnnlm_dir=$2
data=$3
indir=$4
outdir=$5

rescoring_binary=lattice-lmrescore-rnnlm

first_arg=ark:$rnnlm_dir/unk.probs  # this is for mikolov's rnnlm
extra_arg=

if [ "$rnnlm_ver" == "cuedrnnlm" ]; then
  # Fail early instead of handing empty option values to the binary.
  for f in layer_string unigram.counts rnnlm.input.wlist.index; do
    [ ! -f "$rnnlm_dir/$f" ] && echo "$0: Missing file $rnnlm_dir/$f" && exit 1;
  done

  layer_string=$(sed "s=:= =g" "$rnnlm_dir/layer_string")
  total_size=$(wc -l "$rnnlm_dir/unigram.counts" | awk '{print $1}')
  rescoring_binary="lattice-lmrescore-cuedrnnlm"
  # Drop the first line of the index file and shift the first column down by
  # one to build the word list given to the binary.
  tail -n +2 "$rnnlm_dir/rnnlm.input.wlist.index" | \
    awk '{print $1-1,$2}' > "$rnnlm_dir/rnn.wlist"

  # The embedded quotes are literal here; $extra_arg is expanded unquoted
  # below, so they only take effect if $cmd re-parses its arguments through a
  # shell (NOTE(review): true for run.pl/queue.pl as far as we know -- confirm
  # for other wrappers).
  extra_arg="--full-voc-size=$total_size --layer-sizes=\"$layer_string\""
  first_arg=$rnnlm_dir/rnn.wlist
fi

# Prefer the const-arpa LM when both G.fst and G.carpa are present.
oldlm=$oldlang/G.fst
if [ -f "$oldlang/G.carpa" ]; then
  oldlm=$oldlang/G.carpa
fi

[ ! -f "$oldlm" ] && echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" && exit 1;
[ ! -f "$rnnlm_dir/rnnlm" ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
[ ! -f "$rnnlm_dir/unk.probs" ] && \
  echo "$0: Missing file $rnnlm_dir/unk.probs" && exit 1;
[ ! -f "$oldlang/words.txt" ] && \
  echo "$0: Missing file $oldlang/words.txt" && exit 1;
! ls "$indir"/lat.*.gz >/dev/null && \
  echo "$0: No lattices in input directory $indir" && exit 1;

awk -v n="$0" -v w="$weight" 'BEGIN {if (w < 0 || w > 1) {
  print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
  || exit 1;

# Select the tool and LM argument used to apply the old LM.  G.fst is piped
# through fstproject; G.carpa is passed to the const-arpa tool directly.
if [ "$oldlm" == "$oldlang/G.fst" ]; then
  oldlm_binary=lattice-lmrescore
  oldlm_arg="fstproject --project_output=true $oldlm |"
else
  oldlm_binary=lattice-lmrescore-const-arpa
  oldlm_arg=$oldlm
fi

mkdir -p "$outdir/log" || exit 1;
nj=$(cat "$indir/num_jobs") || exit 1;
cp "$indir/num_jobs" "$outdir" || exit 1;

# The old LM is applied with the negated interpolation weight.
oldlm_weight=$(perl -e "print -1.0 * $weight;")

# $cmd and $extra_arg are intentionally unquoted: both may hold several words.
$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
  $oldlm_binary --lm-scale=$oldlm_weight \
  "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_arg" ark:- \| \
  $rescoring_binary $extra_arg --lm-scale=$weight \
  --max-ngram-order=$max_ngram_order \
  $first_arg $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
  "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;

if ! $skip_scoring ; then
  err_msg="Not scoring because local/score.sh does not exist or not executable."
  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
  # Propagate scoring failures instead of unconditionally exiting 0.
  local/score.sh --cmd "$cmd" $data $oldlang $outdir || \
    { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; }
else
  echo "$0: Not scoring because --skip-scoring was specified."
fi

exit 0;