Blame view
Scripts/utils/reverse_lm_test.sh
4.05 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
#!/bin/bash # Copyright 2012 Brno University of Technology (Author: Mirko Hannemann) # Apache 2.0 # configuration section utterances=4 maxlen=30 nbest=10 # end config section echo "$0 $@" # Print the command line for logging [ -f ./path.sh ] && . ./path.sh; # source the path. . parse_options.sh || exit 1; if [ $# != 2 ]; then echo "Usage: utils/reverse_lm_test.sh [options] <fwd-lm-dir> <bwd-lm-dir>" echo "example: utils/reverse_lm_test.sh data/lang_test_tgpr_5k data/lang_test_tgpr_5k.reverse" echo "options:" echo " --utterances <int> number of random test utterances" echo " --maxlen <int> max number of arcs (words) in utterance" echo " --nbest <int> compare n best paths" exit 1; fi test_fwd=$1 test_bwd=$2 nb=`echo $nbest | awk '{print $1-1;}'` # For each language model the corresponding FST in lang_test_* directory. echo "compare LM scores using "$test_fwd/G.fst" and "$test_bwd/G.fst for utt in `seq 1 $utterances` do # generate random sentence with forward language model len=1000 # big number while [ $len -gt $maxlen ] do fstrandgen --npath=1 $test_fwd/G.fst | fstprint --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt > sent$utt len=`cat sent$utt | wc -l` done cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{printf "utterance:"; for(i=1;i<=length(a);i++) {printf " %s",a[i];} printf " ";}' # get n best paths with forward language model cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{for(i=1;i<=length(a);i++) {print i-1,i,a[i];} print length(a);}' > sent$utt.forward fstcompile --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt sent$utt.forward > sent$utt.forward.fst fstcompose $test_fwd/G.fst sent$utt.forward.fst > sent$utt.composed.forward.fst fstshortestpath --nshortest=$nbest sent$utt.composed.forward.fst | fstprint > sent$utt.composed.forward.n rm sent$utt.forward.scores 2>/dev/null for n in `seq 0 $nb` do # select path with rank n cat sent$utt.composed.forward.n | awk '(NR>'$n' || $1!="0"){print;}' | fstcompile | fstconnect > sent$utt.composed.forward.$n.fst fstprint sent$utt.composed.forward.$n.fst > sent$utt.composed.forward.$n # compute shortest distance to final states fstshortestdistance sent$utt.composed.forward.$n.fst | \ awk -v list=sent$utt.composed.forward.$n 'BEGIN{mincost=1E5; while (getline < list > 0){if (NF==2) final[$1]=$2; if (NF==1) final[$1]=0.00001;}} \ { if (final[$1]) { cost=$2+final[$1]; if (cost<mincost) {mincost=cost;} };} END {print mincost;}' \ >> sent$utt.forward.scores done # get n best paths with reverse language model cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{for(i=1;i<=length(a);i++) {print i-1,i,a[length(a)-i+1];} print length(a);}' > sent$utt.reverse fstcompile --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt sent$utt.reverse > sent$utt.reverse.fst fstcompose $test_bwd/G.fst sent$utt.reverse.fst > sent$utt.composed.reverse.fst fstshortestpath --nshortest=$nbest sent$utt.composed.reverse.fst | fstprint > sent$utt.composed.reverse.n rm sent$utt.reverse.scores 2>/dev/null for n in `seq 0 $nb` do # select path with rank n cat sent$utt.composed.reverse.n | awk '(NR>'$n' || $1!="0"){print;}' | fstcompile | fstconnect > sent$utt.composed.reverse.$n.fst fstprint sent$utt.composed.reverse.$n.fst > sent$utt.composed.reverse.$n # compute shortest distance to final states fstshortestdistance sent$utt.composed.reverse.$n.fst | \ awk -v list=sent$utt.composed.reverse.$n 'BEGIN{mincost=1E5; while (getline < list > 0){if (NF==2) final[$1]=$2; if (NF==1) final[$1]=0.00001;}} \ { if (final[$1]) { cost=$2+final[$1]; if (cost<mincost) {mincost=cost;} };} END {print mincost;}' \ >> sent$utt.reverse.scores done # present results paste sent$utt.forward.scores sent$utt.reverse.scores | \ awk '{diff=$1-$2; if ( (diff<0?-diff:diff) > 0.001 ) print NR,$1,$2,"!!!"; else print NR,$1,$2;}' # clean up rm sent$utt rm sent$utt.* done |