get_nbest.sh
1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env bash
# Author : Gaurav Kumar, Johns Hopkins University
# Creates n-best lists from Kaldi lattices
# This script needs to be run from one level above this directory
# Usage: get_nbest.sh <latdir> <decode_dir> <acoustic_scale> [partition]
#   latdir          directory that receives all generated files
#   decode_dir      directory containing lat.*.gz lattice archives
#   acoustic_scale  value passed to lattice-to-nbest --acoustic-scale
#   partition       prefix for output file names (4th argument; the argument
#                   count check only requires the first three)
#
# For every lattice archive the script writes, next to each other in latdir,
# <partition>.<lat name>.{bin,nbest,ali,words,lmscore,acscore}, and appends
# the word sequences mapped through the two symbol tables to
# <partition>.all.nbest.roman and <partition>.all.nbest.script.

# Presumably sets up PATH for the Kaldi binaries used below; it is looked up
# relative to the current directory.
. ./path.sh

if [ $# -lt 3 ]; then
  echo "Enter the latdir (where the n-best will be put), the decode dir containing lattices and the acoustic scale"
  echo "An optional fourth argument gives the partition name used as the output file prefix"
  exit 1
fi

noNBest=100      # value given to lattice-to-nbest --n
maxProcesses=10  # number of background jobs started before waiting

latdir=$1
decode_dir=$2
acoustic_scale=$3
partition=${4:-}

symTable=exp/tri5a/graph/words.txt
scriptSymTable=data/local/dict/lexicon_script.txt

stage=0

# Waits for every job of the current batch, then appends the per-lattice
# part files to the combined outputs in the order the jobs were started.
# Doing the append here, in the parent, keeps concurrent jobs from writing
# to the same file at the same time.
# Globals: pids, bases (read, then reset), allNBest (read),
#          failedJobs (incremented for each job that exited non-zero)
merge_batch() {
  local i base
  for i in "${!pids[@]}"; do
    base=${bases[$i]}
    if ! wait "${pids[$i]}"; then
      echo "get_nbest.sh: n-best extraction reported an error for $base" >&2
      failedJobs=$((failedJobs + 1))
    fi
    # Whatever a job managed to write is still collected, so a partly
    # failed job contributes the same text it would have appended itself.
    if [ -f "$base.roman.part" ]; then
      cat "$base.roman.part" >> "$allNBest.roman"
    fi
    if [ -f "$base.script.part" ]; then
      cat "$base.script.part" >> "$allNBest.script"
    fi
    rm -f -- "$base.roman.part" "$base.script.part"
  done
  pids=()
  bases=()
}

if [ -d "$decode_dir" ]; then
  # All redirects below target latdir, so make sure it exists.
  mkdir -p -- "$latdir" || exit 1

  allNBest=$latdir/$partition.all.nbest
  runningProcesses=0
  failedJobs=0
  pids=()
  bases=()

  for l in "$decode_dir"/lat.*.gz; do
    # With no matching archive the glob stays literal; skip that entry.
    [ -e "$l" ] || continue

    # Output prefix: latdir/<partition>.<archive name without .gz>
    bname=${l##*/}
    bname="$latdir/$partition.${bname%.gz}"

    (
      # Drop leftovers of an interrupted earlier run so they are not merged.
      rm -f -- "$bname.roman.part" "$bname.script.part"

      # Unzip the lattice archive first
      gunzip -c "$l" > "$bname.bin" || exit 1

      if [ "$stage" -le 0 ]; then
        # Extract n-best from the lattices
        lattice-to-nbest --acoustic-scale="$acoustic_scale" --n="$noNBest" \
          "ark:$bname.bin" "ark:$bname.nbest" || exit 1

        # Convert the n-best lattice to linear word based sentences
        nbest-to-linear "ark,t:$bname.nbest" "ark,t:$bname.ali" \
          "ark,t:$bname.words" "ark,t:$bname.lmscore" \
          "ark,t:$bname.acscore" || exit 1

        # Both conversions are attempted even if the first one fails.
        convStatus=0

        # Convert the int to word for each sentence
        utils/int2sym.pl -f 2- "$symTable" \
          < "$bname.words" > "$bname.roman.part" || convStatus=1

        # Use the script version on the symbol table too
        utils/int2sym.pl -f 2- "$scriptSymTable" \
          < "$bname.words" > "$bname.script.part" || convStatus=1

        [ "$convStatus" -eq 0 ] || exit 1
      fi
      echo "Done getting n-best"
    ) &
    pids+=("$!")
    bases+=("$bname")

    runningProcesses=$((runningProcesses + 1))
    echo "#### Processes running = " "$runningProcesses" " ####"

    if [ "$runningProcesses" -eq "$maxProcesses" ]; then
      echo "#### Waiting for slot ####"
      merge_batch
      runningProcesses=0
      echo "#### Done waiting ####"
    fi
  done

  # Collect the final, possibly partial, batch.
  merge_batch

  if [ "$failedJobs" -gt 0 ]; then
    # Reported only; the exit status is left as it was for existing callers.
    echo "get_nbest.sh: $failedJobs lattice job(s) reported errors" >&2
  fi
else
  echo "get_nbest.sh: decode dir '$decode_dir' is not a directory, nothing done" >&2
fi