latconvert.sh
3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env bash
# Author : Gaurav Kumar, Johns Hopkins University
# Creates OpenFST lattices from Kaldi lattices
# This script needs to be run from one level above this directory
#
# Usage: <this script> <latdir> <decode_dir> <acoustic_scale>
#   latdir          output directory; receives the sub-directories
#                   lattices/ (text FSTs, one per utterance),
#                   lattices-bin/ (compiled FSTs) and
#                   lattices-pushed/ (weight-pushed FSTs)
#   decode_dir      directory containing the Kaldi lattices lat.*.gz
#   acoustic_scale  acoustic scale passed to lattice-prune / lattice-to-fst
#
# Requires ./path.sh to define KALDI_ROOT and the OpenFST tools (fstcompile,
# fstrmepsilon, fstpush, fstconcat, fstreverse) to be on PATH.
. ./path.sh

if [ $# -lt 3 ]; then
  echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale"
  exit 1
fi

prunebeam=2

latdir=$1
decode_dir=$2
acoustic_scale=$3

#latdir="latjosh-2-callhome"
#decode_dir=exp/tri5a/decode_$partition
#acoustic_scale=0.077

# Stages with a number below $stage are skipped (0 runs everything).
stage=0

if [ -d "$decode_dir" ]; then
  # TODO:Add scaling factor for weights, how?
  rawLatDir="lattices"
  compiledLatDir="lattices-bin"
  preplfLatDir="lattices-pushed"

  # -p also creates "$latdir" itself.
  mkdir -p "$latdir/$rawLatDir" "$latdir/$compiledLatDir" "$latdir/$preplfLatDir"

  # One background job per lattice archive.
  for l in "$decode_dir"/lat.*.gz; do
    # An unmatched glob stays literal; skip it instead of feeding it to gunzip.
    [ -e "$l" ] || continue
    (
      # Extract file name and unzip the file first
      bname=${l##*/}
      bname="$latdir/${bname%.gz}"
      gunzip -c "$l" > "$bname.bin" || exit 1

      if [ "$stage" -le 0 ]; then
        # Each step consumes the previous step's output, so stop this job
        # as soon as one of them fails.
        # Now copy into ark format
        "$KALDI_ROOT/src/latbin/lattice-copy" "ark:$bname.bin" ark,t:- > "$bname.raw" || exit 1
        # Prune lattices
        "$KALDI_ROOT/src/latbin/lattice-prune" --acoustic-scale="$acoustic_scale" \
          --beam="$prunebeam" "ark:$bname.raw" "ark:$bname.pruned" || exit 1
        # Convert to an openfst compatible format
        "$KALDI_ROOT/src/latbin/lattice-to-fst" --lm-scale=1.0 \
          --acoustic-scale="$acoustic_scale" "ark:$bname.pruned" "ark,t:$bname.ark.fst" || exit 1
      fi

      if [ "$stage" -le 1 ]; then
        # Split the text archive into one "<name>.lat" file per entry.
        # Layout handled here: a name line, then the FST lines, then a blank
        # line that ends the entry. A single awk pass is used instead of
        # spawning one awk process per FST line.
        awk -v outdir="$latdir/$rawLatDir" '
          {
            # Strip surrounding blanks (the name line may carry a trailing
            # space), as the shell "read" did before.
            sub(/^[ \t]+/, "")
            sub(/[ \t]+$/, "")
          }
          !in_fst {
            # First line of an entry: it names the output file.
            if (out != "") close(out)
            out = outdir "/" $0 ".lat"
            in_fst = 1
            next
          }
          $0 == "" {
            # Blank line: the next line starts a new entry.
            in_fst = 0
            next
          }
          {
            # Replace laugh, unk, oov, noise with eps
            # (label ids 2038-2040 are hard-coded for the symbol table in
            # use -- TODO confirm against words.txt)
            if ($3 == 2038 || $3 == 2039 || $3 == 2040) { $3 = 0; $4 = 0 }
            # ">" truncates only when the file is first opened, so every run
            # starts each lattice file afresh instead of appending to output
            # left behind by an earlier run.
            print > out
          }
        ' "$bname.ark.fst"
        echo "Done isolating lattices"
      fi
    ) &
  done
  wait

  # Intermediate files; -f because they may not exist (e.g. stage 0 skipped).
  rm -f -- "$latdir"/*.bin
  rm -f -- "$latdir"/*.pruned

  if [ "$stage" -le 2 ]; then
    #Compile lattices
    # NOTE: one background job is started per lattice, without a limit.
    for l in "$latdir/$rawLatDir"/*.lat; do
      [ -e "$l" ] || continue
      (
        # Arc type needs to be log
        bname=${l##*/}
        fstcompile --arc_type=log "$latdir/$rawLatDir/$bname" "$latdir/$compiledLatDir/$bname"
      ) &
    done
    wait
    echo "Done compiling lattices."
  fi

  if [ "$stage" -le 3 ]; then
    #Sanjeev's Recipe for creating valid PLF compatible FSTs"
    # Create a dummy FST with one state and no arcs first
    echo 0 | fstcompile --arc_type=log - "$latdir/$preplfLatDir/dummy.fst"

    # Push Lattice weights towards initial state
    for l in "$latdir/$compiledLatDir"/*.lat; do
      [ -e "$l" ] || continue
      (
        bname=${l##*/}
        # Do not topo sort here, do it before converting into PLF
        # Sanjeev's Recipe : Concatenate with dummy FST
        fstrmepsilon "$latdir/$compiledLatDir/$bname" |
          fstpush --push_weights --remove_total_weight - |
          fstconcat - "$latdir/$preplfLatDir/dummy.fst" |
          fstreverse - |
          fstrmepsilon - |
          fstreverse - "$latdir/$preplfLatDir/$bname"
      ) &
    done
    wait

    # Let's take a moment to thank the dummy FST for playing its
    # part in this process. However, it has to go now.
    rm -f -- "$latdir/$preplfLatDir/dummy.fst"
    echo "Done performing fst push (initial state)"
  fi
else
  # Message and exit status are kept as they were for existing callers.
  echo "Complete training and decoding first"
fi