word_align_lattices.sh
1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
# Apache 2.0.
# Begin configuration section.
# Integer id of the silence word; overridden by --silence-label <id>.
silence_label=0
# Job launcher; overridden by --cmd. May contain several words
# (e.g. "queue.pl <queue opts>"), so it is expanded unquoted when invoked.
cmd=run.pl
# End configuration section.
echo "$0 $*" # Print the command line for logging

# Consume leading "--silence-label <id>" and "--cmd <launcher>" pairs, in any
# order and any number of times (the last occurrence wins). Parsing stops at
# the first argument that is not one of these options, leaving the positional
# arguments in "$@" for the code that follows.
while [ $# -gt 0 ]; do
  case "$1" in
    --silence-label|--cmd)
      # An option without its value is reported explicitly instead of
      # silently storing an empty string.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" >&2
        exit 1
      fi
      if [ "$1" == "--cmd" ]; then
        cmd=$2
      else
        silence_label=$2
      fi
      shift 2
      ;;
    *)
      break
      ;;
  esac
done
# Exactly three positional arguments must remain at this point; otherwise
# print the usage text on stdout and exit with status 1.
if [ $# -ne 3 ]; then
  printf '%s\n' \
    "Word-align lattices (make the arcs sync up with words)" \
    "" \
    "Usage: $0 [options] <lang-dir> <decode-dir-in> <decode-dir-out>" \
    "options: [--cmd (run.pl|queue.pl [queue opts])] [--silence-label <integer-id-of-silence-word>]"
  exit 1
fi
# Load the environment setup from path.sh in the current directory.
. ./path.sh || exit 1;

# Positional arguments: <lang-dir> <decode-dir-in> <decode-dir-out>.
lang=$1
indir=$2
outdir=$3

# The model is looked up in the parent directory of the input decode dir.
mdl=$(dirname "$indir")/final.mdl
wbfile=$lang/phones/word_boundary.int

# Refuse to start if any required input file is missing.
for f in "$mdl" "$wbfile" "$indir/num_jobs"; do
  if [ ! -f "$f" ]; then
    echo "word_align_lattices.sh: no such file $f"
    exit 1
  fi
done

# Prepare the output directory; stop if any step fails rather than launching
# jobs against a directory that could not be set up.
mkdir -p "$outdir/log" || exit 1
cp "$indir/num_jobs" "$outdir" || exit 1
nj=$(cat "$indir/num_jobs") || exit 1

# $cmd is intentionally unquoted: it may hold a launcher plus its options
# (e.g. "queue.pl <queue opts>") and must be split into words.
# NOTE(review): JOB is presumably substituted by the launcher with each job
# index in 1..$nj -- confirm against run.pl/queue.pl.
# Each job reads lat.JOB.gz from the input dir through gunzip and writes
# lat.JOB.gz in the output dir through gzip.
$cmd "JOB=1:$nj" "$outdir/log/align.JOB.log" \
  lattice-align-words "--silence-label=$silence_label" --test=true \
  "$wbfile" "$mdl" "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c >$outdir/lat.JOB.gz" || exit 1;