train_mono.sh
5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from ..
# Flat start and monophone training, with delta-delta features.
# This script applies cepstral mean normalization (per speaker).
# Begin configuration section.
nj=4
cmd=run.pl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
num_iters=40 # Number of iterations of training
max_iter_inc=30 # Last iter to increase #Gauss on.
totgauss=1000 # Target #Gaussians.
boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
realign_iters="1 2 3 4 5 6 7 8 9 10 12 14 16 18 20 23 26 29 32 35 38";
config= # name of config file.
stage=-4
power=0.25 # exponent to determine number of gaussians from occurrence counts
feat_dim=-1 # This option is now ignored but retained for compatibility.
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/train_mono.sh [options] <data-dir> <lang-dir> <exp-dir>"
echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --feat_dim <dim> # This option is ignored now but"
echo " # retained for back-compatibility."
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
dir=$3
oov_sym=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj;
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
example_feats="`echo $feats | sed s/JOB/1/g`";
echo "$0: Initializing monophone system."
[ ! -f $lang/phones/sets.int ] && exit 1;
shared_phones_opt="--shared-phones=$lang/phones/sets.int"
if [ $stage -le -3 ]; then
# Note: JOB=1 just uses the 1st part of the features-- we only need a subset anyway.
feat_dim=`feat-to-dim "$example_feats" - 2>/dev/null`
[ -z "$feat_dim" ] && echo "error getting feature dimension" && exit 1;
$cmd JOB=1 $dir/log/init.log \
gmm-init-mono $shared_phones_opt "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo $feat_dim \
$dir/0.mdl $dir/tree || exit 1;
fi
numgauss=`gmm-info --print-args=false $dir/0.mdl | grep gaussians | awk '{print $NF}'`
incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter increment for #Gauss
if [ $stage -le -2 ]; then
echo "$0: Compiling training graphs"
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
"ark:sym2int.pl --map-oov $oov_sym -f 2- $lang/words.txt < $sdata/JOB/text|" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
if [ $stage -le -1 ]; then
echo "$0: Aligning data equally (pass 0)"
$cmd JOB=1:$nj $dir/log/align.0.JOB.log \
align-equal-compiled "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" ark,t:- \| \
gmm-acc-stats-ali --binary=true $dir/0.mdl "$feats" ark:- \
$dir/0.JOB.acc || exit 1;
fi
# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
# we fail to est "rare" phones and later on, they never align properly.
if [ $stage -le 0 ]; then
gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss --power=$power \
$dir/0.mdl "gmm-sum-accs - $dir/0.*.acc|" $dir/1.mdl 2> $dir/log/update.0.log || exit 1;
rm $dir/0.*.acc
fi
beam=6 # will change to 10 below after 1st pass
# note: using slightly wider beams for WSJ vs. RM.
x=1
while [ $x -lt $num_iters ]; do
echo "$0: Pass $x"
if [ $stage -le $x ]; then
if echo $realign_iters | grep -w $x >/dev/null; then
echo "$0: Aligning data"
mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |"
$cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$[$beam*4] "$mdl" \
"ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" \
|| exit 1;
fi
$cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
gmm-acc-stats-ali $dir/$x.mdl "$feats" "ark:gunzip -c $dir/ali.JOB.gz|" \
$dir/$x.JOB.acc || exit 1;
$cmd $dir/log/update.$x.log \
gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss --power=$power $dir/$x.mdl \
"gmm-sum-accs - $dir/$x.*.acc|" $dir/$[$x+1].mdl || exit 1;
rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null
fi
if [ $x -le $max_iter_inc ]; then
numgauss=$[$numgauss+$incgauss];
fi
beam=10
x=$[$x+1]
done
( cd $dir; rm final.{mdl,occs} 2>/dev/null; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )
utils/summarize_warnings.pl $dir/log
echo Done
# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl "ark:gunzip -c $dir/ali.0.gz|" | head -4