run_chain_common.sh
2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
# lattices, and builds a tree for the new topology
set -e
stage=11
# input directory names. These options are actually compulsory, and they have
# been named for convenience
gmm_dir=
ali_dir=
lores_train_data_dir=
num_leaves=6000
# output directory names. They are also compulsory.
lang=
lat_dir=
tree_dir=
# End configuration section.
echo "$0 $@" # Print the command line for logging
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1;
[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1;
[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1;
for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done
if [ $stage -le 11 ]; then
echo "$0: creating lang directory with one state per phone."
# Create a version of the lang/ directory that has one state per phone in the
# topo file. [note, it really has two states.. the first one is only repeated
# once, the second one has zero or more repeats.]
if [ -d $lang ]; then
if [ $lang/L.fst -nt data/lang/L.fst ]; then
echo "$0: $lang already exists, not overwriting it; continuing"
else
echo "$0: $lang already exists and seems to be older than data/lang..."
echo " ... not sure what to do. Exiting."
exit 1;
fi
else
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on may have to tune this
# topology.
steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi
fi
if [ $stage -le 12 ]; then
# Get the alignments as lattices (gives the chain training more freedom).
# use the same num-jobs as the alignments
nj=$(cat ${ali_dir}/num_jobs) || exit 1;
steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
$lang $gmm_dir $lat_dir
rm $lat_dir/fsts.*.gz # save space
fi
if [ $stage -le 13 ]; then
# Build a tree using our new topology. We know we have alignments for the
# speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
# those.
if [ -f $tree_dir/final.mdl ]; then
echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
exit 1;
fi
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
--context-opts "--context-width=2 --central-position=1" \
--cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir
fi
exit 0;