run.sh
4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/bin/bash
# End-to-end recipe driver. In order it: downloads the data, splits it and
# builds the language model and lang directories, extracts MFCC features,
# trains mono -> tri1 -> tri2b (LDA+MLLT) -> tri3b (SAT) systems, re-creates
# the lang directory with pronunciation/silence probabilities, and finally
# runs local/chain/run_tdnn.sh. The dev set is decoded along the way.
#
# Each numbered stage only runs when $stage is <= its number, so a run can be
# resumed part-way through. $data and $stage are assigned before
# utils/parse_options.sh is sourced (presumably so they can be overridden
# from the command line -- confirm against that script).
#
# $train_cmd and $decode_cmd are not set here; they are expected to come from
# one of the sourced files below (presumably cmd.sh -- confirm).

# Change this location to somewhere where you want to put the data.
data=$HOME/vystadial_cz

# Load training parameters
. ./env_voip_cs.sh
. ./cmd.sh
. ./path.sh

stage=0
. utils/parse_options.sh

# Strict mode is enabled only after the sourced helpers have run, as in the
# original ordering.
set -euo pipefail

# PIDs of the decoding subshells started in the background. They are waited
# on one by one at the end of the script so that a failed decode makes the
# whole run fail (a bare `wait` always returns 0).
bg_pids=()

mkdir -p "$data"

# Stage 0: fetch the corpus into $data.
if [ "$stage" -le 0 ]; then
  local/download_cs_data.sh "$data" || exit 1
fi

# Name of the language model; also used as its file name under data/local/lm.
lm="build3"

# Stage 1: data split, language model, dictionary and lang directories.
if [ "$stage" -le 1 ]; then
  local/data_split.sh --every_n 1 "$data" data "$lm" "dev test"

  local/create_LMs.sh data/local data/train/trans.txt \
    data/test/trans.txt data/local/lm "$lm"

  # -f: overwrite an existing .gz so that re-running this stage does not
  # abort (gzip refuses to overwrite by default, which trips `set -e`).
  gzip -f "data/local/lm/$lm"

  local/prepare_cs_transcription.sh data/local data/local/dict

  local/create_phone_lists.sh data/local/dict

  utils/prepare_lang.sh data/local/dict '_SIL_' data/local/lang data/lang

  utils/format_lm.sh data/lang "data/local/lm/$lm.gz" \
    data/local/dict/lexicon.txt data/lang_test

  # The later steps read the transcriptions from a file named "text".
  for part in dev test train; do
    mv "data/$part/trans.txt" "data/$part/text"
  done
fi

# Stage 2: MFCC features and CMVN statistics for dev and train.
if [ "$stage" -le 2 ]; then
  mfccdir=mfcc

  for part in dev train; do
    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 \
      "data/$part" "exp/make_mfcc/$part" "$mfccdir"
    steps/compute_cmvn_stats.sh \
      "data/$part" "exp/make_mfcc/$part" "$mfccdir"
  done

  # Get the shortest 10000 utterances first because those are more likely
  # to have accurate alignments.
  utils/subset_data_dir.sh --shortest data/train 10000 data/train_10kshort
fi

# train a monophone system
if [ "$stage" -le 3 ]; then
  steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
    data/train_10kshort data/lang exp/mono

  # Build the graph and decode in the background while alignment proceeds.
  (
    utils/mkgraph.sh data/lang_test \
      exp/mono exp/mono/graph
    for part in dev; do
      steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono/graph \
        "data/$part" "exp/mono/decode_$part"
    done
  ) &
  bg_pids+=("$!")

  steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
    data/train data/lang exp/mono exp/mono_ali_train
fi

# train a first delta + delta-delta triphone system on all utterances
if [ "$stage" -le 4 ]; then
  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
    2000 10000 data/train data/lang exp/mono_ali_train exp/tri1

  # decode using the tri1 model (in the background)
  (
    utils/mkgraph.sh data/lang_test \
      exp/tri1 exp/tri1/graph
    for part in dev; do
      steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri1/graph \
        "data/$part" "exp/tri1/decode_$part"
    done
  ) &
  bg_pids+=("$!")

  steps/align_si.sh --nj 10 --cmd "$train_cmd" \
    data/train data/lang exp/tri1 exp/tri1_ali_train
fi

# train an LDA+MLLT system.
if [ "$stage" -le 5 ]; then
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
    data/train data/lang exp/tri1_ali_train exp/tri2b

  # decode using the LDA+MLLT model (in the background)
  (
    utils/mkgraph.sh data/lang_test \
      exp/tri2b exp/tri2b/graph
    for part in dev; do
      steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph \
        "data/$part" "exp/tri2b/decode_$part"
    done
  ) &
  bg_pids+=("$!")

  # Align utts using the tri2b model
  steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
    data/train data/lang exp/tri2b exp/tri2b_ali_train
fi

# Train tri3b, which is LDA+MLLT+SAT
if [ "$stage" -le 6 ]; then
  steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
    data/train data/lang exp/tri2b_ali_train exp/tri3b

  # decode using the tri3b model (in the background)
  (
    utils/mkgraph.sh data/lang_test \
      exp/tri3b exp/tri3b/graph
    for part in dev; do
      steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
        exp/tri3b/graph "data/$part" \
        "exp/tri3b/decode_$part"
    done
  ) &
  bg_pids+=("$!")
fi

# Now we compute the pronunciation and silence probabilities from training data,
# and re-create the lang directory.
if [ "$stage" -le 7 ]; then
  steps/get_prons.sh --cmd "$train_cmd" \
    data/train data/lang exp/tri3b

  utils/dict_dir_add_pronprobs.sh --max-normalize true \
    data/local/dict \
    exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \
    exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict_sp

  utils/prepare_lang.sh data/local/dict_sp "_SIL_" \
    data/local/lang_tmp data/lang_sp

  utils/format_lm.sh data/lang_sp "data/local/lm/$lm.gz" \
    data/local/dict_sp/lexicon.txt data/lang_sp_test

  steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
    data/train data/lang_sp exp/tri3b exp/tri3b_ali_train_sp
fi

if [ "$stage" -le 8 ]; then
  # Test the tri3b system with the silprobs and pron-probs.
  # decode using the tri3b model (this one runs in the foreground)
  utils/mkgraph.sh data/lang_sp_test \
    exp/tri3b exp/tri3b/graph_sp

  for part in dev; do
    steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
      exp/tri3b/graph_sp "data/$part" \
      "exp/tri3b/decode_sp_$part"
  done
fi

# Train a chain model
if [ "$stage" -le 9 ]; then
  local/chain/run_tdnn.sh --stage 0
fi

# Don't finish until all background decoding jobs are finished, and report a
# failure if any of them exited non-zero. `set -e` does not cover background
# jobs, so each PID is waited on explicitly.
# The ${arr[@]+...} form keeps `set -u` from rejecting an empty array on
# older bash versions (no background job was started, e.g. with --stage 7).
rc=0
for pid in ${bg_pids[@]+"${bg_pids[@]}"}; do
  if ! wait "$pid"; then
    echo "$0: background decoding job (pid $pid) failed" >&2
    rc=1
  fi
done
exit "$rc"