#!/bin/bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
#           2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
#           2018 Emotech LTD (Author: Xuechen LIU)
# Apache 2.0

set -e

# number of jobs
nj=20
stage=1

. ./cmd.sh
[ -f ./path.sh ] && . ./path.sh;
. ./utils/parse_options.sh
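
# The --stage and --nj defaults above can be overridden on the command line
# (handled by utils/parse_options.sh).  A typical invocation from the recipe's
# top directory, resuming from the tri2 stage with 10 training jobs, would be:
#
#   local/run_gmm.sh --stage 4 --nj 10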

# nj for dev and test
# (the original "|| exit 1" sat inside the command substitution, where it could
# never abort the script; it belongs on the assignment itself)
dev_nj=$(wc -l <data/dev/spk2utt) || exit 1;
test_nj=$(wc -l <data/test/spk2utt) || exit 1;
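
# steps/decode.sh splits each decoding set by speaker, so the number of jobs
# must not exceed the number of speakers; the speaker count itself is the
# natural upper bound, hence one job per line of spk2utt.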

# Now make MFCC features.
if [ $stage -le 1 ]; then
  # mfccdir should be some place with a largish disk where you
  # want to store MFCC features.
  for x in train dev test; do
    steps/make_mfcc_pitch.sh --pitch-config conf/pitch.conf --cmd "$train_cmd" --nj $nj \
      data/$x exp/make_mfcc/$x mfcc || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1;
    utils/fix_data_dir.sh data/$x || exit 1;
  done
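
  # With the usual conf/mfcc.conf and conf/pitch.conf this typically yields
  # 13 MFCCs plus 3 appended pitch features (16 dims per frame) and per-speaker
  # CMVN stats.  Optional sanity check (assumes path.sh put the Kaldi binaries
  # on PATH):
  #
  #   feat-to-dim scp:data/train/feats.scp -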

  # subset the training data for fast startup
  for x in 100 300; do
    utils/subset_data_dir.sh data/train ${x}000 data/train_${x}k
  done
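
  # The 100k-utterance subset is used below for monophone training and the
  # 300k-utterance subset for the first triphone pass, before the full set
  # takes over.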
fi

# mono
if [ $stage -le 2 ]; then
  # training
  steps/train_mono.sh --cmd "$train_cmd" --nj $nj \
    data/train_100k data/lang exp/mono || exit 1;

  # decoding
  utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
    exp/mono/graph data/dev exp/mono/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
    exp/mono/graph data/test exp/mono/decode_test
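
  # steps/decode.sh hands scoring off to local/score.sh; the per-LMWT result
  # files (wer_* / cer_*, depending on the scoring script) land under
  # exp/mono/decode_{dev,test}.  If wer_* files are produced they can be
  # summarized with, e.g.:
  #
  #   grep WER exp/mono/decode_dev/wer_* | utils/best_wer.sh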

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train_300k data/lang exp/mono exp/mono_ali || exit 1;
fi

# tri1
if [ $stage -le 3 ]; then
  # training
  steps/train_deltas.sh --cmd "$train_cmd" \
    4000 32000 data/train_300k data/lang exp/mono_ali exp/tri1 || exit 1;
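
  # For steps/train_deltas.sh the two numbers are <num-leaves> (target number
  # of tied triphone states) and <tot-gauss> (total Gaussians shared across
  # them); the model uses delta + delta-delta features computed on the fly from
  # the MFCC+pitch input.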

  # decoding
  utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
    exp/tri1/graph data/dev exp/tri1/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
    exp/tri1/graph data/test exp/tri1/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
fi

# tri2
if [ $stage -le 4 ]; then
  # training
  steps/train_deltas.sh --cmd "$train_cmd" \
    7000 56000 data/train data/lang exp/tri1_ali exp/tri2 || exit 1;
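
  # tri2 is a larger delta-feature system: the same recipe as tri1 but on the
  # full training set, with more leaves and Gaussians, seeded from the tri1
  # alignments.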

  # decoding
  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
    exp/tri2/graph data/dev exp/tri2/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
    exp/tri2/graph data/test exp/tri2/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train data/lang exp/tri2 exp/tri2_ali || exit 1;
fi

# tri3
if [ $stage -le 5 ]; then
  # training [LDA+MLLT]
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    10000 80000 data/train data/lang exp/tri2_ali exp/tri3 || exit 1;
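
  # steps/train_lda_mllt.sh splices neighbouring frames (by default +/-3 on
  # each side), projects them with LDA (to 40 dimensions by default) and then
  # estimates an MLLT/STC transform on top, replacing the delta features used
  # by tri1/tri2.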

  # decoding
  utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --nj ${dev_nj} --config conf/decode.conf \
    exp/tri3/graph data/dev exp/tri3/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --nj ${test_nj} --config conf/decode.conf \
    exp/tri3/graph data/test exp/tri3/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train data/lang exp/tri3 exp/tri3_ali || exit 1;
  steps/align_si.sh --cmd "$train_cmd" --nj ${nj} \
    data/dev data/lang exp/tri3 exp/tri3_ali_dev || exit 1;
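
  # tri3 is the final GMM system of this script; its train (and dev) alignments
  # are typically what the later neural-network stages of the recipe build on.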
fi
echo "local/run_gmm.sh succeeded"
exit 0;