run.sh
5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/bin/bash

# Load the site-specific settings before any stage runs.
#   cmd.sh  - you'll want to change it to something that will work on your
#             system; it relates to the queue. $train_cmd and $decode_cmd,
#             used by the stages below, are never assigned in this script, so
#             they are presumably defined there.
#   path.sh - so python3 is on the path if not on the system (we made a link
#             to utils/).
# Stop right away if either file is missing: a non-interactive bash would
# otherwise print an error for the failed `.` and carry on with an incomplete
# environment.
for cfg in ./cmd.sh ./path.sh; do
  if [ ! -f "$cfg" ]; then
    echo "$0: required file $cfg not found" >&2
    exit 1
  fi
done
. ./cmd.sh
. ./path.sh
# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.
# Corpus preparation. According to the original notes this downloads the
# corpus, prepares parallel lists of sound files and text files, and divides
# the corpus into train, dev and test sets. Any failing step aborts the run.
if ! local/sprak_data_prep.sh; then
  exit 1
fi
if ! utils/fix_data_dir.sh data/train; then
  exit 1
fi

# Text normalisation, dict folder and LM data transcriptions (as described by
# the original notes), then prepare_lang.sh is run on data/local/dict with
# "<UNK>" as its second argument, writing to data/lang.
# NOTE(review): local/copy_dict is called without a .sh suffix, unlike the
# other local/ scripts in this file -- confirm the file name.
if ! local/copy_dict; then
  exit 1
fi
if ! utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang_tmp data/lang; then
  exit 1
fi
# Feature extraction: MFCCs, then cepstral mean and variance normalisation
# (CMVN) statistics, then a clean-up pass, for the test and train sets.
#
# mfccdir should be some place with a largish disk where you want to store
# MFCC features.
# NOTE(review): mfccdir is assigned here, but the commands below pass the
# literal directory name "mfcc", exactly as the original script did.
mfccdir=mfcctest

# Extract MFCCs.
# Original notes: p was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in
# make_mfcc.sh because some wave files are corrupt; the step returns a warning
# message because of the corrupt audio files but computes the features anyway.
# If this step fails and prints a partial diff, rerun from sprak_data_prep.sh.
#
# "$train_cmd" is quoted, as in every other stage of this script, so that a
# multi-word command is passed as a single argument. The test-set call used to
# carry a stray extra "test" argument that the train-set call does not have;
# both sets now use the same <data-dir> <log-dir> <mfcc-dir> form.
for part in test train; do
  steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
    "data/$part" "exp/make_mfcc/$part" mfcc || exit 1
done

# Compute cepstral mean and variance normalisation statistics.
for part in test train; do
  steps/compute_cmvn_stats.sh "data/$part" "exp/make_mfcc/$part" mfcc || exit 1
done

# Repair the data sets (original notes: remove data points with corrupt audio).
for part in test train; do
  utils/fix_data_dir.sh "data/$part" || exit 1
done
# Train the language model with irstlm.
# Original notes: creates the 3g or 4g dictionary and, importantly, G.fst.
# Only the 4-gram model is built; the 3-gram call is kept disabled for reference:
#local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "3g" data/lang data/local/train3_lm &> data/local/3g.log &
lm_order=4
lm_name="${lm_order}g"
# stdout and stderr of the LM training both go to the log file.
local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq "$lm_order" "$lm_name" \
  data/lang "data/local/train${lm_order}_lm" > "data/local/${lm_name}.log" 2>&1 || exit 1
# Speed test: derive data/test120_p_spk from data/test, passing --per-spk and
# 120 (original note: only 120 utterances per speaker).
if ! utils/subset_data_dir.sh --per-spk data/test 120 data/test120_p_spk; then
  exit 1
fi
# Monophone system, trained on data/train.
# Original note: after this step one can see the alignment between frames and
# phones using the show_alignments command.
mono_dir=exp/mono
mono_graph="$mono_dir/graph_4g"
if ! steps/train_mono.sh --nj 10 --cmd "$train_cmd" data/train data/lang "$mono_dir"; then
  exit 1
fi

# Build the monophone decoding graph from data/lang_test_4g.
if ! utils/mkgraph.sh data/lang_test_4g "$mono_dir" "$mono_graph"; then
  exit 1
fi

# Decode data/test120_p_spk with that graph.
if ! steps/decode.sh --config conf/decode.config --nj 10 --cmd "$decode_cmd" \
    "$mono_graph" data/test120_p_spk "$mono_dir/decode"; then
  exit 1
fi

# Get alignments from the monophone system.
if ! steps/align_si.sh --nj 10 --cmd "$train_cmd" \
    data/train data/lang "$mono_dir" "${mono_dir}_ali"; then
  exit 1
fi
# tri1: first triphone pass, trained from the monophone alignments.
# (Variant left disabled by the original author: --boost-silence 1.25.)
# 5800 and 96000 are presumably the number of tree leaves and the total number
# of Gaussians -- confirm against steps/train_deltas.sh.
tri1_dir=exp/tri1
tri1_graph="$tri1_dir/graph_4g"
steps/train_deltas.sh --cmd "$train_cmd" 5800 96000 \
  data/train data/lang exp/mono_ali "$tri1_dir" || exit 1

# Make the decoding graph.
utils/mkgraph.sh data/lang_test_4g "$tri1_dir" "$tri1_graph" || exit 1

# Decode data/test120_p_spk.
steps/decode.sh --config conf/decode.config --nj 10 --cmd "$decode_cmd" \
  "$tri1_graph" data/test120_p_spk "$tri1_dir/decode_test120_p_spk" || exit 1

# Align data/train with tri1; the result lands in exp/tri1_ali.
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang "$tri1_dir" "${tri1_dir}_ali" || exit 1
# tri2a: deltas + delta-deltas, trained from the tri1 alignments.
tri2a_dir=exp/tri2a
tri2a_graph="$tri2a_dir/graph_4g"
if ! steps/train_deltas.sh --cmd "$train_cmd" \
    7500 125000 data/train data/lang exp/tri1_ali "$tri2a_dir"; then
  exit 1
fi

# Graph construction and decoding of data/test120_p_spk.
if ! utils/mkgraph.sh data/lang_test_4g "$tri2a_dir" "$tri2a_graph"; then
  exit 1
fi
if ! steps/decode.sh --nj 10 --cmd "$decode_cmd" \
    "$tri2a_graph" data/test120_p_spk "$tri2a_dir/decode_test120_p_spk"; then
  exit 1
fi
# tri2b: trained with steps/train_lda_mllt.sh from the tri1 alignments, using
# the splice options --left-context=5 --right-context=5.
tri2b_dir=exp/tri2b
tri2b_graph="$tri2b_dir/graph_4g"
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=5 --right-context=5" \
  7500 125000 data/train data/lang exp/tri1_ali "$tri2b_dir" || exit 1

# Graph construction and decoding of data/test120_p_spk.
utils/mkgraph.sh data/lang_test_4g "$tri2b_dir" "$tri2b_graph" || exit 1
steps/decode.sh --nj 10 --cmd "$decode_cmd" \
  "$tri2b_graph" data/test120_p_spk "$tri2b_dir/decode_test120_p_spk" || exit 1

# Align data/train with tri2b (--use-graphs true is passed to align_si.sh);
# the result lands in exp/tri2b_ali.
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  --use-graphs true data/train data/lang "$tri2b_dir" "${tri2b_dir}_ali" || exit 1
# tri3b: built from the 2b system; per the original note this is
# LDA + MLLT + SAT. Trained from the tri2b alignments.
tri3b_dir=exp/tri3b
tri3b_graph="$tri3b_dir/graph_4g"
if ! steps/train_sat.sh --cmd "$train_cmd" \
    7500 125000 data/train data/lang exp/tri2b_ali "$tri3b_dir"; then
  exit 1
fi

# Trying the 4-gram language model: build the graph, then decode
# data/test120_p_spk with decode_fmllr.sh.
if ! utils/mkgraph.sh data/lang_test_4g "$tri3b_dir" "$tri3b_graph"; then
  exit 1
fi
if ! steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 \
    "$tri3b_graph" data/test120_p_spk "$tri3b_dir/decode_test120_p_spk"; then
  exit 1
fi
# This is commented out for now as it's not important for the main recipe.
## Train RNN for reranking
#local/sprak_train_rnnlms.sh data/local/dict data/dev/transcripts.uniq data/local/rnnlms/g_c380_d1k_h100_v130k
## Consumes a lot of memory! Do not run in parallel
#local/sprak_run_rnnlms_tri3b.sh data/lang_test_3g data/local/rnnlms/g_c380_d1k_h100_v130k data/test1k exp/tri3b/decode_3g_test1k
# Align data/train with the 3b system; the result lands in exp/tri3b_ali.
tri3b_ali_dir=exp/tri3b_ali
steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b "$tri3b_ali_dir" || exit 1

# From the 3b alignments, train another SAT system (tri4a) on data/train.
tri4a_dir=exp/tri4a
tri4a_graph="$tri4a_dir/graph_4g"
steps/train_sat.sh --cmd "$train_cmd" \
  13000 300000 data/train data/lang "$tri3b_ali_dir" "$tri4a_dir" || exit 1

# Graph construction and decoding of data/test120_p_spk with decode_fmllr.sh.
utils/mkgraph.sh data/lang_test_4g "$tri4a_dir" "$tri4a_graph" || exit 1
steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
  "$tri4a_graph" data/test120_p_spk "$tri4a_dir/decode_test120_p_spk" || exit 1
# Alignments of data/train from tri4a; per the original note these are the
# alignments used to train the nnets.
if ! steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
    data/train data/lang exp/tri4a exp/tri4a_ali; then
  exit 1
fi

# Neural-network stage, called with "4g" and "test120_p_spk" as arguments.
# Original note: "Works".
if ! local/sprak_run_nnet_cpu.sh 4g test120_p_spk; then
  exit 1
fi
# Getting results [see RESULTS file]

#######################################
# For every directory matching exp/*/decode*, grep the WER lines out of its
# wer_* files and pipe them through utils/best_wer.sh.
# Matches that are not directories are skipped explicitly, so the status of
# this step no longer depends on what the last glob match happens to be (an
# unmatched glob or a trailing plain file used to leave a non-zero status).
# All expansions are quoted so directory names containing whitespace work.
# Arguments: none
# Outputs:   whatever utils/best_wer.sh prints, one invocation per directory
#######################################
report_best_wer() {
  local x
  for x in exp/*/decode*; do
    [ -d "$x" ] || continue
    grep WER "$x"/wer_* | utils/best_wer.sh
  done
}
report_best_wer