run.sh
2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/bin/bash
# Recipe driver: prepares data and features for the corpus in $CORPUS_DIR,
# builds the language resources, trains the models and decodes the test set.
# Must be started from the directory that holds path.sh and cmd.sh.

# Environment setup; give up immediately if either file cannot be sourced.
if ! . ./path.sh; then exit 1; fi
if ! . ./cmd.sh; then exit 1; fi

#---------------
# Configuration
#---------------
# Assigned after cmd.sh is sourced, so these values replace any train_cmd /
# decode_cmd that cmd.sh may have defined.
train_cmd=utils/run.pl
decode_cmd=utils/run.pl

# Directory that is tested for before the corpus archive is fetched.
CORPUS_DIR=CorpusDimex100

# Passed positionally to the triphone training scripts.
N_HMM=2000 # leaves
N_GAUSSIANS=11000
#################
# Download corpus
#################
echo
echo Downloading corpus
echo
# Nothing is fetched once the corpus directory exists.
# NOTE(review): assumes the archive unpacks into $CORPUS_DIR -- confirm.
if [ ! -d "$CORPUS_DIR" ]; then
  # -c makes wget resume into (or reuse) an existing DVDCorpusDimex100.zip.
  # Without it, a re-run after an interrupted download stores the new copy as
  # DVDCorpusDimex100.zip.1 while unzip below still opens the stale partial
  # file.
  wget -c "http://turing.iimas.unam.mx/~luis/DIME/DIMEx100/DVD/DVDCorpusDimex100.zip" || exit 1
  unzip DVDCorpusDimex100.zip || exit 1
fi
##################
# Data preparation
##################
echo
echo Data preparation
echo
# Start from a clean slate: outputs of any previous run are discarded.
rm -rf data exp mfcc
# Later stages read data/train and data/test, so stop here if preparation
# fails instead of continuing and failing later with a less obvious error.
local/data_prep.sh "$CORPUS_DIR" || exit 1
utils/fix_data_dir.sh "data/train" || exit 1
utils/fix_data_dir.sh "data/test" || exit 1
#####################
# Features generation
#####################
echo
echo Features generation
echo
# MFCC extraction for both sets; logs go under exp/make_mfcc/<set> and the
# feature files under mfcc. Training cannot proceed without them, so any
# failure aborts the run.
steps/make_mfcc.sh --cmd "$train_cmd" "data/train" "exp/make_mfcc/train" mfcc || exit 1
steps/make_mfcc.sh --cmd "$train_cmd" "data/test" "exp/make_mfcc/test" mfcc || exit 1
# CMVN statistics, using the same log and output directories as above.
steps/compute_cmvn_stats.sh "data/train" "exp/make_mfcc/train" mfcc || exit 1
steps/compute_cmvn_stats.sh "data/test" "exp/make_mfcc/test" mfcc || exit 1
# Validation is advisory: its exit status is deliberately not checked, so
# problems are reported on the console without stopping the recipe.
utils/validate_data_dir.sh "data/train"
utils/validate_data_dir.sh "data/test"
#######################
# Lang data preparation
#######################
echo
echo Language data preparation
echo
# Rebuild the dictionary directory from scratch on every run.
rm -rf data/local/dict
local/lang_prep.sh "$CORPUS_DIR" || exit 1
# "<UNK>" is passed as the out-of-vocabulary word; data/local/lang is the
# intermediate directory and data/lang the final one.
utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1
# Abort on failure: training reads data/train and data/lang next.
utils/fix_data_dir.sh "data/train" || exit 1
utils/fix_data_dir.sh "data/test" || exit 1
############################
# Language model preparation
############################
echo
echo Language model preparation
echo
# Stop if the language model cannot be prepared; the exit status was
# previously ignored and the recipe carried on into training regardless.
local/lm_prep.sh || exit 1
#######################
# Training and Decoding
#######################
echo
echo Training
echo
# utils/subset_data_dir.sh --first data/train 500 data/train_500
# Training and aligning
# Each stage consumes the output directory of the previous one, so every
# command aborts the recipe on failure. All stages receive --cmd "$train_cmd"
# so that changing train_cmd affects the whole chain, not just part of it.
steps/train_mono.sh --cmd "$train_cmd" data/train data/lang exp/mono || exit 1
steps/align_si.sh --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_aligned || exit 1
steps/train_deltas.sh --cmd "$train_cmd" "$N_HMM" "$N_GAUSSIANS" data/train data/lang exp/mono_aligned exp/tri1 || exit 1
steps/align_si.sh --cmd "$train_cmd" data/train data/lang exp/tri1 exp/tri1_aligned || exit 1
# train tri2b [LDA+MLLT]
steps/train_lda_mllt.sh --cmd "$train_cmd" "$N_HMM" "$N_GAUSSIANS" data/train data/lang exp/tri1_aligned exp/tri2b || exit 1
# The graph built here is the one handed to the decoder later on; without it
# decoding cannot run, so a failure is fatal.
utils/mkgraph.sh data/lang exp/tri2b exp/tri2b/graph || exit 1
steps/align_si.sh --cmd "$train_cmd" data/train data/lang exp/tri2b exp/tri2b_aligned || exit 1
# Do MMI on top of LDA+MLLT.
steps/make_denlats.sh --cmd "$train_cmd" data/train data/lang exp/tri2b exp/tri2b_denlats || exit 1
steps/train_mmi.sh --cmd "$train_cmd" --boost 0.05 data/train data/lang exp/tri2b_aligned exp/tri2b_denlats exp/tri2b_mmi_b0.05 || exit 1
# Decoding
echo
echo Decoding
echo
# Decode data/test with the graph from exp/tri2b, writing the results into
# the MMI model's directory. Abort if decoding fails so that no stale or
# missing scores are summarised below.
steps/decode.sh --config conf/decode.config --cmd "$decode_cmd" exp/tri2b/graph data/test exp/tri2b_mmi_b0.05/decode_test || exit 1
# Report the best WER line for every decode directory found under exp/.
# An explicit `if` (rather than `[ -d ] && ...`) keeps a non-directory match,
# or an unmatched glob, from becoming the loop's exit status; "$x" is quoted
# so paths with unusual characters are handled as a single word.
for x in exp/*/decode*; do
  if [ -d "$x" ]; then
    grep WER "$x"/wer_* | utils/best_wer.sh
  fi
done