#!/bin/bash
# Multilingual setup for SGMMs.
# Caution: this is just a stub, intended to show some others what to do; it
# is not functional yet.  In particular, the programs mentioned in the comments
# below (gmm-convert, convert-tree, merge-tree, gmm-merge) still need to be
# written.
#
# We treat the WSJ setup as the "other language" -- in fact it's the same
# language, of course, but we treat the phones there as a distinct set.
# The only important thing is that the WSJ data has the same sample rate as
# the RM data.
#
# Directory layout used below:
#   data_ml/  prefixed and merged "lang" and data directories
#   exp_ml/   converted, merged and newly trained models
# ("ml" stands for "multilingual").

# $train_cmd and $decode_cmd are used below but are not set anywhere in this
# file.  If the caller has not provided them, pick them up from ./cmd.sh when
# that file exists (presumably where this recipe defines them -- TODO confirm).
if [ -z "${train_cmd:-}" ] || [ -z "${decode_cmd:-}" ]; then
  if [ -f ./cmd.sh ]; then
    . ./cmd.sh
  fi
fi

mkdir -p data_ml exp_ml || exit 1;

# Add the language prefix to all the words and phones:
utils/add_lang_prefix.sh data/lang rm: data_ml/lang_rm || exit 1;
utils/add_lang_prefix.sh ../../wsj/s5/data/lang wsj: data_ml/lang_wsj || exit 1;

# Add the prefix to all the words, utterance-ids, and speaker-ids.
utils/add_data_prefix.sh data/train rm: data_ml/train_rm || exit 1;
utils/add_data_prefix.sh ../../wsj/s5/data/train_si284 wsj: data_ml/train_si284_wsj || exit 1;

# Merge the "lang" directories.  This will change the phones.txt and words.txt,
# to incorporate all the symbols in the original setups.  The merged directory
# is data_ml/lang, which is the one every later step in this script reads.
utils/merge_lang.sh data_ml/lang_rm data_ml/lang_wsj data_ml/lang || exit 1;

# Note: combine_data.sh takes the destination directory first, then the sources.
utils/combine_data.sh data_ml/train data_ml/train_rm data_ml/train_si284_wsj || exit 1;

# The calls to utils/convert_models.sh below convert the RM LDA+MLLT system
# (exp/tri2b) and the WSJ tri4b system to use the new "lang" directory.
# This script converts the models in the directory to use the new integer values
# for the phones, as in data_ml/lang.
# Everything else will be copied.  The only thing changed in the models is
# the transition-ids.  We'll need a program call like
#   gmm-convert <phone-map-file> <model-in> <model-out>
# where each line of phone-map-file has two fields, (phone-in phone-out).
# This will just affect the transition model, by mapping all the phone-ids.
# We'll also need a program
#   convert-tree <phone-map-file> <tree-in> <tree-out>
utils/convert_models.sh exp/tri2b data_ml/lang_rm exp_ml/tri2b_rm data_ml/lang || exit 1;
utils/convert_models.sh ../../wsj/s5/exp/tri4b data_ml/lang_wsj exp_ml/tri4b_wsj data_ml/lang || exit 1;

# Re-do the alignment of the RM tri2b setup with the converted models
# (this avoids the hassle of converting the alignment.)
steps/align_si.sh --nj 8 --cmd "$train_cmd" data_ml/train_rm data_ml/lang exp_ml/tri2b_rm \
  exp_ml/tri2b_rm_ali || exit 1;

# Now, starting from those alignments, train an LDA+MLLT+SAT system for RM that
# uses the same LDA+MLLT matrix as the WSJ system.  The training script takes
# this from the alignment directory, so it's sufficient to put it there:
cp exp_ml/tri4b_wsj/final.mat exp_ml/tri2b_rm_ali/final.mat || exit 1;

# (An earlier, duplicate train_sat.sh call that read the un-prefixed data/train
# and data/lang -- whose phone ids do not match the converted alignments -- and
# wrote to exp_ml/tri3b_rm_ali, which nothing below reads, has been dropped.)
steps/train_sat.sh 1800 9000 data_ml/train_rm data_ml/lang exp_ml/tri2b_rm_ali \
  exp_ml/tri3b_rm || exit 1;

# Now merge the RM and WSJ models.  This will create trees and transition-models
# that handle the two (disjoint) sets of phones that the RM and WSJ models
# contain.  We'll need a program "merge-tree" and a program "gmm-merge".  The
# "merge-tree" program will need, for each tree, a record of which sets of
# phones it was supposed to handle, since this is not recorded in the tree
# itself -- we can get this from the transition models, which do record this.
# Probably the "merge-tree" program will have usage:
#   merge-tree <tree1> <phone-set-1> <tree2> <phone-set-2> ... <tree-out>
# where the phone-set-n's will probably be filenames that contain lists of
# the phones.
# The "gmm-merge" program will have the usage:
#   gmm-merge <model1> <model2> ... <model-out>
# The models live under exp_ml/ (not data_ml/), and the merged result
# exp_ml/tri4b is what the alignment step right below reads.
steps/merge_models.sh exp_ml/tri3b_rm exp_ml/tri4b_wsj exp_ml/tri4b || exit 1;

steps/align_fmllr.sh --nj 32 --cmd "$train_cmd" data_ml/train data_ml/lang exp_ml/tri4b \
  exp_ml/tri4b_ali || exit 1;

steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 600 \
  data_ml/train data_ml/lang exp_ml/tri4b_ali exp_ml/ubm5a || exit 1;

# Use slightly larger SGMM parameters than the WSJ setup.
steps/train_sgmm2.sh --cmd "$train_cmd" \
  15000 30000 data_ml/train data_ml/lang exp_ml/tri4b_ali \
  exp_ml/ubm5a/final.ubm exp_ml/sgmm2_5a || exit 1;

# This convert_models.sh call will also have the effect of subsetting
# the model, because some of the phones are undefined in the destination.
# We should make sure that the programs "gmm-convert" and "convert-tree"
# accept a phone map that does not map all of the phones we have -- it would
# delete those phones.  The --reduce option to the script would be passed
# into those programs, and confirm to them that that's "really" what we want
# to do.
utils/convert_models.sh --reduce true exp_ml/sgmm2_5a data_ml/lang exp/sgmm2_5c_ml data/lang || exit 1;

# Build the decoding graphs and decode in a background subshell; an "exit 1"
# in here only terminates the subshell.
# NOTE(review): the test sets, language-model directories and --transform-dir
# paths below (dev93/eval92, tgpr/bd_tgpr, exp/tri4b) look like they were
# copied from the WSJ recipe; confirm that they exist in this setup or replace
# them with the RM equivalents.
(
  utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5c_ml exp/sgmm2_5c_ml/graph_tgpr || exit 1;
  steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
    exp/sgmm2_5c_ml/graph_tgpr data/test_dev93 exp/sgmm2_5c_ml/decode_tgpr_dev93
  steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
    exp/sgmm2_5c_ml/graph_tgpr data/test_eval92 exp/sgmm2_5c_ml/decode_tgpr_eval92

  utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm2_5c_ml exp/sgmm2_5c_ml/graph_bd_tgpr || exit 1;
  steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
    exp/sgmm2_5c_ml/graph_bd_tgpr data/test_dev93 exp/sgmm2_5c_ml/decode_bd_tgpr_dev93
  steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
    exp/sgmm2_5c_ml/graph_bd_tgpr data/test_eval92 exp/sgmm2_5c_ml/decode_bd_tgpr_eval92
) &