run_dnn.sh
#!/bin/bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# 2014 Guoguo Chen
# Apache 2.0
# This example script trains a DNN on top of fMLLR features.
# The training is done in 3 stages:
#
# 1) RBM pre-training:
#    in this unsupervised stage we train a stack of RBMs,
#    a good starting point for frame cross-entropy training.
# 2) frame cross-entropy training:
#    the objective is to classify frames into the correct pdfs.
# 3) sequence-training optimizing sMBR:
#    the objective is to emphasize state-sequences with better
#    frame accuracy w.r.t. the reference alignment.
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
. ./path.sh ## Source the tools/utils (import the queue.pl)
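# As a rough sketch (adjust to your cluster), a minimal cmd.sh for a single
# machine without a grid engine could simply define the commands used below:
#   export train_cmd=run.pl
#   export decode_cmd=run.pl
#   export cuda_cmd="run.pl --gpu 1"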
# Config:
gmmdir=exp/tri5a
data_fmllr=data-fmllr-tri5a
stage=0 # resume training with --stage=N
# End of config.
. utils/parse_options.sh || exit 1;
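# For example, to skip the already-finished stages and resume from the
# sequence-training part, this script could be re-run as:
#   ./run_dnn.sh --stage 3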
#
if [ $stage -le 0 ]; then
  # Store fMLLR features, so we can train on them easily.
  # dev
  dir=$data_fmllr/dev
  steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
     --transform-dir $gmmdir/decode \
     $dir data/dev $gmmdir $dir/log $dir/data || exit 1
  # train
  dir=$data_fmllr/train
  steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
     --transform-dir ${gmmdir}_ali \
     $dir data/train $gmmdir $dir/log $dir/data || exit 1
  # split the data : 90% train, 10% cross-validation (held-out)
  utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 || exit 1
fi
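# Note: the train_tr90 and train_cv10 sets created above are used below as the
# DNN training data and the held-out cross-validation data, respectively.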
if [ $stage -le 1 ]; then
  # Pre-train DBN, i.e. a stack of RBMs
  dir=exp/dnn5b_pretrain-dbn
  (tail --pid=$$ -F $dir/log/pretrain_dbn.log 2>/dev/null)& # forward log
  $cuda_cmd $dir/log/pretrain_dbn.log \
    steps/nnet/pretrain_dbn.sh --rbm-iter 1 --nn-depth 4 --hid-dim 2000 \
    $data_fmllr/train $dir || exit 1;
fi
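# Pre-training leaves behind a feature transform (final.feature_transform) and
# the pre-trained stack of RBMs (4.dbn, matching --nn-depth 4); both are used
# to initialize the DNN in the next stage.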
if [ $stage -le 2 ]; then
  # Train the DNN optimizing per-frame cross-entropy.
  dir=exp/dnn5b_pretrain-dbn_dnn
  ali=${gmmdir}_ali
  feature_transform=exp/dnn5b_pretrain-dbn/final.feature_transform
  dbn=exp/dnn5b_pretrain-dbn/4.dbn
  (tail --pid=$$ -F $dir/log/train_nnet.log 2>/dev/null)& # forward log
  # Train
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
    $data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir || exit 1;
  # Decode with the trigram language model.
  steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
    --config conf/decode_dnn.config --acwt 0.1 \
    $gmmdir/graph $data_fmllr/dev \
    $dir/decode || exit 1;
fi
# Sequence training using the sMBR criterion; we do stochastic gradient descent
# with per-utterance updates. We use acwt 0.1, which is usually a good value.
# Lattices are re-generated after the 1st epoch, to get faster convergence.
dir=exp/dnn5b_pretrain-dbn_dnn_smbr
srcdir=exp/dnn5b_pretrain-dbn_dnn
acwt=0.1
if [ $stage -le 3 ]; then
  # First we generate lattices and alignments:
  steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
    $data_fmllr/train data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 10 --sub-split 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
    --acwt $acwt $data_fmllr/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
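# The alignments (${srcdir}_ali) and denominator lattices (${srcdir}_denlats)
# generated above supply the reference targets and the competing hypotheses
# for the sMBR re-training in the next stage.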
if [ $stage -le 4 ]; then
  # Re-train the DNN by 1 iteration of sMBR
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt $acwt --do-smbr true \
    $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode (reuse HCLG graph)
  for ITER in 1; do
    # Decode with the trigram language model.
    steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
      --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      $gmmdir/graph $data_fmllr/dev \
      $dir/decode || exit 1;
  done
fi
# Re-generate lattices, run 2 more sMBR iterations
dir=exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats
srcdir=exp/dnn5b_pretrain-dbn_dnn_smbr
acwt=0.0909
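# (0.0909 is roughly 1/11, i.e. it corresponds to a language-model weight of about 11.)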
if [ $stage -le 5 ]; then
  # First we generate lattices and alignments:
  #steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
  #  $data_fmllr/train data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 10 --sub-split 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
    --acwt $acwt $data_fmllr/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
if [ $stage -le 6 ]; then
  # Re-train the DNN by 2 iterations of sMBR
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 2 --acwt $acwt --do-smbr true \
    $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode (reuse HCLG graph)
  for ITER in 1 2; do
    # Decode with the trigram language model.
    steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
      --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      $gmmdir/graph $data_fmllr/dev \
      $dir/decode_it${ITER} || exit 1;
  done
fi
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done