Blame view
egs/aurora4/s5/run.sh
5.41 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
#!/bin/bash

# Aurora4 recipe driver.
#
# Stages, in order:
#   1. Data / dictionary / lang preparation (local/*, utils/prepare_lang.sh).
#   2. MFCC + CMVN extraction into data/<set>, fbank extraction into
#      data-fbank/<set>.
#   3. train_mono -> train_deltas -> train_lda_mllt on train_si84_multi
#      (exp/mono0a_multi, exp/tri1_multi, exp/tri2a_multi, exp/tri2b_multi),
#      plus decoding and alignment with tri2b_multi.
#   4. DBN pre-training and two rounds of nnet training/decoding
#      (exp/tri3a_dnn_pretrain, exp/tri3a_dnn, exp/tri4a_dnn).
#
# Every step aborts the script with status 1 on failure, so a later stage
# never runs on the output of a failed earlier stage.

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

#clean LDC wsj0 corpus available in CLSP server: /export/corpora5/LDC/LDC93S6B
#aurora4 directory in CLSP server: /export/corpora5/AURORA
#aurora4=/mnt/spdb/aurora4
aurora4=/export/corpora5/AURORA
#we need lm, trans, from WSJ0 CORPUS
#wsj0=/mnt/spdb/wall_street_journal
wsj0=/export/corpora5/LDC/LDC93S6B

local/aurora4_data_prep.sh "$aurora4" "$wsj0" || exit 1

local/wsj_prepare_dict.sh || exit 1

utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1

local/aurora4_format_data.sh || exit 1

# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc
for x in train_si84_clean train_si84_multi test_eval92 test_0166 dev_0330 dev_1206; do
  steps/make_mfcc.sh --nj 10 \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1
done

# make fbank features
fbankdir=fbank
mkdir -p data-fbank || exit 1
for x in train_si84_clean train_si84_multi dev_0330 dev_1206 test_eval92 test_0166; do
  # Copy the *contents* of data/$x into data-fbank/$x. A plain
  # "cp -r data/$x data-fbank/$x" would create data-fbank/$x/$x when the
  # destination already exists (i.e. whenever this stage is re-run).
  mkdir -p "data-fbank/$x" || exit 1
  cp -r "data/$x/." "data-fbank/$x/" || exit 1
  steps/make_fbank.sh --nj 10 \
    "data-fbank/$x" "exp/make_fbank/$x" "$fbankdir" || exit 1
done

# Note: the --boost-silence option should probably be omitted by default
# for normal setups.  It doesn't always help. [it's to discourage non-silence
# models from modeling silence.]
#steps/train_mono.sh --boost-silence 1.25 --nj 10 \
#  data/train_si84_clean data/lang exp/mono0a || exit 1;

steps/train_mono.sh --boost-silence 1.25 --nj 10 \
  data/train_si84_multi data/lang exp/mono0a_multi || exit 1

#(
# utils/mkgraph.sh data/lang_test_tgpr exp/mono0a exp/mono0a/graph_tgpr && \
# steps/decode.sh --nj 8 \
#   exp/mono0a/graph_tgpr data/test_eval92 exp/mono0a/decode_tgpr_eval92
#) &

#steps/align_si.sh --boost-silence 1.25 --nj 10 \
#   data/train_si84_clean data/lang exp/mono0a exp/mono0a_ali || exit 1;
steps/align_si.sh --boost-silence 1.25 --nj 10 \
  data/train_si84_multi data/lang exp/mono0a_multi exp/mono0a_multi_ali || exit 1

#steps/train_deltas.sh --boost-silence 1.25 \
#    2000 10000 data/train_si84_clean data/lang exp/mono0a_ali exp/tri1 || exit 1;
steps/train_deltas.sh --boost-silence 1.25 \
  2000 10000 data/train_si84_multi data/lang exp/mono0a_multi_ali exp/tri1_multi || exit 1

steps/align_si.sh --nj 10 \
  data/train_si84_multi data/lang exp/tri1_multi exp/tri1_multi_ali_si84 || exit 1

steps/train_deltas.sh \
  2500 15000 data/train_si84_multi data/lang exp/tri1_multi_ali_si84 exp/tri2a_multi || exit 1

steps/train_lda_mllt.sh \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 15000 data/train_si84_multi data/lang exp/tri1_multi_ali_si84 exp/tri2b_multi || exit 1

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2b_multi exp/tri2b_multi/graph_tgpr_5k || exit 1
steps/decode.sh --nj 8 \
  exp/tri2b_multi/graph_tgpr_5k data/test_eval92 exp/tri2b_multi/decode_tgpr_5k_eval92 || exit 1

# Align tri2b system with si84 multi-condition data.
steps/align_si.sh --nj 10 \
  --use-graphs true data/train_si84_multi data/lang exp/tri2b_multi exp/tri2b_multi_ali_si84 || exit 1

steps/align_si.sh --nj 10 \
  data/dev_0330 data/lang exp/tri2b_multi exp/tri2b_multi_ali_dev_0330 || exit 1

steps/align_si.sh --nj 10 \
  data/dev_1206 data/lang exp/tri2b_multi exp/tri2b_multi_ali_dev_1206 || exit 1

#Now begin train DNN systems on multi data

. ./path.sh

# NOTE(review): $cuda_cmd is not set in this file; presumably cmd.sh defines
# it -- confirm. It is left unquoted on purpose so that a multi-word command
# (launcher plus its options) expands into separate words.

#RBM pretrain
dir=exp/tri3a_dnn_pretrain
# The original did not check this step; without the check a failed
# pre-training run would fall through to nnet training with a missing DBN.
$cuda_cmd "$dir/_pretrain_dbn.log" \
  steps/nnet/pretrain_dbn.sh --nn-depth 7 --rbm-iter 3 \
  data-fbank/train_si84_multi "$dir" || exit 1

dir=exp/tri3a_dnn
ali=exp/tri2b_multi_ali_si84
ali_dev=exp/tri2b_multi_ali_dev_0330
feature_transform=exp/tri3a_dnn_pretrain/final.feature_transform
dbn=exp/tri3a_dnn_pretrain/7.dbn
$cuda_cmd "$dir/_train_nnet.log" \
  steps/nnet/train.sh --feature-transform "$feature_transform" --dbn "$dbn" --hid-layers 0 --learn-rate 0.008 \
  data-fbank/train_si84_multi data-fbank/dev_0330 data/lang "$ali" "$ali_dev" "$dir" || exit 1

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3a_dnn exp/tri3a_dnn/graph_tgpr_5k || exit 1
# dir is re-assigned so this command can be pasted into a shell on its own.
dir=exp/tri3a_dnn
steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
  exp/tri3a_dnn/graph_tgpr_5k data-fbank/test_eval92 "$dir/decode_tgpr_5k_eval92" || exit 1

#realignments
srcdir=exp/tri3a_dnn
steps/nnet/align.sh --nj 10 \
  data-fbank/train_si84_multi data/lang "$srcdir" "${srcdir}_ali_si84_multi" || exit 1
steps/nnet/align.sh --nj 10 \
  data-fbank/dev_0330 data/lang "$srcdir" "${srcdir}_ali_dev_0330" || exit 1

#train system again
dir=exp/tri4a_dnn
ali=exp/tri3a_dnn_ali_si84_multi
ali_dev=exp/tri3a_dnn_ali_dev_0330
feature_transform=exp/tri3a_dnn_pretrain/final.feature_transform
dbn=exp/tri3a_dnn_pretrain/7.dbn
$cuda_cmd "$dir/_train_nnet.log" \
  steps/nnet/train.sh --feature-transform "$feature_transform" --dbn "$dbn" --hid-layers 0 --learn-rate 0.008 \
  data-fbank/train_si84_multi data-fbank/dev_0330 data/lang "$ali" "$ali_dev" "$dir" || exit 1

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri4a_dnn exp/tri4a_dnn/graph_tgpr_5k || exit 1
# dir is re-assigned so this command can be pasted into a shell on its own.
dir=exp/tri4a_dnn
steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
  exp/tri4a_dnn/graph_tgpr_5k data-fbank/test_eval92 "$dir/decode_tgpr_5k_eval92" || exit 1

# DNN Sequential DT training
#......