egs/aishell2/s5/local/run_gmm.sh
#!/bin/bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
#           2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
#           2018 Emotech LTD (Author: Xuechen LIU)
# Apache 2.0

set -e

# number of jobs
nj=20
stage=1

. ./cmd.sh
[ -f ./path.sh ] && . ./path.sh;
. ./utils/parse_options.sh

# nj for dev and test
dev_nj=$(wc -l data/dev/spk2utt | awk '{print $1}' || exit 1;)
test_nj=$(wc -l data/test/spk2utt | awk '{print $1}' || exit 1;)

# Now make MFCC features.
if [ $stage -le 1 ]; then
  # mfccdir should be some place with a largish disk where you
  # want to store MFCC features.
  for x in train dev test; do
    steps/make_mfcc_pitch.sh --pitch-config conf/pitch.conf --cmd "$train_cmd" --nj $nj \
      data/$x exp/make_mfcc/$x mfcc || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1;
    utils/fix_data_dir.sh data/$x || exit 1;
  done

  # subset the training data for fast startup
  for x in 100 300; do
    utils/subset_data_dir.sh data/train ${x}000 data/train_${x}k
  done
fi

# mono
if [ $stage -le 2 ]; then
  # training
  steps/train_mono.sh --cmd "$train_cmd" --nj $nj \
    data/train_100k data/lang exp/mono || exit 1;

  # decoding
  utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
    exp/mono/graph data/dev exp/mono/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
    exp/mono/graph data/test exp/mono/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train_300k data/lang exp/mono exp/mono_ali || exit 1;
fi

# tri1
if [ $stage -le 3 ]; then
  # training
  steps/train_deltas.sh --cmd "$train_cmd" \
    4000 32000 data/train_300k data/lang exp/mono_ali exp/tri1 || exit 1;

  # decoding
  utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
    exp/tri1/graph data/dev exp/tri1/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
    exp/tri1/graph data/test exp/tri1/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
fi

# tri2
if [ $stage -le 4 ]; then
  # training
  steps/train_deltas.sh --cmd "$train_cmd" \
    7000 56000 data/train data/lang exp/tri1_ali exp/tri2 || exit 1;

  # decoding
  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
    exp/tri2/graph data/dev exp/tri2/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
    exp/tri2/graph data/test exp/tri2/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train data/lang exp/tri2 exp/tri2_ali || exit 1;
fi

# tri3
if [ $stage -le 5 ]; then
  # training [LDA+MLLT]
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    10000 80000 data/train data/lang exp/tri2_ali exp/tri3 || exit 1;

  # decoding
  utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1;
  steps/decode.sh --cmd "$decode_cmd" --nj ${dev_nj} --config conf/decode.conf \
    exp/tri3/graph data/dev exp/tri3/decode_dev
  steps/decode.sh --cmd "$decode_cmd" --nj ${test_nj} --config conf/decode.conf \
    exp/tri3/graph data/test exp/tri3/decode_test

  # alignment
  steps/align_si.sh --cmd "$train_cmd" --nj $nj \
    data/train data/lang exp/tri3 exp/tri3_ali || exit 1;
  steps/align_si.sh --cmd "$train_cmd" --nj ${nj} \
    data/dev data/lang exp/tri3 exp/tri3_ali_dev || exit 1;
fi

echo "local/run_gmm.sh succeeded"
exit 0;
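Usage note (a sketch, assuming the standard Kaldi recipe layout rather than anything stated in the file itself): the script is meant to be run from the egs/aishell2/s5 directory after the data, lang, and lang_test directories have been prepared. Because it sources utils/parse_options.sh, the nj and stage variables defined near the top can be overridden on the command line, for example:

    # resume from the tri1 stage with 40 parallel jobs,
    # skipping feature extraction and monophone training
    local/run_gmm.sh --nj 40 --stage 3

Each stage trains a model, builds a decoding graph with data/lang_test, decodes dev and test, and produces alignments consumed by the next stage; the final tri3 (LDA+MLLT) alignments are the usual starting point for the later neural-network stages of the recipe.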