Blame view
egs/sprakbanken/s5/run.sh
6.18 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
#!/bin/bash

# Sprakbanken recipe: GMM-HMM pipeline (mono -> tri3b SAT) followed by
# optional nnet3/chain systems. Stages are selectable via --stage.

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
. ./path.sh # so python3 is on the path if not on the system (we made a link to utils/).

nj=12
stage=0

. utils/parse_options.sh

if [ $stage -le 0 ]; then
  # Download the corpus, build parallel lists of sound/text files, and
  # split everything into train, dev and test sets.
  local/sprak_data_prep.sh || exit 1;
fi

if [ $stage -le 1 ]; then
  # Text normalisation, dict folder and LM transcription data.
  # This setup reuses previously prepared data; eSpeak must be installed
  # and on PATH to regenerate it with dict_prep.sh instead.
  # local/dict_prep.sh || exit 1;
  local/copy_dict.sh || exit 1;
fi

if [ $stage -le 2 ]; then
  utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang_tmp data/lang || exit 1;
fi

if [ $stage -le 3 ]; then
  # MFCC extraction.
  # 'p' was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh
  # because some wave files are corrupt; a warning is printed for those but the
  # remaining features are still computed.
  # If this stage fails and prints a partial diff, rerun from sprak_data_prep.sh.
  for dataset in train test dev; do
    steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" data/$dataset || exit 1;
    # Cepstral mean and variance normalisation.
    steps/compute_cmvn_stats.sh data/$dataset || exit 1;
    # Drop data points whose audio turned out to be corrupt.
    utils/fix_data_dir.sh data/$dataset || exit 1;
  done

  # Subset of the training data: the 120k shortest utterances (for mono training).
  utils/subset_data_dir.sh --shortest data/train 120000 data/train_120kshort || exit 1;
fi

if [ $stage -le 4 ]; then
  # 3-gram and 4-gram LMs trained with IRSTLM.
  local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "tg" data/lang data/local/train3_lm &> data/local/tg.log || exit 1;
  local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 4 "fg" data/lang data/local/train4_lm &> data/local/fg.log || exit 1;
fi

if [ $stage -le 5 ]; then
  # Monophone model trained on the short-utterance subset.
  steps/train_mono.sh --nj $nj --cmd "$train_cmd" \
    data/train_120kshort data/lang exp/mono0a || exit 1;
  utils/mkgraph.sh --mono data/lang_test_tg exp/mono0a exp/mono0a/graph_tg || exit 1;
  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
    exp/mono0a/graph_tg data/dev exp/mono0a/decode_tg_dev || exit 1;
fi

if [ $stage -le 6 ]; then
  # tri1: first triphone pass (delta+delta-delta features).
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    data/train data/lang exp/mono0a exp/mono0a_ali || exit 1;
  steps/train_deltas.sh --cmd "$train_cmd" \
    3000 40000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1;
  # Decode the dev set with both the 3-gram and 4-gram LMs.
  utils/mkgraph.sh data/lang_test_tg exp/tri1 exp/tri1/graph_tg || exit 1;
  utils/mkgraph.sh data/lang_test_fg exp/tri1 exp/tri1/graph_fg || exit 1;
  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
    exp/tri1/graph_fg data/dev exp/tri1/decode_fg_dev || exit 1;
  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
    exp/tri1/graph_tg data/dev exp/tri1/decode_tg_dev || exit 1;
fi

if [ $stage -le 7 ]; then
  # tri2a: larger delta+delta-delta system.
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
  steps/train_deltas.sh --cmd "$train_cmd" \
    5000 60000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1;
  utils/mkgraph.sh data/lang_test_tg exp/tri2a exp/tri2a/graph_tg || exit 1;
  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
    exp/tri2a/graph_tg data/dev exp/tri2a/decode_tg_dev || exit 1;
fi

if [ $stage -le 8 ]; then
  # tri2b: LDA+MLLT system.
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    data/train data/lang exp/tri2a exp/tri2a_ali || exit 1;
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=5 --right-context=5" \
    6500 75000 data/train data/lang exp/tri2a_ali exp/tri2b || exit 1;
  utils/mkgraph.sh data/lang_test_tg exp/tri2b exp/tri2b/graph_tg || exit 1;
  steps/decode.sh --nj 12 --cmd "$decode_cmd" \
    exp/tri2b/graph_tg data/dev exp/tri2b/decode_tg_dev || exit 1;
fi

if [ $stage -le 9 ]; then
  # tri3b: LDA + MLLT + SAT, trained on top of the tri2b system.
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;
  steps/train_sat.sh --cmd "$train_cmd" \
    7500 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
  # Decode dev with the 3-gram and 4-gram LMs.
  utils/mkgraph.sh data/lang_test_tg exp/tri3b exp/tri3b/graph_tg || exit 1;
  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 12 \
    exp/tri3b/graph_tg data/dev exp/tri3b/decode_tg_dev || exit 1;
  utils/mkgraph.sh data/lang_test_fg exp/tri3b exp/tri3b/graph_fg || exit 1;
  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 12 \
    exp/tri3b/graph_fg data/dev exp/tri3b/decode_fg_dev || exit 1;
  # Decode test with both LMs. The test set has fewer speakers (n=7) and
  # decoding usually ends up waiting on a single job, so --num-threads 2
  # is used to speed that up.
  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 --num-threads 2 \
    exp/tri3b/graph_tg data/test exp/tri3b/decode_tg_test || exit 1;
  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 --num-threads 2 \
    exp/tri3b/graph_fg data/test exp/tri3b/decode_fg_test || exit 1;
fi

if [ $stage -le 10 ]; then
  # fMLLR alignments, used later to train the nnet and SGMM systems.
  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
    data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
fi

## Works
#local/sprak_run_nnet_cpu.sh tg dev

## Works
#local/sprak_run_sgmm2.sh dev

# Neural network setups based on the TEDLIUM recipe.
# Running the nnet3-tdnn setup trains an ivector extractor that is used
# by the subsequent nnet3 and chain systems (which is why --stage is
# specified on the later ones).
#local/nnet3/run_tdnn.sh --tdnn-affix "0" --nnet3-affix ""

# nnet3 LSTM
#local/nnet3/run_lstm.sh --stage 13 --affix "0"

# nnet3 bLSTM
#local/nnet3/run_blstm.sh --stage 12

# chain TDNN
# This setup creates a new lang directory that is also used by the
# TDNN-LSTM system.
#local/chain/run_tdnn.sh --stage 14

# chain TDNN-LSTM
local/chain/run_tdnn_lstm.sh --stage 17

# Getting results [see RESULTS file]
local/generate_results_file.sh 2> /dev/null > RESULTS