egs/sprakbanken_swe/s5/run.sh
#!/bin/bash

. ./cmd.sh  ## You'll want to change cmd.sh to something that will work on your system.
            ## This relates to the queue.

. ./path.sh # so python3 is on the path if not on the system (we made a link to utils/).

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

# Download the corpus and prepare parallel lists of sound files and text files.
# Divide the corpus into train, dev and test sets.
local/sprak_data_prep.sh || exit 1;
utils/fix_data_dir.sh data/train || exit 1;

# Perform text normalisation, prepare the dict folder and the LM training transcriptions.
local/copy_dict || exit 1;

utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang_tmp data/lang || exit 1;

# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcctest

# Extract MFCCs.
# "p" was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh because some
# wave files are corrupt.
# This step will print a warning because of the corrupt audio files, but the features are
# computed anyway.
# If this step fails and prints a partial diff, rerun from sprak_data_prep.sh.
steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" data/test exp/make_mfcc/test mfcc || exit 1;
steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" data/train exp/make_mfcc/train mfcc || exit 1;

# Compute cepstral mean and variance normalisation.
steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test mfcc || exit 1;
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train mfcc || exit 1;

# Repair the data sets (remove data points with corrupt audio).
utils/fix_data_dir.sh data/test || exit 1;
utils/fix_data_dir.sh data/train || exit 1;

# Train the LM with IRSTLM.
# Creates the 3-gram or 4-gram LM and, importantly, G.fst.
#local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "3g" data/lang data/local/train3_lm &> data/local/3g.log &
local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 4 "4g" data/lang data/local/train4_lm &> data/local/4g.log || exit 1;

# To speed up decoding, keep only 120 utterances per speaker from the test set.
utils/subset_data_dir.sh --per-spk data/test 120 data/test120_p_spk || exit 1;

# Train a monophone model. After this step the alignment between frames and phones
# can be inspected with the show-alignments tool (see the commented example after
# the tri1 alignment step below).
steps/train_mono.sh --nj 10 --cmd "$train_cmd" data/train data/lang exp/mono || exit 1;

# Make sure the 4-gram LM has been created before building the graph.
utils/mkgraph.sh data/lang_test_4g exp/mono exp/mono/graph_4g || exit 1;

# Make sure the graph has been constructed before decoding.
steps/decode.sh --config conf/decode.config --nj 10 --cmd "$decode_cmd" \
  exp/mono/graph_4g data/test120_p_spk exp/mono/decode || exit 1;

# Get alignments from the monophone system.
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/mono exp/mono_ali || exit 1;

# Train tri1 [first triphone pass].
# steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
steps/train_deltas.sh --cmd "$train_cmd" \
  5800 96000 data/train data/lang exp/mono_ali exp/tri1 || exit 1;

# Make the graph.
utils/mkgraph.sh data/lang_test_4g exp/tri1 exp/tri1/graph_4g || exit 1;

steps/decode.sh --config conf/decode.config --nj 10 --cmd "$decode_cmd" \
  exp/tri1/graph_4g data/test120_p_spk exp/tri1/decode_test120_p_spk || exit 1;

steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
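# Optional sanity check (a sketch, not part of the original recipe): the frame-to-phone
# alignment mentioned above can be inspected with Kaldi's show-alignments tool. This
# assumes a standard Kaldi build is on the PATH; uncomment to view the phone-level
# segmentation stored in the first alignment archive of the tri1 alignments.
# show-alignments data/lang/phones.txt exp/tri1_ali/final.mdl \
#   "ark:gunzip -c exp/tri1_ali/ali.1.gz |" | less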
# Train tri2a, which is deltas + delta-deltas.
steps/train_deltas.sh --cmd "$train_cmd" \
  7500 125000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1;

utils/mkgraph.sh data/lang_test_4g exp/tri2a exp/tri2a/graph_4g || exit 1;

steps/decode.sh --nj 10 --cmd "$decode_cmd" \
  exp/tri2a/graph_4g data/test120_p_spk exp/tri2a/decode_test120_p_spk || exit 1;

# Train tri2b, which is LDA + MLLT.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=5 --right-context=5" \
  7500 125000 data/train data/lang exp/tri1_ali exp/tri2b || exit 1;

utils/mkgraph.sh data/lang_test_4g exp/tri2b exp/tri2b/graph_4g || exit 1;

steps/decode.sh --nj 10 --cmd "$decode_cmd" \
  exp/tri2b/graph_4g data/test120_p_spk exp/tri2b/decode_test120_p_spk || exit 1;

# Align the training data with the tri2b system, reusing its training graphs.
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;

# From the 2b system, train 3b, which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" \
  7500 125000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;

# Decode using the 4-gram language model.
utils/mkgraph.sh data/lang_test_4g exp/tri3b exp/tri3b/graph_4g || exit 1;

steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 \
  exp/tri3b/graph_4g data/test120_p_spk exp/tri3b/decode_test120_p_spk || exit 1;

# This is commented out for now as it's not important for the main recipe.
## Train RNN for reranking
#local/sprak_train_rnnlms.sh data/local/dict data/dev/transcripts.uniq data/local/rnnlms/g_c380_d1k_h100_v130k
## Consumes a lot of memory! Do not run in parallel
#local/sprak_run_rnnlms_tri3b.sh data/lang_test_3g data/local/rnnlms/g_c380_d1k_h100_v130k data/test1k exp/tri3b/decode_3g_test1k

# Get fMLLR alignments from the 3b system.
steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;

# From the 3b alignments, train a larger SAT system (tri4a) on the full training set.
steps/train_sat.sh --cmd "$train_cmd" \
  13000 300000 data/train data/lang exp/tri3b_ali exp/tri4a || exit 1;

utils/mkgraph.sh data/lang_test_4g exp/tri4a exp/tri4a/graph_4g || exit 1;

steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
  exp/tri4a/graph_4g data/test120_p_spk exp/tri4a/decode_test120_p_spk || exit 1;

# Alignments used to train the neural networks.
steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri4a exp/tri4a_ali || exit 1;

# Train and decode a neural-network system (see local/sprak_run_nnet_cpu.sh).
local/sprak_run_nnet_cpu.sh 4g test120_p_spk || exit 1;

# Getting results [see RESULTS file]
for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
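# A convenience sketch, not part of the original recipe: the same summary can be written
# to a file (the name RESULTS.local is arbitrary) so it is easy to diff against the
# RESULTS file shipped with the recipe. Uncomment to use.
# for x in exp/*/decode*; do
#   [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh
# done > RESULTS.local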