Blame view
egs/tidigits/s5/run.sh
2.49 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
#!/bin/bash

# Note: this TIDIGITS setup has not been tuned at all and has some obvious
# deficiencies; this has been created as a starting point for a tutorial.
# We're just using the "adults" data here, not the data from children.
#
# Pipeline, in order: data/lang preparation, MFCC + CMVN features, a monophone
# system (exp/mono0a) trained on a 1k-utterance subset, alignment of the full
# training set, and a delta-feature triphone system (exp/tri1).  Both systems
# are decoded on data/test.
#
# Deliberately NOT using "set -e": utils/validate_lang.pl is expected to exit
# with status 1 in this setup (see the note next to it).  Instead, every step
# whose output is needed by a later step is guarded with "|| exit 1".

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

tidigits=/export/corpora5/LDC/LDC93S10
#tidigits=/mnt/matylda2/data/TIDIGITS

# The following command prepares the data/{train,dev,test} directories.
local/tidigits_data_prep.sh "$tidigits" || exit 1;
local/tidigits_prepare_lang.sh || exit 1;
utils/validate_lang.pl data/lang/ # Note: this actually does report errors,
  # and exits with status 1, but we've checked them and seen that they
  # don't matter (this setup doesn't have any disambiguation symbols,
  # and the script doesn't like that).  Its status is therefore ignored.

# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc
for x in test train; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
done

# The monophone system below is trained on this 1000-utterance subset, so a
# failure here must stop the script.
utils/subset_data_dir.sh data/train 1000 data/train_1k || exit 1;

# try --boost-silence 1.25 to some of the scripts below (also 1.5, if that
# helps); effect may not be clear till we test triphone system.  See
# wsj setup for examples (../../wsj/s5/run.sh)

steps/train_mono.sh --nj 4 --cmd "$train_cmd" \
  data/train_1k data/lang exp/mono0a || exit 1;

# Decoding is only attempted if the graph was built.  A failure here is not
# fatal, because the triphone stage below needs only exp/mono0a itself.
utils/mkgraph.sh data/lang exp/mono0a exp/mono0a/graph && \
  steps/decode.sh --nj 10 --cmd "$decode_cmd" \
    exp/mono0a/graph data/test exp/mono0a/decode

# Align the full training set with the monophone model; the alignments feed
# the triphone training, so both steps are fatal on error.
steps/align_si.sh --nj 4 --cmd "$train_cmd" \
  data/train data/lang exp/mono0a exp/mono0a_ali || exit 1;

steps/train_deltas.sh --cmd "$train_cmd" \
  300 3000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1;

# Chained like the monophone stage: do not decode with a graph that failed to
# build.  As the last command, its status becomes the script's exit status.
utils/mkgraph.sh data/lang exp/tri1 exp/tri1/graph && \
  steps/decode.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri1/graph data/test exp/tri1/decode

# Example of looking at the output.
# utils/int2sym.pl -f 2- data/lang/words.txt exp/tri1/decode/scoring/19.tra | sed "s/ $//" | sort | diff - data/test/text

# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep SER $x/wer_* | utils/best_wer.sh; done

#exp/mono0a/decode/wer_17:%SER 3.67 [ 319 / 8700 ]
#exp/tri1/decode/wer_19:%SER 2.64 [ 230 / 8700 ]