egs/yomdle_russian/v1/local/chain/run_e2e_cnn.sh
#!/bin/bash
# Copyright 2017  Hossein Hadian

# This script does end2end chain training (i.e. from scratch).

# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/
# System                  e2e_cnn_1a
#                  score_basic   rescoring + normalized
# WER                    16.24   11.0
# WER (rescored)         15.63   10.5
# CER                     5.98   5.6
# CER (rescored)          5.66   5.3
# Final train prob      0.1376
# Final valid prob      0.1913

# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a
# exp/chain/e2e_cnn_1a: num-iters=27 nj=5..8 num-params=3.0M dim=40->470 combine=0.091->0.091 (over 1) logprob:train/valid[17,26,final]=(0.135,0.137,0.138/0.191,0.191,0.191)

set -e

# configs for 'chain'
stage=0
nj=30
train_stage=-10
get_egs_stage=-10
affix=1a

# training options
tdnn_dim=450
# Minibatch size depends on example chunk length: e.g. 64 or 32 examples for
# 150-frame chunks, down to 8 or 4 examples for 1200-frame chunks.
minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4
cmvn_opts="--norm-means=false --norm-vars=false"
train_set=train
# End configuration section.

echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

lang=data/lang_e2e
treedir=exp/chain/e2e_monotree  # it's actually just a trivial tree (no tree building)
dir=exp/chain/e2e_cnn_${affix}

if [ $stage -le 0 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [Note: it really has two states; the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 1 ]; then
  steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \
                                       --shared-phones true \
                                       --type mono \
                                       data/$train_set $lang $treedir
  $cmd $treedir/log/make_phone_lm.log \
    cat data/$train_set/text \| \
    steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \
    utils/sym2int.pl -f 2- data/lang/phones.txt \| \
    chain-est-phone-lm --num-extra-lm-states=500 \
                       ark:- $treedir/phone_lm.fst
fi

if [ $stage -le 2 ]; then
  echo "$0: creating neural net configs using the xconfig parser";
  num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
  cnn_opts="l2-regularize=0.075"
  tdnn_opts="l2-regularize=0.075"
  output_opts="l2-regularize=0.1"
  common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
  common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
  common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"

  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=40 name=input

  conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1
  conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2
  conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2
  conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2
  conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2
  conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3
  conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3
  relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts

  ## adding the layers for chain branch
  relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts
  output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts
EOF

  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
fi
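# Optional sanity check (an addition, not part of the original recipe): after
# stage 2, xconfig_to_configs.py has compiled the xconfig into a reference
# model at $dir/configs/ref.raw, and nnet3-info can summarize its components
# and left/right context, which is a quick way to verify the CNN/TDNN stack
# before committing to training:
#
#   nnet3-info $dir/configs/ref.raw | head -n 20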
if [ $stage -le 3 ]; then
  steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
    --cmd "$cmd" \
    --feat.cmvn-opts "$cmvn_opts" \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.apply-deriv-weights true \
    --egs.stage $get_egs_stage \
    --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \
    --chain.frame-subsampling-factor 4 \
    --chain.alignment-subsampling-factor 4 \
    --trainer.add-option="--optimization.memory-compression-level=2" \
    --trainer.num-chunk-per-minibatch $minibatch_size \
    --trainer.frames-per-iter 1500000 \
    --trainer.num-epochs 3 \
    --trainer.optimization.momentum 0 \
    --trainer.optimization.num-jobs-initial 5 \
    --trainer.optimization.num-jobs-final 8 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.optimization.shrink-value 1.0 \
    --trainer.max-param-change 2.0 \
    --cleanup.remove-egs true \
    --feat-dir data/${train_set} \
    --tree-dir $treedir \
    --dir $dir || exit 1;
fi
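# Usage notes (an addition to the original script): utils/parse_options.sh
# turns every variable defined above it into a command-line option, so any of
# the configuration values can be overridden at invocation time, e.g.
# (hypothetical values):
#
#   local/chain/run_e2e_cnn.sh
#   local/chain/run_e2e_cnn.sh --stage 3 --train-stage -10 --affix 1b --nj 20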