egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
#!/bin/bash

# This script is a modified version of run_tdnn_5n.sh.  It uses the new
# configs convention for chain models introduced after Kaldi 5.2.

set -e

# configs for 'chain'
stage=0
train_stage=-10
get_egs_stage=-10
xent_regularize=0.1
dir=exp/chain/tdnn_5o

# training options
num_epochs=13
initial_effective_lrate=0.005
final_effective_lrate=0.0005
max_param_change=2.0
final_layer_normalize_target=0.5
num_jobs_initial=2
num_jobs_final=4
minibatch_size=128
frames_per_eg=150
remove_egs=false
#common_egs_dir=exp/chain/tdnn_5g/egs/
common_egs_dir=

# End configuration section.

echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet2 setup, and you can skip them by setting "--stage 4" if you have already
# run those things.

ali_dir=exp/tri3b_ali
treedir=exp/chain/tri4_5o_tree
lang=data/lang_chain_5o

local/online/run_nnet2_common.sh --stage $stage || exit 1;

if [ $stage -le 4 ]; then
  # Get the alignments as lattices (gives the chain training more freedom).
  # Use the same num-jobs as the alignments.
  nj=$(cat exp/tri3b_ali/num_jobs) || exit 1;
  steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/train \
    data/lang exp/tri3b exp/tri3b_lats
  rm exp/tri3b_lats/fsts.*.gz # save space
fi

if [ $stage -le 5 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file.  [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  # Use our special topology... note that later on we may have to tune this
  # topology.
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi
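
# [Added sketch, not part of the original recipe.]  An optional sanity check on
# the chain topology that gen_topo.py just wrote: it only reads the generated
# file, so it does not change the behaviour of the recipe.
if [ $stage -le 5 ] && [ -f $lang/topo ]; then
  echo "$0: first lines of the generated chain topology in $lang/topo:"
  head -n 10 $lang/topo
fi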

if [ $stage -le 6 ]; then
  # Build a tree using our new topology.
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
    --cmd "$train_cmd" 1200 data/train $lang $ali_dir $treedir
fi

if [ $stage -le 7 ]; then
  mkdir -p $dir
  echo "$0: creating neural net configs using the xconfig parser";

  num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
  learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python)
  tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true"
  tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
  linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
  prefinal_opts="l2-regularize=0.01"
  output_opts="l2-regularize=0.005"

  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=50 name=ivector
  input dim=13 name=input

  # please note that it is important to have input layer with the name=input
  # as the layer immediately preceding the fixed-affine-layer to enable
  # the use of short notation for the descriptor
  fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

  # the first splicing is moved before the lda layer, so no splicing here
  relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=768
  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=1
  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=1
  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=1
  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=0
  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf12 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  tdnnf-layer name=tdnnf13 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3
  linear-component name=prefinal-l dim=192 $linear_opts

  ## adding the layers for chain branch
  prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts small-dim=192 big-dim=768
  output-layer name=output include-log-softmax=false dim=$num_targets $output_opts

  # adding the layers for xent branch
  prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts small-dim=192 big-dim=768
  output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi
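
# [Added sketch, not part of the original recipe.]  If xconfig_to_configs.py
# produced a compiled reference network (ref.raw is assumed here; the check is
# skipped if it is absent), print a short summary so the layer dimensions and
# parameter count can be eyeballed before the long training stage below starts.
if [ $stage -le 7 ] && [ -f $dir/configs/ref.raw ]; then
  nnet3-info $dir/configs/ref.raw | head -n 5
fi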

if [ $stage -le 8 ]; then
  steps/nnet3/chain/train.py --stage $train_stage \
    --cmd "$decode_cmd" \
    --feat.online-ivector-dir exp/nnet2_online/ivectors \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize 0.1 \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.00005 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=200" \
    --egs.dir "$common_egs_dir" \
    --egs.opts "--frames-overlap-per-eg 0" \
    --egs.chunk-width $frames_per_eg \
    --trainer.num-chunk-per-minibatch $minibatch_size \
    --trainer.frames-per-iter 1000000 \
    --trainer.num-epochs $num_epochs \
    --trainer.optimization.num-jobs-initial $num_jobs_initial \
    --trainer.optimization.num-jobs-final $num_jobs_final \
    --trainer.optimization.initial-effective-lrate $initial_effective_lrate \
    --trainer.optimization.final-effective-lrate $final_effective_lrate \
    --trainer.max-param-change $max_param_change \
    --cleanup.remove-egs $remove_egs \
    --feat-dir data/train_hires \
    --tree-dir $treedir \
    --lat-dir exp/tri3b_lats \
    --dir $dir
fi

if [ $stage -le 9 ]; then
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 4 \
    data/test_hires exp/nnet2_online/extractor exp/nnet2_online/ivectors_test || exit 1;
fi

if [ $stage -le 10 ]; then
  # Note: it might appear that this $lang directory is mismatched, and it is as
  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
  steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
    --scoring-opts "--min-lmwt 1" \
    --nj 20 --cmd "$decode_cmd" \
    --online-ivector-dir exp/nnet2_online/ivectors_test \
    $dir/graph data/test_hires $dir/decode || exit 1;
fi

if [ $stage -le 11 ]; then
  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_ug $dir $dir/graph_ug
  steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
    --nj 20 --cmd "$decode_cmd" \
    --online-ivector-dir exp/nnet2_online/ivectors_test \
    $dir/graph_ug data/test_hires $dir/decode_ug || exit 1;
fi

wait;
exit 0;
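
# [Added note, not part of the original file.]  The script is normally run from
# egs/rm/s5 and can be resumed at a later stage if the earlier ones have already
# been run, e.g.:
#   local/chain/tuning/run_tdnn_5o.sh --stage 8 --train-stage -10
# Decoding results can then be summarized with something like:
#   for x in exp/chain/tdnn_5o/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done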