Blame view
egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
4.33 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
#!/bin/bash

# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh

# This is the standard "tdnn" system, built in nnet3; it's what we used to
# call multi-splice.

# At this script level we don't support not running on GPU, as it would be
# painfully slow. If you want to run without GPU you'd have to call
# train_tdnn.sh with --gpu false, --num-threads 16 and --minibatch-size 128.

# results
# local/nnet3/compare_wer.sh exp/nnet3/tdnn_sp/
# Model                 tdnn_sp
# WER(%)                11.20
# Final train prob      -0.9601
# Final valid prob      -1.0819

# Stages handled here (a stage runs when $stage <= its number):
#   6: copy data dirs, volume-perturb them, extract high-resolution MFCCs
#   7: write the network xconfig and convert it to nnet3 configs
#   8: train the network
#   9: decode every set listed in $test_sets

set -e

# Begin configuration section.
# Every variable declared before parse_options.sh is sourced is intended to be
# overridable from the command line (e.g. --stage 8).
stage=0
train_stage=-10
affix=
common_egs_dir=
# Address handed to the trainer via --reporting.email. It was previously
# referenced without ever being declared, so it was always empty and could not
# be set through an option; empty remains the default.
reporting_email=

# training options
initial_effective_lrate=0.0015
final_effective_lrate=0.00015
num_epochs=4
num_jobs_initial=2
num_jobs_final=6
nj=30
remove_egs=true

# feature options
# NOTE(review): use_ivectors is not referenced anywhere below in this script;
# it is kept so an existing --use-ivectors option keeps being accepted.
use_ivectors=false

# End configuration section.

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

# we use 40-dim high-resolution mfcc features (w/o pitch and ivector) for nn training
# no utt- and spk- level cmvn

dir=exp/nnet3/tdnn_sp${affix:+_$affix}
gmm_dir=exp/tri3
test_sets="dev test"
train_set=train
ali_dir=${gmm_dir}_ali
graph_dir=$gmm_dir/graph

if [ "$stage" -le 6 ]; then
  mfccdir=mfcc_hires
  # $train_set and $test_sets are deliberately unquoted: they are
  # space-separated lists that must be word-split into dataset names.
  for datadir in ${train_set} ${test_sets}; do
    utils/copy_data_dir.sh "data/${datadir}" "data/${datadir}_hires" || exit 1
    utils/data/perturb_data_dir_volume.sh "data/${datadir}_hires" || exit 1
    steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj "$nj" \
      "data/${datadir}_hires" exp/make_mfcc/ "${mfccdir}" || exit 1
  done
fi

if [ "$stage" -le 7 ]; then
  echo "$0: creating neural net configs";

  # The output-layer dimension is the number of pdfs in the alignment tree.
  num_targets=$(tree-info "$ali_dir/tree" | awk '/num-pdfs/ {print $2}')
  # 'set -e' does not catch a failing tree-info here (the pipeline's status is
  # awk's), so refuse to write a config with an empty output dimension.
  if [ -z "$num_targets" ]; then
    echo "$0: could not read num-pdfs from $ali_dir/tree" >&2
    exit 1
  fi

  mkdir -p "$dir/configs"
  cat <<EOF > "$dir/configs/network.xconfig"
  input dim=40 name=input

  # please note that it is important to have input layer with the name=input
  # as the layer immediately preceding the fixed-affine-layer to enable
  # the use of short notation for the descriptor
  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat

  # the first splicing is moved before the lda layer, so no splicing here
  relu-batchnorm-layer name=tdnn1 dim=850
  relu-batchnorm-layer name=tdnn2 dim=850 input=Append(-1,0,2)
  relu-batchnorm-layer name=tdnn3 dim=850 input=Append(-3,0,3)
  relu-batchnorm-layer name=tdnn4 dim=850 input=Append(-7,0,2)
  relu-batchnorm-layer name=tdnn5 dim=850 input=Append(-3,0,3)
  relu-batchnorm-layer name=tdnn6 dim=850
  output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5
EOF
  steps/nnet3/xconfig_to_configs.py \
    --xconfig-file "$dir/configs/network.xconfig" \
    --config-dir "$dir/configs/"
fi

if [ "$stage" -le 8 ]; then
  #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
  #  utils/create_split_dir.pl \
  #    /export/b0{5,6,7,8}/$USER/kaldi-data/egs/aishell-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  #fi

  steps/nnet3/train_dnn.py --stage="$train_stage" \
    --cmd="$decode_cmd" \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --trainer.num-epochs "$num_epochs" \
    --trainer.optimization.num-jobs-initial "$num_jobs_initial" \
    --trainer.optimization.num-jobs-final "$num_jobs_final" \
    --trainer.optimization.initial-effective-lrate "$initial_effective_lrate" \
    --trainer.optimization.final-effective-lrate "$final_effective_lrate" \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs "$remove_egs" \
    --cleanup.preserve-model-interval 500 \
    --use-gpu true \
    --feat-dir="data/${train_set}_hires" \
    --ali-dir "$ali_dir" \
    --lang data/lang \
    --reporting.email="$reporting_email" \
    --dir="$dir" || exit 1;
fi

if [ "$stage" -le 9 ]; then
  # $test_sets is deliberately unquoted so it splits into one name per set.
  for decode_set in $test_sets; do
    # this version of the decoding treats each utterance separately
    # without carrying forward speaker information.
    # One decoding job per distinct speaker (2nd field of utt2spk).
    num_jobs=$(cut -d' ' -f2 "data/${decode_set}_hires/utt2spk" | sort -u | wc -l)
    decode_dir=${dir}/decode_$decode_set
    # $num_jobs stays unquoted so any padding some 'wc' implementations emit
    # around the count is dropped by word splitting.
    steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \
      "$graph_dir" "data/${decode_set}_hires" "$decode_dir" || exit 1;
  done
fi

wait;
echo "local/nnet3/run_tdnn.sh succeeded"
exit 0;