Yannick Estève / ONTRAC-Kaldi

Blame view

egs/tedlium/s5/local/nnet/run_dnn_bn.sh 7.81 KB
  #!/bin/bash
  #
  # Based mostly on the Switchboard recipe. The training database is TED-LIUM,
  # it consists of TED talks with cleaned automatic transcripts:
  #
  # http://www-lium.univ-lemans.fr/en/content/ted-lium-corpus
  # http://www.openslr.org/resources (Mirror).
  #
  # The data is distributed under 'Creative Commons BY-NC-ND 3.0' license,
  # which allow free non-commercial use, while only a citation is required.
  #
  # Copyright  2014 Nickolay V. Shmyrev 
  #            2015 Brno University of Technology (Author: Karel Vesely)
  #            2015 Alex Glubshev
  # Apache 2.0
  #
  
  . ./cmd.sh
  . ./path.sh
  
  nj=32
  njdec=11
  njfea=10
  
  # label,
  exp=BN
  
  # source data,
  ali_src=exp/tri3_ali
  graph_src=exp/tri3/graph
  
  # fbank features
  test=data-fbank/test
  train=data-fbank/train
  
  test_original=data/test
  train_original=data/train
  
  # bn features,
  test_bn=data-fbank-${exp}-bn/test
  train_bn=data-fbank-${exp}-bn/train
  
  # fmllr features,
  test_bn_fmllr=data-fbank-${exp}-bn-fmllr/test
  train_bn_fmllr=data-fbank-${exp}-bn-fmllr/train
  
  stage=0
  . utils/parse_options.sh # accept options
  
  # Make the kaldi FBANK+PITCH features,
  [ ! -e $test ] && if [ $stage -le 0 ]; then
    # Test set
    utils/copy_data_dir.sh $test_original $test || exit 1; rm $test/{cmvn,feats}.scp
    steps/make_fbank_pitch.sh --nj $njfea --cmd "$train_cmd" \
      $test $test/log $test/data || exit 1;
    steps/compute_cmvn_stats.sh $test $test/log $test/data || exit 1;  
    
    # Train set
    utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp
    steps/make_fbank_pitch.sh --nj $njfea --cmd "$train_cmd" \
       $train $train/log $train/data || exit 1;
      steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
    
    # Split to training 90%, cv 10%
    utils/subset_data_dir_tr_cv.sh $train ${train}_tr90 ${train}_cv10 || exit 1;
  fi
  
  # Train the bottleneck network,
  lang=data/lang_test
  if [ $stage -le 1 ]; then
    dir=exp/dnn8a_${exp}_bn-feat
    ali=$ali_src
    $cuda_cmd $dir/log/train_nnet.log \
      steps/nnet/train.sh --hid-layers 2 --hid-dim 1500 --bn-dim 40 \
      --cmvn-opts "--norm-means=true --norm-vars=false" --feat-type traps \
      --splice 5 --traps-dct-basis 6 --learn-rate 0.008 \
      ${train}_tr90 ${train}_cv10 $lang $ali $ali $dir || exit 1
    
    # Decode test,
    steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.10 \
      $graph_src $test $dir/decode_test || exit 1
  fi
  
  # Store the bottleneck features,
  if [ $stage -le 2 ]; then
    dir=exp/dnn8a_${exp}_bn-feat
    # dev
    steps/nnet/make_bn_feats.sh --nj $njfea --cmd "$train_cmd" $test_bn $test $dir $test_bn/log $test_bn/data || exit 1 
    steps/compute_cmvn_stats.sh $test_bn $test_bn/log $test_bn/data || exit 1;
    # train
    steps/nnet/make_bn_feats.sh --nj $njfea --cmd "$train_cmd" $train_bn $train $dir $train_bn/log $train_bn/data || exit 1
    steps/compute_cmvn_stats.sh $train_bn $train_bn/log $train_bn/data || exit 1;
  fi
  
  # Train GMM on bottleneck features,
  lang_test=data/lang_test
  if [ $stage -le 3 ]; then
    dir=exp/dnn8b_${exp}_bn-gmm
    # Train,
    # gmm on bn features, no cmvn, no lda-mllt,
    steps/train_deltas.sh --power 0.5 --boost-silence 1.5 --cmd "$train_cmd" \
      --delta-opts "--delta-order=0" \
      --cmvn-opts "--norm-means=false --norm-vars=false" \
      --beam 20 --retry-beam 80 \
      5000 80000 $train_bn $lang $ali_src $dir || exit 1
    # Decode,
    utils/mkgraph.sh $lang_test $dir $dir/graph || exit 1
    steps/decode.sh --nj $njdec --cmd "$decode_cmd" \
      --acwt 0.05 --beam 15.0 --lattice-beam 8.0 \
      $dir/graph $test_bn $dir/decode_$(basename $test_bn) || exit 1
    # Align,
    steps/align_fmllr.sh --boost-silence 1.5 --nj $nj --cmd "$train_cmd" \
      --beam 20 --retry-beam 80 \
      $train_bn $lang $dir ${dir}_ali || exit 1;
  fi
  
  # Train SAT-adapted GMM on bottleneck features,
  if [ $stage -le 4 ]; then
    dir=exp/dnn8c_${exp}_fmllr-gmm
    ali=exp/dnn8b_${exp}_bn-gmm_ali
    # Train,
    # fmllr-gmm system on bottleneck features, 
    # - no cmvn, put fmllr to the features directly (no lda),
    # - note1 : we don't need cmvn, similar effect has diagonal of fmllr transform,
    # - note2 : lda+mllt was causing a small hit <0.5%,
    steps/train_sat.sh --power 0.5 --boost-silence 1.5 --cmd "$train_cmd" \
      --beam 20 --retry-beam 80 \
      5000 80000 $train_bn $lang $ali $dir || exit 1
    # Decode,
    utils/mkgraph.sh $lang_test $dir $dir/graph || exit 1;
    steps/decode_fmllr.sh --nj $njdec --cmd "$decode_cmd" \
      --acwt 0.05 --beam 15.0 --lattice-beam 8.0 \
      $dir/graph $test_bn $dir/decode_$(basename $test_bn) || exit 1
    # Align,
    steps/align_fmllr.sh --boost-silence 1.5 --nj $nj --cmd "$train_cmd" \
      --beam 20 --retry-beam 80 \
      $train_bn $lang $dir ${dir}_ali || exit 1;
  fi
  
  # Store the bottleneck-FMLLR features,
  gmm=exp/dnn8c_${exp}_fmllr-gmm # fmllr-feats, dnn-targets,
  graph=$gmm/graph
  if [ $stage -le 5 ]; then
    # Dev_set
    steps/nnet/make_fmllr_feats.sh --nj $njfea --cmd "$train_cmd" \
       --transform-dir $gmm/decode_$(basename $test_bn) \
       $test_bn_fmllr $test_bn $gmm $test_bn_fmllr/log $test_bn_fmllr/data || exit 1;
    # Training set
    steps/nnet/make_fmllr_feats.sh --nj $njfea --cmd "$train_cmd --max-jobs-run 10" \
       --transform-dir ${gmm}_ali \
       $train_bn_fmllr $train_bn $gmm $train_bn_fmllr/log $train_bn_fmllr/data || exit 1;
    # Split the training set
    utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train_bn_fmllr ${train_bn_fmllr}_tr90 ${train_bn_fmllr}_cv10
  fi
  
  #------------------------------------------------------------------------------------
  # Pre-train stack of RBMs (6 layers, 2048 units),
  if [ $stage -le 6 ]; then
    dir=exp/dnn8d_${exp}_pretrain-dbn; mkdir -p $dir
    # Create input transform, splice 13 frames [ -10 -5..+5 +10 ],
    echo "<Splice> <InputDim> 40 <OutputDim> 520 <BuildVector> -10 -5:1:5 10 </BuildVector>" >$dir/proto.main
    $cuda_cmd $dir/log/pretrain_dbn.log \
      steps/nnet/pretrain_dbn.sh --feature-transform-proto $dir/proto.main \
      $train_bn_fmllr $dir || exit 1
  fi
  
  #------------------------------------------------------------------------------------
  # Train the DNN optimizing cross-entropy,
  if [ $stage -le 7 ]; then
    dir=exp/dnn8e_${exp}_pretrain-dbn_dnn
    ali=${gmm}_ali
    feature_transform=exp/dnn8d_${exp}_pretrain-dbn/final.feature_transform # re-use
    dbn=exp/dnn8d_${exp}_pretrain-dbn/6.dbn # re-use
    # Train  
    $cuda_cmd $dir/log/train_nnet.log \
      steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
      ${train_bn_fmllr}_tr90 ${train_bn_fmllr}_cv10 $lang $ali $ali $dir || exit 1;
    # Decode test
    steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.10 \
      $graph $test_bn_fmllr $dir/decode_$(basename $test_bn_fmllr) || exit 1
  fi
  
  #------------------------------------------------------------------------------------
  # Finally we optimize sMBR criterion, we do Stochastic-GD with per-utterance updates, 
  dir=exp/dnn8f_${exp}_pretrain-dbn_dnn_smbr
  srcdir=exp/dnn8e_${exp}_pretrain-dbn_dnn
  acwt=0.1
  #
  if [ $stage -le 8 ]; then
    # Generate lattices and alignments
    steps/nnet/align.sh --nj $nj --cmd "$train_cmd" \
      $train_bn_fmllr $lang $srcdir ${srcdir}_ali || exit 1;
    steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --acwt $acwt \
      $train_bn_fmllr $lang $srcdir ${srcdir}_denlats  || exit 1;
  fi
  if [ $stage -le 9 ]; then
    # Do 4 epochs of sMBR (leaving out all silence frames and compensating insertions), 
    steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt \
      --do-smbr true --exclude-silphones true --one-silence-class true \
      $train_bn_fmllr $lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
    # Decode test,
    for ITER in 1 2 3 4; do
      steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
        --nnet $dir/${ITER}.nnet \
        $graph $test_bn_fmllr $dir/decode_$(basename $test_bn_fmllr)_it${ITER} || exit 1
    done
  fi 
  
  echo $0 successs.
  exit 0