#!/bin/bash
# egs/gale_arabic/s5/local/online/run_nnet2.sh

# Copyright 2014  Vimal Manohar

# This is our online neural-net build for the GALE system.

. ./cmd.sh

stage=-1
train_stage=-10
use_gpu=true
mfccdir=mfcc
train_nj=120
decode_nj=30

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi
with CUDA.  If you want to use GPUs (and have them), go to src/, and configure
and make on a machine where "nvcc" is installed.  Otherwise, call this script
with --use-gpu false.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  # the _a is in case I want to change the parameters.
  dir=exp/nnet2_online/nnet_a_gpu
else
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slower.
  num_threads=16
  minibatch_size=128
  parallel_opts="--num-threads $num_threads"
  dir=exp/nnet2_online/nnet_a
fi

if [ $stage -le 0 ]; then
  # This shows how you can split data across multiple file-systems; we'll split
  # the MFCC dir across multiple locations.  You might want to be careful here,
  # if you have multiple copies of Kaldi checked out and run the same recipe,
  # not to let them overwrite each other.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
    date=$(date +'%m_%d_%H_%M')
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/gale-$date/s5/$mfccdir/storage $mfccdir/storage || exit 1
  fi

  utils/copy_data_dir.sh data/train data/train_hires || exit 1
  steps/make_mfcc_pitch_online.sh --nj $train_nj --mfcc-config conf/mfcc_hires.conf \
    --cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train $mfccdir || exit 1;
fi

if [ $stage -le 1 ]; then
  # We'll use the features with just MFCCs, no pitch, to train the iVector
  # extractor on.  Check that we're using 40-dim features so the command line
  # below is correct.
  ! grep 'num-ceps=40' conf/mfcc_hires.conf >/dev/null && \
    echo "Change the script if you change conf/mfcc_hires.conf" && exit 1;
  steps/select_feats.sh --nj 5 --cmd "$train_cmd" 0-39 data/train_hires \
    data/train_hires_mfcconly exp/nnet2_online/select_hires_train $mfccdir || exit 1
  steps/compute_cmvn_stats.sh data/train_hires_mfcconly exp/nnet2_online/select_hires_train $mfccdir || exit 1

  # Make a subset of about 1/3 of the data.
  utils/subset_data_dir.sh data/train_hires_mfcconly 100000 \
    data/train_hires_mfcconly_100k || exit 1
  # Make a corresponding subset of the normal-dimensional-MFCC training data.
  utils/subset_data_dir.sh --utt-list <(awk '{print $1}' data/train_hires_mfcconly_100k/utt2spk) \
    data/train data/train_100k || exit 1
fi

if [ $stage -le 2 ]; then
  # We need to build a small system just because we need the LDA+MLLT transform
  # to train the diag-UBM on top of.  First align the 100k subset using the
  # tri3b system and normal MFCC features, so we have alignments to build our
  # hires-MFCC system on top of.
  steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
    data/train_100k data/lang exp/tri3b exp/tri3b_ali_100k || exit 1;

  # Build a small LDA+MLLT system on top of the hires MFCC features, just
  # because we need the transform.  We use --num-iters 13 because after we get
  # the transform (the 12th iter is the last), any further training is pointless.
  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 --realign-iters "" \
    --splice-opts "--left-context=3 --right-context=3" \
    5000 10000 data/train_hires_mfcconly_100k data/lang exp/tri3b_ali_100k exp/nnet2_online/tri4a || exit 1
fi
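# Optional sanity check (example only, not run by this script): as the stage-3
# comment below says, only the splice options and the LDA+MLLT transform from
# exp/nnet2_online/tri4a are used later on.  Assuming the Kaldi binaries are on
# your PATH via path.sh, you could dump the transform in text form with e.g.:
#
#   copy-matrix --binary=false exp/nnet2_online/tri4a/final.mat -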
if [ $stage -le 3 ]; then
  # Train a diagonal UBM.  The input directory exp/nnet2_online/tri4a is only
  # needed for the splice-opts and the LDA+MLLT transform.
  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj $train_nj --num-frames 400000 \
    data/train_hires_mfcconly_100k 512 exp/nnet2_online/tri4a exp/nnet2_online/diag_ubm || exit 1
fi

if [ $stage -le 4 ]; then
  # Train an iVector extractor on all the mfcconly data.  Note: although we use
  # only 10 jobs, each job uses 16 processes in total.
  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
    data/train_hires_mfcconly exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1;
fi

if [ $stage -le 5 ]; then
  # Extract iVectors for the training data.
  ivectordir=exp/nnet2_online/ivectors_train
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
    # This shows how you can split across multiple file-systems.
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/gale/s5/$ivectordir/storage $ivectordir/storage || exit 1
  fi
  # Having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (the iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_hires_mfcconly data/train_hires_mfcconly_max2 || exit 1
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $train_nj \
    data/train_hires_mfcconly_max2 exp/nnet2_online/extractor $ivectordir || exit 1;
fi

if [ $stage -le 6 ]; then
  # This shows how you can split across multiple file-systems.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-online/egs/bolt/s5/$dir/egs $dir/egs/storage || exit 1
  fi
  # Because we have a lot of data here and we don't want the training to take
  # too long, we reduce the number of epochs from the default (15) to 8.
  # The option "--io-opts '--max-jobs-run 12'" gives us more than the default
  # number (5) of jobs dumping the egs to disk; this is OK since we're splitting
  # our data across four filesystems for speed.
  steps/nnet2/train_pnorm_simple.sh --stage $train_stage \
    --num-epochs 8 \
    --samples-per-iter 400000 \
    --splice-width 7 --feat-type raw \
    --online-ivector-dir exp/nnet2_online/ivectors_train \
    --cmvn-opts "--norm-means=false --norm-vars=false" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --parallel-opts "$parallel_opts" \
    --io-opts "--max-jobs-run 12" \
    --num-jobs-nnet 6 \
    --num-hidden-layers 4 \
    --mix-up 12000 \
    --initial-learning-rate 0.06 --final-learning-rate 0.006 \
    --cmd "$decode_cmd" \
    --pnorm-input-dim 3000 \
    --pnorm-output-dim 300 \
    data/train_hires data/lang exp/tri3b $dir || exit 1;
fi

if [ $stage -le 7 ]; then
  steps/online/nnet2/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \
    --add-pitch true data/lang exp/nnet2_online/extractor "$dir" ${dir}_online || exit 1;
fi
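# Example only (not run by this script): once ${dir}_online exists, you can
# also decode a single recording by hand with the online2 binaries.  Here
# "my_test.wav" is a placeholder for a wav file at the sample rate expected by
# conf/mfcc_hires.conf, and the graph is the same one used for decoding below:
#
#   online2-wav-nnet2-latgen-faster --online=true --do-endpointing=false \
#     --config=${dir}_online/conf/online_nnet2_decoding.conf \
#     --word-symbol-table=exp/tri3b/graph/words.txt \
#     ${dir}_online/final.mdl exp/tri3b/graph/HCLG.fst \
#     'ark:echo utt1 utt1|' 'scp:echo utt1 my_test.wav|' ark:/dev/null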
if [ $stage -le 8 ]; then
  # Do the actual online decoding with iVectors, carrying info forward from
  # previous utterances of the same speaker.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $decode_nj \
    exp/tri3b/graph data/test ${dir}_online/decode_test || exit 1;
fi

if [ $stage -le 9 ]; then
  # This version of the decoding treats each utterance separately,
  # without carrying forward speaker information.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $decode_nj \
    --per-utt true \
    exp/tri3b/graph data/test ${dir}_online/decode_test_utt || exit 1;
fi

if [ $stage -le 10 ]; then
  # This version of the decoding treats each utterance separately,
  # without carrying forward speaker information, but looks to the end
  # of the utterance while computing the iVector.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj $decode_nj \
    --per-utt true --online false \
    exp/tri3b/graph data/test ${dir}_online/decode_test_utt_offline || exit 1;
fi

exit 0;
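# Example only (assuming the default scoring inside steps/online/nnet2/decode.sh
# has produced wer_* files): to compare the three decoding conditions above,
# something like this prints the best WER from each decode directory:
#
#   for d in ${dir}_online/decode_test{,_utt,_utt_offline}; do
#     grep WER $d/wer_* | utils/best_wer.sh
#   done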