#!/bin/bash

# Copyright 2012  Johns Hopkins University (author: Daniel Povey)
#           2015  Guoguo Chen
#           2017  Hainan Xu
#           2017  Szu-Jui Chen

# This script trains LMs on the Chime4 data.

# rnnlm/train_rnnlm.sh: best iteration (out of 120) was 91, linking it to final iteration.
# rnnlm/train_rnnlm.sh: train/dev perplexity was 23.2 / 25.6.
# Train objf: -5.63 -4.52 -4.20 -4.05 -3.96 -3.89 -3.83 -3.79 -3.76 -3.73 -3.70 -3.67 -3.65
# -3.63 -3.61 -3.59 -3.58 -3.56 -3.54 -3.53 -3.52 -3.50 -3.49 -3.48 -3.47 -3.46 -3.45 -3.44
# -3.43 -3.42 -3.43 -3.41 -3.39 -3.38 -3.38 -3.37 -3.35 -3.34 -3.34 -3.33 -3.32 -3.31 -3.31
# -3.30 -3.29 -3.28 -3.28 -3.27 -3.26 -3.25 -3.25 -3.25 -3.23 -3.22 -3.23 -3.22 -3.21 -3.20
# -3.20 -3.19 -3.19 -3.18 -3.18 -3.17 -3.16 -3.15 -3.16 -3.15 -3.14 -3.13 -3.13 -3.13 -3.12
# -3.11 -3.12 -3.11 -3.10 -3.09 -3.09 -3.09 -3.08 -3.07 -3.07 -3.07 -3.06 -3.05 -3.05 -3.05
# -3.04 -3.04 -3.04 -3.03 -3.00 -3.02 -3.00 -2.99 -3.00 -2.99 -2.99 -2.98 -2.96 -2.97 -2.96
# -2.95 -2.96 -2.95 -2.95 -2.94 -2.93 -2.93 -2.92 -2.91 -2.92 -2.91 -2.91 -2.91 -2.89 -2.90 -2.89 -2.88
# Dev objf:   -11.73 -5.17 -4.46 -4.21 -4.06 -3.96 -3.88 -3.82 -3.79 -3.73 -3.69 -3.68 -3.63
# -3.61 -3.59 -3.58 -3.54 -3.54 -3.53 -3.51 -3.50 -3.47 -3.47 -3.46 -3.44 -3.44 -3.42 -3.42
# -3.42 -3.42 -3.40 -3.36 -3.35 -3.35 -3.34 -3.34 -3.34 -3.33 -3.32 -3.32 -3.31 -3.31 -3.31
# -3.30 -3.29 -3.29 -3.29 -3.28 -3.28 -3.28 -3.27 -3.27 -3.26 -3.27 -3.27 -3.26 -3.25 -3.26
# -3.26 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.26 -3.25 -3.24 -3.25 -3.25 -3.24 -3.24
# -3.25 -3.25 -3.24 -3.24 -3.25 -3.26 -3.25 -3.25 -3.24 -3.25 -3.25 -3.24 -3.25 -3.25 -3.25
# -3.24 -3.26 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.26 -3.26 -3.26 -3.26 -3.26
# -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.28 -3.28 -3.28 -3.28 -3.29 -3.29 -3.29

# Begin configuration section.
affix=1a
dir=exp/rnnlm_lstm_${affix}
enhan=$1
embedding_dim=2048
lstm_rpd=512
lstm_nrpd=512
stage=-10
train_stage=-10

# variables for lattice rescoring
run_lat_rescore=true
run_nbest_rescore=true
use_backward_model=true
ac_model_dir=exp/chain/tdnn1a_sp
decode_dir_suffix=rnnlm_lstm_${affix}
ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order;
              # if it's set, histories in the lattice that share the same ngram
              # history are merged, which prevents the lattice from exploding
              # exponentially

. cmd.sh
. utils/parse_options.sh

srcdir=data/local/local_lm
lexicon=data/local/dict/lexiconp.txt
text_dir=data/rnnlm/text_nosp_${affix}
mkdir -p $dir/config
set -e

for f in $lexicon; do
  [ ! -f $f ] && \
    echo "$0: expected file $f to exist; search for local/wsj_extend_dict.sh in run.sh" && exit 1
done

# prepare training and dev data
if [ $stage -le 0 ]; then
  mkdir -p $text_dir
  cp $srcdir/train.rnn $text_dir/chime4.txt.tmp
  sed -e "s/<RNN_UNK>/<UNK>/g" $text_dir/chime4.txt.tmp > $text_dir/chime4.txt
  cp $srcdir/valid.rnn $text_dir/dev.txt
fi

if [ $stage -le 1 ]; then
  cp data/lang_chain/words.txt $dir/config/words.txt
  n=$(cat $dir/config/words.txt | wc -l)
  echo "<brk> $n" >> $dir/config/words.txt

  # words that are not present in words.txt but are in the training or dev
  # data will be mapped to <UNK> during training.
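  # (oov.txt holds the single symbol that such out-of-vocabulary words are
  # mapped to; we use <UNK> here to match the <RNN_UNK> -> <UNK> substitution
  # done on the training text in stage 0 above.)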
echo "<UNK>" >$dir/config/oov.txt cat > $dir/config/data_weights.txt <<EOF chime4 3 1.0 EOF rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \ --unk-word="<UNK>" \ --data-weights-file=$dir/config/data_weights.txt \ $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt # choose features rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ --use-constant-feature=true \ --special-words='<s>,</s>,<UNK>,<brk>' \ $dir/config/words.txt > $dir/config/features.txt cat >$dir/config/xconfig <<EOF input dim=$embedding_dim name=input relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1)) fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3)) fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3)) output-layer name=output include-log-softmax=false dim=$embedding_dim EOF rnnlm/validate_config_dir.sh $text_dir $dir/config fi if [ $stage -le 2 ]; then rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir fi # Train model with forward data(forward model) if [ $stage -le 3 ]; then rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 3 \ --stage $train_stage --num-epochs 10 --cmd "$train_cmd" $dir fi # Train another model with reversed data(backward model) if [ $stage -le 4 ] && $use_backward_model; then local/rnnlm/run_lstm_back.sh --embedding-dim $embedding_dim \ --lstm-rpd $lstm_rpd --lstm-nrpd $lstm_nrpd \ --affix $affix fi # Since lattice-rescoring performs worse but faster than nbest-rescoring, # we only use it to evaluate how good our forward model is. 
LM=5gkn_5k # using the 5-gram LM from run_lmrescore_tdnn.sh
tgtdir=${ac_model_dir}_smbr_lmrescore

if [ $stage -le 5 ] && $run_lat_rescore; then
  echo "$0: Perform lattice-rescoring on $ac_model_dir"
  for decode_set in dt05_real dt05_simu et05_real et05_simu; do
    decode_dir=$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${LM}

    # Lattice rescoring
    rnnlm/lmrescore_pruned.sh \
      --cmd "$train_cmd --mem 2G" \
      --weight 0.8 --max-ngram-order $ngram_order \
      data/lang_test_$LM $dir \
      data/${decode_set}_${enhan}_chunked ${decode_dir} \
      $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix} &
  done
  wait

  # compute WERs for the lattice-rescoring results
  local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix} \
      $tgtdir/graph_tgpr_5k \
      > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}.result
  head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}.result
fi

nbest=100
rnnweight=0.8
if [ $stage -le 6 ] && $run_nbest_rescore; then
  echo "$0: Perform nbest-rescoring on $ac_model_dir"
  for decode_set in dt05_real dt05_simu et05_real et05_simu; do
    decode_dir=$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${LM}
    (
    # N-best rescoring with the forward model
    rnnlm/lmrescore_nbest.sh \
      --cmd "$train_cmd --mem 2G" --N $nbest \
      $rnnweight data/lang_test_$LM $dir \
      data/${decode_set}_${enhan}_chunked ${decode_dir} \
      $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}
    # rescore the same N-best lists again with the backward model
    if $use_backward_model; then
      rnnlm/lmrescore_nbest_back.sh \
        --cmd "$train_cmd --mem 2G" --N $nbest \
        $rnnweight data/lang_test_$LM ${dir}_back \
        data/${decode_set}_${enhan}_chunked \
        $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest} \
        $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi
    fi
    ) &
  done
  wait

  # compute WERs for the nbest-rescoring results
  if $use_backward_model; then
    local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi \
        $tgtdir/graph_tgpr_5k \
        > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi.result
    head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi.result
  else
    local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest} \
        $tgtdir/graph_tgpr_5k \
        > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}.result
    head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}.result
  fi
fi

exit 0
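# Example invocation (the enhancement suffix "beamformit_5mics" is
# hypothetical here; pass the same $enhan that your run.sh uses):
#   local/rnnlm/tuning/run_lstm_1a.sh beamformit_5mics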