Yannick Estève / ONTRAC-Kaldi

Blame view

egs/reverb/s5/run.sh 6.34 KB
  #!/bin/bash
  
  # Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe)
  #                     Johns Hopkins University (author: Szu-Jui Chen)
  #                     Johns Hopkins University (author: Aswin Shanmugam Subramanian)
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABILITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  # This is a shell script, but it's recommended that you run the commands one by
  # one by copying and pasting into the shell.
  # Caution: some of the graph creation steps use quite a bit of memory, so you
  # should run this on a machine that has sufficient memory.
  
  # Requirements: tcsh and matlab must both be resolvable as commands.
  # `command -v` is a shell builtin whose exit status says whether the command
  # was found. The previous form tested the unquoted text printed by `which`,
  # which breaks when `which` is not installed, when it prints a diagnostic on
  # stdout, or when the resolved path contains whitespace.
  if ! command -v tcsh >/dev/null 2>&1; then
    echo "Install tcsh, which is used in some REVERB scripts"
    exit 1
  fi
  if ! command -v matlab >/dev/null 2>&1; then
    echo "Install matlab, which is used to generate multi-condition data"
    exit 1
  fi
  
  source ./cmd.sh
  source ./path.sh
  
  # Default settings; utils/parse_options.sh is sourced right after them
  # (presumably so that they can be overridden from the command line).
  stage=0
  nch_se=8
  # flag for turning on computation of dereverberation measures
  compute_se=true
  # please make sure that you or your institution have the license to report
  # PESQ before turning on the below flag
  enable_pesq=false
  
  source utils/parse_options.sh
  # From here on run bash in 'debug' mode:
  #   -x          print each command before running it
  #   -e          exit on error
  #   -u          exit on use of an undefined variable
  #   -o pipefail a pipeline fails if any of its stages fails
  set -e -u -x -o pipefail
  
  # Locations of the REVERB and WSJ corpora. Only hosts whose fully qualified
  # name ends in .clsp.jhu.edu are pre-configured; on any other host the script
  # stops here until the paths have been filled in.
  case "$(hostname -f)" in
    *.clsp.jhu.edu)
      reverb=/export/corpora5/REVERB_2014/REVERB
      export wsjcam0=/export/corpora3/LDC/LDC95S24/wsjcam0
      # LDC WSJ0 directory, needed to obtain the LMs: the REVERB data directory
      # only provides the bi-gram (bcb05cnp), but this recipe also uses the
      # 3-gram (tcb05cnp.z).
      # It is assumed that there will be a 'wsj0' subdirectory within the
      # top-level corpus directory.
      export wsj0=/export/corpora5/LDC/LDC93S6A/11-13.1 #LDC93S6A or LDC93S6B
      ;;
    *)
      echo "Set the data directory locations." && exit 1
      ;;
  esac
  
  # Training set
  train_set='tr_simu_8ch'
  # Test sets, assembled group by group into one space-separated list:
  # the *_8ch_beamformit sets, the *_1ch_wpe sets, then the *_cln sets.
  test_sets="dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit"
  test_sets+=" dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe"
  test_sets+=" dt_cln et_cln"
  
  # The language models with which to decode (tg_5k or bg_5k)
  lm='tg_5k'
  
  # number of jobs for feature extraction and model training
  nj=92
  # number of jobs for decoding
  decode_nj=10
  
  # Directories below the current working directory that are handed to the
  # local/ scripts in later stages.
  wavdir="${PWD}/wav"
  pesqdir="${PWD}/local"
  if [ "${stage}" -le 1 ]; then
    # Data preparation: run the three local/ preparation scripts, all writing
    # below ${wavdir}.
    # ${wavdir} is derived from $PWD, so every path argument is quoted to keep
    # it a single word even when the working directory contains whitespace.
    echo "stage 0: Data preparation"
    local/generate_data.sh --wavdir "${wavdir}" "${wsjcam0}"
    local/prepare_simu_data.sh --wavdir "${wavdir}" "${reverb}" "${wsjcam0}"
    local/prepare_real_data.sh --wavdir "${wavdir}" "${reverb}"
  fi
  
  if [ "${stage}" -le 2 ]; then
    # Run the WPE script, then the beamforming script on ${wavdir}/WPE/.
    # The path is quoted because ${wavdir} is derived from $PWD and must stay
    # one argument even if the working directory contains whitespace.
    local/run_wpe.sh --cmd "${train_cmd}"
    local/run_beamform.sh --cmd "${train_cmd}" "${wavdir}/WPE/"
  fi
  
  # Compute dereverberation scores (skipped when compute_se is false).
  # All expansions are quoted: ${wavdir} and ${pesqdir} are derived from $PWD,
  # and ${nch_se} / ${enable_pesq} have defaults that utils/parse_options.sh is
  # sourced after, so none of them may be word-split or dropped when empty.
  if [ "${stage}" -le 3 ] && ${compute_se}; then
    se_src=local/REVERB_scores_source
    if [ ! -d "${se_src}" ] \
      || [ ! -d "${se_src}/REVERB-SPEENHA.Release04Oct/evaltools/SRMRToolbox" ] \
      || [ ! -f local/PESQ ]; then
      # download and install speech enhancement evaluation tools
      local/download_se_eval_tool.sh
    fi
    local/compute_se_scores.sh --nch "${nch_se}" --enable_pesq "${enable_pesq}" \
      "${reverb}" "${wavdir}" "${pesqdir}"
    # Print the resulting score files.
    cat "exp/compute_se_${nch_se}ch/scores/score_SimData"
    cat "exp/compute_se_${nch_se}ch/scores/score_RealData"
  fi
  
  if [ "${stage}" -le 4 ]; then
    dict_dir=data/local/dict
    lang_dir=data/lang
    # Prepare wsjcam0 clean data and wsj0 language model.
    local/wsjcam0_data_prep.sh "${wsjcam0}" "${wsj0}"
    # Prepare merged BEEP/CMU dictionary.
    local/wsj_prepare_beep_dict.sh
    # Prepare wordlists, etc.
    utils/prepare_lang.sh "${dict_dir}" "<NOISE>" data/local/lang_tmp "${lang_dir}"
    # Prepare directory structure for clean data. Apply some language model fixes.
    local/wsjcam0_format_data.sh
  fi
  
  if [ "${stage}" -le 5 ]; then
    # For every training/test set: copy data/<set> to data/<set>_nosplit, then
    # rebuild data/<set> from that copy with --seconds-per-spk-max 180.
    # ${train_set} and ${test_sets} are left unquoted on purpose so the
    # space-separated list is split into individual set names.
    for set_name in ${train_set} ${test_sets}; do
      nosplit_dir="data/${set_name}_nosplit"
      utils/copy_data_dir.sh "data/${set_name}" "${nosplit_dir}"
      utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 \
        "${nosplit_dir}" "data/${set_name}"
    done
  fi
  
  if [ "${stage}" -le 6 ]; then
    # Extract MFCC features and CMVN statistics for the train and test sets.
    mfccdir=mfcc
    # Word splitting of the set lists is intentional here.
    for set_name in ${train_set} ${test_sets}; do
      set_data="data/${set_name}"
      set_exp="exp/make_mfcc/${set_name}"
      steps/make_mfcc.sh --cmd "${train_cmd}" --nj 30 \
        "${set_data}" "${set_exp}" "${mfccdir}"
      steps/compute_cmvn_stats.sh "${set_data}" "${set_exp}" "${mfccdir}"
    done
  fi
  
  if [ "${stage}" -le 7 ]; then
    # Starting basic training on MFCC features: monophone model in exp/mono.
    train_data="data/${train_set}"
    steps/train_mono.sh --nj "${nj}" --cmd "${train_cmd}" \
      "${train_data}" data/lang exp/mono
  fi
  
  if [ "${stage}" -le 8 ]; then
    train_data="data/${train_set}"
    # Align the training data with exp/mono, writing exp/mono_ali.
    steps/align_si.sh --nj "${nj}" --cmd "${train_cmd}" \
      "${train_data}" data/lang exp/mono exp/mono_ali
    # Train exp/tri1 from those alignments (see steps/train_deltas.sh for the
    # meaning of the two numeric arguments).
    steps/train_deltas.sh --cmd "${train_cmd}" \
      2500 30000 "${train_data}" data/lang exp/mono_ali exp/tri1
  fi
  
  if [ "${stage}" -le 9 ]; then
    train_data="data/${train_set}"
    # Align the training data with exp/tri1, writing exp/tri1_ali.
    steps/align_si.sh --nj "${nj}" --cmd "${train_cmd}" \
      "${train_data}" data/lang exp/tri1 exp/tri1_ali
    # Train exp/tri2 from those alignments (see steps/train_lda_mllt.sh for the
    # meaning of the two numeric arguments).
    steps/train_lda_mllt.sh --cmd "${train_cmd}" \
      4000 50000 "${train_data}" data/lang exp/tri1_ali exp/tri2
  fi
  
  if [ "${stage}" -le 10 ]; then
    # Build the tri2 decoding graph for the selected language model.
    utils/mkgraph.sh "data/lang_test_${lm}" exp/tri2 exp/tri2/graph
    # Decode all test sets in parallel, remembering the PID of every job.
    # ${test_sets} is unquoted on purpose: it is a space-separated list.
    decode_pids=()
    for dset in ${test_sets}; do
      steps/decode.sh --nj "${decode_nj}" --cmd "${decode_cmd}" --num-threads 4 \
        exp/tri2/graph "data/${dset}" "exp/tri2/decode_${dset}" &
      decode_pids+=("$!")
    done
    # A bare `wait` always returns 0 and a failing background job does not
    # trigger `set -e`, so decoding errors used to go unnoticed. Wait for each
    # PID individually, let all jobs finish, then fail if any of them failed.
    decode_failed=false
    for pid in ${decode_pids[@]+"${decode_pids[@]}"}; do
      wait "${pid}" || decode_failed=true
    done
    if ${decode_failed}; then
      echo "$0: at least one tri2 decoding job failed" >&2
      exit 1
    fi
  fi
  
  if [ "${stage}" -le 11 ]; then
    train_data="data/${train_set}"
    # Align the training data with exp/tri2, writing exp/tri2_ali.
    steps/align_si.sh --nj "${nj}" --cmd "${train_cmd}" \
      "${train_data}" data/lang exp/tri2 exp/tri2_ali
    # Train exp/tri3 from those alignments (see steps/train_sat.sh for the
    # meaning of the two numeric arguments).
    steps/train_sat.sh --cmd "${train_cmd}" \
      5000 100000 "${train_data}" data/lang exp/tri2_ali exp/tri3
  fi
  
  if [ "${stage}" -le 12 ]; then
    # Build the tri3 decoding graph for the selected language model.
    utils/mkgraph.sh "data/lang_test_${lm}" exp/tri3 exp/tri3/graph
    # Decode all test sets in parallel, remembering the PID of every job.
    # ${test_sets} is unquoted on purpose: it is a space-separated list.
    decode_pids=()
    for dset in ${test_sets}; do
      steps/decode_fmllr.sh --nj "${decode_nj}" --cmd "${decode_cmd}" --num-threads 4 \
        exp/tri3/graph "data/${dset}" "exp/tri3/decode_${dset}" &
      decode_pids+=("$!")
    done
    # A bare `wait` always returns 0 and a failing background job does not
    # trigger `set -e`, so decoding errors used to go unnoticed. Wait for each
    # PID individually, let all jobs finish, then fail if any of them failed.
    decode_failed=false
    for pid in ${decode_pids[@]+"${decode_pids[@]}"}; do
      wait "${pid}" || decode_failed=true
    done
    if ${decode_failed}; then
      echo "$0: at least one tri3 decoding job failed" >&2
      exit 1
    fi
  fi
  
  if [ "${stage}" -le 13 ]; then
    # chain TDNN: collect the options in an array, one per line, and hand them
    # to the chain recipe script in the same order as before.
    tdnn_opts=(
      --nj "${nj}"
      --train-set "${train_set}"
      --test-sets "${test_sets}"
      --gmm tri3
      --nnet3-affix "_${train_set}"
      --lm-suffix "_test_${lm}"
    )
    local/chain/run_tdnn.sh "${tdnn_opts[@]}"
  fi
  
  # Final stage: collect all WERs via local/get_results.sh.
  if [ "${stage}" -le 14 ]; then
    local/get_results.sh
  fi