Blame view
egs/chime5/s5b/local/run_recog.sh
5.67 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
#!/bin/bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
# Copyright  2017  Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal)
# Apache 2.0
#
# This is a subset of run.sh to only perform recognition experiments with evaluation data
#
# Stages (a stage runs when the --stage value is <= its number):
#    4  beamform the eval audio of arrays u01..u06 and prepare the data directory
#    6  append the array ID to the speaker IDs and split speakers into 3-minute chunks
#    7  extract MFCC features and CMVN statistics
#   17  extract high-resolution MFCC features and online i-vectors
#   18  build the decoding graph and decode with the chain TDNN model
#   20  final scoring via local/score_for_submit.sh

# Begin configuration section.
# NOTE(review): decode_nj is not referenced by any stage below; it is kept
# because it is declared before parse_options.sh is sourced.
decode_nj=20
stage=0
enhancement=beamformit # for a new enhancement method,
                       # change this variable and stage 4
# End configuration section
. ./utils/parse_options.sh

. ./cmd.sh
. ./path.sh

set -e # exit on error

# chime5 main directory path
# please change the path accordingly
chime5_corpus=/export/corpora4/CHiME5
json_dir=${chime5_corpus}/transcriptions
audio_dir=${chime5_corpus}/audio

# training and test data
train_set=train_worn_u100k
test_sets="eval_${enhancement}_ref"

# Affix of the exp/nnet3* and exp/chain* directories. It is defined here,
# outside of any stage block, because both the i-vector extraction (stage 17)
# and the decoding (stage 18) depend on it; defining it only inside stage 17
# would leave it empty when the script is resumed with --stage 18.
nnet3_affix=_${train_set}_cleaned

# This script also needs the phonetisaurus g2p, srilm, beamformit
./local/check_tools.sh || exit 1

if [ "$stage" -le 4 ]; then
  # Beamforming using reference arrays
  # enhanced WAV directory
  enhandir=enhan
  for dset in eval; do
    for mictype in u01 u02 u03 u04 u05 u06; do
      local/run_beamformit.sh --cmd "$train_cmd" \
                              ${audio_dir}/${dset} \
                              ${enhandir}/${dset}_${enhancement}_${mictype} \
                              ${mictype}
    done
  done

  for dset in eval; do
    # The glob is quoted on purpose, so it reaches prepare_data.sh as a
    # literal pattern instead of being expanded by this shell.
    local/prepare_data.sh --mictype ref "$PWD/${enhandir}/${dset}_${enhancement}_u0*" \
                          ${json_dir}/${dset} data/${dset}_${enhancement}_ref
  done
fi

if [ "$stage" -le 6 ]; then
  # fix speaker ID issue (thanks to Dr. Naoyuki Kanda)
  # add array ID to the speaker ID to avoid the use of other array information to meet regulations
  # Before this fix
  # $ head -n 2 data/eval_beamformit_ref_nosplit/utt2spk
  # P01_S01_U02_KITCHEN.ENH-0000192-0001278 P01
  # P01_S01_U02_KITCHEN.ENH-0001421-0001481 P01
  # After this fix
  # $ head -n 2 data/eval_beamformit_ref_nosplit_fix/utt2spk
  # P01_S01_U02_KITCHEN.ENH-0000192-0001278 P01_U02
  # P01_S01_U02_KITCHEN.ENH-0001421-0001481 P01_U02
  for dset in ${test_sets}; do
    utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit
    mkdir -p data/${dset}_nosplit_fix
    cp data/${dset}_nosplit/{segments,text,wav.scp} data/${dset}_nosplit_fix/
    # Splitting on "_" makes $3 the array ID (e.g. U02); it is appended to the
    # whole line, i.e. to the speaker ID in the last column.
    awk -F "_" '{print $0 "_" $3}' data/${dset}_nosplit/utt2spk > data/${dset}_nosplit_fix/utt2spk
    utils/utt2spk_to_spk2utt.pl data/${dset}_nosplit_fix/utt2spk > data/${dset}_nosplit_fix/spk2utt
  done

  # Split speakers up into 3-minute chunks.  This doesn't hurt adaptation, and
  # lets us use more jobs for decoding etc.
  for dset in ${test_sets}; do
    utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit_fix data/${dset}
  done
fi

if [ "$stage" -le 7 ]; then
  # Now make MFCC features.
  # mfccdir should be some place with a largish disk where you
  # want to store MFCC features.
  mfccdir=mfcc
  for x in ${test_sets}; do
    steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" \
                       data/$x exp/make_mfcc/$x $mfccdir
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
    utils/fix_data_dir.sh data/$x
  done
fi

if [ "$stage" -le 17 ]; then
  for datadir in ${test_sets}; do
    utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
  done
  for datadir in ${test_sets}; do
    steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf \
                       --cmd "$train_cmd" data/${datadir}_hires || exit 1;
    steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
    utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
  done
  for data in $test_sets; do
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \
      data/${data}_hires exp/nnet3${nnet3_affix}/extractor \
      exp/nnet3${nnet3_affix}/ivectors_${data}_hires
  done
fi

if [ "$stage" -le 18 ]; then
  # First the options that are passed through to run_ivector_common.sh
  # (some of which are also used in this script directly).
  lm_suffix=

  # The rest are configs specific to this script.  Most of the parameters
  # are just hardcoded at this level, in the commands below.
  affix=1a   # affix for the TDNN directory name
  tree_affix=
  tree_dir=exp/chain${nnet3_affix}/tree_sp${tree_affix:+_$tree_affix}
  dir=exp/chain${nnet3_affix}/tdnn${affix}_sp

  # training options
  # training chunk-options
  chunk_width=140,100,160
  # we don't need extra left/right context for TDNN systems.
  chunk_left_context=0
  chunk_right_context=0

  utils/mkgraph.sh \
    --self-loop-scale 1.0 data/lang${lm_suffix}/ \
    $tree_dir $tree_dir/graph${lm_suffix} || exit 1;

  # Decode with the first of the comma-separated chunk widths.
  frames_per_chunk=${chunk_width%%,*}
  # A stale marker from an earlier run may or may not exist; ignore either way.
  rm $dir/.error 2>/dev/null || true

  # One background decoding job per test set. A failing job leaves the
  # $dir/.error marker behind, which is checked once all jobs have finished.
  for data in $test_sets; do
    (
      steps/nnet3/decode.sh \
        --acwt 1.0 --post-decode-acwt 10.0 \
        --extra-left-context $chunk_left_context \
        --extra-right-context $chunk_right_context \
        --extra-left-context-initial 0 \
        --extra-right-context-final 0 \
        --frames-per-chunk $frames_per_chunk \
        --nj 8 --cmd "$decode_cmd" --num-threads 4 \
        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \
        $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1
    ) || touch $dir/.error &
  done
  wait
  if [ -f "$dir/.error" ]; then
    echo "$0: there was a problem while decoding"
    exit 1
  fi
fi

if [ "$stage" -le 20 ]; then
  # final scoring to get the official challenge result
  # please specify both dev and eval set directories so that the search parameters
  # (insertion penalty and language model weight) will be tuned using the dev set
  local/score_for_submit.sh \
      --dev exp/chain_${train_set}_cleaned/tdnn1a_sp/decode_dev_${enhancement}_ref \
      --eval exp/chain_${train_set}_cleaned/tdnn1a_sp/decode_eval_${enhancement}_ref
fi