Blame view
egs/aspire/s5/local/nnet3/segment_and_decode.sh
5.74 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
#!/bin/bash

# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016.  Apache 2.0.

# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
# for scoring with ASpIRE scoring server.
# It also provides the WER for dev_aspire data.
#
# Usage:
#   segment_and_decode.sh [options] <data-dir> <sad-nnet-dir> <work-dir> \
#                         <lang-dir> <graph-dir> <model-dir>
#
# Steps, each gated by "--stage" (a step runs when stage <= its number):
#   2  speech activity detection on data/<data-dir>
#   3  RTTM conversion and md-eval scoring (only if a reference RTTM exists)
#   4  copy the segmented data dir to data/*_seg_hires, CMVN stats, fix
#   5  i-vector extraction
#   6  nnet3 decoding (lattice generation)
#   7  const-arpa LM rescoring
#   8  scoring / ctm generation via local/score_aspire.sh
#
# The option variables below are overridable on the command line through
# utils/parse_options.sh, so their names are part of the script's interface.

set -e

# general opts
iter=
stage=0
sad_num_jobs=30
decode_num_jobs=30
affix=

# segmentation opts
sad_affix=
sad_opts="--extra-left-context 79 --extra-right-context 21 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"
sad_graph_opts=
sad_priors_opts=
sad_stage=0

# ivector opts
max_count=75  # parameter for extract_ivectors.sh
sub_speaker_frames=6000

# decode opts
decode_opts="--min-active 1000"
lattice_beam=8
extra_left_context=0   # change for (B)LSTM
extra_right_context=0  # change for BLSTM
frames_per_chunk=50    # change for (B)LSTM
acwt=0.1               # important to change this when using chain models
post_decode_acwt=1.0   # important to change this when using chain models
extra_left_context_initial=0
extra_right_context_final=0

score_opts="--min-lmwt 6 --max-lmwt 13"

. ./cmd.sh
[ -f ./path.sh ] && . ./path.sh
. utils/parse_options.sh || exit 1;

if [ $# -ne 6 ]; then
  echo "Usage: $0 [options] <data-dir> <sad-nnet-dir> <work-dir> <lang-dir> <graph-dir> <model-dir>"
  echo " Options:"
  echo "    --stage <n>   # start from part-way through; steps are gated at stages 2..8 (default 0 runs all)."
  echo "e.g.:"
  echo "$0 dev_aspire exp/sad_nnet exp/sad_work data/lang exp/tri5a/graph_pp exp/nnet3/tdnn"
  exit 1;
fi

data_set=$1      # select from {dev_aspire, test_aspire, eval_aspire}*
sad_nnet_dir=$2
sad_work_dir=$3
lang=$4          # data/lang
graph=$5         # exp/tri5a/graph_pp
dir=$6           # exp/nnet3/tdnn

model_affix=$(basename "$dir")
ivector_root_dir=exp/nnet3
# Turn the user-supplied affix into a "_<affix>" suffix and append
# "_iter<iter>" when a specific iteration was requested.
affix=${affix:+_${affix}}${iter:+_iter${iter}}

# Pick the output ctm name and the canonical data-set name from a substring
# match on <data-dir>; the patterns are tried in this order.
case "$data_set" in
  *test_aspire*)
    out_file=single_dev_test${affix}_$model_affix.ctm
    act_data_set=test_aspire
    ;;
  *eval_aspire*)
    out_file=single_eval${affix}_$model_affix.ctm
    act_data_set=eval_aspire
    ;;
  *dev_aspire*)
    # we will just decode the directory without oracle segments file
    # as we would like to operate in the actual evaluation condition
    out_file=single_dev${affix}_${model_affix}.ctm
    act_data_set=dev_aspire
    ;;
  *)
    echo "$0: Unknown data-set $data_set"
    exit 1
    ;;
esac

segmented_data_set=${data_set}${sad_affix:+_$sad_affix}
# Directory holding the segmentation output that the later stages read.
# NOTE(review): presumably detect_speech_activity.sh creates it by appending
# "_seg" to its last argument -- confirm against that script.
seg_dir=$sad_work_dir/${segmented_data_set}_seg
hires_data=data/${segmented_data_set}_seg_hires
ivector_dir=$ivector_root_dir/ivectors_${segmented_data_set}_seg

if [ "$stage" -le 2 ]; then
  # $sad_opts is intentionally unquoted: it holds several options.
  steps/segmentation/detect_speech_activity.sh \
    --nj "$sad_num_jobs" --stage "$sad_stage" \
    --affix "$sad_affix" --graph-opts "$sad_graph_opts" \
    --transform-probs-opts "$sad_priors_opts" $sad_opts \
    "data/$data_set" "$sad_nnet_dir" mfcc_hires "$sad_work_dir" \
    "$sad_work_dir/$segmented_data_set" || exit 1
fi

if [ "$stage" -le 3 ]; then
  # Segmentation is scored only when a reference RTTM is available for the
  # canonical data set.
  if [ -f "data/$act_data_set/ref.rttm" ]; then
    if [ ! -f "$seg_dir/reco2file_and_channel" ]; then
      # NOTE(review): this writes two columns ("<field 2 of segments> 1");
      # confirm that the RTTM converter accepts that layout.
      awk '{print $2" "1}' "$seg_dir/segments" | \
        sort -u > "$seg_dir/reco2file_and_channel"
    fi

    steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
      --reco2file-and-channel="$seg_dir/reco2file_and_channel" \
      "$seg_dir"/{utt2spk,segments,sys.rttm} || exit 1

    export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
    # Score against the same reference file that the guard above tested,
    # rather than always using the dev_aspire reference.
    md-eval.pl -c 0.25 -r "data/$act_data_set/ref.rttm" \
      -s "$seg_dir/sys.rttm" > "$seg_dir/md_eval.log"
  fi
fi

if [ "$stage" -le 4 ]; then
  utils/copy_data_dir.sh "$seg_dir" "$hires_data"
  steps/compute_cmvn_stats.sh "$hires_data"
  utils/fix_data_dir.sh "$hires_data"
fi

if [ "$stage" -le 5 ]; then
  echo "Extracting i-vectors"
  # this does offline decoding.
  # the --sub-speaker-frames is optional; if provided, it will divide each speaker
  # up into "sub-speakers" of at least that many frames... can be useful if
  # acoustic conditions drift over time within the speaker's data.
  steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj "$decode_num_jobs" \
    --sub-speaker-frames "$sub_speaker_frames" --max-count "$max_count" \
    "$hires_data" "$lang" "$ivector_root_dir/extractor" \
    "$ivector_dir"
fi

decode_dir=$dir/decode_${segmented_data_set}_seg${affix}_pp
if [ "$stage" -le 6 ]; then
  echo "Generating lattices"
  rm -f "${decode_dir}_tg/.error"
  # $decode_opts and ${iter:+...} are intentionally unquoted so that they
  # split into separate words (or vanish when empty).
  steps/nnet3/decode.sh --nj "$decode_num_jobs" --cmd "$decode_cmd" --config conf/decode.config \
    --acwt "$acwt" --post-decode-acwt "$post_decode_acwt" $decode_opts \
    --extra-left-context "$extra_left_context" \
    --extra-right-context "$extra_right_context" \
    --extra-left-context-initial "$extra_left_context_initial" \
    --extra-right-context-final "$extra_right_context_final" \
    --frames-per-chunk "$frames_per_chunk" \
    --skip-scoring true ${iter:+--iter $iter} --lattice-beam "$lattice_beam" \
    --online-ivector-dir "$ivector_dir" \
    "$graph" "$hires_data" "${decode_dir}_tg" || \
    { echo "$0: Error decoding" && exit 1; }
fi

if [ "$stage" -le 7 ]; then
  echo "Rescoring lattices"
  # Brace expansion yields <lang>_pp_test and <lang>_pp_test_fg, and the
  # _tg (input) and _fg (output) decode directories.
  steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
    --skip-scoring true \
    "${lang}_pp_test"{,_fg} "$hires_data" \
    "${decode_dir}"_{tg,fg};
fi

# From here on, work with the rescored lattices.
decode_dir=${decode_dir}_fg

if [ "$stage" -le 8 ]; then
  # $score_opts and ${iter:+...} are intentionally unquoted (see above).
  local/score_aspire.sh --cmd "$decode_cmd" \
    $score_opts \
    --word-ins-penalties "0.0,0.25,0.5,0.75,1.0" \
    --ctm-beam 6 \
    ${iter:+--iter $iter} \
    --decode-mbr true \
    --tune-hyper true \
    "$lang" "$decode_dir" "$act_data_set" "${segmented_data_set}_seg" "$out_file"
fi