Blame view
egs/wsj/s5/steps/nnet3/align_lats.sh
6.17 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
#!/bin/bash # Copyright 2012 Brno University of Technology (Author: Karel Vesely) # 2013 Johns Hopkins University (Author: Daniel Povey) # 2015 Vijayaditya Peddinti # 2016 Vimal Manohar # 2017 Pegah Ghahremani # Apache 2.0 # Computes training alignments using nnet3 DNN, with output to lattices. # Begin configuration section. nj=4 cmd=run.pl stage=-1 # Begin configuration. scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" acoustic_scale=0.1 beam=20 iter=final frames_per_chunk=50 extra_left_context=0 extra_right_context=0 extra_left_context_initial=-1 extra_right_context_final=-1 online_ivector_dir= graphs_scp= generate_ali_from_lats=false # If true, alingments generated from lattices. # End configuration options. echo "$0 $@" # Print the command line for logging [ -f path.sh ] && . ./path.sh # source the path. . parse_options.sh || exit 1; if [ $# != 4 ]; then echo "Usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>" echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali" echo "main options (for others, see top of script file)" echo " --config <config-file> # config containing options" echo " --nj <nj> # number of parallel jobs" echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." exit 1; fi data=$1 lang=$2 srcdir=$3 dir=$4 oov=`cat $lang/oov.int` || exit 1; mkdir -p $dir/log echo $nj > $dir/num_jobs sdata=$data/split${nj} [[ -d $sdata && $data/feats.scp -ot $sdata ]] || \ split_data.sh $data $nj || exit 1; extra_files= if [ ! -z "$online_ivector_dir" ]; then steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1 extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" fi for f in $srcdir/tree $srcdir/${iter}.mdl $data/feats.scp $lang/L.fst $extra_files; do [ ! -f $f ] && echo "$0: no such file $f" && exit 1; done cp $srcdir/{tree,${iter}.mdl} $dir || exit 1; utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; cp $lang/phones.txt $dir || exit 1; ## Set up features. Note: these are different from the normal features ## because we have one rspecifier that has the features for the entire ## training set, not separate ones for each batch. echo "$0: feature type is raw" cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` cp $srcdir/cmvn_opts $dir 2>/dev/null feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" ivector_opts= if [ ! -z "$online_ivector_dir" ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" fi echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" frame_subsampling_opt= if [ -f $srcdir/frame_subsampling_factor ]; then # e.g. for 'chain' systems frame_subsampling_factor=$(cat $srcdir/frame_subsampling_factor) frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" cp $srcdir/frame_subsampling_factor $dir if [[ $frame_subsampling_factor -gt 1 ]]; then # Assume a chain system, check agrument sanity. if [[ ! ($scale_opts == *--self-loop-scale=1.0* && $scale_opts == *--transition-scale=1.0* && $acoustic_scale = '1.0') ]]; then echo "$0: ERROR: frame-subsampling-factor is not 1, assuming a chain system." echo "... You should pass the following options to this script:" echo " --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0'" \ "--acoustic_scale 1.0" fi fi fi if [ ! -z "$graphs_scp" ]; then if [ ! -f $graphs_scp ]; then echo "Could not find graphs $graphs_scp" && exit 1 fi tra="scp:utils/filter_scp.pl $sdata/JOB/utt2spk $graphs_scp |" prog=compile-train-graphs-fsts else tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; prog=compile-train-graphs fi if [ $stage -le 0 ]; then ## because nnet3-latgen-faster doesn't support adding the transition-probs to the ## graph itself, we need to bake them into the compiled graphs. This means we can't reuse previously compiled graphs, ## because the other scripts write them without transition probs. $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ $prog --read-disambig-syms=$lang/phones/disambig.int \ $scale_opts \ $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" \ "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1 fi if [ $stage -le 1 ]; then # Warning: nnet3-latgen-faster doesn't support a retry-beam so you may get more # alignment errors (however, it does have a default min-active=200 so this # will tend to reduce alignment errors). # --allow_partial=false makes sure we reach the end of the decoding graph. # --word-determinize=false makes sure we retain the alternative pronunciations of # words (including alternatives regarding optional silences). # --lattice-beam=$beam keeps all the alternatives that were within the beam, # it means we do no pruning of the lattice (lattices from a training transcription # will be small anyway). $cmd JOB=1:$nj $dir/log/generate_lattices.JOB.log \ nnet3-latgen-faster --acoustic-scale=$acoustic_scale $ivector_opts $frame_subsampling_opt \ --frames-per-chunk=$frames_per_chunk \ --extra-left-context=$extra_left_context \ --extra-right-context=$extra_right_context \ --extra-left-context-initial=$extra_left_context_initial \ --extra-right-context-final=$extra_right_context_final \ --beam=$beam --lattice-beam=$beam \ --allow-partial=false --word-determinize=false \ $srcdir/${iter}.mdl "ark:gunzip -c $dir/fsts.JOB.gz |" \ "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; fi if [ $stage -le 2 ] && $generate_ali_from_lats; then # If generate_alignments is true, ali.*.gz is generated in lats dir $cmd JOB=1:$nj $dir/log/generate_alignments.JOB.log \ lattice-best-path --acoustic-scale=$acoustic_scale "ark:gunzip -c $dir/lat.JOB.gz |" \ ark:/dev/null "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; fi echo "$0: done generating lattices from training transcripts." |