Blame view
egs/wsj/s5/steps/train_mono.sh
5.8 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
#!/bin/bash
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
#           2019  Xiaohui Zhang
# Apache 2.0

# To be run from ..
# Flat start and monophone training, with delta-delta features.
# This script applies cepstral mean normalization (per speaker).
#
# Usage: steps/train_mono.sh [options] <data-dir> <lang-dir> <exp-dir>
#
# Reads:   <data-dir>/feats.scp (and the per-job split under <data-dir>/split<nj>),
#          <lang-dir>/{oov.int,phones.txt,topo,L.fst,words.txt} and
#          <lang-dir>/phones/{sets.int,disambig.int,optional_silence.csl}.
# Writes:  <exp-dir>/{num_jobs,cmvn_opts,delta_opts,phones.txt,0.mdl,tree},
#          <exp-dir>/fsts.JOB.gz, <exp-dir>/ali.JOB.gz, per-iteration models,
#          the final.mdl / final.occs symlinks, and logs under <exp-dir>/log.
#
# Stages (a step runs when $stage is <= the number shown):
#   -3  initialise the monophone model and tree
#   -2  compile the training graphs
#   -1  equal-alignment pass and first statistics accumulation
#    0  first model estimate (0.mdl -> 1.mdl)
#    x  training pass x, for x = 1 .. num_iters-1
#
# NOTE: $cmd, $scale_opts, $cmvn_opts and $delta_opts are expanded unquoted on
# purpose; they may hold several whitespace-separated words.

# Begin configuration section.
nj=4
cmd=run.pl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
num_iters=40    # Number of iterations of training
max_iter_inc=30 # Last iter to increase #Gauss on.
initial_beam=6  # beam used in the first iteration (set smaller to speed up initialization)
regular_beam=10 # beam used after the first iteration
retry_beam=40
totgauss=1000 # Target #Gaussians.
careful=false
boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
realign_iters="1 2 3 4 5 6 7 8 9 10 12 14 16 18 20 23 26 29 32 35 38";
config= # name of config file.
stage=-4
power=0.25 # exponent to determine number of gaussians from occurrence counts
norm_vars=false # deprecated, prefer --cmvn-opts "--norm-vars=false"
cmvn_opts=  # can be used to add extra options to cmvn.
delta_opts= # can be used to add extra options to add-deltas
# End configuration section.

echo "$0 $*"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: steps/train_mono.sh [options] <data-dir> <lang-dir> <exp-dir>"
  echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1;
fi

data=$1
lang=$2
dir=$3

oov_sym=$(cat "$lang/oov.int") || exit 1;

mkdir -p "$dir/log"
echo "$nj" > "$dir/num_jobs"
sdata=$data/split$nj;
# Re-split the data unless the split directory exists and is newer than feats.scp.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh "$data" "$nj" || exit 1;

cp "$lang/phones.txt" "$dir" || exit 1;

# $norm_vars holds the literal command name "true" or "false".
if $norm_vars; then cmvn_opts="--norm-vars=true $cmvn_opts"; fi
echo $cmvn_opts > "$dir/cmvn_opts" # keep track of options to CMVN.
# Quoted test: an unquoted multi-word $delta_opts makes '[' fail with
# "too many arguments", and the options file would silently not be written.
if [ -n "$delta_opts" ]; then
  echo $delta_opts > "$dir/delta_opts" # keep track of options to delta
fi

feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |"
# Same pipeline with JOB fixed to 1; used only to query the feature dimension.
example_feats="${feats//JOB/1}"

echo "$0: Initializing monophone system."

if [ ! -f "$lang/phones/sets.int" ]; then
  echo "$0: expected file $lang/phones/sets.int to exist" >&2
  exit 1;
fi
shared_phones_opt="--shared-phones=$lang/phones/sets.int"

if [ "$stage" -le -3 ]; then
  # Note: JOB=1 just uses the 1st part of the features-- we only need a subset anyway.
  if ! feat_dim=$(feat-to-dim "$example_feats" - 2>/dev/null) || [ -z "$feat_dim" ]; then
    # Run it again without hiding stderr so the user sees the underlying error.
    feat-to-dim "$example_feats" -
    echo "error getting feature dimension"
    exit 1;
  fi
  $cmd JOB=1 "$dir/log/init.log" \
    gmm-init-mono "$shared_phones_opt" "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" "$lang/topo" "$feat_dim" \
    "$dir/0.mdl" "$dir/tree" || exit 1;
fi

numgauss=$(gmm-info --print-args=false "$dir/0.mdl" | grep gaussians | awk '{print $NF}')
if ! [[ "$numgauss" =~ ^[0-9]+$ ]]; then
  echo "$0: could not get the number of Gaussians from $dir/0.mdl" >&2
  exit 1;
fi
# per-iter increment for #Gauss (no increase when max_iter_inc is not positive,
# which also avoids a division by zero).
if [ "$max_iter_inc" -gt 0 ]; then
  incgauss=$(( (totgauss - numgauss) / max_iter_inc ))
else
  incgauss=0
fi

if [ "$stage" -le -2 ]; then
  echo "$0: Compiling training graphs"
  $cmd JOB=1:$nj "$dir/log/compile_graphs.JOB.log" \
    compile-train-graphs --read-disambig-syms="$lang/phones/disambig.int" "$dir/tree" "$dir/0.mdl" "$lang/L.fst" \
    "ark:sym2int.pl --map-oov $oov_sym -f 2- $lang/words.txt < $sdata/JOB/text|" \
    "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi

if [ "$stage" -le -1 ]; then
  echo "$0: Aligning data equally (pass 0)"
  # The escaped '\|' is passed through to $cmd as part of the command line
  # rather than being interpreted by this shell.
  $cmd JOB=1:$nj "$dir/log/align.0.JOB.log" \
    align-equal-compiled "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" ark,t:- \| \
    gmm-acc-stats-ali --binary=true "$dir/0.mdl" "$feats" ark:- \
    "$dir/0.JOB.acc" || exit 1;
fi

# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
# we fail to est "rare" phones and later on, they never align properly.

if [ "$stage" -le 0 ]; then
  gmm-est --min-gaussian-occupancy=3 --mix-up="$numgauss" --power="$power" \
    "$dir/0.mdl" "gmm-sum-accs - $dir/0.*.acc|" "$dir/1.mdl" 2> "$dir/log/update.0.log" || exit 1;
  rm "$dir"/0.*.acc
fi

beam=$initial_beam # will change to regular_beam below after 1st pass
# note: using slightly wider beams for WSJ vs. RM.
x=1
while [ "$x" -lt "$num_iters" ]; do
  echo "$0: Pass $x"
  next=$((x + 1))
  if [ "$stage" -le "$x" ]; then
    # Re-align only on the iterations listed in $realign_iters.
    if echo $realign_iters | grep -w -- "$x" >/dev/null; then
      echo "$0: Aligning data"
      silphones=$(cat "$lang/phones/optional_silence.csl") || exit 1;
      mdl="gmm-boost-silence --boost=$boost_silence $silphones $dir/$x.mdl - |"
      $cmd JOB=1:$nj "$dir/log/align.$x.JOB.log" \
        gmm-align-compiled $scale_opts --beam="$beam" --retry-beam="$retry_beam" --careful="$careful" "$mdl" \
        "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" \
        || exit 1;
    fi
    $cmd JOB=1:$nj "$dir/log/acc.$x.JOB.log" \
      gmm-acc-stats-ali "$dir/$x.mdl" "$feats" "ark:gunzip -c $dir/ali.JOB.gz|" \
      "$dir/$x.JOB.acc" || exit 1;

    $cmd "$dir/log/update.$x.log" \
      gmm-est --write-occs="$dir/$next.occs" --mix-up="$numgauss" --power="$power" "$dir/$x.mdl" \
      "gmm-sum-accs - $dir/$x.*.acc|" "$dir/$next.mdl" || exit 1;
    # Best-effort cleanup of this pass's inputs; some of these may not exist.
    rm "$dir/$x.mdl" "$dir"/$x.*.acc "$dir/$x.occs" 2>/dev/null
  fi
  if [ "$x" -le "$max_iter_inc" ]; then
    numgauss=$((numgauss + incgauss))
  fi
  beam=$regular_beam
  x=$next
done

# Point final.{mdl,occs} at the last model. Abort if we cannot enter $dir, so
# the rm/ln below never run in the caller's working directory.
(
  cd "$dir" || exit 1
  rm -f final.mdl final.occs
  ln -s "$x.mdl" final.mdl
  ln -s "$x.occs" final.occs
) || exit 1;

steps/diagnostic/analyze_alignments.sh --cmd "$cmd" "$lang" "$dir"
utils/summarize_warnings.pl "$dir/log"

steps/info/gmm_dir_info.pl "$dir"

echo "$0: Done training monophone system in $dir"

exit 0

# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl "ark:gunzip -c $dir/ali.0.gz|" | head -4