egs/wsj/s5/steps/nnet/align.sh
#!/bin/bash
# Copyright 2012-2015 Brno University of Technology (author: Karel Vesely)
# Apache 2.0

# Aligns 'data' to sequences of transition-ids using a Neural Network based acoustic model.
# Optionally produces alignments in lattice format, which is handy to get word alignments.

# Begin configuration section.
nj=4
cmd=run.pl
stage=0
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
nnet_forward_opts="--no-softmax=true --prior-scale=1.0"
ivector=            # rx-specifier with i-vectors (ark-with-vectors),
text=               # (optional) transcripts we align to,
align_to_lats=false # optionally produce alignment in lattice format,
lats_decode_opts="--acoustic-scale=0.1 --beam=20 --lattice-beam=10"
lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1"
use_gpu="no" # yes|no|optional
# End configuration options.

[ $# -gt 0 ] && echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;

set -euo pipefail

if [ $# != 4 ]; then
  echo "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>"
  echo "e.g.:  $0 data/train data/lang exp/tri1 exp/tri1_ali"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --nj <nj>                                        # number of parallel jobs"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1;
fi

data=$1
lang=$2
srcdir=$3
dir=$4

mkdir -p $dir/log
echo $nj > $dir/num_jobs

sdata=$data/split$nj
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;

utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt
cp $lang/phones.txt $dir
cp $srcdir/{tree,final.mdl} $dir || exit 1;

# Select default locations of the model files,
nnet=$srcdir/final.nnet
class_frame_counts=$srcdir/ali_train_pdf.counts
feature_transform=$srcdir/final.feature_transform
model=$dir/final.mdl

# Check that files exist,
for f in $sdata/1/feats.scp $lang/L.fst $nnet $model $feature_transform $class_frame_counts; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done
[ -z "$text" -a ! -f $sdata/1/text ] && echo "$0: missing file $sdata/1/text" && exit 1

# PREPARE FEATURE EXTRACTION PIPELINE
# import config,
online_cmvn_opts=
cmvn_opts=
delta_opts=
D=$srcdir
[ -e $D/online_cmvn_opts ] && online_cmvn_opts=$(cat $D/online_cmvn_opts)
[ -e $D/cmvn_opts ] && cmvn_opts=$(cat $D/cmvn_opts)
[ -e $D/delta_opts ] && delta_opts=$(cat $D/delta_opts)

# Create the feature stream,
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# apply-cmvn-online (optional),
[ -n "$online_cmvn_opts" -a ! -f $D/global_cmvn_stats.mat ] && echo "$0: Missing $D/global_cmvn_stats.mat" && exit 1
[ -n "$online_cmvn_opts" ] && feats="$feats apply-cmvn-online $online_cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $D/global_cmvn_stats.mat ark:- ark:- |"
# apply-cmvn (optional),
[ -n "$cmvn_opts" -a ! -f $sdata/1/cmvn.scp ] && echo "$0: Missing $sdata/1/cmvn.scp" && exit 1
[ -n "$cmvn_opts" ] && feats="$feats apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
# add-deltas (optional),
[ -n "$delta_opts" ] && feats="$feats add-deltas $delta_opts ark:- ark:- |"
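
# Note: $feats is assembled as one shell pipeline string and later passed to the Kaldi
# tools as an 'ark,s,cs:... |' rspecifier; the literal 'JOB' is replaced by the job
# index by $cmd (run.pl/queue.pl) when the parallel alignment jobs are launched below.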

# add-ivector (optional),
if [ -e $D/ivector_dim ]; then
  [ -z "$ivector" ] && echo "$0: Missing --ivector, i-vectors were used in training!" && exit 1
  # Get the tool,
  ivector_append_tool=append-vector-to-feats # default,
  [ -e $D/ivector_append_tool ] && ivector_append_tool=$(cat $D/ivector_append_tool)
  # Check dims: build the job-1 pipeline and compare the i-vector dimension to training,
  feats_job_1=$(sed 's:JOB:1:g' <(echo $feats))
  dim_raw=$(feat-to-dim "$feats_job_1" -)
  dim_raw_and_ivec=$(feat-to-dim "$feats_job_1 $ivector_append_tool ark:- '$ivector' ark:- |" -)
  dim_ivec=$((dim_raw_and_ivec - dim_raw))
  [ "$dim_ivec" != "$(cat $D/ivector_dim)" ] && \
    echo "$0: i-vector dim. mismatch (expected $(cat $D/ivector_dim), got $dim_ivec in '$ivector')" && \
    exit 1
  # Append to feats,
  feats="$feats $ivector_append_tool ark:- '$ivector' ark:- |"
fi

# nnet-forward: with --no-softmax=true the softmax is removed and log-priors computed
# from the training frame-counts are subtracted, so the output behaves as pseudo
# log-likelihoods for align-compiled-mapped,
feats="$feats nnet-forward $nnet_forward_opts --feature-transform=$feature_transform --class-frame-counts=$class_frame_counts --use-gpu=$use_gpu $nnet ark:- ark:- |"

echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"

# Map oovs in reference transcription,
oov=$(cat $lang/oov.int) || exit 1;
[ -z "$text" ] && text=$sdata/JOB/text
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $text |";

# We could just use align-mapped in the next line, but it's less efficient as it compiles
# the training graphs one by one.
if [ $stage -le 0 ]; then
  $cmd JOB=1:$nj $dir/log/align.JOB.log \
    compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
    align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \
      "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi

# Optionally align also to lattice format (handy to get word alignments),
if [ "$align_to_lats" == "true" ]; then
  echo "$0: aligning also to lattices '$dir/lat.*.gz'"
  $cmd JOB=1:$nj $dir/log/align_lat.JOB.log \
    compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $lats_graph_scales $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
    latgen-faster-mapped $lats_decode_opts --word-symbol-table=$lang/words.txt $dir/final.mdl ark:- \
      "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
fi

echo "$0: done aligning data."
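
# A possible spot-check of the output (not part of this script; assumes the standard
# Kaldi tool ali-to-phones is on the path and the align-dir from the usage example):
#   ali-to-phones --ctm-output exp/tri1_ali/final.mdl \
#     "ark:gunzip -c exp/tri1_ali/ali.1.gz |" - | head
# This prints a phone-level CTM (phones as integer ids) for the start of job 1's alignments.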