Blame view

egs/wsj/s5/steps/nnet/align.sh 5.53 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
  #!/bin/bash
  # Copyright 2012-2015 Brno University of Technology (author: Karel Vesely)
  # Apache 2.0
  
  # Aligns 'data' to sequences of transition-ids using Neural Network based acoustic model.
  # Optionally produces alignment in lattice format, this is handy to get word alignment.
  
  # Begin configuration section.
  # The variables below are defaults. The usage text further down advertises
  # --config/--nj/--cmd, so they are presumably overridable as '--<name> <value>'
  # through parse_options.sh (verify against that script).
  nj=4
  cmd=run.pl
  # The alignment step is executed only when stage <= 0.
  stage=0
  # Begin configuration.
  # Options handed to align-compiled-mapped, together with --beam/--retry-beam:
  scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
  beam=10
  retry_beam=40
  # Extra options handed to nnet-forward:
  nnet_forward_opts="--no-softmax=true --prior-scale=1.0"
  ivector=            # rx-specifier with i-vectors (ark-with-vectors),
  text= # (optional) transcripts we align to; defaults to the 'text' file of each data split,
  
  align_to_lats=false # optionally produce alignment in lattice format
   # options for latgen-faster-mapped (used only when align_to_lats=true),
   lats_decode_opts="--acoustic-scale=0.1 --beam=20 --lattice_beam=10"
   # options for compile-train-graphs in the lattice pass (used only when align_to_lats=true),
   lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1"
  
  use_gpu="no" # yes|no|optional, forwarded to nnet-forward as --use-gpu
  # End configuration options.
  
  # Echo the invocation so that it ends up in the log.
  if [ $# -gt 0 ]; then
    echo "$0 $@"
  fi

  # Pull in the environment from ./path.sh when the working directory has one,
  # then let parse_options.sh process the command line.
  if [ -f path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1

  # From here on: abort on errors, on unset variables and on failing pipelines.
  set -euo pipefail

  # Exactly four positional arguments must remain; otherwise print the usage
  # text and stop.
  if [ $# -ne 4 ]; then
    printf '%s\n' \
      "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>" \
      "e.g.:  $0 data/train data/lang exp/tri1 exp/tri1_ali" \
      "main options (for others, see top of script file)" \
      "  --config <config-file>                           # config containing options" \
      "  --nj <nj>                                        # number of parallel jobs" \
      "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
    exit 1
  fi
  
  # Positional arguments: <data-dir> <lang-dir> <src-dir> <align-dir>.
  data=$1; lang=$2; srcdir=$3; dir=$4

  # Per-job data lives in $data/split$nj.
  sdata=$data/split$nj

  # Create the output directory (with its log sub-directory) and record the
  # number of jobs in it.
  mkdir -p $dir/log
  echo $nj >$dir/num_jobs

  # Re-split the data unless the split directory exists and feats.scp is older
  # than it.
  if ! [[ -d $sdata && $data/feats.scp -ot $sdata ]]; then
    split_data.sh $data $nj || exit 1
  fi

  # Compare the phone tables of lang-dir and src-dir, then keep a copy of the
  # lang-dir one next to the alignments.
  utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt
  cp $lang/phones.txt $dir

  # The tree and the transition model are copied into the alignment directory.
  cp $srcdir/tree $srcdir/final.mdl $dir || exit 1
  
  # Select default locations to model files: the network, its feature transform
  # and the class frame counts are taken from <src-dir>, the transition model
  # from <align-dir>.
  nnet=$srcdir/final.nnet
  class_frame_counts=$srcdir/ali_train_pdf.counts
  feature_transform=$srcdir/final.feature_transform
  model=$dir/final.mdl

  # Check that files exist (only the first data split is probed),
  for f in $sdata/1/feats.scp $lang/L.fst $nnet $model $feature_transform $class_frame_counts; do
    if [ ! -f "$f" ]; then
      echo "$0: missing file $f"
      exit 1
    fi
  done
  # When no --text is given the transcripts are read from the data splits, so
  # the split 'text' file has to be there. The message names that file itself;
  # the loop variable $f would point at an unrelated file here.
  if [ -z "$text" ] && [ ! -f $sdata/1/text ]; then
    echo "$0: missing file $sdata/1/text"
    exit 1
  fi
  
  
  # PREPARE FEATURE EXTRACTION PIPELINE
  # Builds $feats, a piped rx-specifier in which the literal token JOB stands
  # for the job number. Each optional stage is enabled by an options file
  # stored in the source model directory.
  #
  # import config,
  online_cmvn_opts=
  cmvn_opts=
  delta_opts=
  D=$srcdir
  [ -e $D/online_cmvn_opts ] && online_cmvn_opts=$(cat $D/online_cmvn_opts)
  [ -e $D/cmvn_opts ] && cmvn_opts=$(cat $D/cmvn_opts)
  [ -e $D/delta_opts ] && delta_opts=$(cat $D/delta_opts)
  #
  # Create the feature stream,
  feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
  # apply-cmvn-online (optional),
  # Only variables defined in this script are used: the script runs with
  # 'set -u', so expanding an undefined name aborts it even when online CMVN
  # is switched off. The global stats are looked up in $D, the directory that
  # also holds the network.
  # NOTE(review): assumes every data split contains a spk2utt file - confirm.
  if [ -n "$online_cmvn_opts" ]; then
    if [ ! -f $D/global_cmvn_stats.mat ]; then
      echo "$0: Missing $D/global_cmvn_stats.mat"
      exit 1
    fi
    feats="$feats apply-cmvn-online $online_cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $D/global_cmvn_stats.mat ark:- ark:- |"
  fi
  # apply-cmvn (optional),
  if [ -n "$cmvn_opts" ]; then
    if [ ! -f $sdata/1/cmvn.scp ]; then
      echo "$0: Missing $sdata/1/cmvn.scp"
      exit 1
    fi
    feats="$feats apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
  fi
  # add-deltas (optional),
  if [ -n "$delta_opts" ]; then
    feats="$feats add-deltas $delta_opts ark:- ark:- |"
  fi
  
  # add-ivector (optional),
  # Enabled when the source model directory contains 'ivector_dim'; in that
  # case --ivector is mandatory and the dimension of the supplied i-vectors
  # must equal the number stored in that file.
  if [ -e $D/ivector_dim ]; then
    # Quoted: the rx-specifier may contain spaces (e.g. a piped command).
    [ -z "$ivector" ] && echo "Missing --ivector, they were used in training!" && exit 1
    # Get the tool,
    ivector_append_tool=append-vector-to-feats # default,
    [ -e $D/ivector_append_tool ] && ivector_append_tool=$(cat $D/ivector_append_tool)
    # Check dims: the i-vector dimension is the difference between the feature
    # dimension with and without the appended i-vectors, measured on job 1.
    feats_job_1=${feats//JOB/1}
    dim_raw=$(feat-to-dim "$feats_job_1" -)
    dim_raw_and_ivec=$(feat-to-dim "$feats_job_1 $ivector_append_tool ark:- '$ivector' ark:- |" -)
    dim_ivec=$((dim_raw_and_ivec - dim_raw))
    [ "$dim_ivec" != "$(cat $D/ivector_dim)" ] && \
      echo "Error, i-vector dim. mismatch (expected $(cat $D/ivector_dim), got $dim_ivec in '$ivector')" && \
      exit 1
    # Append to feats,
    feats="$feats $ivector_append_tool ark:- '$ivector' ark:- |"
  fi
  
  # Last stage of the pipeline: push the features through the network
  # (nnet-forward), using the feature transform and class frame counts
  # selected above.
  feats+=" nnet-forward ${nnet_forward_opts} --feature-transform=${feature_transform} --class-frame-counts=${class_frame_counts} --use-gpu=${use_gpu} ${nnet} ark:- ark:- |"

  # Tell the user what is about to happen.
  printf '%s\n' "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"
  
  # Reference transcription: words are converted to integer ids by
  # utils/sym2int.pl, with out-of-vocabulary words mapped to the id stored in
  # $lang/oov.int. Unless --text was given, each job reads the 'text' file of
  # its own data split.
  oov=$(cat $lang/oov.int) || exit 1
  if [ -z "$text" ]; then
    text=$sdata/JOB/text
  fi
  tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $text |"
  # We could just use align-mapped in the next line, but it's less efficient as it compiles the
  # training graphs one by one.
  #
  # Alignment pass (skipped when --stage is above 0). For JOB=1..$nj, $cmd is
  # given a two-stage pipeline; the pipe is escaped ('\|') so that it is passed
  # on to $cmd as an argument instead of being interpreted by this shell:
  #   compile-train-graphs  : training graphs from the transcripts in "$tra",
  #   align-compiled-mapped : aligns those graphs against "$feats".
  # Output: text-mode alignments, gzipped into $dir/ali.JOB.gz; the log goes to
  # $dir/log/align.JOB.log.
  # NOTE(review): JOB is presumably substituted with the job number by $cmd - confirm.
  if [ $stage -le 0 ]; then
    $cmd JOB=1:$nj $dir/log/align.JOB.log \
      compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
      align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \
        "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1;
  fi
  
  # Optionally align to lattice format (handy to get word alignment)
  # Same job layout as above, but the graphs are compiled with
  # $lats_graph_scales and handed to latgen-faster-mapped (with
  # $lats_decode_opts); the lattices are gzipped into $dir/lat.JOB.gz.
  # This pass is not guarded by $stage.
  if [ "$align_to_lats" == "true" ]; then
    echo "$0: aligning also to lattices '$dir/lat.*.gz'"
    $cmd JOB=1:$nj $dir/log/align_lat.JOB.log \
      compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $lats_graph_scales $dir/tree $dir/final.mdl  $lang/L.fst "$tra" ark:- \| \
      latgen-faster-mapped $lats_decode_opts --word-symbol-table=$lang/words.txt $dir/final.mdl ark:- \
        "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
  fi
  
  echo "$0: done aligning data."