Blame view

egs/wsj/s5/steps/nnet3/align.sh 4.89 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
  #!/bin/bash
  # Copyright 2012  Brno University of Technology (Author: Karel Vesely)
  #           2013  Johns Hopkins University (Author: Daniel Povey)
  #           2015  Vijayaditya Peddinti
  #           2016  Vimal Manohar
  # Apache 2.0
  
  # Computes training alignments using nnet3 DNN
  # Warning: this script uses GPUs by default, and this is generally not
  # an efficient use of GPUs. Set --use-gpu false to make it run on CPU.
  
  # Begin configuration section.
  # Defaults for the tunable options.  They are all assigned before
  # parse_options.sh is sourced further down in this script.

  # Job control.
  cmd=run.pl            # wrapper used to launch the parallel alignment jobs
  nj=4                  # number of parallel jobs (and of data splits)
  use_gpu=true          # true -> "--gpu 1" / --use-gpu=yes, false -> --use-gpu=no

  # Model selection: ${iter}.mdl is read from <src-dir>.
  iter=final

  # Alignment options handed to nnet3-align-compiled.
  scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
  beam=10               # passed as --beam
  retry_beam=40         # passed as --retry-beam

  # Chunking and context options, forwarded unchanged to nnet3-align-compiled.
  frames_per_chunk=50
  extra_left_context=0
  extra_right_context=0
  extra_left_context_initial=-1
  extra_right_context_final=-1

  # Optional inputs; an empty value means "not used".
  online_ivector_dir=   # directory holding ivector_online.scp and ivector_period
  graphs_scp=           # scp of ready-made graphs, used instead of the transcripts
  # End configuration options.
  
  # Log the full invocation (script name followed by every argument).
  echo "$0 $*"

  # Pick up the environment from path.sh when the working directory has one,
  # then let parse_options.sh process the command line.
  if [ -f path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1

  # Exactly four positional arguments must be left at this point; otherwise
  # print the usage text and stop.
  if [ $# -ne 4 ]; then
    printf '%s\n' \
      "Usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>" \
      "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali" \
      "Warning: this script uses GPUs by default, and this is generally not" \
      "an efficient use of GPUs. Set --use-gpu false to make it run on CPU." \
      "" \
      "main options (for others, see top of script file)" \
      "  --config <config-file>                           # config containing options" \
      "  --nj <nj>                                        # number of parallel jobs" \
      "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
    exit 1
  fi
  
  # Positional arguments, in the order given by the usage message.
  data=$1      # data directory; feats.scp is read from here
  lang=$2      # lang directory; oov.int, L.fst, phones.txt, ... are read from here
  srcdir=$3    # directory of the trained model (tree, ${iter}.mdl, ...)
  dir=$4       # output directory for logs and alignments

  # Contents of oov.int; later handed to sym2int.pl as the --map-oov value.
  oov=$(cat "$lang/oov.int") || exit 1

  # Stop immediately if the output directory cannot be created or written,
  # rather than carrying on and failing later in a less obvious place.
  mkdir -p "$dir/log" || exit 1
  echo "$nj" > "$dir/num_jobs" || exit 1

  # Per-job split of the data directory.  The split is (re)created unless the
  # split directory already exists and feats.scp is older than it.
  sdata=$data/split${nj}
  [[ -d $sdata && $data/feats.scp -ot $sdata ]] || \
    split_data.sh "$data" "$nj" || exit 1
  
  # Translate the use_gpu switch into the option given to $cmd (queue_opt) and
  # the option given to nnet3-align-compiled (gpu_opt).
  # The value is compared as a string instead of being executed as a command,
  # so anything other than "true"/"false" is rejected here rather than being
  # run by the shell.
  case "$use_gpu" in
    true)
      queue_opt="--gpu 1"
      gpu_opt="--use-gpu=yes"
      ;;
    false)
      queue_opt=""
      gpu_opt="--use-gpu=no"
      ;;
    *)
      echo "$0: expected \"true\" or \"false\" for --use-gpu, got '$use_gpu'" 1>&2
      exit 1
      ;;
  esac
  
  # When online i-vectors are requested, run the compatibility check between
  # the model directory and the i-vector directory, and remember the i-vector
  # files that have to exist.
  extra_files=
  if [ -n "$online_ivector_dir" ]; then
    steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1
    extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
  fi

  # Give up at the first required input that is not a regular file.
  # $extra_files is left unquoted on purpose: it is a space-separated list.
  for required in $srcdir/tree $srcdir/${iter}.mdl $data/feats.scp $lang/L.fst $extra_files; do
    if [ ! -f $required ]; then
      echo "$0: no such file $required" && exit 1
    fi
  done
  
  # Copy the tree and the selected model into the alignment directory.
  if ! cp $srcdir/tree $srcdir/${iter}.mdl $dir; then
    exit 1
  fi

  # Run the phone-table compatibility check between the lang directory and
  # the model directory, then keep a copy of the lang directory's phones.txt
  # next to the alignments.
  if ! utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt; then
    exit 1
  fi
  if ! cp $lang/phones.txt $dir; then
    exit 1
  fi
  ## Set up features.  $feats is a piped rspecifier: apply-cmvn reads
  ## utt2spk, cmvn.scp and feats.scp from $sdata/JOB and writes the normalized
  ## features to its standard output.  "JOB" is a literal placeholder here;
  ## presumably $cmd replaces it with the job index when the jobs are launched.
  echo "$0: feature type is raw"

  # cmvn_opts is optional: when the model directory has no such file, both
  # commands fail quietly and $cmvn_opts stays empty.
  cp $srcdir/cmvn_opts $dir 2>/dev/null
  cmvn_opts=$(cat $srcdir/cmvn_opts 2>/dev/null)

  feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
  
  # Options that hand the online i-vectors to nnet3-align-compiled; left empty
  # when no i-vector directory was given.
  ivector_opts=
  if [ -n "$online_ivector_dir" ]; then
    # The period is read from the ivector_period file of the i-vector directory.
    ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1
    ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp"
    ivector_opts="$ivector_opts --online-ivector-period=$ivector_period"
  fi
  
  echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"

  # If the model directory has a frame_subsampling_factor file (e.g. for
  # 'chain' systems), pass its value on to the aligner and copy the file to
  # the alignment directory.
  frame_subsampling_opt=
  if [ -f $srcdir/frame_subsampling_factor ]; then
    frame_subsampling_factor=$(cat $srcdir/frame_subsampling_factor)
    cp $srcdir/frame_subsampling_factor $dir
    frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor"
    # A factor above 1 combined with the untouched default scales gets a
    # warning (followed by a one-second pause); the script carries on anyway.
    if [ "$frame_subsampling_factor" -gt 1 ]; then
      if [ "$scale_opts" = "--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" ]; then
        echo "$0: frame-subsampling-factor is not 1 (so likely a chain system),"
        echo "...  but the scale opts are the defaults.  You probably want"
        echo "--scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0'"
        sleep 1
      fi
    fi
  fi
  
  # Choose where the training graphs come from.  $tra is the piped input
  # specifier given to $prog, and $prog is the graph-compilation program run
  # at the head of the alignment pipeline.
  if [ -z "$graphs_scp" ]; then
    # Default: the per-job transcript, with words mapped to integers and
    # unknown words mapped to $oov.
    tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"
    prog=compile-train-graphs
  else
    # An scp of graphs was supplied: it must exist, and is filtered down to
    # the utterances of each job.
    if [ ! -f $graphs_scp ]; then
      echo "Could not find graphs $graphs_scp" && exit 1
    fi
    tra="scp:utils/filter_scp.pl $sdata/JOB/utt2spk $graphs_scp |"
    prog=compile-train-graphs-fsts
  fi
  
  # Run the alignment itself through $cmd, with the log of each job going to
  # $dir/log/align.JOB.log.  "JOB=1:$nj" and the literal "JOB" inside paths are
  # handed to $cmd unchanged; presumably $cmd expands them into one job per
  # index from 1 to $nj (that logic is not part of this script).
  #
  # The command given to $cmd is a two-stage pipeline.  The pipe is written as
  # \| so that this shell passes it on as an argument instead of interpreting
  # it here:
  #   1. $prog builds the graphs from "$tra" and writes them to ark:-
  #   2. nnet3-align-compiled reads those graphs (ark:-) and the features
  #      ("$feats"), and its output is gzip-compressed into $dir/ali.JOB.gz
  # A non-zero exit status from $cmd aborts the script.
  $cmd $queue_opt JOB=1:$nj $dir/log/align.JOB.log \
    $prog --read-disambig-syms=$lang/phones/disambig.int $dir/tree \
    $srcdir/${iter}.mdl  $lang/L.fst "$tra" ark:- \| \
    nnet3-align-compiled $scale_opts $ivector_opts $frame_subsampling_opt \
    --frames-per-chunk=$frames_per_chunk \
    --extra-left-context=$extra_left_context \
    --extra-right-context=$extra_right_context \
    --extra-left-context-initial=$extra_left_context_initial \
    --extra-right-context-final=$extra_right_context_final \
    $gpu_opt --beam=$beam --retry-beam=$retry_beam \
    $srcdir/${iter}.mdl ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
  
  # Run the alignment diagnostics script.  Its exit status is not checked, so
  # a failure here does not stop the script.
  steps/diagnostic/analyze_alignments.sh --cmd "$cmd" $lang $dir
  
  echo "$0: done aligning data."