Blame view

Scripts/steps/decode_sgmm2_rescore_project.sh 7.77 KB
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
  #!/bin/bash
  
  # Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
  
  # This script does decoding with an SGMM system, by rescoring lattices
  # generated from a previous SGMM system.  This version does the "predictive"
  # SGMM, where we subtract some constant times the log-prob of the left
  # few spliced frames, and the same for the right few.
  # The directory with the lattices
  # is assumed to contain any speaker vectors, if used.  This script just
  # adds into the acoustic scores, (some constant, default -0.25) times
  # the acoustic score of the left model, and the same for the right model.
  
  # It rescores the lattices one final time, using the same setup as the final
  # decoding pass of the source dir.  The assumption is that the model may have
  # been discriminatively trained.
  
  # If the system was built on top of fMLLR transforms from a conventional system,
  # you should provide the --transform-dir option.
  
  # Begin configuration section.
  # Defaults for the script's options.  The usage text further down lists
  # --transform-dir, --cmd and --prob-scale as command-line overrides;
  # presumably parse_options.sh (sourced below) maps the rest the same way.
  stage=0                  # steps guarded by "$stage -le N" run when stage <= N.
  transform_dir=           # dir to find fMLLR transforms (empty: none are used).
  cmd=run.pl               # prefix that launches every job in this script.
  iter=final               # model checked for on disk as $srcdir/$iter.mdl.
  prob_scale=-0.25         # scale on the left and right model scores.
  dimensions=0:13:104:117  # left_start:left_end:right_start:right_end
  # End configuration section.

  echo "$0 $*"  # Print the command line for logging

  # Source path.sh when the current directory provides one.
  if [ -f ./path.sh ]; then . ./path.sh; fi
  . parse_options.sh || exit 1
  
  if [ $# -ne 5 ]; then
    # Exactly five positional arguments are required; anything else prints the
    # usage summary (one line per printf argument) and stops with status 1.
    printf '%s\n' \
      "Usage: steps/decode_sgmm_rescore_project.sh [options] <full-lda-mat> <graph-dir|lang-dir> <data-dir> <old-decode-dir> <decode-dir>" \
      " e.g.: steps/decode_sgmm_rescore_project.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" \
      "     exp/tri2b/full.mat exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a/decode_dev93_tgpr_predict" \
      "main options (for others, see top of script file)" \
      "  --transform-dir <decoding-dir>           # directory of previous decoding" \
      "                                           # where we can find transforms for SAT systems." \
      "  --config <config-file>                   # config containing options" \
      "  --cmd <cmd>                              # Command to run in parallel with" \
      "  --prob-scale <scale>                     # Default -0.25, scale on left and right models."
    exit 1
  fi
  
  # Positional arguments, in the order given in the usage message.
  full_lda_mat=$1   # full LDA matrix, handed to get-full-lda-mat in stage 0.
  graphdir=$2       # graph or lang dir; must contain words.txt.
  data=$3           # data dir; must contain feats.scp.
  olddir=$4         # previous decode dir holding lat.*.gz and gselect.*.gz.
  dir=$5            # output decode dir.
  # Assume model directory one level up from decoding directory.
  srcdir=$(dirname $dir)

  # Refuse to start unless every required input file is present.
  # NOTE(review): $iter is only consulted here; the later stages read
  # $srcdir/final.mdl and $srcdir/final.mat directly.
  for needed in $full_lda_mat $graphdir/words.txt $data/feats.scp \
     $olddir/lat.1.gz $olddir/gselect.1.gz $srcdir/$iter.mdl; do
    if [ ! -f "$needed" ]; then
      echo "$0: no such file $needed"
      exit 1
    fi
  done
  
  # Reuse the job count of the old decode so that JOB indexes line up with
  # its lat.JOB.gz files.
  nj=$(cat $olddir/num_jobs) || exit 1
  sdata=$data/split$nj
  # splice_opts is optional: a missing file simply yields an empty string.
  splice_opts=$(cat $srcdir/splice_opts 2>/dev/null)

  mkdir -p $dir/log
  # Split the data again unless a split dir exists that is newer than feats.scp.
  if ! [[ -d $sdata && $data/feats.scp -ot $sdata ]]; then
    split_data.sh $data $nj || exit 1
  fi
  echo $nj > $dir/num_jobs

  # Speaker vectors are taken from the old decode dir when it has them.
  spkvecs_opt=
  if [ -f $olddir/vecs.1 ]; then
    echo "$0: using speaker vectors from $olddir"
    spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
  else
    echo "$0: no speaker vectors found."
  fi
  
  if [ $stage -le 0 ]; then
    # Stage 0: write $dir/full.mat and $dir/full_inv.mat, i.e. the full
    # LDA+MLLT mat and its inverse.  The full LDA+MLLT mat is the LDA+MLLT mat
    # plus the "rejected" rows of the LDA matrix.
    $cmd $dir/log/get_full_lda.log \
      get-full-lda-mat \
        $srcdir/final.mat \
        $full_lda_mat \
        $dir/full.mat \
        $dir/full_inv.mat \
      || exit 1
  fi
  
  if [ $stage -le 1 ]; then
    # Stage 1: build the "left" and "right" models by projecting final.mdl onto
    # a range of dimensions of the full.mat space.

    # Print the N-th ':'-separated field of $dimensions.
    dim_field() { echo $dimensions | cut '-d:' -f $1; }
    left_start=$(dim_field 1)
    left_end=$(dim_field 2)
    right_start=$(dim_field 3)
    right_end=$(dim_field 4)

    # For now the dimensions are hardwired through $dimensions (e.g., 13 MFCCs
    # and splice 9 frames).  Dividing by the prob of the left 4 and the right 4
    # frames is a somewhat arbitrary choice; other configurations could be
    # investigated.
    for side in left right; do
      start_var=${side}_start
      end_var=${side}_end
      $cmd $dir/log/$side.log \
        sgmm2-project --start-dim=${!start_var} --end-dim=${!end_var} \
        $srcdir/final.mdl $dir/full.mat $dir/$side.mdl $dir/$side.mat || exit 1
    done
  fi
  
  
  # sgmm2-rescore-lattice can only scale the *old* acoustic probs, not the new
  # ones.  So the old lattice scores are first scaled by 1/prob_scale, the
  # left/right model scores are added on top unscaled, and the final
  # lattice-scale by prob_scale undoes the initial scaling, which leaves only
  # the added scores scaled by prob_scale.
  inverse_prob_scale=$(perl -e "print (1.0 / $prob_scale);")
  cur_lats="ark:gunzip -c $olddir/lat.JOB.gz |"
  cur_lats="$cur_lats lattice-scale --acoustic-scale=$inverse_prob_scale ark:- ark:- |"
  
  ## Set up features.  Note: we only support LDA+MLLT features, this
  ## is inherent in the method, we could not support deltas.
  ##
  ## The loop runs once for the "left" and once for the "right" projected
  ## model.  Each pass builds the feature pipeline for that model, computes its
  ## Gaussian selection (stage 2) and adds its scores into the current lattices
  ## (stage 3); the second pass reads the lattices written by the first.

  for model_type in left right; do

    feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" # spliced features.
    if [ -n "$transform_dir" ]; then  # using speaker-specific transforms.
       # we want to transform in the sequence: $dir/full.mat, then the result of
       # (extend-transform-dim $transform_dir/trans.JOB), then $dir/full_inv.mat to
       # get back to the spliced space, then the left.mat or right.mat.  But
       # note that compose-transforms operates in matrix-multiplication order,
       # which is opposite from the "order of applying the transforms" order.
       new_dim=$(( $(copy-matrix --binary=false $dir/full.mat - | wc -l) - 1 )); # 117 in normal case.
       feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk 'ark:extend-transform-dim --new-dimension=$new_dim ark:$transform_dir/trans.JOB ark:- | compose-transforms ark:- $dir/full.mat ark:- | compose-transforms $dir/full_inv.mat ark:- ark:- | compose-transforms $dir/${model_type}.mat ark:- ark:- |' ark:- ark:- |"
    else  # else, we transform with the "left" or "right" matrix; these transform from the
          # spliced space.
       # transform-feats needs a feature rspecifier and wspecifier after the
       # matrix, exactly as in the --transform-dir branch above; without them
       # the pipeline stage cannot run.
       feats="$feats transform-feats $dir/${model_type}.mat ark:- ark:- |"
       # If we don't have the --transform-dir option, make sure the model was
       # trained in the same way.  -q keeps the matched log lines off stdout.
       if grep -q 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
         echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
         echo "  but you are not providing the --transform-dir option in test time."
       fi
    fi
    if [ -f $olddir/trans.1 ]; then
       echo "$0: warning: not using transforms in $olddir (this is just a "
       echo " limitation of the script right now, and could be fixed)."
    fi

    if [ $stage -le 2 ]; then
      echo "Getting gselect info for $model_type model."
      $cmd JOB=1:$nj $dir/log/gselect.$model_type.JOB.log \
         sgmm2-gselect $dir/$model_type.mdl "$feats" \
         "ark,t:|gzip -c >$dir/gselect.$model_type.JOB.gz" || exit 1;
    fi
    gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.$model_type.JOB.gz|"


    # Rescore the state-level lattices with the model provided.  Just
    # one command in this script.
    # The --old-acoustic-scale=1.0 option means we just add the scores
    # to the old scores.
    if [ $stage -le 3 ]; then
      echo "$0: rescoring lattices with $model_type model"
      $cmd JOB=1:$nj $dir/log/rescore.${model_type}.JOB.log \
        sgmm2-rescore-lattice --old-acoustic-scale=1.0 "$gselect_opt" $spkvecs_opt \
        $dir/$model_type.mdl "$cur_lats" "$feats" \
        "ark:|gzip -c > $dir/lat.${model_type}.JOB.gz" || exit 1;
    fi
    # The next pass (or the final scaling stage) reads what this pass wrote.
    cur_lats="ark:gunzip -c $dir/lat.${model_type}.JOB.gz |"
  done
  
  if [ $stage -le 4 ]; then
    # Stage 4: scale the acoustic scores of the last rescored lattices by
    # prob_scale and write the result as $dir/lat.JOB.gz.
    echo "$0: getting final lattices."
    final_lats_wspec="ark:|gzip -c >$dir/lat.JOB.gz"
    $cmd JOB=1:$nj $dir/log/scale_lats.JOB.log \
      lattice-scale --acoustic-scale=$prob_scale "$cur_lats" "$final_lats_wspec" \
      || exit 1
  fi
  
  # Remove the intermediate per-model lattices: if they still exist they will
  # confuse the scoring script.  Errors (e.g. nothing to remove) are ignored.
  rm $dir/lat.{left,right}.*.gz 2>/dev/null

  # Scoring needs an executable local/score.sh; without it the script stops
  # with status 1.
  if [ ! -x local/score.sh ]; then
    echo "Not scoring because local/score.sh does not exist or not executable."
    exit 1
  fi
  local/score.sh --cmd "$cmd" $data $graphdir $dir

  exit 0