Blame view

egs/wsj/s5/steps/nnet2/convert_lda_to_raw.sh 6.36 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
  #!/bin/bash
  
  # Copyright 2014    Johns Hopkins University (Author: Daniel Povey).
  # Apache 2.0.
  
  # This script converts nnet2 models which expect splice+LDA as the input, into
  # models which expect raw features (e.g. MFCC) as the input.  If you include
  # the option --global-cmvn-stats <matrix>, it will also remove CMVN from the model
  # by including it as part of the neural net.
  
  
  # Begin configuration section
  cmd=run.pl            # wrapper used to launch the logged commands below
  global_cmvn_stats=    # empty by default; set via --global-cmvn-stats <matrix>
  cleanup=true          # if true, intermediate files are removed at the end
  # max_change and learning_rate will only make a difference if we train this
  # model, which is unlikely.
  max_change=5.0
  learning_rate=0.00001 # tiny by default; the user should probably tune this
                        # or set it if they want to train.
  # End configuration section.
  
  # Record the full command line in the output for logging purposes.
  echo "$0 $*"

  # Source path.sh from the current directory when it exists, then let
  # parse_options.sh process the command-line options.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1


  # Exactly two positional arguments must remain; otherwise print the usage
  # text and stop.
  if [ $# -ne 2 ]; then
    printf '%s\n' \
      "Usage: $0 [options] <src-nnet-dir> <dest-nnet-dir>" \
      "e.g.: $0 --global-cmvn-stats global_cmvn.mat exp/dnn4b_nnet2 exp/dnn4b_nnet2_raw" \
      "Options include" \
      "   --global-cmvn-stats <stats-file>         # Filename of globally summed CMVN stats, if" \
      "                                            # you want to push the CMVN inside the nnet" \
      "                                            # (it won't any longer be speaker specific)"
    exit 1
  fi
  
  # Positional arguments: the directory holding the existing model and the
  # directory the converted model is written to.
  src=$1
  dir=$2

  mkdir -p "$dir/log" || exit 1;

  # These inputs are all read further down; stop early if any of them is missing.
  for f in "$src/final.mdl" "$src/final.mat" "$src/splice_opts" "$src/cmvn_opts"; do
    if [ ! -f "$f" ]; then
      echo "$0: expected file $f to exist"
      exit 1
    fi
  done

  # phones.txt is treated as optional: a failed copy is deliberately ignored.
  cp "$src/phones.txt" "$dir" 2>/dev/null

  # nnet.config will be a config for a few trivial neural-network layers
  # that come before the main network, and which do things like the optional
  # CMVN offset/scaling, the frame splicing and the LDA transform.
  # Start from an empty file; the lines are appended later in the script.
  : >"$dir/nnet.config" || exit 1;
  
  # If global CMVN stats were supplied, turn the normalization into fixed
  # network components and mark the output model as needing no CMVN;
  # otherwise just carry the source cmvn_opts over unchanged.
  if [ -n "$global_cmvn_stats" ]; then
    if [ ! -f "$src/cmvn_opts" ]; then
      echo "$0: expected $src/cmvn_opts to exist"
      exit 1;
    fi
    norm_vars=false
    # -q: only the exit status matters, the matched line is not useful in the log.
    if grep -q 'norm-means=false' "$src/cmvn_opts"; then
      echo "$0: if --norm-means=false, don't supply the --global-cmvn-stats option to this script"
      exit 1;
    elif grep -q 'norm-vars=true' "$src/cmvn_opts"; then
      echo "$0: warning: this script has not been tested with --norm-vars=true in CMVN options"
      norm_vars=true
    fi


    # First add to the config, layers that will do the same transform as cepstral
    # mean and variance normalization using these global stats.  We do this as
    # first an added offset (FixedBiasComponent), then, only if norm-vars=true
    # in the CMVN options, a scaling (FixedScaleComponent).

    # $cmd is left unquoted on purpose: it may consist of several words.
    $cmd "$dir/log/copy_feats.log" \
      copy-feats --binary=false "$global_cmvn_stats" "$dir/global_cmvn_stats.txt" || exit 1;

    # The perl program reads the text-form stats on stdin (a line holding "[",
    # then a row of sums ending in the count, then a row of sums of squares
    # ending in "0 ]") and writes the bias vector to its first argument and the
    # scale vector to its second argument.
    perl -e '
      $line0 = <STDIN>;
      defined($line0) || die "empty CMVN stats on standard input";
      # Pattern match: a numeric == on these strings compares 0 with 0 and
      # therefore can never fail.
      $line0 =~ m/^\s*\[\s*$/ || die "expected first line to be [, got $line0";
      $line1 = <STDIN>; $line2 = <STDIN>;
      (defined($line1) && defined($line2)) || die "expected two rows of CMVN stats";
      @L1 = split(" ",$line1); @L2 = split(" ",$line2);
      ($bias_out, $scale_out) = @ARGV;
      open(B, ">$bias_out") || die "opening bias-out file $bias_out";
      open(S, ">$scale_out") || die "opening scale-out file $scale_out";
      pop @L2; pop @L2; # remove the " 0 ]"
      $count = pop @L1;  # last element of line 1 is total count.
      ($count > 0.0) || die "Bad count $count";
      $dim = @L1;
      $dim == scalar @L2 || die "Bad dimension of second line of CMVN stats @L2";
      print B "[ ";  print S "[ ";
      for ($x = 0; $x < $dim; $x++) {
        $mean = $L1[$x] / $count;  $var = ($L2[$x] / $count) - ($mean * $mean);
        # Fail with a clear message instead of a sqrt or division error.
        $var > 0 || die "Bad variance $var for dimension $x";
        $bias = -$mean;  print B "$bias ";
        $scale = 1.0 / sqrt($var); $scale > 0 || die "Bad scale $scale";  print S "$scale ";
      }
      print B "]\n";  print S "]\n";
      (close(B) && close(S)) || die "closing output files $bias_out and $scale_out";
    ' "$dir/bias.txt" "$dir/scales.txt" <"$dir/global_cmvn_stats.txt" || exit 1;

    echo "FixedBiasComponent bias=$dir/bias.txt" >> "$dir/nnet.config"
    if $norm_vars; then
      echo "FixedScaleComponent scales=$dir/scales.txt" >> "$dir/nnet.config"
    fi
    # The normalization now lives inside the network, so turn it off for the features.
    echo "--norm-means=false --norm-vars=false" >"$dir/cmvn_opts" || exit 1;
  else
    cp "$src/cmvn_opts" "$dir/" || exit 1;
  fi
  
  # We need the dimension of the raw features.  We work it out from the LDA matrix dimension.
  # Count the fields on the second line of the text-form LDA matrix...  this will
  # be either the spliced dim or the spliced dim plus one.
  spliced_dim=$(copy-matrix --binary=false "$src/final.mat" - | awk 'NR == 2 { print NF }')
  # The exit status of a pipeline is that of its last command, so a failing
  # copy-matrix has to be detected from the value rather than from the status.
  if ! [ "$spliced_dim" -gt 0 ] 2>/dev/null; then
    echo "$0: could not work out the spliced dimension from $src/final.mat"
    exit 1;
  fi


  splice_opts=$(cat "$src/splice_opts") || exit 1;
  # Work out how many frames are spliced together by splicing a matrix with one element
  # and testing the resulting number of columns.
  # $splice_opts is left unquoted on purpose: it may hold several options.
  num_splice=$(echo "foo [ 1.0 ]" | splice-feats $splice_opts ark:- ark:- | feat-to-dim ark:- -)
  if ! [ "$num_splice" -gt 0 ] 2>/dev/null; then
    echo "$0: could not work out the number of spliced frames from options '$splice_opts'"
    exit 1;
  fi

  # We'll separately need the left-context and right-context.
  # defaults in the splice-feats code are 4 and 4.
  left_context=4
  right_context=4
  for opt in $splice_opts; do
    # Second "="-separated field of the option, e.g. 3 for --left-context=3.
    opt_value=${opt#*=}
    opt_value=${opt_value%%=*}
    if [[ "$opt" == *left-context* ]]; then
      left_context=$opt_value
    fi
    if [[ "$opt" == *right-context* ]]; then
      right_context=$opt_value
    fi
  done
  # Cross-check our reading of the options against what the binaries produced.
  if [ "$num_splice" -ne $((left_context + 1 + right_context)) ]; then
    echo "$0: num-splice worked out from the binaries differs from our interpreation of the options:"
    echo "$num_splice != $left_context + 1 + $right_context"
    exit 1;
  fi
  
  # Decide from the column count whether the LDA matrix already has an offset
  # column, and leave a matrix with one in $dir/final.mat either way.
  # NOTE(review): when num_splice is 1 the remainder is always 0, so a matrix
  # that already contains an offset column cannot be told apart here.
  modulo=$((spliced_dim % num_splice))
  if [ "$modulo" -eq 1 ]; then
    # matrix includes offset term.
    spliced_dim=$((spliced_dim - 1))
    cp "$src/final.mat" "$dir/" || exit 1;
  elif [ "$modulo" -eq 0 ]; then
    # We need to add a zero bias term to the matrix, because the AffineComponent
    # expects that.  The awk program appends a "0" to every row, keeping the
    # closing "]" last, and passes one-field lines through unchanged.
    copy-matrix --binary=false "$src/final.mat" - | \
      awk '{if ($NF == "]") { $NF = "0"; print $0, "]"; } else { if (NF > 1) { print $0, "0"; } else {print;}}}' >"$dir/final.mat"
    # Check both stages; the pipeline status alone only reflects awk.
    pipe_status=("${PIPESTATUS[@]}")
    if [ "${pipe_status[0]}" -ne 0 ] || [ "${pipe_status[1]}" -ne 0 ]; then
      echo "$0: failed to add a zero bias column to $src/final.mat"
      exit 1;
    fi
  else
    echo "$0: Cannot make sense of spliced dimension $spliced_dim and num-splice=$num_splice"
    exit 1;
  fi
  feat_dim=$((spliced_dim / num_splice))
  echo "SpliceComponent input-dim=$feat_dim left-context=$left_context right-context=$right_context" >>"$dir/nnet.config"
  
  # use AffineComponentPreconditioned as it's easier to configure than AffineComponentPreconditionedOnline.
  echo "AffineComponentPreconditioned alpha=4.0 learning-rate=$learning_rate max-change=$max_change matrix=$dir/final.mat" >>"$dir/nnet.config" || exit 1;


  # Build a small network from the config lines written so far.
  # $cmd is left unquoted on purpose: it may consist of several words.
  $cmd "$dir/log/nnet_init.log" \
    nnet-init "$dir/nnet.config" "$dir/lda.nnet" || exit 1;

  # Insert that network at position 0 of the source model and write the
  # result as the new final.mdl.
  $cmd "$dir/log/nnet_insert.log" \
    nnet-insert --insert-at=0 --randomize-next-component=false \
     "$src/final.mdl" "$dir/lda.nnet" "$dir/final.mdl" || exit 1;

  # Both files are only intermediate inputs to the two commands above.
  if $cleanup; then
    rm -- "$dir/final.mat" "$dir/lda.nnet"
  fi