#!/bin/bash
# prepare_online_decoding.sh

# Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Begin configuration.
# The variables in this section are defaults.  NOTE(review): presumably
# parse_options.sh (sourced below) lets each of them be overridden from the
# command line -- confirm against that script.
stage=0 # This allows restarting partway through, when something went wrong.
        # NOTE(review): not referenced in the part of this script visible here.
feature_type=mfcc # base feature type; the script below handles mfcc, plp and fbank.
add_pitch=false # when true, pitch options are added to the generated online.conf.
mfcc_config=conf/mfcc.conf # you can override any of these you need to override.
plp_config=conf/plp.conf
fbank_config=conf/fbank.conf
# online_pitch_config is the config file for both pitch extraction and
# post-processing; we combine them into one because during training this
# is given to the program compute-and-process-kaldi-pitch-feats.
online_pitch_config=conf/online_pitch.conf

# Below are some options that affect the iVectors, and should probably
# match those used in extract_ivectors_online.sh.
num_gselect=5 # Gaussian-selection using diagonal model: number of Gaussians to select
posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
                    # inter-frame correlations.
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
               # caution: you should use the same value in the online-estimation
               # code.
max_count=100   # This max-count of 100 can make iVectors more consistent for
                # different lengths of utterance, by scaling up the prior term
                # when the data-count exceeds this value.  The data-count is
                # after posterior-scaling, so assuming the posterior-scale is
                # 0.1, --max-count 100 starts having effect after 1000 frames,
                # or 10 seconds of data.
iter=final # the model <nnet-dir>/${iter}.mdl is copied to <output-dir>/final.mdl.
# End configuration.

# Record how the script was invoked (script name followed by all arguments).
echo "$0 $*"

# Pull in ./path.sh when it is present, then hand the command line to
# parse_options.sh; give up if that script fails.
if [ -f path.sh ]; then
  . ./path.sh
fi
. parse_options.sh || exit 1

# Exactly three or four positional arguments are accepted (the iVector
# extractor directory is optional).  Anything else prints the usage message
# and aborts with status 1.
# NOTE(review): --cmd is advertised below, but no `cmd` variable is defined in
# the configuration section; confirm that parse_options.sh accepts it.
if [ $# -ne 4 ] && [ $# -ne 3 ]; then
   echo "Usage: $0 [options] <lang-dir> [<ivector-extractor-dir>] <nnet-dir> <output-dir>"
   echo "e.g.: $0 data/lang exp/nnet2_online/extractor exp/nnet2_online/nnet exp/nnet2_online/nnet_online"
   echo "main options (for others, see top of script file)"
   # fbank is listed because the script also generates an fbank config.
   echo "  --feature-type <mfcc|plp|fbank>                  # Type of the base features; "
   echo "                                                   # important to generate the correct"
   echo "                                                   # configs in <output-dir>/conf/"
   echo "  --add-pitch <true|false>                         # Append pitch features to cmvn"
   echo "                                                   # (default: false)"
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
   echo "  --config <config-file>                           # config containing options"
   echo "  --iter <model-iteration|final>                   # iteration of model to take."
   echo "  --stage <stage>                                  # stage to do partial re-run from."
   exit 1;
fi


# Give the positional arguments names.  With four arguments the second one is
# the iVector extractor directory; with three it is absent and iedir stays
# empty.  Any other count aborts with status 1.
case $# in
  4)
    lang=$1
    iedir=$2
    srcdir=$3
    dir=$4
    ;;
  3)
    lang=$1
    iedir=
    srcdir=$2
    dir=$3
    ;;
  *)
    exit 1
    ;;
esac

# Check that every required input file exists before anything is written.
# All path expansions are quoted so directories containing whitespace work.
for f in "$lang/phones/silence.csl" "$srcdir/${iter}.mdl" "$srcdir/tree"; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done
if [ -n "$iedir" ]; then
  for f in final.{mat,ie,dubm} splice_opts global_cmvn.stats online_cmvn.conf; do
    if [ ! -f "$iedir/$f" ]; then
      echo "$0: no such file $iedir/$f"
      exit 1
    fi
  done
  if [ "$add_pitch" = true ]; then
    # Compare the first field printed by matrix-dim for final.mat with the
    # "input-dim:" value reported by nnet3-am-info; equal values are rejected.
    iedim=$(matrix-dim "$iedir/final.mat" | awk '{print $1}')
    amdim=$(nnet3-am-info "$srcdir/${iter}.mdl" | grep "input-dim:" | awk '{print $2}')
    if [[ "$iedim" =~ ^[0-9]+$ && "$amdim" =~ ^[0-9]+$ ]]; then
      if [ "$amdim" -eq "$iedim" ]; then
        echo "$0: remove pitch from the input of ivector extractor"
        exit 1
      fi
    else
      # Previously an empty or multi-line value caused an arithmetic syntax
      # error and the check was skipped silently; report it instead.  The
      # script still continues, as it did before.
      echo "$0: warning: could not determine dimensions (ivector='$iedim', model='$amdim'); skipping pitch check"
    fi
  fi
fi


# Convert $dir to an absolute pathname, so that the configuration files we
# write will contain absolute pathnames.
dir=$(utils/make_absolute.sh "$dir")
# An empty result would make every path below resolve under "/", so stop here.
if [ -z "$dir" ]; then
  echo "$0: could not convert the output directory to an absolute pathname"
  exit 1
fi
mkdir -p "$dir/conf" || exit 1

# The phone set of the lang directory must be compatible with the model's.
utils/lang/check_phones_compatible.sh "$lang/phones.txt" "$srcdir/phones.txt" || exit 1
cp "$lang/phones.txt" "$dir" || exit 1

# The chosen model iteration is always stored as final.mdl in the output.
cp "$srcdir/${iter}.mdl" "$dir/final.mdl" || exit 1
cp "$srcdir/tree" "$dir/" || exit 1
# frame_subsampling_factor is optional; copy it only when the model dir has it.
if [ -f "$srcdir/frame_subsampling_factor" ]; then
  cp "$srcdir/frame_subsampling_factor" "$dir/" || exit 1
fi

# When an iVector extractor directory was given, copy its files into
# $dir/ivector_extractor/.  Paths are quoted so directories containing
# whitespace work, and a failed mkdir is reported at its source.
if [ -n "$iedir" ]; then
  mkdir -p "$dir/ivector_extractor/" || exit 1
  cp "$iedir"/final.{mat,ie,dubm} "$iedir/global_cmvn.stats" "$dir/ivector_extractor/" || exit 1

  # The following things won't be needed directly by the online decoding, but
  # will allow us to run prepare_online_decoding.sh again with
  # $dir/ivector_extractor/ as the input directory (useful in certain
  # cross-system training scenarios).
  cp "$iedir/splice_opts" "$iedir/online_cmvn.conf" "$dir/ivector_extractor/" || exit 1
fi


# Start a fresh $dir/conf/online.conf, keeping any previous one as a .bak file.
mkdir -p "$dir/conf" || exit 1
# Best-effort cleanup: errors are deliberately ignored.  -f avoids an
# interactive prompt that would be invisible with stderr discarded.
# NOTE(review): this removes *.conf directly under $dir, while the feature
# configs are written to $dir/conf/ -- confirm which location was intended.
rm -f -- "$dir"/{plp,mfcc,fbank}.conf 2>/dev/null
echo "$0: preparing configuration files in $dir/conf"

if [ -f "$dir/conf/online.conf" ]; then
  echo "$0: moving $dir/conf/online.conf to $dir/conf/online.conf.bak"
  mv "$dir/conf/online.conf" "$dir/conf/online.conf.bak" || exit 1
fi

conf=$dir/conf/online.conf
# Create or truncate the config file; later sections append to it, so a
# failure here must stop the script.
: >"$conf" || exit 1

# Record the base feature type in online.conf and copy the matching feature
# config into $dir/conf/, pointing online.conf at that copy.
echo "--feature-type=$feature_type" >>"$conf"

case "$feature_type" in
  mfcc)
    echo "--mfcc-config=$dir/conf/mfcc.conf" >>"$conf"
    cp "$mfcc_config" "$dir/conf/mfcc.conf" || exit 1
    ;;
  plp)
    echo "--plp-config=$dir/conf/plp.conf" >>"$conf"
    cp "$plp_config" "$dir/conf/plp.conf" || exit 1
    ;;
  fbank)
    echo "--fbank-config=$dir/conf/fbank.conf" >>"$conf"
    cp "$fbank_config" "$dir/conf/fbank.conf" || exit 1
    ;;
  *)
    # An unsupported type used to be reported but not treated as an error, so
    # the script went on to finish an unusable online.conf.  Abort instead.
    echo "Unknown feature type $feature_type"
    exit 1
    ;;
esac


# When an iVector extractor directory was given, write
# $dir/conf/ivector_extractor.conf and reference it from online.conf.
if [ -n "$iedir" ]; then
  ieconf=$dir/conf/ivector_extractor.conf
  echo "--ivector-extraction-config=$ieconf" >>"$conf"
  cp "$iedir/online_cmvn.conf" "$dir/conf/online_cmvn.conf" || exit 1
  # the next line puts each option from splice_opts on its own line in the config.
  # Word-splitting of the file content is intentional here.
  # shellcheck disable=SC2013
  for x in $(cat "$iedir/splice_opts"); do echo "$x"; done >"$dir/conf/splice.conf" || exit 1
  # Write the whole file through one redirection so that it is truncated and
  # filled in a single step and a failure to open it aborts the script.
  {
    echo "--splice-config=$dir/conf/splice.conf"
    echo "--cmvn-config=$dir/conf/online_cmvn.conf"
    echo "--lda-matrix=$dir/ivector_extractor/final.mat"
    echo "--global-cmvn-stats=$dir/ivector_extractor/global_cmvn.stats"
    echo "--diag-ubm=$dir/ivector_extractor/final.dubm"
    echo "--ivector-extractor=$dir/ivector_extractor/final.ie"
    echo "--num-gselect=$num_gselect"
    echo "--min-post=$min_post"
    echo "--posterior-scale=$posterior_scale" # this is currently the default in the scripts.
    echo "--max-remembered-frames=1000" # the default
    echo "--max-count=$max_count"
  } >"$ieconf" || exit 1
fi

# When pitch is requested, copy the online pitch config into $dir/conf/ and
# enable it in online.conf.  The flag is compared as a string instead of
# being executed as a command, so an unexpected value cannot run a program.
if [ "$add_pitch" = true ]; then
  echo "$0: enabling pitch features"
  echo "--add-pitch=true" >>"$conf"
  echo "$0: creating $dir/conf/online_pitch.conf"
  if [ ! -f "$online_pitch_config" ]; then
    echo "$0: expected file '$online_pitch_config' to exist."
    exit 1
  fi
  cp "$online_pitch_config" "$dir/conf/online_pitch.conf" || exit 1
  echo "--online-pitch-config=$dir/conf/online_pitch.conf" >>"$conf"
fi

# Pass the content of silence.csl to the endpointing option in online.conf.
# The write is checked so the "created" message is printed only on success.
silphonelist=$(cat "$lang/phones/silence.csl") || exit 1
echo "--endpoint.silence-phones=$silphonelist" >>"$conf" || exit 1
echo "$0: created config file $conf"