prepare_online_decoding.sh 10.3 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256


#!/bin/bash

# Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Begin configuration.
# The assignments in this section are defaults. parse_options.sh, sourced
# below, handles the command line; the usage message lists the --option
# spelling for most of these (presumably --foo-bar sets foo_bar -- confirm
# against parse_options.sh).
stage=0 # This allows restarting partway through, when something went wrong:
        # each "if [ $stage -le N ]" section below is skipped when stage > N.
feature_type=mfcc  # mfcc or plp; selects which base-feature config stage 3
                   # copies into <output-dir>/conf.
online_cmvn_config=conf/online_cmvn.conf  # Passed as --config to
                   # apply-cmvn-online and copied into <output-dir>/conf.
add_pitch=false    # If true, online CMVN gets --skip-dims=13:14:15 and stage 3
                   # adds the pitch options and configs to the decoding config.
pitch_config=conf/pitch.conf  # Copied into <output-dir>/conf when add_pitch=true.
pitch_process_config=conf/pitch_process.conf  # Likewise, when add_pitch=true.
per_utt_basis=true # If true, then treat each utterance as a separate speaker
                   # for purposes of basis training... this is recommended if
                   # the number of actual speakers in your training set is less
                   # than (feature-dim) * (feature-dim+1).
per_utt_cmvn=false # If true, apply online CMVN normalization per utterance
                   # rather than per speaker.
silence_weight=0.01  # First argument to weight-silence-post in stage 0.
cmd=run.pl         # Job launcher prefixed to every processing command.
cleanup=true       # If true, delete the per-job accumulator files
                   # (basis.acc.*, final.*.acc) once they have been combined.
# End configuration.

# Log the invocation exactly as it was typed.
echo "$0 $*"

# Pull in the environment from path.sh when the working directory has one,
# then hand the command-line options to parse_options.sh.
if [ -f path.sh ]; then
  . ./path.sh
fi
. parse_options.sh || exit 1

# Exactly four positional arguments are expected, or five when an MMI model
# is given; anything else prints the usage text and stops.
if [ $# -lt 4 ] || [ $# -gt 5 ]; then
  printf '%s\n' \
    "Usage: $0 [options] <data-dir> <lang-dir> <sat-model-dir> [<MMI-model>] <output-dir>" \
    "e.g.: $0 data/train data/lang exp/tri3b exp/tri3b_mmi/final.mdl exp/tri3b_online" \
    "main options (for others, see top of script file)" \
    "  --feature-type <mfcc|plp>                        # Type of the base features; " \
    "                                                   # important to generate the correct" \
    "                                                   # configs in <output-dir>/conf/" \
    "  --online-cmvn-config <config>                    # config for online cmvn," \
    "                                                   # default conf/online_cmvn.conf" \
    "  --add-pitch <true|false>                         # Append pitch features to cmvn" \
    "                                                   # (default: false)" \
    "  --per-utt-cmvn <true|false>                      # Apply online CMVN per utt, not" \
    "                                                   # per speaker (default: false)" \
    "  --per-utt-basis <true|false>                     # Do basis computation per utterance" \
    "                                                   # (default: true)" \
    "  --silence-weight <weight>                        # Weight on silence for basis fMLLR;" \
    "                                                   # default 0.01." \
    "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." \
    "  --config <config-file>                           # config containing options" \
    "  --stage <stage>                                  # stage to do partial re-run from."
  exit 1
fi


# The first three positional arguments always mean the same thing.
data=$1
lang=$2
srcdir=$3

# With five arguments the fourth names the model used for rescoring;
# with four, the source directory's own final.mdl is used for that too.
case $# in
  5)
    mmi_model=$4
    dir=$5
    ;;
  *)
    mmi_model=$srcdir/final.mdl
    dir=$4
    ;;
esac


# Fail early, with the offending path, if any input file is missing.
# $data/cmvn.scp is included because it is summed into global_cmvn.stats and
# read by the apply-cmvn pipelines below; without the check, its absence only
# surfaces as a generic "Error summing cmvn stats".
for f in $srcdir/final.mdl $srcdir/ali.1.gz $data/feats.scp $data/cmvn.scp \
    $lang/phones.txt $mmi_model $online_cmvn_config; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done

# Reuse the job count recorded in the source model directory.
nj=$(cat $srcdir/num_jobs) || exit 1
sdata=$data/split$nj

# Split the data unless a split directory already exists and feats.scp is
# older than it.
if ! [[ -d $sdata && $data/feats.scp -ot $sdata ]]; then
  split_data.sh $data $nj || exit 1
fi

mkdir -p $dir/log
echo $nj >$dir/num_jobs || exit 1

# The phone table of the lang directory has to agree with the one the source
# model was built with; the output directory gets its own copy.
utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1
cp $lang/phones.txt $dir || exit 1

# splice_opts and cmvn_opts may be absent from the source directory, in which
# case the corresponding variable is simply left empty.
splice_opts=$(cat $srcdir/splice_opts 2>/dev/null)
cmvn_opts=$(cat $srcdir/cmvn_opts 2>/dev/null)
silphonelist=$(cat $lang/phones/silence.csl) || exit 1

# Copy whichever of these files exist; complaints about the missing ones are
# discarded.
cp $srcdir/splice_opts $srcdir/cmvn_opts $srcdir/final.mat $srcdir/final.mdl \
  $dir/ 2>/dev/null

# The rescoring model is kept under a fixed name in the output directory.
cp $mmi_model $dir/final.rescore_mdl

# Set up the unadapted features "$sifeats" and their online-CMVN counterpart
# "$online_sifeats".
# The front end is treated as LDA-type when final.mat is present in $dir,
# and as delta-type otherwise.
if [ -f $dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi

# apply-cmvn-online works per speaker when it is given a spk2utt map and per
# utterance when it is not, so the map is withheld only when
# --per-utt-cmvn true was requested.  (This mirrors how $spk2utt_opt is
# chosen from $per_utt_basis for the basis statistics.)
if $per_utt_cmvn; then
  online_cmvn_spk2utt_opt=
else
  online_cmvn_spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi


# create global_cmvn.stats
# The summed stats are written in text form (--binary=false); stage 3 reads
# the second line of this file when pitch is enabled.
if ! matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then
  echo "$0: Error summing cmvn stats"
  exit 1
fi

# Start from an empty value so a skip_opt inherited from the environment
# cannot leak into the command line.
skip_opt=
if $add_pitch; then
  skip_opt="--skip-dims=13:14:15" # should make this more general.
fi

# The two pipelines differ only in their CMVN stage.  All options of
# apply-cmvn-online are placed before its first positional argument
# (the global stats file): Kaldi programs stop option parsing at the first
# positional argument, so an option placed after it would be misread.
cmvn_offline="apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
cmvn_online="apply-cmvn-online $skip_opt --config=$online_cmvn_config $online_cmvn_spk2utt_opt $dir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- |"

echo "$0: feature type is $feat_type";
case $feat_type in
  delta) feat_tail="add-deltas ark:- ark:- |";;
  lda) feat_tail="splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |";;
  *) echo "Invalid feature type $feat_type" && exit 1;;
esac
sifeats="ark,s,cs:$cmvn_offline $feat_tail"
online_sifeats="ark,s,cs:$cmvn_online $feat_tail"

# Set up the adapted features "$feats" for training set.
# When the source directory holds per-job transforms (trans.JOB) they are
# applied on top of the unadapted features; otherwise "$feats" is just
# "$sifeats".
if [ -f $srcdir/trans.1 ]; then
  feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$srcdir/trans.JOB ark:- ark:- |";
else
  feats="$sifeats";
fi


# Choose whether the basis statistics are accumulated per utterance (no
# spk2utt map) or per speaker (per-job spk2utt map).
if $per_utt_basis; then
  spk2utt_opt=  # treat each utterance as separate speaker when computing basis.
  echo "Doing per-utterance adaptation for purposes of computing the basis."
else
  echo "Doing per-speaker adaptation for purposes of computing the basis."
  # Count speakers in the unsplit data directory.  Every other use of $sdata
  # goes through $sdata/JOB/..., so $sdata/spk2utt is not a file that can be
  # counted; reading it made the count 0 and the warning unconditional.
  if [ -f $data/spk2utt ]; then
    num_spk=$(wc -l <$data/spk2utt)
    # 41*40 is (feature-dim+1) * (feature-dim) for 40-dimensional features.
    if [ $num_spk -lt $((41*40)) ]; then
      echo "Warning: number of speakers is small, might be better to use --per-utt-basis true."
    fi
  fi
  spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi

# Stage 0: accumulate one basis-fMLLR statistics file per job.
# Everything after the log-file argument is a single pipeline handed to $cmd;
# its pipes are written as \| so that this shell passes them through as
# literal arguments instead of setting up the pipeline itself.  JOB is a
# placeholder that $cmd is expected to replace with each job index in
# 1..$nj (TODO confirm against the $cmd implementation in use).
if [ $stage -le 0 ]; then
  echo "$0: Accumulating statistics for basis-fMLLR computation"
# Note: we get Gaussian level alignments with the "final.mdl" and the
# speaker adapted features.
  # Data flow, as far as the arguments show:
  #   ali-to-post reads the gzipped alignments $srcdir/ali.JOB.gz;
  #   weight-silence-post is given $silence_weight and the silence phone list;
  #   gmm-post-to-gpost reads the adapted features "$feats";
  #   gmm-basis-fmllr-accs-gpost reads the unadapted features "$sifeats" and
  #   writes $dir/basis.acc.JOB ($spk2utt_opt is empty in per-utterance mode).
  $cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \
    ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:- \| \
    weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \
    gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \
    gmm-basis-fmllr-accs-gpost $spk2utt_opt \
    $dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1;
fi

# Stage 1: combine the per-job statistics written by stage 0
# ($dir/basis.acc.*) into $dir/fmllr.basis.
if [ $stage -le 1 ]; then
  echo "$0: computing the basis matrices."
  # The glob is expanded by this shell, so every accumulator file present in
  # $dir is passed as a separate argument.
  $cmd $dir/log/basis_training.log \
    gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.* || exit 1;
  if $cleanup; then
    # The accumulators are no longer needed once the basis has been written;
    # a failed removal is ignored.
    rm $dir/basis.acc.* 2>/dev/null
  fi
fi

# Stage 2: build $dir/final.oalimdl, the "online alignment model".
if [ $stage -le 2 ]; then
  echo "$0: accumulating stats for online alignment model."

  # Accumulate stats for "online alignment model"-- this model is computed with
  # the speaker-independent features and online CMVN, but matches
  # Gaussian-for-Gaussian with the final speaker-adapted model.

  # Per job: posteriors derived from the alignments are piped (via the
  # escaped \|, which is passed to $cmd literally) into
  # gmm-acc-stats-twofeats, which is given both the adapted features "$feats"
  # and the online-CMVN features "$online_sifeats" and writes
  # $dir/final.JOB.acc.
  $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
    ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:-  \| \
    gmm-acc-stats-twofeats $dir/final.mdl "$feats" "$online_sifeats" \
    ark,s,cs:- $dir/final.JOB.acc || exit 1;
  # Sanity check: the number of accumulator files must equal the job count.
  [ `ls $dir/final.*.acc | wc -w` -ne "$nj" ] && echo "$0: Wrong #accs" && exit 1;
  # Update model.
  # The accumulators are summed by the quoted "gmm-sum-accs ... |" input and
  # the re-estimated model is written to final.oalimdl;
  # --remove-low-count-gaussians=false is passed, presumably so the Gaussians
  # stay in one-to-one correspondence with final.mdl (see the note above).
  $cmd $dir/log/est_online_alimdl.log \
    gmm-est --remove-low-count-gaussians=false $dir/final.mdl \
    "gmm-sum-accs - $dir/final.*.acc|" $dir/final.oalimdl  || exit 1;
  if $cleanup; then
    rm $dir/final.*.acc
  fi
fi

# Stage 3: write $dir/conf/online_decoding.conf together with the config
# files it points to (base features, online CMVN, splicing/LDA or deltas,
# optional pitch) and the model-related options.
if [ $stage -le 3 ]; then
  # Reject an unsupported --feature-type before any existing config is moved
  # or truncated; carrying on would produce a decoding config that lacks the
  # base-feature options.
  case "$feature_type" in
    mfcc|plp) ;;
    *)
      echo "Unknown feature type $feature_type"
      exit 1 ;;
  esac

  mkdir -p $dir/conf
  # NOTE(review): this removes plp.conf/mfcc.conf directly under $dir, not
  # under $dir/conf; nothing in this script writes them there, so it looks
  # like cleanup after an older layout -- confirm before changing.
  rm $dir/{plp,mfcc}.conf 2>/dev/null
  echo "$0: preparing configuration files in $dir/conf"
  # Keep one backup of a decoding config left over from a previous run.
  if [ -f $dir/conf/online_decoding.conf ]; then
    echo "$0: moving $dir/conf/online_decoding.conf to $dir/conf/online_decoding.conf.bak"
    mv $dir/conf/online_decoding.conf $dir/conf/online_decoding.conf.bak
  fi
  conf=$dir/conf/online_decoding.conf
  echo -n >$conf  # start from an empty file; everything below appends.

  # Base features.  The copies are checked like the other config copies in
  # this stage, so the decoding config never points at a file that was not
  # actually created.
  case "$feature_type" in
    mfcc)
      echo "$0: creating $dir/conf/mfcc.conf"
      echo "--mfcc-config=$dir/conf/mfcc.conf" >>$conf
      if ! cp conf/mfcc.conf $dir/conf/; then
        echo "$0: error copying conf/mfcc.conf to $dir/conf/"
        exit 1;
      fi ;;
    plp)
      echo "$0: enabling plp features"
      echo "--feature-type=plp" >>$conf
      echo "$0: creating $dir/conf/plp.conf"
      echo "--plp-config=$dir/conf/plp.conf" >>$conf
      if ! cp conf/plp.conf $dir/conf/; then
        echo "$0: error copying conf/plp.conf to $dir/conf/"
        exit 1;
      fi ;;
  esac

  # Online CMVN.
  if ! cp $online_cmvn_config $dir/conf/online_cmvn.conf; then
    echo "$0: error copying online cmvn config to $dir/conf/"
    exit 1;
  fi
  echo "--cmvn-config=$dir/conf/online_cmvn.conf" >>$conf

  # Splicing + LDA when final.mat is present in $dir, deltas otherwise.
  if [ -f $dir/final.mat ]; then
    echo "$0: enabling feature splicing"
    echo "--splice-feats" >>$conf
    echo "$0: creating $dir/conf/splice.conf"
    # splice_opts holds the options on one line; the config file gets one
    # option per line.
    for x in $(cat $dir/splice_opts); do echo $x; done > $dir/conf/splice.conf
    echo "--splice-config=$dir/conf/splice.conf" >>$conf
    echo "$0: enabling LDA"
    echo "--lda-matrix=$dir/final.mat" >>$conf
  else
    echo "$0: enabling deltas"
    echo "--add-deltas" >>$conf
  fi

  # Optional pitch features.
  if $add_pitch; then
    echo "$0: enabling pitch features"
    echo "--add-pitch" >>$conf
    echo "$0: creating $dir/conf/pitch.conf"
    echo "--pitch-config=$dir/conf/pitch.conf" >>$conf
    if ! cp $pitch_config $dir/conf/pitch.conf; then
      echo "$0: error copying pitch config to $dir/conf/"
      exit 1;
    fi;
    echo "$0: creating $dir/conf/pitch_process.conf"
    echo "--pitch-process-config=$dir/conf/pitch_process.conf" >>$conf
    if ! cp $pitch_process_config $dir/conf/pitch_process.conf; then
      echo "$0: error copying pitch process config to $dir/conf/"
      exit 1;
    fi;
    # Sanity check on the text-format stats: count the entries on the second
    # line of the file.  17 is the hard-coded expectation here (it matches
    # the --skip-dims=13:14:15 assumption made for online CMVN).
    nfields=$(sed -n '2,2p' $dir/global_cmvn.stats | \
      perl -e '$_ = <>; s/^\s+|\s+$//g; print scalar(split);');
    # Quoted so that an empty result (stats file unreadable, perl missing)
    # is reported as a mismatch instead of making the test itself fail and
    # the check pass silently.
    if [ "$nfields" != 17 ]; then
      echo "$0: $dir/global_cmvn.stats has $nfields entries per row (expected 17)."
      echo "$0: Did you append pitch features?"
      exit 1;
    fi
    #offset=$(sed -n '2,2p' $dir/global_cmvn.stats | \
    #  perl -e '$_ = <>; s/^\s+|\s+$//g; ($t, $c) = (split)[13, 16]; print -$t/$c;');
    #echo "--pov-offset=$offset" >>$dir/conf/pitch_process.conf
  fi

  # Model-related options.
  echo "--fmllr-basis=$dir/fmllr.basis" >>$conf
  echo "--online-alignment-model=$dir/final.oalimdl" >>$conf
  echo "--model=$dir/final.mdl" >>$conf
  # A rescoring model is only configured when it differs from final.mdl.
  # "cmp -s" is the POSIX spelling of the silent comparison.
  if ! cmp -s $dir/final.mdl $dir/final.rescore_mdl; then
    echo "--rescore-model=$dir/final.rescore_mdl" >>$conf
  fi
  echo "--silence-phones=$silphonelist" >>$conf
  echo "--endpoint.silence-phones=$silphonelist" >>$conf
  echo "--global-cmvn-stats=$dir/global_cmvn.stats" >>$conf
  echo "$0: created config file $conf"
fi