#!/bin/bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
# This example script trains a DNN with a <BlockSoftmax> output on top of FBANK features.
# The network is trained on RM and WSJ84 simultaneously.
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
. ./path.sh ## Source the tools/utils (imports queue.pl)
dev=data-fbank-blocksoftmax/test
train=data-fbank-blocksoftmax/train
wsj=data-fbank-blocksoftmax/wsj
train_tr90_wsj=data-fbank-blocksoftmax/train_tr90_wsj
dev_original=data/test
train_original=data/train
wsj_original=../../wsj/s5/data/train_si284
[ ! -e $wsj_original ] && echo "Missing $wsj_original" && exit 1
gmm=exp/tri3b
wsj_ali=../../wsj/s5/exp/tri4b_ali_si284
[ ! -e $wsj_ali ] && echo "Missing $wsj_ali" && exit 1
stage=0
. utils/parse_options.sh || exit 1;
set -euxo pipefail
# Make the FBANK features,
[ ! -e $dev ] && if [ $stage -le 0 ]; then
# Make datadir copies,
utils/copy_data_dir.sh $dev_original $dev; rm $dev/{cmvn,feats}.scp
utils/copy_data_dir.sh $train_original $train; rm $train/{cmvn,feats}.scp
utils/copy_data_dir.sh --utt-prefix wsj --spk-prefix wsj $wsj_original $wsj; rm $wsj/{cmvn,feats}.scp
# Feature extraction,
# Dev set,
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
$dev $dev/log $dev/data
steps/compute_cmvn_stats.sh $dev $dev/log $dev/data
# Training set,
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \
$train $train/log $train/data
steps/compute_cmvn_stats.sh $train $train/log $train/data
# Wsj,
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \
$wsj $wsj/log $wsj/data
steps/compute_cmvn_stats.sh $wsj $wsj/log $wsj/data
# Split the rm training set,
utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10
# Merge the wsj set into the rm training set,
utils/combine_data.sh $train_tr90_wsj ${train}_tr90 $wsj
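# Optional sanity check of the merged dir (a cheap guard; it should pass
# if the key-prefixing above went well),
utils/validate_data_dir.sh $train_tr90_wsj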
fi
# Prepare the merged targets,
dir=exp/dnn4e-fbank_blocksoftmax
ali1_dim=$(hmm-info ${gmm}_ali/final.mdl | grep pdfs | awk '{ print $NF }')
ali2_dim=$(hmm-info ${wsj_ali}/final.mdl | grep pdfs | awk '{ print $NF }')
#
output_dim=$((ali1_dim + ali2_dim))
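# A cheap guard against 'hmm-info' parsing problems: both dims must be positive,
[ "$ali1_dim" -gt 0 ] && [ "$ali2_dim" -gt 0 ] || \
  { echo "Error: bad pdf counts '$ali1_dim' '$ali2_dim'"; exit 1; }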
#
ali1_pdf="ark:ali-to-pdf ${gmm}_ali/final.mdl 'ark:gunzip -c ${gmm}_ali/ali.*.gz |' ark:- |"
ali1_dir=${gmm}_ali
#
if [ $stage -le 1 ]; then
mkdir -p $dir/log
# Map the keys in the wsj alignments to have the prefix 'wsj_',
copy-int-vector "ark:gunzip -c ${wsj_ali}/ali.*.gz |" ark,t:- | awk -v prefix=wsj_ '{ $1=prefix $1; print; }' | \
gzip -c >$dir/ali_wsj.gz
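# (the rewritten keys have to match the utt-ids of $wsj/feats.scp; to eyeball one:
#  gunzip -c $dir/ali_wsj.gz | head -n1 | awk '{ print $1; }')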
# Store the single-stream posteriors to disk, indexed by 'scp', so they can be pasted without caching,
ali-to-pdf ${gmm}_ali/final.mdl "ark:gunzip -c ${gmm}_ali/ali.*.gz |" ark:- | \
ali-to-post ark:- ark,scp:$dir/post1.ark,$dir/post1.scp
ali-to-pdf ${wsj_ali}/final.mdl "ark:gunzip -c $dir/ali_wsj.gz |" ark:- | \
ali-to-post ark:- ark,scp:$dir/post2.ark,$dir/post2.scp
# Paste the posteriors from the 'scp' inputs,
featlen="ark:feat-to-len 'scp:cat $train/feats.scp $wsj/feats.scp |' ark,t:- |"
paste-post --allow-partial=true "$featlen" $ali1_dim:$ali2_dim \
scp:$dir/post1.scp scp:$dir/post2.scp \
ark,scp:$dir/pasted_post.ark,$dir/pasted_post.scp 2>$dir/log/paste_post.log
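# A quick count as a sanity check ('--allow-partial=true' tolerates utterances
# missing from one of the streams, so it is worth comparing the numbers),
n_pasted=$(wc -l <$dir/pasted_post.scp)
n_feats=$(cat $train/feats.scp $wsj/feats.scp | wc -l)
echo "Pasted targets for $n_pasted of $n_feats utterances,"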
fi
# Train the NN with a '<BlockSoftmax>' output, using the 'MultiTask' objective function,
objw1=1; objw2=0.1; # we use a lower weight for the 'wsj' data,
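# (the objective string passed to 'nnet-train-frmshuff' below has the form
#  'multitask,<type1>,<dim1>,<weight1>,<type2>,<dim2>,<weight2>,...',
#  i.e. the two <BlockSoftmax> blocks become two weighted 'xent' tasks)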
if [ $stage -le 2 ]; then
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh \
--cmvn-opts "--norm-means=true --norm-vars=true" \
--delta-opts "--delta-order=2" --splice 5 \
--labels "scp:$dir/pasted_post.scp" --num-tgt $output_dim \
--proto-opts "--block-softmax-dims='$ali1_dim:$ali2_dim'" \
--train-tool "nnet-train-frmshuff --objective-function=multitask,xent,$ali1_dim,$objw1,xent,$ali2_dim,$objw2" \
--learn-rate 0.008 \
${train_tr90_wsj} ${train}_cv10 lang-dummy ali-dummy ali-dummy $dir
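# ('lang-dummy' and 'ali-dummy' above are placeholders; the targets come from
#  --labels, so train.sh does not read the lang/ali directories)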
# Create files used in decoding, which are missing because of the --labels use,
analyze-counts --binary=false "$ali1_pdf" $dir/ali_train_pdf.counts
copy-transition-model --binary=false $ali1_dir/final.mdl $dir/final.mdl
cp $ali1_dir/tree $dir/tree
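# (decoding needs the transition-model and tree of the 1st task, and
#  'ali_train_pdf.counts' supplies the priors for converting the DNN posteriors
#  to pseudo-likelihoods)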
# Rebuild the network: <BlockSoftmax> is removed and the neurons of the 1st block are selected,
nnet-concat "nnet-copy --remove-last-components=1 $dir/final.nnet - |" \
"echo '<Copy> <InputDim> $output_dim <OutputDim> $ali1_dim <BuildVector> 1:$ali1_dim </BuildVector>' | nnet-initialize - - |" \
$dir/final.nnet.lang1
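# (<BuildVector> uses 1-based indexing, so '1:$ali1_dim' forwards the neurons of
#  the 1st block, i.e. the rm outputs, and drops the wsj block)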
# Decode (reuse HCLG graph),
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
--nnet $dir/final.nnet.lang1 \
$gmm/graph $dev $dir/decode
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
--nnet $dir/final.nnet.lang1 \
$gmm/graph_ug $dev $dir/decode_ug
fi
exit 0
# TODO,
# make nnet-copy support block selection,
# - either by replacing <BlockSoftmax> by <Softmax> and shrinking <AffineTransform>,
# - or by appending <Copy> transform,
#
# Will it be compatible with other scripts/tools that assume a <Softmax> at the end?
# Or is it better to do everything explicitly in the master script, as now?...
# Hmmm, need to think about it...
# Train the baseline system with a plain <Softmax> output (this block sits below
# the 'exit 0' above, so it is run manually),
if [ $stage -le 3 ]; then
dir=exp/dnn4e-fbank_baseline
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh \
--cmvn-opts "--norm-means=true --norm-vars=true" \
--delta-opts "--delta-order=2" --splice 5 \
--learn-rate 0.008 \
${train}_tr90 ${train}_cv10 data/lang ${gmm}_ali ${gmm}_ali $dir
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmm/graph $dev $dir/decode
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmm/graph_ug $dev $dir/decode_ug
fi
echo Success
exit 0
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done