dump_nnet_activations.sh
#!/bin/bash
# Copyright 2013 Daniel Povey
# Apache 2.0.
# This script was modified from ./extract_ivectors_online2.sh.  It is to be used
# when retraining, on some in-domain dataset, the top layer of a system that was
# trained on another, out-of-domain dataset.  It takes as input a directory such as
# nnet_gpu_online as prepared by ./prepare_online_decoding.sh, and a data directory;
# it processes the wave files to get features and iVectors, puts them through all
# but the last layer of the neural net in that directory, and dumps those final
# activations to a feats.scp file in the output directory.  These files might be
# quite large.  A typical feature dimension is 300 (the p-norm output dim).
# We compress these files (note: the compression is lossy).
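
# Example invocation (hypothetical paths; the argument order is shown in the
# usage message below):
#   ./dump_nnet_activations.sh --nj 30 --cmd "run.pl" --utts-per-spk-max 2 \
#     data/train exp/nnet2_online/nnet_a_online exp/nnet2_online/activations_train
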
# Begin configuration section.
nj=30
cmd="run.pl"
stage=0
utts_per_spk_max=2 # maximum 2 utterances per "fake-speaker."
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
  echo "Usage: $0 [options] <data> <srcdir> <output-dir>"
  echo " e.g.: $0 data/train exp/nnet2_online/nnet_a_online exp/nnet2_online/activations_train"
  echo "Output is in <output-dir>/data/feats.scp"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue-opts>) # how to run jobs."
  echo "  --nj <n|30>                                      # Number of parallel jobs"
  echo "  --stage <stage|0>                                # To control partial reruns"
  echo "  --utts-per-spk-max <int;default=2>               # Controls splitting into 'fake speakers'."
  echo "                                                   # Set to 1 if compatibility with utterance-by-utterance"
  echo "                                                   # decoding is the only factor, and to larger if you care"
  echo "                                                   # also about adaptation over several utterances."
  exit 1;
fi
data=$1
srcdir=$2
dir=$3
for f in $data/wav.scp $srcdir/conf/online_nnet2_decoding.conf $srcdir/final.mdl; do
  [ ! -f $f ] && echo "No such file $f" && exit 1;
done
# Set various variables.
mkdir -p $dir/log
echo $nj >$dir/num_jobs
sdata=$data/split$nj;
utils/split_data.sh $data $nj || exit 1;
mkdir -p $dir/conf $dir/feats
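# The feature-pipeline config is just the online-decoding config with the
# endpointing options removed (those are only relevant when actually decoding);
# online2-wav-dump-features reads this config below.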
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf > $dir/conf/online_feature_pipeline.conf
if [ $stage -le 0 ]; then
  ns=$(wc -l <$data/spk2utt)
  if [ "$ns" == 1 -a "$utts_per_spk_max" != 1 ]; then
    echo "$0: you seem to have just one speaker in your database. This is probably not a good idea."
    echo " see http://kaldi-asr.org/doc/data_prep.html (search for 'bold') for why"
    echo " Setting --utts-per-spk-max to 1."
    utts_per_spk_max=1
  fi
  mkdir -p $dir/spk2utt_fake
  for job in $(seq $nj); do
    # create fake spk2utt files with a reduced number of utterances per speaker,
    # so the network is well adapted to using iVectors from small amounts of
    # training data.
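    # For example, with --utts-per-spk-max=2 a spk2utt line like
    #   spk1 utt1 utt2 utt3
    # is split into two 'fake speakers':
    #   spk1-000001 utt1 utt2
    #   spk1-000002 utt3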
    awk -v max=$utts_per_spk_max '{ n=2; count=0; while(n<=NF) {
      nmax=n+max; count++; printf("%s-%06x", $1, count); for (;n<nmax&&n<=NF; n++) printf(" %s", $n); print "";} }' \
        <$sdata/$job/spk2utt >$dir/spk2utt_fake/spk2utt.$job
  done
fi
if [ $stage -le 1 ]; then
  info=$dir/nnet_info
  nnet-am-info $srcdir/final.mdl >$info
  nc=$(grep num-components $info | awk '{print $2}');
  if grep SumGroupComponent $info >/dev/null; then
    nc_truncate=$[$nc-3]  # we did mix-up: remove AffineComponent,
                          # SumGroupComponent, SoftmaxComponent
  else
    nc_truncate=$[$nc-2]  # remove AffineComponent, SoftmaxComponent
  fi
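  # For example (hypothetical counts): if nnet-am-info reports num-components 20
  # and the model contains a SumGroupComponent, nc_truncate is 17, so nnet.raw
  # keeps components 1..17, i.e. everything up to the last hidden layer (the
  # p-norm output mentioned at the top of this script).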
  nnet-to-raw-nnet --truncate=$nc_truncate $srcdir/final.mdl $dir/nnet.raw
fi
if [ $stage -le 2 ]; then
echo "$0: dumping neural net activations"
# The next line is a no-op unless $dir/feats/storage/ exists; see utils/create_split_dir.pl.
for j in $(seq $nj); do utils/create_data_link.pl $dir/feats/feats.$j.ark; done
if [ -f $data/segments ]; then
wav_rspecifier="ark,s,cs:extract-segments scp,p:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- |"
else
wav_rspecifier="scp,p:$sdata/JOB/wav.scp"
fi
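  # In the pipeline below, online2-wav-dump-features computes the same features
  # (including iVectors) that the online decoder would see, nnet-compute runs a
  # forward pass through the truncated network nnet.raw, and copy-feats writes
  # the resulting activations as compressed ark/scp files.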
  $cmd JOB=1:$nj $dir/log/dump_activations.JOB.log \
    online2-wav-dump-features --config=$dir/conf/online_feature_pipeline.conf \
      ark:$dir/spk2utt_fake/spk2utt.JOB "$wav_rspecifier" ark:- \| \
    nnet-compute $dir/nnet.raw ark:- ark:- \| \
    copy-feats --compress=true ark:- \
      ark,scp:$dir/feats/feats.JOB.ark,$dir/feats/feats.JOB.scp || exit 1;
fi
if [ $stage -le 3 ]; then
echo "$0: combining activations across jobs"
mkdir -p $dir/data
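  # Copy the rest of the source data directory (utt2spk, text, etc.) so that
  # $dir/data looks like a normal data directory; its feats.scp is overwritten
  # just below with the dumped activations.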
  cp -r $data/* $dir/data
  for j in $(seq $nj); do cat $dir/feats/feats.$j.scp; done >$dir/data/feats.scp || exit 1;
fi
if [ $stage -le 4 ]; then
echo "$0: computing [fake] CMVN stats."
# We shouldn't actually be doing CMVN, but the get_egs.sh script expects it,
# so create fake CMVN stats.
steps/compute_cmvn_stats.sh --fake $dir/data $dir/log $dir/feats || exit 1
fi
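# At this point $dir/data can be used like a normal data directory (feats.scp
# points to the compressed activations, with fake CMVN stats), e.g. by get_egs.sh
# when retraining the final layer of the network on the in-domain data.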
echo "$0: done. Output is in $dir/data/feats.scp"