egs/wsj/s5/steps/online/nnet2/dump_nnet_activations.sh
#!/bin/bash

# Copyright 2013  Daniel Povey
# Apache 2.0.

# This script was modified from ./extract_ivectors_online2.sh.  It is to be used
# when retraining the top layer of a system that was trained on another,
# out-of-domain dataset, on some in-domain dataset.  It takes as input a
# directory such as nnet_gpu_online as prepared by ./prepare_online_decoding.sh,
# and a data directory; it processes the wave files to get features and iVectors,
# then puts them through all but the last layer of the neural net in that
# directory, and dumps those final activations in a feats.scp file in the
# output directory.  These files might be quite large.  A typical
# feature-dimension is 300; it's the p-norm output dim.
# We compress these files (note: the compression is lossy).

# Begin configuration section.
nj=30
cmd="run.pl"
stage=0
utts_per_spk_max=2  # maximum 2 utterances per "fake-speaker."
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 [options] <data> <srcdir> <output-dir>"
  echo " e.g.: $0 data/train exp/nnet2_online/nnet_a_online exp/nnet2_online/activations_train"
  echo "Output is in <output-dir>/feats.scp"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue-opts>) # how to run jobs."
  echo "  --nj <n|10>                                      # Number of jobs (also see num-processes and num-threads)"
  echo "  --stage <stage|0>                                # To control partial reruns"
  echo "  --utts-per-spk-max <int;default=2>               # Controls splitting into 'fake speakers'."
  echo "                                                   # Set to 1 if compatibility with utterance-by-utterance"
  echo "                                                   # decoding is the only factor, and to larger if you care"
  echo "                                                   # also about adaptation over several utterances."
  exit 1;
fi

data=$1
srcdir=$2
dir=$3

for f in $data/wav.scp $srcdir/conf/online_nnet2_decoding.conf $srcdir/final.mdl; do
  [ ! -f $f ] && echo "No such file $f" && exit 1;
done

# Set various variables.
mkdir -p $dir/log
echo $nj >$dir/num_jobs
sdata=$data/split$nj;
utils/split_data.sh $data $nj || exit 1;

mkdir -p $dir/conf $dir/feats

grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/conf/online_feature_pipeline.conf

if [ $stage -le 0 ]; then
  ns=$(wc -l <$data/spk2utt)
  if [ "$ns" == 1 -a "$utts_per_spk_max" != 1 ]; then
    echo "$0: you seem to have just one speaker in your database.  This is probably not a good idea."
    echo " see http://kaldi-asr.org/doc/data_prep.html (search for 'bold') for why"
    echo " Setting --utts-per-spk-max to 1."
    utts_per_spk_max=1
  fi

  mkdir -p $dir/spk2utt_fake
  for job in $(seq $nj); do
    # create fake spk2utt files with reduced number of utterances per speaker,
    # so the network is well adapted to using iVectors from small amounts of
    # training data.
    awk -v max=$utts_per_spk_max \
      '{ n=2; count=0; while(n<=NF) { nmax=n+max; count++; printf("%s-%06x", $1, count);
         for (;n<nmax&&n<=NF; n++) printf(" %s", $n); print "";} }' \
      <$sdata/$job/spk2utt >$dir/spk2utt_fake/spk2utt.$job
  done
fi

if [ $stage -le 1 ]; then
  info=$dir/nnet_info
  nnet-am-info $srcdir/final.mdl >$info
  nc=$(grep num-components $info | awk '{print $2}');
  if grep SumGroupComponent $info >/dev/null; then
    nc_truncate=$[$nc-3]  # we did mix-up: remove AffineComponent,
                          # SumGroupComponent, SoftmaxComponent
  else
    nc_truncate=$[$nc-2]  # remove AffineComponent, SoftmaxComponent
  fi
  nnet-to-raw-nnet --truncate=$nc_truncate $srcdir/final.mdl $dir/nnet.raw
fi

if [ $stage -le 2 ]; then
  echo "$0: dumping neural net activations"

  # The next line is a no-op unless $dir/feats/storage/ exists; see utils/create_split_dir.pl.
  for j in $(seq $nj); do utils/create_data_link.pl $dir/feats/feats.$j.ark; done

  if [ -f $data/segments ]; then
    wav_rspecifier="ark,s,cs:extract-segments scp,p:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- |"
  else
    wav_rspecifier="scp,p:$sdata/JOB/wav.scp"
  fi

  $cmd JOB=1:$nj $dir/log/dump_activations.JOB.log \
    online2-wav-dump-features --config=$dir/conf/online_feature_pipeline.conf \
      ark:$dir/spk2utt_fake/spk2utt.JOB "$wav_rspecifier" ark:- \| \
    nnet-compute $dir/nnet.raw ark:- ark:- \| \
    copy-feats --compress=true ark:- \
      ark,scp:$dir/feats/feats.JOB.ark,$dir/feats/feats.JOB.scp || exit 1;
fi

if [ $stage -le 3 ]; then
  echo "$0: combining activations across jobs"
  mkdir -p $dir/data
  cp -r $data/* $dir/data
  for j in $(seq $nj); do cat $dir/feats/feats.$j.scp; done >$dir/data/feats.scp || exit 1;
fi

if [ $stage -le 4 ]; then
  echo "$0: computing [fake] CMVN stats."
  # We shouldn't actually be doing CMVN, but the get_egs.sh script expects it,
  # so create fake CMVN stats.
  steps/compute_cmvn_stats.sh --fake $dir/data $dir/log $dir/feats || exit 1
fi

echo "$0: done.  Output is in $dir/data/feats.scp"
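
# A worked illustration of the fake-speaker splitting performed by the awk
# command in stage 0 above (the speaker and utterance names here are
# hypothetical).  With the default --utts-per-spk-max of 2, a spk2utt line such as
#   spk1 utt01 utt02 utt03 utt04 utt05
# is rewritten in $dir/spk2utt_fake/spk2utt.$job as
#   spk1-000001 utt01 utt02
#   spk1-000002 utt03 utt04
#   spk1-000003 utt05
# i.e. each "fake speaker" gets at most $utts_per_spk_max utterances, so the
# iVectors used in training are estimated from small amounts of data.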
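
# A sketch of the component-count arithmetic in stage 1, assuming a
# hypothetical model for which nnet-am-info reports num-components 18:
# if the info contains a SumGroupComponent (a mixed-up model), the final
# AffineComponent, SumGroupComponent and SoftmaxComponent are dropped, so
# nc_truncate = 18 - 3 = 15 and nnet-to-raw-nnet keeps components 1..15;
# otherwise only the final AffineComponent and SoftmaxComponent are dropped,
# giving nc_truncate = 18 - 2 = 16.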
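
# For debugging, the stage-2 pipeline can be run by hand for a single job
# (job 1 shown; this sketch assumes the default layout created above and no
# segments file, so the plain wav.scp rspecifier is used):
#
#   online2-wav-dump-features --config=$dir/conf/online_feature_pipeline.conf \
#       ark:$dir/spk2utt_fake/spk2utt.1 scp,p:$data/split$nj/1/wav.scp ark:- | \
#     nnet-compute $dir/nnet.raw ark:- ark:- | \
#     copy-feats --compress=true ark:- \
#       ark,scp:$dir/feats/feats.1.ark,$dir/feats/feats.1.scp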