Blame view
egs/wsj/s5/steps/nnet3/make_bottleneck_features.sh
4.61 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
#!/bin/bash

# Copyright 2016 Pegah Ghahremani

# This script dumps bottleneck features (BNF) for a model trained using nnet3.
#
# It rewires the network so that the requested component-node becomes the
# node named "output", runs nnet3-compute over the (CMVN-normalized) input
# features in $nj parallel jobs, writes the result as ark/scp pairs, and then
# assembles a new data directory with the BNF feats.scp plus CMVN stats.
#
# Usage:
#   steps/nnet3/make_bottleneck_features.sh [options] <bnf-node-name> \
#     <input-data-dir> <bnf-data-dir> <nnet-dir> [<log-dir> [<bnfdir>]]
#
# CAUTION! This script isn't very suitable for dumping features from recurrent
# architectures such as LSTMs, because it doesn't support setting the chunk size
# and left and right context. (Those would have to be passed into nnet3-compute).
# See also chain/get_phone_post.sh.

# Begin configuration section.
# (These are the variables that parse_options.sh, sourced below, may override
# from the command line.)
stage=1          # the extraction step runs only when stage <= 1
nj=4             # number of parallel jobs
cmd=queue.pl     # job launcher
use_gpu=false    # true|false; selects --use-gpu=yes/no for nnet3-compute
ivector_dir=     # if non-empty, online ivectors are read from this directory
compress=true    # passed to copy-feats --compress
# End configuration options.

echo "$0 $*"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh  # source the path.
. parse_options.sh || exit 1

if (( $# < 4 || $# > 6 )); then
  echo "usage: steps/nnet3/make_bottleneck_features.sh <bnf-node-name> <input-data-dir> <bnf-data-dir> <nnet-dir> [<log-dir> [<bnfdir>] ]"
  echo "e.g.: steps/nnet3/make_bottleneck_features.sh tdnn_bn.renorm data/train data/train_bnf exp/nnet3/tdnn_bnf exp_bnf/dump_bnf bnf"
  echo "Note: <log-dir> defaults to <bnf-data-dir>/log and <bnfdir> defaults to"
  echo " <bnf-data-dir>/data"
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --ivector-dir # directory for ivectors"
  exit 1
fi

bnf_name=$1  # the component-node name in nnet3 model used for bottleneck feature extraction
data=$2      # input data directory (must contain feats.scp)
bnf_data=$3  # output data directory
nnetdir=$4   # directory holding the model and cmvn_opts

# Optional positional arguments with their documented defaults.
if [ $# -gt 4 ]; then
  logdir=$5
else
  logdir=$bnf_data/log
fi
if [ $# -gt 5 ]; then
  bnfdir=$6
else
  bnfdir=$bnf_data/data
fi

# Validate the input before creating or modifying anything on disk.
if [ ! -f "$data/feats.scp" ]; then
  echo >&2 "The file $data/feats.scp does not exist!"
  exit 1
fi

# CMVN options are taken from the model directory. If the file is missing,
# cat reports it on stderr, cmvn_opts stays empty and the script continues.
cmvn_opts=$(cat "$nnetdir/cmvn_opts")

# Use final.raw from nnetdir if present, otherwise fall back to final.mdl.
bnf_nnet=$nnetdir/final.raw
if [ ! -f "$bnf_nnet" ]; then
  if [ ! -f "$nnetdir/final.mdl" ]; then
    echo "$0: No such file $bnf_nnet or $nnetdir/final.mdl"
    exit 1
  else
    bnf_nnet=$nnetdir/final.mdl
  fi
fi

# Initialise explicitly so that a same-named environment variable cannot leak
# into the job command line when running on CPU.
compute_queue_opt=
if $use_gpu; then
  compute_queue_opt="--gpu 1"
  compute_gpu_opt="--use-gpu=yes"
  if ! cuda-compiled; then
    echo "$0: WARNING: you are running with one thread but you have not compiled"
    echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
    echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
    exit 1
  fi
else
  echo "$0: without using a GPU this will be very slow. nnet3 does not yet support multiple threads."
  compute_gpu_opt="--use-gpu=no"
fi

## Set up input features of nnet
name=$(basename "$data")
sdata=$data/split$nj

mkdir -p "$logdir"
mkdir -p "$bnf_data"
mkdir -p "$bnfdir"
# NOTE(review): this writes into the *model* directory, not the output
# directory; kept as-is for backward compatibility -- confirm it is intended.
echo "$nj" > "$nnetdir/num_jobs"

# Re-split the data unless an up-to-date split directory already exists.
[[ -d "$sdata" && "$data/feats.scp" -ot "$sdata" ]] \
  || split_data.sh "$data" "$nj" || exit 1

use_ivector=false
if [ -n "$ivector_dir" ]; then
  use_ivector=true
  steps/nnet2/check_ivectors_compatible.sh "$nnetdir" "$ivector_dir" || exit 1
fi

# Per-job input pipelines. "JOB" is left as a literal placeholder here;
# presumably $cmd substitutes the job index for it -- confirm against
# run.pl/queue.pl.
feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
ivector_feats="scp:utils/filter_scp.pl $sdata/JOB/utt2spk $ivector_dir/ivector_online.scp |"

# Prints, in job order, the contents of the per-job scp files 1..$nj that
# belong to this run. Stops with a non-zero status at the first unreadable
# file. Reads the globals nj, bnfdir and name.
cat_job_scps() {
  local n
  for n in $(seq "$nj"); do
    cat "$bnfdir/raw_bnfeat_$name.$n.scp" || return 1
  done
}

if [ "$stage" -le 1 ]; then
  echo "$0: Generating bottleneck (BNF) features using $bnf_nnet model as output of "
  echo " component-node with name $bnf_name."
  # Redefine the network's "output" node to read from the bottleneck node.
  echo "output-node name=output input=$bnf_name" > "$bnf_data/output.config"
  modified_bnf_nnet="nnet3-copy --nnet-config=$bnf_data/output.config $bnf_nnet - |"
  ivector_opts=
  if $use_ivector; then
    ivector_period=$(cat "$ivector_dir/ivector_period") || exit 1
    # The embedded single quotes are literal characters at this point;
    # presumably they are re-parsed by the shell that $cmd launches -- confirm.
    ivector_opts="--online-ivector-period=$ivector_period --online-ivectors='$ivector_feats'"
  fi
  # $cmd, $compute_queue_opt, $compute_gpu_opt and $ivector_opts are left
  # unquoted on purpose: they may hold several words (or be empty) and rely on
  # word splitting. The escaped \| is handed to $cmd as a literal argument.
  $cmd $compute_queue_opt JOB=1:$nj "$logdir/make_bnf_$name.JOB.log" \
    nnet3-compute $compute_gpu_opt $ivector_opts "$modified_bnf_nnet" "$feats" ark:- \| \
    copy-feats --compress=$compress ark:- \
    "ark,scp:$bnfdir/raw_bnfeat_$name.JOB.ark,$bnfdir/raw_bnfeat_$name.JOB.scp" || exit 1
fi

# Sanity check: every input utterance must have produced a BNF entry.
# Only the scp files of jobs 1..$nj are counted -- the same set that is
# concatenated below -- so leftovers from an earlier run with a larger --nj
# in the same $bnfdir cannot trigger a spurious mismatch.
N0=$(wc -l < "$data/feats.scp")
N1=$(cat_job_scps | wc -l)
if [[ "$N0" != "$N1" ]]; then
  echo "$0: Error generating BNF features for $name (original:$N0 utterances, BNF:$N1 utterances)"
  exit 1
fi

# Concatenate feats.scp into bnf_data
cat_job_scps > "$bnf_data/feats.scp" || exit 1

# Carry over whichever of the standard data-directory files exist.
for f in segments spk2utt text utt2spk wav.scp char.stm glm kws reco2file_and_channel stm; do
  if [ -e "$data/$f" ]; then
    cp -r "$data/$f" "$bnf_data/$f"
  fi
done

echo "$0: computing CMVN stats."
# Do not report success if the CMVN computation failed.
steps/compute_cmvn_stats.sh "$bnf_data" || exit 1

echo "$0: done making BNF features."

exit 0