Blame view
egs/wsj/s5/steps/nnet/make_bn_feats.sh
5.1 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
#!/bin/bash # Copyright 2012-2015 Brno University of Technology (author: Karel Vesely) # Apache 2.0 # To be run from .. (one directory up from here) # see ../run.sh for example # Begin configuration section. nj=4 cmd=run.pl remove_last_components=4 # remove N last components from the nnet nnet_forward_opts= use_gpu=no htk_save=false ivector= # rx-specifier with i-vectors (ark-with-vectors), # End configuration section. echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; set -euo pipefail if [ $# != 5 ]; then echo "usage: $0 [options] <tgt-data-dir> <src-data-dir> <nnet-dir> <log-dir> <abs-path-to-bn-feat-dir>"; echo "options: " echo " --cmd 'queue.pl <queue opts>' # how to run jobs." echo " --nj <nj> # number of parallel jobs" echo " --remove-last-components <N> # number of NNet Components to remove from the end" echo " --use-gpu (no|yes|optional) # forwarding on GPU" exit 1; fi if [ -f path.sh ]; then . ./path.sh; fi data=$1 srcdata=$2 nndir=$3 logdir=$4 bnfeadir=$5 ######## CONFIGURATION # copy the dataset metadata from srcdata. mkdir -p $data $logdir $bnfeadir || exit 1; utils/copy_data_dir.sh $srcdata $data; rm -f $data/{feats,cmvn}.scp 2>/dev/null # make $bnfeadir an absolute pathname. [ '/' != ${bnfeadir:0:1} ] && bnfeadir=$PWD/$bnfeadir required="$srcdata/feats.scp $nndir/final.nnet $nndir/final.feature_transform" for f in $required; do [ ! -f $f ] && echo "$0: Missing $f" && exit 1; done name=$(basename $srcdata) sdata=$srcdata/split$nj [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; # Concat feature transform with trimmed MLP: nnet=$bnfeadir/feature_extractor.nnet nnet-concat $nndir/final.feature_transform "nnet-copy --remove-last-components=$remove_last_components $nndir/final.nnet - |" $nnet 2>$logdir/feature_extractor.log || exit 1 nnet-info $nnet >$data/feature_extractor.nnet-info echo "Creating bn-feats into $data" # PREPARE FEATURE EXTRACTION PIPELINE # import config, online_cmvn_opts= cmvn_opts= delta_opts= D=$nndir [ -e $D/online_cmvn_opts ] && online_cmvn_opts=$(cat $D/online_cmvn_opts) [ -e $D/cmvn_opts ] && cmvn_opts=$(cat $D/cmvn_opts) [ -e $D/delta_opts ] && delta_opts=$(cat $D/delta_opts) # # Create the feature stream, feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |" # apply-cmvn-online (optional), [ -n "$online_cmvn_opts" -a ! -f $nndir/global_cmvn_stats.mat ] && echo "$0: Missing $nndir/global_cmvn_stats.mat" && exit 1 [ -n "$online_cmvn_opts" ] && feats="$feats apply-cmvn-online $online_cmvn_opts --spk2utt=ark:$srcdata/spk2utt $nndir/global_cmvn_stats.mat ark:- ark:- |" # apply-cmvn (optional), [ -n "$cmvn_opts" -a ! -f $sdata/1/cmvn.scp ] && echo "$0: Missing $sdata/1/cmvn.scp" && exit 1 [ -n "$cmvn_opts" ] && feats="$feats apply-cmvn $cmvn_opts --utt2spk=ark:$srcdata/utt2spk scp:$srcdata/cmvn.scp ark:- ark:- |" # add-deltas (optional), [ -n "$delta_opts" ] && feats="$feats add-deltas $delta_opts ark:- ark:- |" # add-ivector (optional), if [ -e $D/ivector_dim ]; then [ -z $ivector ] && echo "Missing --ivector, they were used in training!" && exit 1 # Get the tool, ivector_append_tool=append-vector-to-feats # default, [ -e $D/ivector_append_tool ] && ivector_append_tool=$(cat $D/ivector_append_tool) # Check dims, feats_job_1=$(sed 's:JOB:1:g' <(echo $feats)) dim_raw=$(feat-to-dim "$feats_job_1" -) dim_raw_and_ivec=$(feat-to-dim "$feats_job_1 $ivector_append_tool ark:- '$ivector' ark:- |" -) dim_ivec=$((dim_raw_and_ivec - dim_raw)) [ $dim_ivec != "$(cat $D/ivector_dim)" ] && \ echo "Error, i-vector dim. mismatch (expected $(cat $D/ivector_dim), got $dim_ivec in '$ivector')" && \ exit 1 # Append to feats, feats="$feats $ivector_append_tool ark:- '$ivector' ark:- |" fi if [ $htk_save == false ]; then # Run the forward pass, $cmd JOB=1:$nj $logdir/make_bnfeats.JOB.log \ nnet-forward $nnet_forward_opts --use-gpu=$use_gpu $nnet "$feats" \ ark,scp:$bnfeadir/raw_bnfea_$name.JOB.ark,$bnfeadir/raw_bnfea_$name.JOB.scp \ || exit 1; # concatenate the .scp files for ((n=1; n<=nj; n++)); do cat $bnfeadir/raw_bnfea_$name.$n.scp >> $data/feats.scp done # check sentence counts, N0=$(cat $srcdata/feats.scp | wc -l) N1=$(cat $data/feats.scp | wc -l) [[ "$N0" != "$N1" ]] && echo "$0: sentence-count mismatch, $srcdata $N0, $data $N1" && exit 1 echo "Succeeded creating MLP-BN features '$data'" else # htk_save == true # Run the forward pass saving HTK features, $cmd JOB=1:$nj $logdir/make_bnfeats_htk.JOB.log \ mkdir -p $data/htkfeats/JOB \; \ nnet-forward $nnet_forward_opts --use-gpu=$use_gpu $nnet "$feats" ark:- \| \ copy-feats-to-htk --output-dir=$data/htkfeats/JOB ark:- || exit 1 # Make list of htk features, find $data/htkfeats -name *.fea >$data/htkfeats.scp # Check sentence counts, N0=$(cat $srcdata/feats.scp | wc -l) N1=$(find $data/htkfeats.scp | wc -l) [[ "$N0" != "$N1" ]] && echo "$0: sentence-count mismatch, $srcdata $N0, $data/htk* $N1" && exit 1 echo "Succeeded creating MLP-BN features '$data/htkfeats.scp'" fi |