Blame view
egs/dihard_2018/v1/local/prepare_feats.sh
2.93 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
#!/bin/bash # # Apache 2.0. # This script adds deltas, applies sliding window CMVN and writes the features to disk. # # Although this kind of script isn't necessary in speaker recognition recipes, # it can be helpful in the diarization recipes. The script # diarization/extract_ivectors.sh extracts i-vectors from very # short (e.g., 1-2 seconds) segments. Therefore, in order to apply the sliding # window CMVN in a meaningful way, it must be performed prior to performing # the subsegmentation. nj=40 cmd="run.pl" stage=0 norm_vars=false center=true compress=true cmn_window=300 delta_window=3 delta_order=2 echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# != 3 ]; then echo "Usage: $0 <in-data-dir> <out-data-dir> <feat-dir>" echo "e.g.: $0 data/train data/train_no_sil exp/make_ivector_features" echo "Options: " echo " --nj <nj> # number of parallel jobs" echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." echo " --norm-vars <true|false> # If true, normalize variances in the sliding window cmvn" exit 1; fi data_in=$1 data_out=$2 dir=$3 name=`basename $data_in` for f in $data_in/feats.scp ; do [ ! -f $f ] && echo "$0: No such file $f" && exit 1; done # Set various variables. mkdir -p $dir/log mkdir -p $data_out featdir=$(utils/make_absolute.sh $dir) if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then utils/create_split_dir.pl \ /export/b{14,15,16,17}/$USER/kaldi-data/egs/dihard_2018/v1/ivector-$(date +'%m_%d_%H_%M')/ivector_cmvn_feats/storage $featdir/storage fi for n in $(seq $nj); do # the next command does nothing unless $featdir/storage/ exists, see # utils/create_data_link.pl for more info. utils/create_data_link.pl $featdir/ivector_cmvn_feats_${name}.${n}.ark done cp $data_in/utt2spk $data_out/utt2spk cp $data_in/spk2utt $data_out/spk2utt cp $data_in/wav.scp $data_out/wav.scp write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB" sdata_in=$data_in/split$nj; utils/split_data.sh $data_in $nj || exit 1; delta_opts="--delta-window=$delta_window --delta-order=$delta_order" $cmd JOB=1:$nj $dir/log/create_ivector_cmvn_feats_${name}.JOB.log \ add-deltas $delta_opts scp:${sdata_in}/JOB/feats.scp ark:- \| \ apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \ ark:- ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$featdir/ivector_cmvn_feats_${name}.JOB.ark,$featdir/ivector_cmvn_feats_${name}.JOB.scp || exit 1; for n in $(seq $nj); do cat $featdir/ivector_cmvn_feats_${name}.$n.scp || exit 1; done > ${data_out}/feats.scp || exit 1 for n in $(seq $nj); do cat $featdir/log/utt2num_frames.$n || exit 1; done > $data_out/utt2num_frames || exit 1 rm $featdir/log/utt2num_frames.* echo "$0: Succeeded creating ivector features for $name" |