Blame view
Scripts/steps/make_fmllr_feats.sh
3.62 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
#!/bin/bash

# Copyright 2012 Karel Vesely
#                Johns Hopkins University (Author: Daniel Povey),
#
# Apache 2.0.

# This script is for use in neural network training and testing; it dumps
# (LDA+MLLT or splice+delta) + fMLLR features in a similar format to
# conventional raw MFCC features.
#
# Usage:
#   make_fmllr_feats.sh [options] <tgt-data-dir> <src-data-dir> <gmm-dir> <log-dir> <fea-dir>
#
# Positional arguments:
#   <tgt-data-dir>  data dir to create; receives the files of <src-data-dir>
#                   and a new feats.scp
#   <src-data-dir>  data dir providing feats.scp, cmvn.scp and utt2spk
#   <gmm-dir>       model dir; if it holds final.mat the feature type is
#                   "lda", otherwise "delta"; splice_opts is read from here
#   <log-dir>       where the per-job logs are written
#   <fea-dir>       where the feature ark/scp files are written
#
# Exit status: 0 on success, 1 on usage error, missing input, or failed step.

# Begin configuration section.
nj=4
cmd=run.pl
transform_dir=
norm_vars=false
# End configuration section.

echo "$0 $*" # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1

if [ $# != 5 ]; then
  echo "Usage: $0 [options] <tgt-data-dir> <src-data-dir> <gmm-dir> <log-dir> <fea-dir>"
  echo "e.g.: $0 data-fmllr/train data/train exp/tri5a exp/make_fmllr_feats/log plp/processed/"
  echo ""
  echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
  echo "what type of features you used (assuming it's one of these two)"
  echo "You can also use fMLLR features-- you have to supply --transform-dir option."
  echo ""
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --transform-dir <transform-dir> # where to find fMLLR transforms."
  exit 1
fi

data=$1
srcdata=$2
gmmdir=$3
logdir=$4
feadir=$5

# Naming notes kept from the original author:
#   srcdir=$1  -> gmmdir
#   data=$2    -> srcdata
#   dir=$3     -> various
#   tgtdata=$4 -> feadir

sdata=$srcdata/split$nj
# A missing splice_opts file is tolerated on purpose: the options stay empty.
splice_opts=$(cat "$gmmdir/splice_opts" 2>/dev/null)

mkdir -p "$data" "$logdir" "$feadir"

# (Re)split the source data unless a split dir newer than feats.scp exists.
[[ -d "$sdata" && "$srcdata/feats.scp" -ot "$sdata" ]] \
  || split_data.sh "$srcdata" "$nj" || exit 1

for f in "$sdata/1/feats.scp" "$sdata/1/cmvn.scp"; do
  [ ! -f "$f" ] && echo "$0: no such file $f" && exit 1
done

# The presence of final.mat in the model dir selects the LDA pipeline.
if [ -f "$gmmdir/final.mat" ]; then
  feat_type=lda
else
  feat_type=delta
fi
echo "$0: feature type is $feat_type"

# Build the per-job input pipeline. The literal token JOB is left in the
# string for $cmd to handle (it is invoked with JOB=1:$nj below).
case $feat_type in
  delta)
    feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
    ;;
  lda)
    feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $gmmdir/final.mat ark:- ark:- |"
    ;;
  *)
    echo "Invalid feature type $feat_type" && exit 1
    ;;
esac

if [ -n "$transform_dir" ]; then # add transforms to features...
  echo "Using fMLLR transforms from $transform_dir"
  [ ! -f "$transform_dir/trans.1" ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1
  # The job-count check is disabled; instead every trans.* file is
  # concatenated, so each job reads all transforms regardless of $nj.
  # [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
  #   echo "Mismatch in number of jobs with $transform_dir" && exit 1;
  # feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk \"ark:cat $transform_dir/trans.* |\" ark:- ark:- |"
fi

# Prepare the target dir: copy the regular files of the source data dir.
# Sub-directories (e.g. the split$nj dir created above) are skipped, since a
# plain `cp dir/*` reports an error for each of them.
for f in "$srcdata"/*; do
  if [ -f "$f" ]; then
    cp "$f" "$data"/ || exit 1
  fi
done
# These refer to the source features; feats.scp is regenerated below.
rm -f "$data"/{feats.scp,cmvn.scp}

# Make $feadir an absolute pathname.
case $feadir in
  /*) ;;
  *) feadir=$PWD/$feadir ;;
esac

name=$(basename "$data")

# Forward the feats.
# $cmd is deliberately unquoted: it may carry its own options
# (e.g. "queue.pl <queue opts>") and must be word-split.
# shellcheck disable=SC2086
$cmd "JOB=1:$nj" "$logdir/make_fmllr_feats.JOB.log" \
  copy-feats "$feats" \
  "ark,scp:$feadir/feats_fmllr_$name.JOB.ark,$feadir/feats_fmllr_$name.JOB.scp" || exit 1

# Merge the per-job SCP files, in job order, into a single feats.scp.
for ((n = 1; n <= nj; n++)); do
  cat "$feadir/feats_fmllr_$name.$n.scp" || exit 1
done > "$data/feats.scp"

echo "$0 finished... $srcdata -> $data ($gmmdir)"

exit 0