Blame view
egs/wsj/s5/steps/online/nnet2/get_pca_transform.sh
2.1 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
#!/bin/bash

# Copyright 2016  David Snyder
#
# This script computes a PCA transform on top of spliced features processed with
# apply-cmvn-online.
#
# Arguments:
#   <data>  data directory; must contain feats.scp and cmvn.scp
#   <dir>   output directory; this script writes splice_opts, online_cmvn.conf,
#           global_cmvn.stats and final.mat there, and hands log/pca_est.log
#           to $cmd as the log file of the estimation job.
#
# Exits 1 on bad usage, missing input files, or failure of any stage.
#
# Apache 2.0.

# Begin configuration.
cmd=run.pl
config=
stage=0
dim=40                   # The dim after applying PCA
normalize_variance=true  # If the PCA transform normalizes the variance
normalize_mean=true      # If the PCA transform centers
splice_opts=
online_cmvn_opts=
max_utts=5000            # maximum number of files to use
subsample=5              # subsample features with this periodicity
# End configuration.

echo "$0 $*"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# != 2 ]; then
  # Use $0 so the usage text always names the script actually being run.
  echo "Usage: $0 [options] <data> <dir>"
  echo " e.g.: $0 data/train_si84 exp/tri2b"
  echo "Main options (for others, see top of script file)"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --config <config-file>                           # config containing options"
  echo "  --stage <stage>                                  # stage to do partial re-run from."
  exit 1;
fi

data=$1
dir=$2

# Both files are read below: feats.scp by the feature pipeline and cmvn.scp by
# matrix-sum.  Check them up front so a missing file gives a clear message
# (matrix-sum's own stderr is discarded further down).
for f in "$data/feats.scp" "$data/cmvn.scp"; do
  if [ ! -f "$f" ]; then
    echo "$0: expecting file $f to exist"
    exit 1
  fi
done

mkdir -p "$dir/log"

# Keep track of frame-splicing options so that later stages of system building
# can know what they were.
echo "$splice_opts" >"$dir/splice_opts"

# Keep track of options to CMVN.  The expansion is deliberately left unquoted,
# as in the original, so the recorded text is unchanged.
# shellcheck disable=SC2086
echo $online_cmvn_opts >"$dir/online_cmvn.conf"

# Create global_cmvn.stats by summing the per-entry stats listed in cmvn.scp.
# stderr of matrix-sum is intentionally discarded; only the exit status is used.
if ! matrix-sum --binary=false "scp:$data/cmvn.scp" - >"$dir/global_cmvn.stats" 2>/dev/null; then
  echo "$0: Error summing cmvn stats"
  exit 1
fi

# Input specifier passed to est-pca as a single argument: take up to $max_utts
# entries of feats.scp, apply online CMVN using the global stats, splice
# frames, then keep every $subsample-th frame.
feats="ark,s,cs:utils/subset_scp.pl --quiet $max_utts $data/feats.scp | apply-cmvn-online $online_cmvn_opts $dir/global_cmvn.stats scp:- ark:- | splice-feats $splice_opts ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |"

if [ "$stage" -le 0 ]; then
  # $cmd is left unquoted on purpose: it may carry extra words
  # (e.g. "queue.pl <queue opts>") that must be split into separate arguments.
  # shellcheck disable=SC2086
  $cmd "$dir/log/pca_est.log" \
    est-pca --dim="$dim" --normalize-variance="$normalize_variance" \
      --normalize-mean="$normalize_mean" "$feats" "$dir/final.mat" || exit 1;
fi

echo "Done estimating PCA transform in $dir"
exit 0