Blame view

egs/wsj/s5/steps/online/nnet2/get_pca_transform.sh 2.1 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
  #!/bin/bash
  
  # Copyright 2016  David Snyder
  #
  # This script computes a PCA transform on top of spliced features processed with
  # apply-cmvn-online.
  #
  #
  # Apache 2.0.
  
  # Begin configuration.
  # Default values for the options of this script.  They are assigned before
  # parse_options.sh is sourced further down.

  # Job control.
  stage=0     # the est-pca step runs only when stage <= 0
  cmd=run.pl  # launcher that est-pca is run through
  config=""   # config file containing options (see the usage message)

  # PCA estimation (passed to est-pca).
  normalize_mean=true      # If the PCA transform centers
  normalize_variance=true  # If the PCA transform normalizes the variance
  dim=40                   # The dim after applying PCA

  # Feature pipeline.
  online_cmvn_opts=""  # extra options handed to apply-cmvn-online
  splice_opts=""       # extra options handed to splice-feats
  subsample=5          # subsample features with this periodicity
  max_utts=5000        # maximum number of files to use
  
  # Print the command line for logging.  "$*" joins the arguments into a single
  # word, which is what echo prints anyway.
  echo "$0 $*"

  # path.sh is optional: it is sourced only if it exists in the current directory.
  [ -f path.sh ] && . ./path.sh
  # parse_options.sh is looked up by name (no directory given).
  # NOTE(review): presumably it consumes the leading --option arguments and
  # overrides the configuration variables above -- confirm against that script.
  . parse_options.sh || exit 1;

  # Exactly two positional arguments must remain: <data> and <dir>.
  if [ $# -ne 2 ]; then
    # Report the name this script was actually invoked as, instead of a
    # hard-coded path that can drift from the real location.
    echo "Usage: $0 [options] <data> <dir>"
    echo " e.g.: $0 data/train_si84 exp/tri2b"
    echo "Main options (for others, see top of script file)"
    echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
    echo "  --config <config-file>                           # config containing options"
    echo "  --stage <stage>                                  # stage to do partial re-run from."
    exit 1;
  fi
  
  # Positional arguments: the input data directory and the output directory.
  data=$1
  dir=$2

  # Both files are read later in this script: feats.scp feeds the feature
  # pipeline and cmvn.scp is summed into global_cmvn.stats.  Checking both up
  # front gives a clear message instead of a failure further down.
  for f in "$data/feats.scp" "$data/cmvn.scp"; do
    if [ ! -f "$f" ]; then
      echo "$0: expecting file $f to exist"
      exit 1
    fi
  done
  
  # Create the output directory together with its log subdirectory; nothing
  # below can work if this fails.
  mkdir -p "$dir/log" || exit 1

  # Keep track of the frame-splicing options so that later stages of system
  # building can know what they were.
  echo "$splice_opts" >"$dir/splice_opts"
  # Keep track of the options given to CMVN.
  echo $online_cmvn_opts > "$dir/online_cmvn.conf"

  # Create global_cmvn.stats by summing the entries of cmvn.scp in text mode.
  # stderr goes to a log file rather than /dev/null so that a failure can be
  # diagnosed.
  if ! matrix-sum --binary=false "scp:$data/cmvn.scp" - \
       >"$dir/global_cmvn.stats" 2>"$dir/log/global_cmvn.log"; then
    echo "$0: Error summing cmvn stats, see $dir/log/global_cmvn.log"
    exit 1
  fi
  
  # Input specifier handed to est-pca, assembled one pipeline stage at a time:
  #   1. take a subset of at most $max_utts entries of feats.scp,
  #   2. apply-cmvn-online using the global CMVN stats,
  #   3. splice-feats,
  #   4. subsample-feats, keeping frames with periodicity $subsample.
  # NOTE(review): the trailing '|' presumably tells the reader to take the
  # output of this command line as its input -- confirm in the Kaldi I/O docs.
  feats="ark,s,cs:utils/subset_scp.pl --quiet $max_utts $data/feats.scp |"
  feats="$feats apply-cmvn-online $online_cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
  feats="$feats splice-feats $splice_opts ark:- ark:- |"
  feats="$feats subsample-feats --n=$subsample ark:- ark:- |"

  # Estimate the transform and write it to $dir/final.mat.  $cmd is left
  # unquoted so that a launcher given with extra options splits into words.
  if [ $stage -le 0 ]; then
    $cmd $dir/log/pca_est.log \
      est-pca \
        --dim=$dim \
        --normalize-variance=$normalize_variance \
        --normalize-mean=$normalize_mean \
        "$feats" $dir/final.mat \
      || exit 1;
  fi
  
  # Final status line naming the output directory, then a successful exit.
  printf '%s\n' "Done estimating PCA transform in $dir"

  exit 0