Blame view

egs/wsj/s5/steps/best_path_weights.sh 3.36 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
  #!/bin/bash
  
  # Copyright 2014-17 Vimal Manohar
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  
  # This script gets from the lattice the best path alignments and frame-level
  # posteriors of the pdfs in the best path alignment.
  # The output directory has the format of an alignment directory.
  # It can optionally read alignments from a directory, in which case,
  # the script gets frame-level posteriors of the pdf corresponding to those
  # alignments.
  # The frame-level posteriors are in the form of kaldi vectors and are
  # output in weights.scp.
  
  # Abort the script as soon as any unguarded command exits with a non-zero
  # status.
  set -e
  
  # begin configuration section.
  # Command prefix used to launch the per-job sub-processes (see --cmd in the
  # usage text).
  cmd=run.pl
  # Both processing steps further down run only while stage <= 1.
  stage=-10
  # Acoustic scale handed to lattice-best-path and lattice-to-post through
  # --acoustic-scale.
  acwt=0.1
  #end configuration section.
  
  # Pick up environment settings from ./path.sh when that file exists.
  if [ -f ./path.sh ]; then . ./path.sh; fi
  # NOTE(review): presumably parses "--name value" command-line options and
  # overrides the variables defined above (the usage text lists --cmd) --
  # confirm against utils/parse_options.sh.
  . utils/parse_options.sh || exit 1;
  
  # Exactly three positional arguments (<data-dir> <decode-dir> <out-dir>) or
  # four (with <ali-dir> inserted before <out-dir>) are accepted.  Anything else
  # prints the usage message on stderr and aborts with status 1.
  # The option list covers every variable of the configuration section, not
  # only --cmd.
  if [ $# -ne 3 ] && [ $# -ne 4 ]; then
    cat >&2 <<EOF
      Usage: $0 [options] <data-dir> <decode-dir> [<ali-dir>] <out-dir>
        E.g. $0 data/train_unt.seg exp/tri1/decode exp/tri1/best_path
      Options:
        --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
        --stage <n>                     # the best-path and posterior steps only run when stage <= 1.
        --acwt <float>                  # acoustic scale passed to lattice-best-path and lattice-to-post.
  EOF
    
    exit 1;
  fi
  
  # Positional arguments: <data-dir> <decode-dir> [<ali-dir>] <out-dir>.
  # ($data is not referenced again in the visible part of this script.)
  data=$1
  decode_dir=$2
  dir=${!#}  # indirect expansion of $#, i.e. the last argument to the script
  
  # With four arguments the third one names the alignment directory; with
  # three, alignments are read from the output directory itself.
  case $# in
    4) ali_dir=$3 ;;
    *) ali_dir=$dir ;;
  esac
  
  mkdir -p $dir
  
  # Carry the decode directory's job count over to the output directory.
  nj=$(cat $decode_dir/num_jobs)
  echo $nj > $dir/num_jobs
  
  # Step 1 (runs while stage <= 1): run lattice-best-path over each job's
  # lattice archive.  Its first output table is discarded (ark:/dev/null); the
  # second is gzip-compressed into $dir/ali.JOB.gz.
  # NOTE(review): JOB is presumably replaced by each job index 1..$nj by $cmd.
  if [ $stage -le 1 ]; then
    mkdir -p $dir/log
    lat_rspecifier="ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |"
    ali_wspecifier="ark:| gzip -c > $dir/ali.JOB.gz"
    $cmd JOB=1:$nj $dir/log/best_path.JOB.log \
      lattice-best-path --acoustic-scale=$acwt \
        "$lat_rspecifier" ark:/dev/null "$ali_wspecifier" || exit 1
  fi
  
  # Locate the directory holding final.mdl: the parent of the decode directory
  # when final.mdl is found there, otherwise the decode directory itself.
  src_dir=$decode_dir
  if [ -f $(dirname $decode_dir)/final.mdl ]; then
    src_dir=$(dirname $decode_dir)
  fi
  
  # cmvn_opts is mandatory; the remaining files are copied only when present.
  cp $src_dir/cmvn_opts $dir/ || exit 1
  for extra in final.mat splice_opts frame_subsampling_factor; do
    [ -f $src_dir/$extra ] || continue
    cp $src_dir/$extra $dir
  done
  
  # make $dir an absolute pathname ($fdir is what gets written into the
  # ark,scp output specifier of the posterior step).  A pattern match replaces
  # the former perl one-liner: a path that already starts with "/" is kept,
  # anything else is prefixed with the current working directory.
  case $dir in
    /*) fdir=$dir ;;
    *)  fdir=$PWD/$dir ;;
  esac
  
  model=$src_dir/final.mdl
  tree=$src_dir/tree
  
  # Fail early if a required input is missing.  The message goes to stderr and
  # the exit no longer depends on echo succeeding (the former
  # "echo ... && exit 1" skipped the exit when echo itself failed).
  for f in "$model" "$decode_dir/lat.1.gz" "$tree"; do
    if [ ! -f "$f" ]; then
      echo "$0: expecting file $f to exist" >&2
      exit 1
    fi
  done
  
  # Keep a copy of the model and tree alongside the outputs.
  cp "$model" "$tree" "$dir" || exit 1
  
  # The alignment archives are read job-by-job next to the lattice archives, so
  # both directories must have been produced with the same number of jobs.
  ali_nj=$(cat $ali_dir/num_jobs) || exit 1
  if [ $nj -ne $ali_nj ]; then
    echo "$0: $decode_dir and $ali_dir have different number of jobs. Redo alignment with $nj jobs." >&2
    exit 1
  fi
  
  # convert-ali takes the model the input alignments were produced with as its
  # first argument.  The alignments are read from $ali_dir, so use that
  # directory's final.mdl when it exists.  $dir/final.mdl is only a copy of
  # $model, which made the conversion a no-op even for a separate <ali-dir>.
  # When <ali-dir> is omitted, $ali_dir equals $dir and nothing changes; the
  # old path is also kept as a fallback for directories without final.mdl.
  ali_model=$dir/final.mdl
  if [ -f $ali_dir/final.mdl ]; then
    ali_model=$ali_dir/final.mdl
  fi
  
  # Step 2 (runs while stage <= 1, same gate as the best-path step): convert
  # lattices to posteriors, map them to pdf-level posteriors, and look up the
  # posterior of the aligned pdf on each frame.  Results are written per job as
  # weights.JOB.ark / weights.JOB.scp using the absolute path $fdir.
  if [ $stage -le 1 ]; then
    # Normally created by the best-path step; repeated here so that this step
    # does not rely on it.
    mkdir -p $dir/log
    $cmd JOB=1:$nj $dir/log/get_post.JOB.log \
      lattice-to-post --acoustic-scale=$acwt \
        "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \
      post-to-pdf-post $model ark,s,cs:- ark:- \| \
      get-post-on-ali ark,s,cs:- \
      "ark,s,cs:gunzip -c $ali_dir/ali.JOB.gz | convert-ali $ali_model $model $tree ark,s,cs:- ark:- | ali-to-pdf $model ark,s,cs:- ark:- |" \
      "ark,scp:$fdir/weights.JOB.ark,$fdir/weights.JOB.scp" || exit 1
  fi
  
  # Merge the per-job weights.<n>.scp indexes, in job order, into a single
  # weights.scp.  The merged list is assembled in a temporary file and moved
  # into place only after every per-job file has been read.  Redirecting
  # straight into weights.scp truncated an existing index before the first
  # per-job file was even opened, e.g. when re-running with --stage 2 after the
  # per-job files had already been removed.
  merged_scp=$dir/weights.scp.tmp
  for ((n = 1; n <= nj; n++)); do
    cat "$dir/weights.$n.scp" || { rm -f "$merged_scp"; exit 1; }
  done > "$merged_scp"
  mv "$merged_scp" "$dir/weights.scp"
  
  # The per-job indexes are now redundant.  Only *.scp files are removed; the
  # weights.<n>.ark archives stay in place.
  rm "$dir"/weights.*.scp
  
  exit 0