Blame view

egs/wsj/s5/steps/diagnostic/analyze_alignments.sh 1.78 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  #!/bin/bash
  #
  # Copyright Johns Hopkins University (Author: Daniel Povey) 2016.  Apache 2.0.
  
  # This script performs some analysis of alignments on disk, currently in terms
  # of phone lengths, including lengths of leading and trailing silences.
  
  
  # begin configuration section.
  # Default launcher for the sub-processes; presumably overridden by --cmd
  # through parse_options.sh (see the usage text) -- confirm there.
  cmd=run.pl
  #end configuration section.

  # Record the invocation exactly as it was given, before options are consumed.
  echo "$0 $@"

  # path.sh is optional: source it only when the working directory has one.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  # Option parsing is mandatory; give up if it cannot be sourced or fails.
  . parse_options.sh || exit 1
  
  # Exactly two positional arguments (<lang-dir> and <ali-dir>) must remain
  # after option parsing; otherwise print the usage text and exit with status 1.
  # The log path named in the last line matches the file this script actually
  # writes: <ali-dir>/log/analyze_alignments.log.
  if [ $# -ne 2 ]; then
    echo "Usage: $0 [options] <lang-dir> <ali-dir>"
    echo " Options:"
    echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
    echo "e.g.:"
    echo "$0 data/lang exp/tri4b"
    echo "This script writes some diagnostics to <ali-dir>/log/analyze_alignments.log"
    exit 1;
  fi
  
  # Positional arguments: the lang directory and the alignment directory.
  lang=$1
  dir=$2

  # Model file expected inside the alignment directory.
  model=$dir/final.mdl

  # Every required input must exist as a regular file; stop with status 1 at
  # the first one that does not.  The expansions are quoted so that a path
  # containing whitespace cannot make the '[' test itself error out, which
  # would otherwise skip the exit and let the script continue unchecked.
  for f in "$lang/words.txt" "$model" "$dir/ali.1.gz" "$dir/num_jobs"; do
    if [ ! -f "$f" ]; then
      echo "$0: expecting file $f to exist"
      exit 1
    fi
  done
  
  # Job count recorded in the alignment directory; it is used later as the
  # upper bound of the JOB range.  Abort if the file cannot be read.
  num_jobs=$(cat "$dir/num_jobs") || exit 1

  # Quoted so that a directory name with whitespace is not split into
  # several separate directories.
  mkdir -p "$dir/log"

  # Remove phone-stats files left over from an earlier run.  The glob stays
  # outside the quotes so it still expands; any rm failure (for example when
  # nothing matches) is deliberately silenced and ignored.
  rm "$dir"/phone_stats.*.gz 2>/dev/null || true
  
  # Per-job phone statistics.  $cmd receives JOB=1:$num_jobs, a log path and
  # the command line below; presumably it runs that command once per JOB value
  # with JOB substituted -- confirm against the $cmd implementation.  The pipe
  # and redirection characters are escaped or quoted so that they reach $cmd
  # as literal arguments instead of being acted on by this shell.
  #   ali-to-phones --write-lengths=true : given $model and the gunzipped
  #       ali.JOB.gz archive, with output spec ark,t:- (presumably text on
  #       stdout).
  #   sed  : removes the leading run of non-space characters and the space
  #       after it from each line (presumably the utterance id -- confirm).
  #   awk  : splits the remainder on " ; " and prints "begin <first field>",
  #       "end <last field>" when there is more than one field, and
  #       "all <field>" for every field.
  #   sort | uniq -c : counts identical lines; the result is gzipped into
  #       $dir/phone_stats.JOB.gz.
  # OFS is written with the \n escape sequence: an awk string constant may not
  # contain a raw line break, which would be a syntax error.
  # The script exits with status 1 if $cmd reports failure.
  $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \
    set -o pipefail '&&' ali-to-phones --write-lengths=true "$model"  \
        "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \
     sed -E 's/^[^ ]+ //' \| \
     awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; if (NF>1) print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
     sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1
  
  # Log file that receives the output of the analysis step.
  summary_log=$dir/log/analyze_alignments.log

  # Feed all per-job statistics to the analysis script through $cmd.  The glob
  # is quoted and the pipe escaped so both reach $cmd as literal arguments
  # (presumably $cmd passes them to a shell -- confirm).  A failure here is
  # reported but does not stop the script.
  $cmd $summary_log \
    gunzip -c "$dir/phone_stats.*.gz" \| \
    steps/diagnostic/analyze_phone_length_stats.py $lang ||
    echo "$0: analyze_phone_length_stats.py failed, but ignoring the error (it's just for diagnostics)"

  # Surface any warnings from the log, then point the user at the full log.
  grep WARNING $summary_log
  echo "$0: see stats in $summary_log"

  # The intermediate per-job statistics are no longer needed.
  rm $dir/phone_stats.*.gz

  # Always report success, whatever the diagnostics above did.
  exit 0