Blame view

Scripts/utils/nnet/analyze_alignments.sh 2.13 KB
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  #!/bin/bash
  # Copyright 2012 Karel Vesely
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  # To be run from ..
  
  
  # Require exactly four positional arguments; otherwise print the usage text
  # and stop. The example line lists all four arguments, including <lang>.
  if [ "$#" -ne 4 ]; then
     echo "Usage: $0 <ali-tag> <ali-rspecifier> <transition-model> <lang>"
     echo " e.g.: $0 'TRAINING SET' 'ark:gunzip -c \$alidir/ali.gz |' tri1/final.mdl data/lang"
     exit 1;
  fi
  
  # Source the environment setup if a path.sh exists in the current directory.
  # The explicit "./" stops '.' from searching $PATH and picking up some other
  # file that happens to be named path.sh.
  if [ -f path.sh ]; then . ./path.sh; fi
  
  # Positional arguments, in the order given by the usage message.
  tag=$1      # label printed in every section header
  ali=$2      # alignment rspecifier handed to the analysis tools
  model=$3    # transition model given to ali-to-pdf / ali-to-phones
  lang=$4     # directory that contains phones.txt
  
  # Scratch file; the script also writes siblings with the suffixes
  # .0 .1 .2 .1a .2a next to it.
  tmpfile=$(mktemp) || { echo "$0: mktemp failed" >&2; exit 1; }
  # Remove every scratch file when the shell exits, so an early termination
  # does not leave them behind. -f keeps this quiet when they are already gone.
  trap 'rm -f -- "$tmpfile"{,.0,.1,.2,.1a,.2a}' EXIT
  
  # Pdf-level statistics: the output of ali-to-pdf (run on "$model" and "$ali")
  # is piped into analyze-counts. The result file is "$tmpfile.0"; 2>&1 folds
  # the tool's stderr into stdout so it appears in this report. The scratch
  # path is quoted so a temp directory containing whitespace does not split it.
  echo "%%%%%% .pdf STATS, $tag %%%%%%"
  analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true \
    "ark:ali-to-pdf --print-args=false $model \"$ali\" ark:- 2>/dev/null |" \
    "$tmpfile.0" 2>&1
  echo
  
  # Phone-level statistics. Both analyze-counts runs read the same per-frame
  # phone stream, so the rspecifier is built once and reused.
  phone_rspecifier="ark:ali-to-phones --print-args=false --per-frame=true $model \"$ali\" ark:- |"
  echo "%%%%%% .phone STATS, $tag %%%%%%"
  #prob stats: rescaled to probabilities, tool messages shown in the report
  analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true \
    "$phone_rspecifier" "$tmpfile.1" 2>&1
  #frame stats: plain counts, tool messages discarded
  analyze-counts --binary=false \
    "$phone_rspecifier" "$tmpfile.2" 2>/dev/null
  echo
  
  # Statistics computed directly on the "$ali" input; the counts output goes to
  # /dev/null and only the tool's messages (stderr merged into stdout) are shown.
  printf '%s\n' "%%%%%% .ali STATS, $tag %%%%%%"
  analyze-counts \
    --binary=false \
    --rescale-to-probs=true \
    --show-histogram=true \
    "$ali" /dev/null 2>&1
  printf '\n'
  
  # counts_to_column FILE
  # Strip the leading "[ " and the first "]" from each line of FILE, then put
  # every space-separated token on its own line. Output goes to stdout.
  counts_to_column() {
    sed -e 's|^\s*\[ ||' -e 's|\]||' "$1" | tr ' ' '\n'
  }
  
  # format_phone_stats
  # Read whitespace-separated rows on stdin and print the first four fields as
  # an aligned table row: string, integer, float, integer.
  format_phone_stats() {
    awk '{printf "%10s %4d  %f %d\n", $1, $2, $3, $4;}'
  }
  
  echo "%%%%%% .phone STATS (VERBOSE), $tag %%%%%%"
  #paste and show the logs
  counts_to_column "$tmpfile.1" >"$tmpfile.1a"
  counts_to_column "$tmpfile.2" >"$tmpfile.2a"
  # Put the two value columns side by side, then prepend the rows of phones.txt.
  paste "$tmpfile.1a" "$tmpfile.2a" >"$tmpfile"
  paste "$lang/phones.txt" "$tmpfile" | format_phone_stats
  echo
  
  # Print the contents of "$tmpfile.0" under a header labelled with the
  # caller-supplied tag, like the other section headers.
  echo "%%%%%% .pdf STATS (VERBOSE), $tag %%%%%%"
  cat "$tmpfile.0"
  echo "%%%%%% END"
  
  # Delete the scratch file and all of its suffixed siblings.
  rm -- "$tmpfile"{,.0,.1,.2,.1a,.2a}