analyze_alignments.sh
2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
# Copyright 2012 Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
if [ $# != 4 ]; then
echo "Usage: $0 <ali-tag> <ali-rspecifier> <transition-model> <lang>"
echo " e.g.: $0 'TRAINING SET' 'ark:gunzip -c \$alidir/ali.gz |' tri1/final.mdl "
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
tag=$1
ali=$2
model=$3
lang=$4
tmpfile=$(mktemp)
echo "%%%%%% .pdf STATS, $tag %%%%%%"
analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true \
"ark:ali-to-pdf --print-args=false $model \"$ali\" ark:- 2>/dev/null |" \
$tmpfile.0 2>&1
echo
echo "%%%%%% .phone STATS, $tag %%%%%%"
#prob stats
analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true \
"ark:ali-to-phones --print-args=false --per-frame=true $model \"$ali\" ark:- |" \
$tmpfile.1 2>&1
#frame stats
analyze-counts --binary=false \
"ark:ali-to-phones --print-args=false --per-frame=true $model \"$ali\" ark:- |" \
$tmpfile.2 2>/dev/null
echo
echo "%%%%%% .ali STATS, $tag %%%%%%"
analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true "$ali" /dev/null 2>&1
echo
echo "%%%%%% .phone STATS (VERBOSE), $tag %%%%%%"
#paste and show the logs
cat $tmpfile.1 | sed -e 's|^\s*\[ ||' -e 's|\]||' | tr ' ' '\n' >$tmpfile.1a
cat $tmpfile.2 | sed -e 's|^\s*\[ ||' -e 's|\]||' | tr ' ' '\n' >$tmpfile.2a
paste $tmpfile.1a $tmpfile.2a > $tmpfile
paste $lang/phones.txt $tmpfile | awk '{printf "%10s %4d %f %d\n", $1, $2, $3, $4;}'
echo
echo "%%%%%% .pdf STATS (VERBOSE), $part %%%%%%"
cat $tmpfile.0
echo "%%%%%% END"
rm $tmpfile{,.0,.1,.2,.1a,.2a}