get_hmm_states.pl
2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/perl
# Usage: get_hmm_states.pl < MMF > states_file
# Output format is:
# each state starts with a "<STATE> macro-name number-of-mixtures" line, and
# each mixture then has 3 lines: mixture-weight, mean, variance.
# <STATE> A_s2_25 5
# 7.630738e-02
# 1.340054e-01 8.793820e-01 3.381338e+00 4.707607e+00 2.738673e+00 2.130200e+00 1.505306e+00 5.386179e-01 -2.053278e+00 -2.369776e+00 -1.863615e+00 1.138925e-01 -5.315916e+00 -4.409675e-01 3.801185e-01 5.222713e-01 6.952143e-01 5.838995e-01 2.584392e-01 1.045933e-01 2.842196e-01 1.797329e-01 -2.383467e-01 -3.872046e-01 -1.208599e-01 -1.123077e+00 1.104437e-01 -4.043639e-01 -5.273581e-01 -5.055419e-01 -3.857868e-01 -4.783591e-02 -1.888570e-02 -2.677171e-01 2.246067e-01 2.122188e-01 2.536023e-01 5.772380e-03 5.641578e-01
# 1.210491e+01 2.887772e+01 1.444726e+01 2.377322e+01 2.243100e+01 3.112299e+01 3.070573e+01 2.498269e+01 2.727588e+01 2.047188e+01 1.985626e+01 9.705117e+00 4.483324e+01 8.487927e-01 1.378040e+00 1.394030e+00 1.389163e+00 1.826177e+00 2.960613e+00 2.533339e+00 1.901944e+00 1.711646e+00 2.056901e+00 1.904792e+00 1.298796e+00 1.229760e+00 1.435853e-01 2.742554e-01 2.303578e-01 3.856331e-01 3.890942e-01 5.537956e-01 5.028566e-01 4.502904e-01 5.239967e-01 4.440382e-01 3.432592e-01 2.598946e-01 2.945138e-01
# 1.673454e-01
# ...
# ...
use strict;
use warnings;

# Read one line from $in. Dies instead of returning undef when the input ends,
# so a truncated file is reported rather than parsed as an empty line.
#   $in       - input filehandle
#   $expected - description of what the caller was waiting for (error text only)
sub _next_line {
    my ($in, $expected) = @_;
    my $line = <$in>;
    defined $line
        or die "Unexpected end of input while expecting $expected (after line $.)\n";
    return $line;
}

# Consume lines from $in up to and including the <ENDHMM> line.
# Dies if the input ends first; the previous implementation looped forever
# in that case, and also when the final <ENDHMM> had no trailing newline.
sub _skip_hmm_definition {
    my ($in) = @_;
    while (defined(my $line = <$in>)) {
        # Surrounding whitespace (including a CR from CRLF input) is tolerated.
        return if $line =~ m/^\s*<ENDHMM>\s*$/;
    }
    die "Unexpected end of input: <BEGINHMM> without matching <ENDHMM>\n";
}

# Parse the body of one state macro from $in and write it to $out as
#   <STATE> name num-mixtures
# followed, for every mixture, by the mixture weight, the line after <MEAN>
# and the line after <VARIANCE>. Dies on any line that is not in the
# expected <NUMMIXES>/<MIXTURE>/<MEAN>/<VARIANCE>/GCONST sequence.
sub _print_state {
    my ($in, $out, $macro_name) = @_;

    my $line = _next_line($in, '<NUMMIXES>');
    $line =~ m/<NUMMIXES> (\d+)/ or die "bad line (1) $line";
    my $num_mixes = $1;
    print {$out} "<STATE> $macro_name $num_mixes\n";

    for my $n (1 .. $num_mixes) {
        $line = _next_line($in, '<MIXTURE>');
        $line =~ m/<MIXTURE> (\d+) (\S+)/ or die "bad line, n=$n (2) $line";
        my ($mix_index, $mix_weight) = ($1, $2);
        $n == $mix_index or die "Mixture number mismatch.";
        print {$out} "$mix_weight\n";

        $line = _next_line($in, '<MEAN>');
        $line =~ m/<MEAN>/ or die "bad line (3) $.";    # header only, e.g. <MEAN> 39
        print {$out} _next_line($in, 'mean values');    # copied through verbatim

        $line = _next_line($in, '<VARIANCE>');
        $line =~ m/<VARIANCE>/ or die "bad line (4) $.";    # e.g. <VARIANCE> 39
        print {$out} _next_line($in, 'variance values');    # copied through verbatim

        $line = _next_line($in, 'GCONST');
        $line =~ m/GCONST/ or die "Unexpected line (5) $line (line $.)";
    }
    return;
}

# Scan an MMF on $in and write every top-level state macro (~s "name") to $out.
# Everything between <BEGINHMM> and <ENDHMM> is skipped, to avoid parsing
# lines like ~s "m_s4_6" that appear inside HMM definitions.
sub extract_states {
    my ($in, $out) = @_;
    while (defined(my $line = <$in>)) {
        if ($line =~ m/<BEGINHMM>/) {
            _skip_hmm_definition($in);
        }
        if ($line =~ m/^~s "(.+)"/) {    # state macro begins.
            my $macro_name = $1;
            _print_state($in, $out, $macro_name);
        }
    }
    return;
}

# Run as a script: read the files named on the command line (or STDIN) and
# write to STDOUT. Skipped when this file is loaded from other code.
extract_states(\*ARGV, \*STDOUT) unless caller;

1;