Blame view

LIA_kaldiUtils/gen_topo.pl 2.46 KB
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  #!/usr/bin/perl
  
  # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
  
  # Generate a topology file.  This allows control of the number of states in the
  # non-silence HMMs, and in the silence HMMs.
  
  use strict;
  use warnings;

  # Check one state-count argument and return it as a number.
  #   $value - the raw command-line argument
  #   $min   - smallest acceptable count (the upper bound is always 100)
  #   $kind  - "nonsilence-model" or "silence-model", used in the error message
  # Dies if $value is not a plain non-negative integer within [$min, 100].
  sub _check_state_count {
    my ($value, $min, $kind) = @_;
    # Require pure digits first: a numeric comparison alone would let values
    # such as "3abc" or "3.5" through and they would then be printed verbatim
    # into the <State> lines.
    ( $value =~ m/\A\d+\z/ && $value >= $min && $value <= 100 )
      || die "Unexpected number of $kind states $value\n";
    return $value + 0;
  }

  # Convert a colon-separated list of phone ids ("4:5:6") into the
  # space-separated form printed inside <ForPhones> ("4 5 6").
  # Dies with the script's "bad arguments" message if the result contains
  # anything other than digits and spaces, or does not start with a digit.
  sub _phone_list {
    my ($csl) = @_;
    (my $phones = $csl) =~ s/:/ /g;
    # "*" rather than "+": a list consisting of a single one-digit phone
    # (e.g. "1") is valid and must not be rejected.
    $phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";
    return $phones;
  }

  # Build the <TopologyEntry> for the non-silence phones: $num_states emitting
  # states, each with a self-loop (0.75) and a transition to the next state
  # (0.25), followed by a non-emitting final state.
  # Returns the entry as one newline-terminated string.
  sub _nonsilence_entry {
    my ($num_states, $phones) = @_;
    my @lines = ("<TopologyEntry>", "<ForPhones>", $phones, "</ForPhones>");
    for my $state (0 .. $num_states - 1) {
      my $next = $state + 1;
      push @lines,
        "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $next 0.25 </State>";
    }
    push @lines, "<State> $num_states </State>";  # non-emitting final state.
    push @lines, "</TopologyEntry>";
    return join '', map { "$_\n" } @lines;
  }

  # Build the <TopologyEntry> for the silence phones.  They have a different
  # topology: apart from the first and last states, it is fully connected.
  # Returns the entry as one newline-terminated string.
  sub _silence_entry {
    my ($num_states, $phones) = @_;
    my $last   = $num_states - 1;   # index of the final emitting state
    my $transp = 1.0 / $last;       # probability of each outgoing arc below

    my @lines = ("<TopologyEntry>", "<ForPhones>", $phones, "</ForPhones>");

    # State 0: transitions to all but the last emitting state.
    my $from_first = join '', map { "<Transition> $_ $transp " } 0 .. $last - 1;
    push @lines, "<State> 0 <PdfClass> 0 $from_first</State>";

    # The central states all have transitions to themselves, to each other
    # and to the last emitting state.  The arc list is the same for every
    # central state, so build it once.
    my $from_central = join '', map { "<Transition> $_ $transp " } 1 .. $last;
    for my $state (1 .. $last - 1) {
      push @lines, "<State> $state <PdfClass> $state $from_central</State>";
    }

    # Final emitting state (non-skippable).
    push @lines,
      "<State> $last <PdfClass> $last <Transition> $last 0.75 <Transition> $num_states 0.25 </State>";
    # Final nonemitting state:
    push @lines, "<State> $num_states </State>";
    push @lines, "</TopologyEntry>";
    return join '', map { "$_\n" } @lines;
  }

  # Generate the complete topology text.
  #   $num_nonsil_states - number of states in the non-silence HMMs (1..100)
  #   $num_sil_states    - number of states in the silence HMMs (3..100)
  #   $nonsil_phones     - colon-separated non-silence phone ids
  #   $sil_phones        - colon-separated silence phone ids
  # Returns the <Topology> ... </Topology> block as a string; dies on any
  # invalid argument.
  sub gen_topo {
    my ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @_;

    $num_nonsil_states = _check_state_count($num_nonsil_states, 1, "nonsilence-model");
    $num_sil_states    = _check_state_count($num_sil_states,    3, "silence-model");
    $nonsil_phones     = _phone_list($nonsil_phones);
    $sil_phones        = _phone_list($sil_phones);

    return join '',
      "<Topology>\n",
      _nonsilence_entry($num_nonsil_states, $nonsil_phones),
      _silence_entry($num_sil_states, $sil_phones),
      "</Topology>\n";
  }

  # Command-line entry point: expects exactly four arguments, prints the
  # topology on standard output, or prints usage on STDERR and exits with
  # status 1.
  sub main {
    if (@ARGV != 4) {
      print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
      print STDERR "e.g.:  utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n";
      exit(1);
    }
    print gen_topo(@ARGV);
    return;
  }

  # Run only when executed as a script, so the functions above can also be
  # loaded with require/do without triggering the usage message.
  main() unless caller;

  1;  # true value for require