Blame view

egs/wsj/s5/utils/make_lexicon_fst_silprob.pl 5.09 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
  #!/usr/bin/env perl
  
  # THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED.  See
  # utils/lang/make_lexicon_fst_silprob.py which is the python-based replacement.
  
  use warnings; #sed replacement for -w perl parameter
  # Copyright 2010-2011  Microsoft Corporation
  #                2013  Johns Hopkins University (author: Daniel Povey)
  #                2015  Hainan Xu
  #                2015  Guoguo Chen
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  
  # makes lexicon FST, in text form, from lexicon which contains (optional)
  # probabilities of pronunciations, and (mandatory) probabilities of silence
  # before and after the pronunciation. This script is almost the same as
  # the make_lexicon_fst.pl script except for the word-dependent silprobs part.
  
  # Exactly four positional arguments are required.  Anything else prints the
  # usage text to STDERR and exits with status 1.
  #
  # Line breaks are written as "\n" escapes rather than raw line breaks inside
  # the string literals, so that the indentation of this source file never
  # leaks into the message.
  if (@ARGV != 4) {
    print STDERR
      "Usage: $0 lexiconp_silprob_disambig.txt \\\n",
      "       silprob.txt silphone_string sil_disambig_sym > lexiconfst.txt \n",
      "\n",
      "This script is almost the same as the utils/make_lexicon_fst.pl\n",
      "except here we include word-dependent silence probabilities\n",
      "when making the lexicon FSTs. ",
      "For details, see paper \n",
      "http://danielpovey.com/files/2015_interspeech_silprob.pdf\n\n",
      "The lexiconp_silprob_disambig.txt file should have each line like \n\n",
      "word p(pronunciation|word) p(sil-after|word) correction-term-for-sil ",
      "correction-term-for-no-sil phone-1 phone-2 ... phone-N\n\n",
      "The pronunciation would have to include disambiguation symbols;\n",
      "the 2 correction terms above are computed to reflect how much a \n",
      "word affects the probability of a [non-]silence before it. \n",
      "Please see the paper (link given above) for detailed descriptions\n",
      "for how the 2 terms are computed.\n\n",
      "The silprob.txt file contains 4 lines, \n\n",
      "<s> p(sil-after|<s>)\n",
      "</s>_s correction-term-for-sil-for-</s>\n",
      "</s>_n correction-term-for-no-sil-for-</s>\n",
      "overall p(overall-sil)\n\n",
      "Other files are the same as utils/make_lexicon_fst.pl\n";

    exit(1);
  }
  
  # Positional arguments, in order: lexicon file, silence-probability file,
  # silence phone symbol, silence disambiguation symbol.
  $lexfn = shift @ARGV;
  $silprobfile = shift @ARGV;

  ($silphone,$sildisambig) = @ARGV;

  # The two-argument form of open is kept deliberately: with it, passing "-"
  # as a file name still means STDIN, which a three-argument open would treat
  # as a literal file called "-".  The handles L and SP are read by the loops
  # further down.
  # Each failure message names the file that could not be opened and includes
  # the OS error ($!).
  open(L, "<$lexfn") || die "Error opening lexicon $lexfn: $!";
  open(SP, "<$silprobfile") || die "Error opening word-sil-probs $silprobfile: $!";
  
  # Values read from the silence-probability file.  -1 is a sentinel meaning
  # "not seen yet"; each one is overwritten when its keyword line is found.
  $silbeginprob = -1;         # "<s>" line
  $silendcorrection = -1;     # "</s>_s" line
  $nonsilendcorrection = -1;  # "</s>_n" line
  $siloverallprob = -1;       # "overall" line; stored but not used below

  while (<SP>) {
    @A = split(" ", $_);
    $w = shift @A;
    next unless defined $w;   # blank line: nothing to compare against

    # The first field selects which value the second field is stored into;
    # lines with any other first field are ignored.
    if ($w eq "<s>") {
      $silbeginprob = shift @A;
    } elsif ($w eq "</s>_s") {
      $silendcorrection = shift @A;
    } elsif ($w eq "</s>_n") {
      $nonsilendcorrection = shift @A;
    } elsif ($w eq "overall") {
      $siloverallprob = shift @A;
    }
  }
  close(SP);

  # Each of these is later passed to log() (the first one also as 1 - p), and
  # Perl's log() dies on a non-positive argument.  Check here so that a missing
  # or malformed line is reported by name before any output is produced.
  if (! defined $silbeginprob || !($silbeginprob > 0.0 && $silbeginprob < 1.0)) {
    die "Bad or missing <s> silence probability (need 0 < p < 1) in $silprobfile\n";
  }
  if (! defined $silendcorrection || !($silendcorrection > 0.0)) {
    die "Bad or missing </s>_s correction term (need > 0) in $silprobfile\n";
  }
  if (! defined $nonsilendcorrection || !($nonsilendcorrection > 0.0)) {
    die "Bad or missing </s>_n correction term (need > 0) in $silprobfile\n";
  }
  
  # Fixed state numbers.  State 1 is entered on the silence-disambiguation
  # symbol (no silence emitted), state 2 on the silence phone.  Word-internal
  # states are allocated from $nextstate upwards.
  $startstate = 0;
  $nonsilstart = 1;
  $silstart = 2;
  $nextstate = 3;

  # Arcs leaving the start state.  Each output line is tab-separated:
  # source state, destination state, input symbol, output symbol, cost,
  # where cost is the negative natural log of the probability.
  # Lines end in an explicit "\n" so nothing but the fields is emitted.
  $cost = -log($silbeginprob);
  print "$startstate\t$silstart\t$silphone\t<eps>\t$cost\n"; # will change these
  $cost = -log(1 - $silbeginprob);
  print "$startstate\t$nonsilstart\t$sildisambig\t<eps>\t$cost\n";
  
  # Main loop: one lexicon line per pronunciation.  Fields, in order:
  #   word, pronunciation probability, probability of silence after the word,
  #   correction term for silence before the word, correction term for
  #   non-silence before the word, then the phones.
  # For each pronunciation a chain of new states is emitted, entered from both
  # $nonsilstart and $silstart and leaving back to both of them.
  while (<L>) {
    @A = split(" ", $_);
    $w = shift @A;
    $pron_prob = shift @A;
    if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) {
      die "Bad pronunciation probability in line $_";
    }

    $wordsilprob = shift @A;
    $silwordcorrection = shift @A;
    $nonsilwordcorrection = shift @A;

    # These three are passed to log() below (the silence probability also as
    # 1 - p), and log() dies on a non-positive argument.  Report the offending
    # line instead of letting log() fail.
    if (! defined $wordsilprob || !($wordsilprob > 0.0 && $wordsilprob < 1.0)) {
      die "Bad silence probability (need 0 < p < 1) in line $_";
    }
    if (! defined $silwordcorrection || !($silwordcorrection > 0.0)) {
      die "Bad correction term for silence (need > 0) in line $_";
    }
    if (! defined $nonsilwordcorrection || !($nonsilwordcorrection > 0.0)) {
      die "Bad correction term for non-silence (need > 0) in line $_";
    }

    # All costs are negative natural logs.
    $pron_cost = -log($pron_prob);
    $wordsilcost = -log($wordsilprob);
    $wordnonsilcost = -log(1.0 - $wordsilprob);
    $silwordcost = -log($silwordcorrection);
    $nonsilwordcost = -log($nonsilwordcorrection);

    # NOTE(review): a line with no phones after the five leading fields emits
    # no arcs at all and is skipped silently -- confirm that is intended.
    $first = 1;  # used as a bool, to handle the first phone (adding sils)
    while (@A > 0) {
      $p = shift @A;

      if ($first == 1) {
        # The first phone carries the word label and the pronunciation cost.
        # It is reachable from both entry states, and both arcs share the same
        # destination state.
        $newstate = $nextstate++;

        # for nonsil before w
        $cost = $nonsilwordcost + $pron_cost;
        print "$nonsilstart\t$newstate\t$p\t$w\t$cost\n";

        # for sil before w
        $cost = $silwordcost + $pron_cost;
        print "$silstart\t$newstate\t$p\t$w\t$cost\n";
        $first = 0;
      }
      else {
        # Later phones: an arc with no cost field from the most recently
        # created state to a fresh one, with <eps> as the output symbol.
        $oldstate = $nextstate - 1;
        print "$oldstate\t$nextstate\t$p\t<eps>\n";
        $nextstate++;
      }
      if (@A == 0) {
        # Last phone consumed: leave the word's final state towards both
        # shared states.
        $oldstate = $nextstate - 1;
        # for no sil after w
        $cost = $wordnonsilcost;
        print "$oldstate\t$nonsilstart\t$sildisambig\t<eps>\t$cost\n";

        # for sil after w
        $cost = $wordsilcost;
        print "$oldstate\t$silstart\t$silphone\t<eps>\t$cost\n";
      }
    }
  }
  close(L);
  # Closing lines of the output: one line each for $silstart and $nonsilstart,
  # holding the state number and a cost.  The cost is the negative natural log
  # of the matching end-of-sentence correction term read from the
  # silence-probability file.
  # Each line ends in an explicit "\n" so the output finishes with a newline
  # and carries no stray whitespace.
  $cost = -log($silendcorrection);
  print "$silstart\t$cost\n";
  $cost = -log($nonsilendcorrection);
  print "$nonsilstart\t$cost\n";