Yannick Estève / ONTRAC-Kaldi

Blame view

egs/wsj/s5/utils/make_lexicon_fst_silprob.pl 5.09 KB
  #!/usr/bin/env perl
  
  # THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED.  See
  # utils/lang/make_lexicon_fst_silprob.py which is the python-based replacement.
  
  use warnings; #sed replacement for -w perl parameter
  # Copyright 2010-2011  Microsoft Corporation
  #                2013  Johns Hopkins University (author: Daniel Povey)
  #                2015  Hainan Xu
  #                2015  Guoguo Chen
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  
  # makes lexicon FST, in text form, from lexicon which contains (optional)
  # probabilities of pronunciations, and (mandatory) probabilities of silence
  # before and after the pronunciation. This script is almost the same as
  # the make_lexicon_fst.pl script except for the word-dependent silprobs part
  
  # Usage check: exactly four positional arguments are required.  With any
  # other count the help text below goes to STDERR and the script exits with
  # status 1.
  if (@ARGV != 4) {
    # Every line break is written as an explicit "\n" escape.  A raw line
    # break inside the quotes would also copy the indentation of the following
    # source line into the message.
    print STDERR
      "Usage: $0 lexiconp_silprob_disambig.txt \\\n",
      "       silprob.txt silphone_string sil_disambig_sym > lexiconfst.txt \n",
      "\n",
      "This script is almost the same as the utils/make_lexicon_fst.pl\n",
      "except here we include word-dependent silence probabilities\n",
      "when making the lexicon FSTs. ",
      "For details, see paper \n",
      "http://danielpovey.com/files/2015_interspeech_silprob.pdf\n\n",
      "The lexiconp_silprob_disambig.txt file should have each line like \n\n",
      "word p(pronunciation|word) p(sil-after|word) correction-term-for-sil ",
      "correction-term-for-no-sil phone-1 phone-2 ... phone-N\n\n",
      "The pronunciation would have to include disambiguation symbols;\n",
      "the 2 correction terms above are computed to reflect how much a \n",
      "word affects the probability of a [non-]silence before it. \n",
      "Please see the paper (link given above) for detailed descriptions\n",
      "for how the 2 terms are computed.\n\n",
      "The silprob.txt file contains 4 lines, \n\n",
      "<s> p(sil-after|<s>)\n",
      "</s>_s correction-term-for-sil-for-</s>\n",
      "</s>_n correction-term-for-no-sil-for-</s>\n",
      "overall p(overall-sil)\n\n",
      "Other files are the same as utils/make_lexicon_fst.pl\n";

    exit(1);
  }
  
  # Positional arguments, in the order given by the usage text: lexicon file,
  # silprob file, silence phone string, silence disambiguation symbol.
  $lexfn = shift @ARGV;
  $silprobfile = shift @ARGV;

  ($silphone,$sildisambig) = @ARGV;

  # The two-argument form of open is kept on purpose: with it a file name of
  # "-" reads from STDIN, which existing callers may rely on.
  # Both messages name the file that failed and include $! (the OS error).
  # The second message used to interpolate $SP, a variable that is never
  # assigned, so the file name was missing from the error.
  open(L, "<$lexfn") or die "Error opening lexicon $lexfn: $!";
  open(SP, "<$silprobfile")
    or die "Error opening word-sil-probs $silprobfile: $!";
  
  # Read the global silence parameters from the silprob file.  Each line is
  # "<key> <value>"; the keys looked for are "<s>", "</s>_s", "</s>_n" and
  # "overall".  Lines with any other key are ignored.  -1 means "not read".
  $silbeginprob = -1;
  $silendcorrection = -1;
  $nonsilendcorrection = -1;
  $siloverallprob = -1;

  while (<SP>) {
    @A = split(" ", $_);
    next if @A == 0;  # blank line: no key to compare, $w would be undef
    $w = shift @A;
    if ($w eq "<s>") {
      $silbeginprob = shift @A;
    }
    elsif ($w eq "</s>_s") {
      $silendcorrection = shift @A;
    }
    elsif ($w eq "</s>_n") {
      $nonsilendcorrection = shift @A;
    }
    elsif ($w eq "overall") {
      # Stored for completeness; not referenced by the rest of the code
      # shown here, so it is not validated below.
      $siloverallprob = shift @A;
    }
  }
  close(SP);

  # Fail here, before any output is produced, with a message that names the
  # offending entry.  These are the same inputs that would otherwise abort
  # later inside log() with "Can't take log of ...":
  #   - <s> is used as both log(p) and log(1 - p), so 0 < p < 1 is required;
  #   - the two </s> correction terms are used as log(c), so c > 0 is required.
  if (!defined $silbeginprob
      || !($silbeginprob > 0.0 && $silbeginprob < 1.0)) {
    die "Bad or missing <s> entry in $silprobfile: " .
        "expected a probability strictly between 0 and 1\n";
  }
  if (!defined $silendcorrection || !($silendcorrection > 0.0)) {
    die "Bad or missing </s>_s entry in $silprobfile: " .
        "expected a positive correction term\n";
  }
  if (!defined $nonsilendcorrection || !($nonsilendcorrection > 0.0)) {
    die "Bad or missing </s>_n entry in $silprobfile: " .
        "expected a positive correction term\n";
  }
  
  # Fixed state numbering: 0 is the start state, 1 is the state reached when
  # no silence was taken, 2 is the state reached when silence was taken.
  # States inside pronunciations are allocated from 3 upwards.
  $startstate = 0;
  $nonsilstart = 1;
  $silstart = 2;
  $nextstate = 3;

  # Two arcs leave the start state: one through the silence phone with
  # probability $silbeginprob, one through the silence disambiguation symbol
  # with probability 1 - $silbeginprob.  Costs are negative natural logs.
  # Records end in an explicit "\n" so no source indentation leaks into the
  # output in front of the next record.
  $cost = -log($silbeginprob);
  print "$startstate\t$silstart\t$silphone\t<eps>\t$cost\n"; # will change these
  $cost = -log(1 - $silbeginprob);
  print "$startstate\t$nonsilstart\t$sildisambig\t<eps>\t$cost\n";
  
  # Main pass over the lexicon.  Each line is read as
  #   word pron-prob sil-after-prob sil-correction nonsil-correction phones...
  # For every entry this prints the arcs that enter the pronunciation from the
  # two loop states, the chain of phone arcs, and the two arcs that leave it.
  # An entry with no phones prints nothing.
  while (<L>) {
    @A = split(" ", $_);
    $w = shift @A;
    $pron_prob = shift @A;
    if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) {
      die "Bad pronunciation probability in line $_";
    }

    $wordsilprob = shift @A;
    $silwordcorrection = shift @A;
    $nonsilwordcorrection = shift @A;

    # The remaining numeric fields all go through log() below.  Check them
    # here so that a short or malformed line is reported together with its
    # text instead of as a bare "Can't take log of ..." error.
    # $wordsilprob is used as log(p) and log(1 - p): it must be in (0, 1).
    if (! defined $wordsilprob
        || !($wordsilprob > 0.0 && $wordsilprob < 1.0)) {
      die "Bad silence probability in line $_";
    }
    if (! defined $silwordcorrection || !($silwordcorrection > 0.0)) {
      die "Bad silence correction term in line $_";
    }
    if (! defined $nonsilwordcorrection || !($nonsilwordcorrection > 0.0)) {
      die "Bad non-silence correction term in line $_";
    }

    $pron_cost = -log($pron_prob);
    $wordsilcost = -log($wordsilprob);
    $wordnonsilcost = -log(1.0 - $wordsilprob);
    $silwordcost = -log($silwordcorrection);
    $nonsilwordcost = -log($nonsilwordcorrection);

    $first = 1;  # used as a bool, to handle the first phone (adding sils)
    while (@A > 0) {
      $p = shift @A;

      if ($first == 1) {
        # The first phone carries the word label and the pronunciation cost.
        # It is reachable from both loop states, each with its own
        # correction cost, and both arcs end in the same new state.
        $newstate = $nextstate++;

        # for nonsil before w
        $cost = $nonsilwordcost + $pron_cost;
        print "$nonsilstart\t$newstate\t$p\t$w\t$cost\n";

        # for sil before w
        $cost = $silwordcost + $pron_cost;
        print "$silstart\t$newstate\t$p\t$w\t$cost\n";
        $first = 0;
      }
      else {
        # Later phones: a cost-free arc from the most recently allocated
        # state to a fresh one, with <eps> on the output side.
        $oldstate = $nextstate - 1;
        print "$oldstate\t$nextstate\t$p\t<eps>\n";
        $nextstate++;
      }
      if (@A == 0) {
        # Last phone consumed: return to one of the two loop states.
        $oldstate = $nextstate - 1;
        # for no sil after w
        $cost = $wordnonsilcost;
        print "$oldstate\t$nonsilstart\t$sildisambig\t<eps>\t$cost\n";

        # for sil after w
        $cost = $wordsilcost;
        print "$oldstate\t$silstart\t$silphone\t<eps>\t$cost\n";
      }
    }
  }
  close(L);
  # Closing records: one "state<TAB>cost" line for each of the two loop
  # states, with the cost taken from the matching </s> correction term.
  # (Assuming the OpenFst text format, such a two-field line marks the state
  # as final with that cost.)
  # Each record ends in an explicit "\n", so the output finishes with a clean
  # newline and no trailing indentation.
  $cost = -log($silendcorrection);
  print "$silstart\t$cost\n";
  $cost = -log($nonsilendcorrection);
  print "$nonsilstart\t$cost\n";