Blame view

egs/wsj/s5/steps/segmentation/internal/find_oov_phone.py 1.53 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
  #!/usr/bin/env python
  
  # Copyright 2017  Vimal Manohar
  # Apache 2.0
  
  """This script finds the OOV phone by reading the OOV word from
  oov.int in the input <lang> directory and the lexicon
  <lang>/phones/align_lexicon.int.
  It prints the OOV phone to stdout, if it can find a single phone
  mapping for the OOV word."""
  from __future__ import print_function
  
  import sys
  
  
  def main():
      """Print the phone that the OOV word maps to, then exit with status 0.

      Expects exactly one command-line argument, the <lang> directory.
      The OOV word id is read from the first line of <lang>/oov.int and
      looked up in <lang>/phones/align_lexicon.int.  In each lexicon line
      the first field is compared against the OOV word id and the fields
      from the third onwards are taken as the pronunciation.  The first
      entry for the OOV word whose pronunciation is a single phone is
      printed to stdout.

      Raises:
          RuntimeError: on wrong usage, on a lexicon line with fewer than
              three fields, when the OOV word only has multi-phone
              pronunciations, or when it is not in the lexicon at all.
          SystemExit: with code 0 once the OOV phone has been printed.
      """
      if len(sys.argv) != 2:
          raise RuntimeError("Usage: {0} <lang>".format(sys.argv[0]))

      lang = sys.argv[1]

      # The path templates must be filled in with the lang directory;
      # context managers make sure both files get closed.
      with open("{0}/oov.int".format(lang)) as oov_file:
          oov_int = int(oov_file.readline())
      assert oov_int > 0

      oov_mapped_to_multiple_phones = False
      with open("{0}/phones/align_lexicon.int".format(lang)) as lexicon:
          for line in lexicon:
              parts = line.strip().split()

              if len(parts) < 3:
                  raise RuntimeError("Could not parse line {0} in "
                                     "{1}/phones/align_lexicon.int"
                                     "".format(line, lang))

              w = int(parts[0])
              if w != oov_int:
                  continue

              if len(parts[2:]) > 1:
                  # Keep scanning: a later entry for the OOV word may map
                  # it to a single phone.
                  oov_mapped_to_multiple_phones = True
                  continue

              p = int(parts[2])
              print("{0}".format(p))

              # Success: stop at the first single-phone pronunciation.
              raise SystemExit(0)

      if oov_mapped_to_multiple_phones:
          raise RuntimeError("OOV word found, but is mapped to multiples phones. "
                             "This is an unusual case.")

      raise RuntimeError("Could not find OOV word in "
                         "{0}/phones/align_lexicon.int".format(lang))
  
  
  # Run main() only when this file is executed as a script, not on import.
  if __name__ == "__main__":
      main()