Blame view

egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py 6.5 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
  #!/usr/bin/env python
  
  
  # Copyright 2016 Johns Hopkins University (author: Daniel Povey)
  # Apache 2.0.
  
  from __future__ import print_function
  from __future__ import division
  import argparse
  import sys, os
  from collections import defaultdict
  
  
  # Command-line interface: one optional cutoff option plus the positional
  # language directory.
  parser = argparse.ArgumentParser(
      description="This script reads stats created in analyze_lats.sh "
      "to print information about lattice depths broken down per phone. "
      "The normal output of this script is written to the standard output "
      "and is human readable (on crashes, we'll print an error to stderr.")

  # Buckets whose share of the frames is below this percentage are not printed.
  parser.add_argument(
      "--frequency-cutoff-percentage",
      type=float,
      default=0.5,
      help="Cutoff, expressed as a percentage "
      "(between 0 and 100), of frequency at which we print stats "
      "for a phone.")

  # Directory from which phones.txt and phones/silence.csl are read.
  parser.add_argument("lang", help="Language directory, e.g. data/lang.")

  args = parser.parse_args()
  
  # Set up phone_int2text to map from integer phone-id to printed form.
  # Each line of <lang>/phones.txt must split into exactly two fields:
  # the printed form followed by the integer id.
  phone_int2text = {}
  try:
      # 'with' closes the file even when a malformed line raises part-way
      # through the loop.
      with open(args.lang + "/phones.txt", "r") as f:
          for line in f:
              word, number = line.split()
              phone_int2text[int(number)] = word
  except Exception:
      # Any failure (unreadable file, wrong field count, non-integer id) is
      # fatal.  Catching Exception instead of using a bare 'except' lets
      # KeyboardInterrupt and SystemExit propagate normally.
      sys.exit("analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt".format(
              args.lang))
  # Special case: phone-id 0 is used as a pseudo-phone under which the stats
  # of all nonsilence phones are grouped together.  This replaces whatever
  # phones.txt listed for id 0.
  phone_int2text[0] = 'nonsilence'
  
  # Populate the set 'nonsilence', which will contain the integer phone-ids of
  # nonsilence phones (and disambig phones, which won't matter).  We start
  # from every id in phone_int2text except the pseudo-phone 0, then remove
  # each silence phone.
  nonsilence = set(phone_int2text.keys())
  nonsilence.remove(0)
  try:
      # open lang/phones/silence.csl-- while there are many ways of obtaining the
      # silence/nonsilence phones, we read this because it's present in graph
      # directories as well as lang directories.
      filename = "{0}/phones/silence.csl".format(args.lang)
      # Only the first line is used; 'with' closes the file even if the read
      # fails.
      with open(filename, "r") as f:
          line = f.readline()
      # The line is a colon-separated list of integer phone-ids.  A
      # non-integer entry, or an id that is not in the set, raises and is
      # reported by the handler below.
      for silence_phone in line.split(":"):
          nonsilence.remove(int(silence_phone))
  except Exception as e:
      sys.exit("analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format(
              args.lang, str(e)))
  
  # phone_depth_counts is a dict of dicts, accessed as
  #   count = phone_depth_counts[phone][depth]
  # where 'phone' is an integer phone-id and the inner map goes from lattice
  # depth to the count of frames on which that was the 1-best phone in the
  # alignment and the lattice depth had that value.
  #
  # Besides every id known to phone_int2text, the key -1 is a bucket into
  # which all phones are put together.
  phone_depth_counts = dict(
      (phone_id, defaultdict(int))
      for phone_id in [ -1 ] + list(phone_int2text.keys()))
  
  # Accumulate the stats read from the standard input.  Every line must
  # consist of exactly three integer fields: phone-id, lattice depth, count.
  total_frames = 0

  # Keys of the two pooled buckets in phone_depth_counts.
  nonsilence_phone = 0
  universal_phone = -1

  for line in sys.stdin:
      a = line.split()
      if len(a) != 3:
          sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)
      # Parse separately from the lookup below, so that a non-integer field is
      # reported as an uninterpretable line rather than as an "unexpected
      # phone" (at which point 'phone' would be unset or stale).
      try:
          phone, depth, count = [ int(x) for x in a ]
      except ValueError:
          sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)

      try:
          # A KeyError here means the phone-id has no entry in
          # phone_depth_counts, i.e. it was not listed in phones.txt.
          phone_depth_counts[phone][depth] += count
      except KeyError as e:
          sys.exit("analyze_lattice_depth_stats.py: unexpected phone {0} "
                   "seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e)))

      total_frames += count
      if phone in nonsilence:
          phone_depth_counts[nonsilence_phone][depth] += count
      phone_depth_counts[universal_phone][depth] += count

  if total_frames == 0:
      sys.exit("analyze_lattice_depth_stats.py: read no input")
  
  
  def GetPercentile(depth_to_count, fraction):
      """Return the depth at the (fraction * 100)'th percentile.

      depth_to_count is a map from depth-in-frames to count.  Depths are
      visited in increasing order, and the first depth at which the
      cumulative count reaches int(fraction * total count) is returned.
      Returns 0 if the counts sum to zero.

      Raises AssertionError if no depth reaches the cutoff (e.g. when
      fraction is greater than 1); with assertions enabled, a negative
      count also raises AssertionError.
      """
      this_total_frames = sum(depth_to_count.values())
      if this_total_frames == 0:
          return 0
      count_cutoff = int(fraction * this_total_frames)
      cur_count_total = 0
      for depth, count in sorted(depth_to_count.items()):
          assert count >= 0
          cur_count_total += count
          if cur_count_total >= count_cutoff:
              return depth
      # We shouldn't reach here.  Raise explicitly rather than via an 'assert'
      # statement so that the failure is not stripped out under 'python -O'.
      raise AssertionError(
          "GetPercentile: cumulative count {0} never reached cutoff {1}".format(
              cur_count_total, count_cutoff))
  
  def GetMean(depth_to_count):
      """Return the count-weighted mean depth of a depth -> count map.

      Returns 0.0 when the counts sum to zero (including an empty map).
      """
      frame_total = 0
      weighted_depth_total = 0.0
      for depth, count in depth_to_count.items():
          frame_total += count
          weighted_depth_total += float(depth * count)
      if frame_total == 0:
          return 0.0
      return weighted_depth_total / frame_total
  
  
  # 360000 = 100 frames per second * 3600 seconds per hour.
  total_hours = "%.1f" % (total_frames / 360000.0)
  print("The total amount of data analyzed assuming 100 frames per second "
        "is {0} hours".format(total_hours))
  
  # the next block prints lines like (to give some examples):
  # Nonsilence phones as a group account for 74.4% of frames, with lattice depth (10,50,90-percentile)=(1,2,7) and mean=3.1
  # Phone SIL accounts for 25.5% of frames, with lattice depth (10,50,90-percentile)=(1,1,4) and mean=2.5
  # Phone Z_E accounts for 2.5% of frames, with lattice depth (10,50,90-percentile)=(1,2,6) and mean=2.9
  # ...
  
  
  # Print one line per bucket of phone_depth_counts (each phone, the
  # nonsilence group under key 0, and the all-phones bucket under key -1),
  # sorted in decreasing order of count.
  for phone,depths in sorted(phone_depth_counts.items(), key = lambda x : -sum(x[1].values())):

      frequency_percentage = sum(depths.values()) * 100.0 / total_frames
      # Skip buckets that account for too small a share of the frames.
      if frequency_percentage < args.frequency_cutoff_percentage:
          continue

      depth_percentile_10 = GetPercentile(depths, 0.1)
      depth_percentile_50 = GetPercentile(depths, 0.5)
      depth_percentile_90 = GetPercentile(depths, 0.9)
      depth_mean = GetMean(depths)

      if phone > 0:
          # An ordinary phone: look up its printed form.  Only a missing key
          # is treated as a lang/alignment mismatch; anything else propagates.
          try:
              phone_text = phone_int2text[phone]
          except KeyError:
              sys.exit("analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
                       "(lang/alignment mismatch?)".format(phone))
          preamble = "Phone {phone_text} accounts for {percent}% of frames, with".format(
              phone_text = phone_text, percent = "%.1f" % frequency_percentage)
      elif phone == 0:
          # Key 0 holds the stats of the nonsilence phones as a group.
          preamble = "Nonsilence phones as a group account for {percent}% of frames, with".format(
              percent = "%.1f" % frequency_percentage)
      else:
          # The only remaining key is -1, the bucket for all phones.
          assert phone == -1
          preamble = "Overall,"

      print("{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
              preamble = preamble,
              p10 = depth_percentile_10,
              p50 = depth_percentile_50,
              p90 = depth_percentile_90,
              mean = "%.1f" % depth_mean))