Yannick Estève / ONTRAC-Kaldi

Blame view

egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py 6.5 KB
  #!/usr/bin/env python
  
  
  # Copyright 2016 Johns Hopkins University (author: Daniel Povey)
  # Apache 2.0.
  
  from __future__ import print_function
  from __future__ import division
  import argparse
  import sys, os
  from collections import defaultdict
  
  
  # Command-line interface: one optional cutoff and the positional lang directory.
  parser = argparse.ArgumentParser(
      description="This script reads stats created in analyze_lats.sh "
      "to print information about lattice depths broken down per phone. "
      "The normal output of this script is written to the standard output "
      "and is human readable (on crashes, we'll print an error to stderr).")

  # Phones (and phone groups) whose share of the total frame count is below
  # this percentage are left out of the printed report.
  parser.add_argument("--frequency-cutoff-percentage", type=float,
                      default=0.5, help="Cutoff, expressed as a percentage "
                      "(between 0 and 100), of frequency at which we print stats "
                      "for a phone.")

  # The script reads <lang>/phones.txt and <lang>/phones/silence.csl.
  parser.add_argument("lang",
                      help="Language directory, e.g. data/lang.")

  args = parser.parse_args()
  
  # set up phone_int2text to map from phone to printed form.
  # Each line of phones.txt must split into exactly two fields: the printed
  # form of the phone followed by its integer id.
  phone_int2text = {}
  try:
      # 'with' guarantees the file is closed even if a line fails to parse.
      with open(args.lang + "/phones.txt", "r") as f:
          for line in f:
              word, number = line.split()
              phone_int2text[int(number)] = word
  except Exception as e:
      # 'except Exception' (unlike a bare 'except') does not swallow
      # KeyboardInterrupt or SystemExit.  The underlying error is included in
      # the message, as is done for phones/silence.csl.
      sys.exit("analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt: {1}".format(
              args.lang, str(e)))
  # this is a special case: integer id 0 is reused as a pseudo-phone under which
  # the stats of all nonsilence phones are grouped together.  This overwrites
  # whatever symbol phones.txt assigned to id 0.
  # NOTE(review): presumably that symbol is <eps>, which should never appear in
  # the input stats -- confirm.
  phone_int2text[0] = 'nonsilence'
  
  # populate the set 'nonsilence', which will contain the integer phone-ids of
  # nonsilence phones (and disambig phones, which won't matter).
  # We start from every id in phone_int2text, drop the pseudo-phone 0, and then
  # remove each silence phone listed in silence.csl.
  nonsilence = set(phone_int2text)
  nonsilence.remove(0)
  try:
      # open lang/phones/silence.csl-- while there are many ways of obtaining the
      # silence/nonsilence phones, we read this because it's present in graph
      # directories as well as lang directories.
      filename = "{0}/phones/silence.csl".format(args.lang)
      # Only the first line is read; it is a colon-separated list of integer ids.
      # 'with' closes the file even if reading fails.
      with open(filename, "r") as f:
          line = f.readline()
      for silence_phone in line.split(":"):
          # set.remove() raises KeyError for an id that was not in phones.txt;
          # that is reported below like any other problem with this file.
          nonsilence.remove(int(silence_phone))
  except Exception as e:
      sys.exit("analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format(
              args.lang, str(e)))
  
  # phone_depth_counts is a dict of dicts.
  # for each integer phone-id 'phone',
  # phone_depth_counts[phone] is a map from depth to count (of frames on which
  # that was the 1-best phone in the alignment, and the lattice depth
  # had that value).  So we'd access it as
  # count = phone_depth_counts[phone][depth].

  phone_depth_counts = {}

  # note: -1 is for all phones put in one bucket.
  for p in [-1] + list(phone_int2text.keys()):
      phone_depth_counts[p] = defaultdict(int)

  # Pseudo-phone ids under which grouped stats are accumulated.
  nonsilence_phone = 0    # all nonsilence phones together
  universal_phone = -1    # every phone together

  total_frames = 0

  # Each line of stdin must consist of three integers: phone, depth, count.
  for line in sys.stdin:
      a = line.split()
      if len(a) != 3:
          sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)
      try:
          phone, depth, count = [int(x) for x in a]
      except ValueError:
          # Non-integer field.  This is reported separately from the
          # unknown-phone case below, because 'phone' has no valid value for
          # this line and so cannot be put in that message.
          sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)

      try:
          this_phone_counts = phone_depth_counts[phone]
      except KeyError as e:
          # The phone id was not listed in phones.txt.
          sys.exit("analyze_lattice_depth_stats.py: unexpected phone {0} "
                   "seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e)))

      # From here on nothing can raise KeyError: the inner maps are defaultdicts
      # and the two group buckets were created above.
      this_phone_counts[depth] += count
      total_frames += count
      if phone in nonsilence:
          phone_depth_counts[nonsilence_phone][depth] += count
      phone_depth_counts[universal_phone][depth] += count

  if total_frames == 0:
      sys.exit("analyze_lattice_depth_stats.py: read no input")
  
  
  # If depth_to_count is a map from depth-in-frames to count,
  # return the depth-in-frames that equals the (fraction * 100)'th
  # percentile of the distribution.
  def GetPercentile(depth_to_count, fraction):
      """Return the depth at the (fraction * 100)'th percentile of a distribution.

      depth_to_count is a map from depth to count.  The depths are visited in
      increasing order and the first depth at which the cumulative count
      reaches int(fraction * total_count) is returned.  If the counts sum to
      zero (e.g. the map is empty), 0 is returned.

      Raises AssertionError if a negative count is encountered, or if no depth
      reaches the cutoff (which can only happen when fraction is greater
      than 1).
      """
      this_total_frames = sum(depth_to_count.values())
      if this_total_frames == 0:
          return 0

      count_cutoff = int(fraction * this_total_frames)
      cur_count_total = 0
      for depth, count in sorted(depth_to_count.items()):
          assert count >= 0
          cur_count_total += count
          if cur_count_total >= count_cutoff:
              return depth
      # We shouldn't reach here for fraction <= 1.  An explicit raise is used
      # so that the failure is an AssertionError (the undefined name 'false'
      # would give a NameError) and is not stripped by 'python -O'.
      raise AssertionError("GetPercentile: cumulative count never reached the "
                           "cutoff (fraction={0})".format(fraction))
  
  def GetMean(depth_to_count):
      """Return the count-weighted mean depth of a distribution, as a float.

      depth_to_count is a map from depth to count.  If the counts sum to
      zero (e.g. the map is empty), 0.0 is returned.
      """
      frame_total = sum(depth_to_count.values())
      if frame_total == 0:
          return 0.0
      # Accumulate depth * count over all entries, then normalize by the
      # total count.
      weighted_depth_total = 0.0
      for depth, count in depth_to_count.items():
          weighted_depth_total += float(depth * count)
      return weighted_depth_total / frame_total
  
  
  # 360000 = 100 frames per second * 3600 seconds per hour.
  print("The total amount of data analyzed assuming 100 frames per second "
        "is {0} hours".format("%.1f" % (total_frames / 360000.0)))

  # the next block prints lines like (to give some examples; the numbers are illustrative):
  # Overall, lattice depth (10,50,90-percentile)=(1,2,7) and mean=3.0
  # Nonsilence phones as a group account for 74.4% of frames, with lattice depth (10,50,90-percentile)=(1,2,7) and mean=3.1
  # Phone SIL accounts for 25.5% of frames, with lattice depth (10,50,90-percentile)=(1,1,4) and mean=2.5
  # Phone Z_E accounts for 2.5% of frames, with lattice depth (10,50,90-percentile)=(1,2,6) and mean=2.9
  # ...


  # sort the phones in decreasing order of count.
  for phone, depths in sorted(phone_depth_counts.items(), key=lambda x: -sum(x[1].values())):

      # Skip phones (or groups) that account for too small a share of the frames.
      frequency_percentage = sum(depths.values()) * 100.0 / total_frames
      if frequency_percentage < args.frequency_cutoff_percentage:
          continue

      depth_percentile_10 = GetPercentile(depths, 0.1)
      depth_percentile_50 = GetPercentile(depths, 0.5)
      depth_percentile_90 = GetPercentile(depths, 0.9)
      depth_mean = GetMean(depths)

      # phone > 0 is a real phone, 0 is the nonsilence group, and -1 is the
      # bucket that holds all phones.
      if phone > 0:
          try:
              phone_text = phone_int2text[phone]
          except KeyError:
              # Only a missing key can go wrong in the lookup above, so catch
              # just that instead of using a bare 'except'.
              sys.exit("analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
                       "(lang/alignment mismatch?)".format(phone))
          preamble = "Phone {phone_text} accounts for {percent}% of frames, with".format(
              phone_text=phone_text, percent="%.1f" % frequency_percentage)
      elif phone == 0:
          preamble = "Nonsilence phones as a group account for {percent}% of frames, with".format(
              percent="%.1f" % frequency_percentage)
      else:
          assert phone == -1
          preamble = "Overall,"

      print("{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
              preamble=preamble,
              p10=depth_percentile_10,
              p50=depth_percentile_50,
              p90=depth_percentile_90,
              mean="%.1f" % depth_mean))