Blame view
egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py
6.5 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
#!/usr/bin/env python # Copyright 2016 Johns Hopkins University (author: Daniel Povey) # Apache 2.0. from __future__ import print_function from __future__ import division import argparse import sys, os from collections import defaultdict parser = argparse.ArgumentParser(description="This script reads stats created in analyze_lats.sh " "to print information about lattice depths broken down per phone. " "The normal output of this script is written to the standard output " "and is human readable (on crashes, we'll print an error to stderr.") parser.add_argument("--frequency-cutoff-percentage", type = float, default = 0.5, help="Cutoff, expressed as a percentage " "(between 0 and 100), of frequency at which we print stats " "for a phone.") parser.add_argument("lang", help="Language directory, e.g. data/lang.") args = parser.parse_args() # set up phone_int2text to map from phone to printed form. phone_int2text = {} try: f = open(args.lang + "/phones.txt", "r"); for line in f.readlines(): [ word, number] = line.split() phone_int2text[int(number)] = word f.close() except: sys.exit("analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt".format( args.lang)) # this is a special case... for begin- and end-of-sentence stats, # we group all nonsilence phones together. phone_int2text[0] = 'nonsilence' # populate the set and 'nonsilence', which will contain the integer phone-ids of # nonsilence phones (and disambig phones, which won't matter). nonsilence = set(phone_int2text.keys()) nonsilence.remove(0) try: # open lang/phones/silence.csl-- while there are many ways of obtaining the # silence/nonsilence phones, we read this because it's present in graph # directories as well as lang directories. 
filename = "{0}/phones/silence.csl".format(args.lang) f = open(filename, "r") line = f.readline() for silence_phone in line.split(":"): nonsilence.remove(int(silence_phone)) f.close() except Exception as e: sys.exit("analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format( args.lang, str(e))) # phone_depth_counts is a dict of dicts. # for each integer phone-id 'phone', # phone_depth_counts[phone] is a map from depth to count (of frames on which # that was the 1-best phone in the alignment, and the lattice depth # had that value). So we'd access it as # count = phone_depth_counts[phone][depth]. phone_depth_counts = dict() # note: -1 is for all phones put in one bucket. for p in [ -1 ] + list(phone_int2text.keys()): phone_depth_counts[p] = defaultdict(int) total_frames = 0 while True: line = sys.stdin.readline() if line == '': break a = line.split() if len(a) != 3: sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line) try: phone, depth, count = [ int(x) for x in a ] phone_depth_counts[phone][depth] += count total_frames += count if phone in nonsilence: nonsilence_phone = 0 phone_depth_counts[nonsilence_phone][depth] += count universal_phone = -1 phone_depth_counts[universal_phone][depth] += count except Exception as e: sys.exit("analyze_lattice_depth_stats.py: unexpected phone {0} " "seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e))) if total_frames == 0: sys.exit("analyze_lattice_depth_stats.py: read no input") # If depth_to_count is a map from depth-in-frames to count, # return the depth-in-frames that equals the (fraction * 100)'th # percentile of the distribution. 
def GetPercentile(depth_to_count, fraction):
    """Return the depth at the (fraction * 100)'th percentile of a histogram.

    Args:
        depth_to_count: map from lattice depth to a non-negative count of
            frames observed with that depth.
        fraction: position in the distribution, expected to lie in [0, 1]
            (e.g. 0.5 for the median).

    Returns:
        The smallest depth (in sorted order) at which the cumulative count
        reaches int(fraction * total count), or 0 if the total count is zero.

    Raises:
        ValueError: if a count is negative, or if the cumulative count never
            reaches the cutoff (which happens when fraction is above 1).
    """
    this_total_frames = sum(depth_to_count.values())
    if this_total_frames == 0:
        return 0
    # The cutoff is truncated towards zero, so small histograms round down.
    count_cutoff = int(fraction * this_total_frames)
    cur_count_total = 0
    for depth, count in sorted(depth_to_count.items()):
        if count < 0:
            raise ValueError("negative count {0} for depth {1}".format(count, depth))
        cur_count_total += count
        if cur_count_total >= count_cutoff:
            return depth
    # we shouldn't reach here for a fraction in [0, 1].  (This used to be
    # 'assert false', which raised NameError rather than a meaningful error.)
    raise ValueError("cumulative count {0} never reached cutoff {1} "
                     "(fraction={2})".format(cur_count_total, count_cutoff, fraction))


def GetMean(depth_to_count):
    """Return the count-weighted mean depth of a histogram.

    Args:
        depth_to_count: map from lattice depth to count of frames.

    Returns:
        sum(depth * count) / sum(count) as a float, or 0.0 if the total
        count is zero.
    """
    this_total_frames = sum(depth_to_count.values())
    if this_total_frames == 0:
        return 0.0
    # Summing floats keeps the division below a true division even without
    # 'from __future__ import division'.
    this_total_depth = sum(float(l * c) for l, c in depth_to_count.items())
    return this_total_depth / this_total_frames


# 360000 = 100 frames per second * 3600 seconds per hour.
print("The total amount of data analyzed assuming 100 frames per second "
      "is {0} hours".format("%.1f" % (total_frames / 360000.0)))

# the next block prints lines like (to give some examples):
# Nonsilence phones as a group account for 74.4% of frames, with lattice depth (10,50,90-percentile)=(1,2,7) and mean=3.1
# Phone SIL accounts for 25.5% of frames, with lattice depth (10,50,90-percentile)=(1,1,4) and mean=2.5
# Phone Z_E accounts for 2.5% of frames, with lattice depth (10,50,90-percentile)=(1,2,6) and mean=2.9
# ...


# sort the phones in decreasing order of count.
# Print one summary line per bucket of phone_depth_counts, most frequent first.
# The key is the negated total count (rather than reverse=True) so that buckets
# with equal counts keep their original relative order.
for phone, depths in sorted(phone_depth_counts.items(),
                            key = lambda x : -sum(x[1].values())):

    frequency_percentage = sum(depths.values()) * 100.0 / total_frames
    # skip buckets rarer than the --frequency-cutoff-percentage option.
    if frequency_percentage < args.frequency_cutoff_percentage:
        continue

    depth_percentile_10 = GetPercentile(depths, 0.1)
    depth_percentile_50 = GetPercentile(depths, 0.5)
    depth_percentile_90 = GetPercentile(depths, 0.9)
    depth_mean = GetMean(depths)

    if phone > 0:
        # a real phone: look up its printed form.
        try:
            phone_text = phone_int2text[phone]
        except KeyError:
            # only a missing key means "not in phones.txt"; a bare 'except'
            # would also have swallowed KeyboardInterrupt and SystemExit.
            sys.exit("analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
                     "(lang/alignment mismatch?)".format(phone))
        preamble = "Phone {phone_text} accounts for {percent}% of frames, with".format(
            phone_text = phone_text, percent = "%.1f" % frequency_percentage)
    elif phone == 0:
        # 0 is the bucket for all nonsilence phones grouped together.
        preamble = "Nonsilence phones as a group account for {percent}% of frames, with".format(
            percent = "%.1f" % frequency_percentage)
    else:
        # -1 is the bucket for all phones together.
        assert phone == -1
        preamble = "Overall,"

    print("{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
        preamble = preamble,
        p10 = depth_percentile_10,
        p50 = depth_percentile_50,
        p90 = depth_percentile_90,
        mean = "%.1f" % depth_mean))