Blame view
egs/aspire/s5/local/multi_condition/create_uniform_segments.py
3.47 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
#!/usr/bin/env python
# Copyright 2014 Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti). Apache 2.0.

# creates a segments file in the provided data directory
# into uniform segments with specified window and overlap

from __future__ import division

import argparse
import math
import os
import subprocess
import sys

try:
    # Unused by this script; kept only so the module namespace is unchanged.
    import imp  # noqa: F401
except ImportError:
    # The imp module was removed in Python 3.12; do not fail at import time.
    imp = None

min_segment_length = 10  # in seconds


def segment(total_length, window_length, overlap=0):
    """Split the span [0, total_length] into uniform, overlapping windows.

    Consecutive windows start ``window_length - overlap`` apart and each is
    clipped at ``total_length``.  If the final window is shorter than
    ``min_segment_length`` it is merged into the window before it.

    Args:
        total_length: length of the recording, in seconds.
        window_length: length of each window, in seconds.
        overlap: overlap between neighboring windows, in seconds.

    Returns:
        A list of ``(start, end)`` tuples.  The list is empty when
        ``total_length`` is not positive.

    Raises:
        ValueError: if ``overlap`` is not smaller than ``window_length``
            (the windows would never advance).
    """
    increment = window_length - overlap
    if increment <= 0:
        raise ValueError(
            "overlap ({0}) must be smaller than window_length ({1})".format(
                overlap, window_length))

    num_windows = int(math.ceil(float(total_length) / increment))
    segments = [(x * increment,
                 min(total_length, (x * increment) + window_length))
                for x in range(num_windows)]

    # Only merge a short tail when there is a previous window to merge into;
    # a single short window (or no window at all) is returned unchanged.
    if (len(segments) > 1
            and segments[-1][1] - segments[-1][0] < min_segment_length):
        segments[-2] = (segments[-2][0], segments[-1][1])
        segments.pop()
    return segments


def get_wave_segments(wav_command, window_length, overlap):
    """Measure a recording with sox and cut it into uniform segments.

    Args:
        wav_command: text that is prepended verbatim to a ``sox ... stat``
            invocation and run through the shell.  NOTE(review): presumably
            the command part of a wav.scp entry ending in ``|`` -- confirm
            against the data directories this is used with.
        window_length: length of each window, in seconds.
        overlap: overlap between neighboring windows, in seconds.

    Returns:
        The list of ``(start, end)`` tuples produced by ``segment``.

    Raises:
        subprocess.CalledProcessError: if the shell pipeline exits non-zero.
        Exception: if the output does not start with "Length (seconds)".
    """
    # shell=True is required because wav_command is itself shell syntax; it
    # must therefore only ever come from a trusted wav.scp file.
    raw_output = subprocess.check_output(
        wav_command+" sox -t wav - -n stat 2>&1 | grep Length ", shell=True)
    if isinstance(raw_output, bytes):
        # check_output returns bytes on Python 3; decode before text handling.
        raw_output = raw_output.decode("utf-8", "replace")

    parts = raw_output.split(":")
    if parts[0].strip() != "Length (seconds)":
        raise Exception("Failed while processing file " + wav_command)
    total_length = float(parts[1])
    return segment(total_length, window_length, overlap)


def prepare_segments_file(kaldi_data_dir, window_length, overlap):
    """Build the segment records for every recording listed in wav.scp.

    Args:
        kaldi_data_dir: directory containing a ``wav.scp`` file.
        window_length: length of each window, in seconds.
        overlap: overlap between neighboring windows, in seconds.

    Returns:
        A tuple ``(segments_total, segments_per_recording)`` where
        ``segments_total`` is a list of strings of the form
        ``<utt-id> <recording-id> <start> <end>`` and
        ``segments_per_recording`` is a list of
        ``[recording-id, [utt-id, ...]]`` pairs in wav.scp order.

    Raises:
        Exception: if ``kaldi_data_dir`` has no ``wav.scp`` file.
    """
    wav_scp_path = kaldi_data_dir+'/wav.scp'
    if not os.path.exists(wav_scp_path):
        raise Exception("Not a proper kaldi data directory")

    recordings = []
    with open(wav_scp_path) as wav_scp:
        for line in wav_scp:
            parts = line.split()
            if not parts:
                # Tolerate blank lines instead of failing on parts[0].
                continue
            recordings.append((parts[0], " ".join(parts[1:])))

    segments_total = []
    segments_per_recording = []
    for recording_id, wav_command in recordings:
        utterance_ids = []
        for start, end in get_wave_segments(wav_command, window_length,
                                            overlap):
            # The utterance id embeds start and end in milliseconds,
            # zero-padded to at least six digits.
            segment_string = "{0}-{1:06}-{2:06} {0} {3} {4}".format(
                recording_id, int(start * 1000), int(end * 1000), start, end)
            segments_total.append(segment_string)
            utterance_ids.append(segment_string.split()[0])
        segments_per_recording.append([recording_id, utterance_ids])
    return segments_total, segments_per_recording


def main():
    """Parse the command line and write segments, utt2spk and spk2utt."""
    usage = ("Python script to create segments file with uniform segment "
             "given the kaldi data directory.")
    sys.stderr.write(" ".join(sys.argv) + "\n")

    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument('--window-length', type=float, default=30.0,
                        help='length of the window used to cut the segment')
    parser.add_argument('--overlap', type=float, default=5.0,
                        help='overlap of neighboring windows')
    parser.add_argument('data_dir', help='directory such as data/train')
    params = parser.parse_args()

    # Compute everything before opening any output file, so that a failure
    # while probing the audio does not leave truncated files behind.
    segments, segments_per_recording = prepare_segments_file(
        params.data_dir, params.window_length, params.overlap)

    # write the segments file, one record per line
    with open(params.data_dir+"/segments", "w") as segments_file:
        segments_file.write("".join(entry + "\n" for entry in segments))

    # write the utt2spk and spk2utt files
    # assumes the recording id is the speaker id
    with open(params.data_dir + "/utt2spk", "w") as utt2spk_file, \
            open(params.data_dir + "/spk2utt", "w") as spk2utt_file:
        for recording, utterances in segments_per_recording:
            if not utterances:
                # A recording without segments has nothing to list.
                continue
            spk2utt_file.write(
                "{0} {1}\n".format(recording, " ".join(utterances)))
            for utterance in utterances:
                utt2spk_file.write("{0} {1}\n".format(utterance, recording))


if __name__ == "__main__":
    main()