Blame view
egs/wsj/s5/steps/data/make_musan.py
6.65 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
#!/usr/bin/env python3
# Copyright 2015   David Snyder
#           2019   Phani Sankar Nidadavolu
# Apache 2.0.
#
# This file is meant to be invoked by make_musan.sh.

"""Create Kaldi-style ``wav.scp`` and ``utt2spk`` files for the MUSAN corpus.

The input directory is expected to contain ``music``, ``speech`` and ``noise``
sub-directories.  Every ``*.wav`` file found below them becomes one utterance
whose id is the file name without the ``.wav`` extension; each utterance is
its own "speaker".  Music files are additionally filtered through the
``ANNOTATIONS`` files found next to them.
"""

import argparse
import os
import sys

sys.path.append("steps/data/")
sys.path.insert(0, 'steps/')

# Sampling rate at which wav files are referenced directly in wav.scp; any
# other requested rate is produced on the fly by piping the file through sox.
MUSAN_SAMPLING_RATE = 16000


def get_args():
    """Parse and validate the command line.

    Returns:
        The ``argparse.Namespace`` holding ``use_vocals``, ``sampling_rate``,
        ``in_dir`` and ``out_dir``, after it went through ``check_args``.
    """
    # Imported here (not at module level) so that the corpus helpers in this
    # file can be imported without the Kaldi ``steps/`` tree on the path.
    # The sys.path entries needed for this import are set up at module level.
    import libs.common as common_lib

    parser = argparse.ArgumentParser(
        description="Create MUSAN corpus",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--use-vocals", type=str, dest='use_vocals', default=True,
                        action=common_lib.StrToBoolAction, choices=["true", "false"],
                        help='use vocals from the music corpus')
    parser.add_argument('--sampling-rate', type=int, default=16000,
                        help="Sampling rate of the source data. If a positive integer is specified with this option, "
                        "the MUSAN corpus will be resampled to the rate of the source data."
                        "Original MUSAN corpus is sampled at 16KHz. Defaults to 16000 Hz")
    parser.add_argument("in_dir", help="Input data directory")
    parser.add_argument("out_dir", help="Output data directory")

    # Echo the invocation so it shows up in the calling script's log.
    print(' '.join(sys.argv))

    args = parser.parse_args()
    args = check_args(args)
    return args


def check_args(args):
    """Validate parsed arguments and create the output directory if needed.

    Args:
        args: namespace with ``in_dir``, ``out_dir`` and ``sampling_rate``.

    Returns:
        The same namespace, unchanged.

    Raises:
        Exception: if ``in_dir`` does not exist or ``sampling_rate`` is not a
            positive integer.
    """
    if not os.path.exists(args.in_dir):
        raise Exception('input dir {0} does not exist'.format(args.in_dir))
    # A non-positive rate would only surface later as a failing sox pipe.
    if args.sampling_rate <= 0:
        raise Exception('sampling rate must be a positive integer, got {0}'.format(
            args.sampling_rate))
    if not os.path.exists(args.out_dir):
        print("Preparing {0}/musan...".format(args.out_dir))
        os.makedirs(args.out_dir)
    return args


def _utt_id(wav_name):
    """Return the utterance id for a wav file name (the name minus its extension)."""
    return os.path.splitext(wav_name)[0]


def _wav_scp_entry(utt, wav_path, sampling_rate):
    """Return one newline-terminated wav.scp line for ``utt``.

    At the native MUSAN rate the entry points straight at the file; otherwise
    it is a sox command (ending in ``|``) that resamples to ``sampling_rate``.
    """
    if sampling_rate == MUSAN_SAMPLING_RATE:
        return "{0} {1}\n".format(utt, wav_path)
    return "{0} sox -t wav {1} -r {2} -t wav - |\n".format(utt, wav_path, sampling_rate)


def process_music_annotations(path):
    """Read one music ``ANNOTATIONS`` file.

    Each non-blank line must hold at least four whitespace-separated fields:
    utterance id, genres, vocals flag (``Y`` means the track has vocals) and
    musician.  Only the utterance id and the vocals flag are used.

    Args:
        path: path of the ANNOTATIONS file.

    Returns:
        ``(utt2spk, utt2vocals)``: two dicts keyed by utterance id.  The first
        maps each id to itself, the second to ``True`` when the flag is ``Y``.

    Raises:
        ValueError: if a non-blank line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    with open(path, 'r') as annotations:
        for line_number, line in enumerate(annotations, 1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline)
            if len(fields) < 4:
                raise ValueError(
                    "{0}:{1}: expected at least 4 fields, got {2}".format(
                        path, line_number, len(fields)))
            utt, vocals = fields[0], fields[2]
            # For this application, the musician ID isn't important
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals


def prepare_music(root_dir, use_vocals, sampling_rate):
    """Build the utt2spk and wav.scp contents for ``<root_dir>/music``.

    Only utterances listed in an ANNOTATIONS file are considered.  Listed
    utterances without a wav file are reported on stdout and skipped; tracks
    with vocals are left out unless ``use_vocals`` is true.

    Args:
        root_dir: MUSAN root directory.
        use_vocals: whether tracks annotated as containing vocals are kept.
        sampling_rate: target sampling rate for the wav.scp entries.

    Returns:
        ``(utt2spk_str, utt2wav_str)``: file contents with one
        newline-terminated entry per utterance, sorted by utterance id.
    """
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    music_dir = os.path.join(root_dir, "music")
    for root, _dirs, files in os.walk(music_dir):
        for fname in files:
            file_path = os.path.join(root, fname)
            if fname.endswith(".wav"):
                utt2wav[_utt_id(fname)] = file_path
            elif fname == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)

    utt2spk_lines = []
    utt2wav_lines = []
    num_good_files = 0
    num_bad_files = 0
    # Sorted so the output does not depend on directory traversal order.
    for utt in sorted(utt2vocals):
        if utt not in utt2wav:
            print("Missing file {}".format(utt))
            num_bad_files += 1
            continue
        num_good_files += 1
        if use_vocals or not utt2vocals[utt]:
            utt2spk_lines.append("{0} {1}\n".format(utt, utt2spk[utt]))
            utt2wav_lines.append(_wav_scp_entry(utt, utt2wav[utt], sampling_rate))
    print("In music directory, processed {} files; {} had missing wav data".format(
        num_good_files, num_bad_files))
    return "".join(utt2spk_lines), "".join(utt2wav_lines)


def _prepare_unannotated(root_dir, corpus_part, sampling_rate):
    """Shared implementation of ``prepare_speech`` and ``prepare_noise``.

    Walks ``<root_dir>/<corpus_part>`` and emits one entry per ``*.wav`` file.
    Utterances are discovered from the wav files themselves, so none can be
    missing; the count of missing files in the summary line is always 0.
    """
    utt2wav = {}
    for root, _dirs, files in os.walk(os.path.join(root_dir, corpus_part)):
        for fname in files:
            if fname.endswith(".wav"):
                utt2wav[_utt_id(fname)] = os.path.join(root, fname)

    # Sorted so the output does not depend on directory traversal order.
    utts = sorted(utt2wav)
    utt2spk_str = "".join("{0} {0}\n".format(utt) for utt in utts)
    utt2wav_str = "".join(
        _wav_scp_entry(utt, utt2wav[utt], sampling_rate) for utt in utts)
    print("In {} directory, processed {} files; {} had missing wav data".format(
        corpus_part, len(utts), 0))
    return utt2spk_str, utt2wav_str


def prepare_speech(root_dir, sampling_rate):
    """Build the utt2spk and wav.scp contents for ``<root_dir>/speech``.

    Args:
        root_dir: MUSAN root directory.
        sampling_rate: target sampling rate for the wav.scp entries.

    Returns:
        ``(utt2spk_str, utt2wav_str)``: file contents with one
        newline-terminated entry per wav file, sorted by utterance id.
    """
    return _prepare_unannotated(root_dir, "speech", sampling_rate)


def prepare_noise(root_dir, sampling_rate):
    """Build the utt2spk and wav.scp contents for ``<root_dir>/noise``.

    Args:
        root_dir: MUSAN root directory.
        sampling_rate: target sampling rate for the wav.scp entries.

    Returns:
        ``(utt2spk_str, utt2wav_str)``: file contents with one
        newline-terminated entry per wav file, sorted by utterance id.
    """
    return _prepare_unannotated(root_dir, "noise", sampling_rate)


def main():
    """Write ``wav.scp`` and ``utt2spk`` for speech, music and noise to out_dir."""
    args = get_args()
    in_dir = args.in_dir
    out_dir = args.out_dir
    use_vocals = args.use_vocals
    sampling_rate = args.sampling_rate

    utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals, sampling_rate)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir, sampling_rate)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir, sampling_rate)

    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise

    # Context managers guarantee the files are flushed and closed on exit.
    with open(os.path.join(out_dir, "wav.scp"), 'w') as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), 'w') as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


if __name__ == "__main__":
    main()