Blame view
egs/vystadial_cz/online_demo/build_reference.py
1.82 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
#!/usr/bin/env python
# encoding: utf-8
"""Build reference transcription files from "*.scp" wav lists.

Each line of an scp file holds an utterance name followed by the path of a
wav file.  The transcription of that wav is read from "<wavpath>.trn" and
written to the reference file as one "<name> <transcription>" line.
"""
from __future__ import unicode_literals
from __future__ import print_function

import glob
import sys
import os
import codecs


_USAGE = '''
    Usage: python %(exec)s (audio_directory|in.scp) decode_directory

    Where directory contains files "*.scp" and audio files "*.wav" and their transcriptions "*.wav.trn".
    The "*.scp" files contains of list wav names and their path.

    The %(exec)s looks for "*.scp" files builds a reference from "*.wav.trn"
    '''


def build_reference(wav_scp, ref_path):
    """Write the reference transcriptions for one scp file.

    Args:
        wav_scp: path of a UTF-8 scp file; every non-blank line is
            "<name> <wavpath>".
        ref_path: path of the UTF-8 reference file to create (overwritten
            if it already exists).

    Raises:
        ValueError: if a non-blank scp line does not contain both a name
            and a wav path.
        IOError: if the scp file, the reference file or any
            "<wavpath>.trn" file cannot be opened.
    """
    print(wav_scp, ref_path)
    with codecs.open(ref_path, 'w', 'utf-8') as ref_file:
        with codecs.open(wav_scp, 'r', 'utf-8') as scp_file:
            for line_no, raw_line in enumerate(scp_file, 1):
                entry = raw_line.strip()
                if not entry:
                    # Tolerate blank lines (e.g. a trailing one) instead of
                    # crashing on the unpacking below.
                    continue
                # Split on any whitespace run so tab- or multi-space-
                # separated entries work; the path keeps inner spaces.
                fields = entry.split(None, 1)
                if len(fields) != 2:
                    raise ValueError(
                        '%s:%d: expected "<name> <wavpath>", got %r'
                        % (wav_scp, line_no, entry))
                name, wavpath = fields
                with codecs.open(wavpath + '.trn', 'r', 'utf-8') as trn_file:
                    transcription = trn_file.read().strip()
                # One utterance per line; without the newline all records
                # would run together on a single line.
                ref_file.write('%s %s\n' % (name, transcription))


def main(argv):
    """Run the command line interface.

    Args:
        argv: argument list in ``sys.argv`` layout: program name, then the
            audio directory (or a single scp file), then the target
            directory.

    Returns:
        0 on success, 1 when the arguments are unusable.
    """
    usage = _USAGE % {'exec': argv[0] if argv else 'build_reference.py'}

    if len(argv) != 3:
        print("Wrong number of arguments", file=sys.stderr)
        print(usage, file=sys.stderr)
        return 1

    source, target_dir = argv[1], argv[2]
    if source.endswith('scp'):
        scps = [source]
    else:
        scps = glob.glob(os.path.join(source, '*.scp'))

    if not scps:
        print("No '*.scp' files found", file=sys.stderr)
        print(usage, file=sys.stderr)
        return 1

    if not os.path.isdir(target_dir):
        print("Target directory '%s' does not exist" % target_dir,
              file=sys.stderr)
        print(usage, file=sys.stderr)
        return 1

    for scp in scps:
        # The reference is named after its scp file: "<scp basename>.tra".
        refer = os.path.join(target_dir, os.path.basename(scp) + '.tra')
        build_reference(scp, refer)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))