data.py 2.69 KB
import argparse
import sys

from utils import SubCommandRunner

import core.data




def filter_file(file, filter, outfile):
    """Write the entries of ``file`` whose ids are listed in ``filter``.

    The id/value data is loaded with ``core.data.read_id_values`` and the id
    list with ``core.data.read_lst``; both are read before ``outfile`` is
    opened. One line is then emitted per listed id, in the order the ids
    appear in the list, via ``core.data.write_line``.

    Args:
        file: Path handed to ``core.data.read_id_values``.
        filter: Path handed to ``core.data.read_lst``.
        outfile: Path of the file to create (opened in ``"w"`` mode, so an
            existing file is overwritten).

    NOTE(review): a listed id that is absent from the loaded data makes the
    lookup fail (presumably a ``KeyError`` if ``read_id_values`` returns a
    dict - confirm against ``core.data``).
    """
    values_by_id = core.data.read_id_values(file)
    wanted_ids = core.data.read_lst(filter)

    with open(outfile, "w") as sink:
        for key in wanted_ids:
            core.data.write_line(key, values_by_id[key], sink)
        
    

    

def convert(file, type_from, type):
    """Placeholder for the ``convert`` sub-command; not implemented yet.

    The arguments are accepted and ignored, and nothing is read or written.

    Args:
        file: Value of the ``--file`` option.
        type_from: Value of the ``--type-from`` option.
        type: Value of the ``--type`` option.

    Returns:
        Always ``None``.
    """
    return None

def utt2dur(wavscp: str, outfile: str):
    """Write an utterance-to-duration table for the WAV files listed in ``wavscp``.

    Every non-blank line of ``wavscp`` is read as ``<utterance-id> <wav-path>``:
    the id is the first whitespace-delimited token and the remainder of the
    line (surrounding whitespace stripped) is the path, so ids and paths may
    be separated by spaces or tabs and the path itself may contain spaces.
    For each entry a line ``<utterance-id> <duration>`` is written to
    ``outfile``, where the duration is the frame count divided by the frame
    rate reported by the ``wave`` module.

    Args:
        wavscp: Path of the listing file to read.
        outfile: Path of the file to create (overwritten if it exists).

    Raises:
        ValueError: If a non-blank line holds an id but no wav path.
        wave.Error: If a listed file cannot be parsed by the ``wave`` module.
        OSError: If ``wavscp``, ``outfile`` or a listed wav file cannot be
            opened.
    """
    # Imported locally because the module header does not import ``wave``.
    import wave

    with open(wavscp, "r") as scp, open(outfile, "w") as out:
        for line_no, line in enumerate(scp, start=1):
            # Strip first so the path does not keep the trailing newline, then
            # split once on any whitespace run to keep spaces inside the path.
            fields = line.strip().split(maxsplit=1)
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no entry.
                continue
            if len(fields) < 2:
                raise ValueError(
                    f"{wavscp}:{line_no}: expected '<id> <wav-path>', got {line!r}"
                )
            utt_id, wav_path = fields

            with wave.open(wav_path, "rb") as wav_f:
                duration = wav_f.getnframes() / float(wav_f.getframerate())

            out.write(f"{utt_id} {duration}\n")


if __name__ == "__main__":
    # Command-line entry point: one sub-command per data-management action.
    # Each sub-parser records its own name under ``which`` so the matching
    # function can be looked up after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="manage files")
    subparsers = parser.add_subparsers(title="actions")

    # Filter ids
    parser_filter = subparsers.add_parser("filter", help="filter a file")
    parser_filter.add_argument("--file", type=str, help="path of the file to filter", required=True)
    parser_filter.add_argument("--filter", type=str, help="filter file [id_from] [id_to]", required=True)
    parser_filter.add_argument("--outfile", type=str, help="output file", required=True)
    parser_filter.set_defaults(which="filter")

    # Convert
    parser_convert = subparsers.add_parser("convert", help="convert a file")
    parser_convert.add_argument("--file", type=str, help="...", required=True)
    parser_convert.add_argument("--type-from", type=str, choices=["old-masseffect", "new-masseffect"], help="...", required=True)
    parser_convert.add_argument("--type", choices=["old-masseffect", "new-masseffect"], required=True)
    parser_convert.set_defaults(which="convert")

    # utt2dur
    parser_utt2dur = subparsers.add_parser("utt2dur", help="generate utt2dur file")
    parser_utt2dur.add_argument("--wavscp", type=str, help="wav file", required=True)
    parser_utt2dur.add_argument("--outfile", type=str, default="utt2dur", help="output file")
    parser_utt2dur.set_defaults(which="utt2dur")

    # TODO: utt2label_to_label2utt

    # TODO: label2utt_to_utt2label

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional in Python 3 argparse: when none is given,
    # ``which`` is never set and reading it would raise AttributeError.
    # Report a normal usage error instead (prints usage, exits with status 2).
    if getattr(args, "which", None) is None:
        parser.error("an action is required (filter, convert or utt2dur)")

    # Run commands
    runner = SubCommandRunner({
        "convert": convert,
        "filter": filter_file,
        "utt2dur": utt2dur,
    })

    # The parsed options are forwarded as-is; ``which`` only selects the
    # command, so the runner is asked to remove it.
    runner.run(args.which, vars(args), remove="which")