Blame view
egs/aishell/v1/local/split_data_enroll_eval.py
842 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
#!/usr/bin/env python3

# Copyright 2017 Bengu Wu
# Apache 2.0.

# This script splits the test set utt2spk into an enroll set and an eval set.
# For each speaker, 3 utterances are randomly selected as enroll samples,
# and the others are used as eval samples for evaluation.
# input: test utt2spk
# output: enroll utt2spk, eval utt2spk

import random
import sys

# Number of utterances per speaker that go to the enroll set.
NUM_ENROLL_UTTS = 3


def read_utt2spk(lines):
    """Group utterance ids by speaker.

    Args:
        lines: iterable of text lines, each holding an utterance id and a
            speaker id separated by whitespace.

    Returns:
        A dict mapping speaker id to the list of its utterance ids, in the
        order the speakers and utterances were first seen.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields.
    """
    spk2utts = {}
    for lineno, line in enumerate(lines, 1):
        # split() with no argument drops the trailing newline / carriage
        # return and tolerates tabs or repeated spaces between the fields.
        fields = line.split()
        if not fields:
            continue  # blank line: nothing to record
        if len(fields) != 2:
            raise ValueError(
                "line {}: expected 'UTT SPK', got {!r}".format(lineno, line)
            )
        utt, spk = fields
        spk2utts.setdefault(spk, []).append(utt)
    return spk2utts


def split_enroll_eval(spk2utts, num_enroll=NUM_ENROLL_UTTS):
    """Randomly split each speaker's utterances into enroll and eval sets.

    Args:
        spk2utts: dict mapping speaker id to a list of utterance ids.
        num_enroll: how many utterances per speaker go to the enroll set.
            A speaker with fewer utterances contributes all of them to
            enroll and none to eval.

    Returns:
        A pair ``(enroll, evals)`` of lists of ``(utt, spk)`` tuples.
    """
    enroll = []
    evals = []
    for spk, utts in spk2utts.items():
        # Shuffle a copy so the caller's lists are left untouched.
        shuffled = list(utts)
        random.shuffle(shuffled)
        enroll.extend((utt, spk) for utt in shuffled[:num_enroll])
        evals.extend((utt, spk) for utt in shuffled[num_enroll:])
    return enroll, evals


def write_utt2spk(path, pairs):
    """Write ``(utt, spk)`` pairs to *path*, one 'utt spk' record per line."""
    with open(path, "w", encoding="utf-8") as fout:
        fout.writelines("{} {}\n".format(utt, spk) for utt, spk in pairs)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument vector; defaults to ``sys.argv``. Expected layout is
            ``[prog, test_utt2spk, enroll_utt2spk, eval_utt2spk]``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 4:
        sys.stderr.write(
            "Usage: split_data_enroll_eval.py "
            "TEST_UTT2SPK ENROLL_UTT2SPK EVAL_UTT2SPK\n"
        )
        return 1

    # Read the whole input before opening the outputs, and let the context
    # manager close the handle even if parsing fails.
    with open(argv[1], encoding="utf-8") as fin:
        spk2utts = read_utt2spk(fin)

    enroll, evals = split_enroll_eval(spk2utts)
    write_utt2spk(argv[2], enroll)
    write_utt2spk(argv[3], evals)
    return 0


if __name__ == "__main__":
    sys.exit(main())