Blame view
egs/dihard_2018/v1/local/make_voxceleb1_v2.pl
4.48 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
#!/usr/bin/perl
#
# Copyright 2018  Ewald Enzinger
#           2018  David Snyder
#           2019  Soonshin Seo
#
# Usage: make_voxceleb1_v2.pl /export/voxceleb1 dev data/dev
#
# The VoxCeleb1 corpus underwent several updates that changed the directory and speaker ID format.
# The script 'make_voxceleb1.pl' works for the oldest version of the corpus.
# This script should be used if you've downloaded the corpus recently.
#
# Expected input layout:
#   <path-to-voxceleb1>/<dataset>/wav/<speaker>/<recording>/<name>.wav
#
# Files written to <path-to-data-dir>:
#   wav.scp  - "<utt-id> <path-to-wav>" per utterance
#   utt2spk  - "<utt-id> <speaker>" per utterance
#   spk2utt  - produced by utils/utt2spk_to_spk2utt.pl
#   trials   - ('test' only) "<utt-id1> <utt-id2> target|nontarget" per trial
# where <utt-id> is "<speaker>-<recording>-<name>".
#
# For the 'test' dataset the trial list voxceleb1_test_v2.txt is read from
# <path-to-voxceleb1>; it is downloaded with wget if it is not there yet.

use strict;
use warnings;

# Returns the names of the entries of $dir that are directories, excluding
# '.' and '..'.  Dies if $dir cannot be opened.
sub list_subdirs {
  my ($dir) = @_;
  opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
  my @subdirs = grep { -d "$dir/$_" && !/^\.{1,2}$/ } readdir($dh);
  closedir($dh);
  return @subdirs;
}

# Returns the names of the '*.wav' entries of $dir with the extension removed.
# Dies if $dir cannot be opened.
sub list_wav_names {
  my ($dir) = @_;
  opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
  my @names;
  foreach my $entry (grep { /\.wav$/ } readdir($dh)) {
    (my $name = $entry) =~ s/\.[^.]+$//;
    push @names, $name;
  }
  closedir($dh);
  return @names;
}

# Writes $out_dir/wav.scp and $out_dir/utt2spk with one line per wav file
# found under $wav_root/<speaker>/<recording>/ for each speaker directory
# name in @spkr_dirs.
sub write_wav_scp_and_utt2spk {
  my ($wav_root, $out_dir, @spkr_dirs) = @_;

  open(my $spkr_fh, ">", "$out_dir/utt2spk")
    or die "could not open the output file $out_dir/utt2spk";
  open(my $wav_fh, ">", "$out_dir/wav.scp")
    or die "could not open the output file $out_dir/wav.scp";

  foreach my $spkr_id (@spkr_dirs) {
    foreach my $rec_id (list_subdirs("$wav_root/$spkr_id")) {
      my $rec_dir = "$wav_root/$spkr_id/$rec_id";
      foreach my $name (list_wav_names($rec_dir)) {
        my $utt_id = "$spkr_id-$rec_id-$name";
        print {$wav_fh} "$utt_id $rec_dir/$name.wav\n";
        print {$spkr_fh} "$utt_id $spkr_id\n";
      }
    }
  }

  # Buffered write errors only surface at close, so check it.
  close($spkr_fh) or die "could not close $out_dir/utt2spk: $!";
  close($wav_fh) or die "could not close $out_dir/wav.scp: $!";
  return;
}

# Converts a trial-list path "<speaker>/<recording>/<name>" into the utterance
# ID "<speaker>-<recording>-<name>".  A trailing ".wav" on <name> is removed,
# because the IDs written to wav.scp and utt2spk are built from extension-less
# names and the trial IDs have to match them.
sub trial_path_to_utt_id {
  my ($path) = @_;
  my ($spkr_id, $rec_id, $name) = split('/', $path);
  defined $name or die "Unexpected path '$path' in the verification trials file";
  $name =~ s/\.wav$//;
  return "$spkr_id-$rec_id-$name";
}

# Reads the verification trial list $trials_src (fields: label, path1, path2;
# label "1" means target) and writes "<utt-id1> <utt-id2> target|nontarget"
# lines to $out_dir/trials.
sub write_trials {
  my ($trials_src, $out_dir) = @_;

  open(my $trial_in, "<", $trials_src)
    or die "could not open the verification trials file $trials_src";
  open(my $trial_out, ">", "$out_dir/trials")
    or die "Could not open the output file $out_dir/trials";

  while (my $line = <$trial_in>) {
    chomp $line;
    my @fields = split(" ", $line);
    next if !@fields;  # tolerate blank lines
    @fields >= 3
      or die "Malformed line $. in the verification trials file $trials_src";
    my ($tar_or_non, $path1, $path2) = @fields;

    my $utt_id1 = trial_path_to_utt_id($path1);
    my $utt_id2 = trial_path_to_utt_id($path2);
    my $target = ($tar_or_non eq "1") ? "target" : "nontarget";
    print {$trial_out} "$utt_id1 $utt_id2 $target\n";
  }

  close($trial_out) or die "could not close $out_dir/trials: $!";
  close($trial_in) or die "could not close $trials_src: $!";
  return;
}

if (@ARGV != 3) {
  print STDERR "Usage: $0 <path-to-voxceleb1> <dataset> <path-to-data-dir>\n";
  print STDERR "e.g. $0 /export/voxceleb1 dev data/dev\n";
  exit(1);
}

my ($data_base, $dataset, $out_dir) = @ARGV;

if ($dataset ne "dev" && $dataset ne "test") {
  die "dataset parameter must be 'dev' or 'test'!";
}

# List-form system() runs the command without a shell, so paths containing
# spaces or shell metacharacters are passed through unchanged.
if (system("mkdir", "-p", $out_dir) != 0) {
  die "Error making directory $out_dir";
}

my $wav_root = "$data_base/$dataset/wav";
print "$wav_root\n";

my @spkr_dirs = list_subdirs($wav_root);

if ($dataset eq "test") {
  my $trials_src = "$data_base/voxceleb1_test_v2.txt";
  if (! -e $trials_src) {
    if (system("wget", "-O", $trials_src,
               "http://www.openslr.org/resources/49/voxceleb1_test_v2.txt") != 0) {
      # 'wget -O' creates the output file even when the download fails; remove
      # it so that the next run does not mistake it for a valid trial list.
      unlink($trials_src);
      die "Error downloading the verification trials file to $trials_src";
    }
  }
  write_trials($trials_src, $out_dir);
}

# Both datasets get wav.scp and utt2spk built from the same directory layout.
write_wav_scp_and_utt2spk($wav_root, $out_dir, @spkr_dirs);

# This call needs the shell for the output redirection.
if (system(
  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
# The exit status of fix_data_dir.sh is not checked; the validation step that
# follows is the one that decides whether the directory is usable.
system("env", "LC_COLLATE=C", "utils/fix_data_dir.sh", $out_dir);
if (system("env", "LC_COLLATE=C", "utils/validate_data_dir.sh",
           "--no-text", "--no-feats", $out_dir) != 0) {
  die "Error validating directory $out_dir";
}