Blame view
egs/lre/v1/local/make_lre03.pl
3.18 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
#!/usr/bin/env perl
use strict;
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2014 David Snyder Daniel Povey
#
# Usage: make_lre03.pl <path-to-LDC2006S31> <output-dir>
#
# Writes Kaldi-style data directories built from the corpus tree:
#   <output-dir>/lre03    wav.scp, utt2lang, utt2spk, spk2gender
#   <output-dir>/lid96d1  wav.scp, utt2lang, utt2spk
#   <output-dir>/lid96e1  wav.scp, utt2lang, utt2spk
# Every utterance is treated as its own speaker (utt2spk maps an id to itself).
# Each directory is then passed to utils/fix_data_dir.sh and
# utils/validate_data_dir.sh; those are invoked by relative path, so the
# script has to be run from a directory that contains utils/.

# Creates $dir (and any missing parents); dies if mkdir fails.
sub make_output_dir {
  my ($dir) = @_;
  # List-form system() bypasses the shell, so whitespace or metacharacters
  # in the path are passed through literally.
  (system("mkdir", "-p", $dir) == 0) || die "Error making directory $dir";
}

# Opens $dir/$name for writing and returns the handle; dies on failure.
sub open_output {
  my ($dir, $name) = @_;
  my $path = "$dir/$name";
  open(my $fh, '>', $path)
    || die "Failed opening output file $path: $!";
  return $fh;
}

# Closes $fh; dies naming $path on failure. Buffered write errors only
# surface at close time, so this check matters for the output files.
sub close_or_die {
  my ($fh, $path) = @_;
  close($fh) || die "Failed closing $path: $!";
}

# Runs the Kaldi fix and validate scripts on $dir; dies if validation fails.
sub fix_and_validate {
  my ($dir) = @_;
  # The exit status of fix_data_dir.sh is not checked here; the validation
  # step that follows is what decides success.
  system("utils/fix_data_dir.sh", $dir);
  (system("utils/validate_data_dir.sh", "--no-text", "--no-feats", $dir) == 0)
    || die "Error validating data dir.";
}

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC2006S31> <output-dir>\n";
  print STDERR "e.g. $0 /export/corpora4/LDC/LDC2006S31 data\n";
  exit(1);
}

my ($base, $out_base_dir) = @ARGV;

# ---- lre03: driven by the key file docs/LID03_KEY.v3 ----
{
  my $db_file = "$base/docs/LID03_KEY.v3";
  open(my $db, '<', $db_file)
    || die "Failed opening input file $db_file: $!";

  my $out_dir = "$out_base_dir/lre03";
  my $data_dir = "$base/data/lid03e1";
  make_output_dir($out_dir);

  my $wav_fh = open_output($out_dir, 'wav.scp');
  my $utt2lang_fh = open_output($out_dir, 'utt2lang');
  my $utt2spk_fh = open_output($out_dir, 'utt2spk');
  my $spk2gen_fh = open_output($out_dir, 'spk2gender');

  while (my $line = <$db>) {
    chomp($line);
    my @toks = split(" ", $line);
    next unless @toks;  # skip blank lines

    my $seg_id = lc $toks[0];
    my $lang = lc $toks[1];
    # $toks[2] is the conversation id; it is not used.
    my $channel = $toks[3];
    my $duration = $toks[4];
    my $gender = lc $toks[6];
    # Channels are either A1 or B2: we want the numeric channel.
    $channel = substr($channel, 1, 1);

    my $wav = "$data_dir/test/$duration/$seg_id.sph";
    if (! -f $wav) {
      print STDERR "No such file $wav\n";
      next;
    }

    my $utt_id = "lre03_${seg_id}";
    print $wav_fh "$utt_id sph2pipe -f wav -p -c $channel $wav |\n";
    print $utt2spk_fh "$utt_id $utt_id\n";
    print $utt2lang_fh "$utt_id $lang\n";
    print $spk2gen_fh "$utt_id $gender\n";
  }

  close_or_die($wav_fh, "$out_dir/wav.scp");
  close_or_die($utt2spk_fh, "$out_dir/utt2spk");
  close_or_die($utt2lang_fh, "$out_dir/utt2lang");
  close_or_die($spk2gen_fh, "$out_dir/spk2gender");
  close_or_die($db, $db_file);

  fix_and_validate($out_dir);
}

# ---- lid96d1 / lid96e1: driven by per-duration seg_lang.ndx files ----
for my $set ("lid96d1", "lid96e1") {
  my $out_dir = "$out_base_dir/$set";
  # The trailing slash produces a doubled '/' in the paths below. It is
  # harmless and kept so that the generated wav.scp entries stay unchanged.
  my $data_dir = "$base/data/$set/test/";
  make_output_dir($out_dir);

  my $wav_fh = open_output($out_dir, 'wav.scp');
  my $utt2lang_fh = open_output($out_dir, 'utt2lang');
  my $utt2spk_fh = open_output($out_dir, 'utt2spk');

  for my $duration ("10", "3", "30") {
    my $key = "$data_dir/$duration/seg_lang.ndx";
    open(my $key_fh, '<', $key)
      || die "Failed opening input file $key: $!";

    while (my $line = <$key_fh>) {
      chomp($line);
      my ($seg_id, $lang) = split(" ", $line);
      next unless defined $seg_id;  # skip blank lines

      my $wav = "$data_dir/$duration/$seg_id.sph";
      my $utt_id = "${set}_${seg_id}";
      print $wav_fh "$utt_id sph2pipe -f wav -p -c 1 $wav |\n";
      print $utt2spk_fh "$utt_id $utt_id\n";
      print $utt2lang_fh "$utt_id $lang\n";
      # Gender information is absent here, not outputting spk2gender file.
    }

    close_or_die($key_fh, $key);
  }

  close_or_die($wav_fh, "$out_dir/wav.scp");
  close_or_die($utt2spk_fh, "$out_dir/utt2spk");
  close_or_die($utt2lang_fh, "$out_dir/utt2lang");

  fix_and_validate($out_dir);
}