make_lre09.pl
2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2014 David Snyder
use strict;  # catch undeclared or misspelled variables in the rest of the script

# Prepares a Kaldi-style data directory for the NIST LRE 2009 evaluation set.
#
# Arguments (from @ARGV):
#   1. Path to the LRE2009 eval directory. It must contain
#      keys/NIST_LRE09_segment.key.v0.txt; *.sph files are searched for
#      anywhere below it.
#   2. Output base directory. Files are written to <output-dir>/lre09.
#
# Outputs wav.scp, utt2lang and utt2spk in <output-dir>/lre09, then runs
# utils/fix_data_dir.sh and utils/validate_data_dir.sh on that directory.
# Both helper scripts are referenced by relative path, so the script has to be
# started from a directory that contains utils/.
#
# Dies on I/O failures, malformed key lines, an unknown channel value, or a
# failed validation; exits with status 1 on a usage error.
if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LRE2009/eval> <output-dir>\n";
  print STDERR "e.g. $0 /export/corpora5/NIST/LRE/LRE2009/eval data\n";
  exit(1);
}

my ($base, $out_base_dir) = @ARGV;
my $out_dir = "$out_base_dir/lre09";
my $db_file = "$base/keys/NIST_LRE09_segment.key.v0.txt";

open(my $db_fh, '<', $db_file)
  or die "Failed opening input file $db_file: $!";

# List-form system() bypasses the shell, so paths containing spaces or shell
# metacharacters are passed through to mkdir unchanged.
system('mkdir', '-p', $out_dir) == 0
  or die "Error making directory $out_dir";

open(my $wav_fh, '>', "$out_dir/wav.scp")
  or die "Failed opening output file $out_dir/wav.scp: $!";
open(my $utt2lang_fh, '>', "$out_dir/utt2lang")
  or die "Failed opening output file $out_dir/utt2lang: $!";
open(my $utt2spk_fh, '>', "$out_dir/utt2spk")
  or die "Failed opening output file $out_dir/utt2spk: $!";

# Map each segment id (the .sph basename without extension) to its full path.
# The list is read straight from find's output through a list-form pipe, so
# no temporary list file or directory is needed and no shell is involved.
open(my $find_fh, '-|', 'find', $base, '-name', '*.sph')
  or die "Error getting list of sph files: $!";
my %sph_path_of;
while (my $sph = <$find_fh>) {
  chomp($sph);
  my $basename = (split m{/}, $sph)[-1];
  (my $raw_basename = $basename) =~ s/\.sph$//
    or die "bad basename $basename";
  # If two files share a basename, the one listed last wins.
  $sph_path_of{$raw_basename} = $sph;
}
# For a pipe, close() also reports a non-zero exit status of find.
close($find_fh) or die "Error getting list of sph files";

# Channel letters used in the key file mapped to sph2pipe channel numbers.
my %channel_number_of = (a => '1', b => '2');

while (my $line = <$db_fh>) {
  chomp($line);
  next if $line =~ /^\s*$/;         # tolerate blank lines
  next if index($line, '#') != -1;  # skip any line containing '#'

  my @toks = split(/,/, $line);
  # Fields 0, 1 and 5 are read below; fail clearly when they are missing.
  @toks >= 6 or die "Malformed line in $db_file: $line";

  my $seg_id  = lc $toks[0];
  my $lang    = lc $toks[1];
  my $channel = $toks[5];

  next if $channel eq 'X';          # channel 'X' entries are not exported
  exists $channel_number_of{$channel} or die "Invalid channel $channel";
  my $channel_number = $channel_number_of{$channel};

  # A segment listed in the key file may have no audio under $base; report
  # it by id instead of interpolating an undefined path.
  my $sph_path = $sph_path_of{$seg_id};
  if (!defined $sph_path) {
    print STDERR "No sph file found for segment $seg_id\n";
    next;
  }
  if (! -f $sph_path) {
    print STDERR "No such file $sph_path\n";
    next;
  }

  my $utt_id = "lre09_${seg_id}_${channel_number}";
  print {$wav_fh} "$utt_id sph2pipe -f wav -p -c $channel_number $sph_path |\n";
  # Each utterance is written as its own speaker.
  print {$utt2spk_fh} "$utt_id $utt_id\n";
  print {$utt2lang_fh} "$utt_id $lang\n";
}

# Buffered write errors only surface at close, so check every handle.
close($wav_fh)      or die "Failed closing $out_dir/wav.scp: $!";
close($utt2spk_fh)  or die "Failed closing $out_dir/utt2spk: $!";
close($utt2lang_fh) or die "Failed closing $out_dir/utt2lang: $!";
close($db_fh)       or die "Failed closing $db_file: $!";

# A fix_data_dir.sh failure is reported but not fatal; the validation step
# that follows is the hard gate.
system('utils/fix_data_dir.sh', $out_dir) == 0
  or warn "Warning: utils/fix_data_dir.sh $out_dir exited with a non-zero status\n";
system('utils/validate_data_dir.sh', '--no-text', '--no-feats', $out_dir) == 0
  or die "Error validating data dir.";