Blame view

egs/lre/v1/local/make_lre03.pl 3.18 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  #!/usr/bin/env perl
  use warnings; #sed replacement for -w perl parameter
  #
  # Copyright 2014  David Snyder  Daniel Povey
   
  
  
  # Exactly two positional arguments are required (corpus path and output
  # directory).  Anything else prints a usage message to STDERR and exits
  # with status 1.  The messages use "\n" escapes so that no indentation
  # from this source file leaks into the printed text.
  if (@ARGV != 2) {
    print STDERR "Usage: $0 <path-to-LDC2006S31> <output-dir>\n";
    print STDERR "e.g. $0 /export/corpora4/LDC/LDC2006S31 data\n";
    exit(1);
  }
  
  # Build <output-dir>/lre03 (wav.scp, utt2lang, utt2spk, spk2gender) from the
  # key file <corpus>/docs/LID03_KEY.v3, then run the fix/validate helper
  # scripts on the result.
  #
  # $base         - corpus root (first command-line argument)
  # $out_base_dir - directory under which the data directories are created
  my ($base, $out_base_dir) = @ARGV;

  my $db_file = $base . "/docs/LID03_KEY.v3";
  open(my $db_fh, '<', $db_file)
    or die "Failed opening input file $db_file: $!";

  my $out_dir = $out_base_dir . "/lre03";
  my $data_dir = $base . "/data/lid03e1";
  # List-form system() does not go through the shell, so directory names
  # containing spaces or shell metacharacters are passed through intact.
  if (system("mkdir", "-p", $out_dir) != 0) {
    die "Error making directory $out_dir";
  }

  open(my $wav_fh, '>', "$out_dir/wav.scp")
    or die "Failed opening output file $out_dir/wav.scp: $!";
  open(my $utt2lang_fh, '>', "$out_dir/utt2lang")
    or die "Failed opening output file $out_dir/utt2lang: $!";
  open(my $utt2spk_fh, '>', "$out_dir/utt2spk")
    or die "Failed opening output file $out_dir/utt2spk: $!";
  open(my $spk2gen_fh, '>', "$out_dir/spk2gender")
    or die "Failed opening output file $out_dir/spk2gender: $!";

  while (my $line = <$db_fh>) {
    chomp($line);
    next if $line =~ /^\s*$/;  # nothing to parse on a blank line

    # Whitespace-separated fields used below: 0 = segment id, 1 = language,
    # 3 = channel, 4 = duration, 6 = gender.  Fields 2 and 5 are not used.
    my @toks = split(" ", $line);
    my $seg_id = lc $toks[0];
    my $lang = lc $toks[1];
    my $duration = $toks[4];
    my $gender = lc $toks[6];
    # The channel is either A1 or B2: we want the numeric channel.
    my $channel = substr($toks[3], 1, 1);

    my $wav = "$data_dir/test/$duration/$seg_id.sph";
    if (! -f $wav) {
      # Segments whose audio file is missing are reported and skipped.
      print STDERR "No such file $wav\n";
      next;
    }
    my $uttId = "lre03_${seg_id}";

    # Each utterance is its own "speaker": utt2spk maps the id to itself and
    # spk2gender is therefore keyed by the utterance id as well.
    print $wav_fh "$uttId sph2pipe -f wav -p -c ${channel} $wav |\n";
    print $utt2spk_fh "$uttId $uttId\n";
    print $utt2lang_fh "$uttId $lang\n";
    print $spk2gen_fh "$uttId $gender\n";
  }

  # Buffered write errors only surface at close, so check every handle.
  close($wav_fh) or die "Failed closing $out_dir/wav.scp: $!";
  close($utt2spk_fh) or die "Failed closing $out_dir/utt2spk: $!";
  close($utt2lang_fh) or die "Failed closing $out_dir/utt2lang: $!";
  close($spk2gen_fh) or die "Failed closing $out_dir/spk2gender: $!";
  close($db_fh) or die "Failed closing $db_file: $!";

  # A failure of the fix-up script is reported but not fatal; the validation
  # step that follows decides whether the directory is usable.
  if (system("utils/fix_data_dir.sh", $out_dir) != 0) {
    print STDERR "Warning: utils/fix_data_dir.sh failed on $out_dir\n";
  }
  (system("utils/validate_data_dir.sh", "--no-text", "--no-feats", $out_dir) == 0)
    or die "Error validating data dir.";
  
  
  # For each of the sets lid96d1 and lid96e1, build <output-dir>/<set>
  # (wav.scp, utt2lang, utt2spk) from the seg_lang.ndx key files under
  # <corpus>/data/<set>/test/{10,3,30}, then run the fix/validate helper
  # scripts on the result.  Reads $base and $out_base_dir set earlier in
  # the script.
  for my $set ("lid96d1", "lid96e1") {
    my $out_dir = $out_base_dir . "/$set";
    # No trailing slash here, so the paths built below contain no "//".
    my $data_dir = $base . "/data/$set/test";
    # List-form system() does not go through the shell, so directory names
    # containing spaces or shell metacharacters are passed through intact.
    if (system("mkdir", "-p", $out_dir) != 0) {
      die "Error making directory $out_dir";
    }

    open(my $wav_fh, '>', "$out_dir/wav.scp")
      or die "Failed opening output file $out_dir/wav.scp: $!";
    open(my $utt2lang_fh, '>', "$out_dir/utt2lang")
      or die "Failed opening output file $out_dir/utt2lang: $!";
    open(my $utt2spk_fh, '>', "$out_dir/utt2spk")
      or die "Failed opening output file $out_dir/utt2spk: $!";

    for my $duration ("10", "3", "30") {
      my $key = "$data_dir/$duration/seg_lang.ndx";
      open(my $key_fh, '<', $key)
        or die "Failed opening input file $key: $!";
      while (my $line = <$key_fh>) {
        chomp($line);
        next if $line =~ /^\s*$/;  # a blank line would yield an empty id

        # Each key line is "<segment-id> <language>".
        my ($seg_id, $lang) = split(" ", $line);

        my $wav = "$data_dir/$duration/$seg_id.sph";
        my $uttId = "${set}_${seg_id}";
        # Channel 1 is always used for these sets; each utterance is its own
        # "speaker", so utt2spk maps the id to itself.
        print $wav_fh "$uttId sph2pipe -f wav -p -c 1 $wav |\n";
        print $utt2spk_fh "$uttId $uttId\n";
        print $utt2lang_fh "$uttId $lang\n";
        # Gender information is absent here, not outputting spk2gender file.
      }
      close($key_fh) or die "Failed closing $key: $!";
    }

    # Buffered write errors only surface at close, so check every handle.
    close($wav_fh) or die "Failed closing $out_dir/wav.scp: $!";
    close($utt2spk_fh) or die "Failed closing $out_dir/utt2spk: $!";
    close($utt2lang_fh) or die "Failed closing $out_dir/utt2lang: $!";

    # A failure of the fix-up script is reported but not fatal; the
    # validation step that follows decides whether the directory is usable.
    if (system("utils/fix_data_dir.sh", $out_dir) != 0) {
      print STDERR "Warning: utils/fix_data_dir.sh failed on $out_dir\n";
    }
    (system("utils/validate_data_dir.sh", "--no-text", "--no-feats", $out_dir) == 0)
      or die "Error validating data dir.";
  }