Blame view

egs/lre07/v1/lid/remove_dialect.pl 579 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  #!/usr/bin/env perl
  # Removes the dialect parts from the language labels in a utt2lang file.
  # For example, "<utt> chinese.wu" is converted to "<utt> chinese".
  
  use strict;
  use warnings;

  # Usage: remove_dialect.pl <utt2lang-file>
  #
  # Reads the file named by the first command-line argument. Each line is
  # expected to hold an utterance id followed by a language label, separated
  # by whitespace. Everything in the label from the first '.' onwards is
  # dropped, and "<utt> <lang>" is written to standard output, one line per
  # non-blank input line.
  my ($utt2lang_file) = @ARGV;
  defined $utt2lang_file
    or die "Usage: $0 <utt2lang-file>\n";

  # Three-argument open with a lexical handle: the file name is never parsed
  # as part of the open mode, and the handle is scoped to this script.
  open(my $utt2lang_fh, '<', $utt2lang_file)
    or die "no utt2lang file: cannot open '$utt2lang_file': $!\n";

  while (my $line = <$utt2lang_fh>) {
    # split on " " skips leading whitespace and splits on any run of
    # whitespace, so the trailing newline is discarded as well.
    my ($utt, $lang_long) = split(" ", $line);

    # A blank (or whitespace-only) line has no fields; skip it rather than
    # emitting an empty record.
    next unless defined $utt;

    # A line that carries only an utterance id keeps an empty language.
    $lang_long = "" unless defined $lang_long;

    # The actual language. Other parts are dialects or subcategories, so
    # cut the label at its first '.'.
    (my $lang = $lang_long) =~ s/[.].*//;

    # Emit each record as soon as it is ready, terminated by exactly one
    # newline, instead of building the whole output in memory first.
    print $utt . " " . $lang . "\n";
  }

  close($utt2lang_fh);