Blame view

egs/lre07/v2/local/lre07_eval/lre07_targets.pl 3.34 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
  #!/usr/bin/env perl
  #
  # Copyright 2014  David Snyder
  # Apache 2.0.
  #
  # Creates the target and nontarget files used by score_lre07.v01d.pl for
  # NIST LRE 2007 General Language Recognition closed-set evaluation.
  # See http://www.itl.nist.gov/iad/mig//tests/lre/2007/LRE07EvalPlan-v8b.pdf
  # for more details on the evaluation. 
  
  # The script takes exactly five positional arguments. With any other count,
  # write the usage text to STDERR and stop with exit status 1.
  unless (scalar(@ARGV) == 5) {
    # Backslash-newline inside a double-quoted string is just a newline, so
    # the message is printed as several indented lines.
    my $usage = "Usage: $0 <path-to-posteriors> <path-to-utt2lang> \
      <path-to-languages.txt> <path-to-targets-output> \
      <path-to-nontargets-output>
  ";
    print STDERR $usage;
    exit(1);
  }
  
  # Positional arguments, in the order given in the usage message.  These and
  # the lookup tables below are left as package variables because the rest of
  # the script refers to them by name.
  ($posts, $utt2lang, $languages, $targets, $nontargets) = @ARGV;
  %lang_to_idx = ();  # language name -> index (filled here; not read below)
  %idx_to_lang = ();  # index -> language name; index N names posterior column N
  %utt_to_lang = ();  # utterance id -> language (filled from the utt2lang file)
  $oos_lang = "zzz";  # not read anywhere in the visible script

  # Read the languages file: each line is "<language> <index>".
  open(my $lang_fh, "<", $languages)
    || die "Cannot open $languages file: $!";
  while (my $entry = <$lang_fh>) {
    chomp($entry);
    my ($name, $column) = split(" ", $entry);
    # Blank or one-field lines have no index; recording them would only add
    # entries under an empty key.
    next unless defined $column;
    $lang_to_idx{$name} = $column;
    $idx_to_lang{$column} = $name;
  }
  close($lang_fh) || die "Error closing $languages: $!";
  
  # Read the utterance-to-language map: each line is "<utt-id> <language>".
  # Fills the package hash %utt_to_lang, which is consulted for every
  # utterance found in the posteriors file.
  open(my $utt2lang_fh, "<", $utt2lang)
    || die "Cannot open $utt2lang file: $!";
  while (my $entry = <$utt2lang_fh>) {
    chomp($entry);
    my ($utt_id, $utt_lang) = split(" ", $entry);
    # Blank or one-field lines carry no language; storing them would only add
    # keys whose value is undef, which reads the same as a missing key.
    next unless defined $utt_lang;
    $utt_to_lang{$utt_id} = $utt_lang;
  }
  close($utt2lang_fh) || die "Error closing $utt2lang: $!";
  
  # Walk the posteriors file and write one TARGETS record per utterance plus
  # one NONTARGETS record per competing language.
  #
  # Each posteriors line is "<utt-id> [ v0 v1 ... ]".  Value vN belongs to the
  # language stored under index N in %idx_to_lang and is passed through exp()
  # before being written, i.e. it is treated as a log-probability.
  #
  # Record layout (space separated):
  #   general_lr <language> closed_set <utt-id> <t|f> <probability> <actual-lang>
  # The t/f field is "t" when <language> is contained in the name of the
  # highest-scoring language (nontarget records) or when the highest-scoring
  # language is contained in the utterance's actual language (target records).
  #
  # Reads the package variables $posts, $targets, $nontargets, %idx_to_lang
  # and %utt_to_lang set up earlier in the script.
  open(my $posts_fh, "<", $posts) || die "Cannot open $posts file: $!";
  open(my $targets_fh, ">", $targets) || die "Cannot open $targets file: $!";
  open(my $nontargets_fh, ">", $nontargets)
    || die "Cannot open $nontargets file: $!";

  # Only languages whose name contains one of these words (case-insensitive)
  # produce records.
  my $eval_lang_alternatives = join("|", qw(
    arabic bengali farsi german japanese korean russian tamil thai
    vietnamese chinese english hindustani spanish
  ));
  my $eval_lang_re = qr/(?:$eval_lang_alternatives)/i;

  while (my $line = <$posts_fh>) {
    chomp($line);
    $line =~ s/[\[\]]//g;  # drop the square brackets around the vector
    my @toks = split(" ", $line);
    next unless @toks;     # blank line: nothing to score

    my $utt = $toks[0];
    # An utterance missing from utt2lang gets an empty actual language: it
    # yields no TARGETS record, but NONTARGETS records are still written.
    my $actual_lang = defined($utt_to_lang{$utt}) ? $utt_to_lang{$utt} : "";

    my $max_lang = "zzz";         # placeholder used when no column is scored
    my $max_log_prob = -9**9**9;  # -inf
    my $target_prob = 0;

    # Handle target: find the best-scoring language and the probability
    # assigned to the utterance's actual language.
    for my $col (1 .. $#toks) {
      my $col_lang = $idx_to_lang{$col - 1};
      # index() finds an empty needle in every string, so a column without a
      # language name must not be allowed to pass for the target (or to win
      # the arg-max with an empty name).
      next unless defined($col_lang) && length($col_lang);

      if ($max_log_prob < $toks[$col]) {
        $max_log_prob = $toks[$col];
        $max_lang = $col_lang;
      }
      # NOTE(review): this STDOUT line looks like leftover debugging output;
      # kept so that what the script prints is unchanged -- confirm and drop.
      if ($actual_lang eq $col_lang) {
        print "$actual_lang $col_lang\n";
      }
      # Substring match (which also covers equality): the actual language may
      # be a longer, dotted form of the column's language name.
      if (index($actual_lang, $col_lang) != -1) {
        $target_prob = exp($toks[$col]);
      }
    }

    # The reported language is the part of the actual language before the
    # first ".".
    my $lang = $actual_lang;
    if (index($actual_lang, ".") != -1) {
      my @lang_parts = split("[.]", $actual_lang);
      $lang = $lang_parts[0];
    }

    if ($lang =~ $eval_lang_re) {
      my $decision = (index($actual_lang, $max_lang) != -1) ? "t" : "f";
      print {$targets_fh} "general_lr $lang closed_set $utt $decision "
            ."$target_prob $actual_lang\n";
    }

    # Handle nontarget: one record for every other evaluated language.
    for my $col (1 .. $#toks) {
      my $nontarget_lang = $idx_to_lang{$col - 1};
      next unless defined($nontarget_lang) && length($nontarget_lang);
      # Skip the language that is (part of) the actual one: that is the
      # target trial handled above.
      next if index($actual_lang, $nontarget_lang) != -1;
      next unless $nontarget_lang =~ $eval_lang_re;

      my $prob = exp($toks[$col]);
      # "t" when this nontarget language is the most probable one.
      my $decision = (index($max_lang, $nontarget_lang) != -1) ? "t" : "f";
      print {$nontargets_fh} "general_lr $nontarget_lang closed_set $utt "
            ."$decision $prob $actual_lang\n";
    }
  }
  close($posts_fh) || die "Error closing $posts: $!";
  # Buffered write errors surface at close, so report them with the reason.
  close($targets_fh) || die "Error closing $targets: $!";
  close($nontargets_fh) || die "Error closing $nontargets: $!";