Blame view
egs/lre07/v1/local/lre07_eval/lre07_targets.pl
3.34 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
#!/usr/bin/env perl
#
# Copyright 2014 David Snyder
# Apache 2.0.
#
# Creates the target and nontarget files used by score_lre07.v01d.pl for
# NIST LRE 2007 General Language Recognition closed-set evaluation.
# See http://www.itl.nist.gov/iad/mig//tests/lre/2007/LRE07EvalPlan-v8b.pdf
# for more details on the evaluation.
#
# Inputs:
#   <path-to-posteriors>     one line per utterance: the utterance id followed
#                            by log posteriors (any '[' / ']' characters are
#                            stripped); the k-th value (0-based) belongs to
#                            the language with index k in languages.txt.
#   <path-to-utt2lang>       "<utterance-id> <language-label>" per line.
#   <path-to-languages.txt>  "<language> <index>" per line.
#
# Outputs (one record per line):
#   <path-to-targets-output>     at most one record per utterance, written
#                                when the utterance's own language is in the
#                                closed set.
#   <path-to-nontargets-output>  one record per closed-set language that does
#                                not occur in the utterance's label.
#   Record layout:
#     general_lr <language> closed_set <utt> <t|f> <probability> <label>
#
# Additionally, "<label> <language>" is echoed on STDOUT whenever a model
# language equals the utterance label exactly.

use strict;
use warnings;

# Reads a whitespace-separated file and returns its first two columns as a
# list of [first, second] array refs, in file order. Lines with fewer than
# two fields are ignored. Dies if the file cannot be opened or closed.
sub read_pairs {
  my ($path) = @_;
  open(my $fh, "<", $path) or die "Cannot open $path file: $!";
  my @pairs;
  while (my $line = <$fh>) {
    my @toks = split(" ", $line);
    next if @toks < 2;
    push @pairs, [ @toks[0, 1] ];
  }
  close($fh) or die "Cannot close $path file: $!";
  return @pairs;
}

if (@ARGV != 5) {
  print STDERR "Usage: $0 <path-to-posteriors> <path-to-utt2lang>\n"
    . "    <path-to-languages.txt> <path-to-targets-output>\n"
    . "    <path-to-nontargets-output>\n";
  exit(1);
}

my ($posts, $utt2lang, $languages, $targets, $nontargets) = @ARGV;

# Languages that are scored; matched case-insensitively anywhere in a name.
my $closed_set_re = qr/(?:arabic|bengali|farsi|german|japanese|korean|russian|tamil|thai|vietnamese|chinese|english|hindustani|spanish)/i;

# Placeholder hypothesis used until a real score has been seen on a line.
my $oos_lang = "zzz";

# languages.txt is "<language> <index>"; we need index -> language.
my %idx_to_lang = map { $_->[1] => $_->[0] } read_pairs($languages);
# utt2lang is "<utterance> <label>".
my %utt_to_lang = map { $_->[0] => $_->[1] } read_pairs($utt2lang);

open(my $posts_fh, "<", $posts) or die "Cannot open $posts file: $!";
open(my $targets_fh, ">", $targets) or die "Cannot open $targets file: $!";
open(my $nontargets_fh, ">", $nontargets)
  or die "Cannot open $nontargets file: $!";

my $warned_unmapped = 0;

while (my $line = <$posts_fh>) {
  chomp($line);
  $line =~ s/[\[\]]//g;
  my ($utt, @log_probs) = split(" ", $line);
  next unless defined $utt;    # blank line: nothing to score

  my $actual_lang = $utt_to_lang{$utt};
  if (!defined $actual_lang) {
    # Keep going with an empty label (no target record can match it), but
    # make the inconsistency between the input files visible.
    warn "No language label for utterance $utt in $utt2lang\n";
    $actual_lang = "";
  }

  # Pair every score with the language its column stands for. A column
  # without a language must be dropped: index($str, undef) returns 0, so an
  # unmapped column would otherwise "match" every label.
  my @scored;
  for my $col (0 .. $#log_probs) {
    my $col_lang = $idx_to_lang{$col};
    if (!defined $col_lang) {
      if (!$warned_unmapped) {
        warn "Posterior column $col has no entry in $languages; "
          . "ignoring unmapped columns\n";
        $warned_unmapped = 1;
      }
      next;
    }
    push @scored, [ $col_lang, $log_probs[$col] ];
  }

  my $max_lang = $oos_lang;
  my $max_log_prob = -9**9**9;    # -inf
  my $target_prob = 0;

  # Handle target
  for my $entry (@scored) {
    my ($col_lang, $log_prob) = @$entry;
    # Strict '<' keeps the first column on ties.
    if ($max_log_prob < $log_prob) {
      $max_log_prob = $log_prob;
      $max_lang = $col_lang;
    }
    if ($actual_lang eq $col_lang) {
      print "$actual_lang $col_lang\n";
    }
    # Substring match: a model language counts as the target when its name
    # occurs inside the label (this also covers exact equality).
    if (index($actual_lang, $col_lang) != -1) {
      $target_prob = exp($log_prob);
    }
  }

  # The scored language is the label up to its first '.'.
  (my $lang = $actual_lang) =~ s/[.].*//s;

  if ($lang =~ $closed_set_re) {
    my $decision = index($actual_lang, $max_lang) != -1 ? "t" : "f";
    print {$targets_fh}
      "general_lr $lang closed_set $utt $decision $target_prob $actual_lang\n";
  }

  # Handle nontarget
  for my $entry (@scored) {
    my ($nontarget_lang, $log_prob) = @$entry;
    next if index($actual_lang, $nontarget_lang) != -1;
    next unless $nontarget_lang =~ $closed_set_re;

    my $prob = exp($log_prob);
    # "t" if the nontarget lang is the most probable one.
    my $decision = index($max_lang, $nontarget_lang) != -1 ? "t" : "f";
    print {$nontargets_fh}
      "general_lr $nontarget_lang closed_set $utt $decision $prob $actual_lang\n";
  }
}

close($posts_fh) or die "Cannot close $posts file: $!";
# Buffered write errors surface at close, so these checks matter.
close($targets_fh) or die "Cannot close $targets file: $!";
close($nontargets_fh) or die "Cannot close $nontargets file: $!";