Blame view
egs/lre/v1/lid/balance_priors_to_test.pl
1.39 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
#!/usr/bin/env perl
#
# Usage:
#   balance_priors_to_test.pl <train-utt2lang> <test-utt2lang> <languages> <priors-out>
#
# For every language listed in <languages> (lines of "<lang> <index>"), computes
#   ratio = (#test utterances of lang) / (#training utterances of lang)
# from the two utt2lang files (lines of "<utt> <lang>") and writes the ratios,
# ordered by language index, to <priors-out> as a single vector:
#   " [ r0 r1 ... rN  ]"   (no trailing newline)
#
# File names are taken literally (three-argument open).

use strict;
use warnings;    #sed replacement for -w perl parameter

# Counts how often each language (second field) occurs in an utt2lang file.
#   $path  - file to read
#   $label - short description ("training", "test") used in the error message
# Returns a hash reference: language => number of utterances.
# Dies if the file cannot be opened.
sub count_languages {
    my ($path, $label) = @_;

    open(my $fh, '<', $path)
        or die "no utt2lang $label file: cannot open '$path': $!\n";

    my %count_of;
    while (my $line = <$fh>) {
        chomp($line);
        my (undef, $lang) = split(' ', $line);
        next unless defined $lang;    # blank line or line without a language
        $count_of{$lang}++;
    }
    close($fh);

    return \%count_of;
}

# Reads the languages file ("<lang> <index>" per line).
#   $path - file to read
# Returns an array reference mapping index => language name.
# Dies if the file cannot be opened or a non-blank line has no
# non-negative integer index.
sub load_languages {
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "cannot open languages file '$path': $!\n";

    my @idx_to_lang;
    while (my $line = <$fh>) {
        chomp($line);
        my ($lang, $idx) = split(' ', $line);
        next unless defined $lang;    # blank line must not clobber index 0
        die "malformed line $. in languages file '$path': '$line'\n"
            unless defined $idx && $idx =~ /\A\d+\z/;
        $idx_to_lang[$idx] = $lang;
    }
    close($fh);

    return \@idx_to_lang;
}

# Builds the priors vector string.
#   $train_count - hash ref: language => training utterance count
#   $test_count  - hash ref: language => test utterance count
#   $idx_to_lang - array ref: index => language name
# Returns the string " [ r0 r1 ... rN  ]" where ri = test count / train count.
# A language absent from the test data gets ratio 0.
# Dies with a descriptive message when an index has no language or a language
# has no training utterances (the ratio would require dividing by zero).
sub format_priors {
    my ($train_count, $test_count, $idx_to_lang) = @_;

    my $priors = " [ ";
    for my $idx (0 .. $#{$idx_to_lang}) {
        my $lang = $idx_to_lang->[$idx];
        die "languages file defines no language for index $idx\n"
            unless defined $lang;

        my $train = $train_count->{$lang};
        die "language '$lang' has no training utterances; "
            . "cannot compute its prior ratio\n"
            unless $train;

        my $test = exists $test_count->{$lang} ? $test_count->{$lang} : 0;
        my $ratio = (1.0 * $test) / $train;
        $priors .= "$ratio ";
    }
    $priors .= " ]";

    return $priors;
}

# Entry point: takes the four command-line arguments, writes the priors file.
# Returns 0 on success; dies on any I/O or data error.
sub main {
    my @args = @_;
    die "Usage: $0 <train-utt2lang> <test-utt2lang> <languages> <priors-out>\n"
        if @args < 4;
    my ($train_file, $test_file, $lang_file, $priors_file) = @args;

    my $train_count = count_languages($train_file, "training");
    my $test_count  = count_languages($test_file,  "test");
    my $idx_to_lang = load_languages($lang_file);

    my $priors = format_priors($train_count, $test_count, $idx_to_lang);

    open(my $out, '>', $priors_file)
        or die "cannot open priors file '$priors_file' for writing: $!\n";
    print {$out} $priors
        or die "cannot write to priors file '$priors_file': $!\n";
    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "cannot close priors file '$priors_file': $!\n";

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded by tests.
exit(main(@ARGV)) unless caller;

1;