lre07_eval.sh 2.45 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77


#!/bin/bash
# Copyright  2014   David Snyder
# Apache 2.0.
#
# Calculates the 3s, 10s, and 30s error rates and C_avgs on the LRE07 
# General Language Recognition closed-set using the directory containing 
# the language identification posteriors.  Detailed results such as the 
# probability of misses for individual languages are computed in 
# local/lre07_eval/lre07_results.

. ./cmd.sh
. ./path.sh
set -e

posterior_dir=$1
languages_file=$2

mkdir -p local/lre07_eval/lre07_results
lre07_dir=local/lre07_eval/lre07_results

local/lre07_eval/lre07_targets.pl $posterior_dir/posteriors data/lre07/utt2lang \
  $languages_file $lre07_dir/targets \
  $lre07_dir/nontargets>/dev/null

# Create the the score (eg, targets.scr) file.
local/lre07_eval/score_lre07.v01d.pl -t $lre07_dir/targets -n $lre07_dir/nontargets

# Compute the posterior probabilities for each duration, as well as
# the target and nontarget files.
for dur in "3" "10" "30"; do
  utils/filter_scp.pl -f 1 data/lre07/"$dur"sec \
    $posterior_dir/posteriors > \
    $posterior_dir/posteriors_"$dur"sec
  local/lre07_eval/lre07_targets.pl $posterior_dir/posteriors_"$dur"sec \
    <(utils/filter_scp.pl -f 1 data/lre07/"$dur"sec data/lre07/utt2lang) \
    $languages_file \
    "$lre07_dir"/targets_"$dur"sec "$lre07_dir"/nontargets_"$dur"sec>/dev/null
  local/lre07_eval/score_lre07.v01d.pl -t "$lre07_dir"/targets_"$dur"sec -n \
    "$lre07_dir"/nontargets_"$dur"sec>/dev/null
done

printf '% 15s' 'Duration (sec):'
for dur in "avg" "3" "10" "30"; do
  printf '% 7s' $dur;
done
echo

printf '% 15s' 'ER (%):'

# Get the overall classification and then individual duration error rates.
er=$(compute-wer --text ark:<(lid/remove_dialect.pl data/lre07/utt2lang) \
  ark:$posterior_dir/output 2>/dev/null | grep "WER" | awk '{print $2 }')
printf '% 7.2f' $er

for dur in "3" "10" "30"; do
  er=$(compute-wer --text ark:<(utils/filter_scp.pl -f 1 \
    data/lre07/"$dur"sec data/lre07/utt2lang | lid/remove_dialect.pl -) \
    ark:<(utils/filter_scp.pl -f 1 data/lre07/"$dur"sec \
      $posterior_dir/output) \
    2>/dev/null | grep "WER" | awk '{print $2 }')
    printf '% 7.2f' $er
done
echo

printf '% 15s' 'C_avg (%):'

# Get the overall C_avg and then C_avgs for the individual durations.
cavg=$(tail -n 1 $lre07_dir/targets.scr \
     | awk '{print 100*$4 }')
printf '% 7.2f' $cavg

for dur in "3" "10" "30"; do
  cavg=$(tail -n 1 $lre07_dir/targets_${dur}sec.scr \
       | awk '{print 100.0*$4 }')
  printf '% 7.2f' $cavg
done
echo