egs/lre07/v1/lid/run_logistic_regression.sh

#!/bin/bash
# Copyright 2014  David Snyder,  Daniel Povey
# Apache 2.0.
#
# This script trains a logistic regression model on top of
# i-Vectors, and evaluates it on the NIST LRE07 closed-set
# evaluation.

. ./cmd.sh
. ./path.sh
set -e

train_dir=exp/ivectors_train
test_dir=exp/ivectors_lre07
model_dir=exp/ivectors_train
train_utt2lang=data/train_lr/utt2lang
test_utt2lang=data/lre07/utt2lang
prior_scale=1.0
apply_log=true  # If true, the output of the binary logistic-regression-eval
                # is log-posteriors; if false, it is probabilities.
conf=conf/logistic-regression.conf

languages=local/general_lr_closed_set_langs.txt

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

mkdir -p $model_dir/log

model=$model_dir/logistic_regression
model_rebalanced=$model_dir/logistic_regression_rebalanced
train_ivectors="ark:ivector-normalize-length \
  scp:$train_dir/ivector.scp ark:- |";
test_ivectors="ark:ivector-normalize-length \
  scp:$test_dir/ivector.scp ark:- |";
classes="ark:lid/remove_dialect.pl $train_utt2lang \
  | utils/sym2int.pl -f 2 $languages - |"

# A uniform prior (unused alternative to the rebalanced priors below).
#utils/sym2int.pl -f 2 $languages \
#  <(lid/remove_dialect.pl $train_utt2lang) | \
#  awk '{print $2}' | sort -n | uniq -c | \
#  awk 'BEGIN{printf(" [ ");} {printf("%s ", 1.0/$1); } END{print(" ]"); }' \
#  >$model_dir/inv_priors.vec

# Create priors to rebalance the model. The following script rebalances
# the languages as ( count(lang_test) / count(lang_train) )^(prior_scale).
lid/balance_priors_to_test.pl \
  <(lid/remove_dialect.pl <(utils/filter_scp.pl -f 1 \
    $train_dir/ivector.scp $train_utt2lang)) \
  <(lid/remove_dialect.pl $test_utt2lang) \
  $languages \
  $prior_scale \
  $model_dir/priors.vec

logistic-regression-train --config=$conf "$train_ivectors" \
  "$classes" $model \
  2>$model_dir/log/logistic_regression.log

logistic-regression-copy --scale-priors=$model_dir/priors.vec \
  $model $model_rebalanced

logistic-regression-eval --apply-log=$apply_log $model \
  "$train_ivectors" ark,t:$train_dir/posteriors

# Pick the most likely language for each utterance. In the text-format
# posteriors, fields 3..NF-1 hold the per-language scores, so (argmax - 3)
# is the 0-based language index, which int2sym.pl maps back to a name.
cat $train_dir/posteriors | \
  awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }}
        print $1, (argmax - 3); }' | \
  utils/int2sym.pl -f 2 $languages >$train_dir/output

# Note: we treat each language label as a one-word sentence, so the WER/SER
# reported by compute-wer equals the language recognition error rate.
compute-wer --mode=present --text ark:<(lid/remove_dialect.pl $train_utt2lang) \
  ark:$train_dir/output

# Evaluate on the test data (most likely a NIST LRE).
logistic-regression-eval --apply-log=$apply_log $model_rebalanced \
  "$test_ivectors" ark,t:$test_dir/posteriors

cat $test_dir/posteriors | \
  awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }}
        print $1, (argmax - 3); }' | \
  utils/int2sym.pl -f 2 $languages >$test_dir/output

compute-wer --text ark:<(lid/remove_dialect.pl $test_utt2lang) \
  ark:$test_dir/output
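
# ----------------------------------------------------------------------
# Worked example of the rebalancing formula above (a sketch, not part of
# the recipe; the counts are made up for illustration): with
# prior_scale=1.0, a language with 500 training utterances but 80 test
# utterances gets its prior scaled by (80/500)^1.0 = 0.16 before
# logistic-regression-copy applies it, so languages over-represented in
# training are down-weighted at test time. Assuming priors.vec is a
# plain-text vector, as the commented-out uniform-prior block suggests,
# it can be inspected directly:
#cat $model_dir/priors.vec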
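
# ----------------------------------------------------------------------
# A minimal sanity-check sketch (commented out; an assumption, not part
# of the recipe): recompute the error rate without compute-wer by joining
# the hypotheses against the dialect-collapsed reference. Both inputs are
# "utt lang" pairs, so after the join, field 2 (hypothesis) and field 3
# (reference) disagree exactly on the misclassified utterances. The file
# name ref.txt is hypothetical.
#lid/remove_dialect.pl $test_utt2lang | sort >$test_dir/ref.txt
#sort $test_dir/output | join - $test_dir/ref.txt | \
#  awk '{n++; if ($2 != $3) err++;}
#       END{printf("%%SER %.2f [ %d / %d ]\n", 100*err/n, err, n);}'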