#!/bin/bash
# Copyright 2013  Daniel Povey
# Apache 2.0.

set -o pipefail

# Extract iVectors for a set of utterances, given features and a trained
# iVector extractor.  This is based on ^/egs/sre08/v1/sid/extract_ivectors.sh,
# except that instead of producing a single iVector per utterance it emits one
# every --ivector-period frames (e.g. every 10, to save compute).  It is used
# for training (and not-really-online testing of) neural networks for online
# decoding.  Utterances are not processed independently: adaptation state is
# carried forward from one utterance to the next within each speaker.

# Begin configuration section.
nj=30                # number of parallel jobs
cmd="run.pl"         # how to run the jobs (run.pl or queue.pl)
stage=0              # controls partial reruns
num_gselect=5        # Gaussian-selection using the diagonal model: number of
                     # Gaussians to select per frame
min_post=0.025       # minimum posterior to use (smaller posteriors are pruned)
ivector_period=10    # emit an iVector once every this-many frames
posterior_scale=0.1  # Scale on the acoustic posteriors, intended to account
                     # for inter-frame correlations.  Making this small during
                     # iVector extraction is equivalent to scaling up the
                     # prior, and will tend to produce smaller iVectors where
                     # data-counts are small.  It's not so important that this
                     # match the value used when training the iVector
                     # extractor, but more important that it match the value
                     # used when you do real online decoding with the neural
                     # nets trained with these iVectors.
compress=true        # If true, compress the iVectors stored on disk (lossy
                     # compression, as used for feature matrices).
max_count=0          # The use of this option (e.g. --max-count 100) can make
                     # iVectors more consistent for different utterance
                     # lengths, by scaling up the prior term once the
                     # data-count exceeds this value.  The data-count is after
                     # posterior-scaling, so with posterior-scale 0.1,
                     # --max-count 100 starts having effect after 1000 frames,
                     # i.e. 10 seconds of data.
use_vad=false        # if true, weight frames by a vad.scp from the data dir
# End configuration section.
echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 [options] <data> <extractor-dir> <ivector-dir>"
  echo " e.g.: $0 data/train exp/nnet2_online/extractor exp/nnet2_online/ivectors_train"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --nj <n|10>                                      # Number of jobs"
  echo "  --stage <stage|0>                                # To control partial reruns"
  echo "  --num-gselect <n|5>                              # Number of Gaussians to select using"
  echo "                                                   # diagonal model."
  echo "  --min-post <float;default=0.025>                 # Pruning threshold for posteriors"
  echo "  --ivector-period <int;default=10>                # How often to extract an iVector (frames)"
  echo "  --posterior-scale <float;default=0.1>            # Scale on acoustic posteriors"
  echo "  --max-count <int;default=0>                      # If >0, caps data-count (after posterior"
  echo "                                                   # scaling) to stabilize iVector scale"
  echo "  --compress <true|false;default=true>             # If true, compress iVectors on disk (lossy)"
  echo "  --use-vad <true|false;default=false>             # If true, weight frames by <data>/vad.scp"
  exit 1;
fi
data=$1
srcdir=$2
dir=$3

# vad.scp is only required when --use-vad true was given; $use_vad holds the
# string "true" or "false", which is executed here as the bash builtin.
extra_files=
if $use_vad; then
  extra_files=$data/vad.scp
fi

# Check that all required inputs exist.  $extra_files is deliberately left
# unquoted in the list so that, when empty, it contributes no word; $f itself
# is quoted in the test so paths with spaces or glob characters don't break it.
for f in $data/feats.scp $srcdir/final.ie $srcdir/final.dubm $srcdir/global_cmvn.stats $srcdir/splice_opts \
    $srcdir/online_cmvn.conf $srcdir/final.mat $extra_files; do
  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
done

# Set various variables.
mkdir -p $dir/log $dir/conf

# Re-split the data only if the existing split is missing or older than
# feats.scp.
sdata=$data/split$nj;
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
# Record the iVector period so downstream scripts can read it back.
echo $ivector_period > $dir/ivector_period || exit 1;
splice_opts=$(cat $srcdir/splice_opts)

# The program ivector-extract-online2 does a bunch of stuff in memory and is
# config-driven... this was easier in this case because the same code is
# involved in online decoding.  We need to create a config file for iVector
# extraction.
ieconf=$dir/conf/ivector_extractor.conf
echo -n >$ieconf  # truncate (or create) the config file.
cp $srcdir/online_cmvn.conf $dir/conf/ || exit 1;
echo "--cmvn-config=$dir/conf/online_cmvn.conf" >>$ieconf
# One splice option per line, as the config-reading code expects.
# (word-splitting $splice_opts directly; the former $(echo ...) was redundant)
for x in $splice_opts; do echo "$x"; done > $dir/conf/splice.conf
echo "--ivector-period=$ivector_period" >>$ieconf
echo "--splice-config=$dir/conf/splice.conf" >>$ieconf
echo "--lda-matrix=$srcdir/final.mat" >>$ieconf
echo "--global-cmvn-stats=$srcdir/global_cmvn.stats" >>$ieconf
echo "--diag-ubm=$srcdir/final.dubm" >>$ieconf
echo "--ivector-extractor=$srcdir/final.ie" >>$ieconf
echo "--num-gselect=$num_gselect" >>$ieconf
echo "--min-post=$min_post" >>$ieconf
echo "--posterior-scale=$posterior_scale" >>$ieconf
echo "--max-remembered-frames=1000" >>$ieconf # the default
echo "--max-count=$max_count" >>$ieconf

# Fail loudly if we cannot form an absolute path: an empty $absdir would make
# the ark/scp filenames below silently point at the filesystem root.
absdir=$(utils/make_absolute.sh $dir) || exit 1;
[ -n "$absdir" ] || exit 1;

for n in $(seq $nj); do
  # This will do nothing unless the directory $dir/storage exists;
  # it can be used to distribute the data among multiple machines.
  utils/create_data_link.pl $dir/ivector_online.$n.ark
done
# Stage 0: run the actual iVector extraction, one job per split of the data.
if [ $stage -le 0 ]; then
echo "$0: extracting iVectors"
# If --use-vad true, pass per-frame weights (from vad.scp) so that frames
# marked as non-speech do not contribute to the iVector statistics.
extra_opts=
if $use_vad; then
extra_opts="--frame-weights-rspecifier=scp:$data/vad.scp"
fi
# ivector-extract-online2 reads the per-job spk2utt map so it can carry
# adaptation state across utterances of the same speaker; its output is piped
# (the \| is a literal pipe passed through to $cmd) into copy-feats, which
# optionally compresses it and writes paired ark,scp files under $absdir.
# $extra_opts is intentionally unquoted so an empty value adds no argument.
$cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \
ivector-extract-online2 --config=$ieconf $extra_opts \
ark:$sdata/JOB/spk2utt scp:$sdata/JOB/feats.scp ark:- \| \
copy-feats --compress=$compress ark:- \
ark,scp:$absdir/ivector_online.JOB.ark,$absdir/ivector_online.JOB.scp || exit 1;
fi
# Stage 1: concatenate the per-job .scp files into a single ivector_online.scp.
if [ $stage -le 1 ]; then
echo "$0: combining iVectors across jobs"
for j in $(seq $nj); do cat $dir/ivector_online.$j.scp; done >$dir/ivector_online.scp || exit 1;
fi
# Record an identifier for the extractor that produced these iVectors, so
# later stages can verify that iVectors and models match.
steps/nnet2/get_ivector_id.sh $srcdir > $dir/final.ie.id || exit 1
echo "$0: done extracting (online) iVectors to $dir using the extractor in $srcdir."