egs/voxceleb/v1/run.sh

#!/bin/bash
# Copyright   2017   Johns Hopkins University (Author: Daniel Garcia-Romero)
#             2017   Johns Hopkins University (Author: Daniel Povey)
#        2017-2018   David Snyder
#             2018   Ewald Enzinger
# Apache 2.0.
#
# See ../README.txt for more info on the required data.
# Results (mostly equal error rates, EERs) are inline in comments below.
  
. ./cmd.sh
. ./path.sh
set -e
mfccdir=$(pwd)/mfcc
vaddir=$(pwd)/mfcc

# The trials file is downloaded by local/make_voxceleb1_v2.pl.
voxceleb1_trials=data/voxceleb1_test/trials
voxceleb1_root=/export/corpora/VoxCeleb1
voxceleb2_root=/export/corpora/VoxCeleb2

stage=0
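# To resume a partially finished run, set stage to the first step you
# want to redo (e.g. stage=5 reruns only the backend training and
# scoring); the "-le" guards below skip all earlier stages.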
  
if [ $stage -le 0 ]; then
  local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train
  local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test
  # This script creates data/voxceleb1_test and data/voxceleb1_train for the
  # latest version of VoxCeleb1. Our evaluation set is the test portion of
  # VoxCeleb1.
  local/make_voxceleb1_v2.pl $voxceleb1_root dev data/voxceleb1_train
  local/make_voxceleb1_v2.pl $voxceleb1_root test data/voxceleb1_test
  # If you downloaded the dataset soon after it was released, you will want
  # to use the make_voxceleb1.pl script instead.
  # local/make_voxceleb1.pl $voxceleb1_root data
  # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1.
  # This should give 7,323 speakers and 1,276,888 utterances.
  utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1_train
fi
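
# Sanity check (not part of the original recipe): spk2utt has one line per
# speaker and utt2spk one line per utterance, so the counts should match
# the 7,323 speakers and 1,276,888 utterances quoted above.
# wc -l data/train/spk2utt data/train/utt2spk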
  
if [ $stage -le 1 ]; then
  # Make MFCCs and compute the energy-based VAD for each dataset.
  for name in train voxceleb1_test; do
    steps/make_mfcc.sh --write-utt2num-frames true \
      --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
      data/${name} exp/make_mfcc $mfccdir
    utils/fix_data_dir.sh data/${name}
    sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
      data/${name} exp/make_vad $vaddir
    utils/fix_data_dir.sh data/${name}
  done
fi
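
# The VAD output is one 0/1 decision per frame, stored as a Kaldi vector.
# To eyeball it (not part of the original recipe; the scp path assumes the
# default output location of sid/compute_vad_decision.sh):
# copy-vector scp:data/train/vad.scp ark,t:- 2>/dev/null | head -n 1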
  
if [ $stage -le 2 ]; then
  # Train the UBM.
  sid/train_diag_ubm.sh --cmd "$train_cmd --mem 4G" \
    --nj 40 --num-threads 8 \
    data/train 2048 \
    exp/diag_ubm

  sid/train_full_ubm.sh --cmd "$train_cmd --mem 25G" \
    --nj 40 --remove-low-count-gaussians false \
    data/train \
    exp/diag_ubm exp/full_ubm
fi
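
# The 2048-Gaussian diagonal UBM bootstraps the full-covariance UBM. To
# confirm both models look sane (not part of the original recipe; assumes
# the scripts' default final.dubm/final.ubm filenames):
# gmm-global-info exp/diag_ubm/final.dubm
# fgmm-global-info exp/full_ubm/final.ubm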
  
if [ $stage -le 3 ]; then
  # In this stage, we train the i-vector extractor.
  #
  # Note that there are well over 1 million utterances in our training set,
  # and it takes an extremely long time to train the extractor on all of
  # them.  Also, most of those utterances are very short.  Short utterances
  # are harmful for training the i-vector extractor.  Therefore, to reduce
  # the training time and improve performance, we will only train on the
  # 100k longest utterances.
  utils/subset_data_dir.sh \
    --utt-list <(sort -n -k 2 data/train/utt2num_frames | tail -n 100000) \
    data/train data/train_100k
  # Train the i-vector extractor.
  sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 16G" \
    --ivector-dim 400 --num-iters 5 \
    exp/full_ubm/final.ubm data/train_100k \
    exp/extractor
fi
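
# utt2num_frames is sorted numerically on its second column (the frame
# count), so the tail holds the longest utterances. To check the shortest
# utterance that made it into the subset (not part of the original recipe):
# sort -n -k 2 data/train_100k/utt2num_frames | head -n 1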
  
if [ $stage -le 4 ]; then
  sid/extract_ivectors.sh --cmd "$train_cmd --mem 4G" --nj 80 \
    exp/extractor data/train \
    exp/ivectors_train

  sid/extract_ivectors.sh --cmd "$train_cmd --mem 4G" --nj 40 \
    exp/extractor data/voxceleb1_test \
    exp/ivectors_voxceleb1_test
fi
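
# Each utterance now has a single 400-dimensional i-vector. To print one in
# text form (not part of the original recipe):
# copy-vector scp:exp/ivectors_train/ivector.scp ark,t:- 2>/dev/null | head -n 1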
  
if [ $stage -le 5 ]; then
  # Compute the mean vector for centering the evaluation i-vectors.
  $train_cmd exp/ivectors_train/log/compute_mean.log \
    ivector-mean scp:exp/ivectors_train/ivector.scp \
    exp/ivectors_train/mean.vec || exit 1;

  # This script uses LDA to decrease the dimensionality prior to PLDA.
  lda_dim=200
  $train_cmd exp/ivectors_train/log/lda.log \
    ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \
    "ark:ivector-subtract-global-mean scp:exp/ivectors_train/ivector.scp ark:- |" \
    ark:data/train/utt2spk exp/ivectors_train/transform.mat || exit 1;

  # Train the PLDA model.
  $train_cmd exp/ivectors_train/log/plda.log \
    ivector-compute-plda ark:data/train/spk2utt \
    "ark:ivector-subtract-global-mean scp:exp/ivectors_train/ivector.scp ark:- | transform-vec exp/ivectors_train/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    exp/ivectors_train/plda || exit 1;
fi
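
# The quoted "ark:... |" arguments are Kaldi rspecifiers that pipe the
# i-vectors through mean subtraction, the LDA transform, and length
# normalization on the fly. The LDA matrix has lda_dim=200 rows; its size
# can be confirmed with (not part of the original recipe):
# matrix-dim exp/ivectors_train/transform.mat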
  
if [ $stage -le 6 ]; then
  $train_cmd exp/scores/log/voxceleb1_test_scoring.log \
    ivector-plda-scoring --normalize-length=true \
    "ivector-copy-plda --smoothing=0.0 exp/ivectors_train/plda - |" \
    "ark:ivector-subtract-global-mean exp/ivectors_train/mean.vec scp:exp/ivectors_voxceleb1_test/ivector.scp ark:- | transform-vec exp/ivectors_train/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "ark:ivector-subtract-global-mean exp/ivectors_train/mean.vec scp:exp/ivectors_voxceleb1_test/ivector.scp ark:- | transform-vec exp/ivectors_train/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "cat '$voxceleb1_trials' | cut -d\  --fields=1,2 |" exp/scores_voxceleb1_test || exit 1;
fi
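
# The scores file is plain text, one trial per line: "<enroll-utt>
# <test-utt> <plda-score>". To peek at the first few trials (not part of
# the original recipe):
# head -n 3 exp/scores_voxceleb1_test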
  
if [ $stage -le 7 ]; then
  eer=$(compute-eer <(local/prepare_for_eer.py $voxceleb1_trials exp/scores_voxceleb1_test) 2>/dev/null)
  mindcf1=$(sid/compute_min_dcf.py --p-target 0.01 exp/scores_voxceleb1_test $voxceleb1_trials 2>/dev/null)
  mindcf2=$(sid/compute_min_dcf.py --p-target 0.001 exp/scores_voxceleb1_test $voxceleb1_trials 2>/dev/null)
  echo "EER: $eer%"
  echo "minDCF(p-target=0.01): $mindcf1"
  echo "minDCF(p-target=0.001): $mindcf2"
  # EER: 5.329%
  # minDCF(p-target=0.01): 0.4933
  # minDCF(p-target=0.001): 0.6168
fi