extract_ivector_lang.sh 1.22 KB
#!/bin/bash

# Copyright 2016 Pegah Ghahremani

# This scripts extracts iVector using global iVector extractor
# trained on all languages in multilingual setup.

. ./cmd.sh
set -e
stage=1
train_set=train_sp_hires # train_set used to extract ivector using shared ivector
                         # extractor.
ivector_suffix=_gb
nnet3_affix=

[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1

. conf/common_vars.sh || exit 1;

. ./utils/parse_options.sh

lang=$1
global_extractor=$2

if [ $stage -le 7 ]; then
  # We extract iVectors on all the train_nodup data, which will be what we
  # train the system on.
  # having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/$lang/${train_set} data/$lang/${train_set}_max2
  if [ ! -f exp/$lang/nnet3${nnet3_affix}/ivectors_${train_set}${ivector_suffix}/ivector_online.scp ]; then
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 200 \
      data/$lang/${train_set}_max2 $global_extractor exp/$lang/nnet3${nnet3_affix}/ivectors_${train_set}${ivector_suffix} || exit 1;
  fi
fi
exit 0;