Blame view
egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
3.56 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
#!/bin/bash

# This script trains a multilingual model using 6 layer TDNN + Xent
# with 42 dim bottleneck layer in the fifth layer.
# Then it extracts bottleneck features for input language "lang" and
# trains a SAT model using these features.
#
# Usage: local/nnet3/run_multilingual_bnf.sh [options] <lang>
#   <lang> selects conf/<lang>/lang.conf and the data/<lang>, exp/<lang>
#   and bnf/<lang> directories used below.
# Options (handled by utils/parse_options.sh, defaults set below):
#   --bnf-train-stage, --stage, --speed-perturb, --multilingual-dir,
#   --global-extractor, --bnf-dim
#
# Every step is guarded by a ".done" marker file; delete the marker to
# force the step to run again.

# Copyright 2016 Pegah Ghahremani
# Apache 2.0

set -e           #Exit on non-zero return code from any command
set -o pipefail  #Exit if any of the commands in the pipeline will
                 #return non-zero return code

. conf/common_vars.sh || exit 1;

set -u           #Fail on an undefined variable

bnf_train_stage=-10 # the stage variable used in multilingual bottleneck training.
stage=1
speed_perturb=true
multilingual_dir=exp/nnet3/multi_bnf
global_extractor=exp/multi/nnet3/extractor
bnf_dim=42

. ./utils/parse_options.sh

# Without this check, "set -u" would abort on the "$1" below with only a
# bare "unbound variable" message.
if [ $# -lt 1 ]; then
  echo "Usage: $0 [options] <lang>" >&2
  exit 1
fi

lang=$1

langconf=conf/$lang/lang.conf

[ ! -f "$langconf" ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1;
. "$langconf" || exit 1;

[ ! -f local.conf ] && echo 'the file local.conf does not exist!' && exit 1;
. local.conf || exit 1;

# Speed-perturbed data directories carry an "_sp" suffix.
suffix=
if $speed_perturb; then
  suffix=_sp
fi

exp_dir=exp/$lang
datadir=data/$lang/train${suffix}_hires_mfcc_pitch
appended_dir=data/$lang/train${suffix}_hires_mfcc_pitch_bnf
data_bnf_dir=data/$lang/train${suffix}_bnf
dump_bnf_dir=bnf/$lang
ivector_dir=$exp_dir/nnet3/ivectors_train${suffix}_gb

###############################################################################
#
# Training multilingual model with bottleneck layer
#
###############################################################################
mkdir -p "$multilingual_dir${suffix}"

if [ ! -f "$multilingual_dir${suffix}/.done" ]; then
  # lang_list is not set in this script; presumably it comes from one of the
  # sourced configuration files.
  echo "$0: Train multilingual DNN using Bottleneck layer with lang list = ${lang_list[*]}"
  # The training script is sourced (not executed), so it runs in this shell.
  # NOTE(review): --dir is passed without ${suffix} while the .done marker
  # lives under $multilingual_dir${suffix}; confirm the training script
  # appends the suffix itself.
  . local/nnet3/run_tdnn_multilingual.sh --dir "$multilingual_dir" \
    --bnf-dim "$bnf_dim" \
    --global-extractor "$global_extractor" \
    --train-stage "$bnf_train_stage" --stage "$stage" || exit 1;

  touch "$multilingual_dir${suffix}/.done"
else
  echo "$0 Skip multilingual DNN training; you can force to run this step by deleting $multilingual_dir${suffix}/.done"
fi

# The bottleneck feature archives are written to ${dump_bnf_dir}/.
mkdir -p "$dump_bnf_dir"

if [ ! -f "$data_bnf_dir/.done" ]; then
  multilingual_dir=$multilingual_dir${suffix}
  steps/nnet3/make_bottleneck_features.sh --use-gpu true --nj 70 --cmd "$train_cmd" \
    --ivector-dir "$ivector_dir" \
    tdnn_bn.renorm "$datadir" "$data_bnf_dir" \
    "$multilingual_dir" "$dump_bnf_dir" "$exp_dir/make_train_bnf" || exit 1;
  touch "$data_bnf_dir/.done"
else
  echo "$0 Skip Bottleneck feature extraction; You can force to run this step deleting $data_bnf_dir/.done."
fi

# Append the bottleneck features to the hires MFCC+pitch features.
# The marker is a regular file created by "touch", so it must be tested with
# -f; the previous -d test never matched and re-ran this step on every call.
if [ ! -f "$appended_dir/.done" ]; then
  steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
    "$data_bnf_dir" "$datadir" "$appended_dir" \
    "$exp_dir/append_hires_mfcc_bnf" "$dump_bnf_dir" || exit 1;
  steps/compute_cmvn_stats.sh "$appended_dir" \
    "$exp_dir/make_cmvn_mfcc_bnf" "$dump_bnf_dir" || exit 1;
  touch "$appended_dir/.done"
fi

# Retrain LDA+MLLT unless its marker is newer than the bottleneck features.
# NOTE(review): the alignment directory is hard-coded as tri5_ali_sp rather
# than tri5_ali${suffix}; confirm this is intended when --speed-perturb false.
if [ ! "$exp_dir/tri5b/.done" -nt "$data_bnf_dir/.done" ]; then
  steps/train_lda_mllt.sh --splice-opts "--left-context=1 --right-context=1" \
    --dim 60 --boost-silence "$boost_sil" --cmd "$train_cmd" \
    "$numLeavesMLLT" "$numGaussMLLT" "$appended_dir" "data/$lang/lang" \
    "$exp_dir/tri5_ali_sp" "$exp_dir/tri5b" || exit 1;
  touch "$exp_dir/tri5b/.done"
fi

# Retrain the SAT system unless its marker is newer than the LDA+MLLT one.
if [ ! "$exp_dir/tri6/.done" -nt "$exp_dir/tri5b/.done" ]; then
  steps/train_sat.sh --boost-silence "$boost_sil" --cmd "$train_cmd" \
    "$numLeavesSAT" "$numGaussSAT" "$appended_dir" "data/$lang/lang" \
    "$exp_dir/tri5b" "$exp_dir/tri6" || exit 1;
  touch "$exp_dir/tri6/.done"
fi

exit 0;