Blame view

egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh 3.56 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  #!/bin/bash
  
  # This script trains a multilingual model using 6 layer TDNN + Xent
  # with 42 dim bottleneck layer in the fifth layer.
  # Then it extracts bottleneck features for input language "lang" and
  # trains a SAT model using these features.
  
  # Copyright 2016  Pegah Ghahremani
  # Apache 2.0
  
  # Abort on the first failing command (-e) and make a pipeline fail when any
  # of its stages fails (pipefail).
  set -e -o pipefail
  . conf/common_vars.sh || exit 1;

  # Treat expansion of an undefined variable as an error.  This is switched on
  # only after conf/common_vars.sh has been sourced, exactly as before.
  set -u

  # Default settings.  They are assigned before utils/parse_options.sh is
  # sourced (presumably so that matching --option-name flags can override
  # them; verify against parse_options.sh).
  stage=1
  bnf_train_stage=-10   # the stage variable used in multilingual bottleneck training.
  bnf_dim=42
  speed_perturb=true
  global_extractor=exp/multi/nnet3/extractor
  multilingual_dir=exp/nnet3/multi_bnf
  . ./utils/parse_options.sh
  
  
  # The first positional argument names the language to process.  Check for it
  # explicitly: with `set -u` active a missing argument would otherwise abort
  # with only an "unbound variable" message.
  if [ $# -lt 1 ]; then
    echo "Usage: $0 [options] <lang>" >&2
    exit 1
  fi
  lang=$1

  # Per-language configuration file.
  langconf=conf/$lang/lang.conf

  if [ ! -f "$langconf" ]; then
    echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup'
    exit 1
  fi
  . "$langconf" || exit 1;

  if [ ! -f local.conf ]; then
    echo 'the file local.conf does not exist!'
    exit 1
  fi
  # Source with an explicit ./ prefix: a slash-less name given to `.` is looked
  # up in $PATH before the current directory, so the file sourced could differ
  # from the one whose existence was just checked.
  . ./local.conf || exit 1;
  
  # Directory-name suffix: "_sp" when $speed_perturb runs successfully (it is
  # executed as a command, e.g. `true`/`false`), empty otherwise.
  if $speed_perturb; then
    suffix=_sp
  else
    suffix=
  fi

  # Per-language experiment root and the i-vector directory beneath it.
  exp_dir=exp/${lang}
  ivector_dir=${exp_dir}/nnet3/ivectors_train${suffix}_gb

  # Data directories used by the steps below.
  datadir=data/${lang}/train${suffix}_hires_mfcc_pitch
  data_bnf_dir=data/${lang}/train${suffix}_bnf
  appended_dir=data/${lang}/train${suffix}_hires_mfcc_pitch_bnf

  # Directory that receives the dumped bottleneck feature archives.
  dump_bnf_dir=bnf/${lang}
  ###############################################################################
  #
  # Training multilingual model with bottleneck layer
  #
  ###############################################################################
  mkdir -p $multilingual_dir${suffix}

  # The marker file $multilingual_dir${suffix}/.done records that training has
  # already completed; when it exists the training step is skipped.
  if [ -f $multilingual_dir${suffix}/.done ]; then
    echo "$0 Skip multilingual DNN training; you can force to run this step by deleting $multilingual_dir${suffix}/.done"
  else
    echo "$0: Train multilingual DNN using Bottleneck layer with lang list = ${lang_list[@]}"
    # The training script is sourced (not executed), so it runs in this shell
    # and shares its variables.  --dir is passed WITHOUT ${suffix};
    # NOTE(review): presumably the sourced script appends the suffix itself --
    # confirm against run_tdnn_multilingual.sh.
    . local/nnet3/run_tdnn_multilingual.sh \
      --dir $multilingual_dir --bnf-dim $bnf_dim \
      --global-extractor $global_extractor \
      --train-stage $bnf_train_stage --stage $stage || exit 1;

    touch $multilingual_dir${suffix}/.done
  fi
  
  # Create the dump directory if it is not there yet.
  if [ ! -d $dump_bnf_dir ]; then
    mkdir -p $dump_bnf_dir
  fi

  # Bottleneck feature extraction, guarded by the marker $data_bnf_dir/.done.
  if [ -f $data_bnf_dir/.done ]; then
    echo "$0 Skip Bottleneck feature extraction; You can force to run this step deleting $data_bnf_dir/.done."
  else
    # From here on multilingual_dir names the suffixed model directory.
    multilingual_dir=$multilingual_dir${suffix}
    mkdir -p $dump_bnf_dir
    # put the archives in ${dump_bnf_dir}/.
    steps/nnet3/make_bottleneck_features.sh \
      --use-gpu true --nj 70 --cmd "$train_cmd" --ivector-dir $ivector_dir \
      tdnn_bn.renorm $datadir $data_bnf_dir $multilingual_dir \
      $dump_bnf_dir $exp_dir/make_train_bnf || exit 1;
    touch $data_bnf_dir/.done
  fi
  
  # Build $appended_dir from $data_bnf_dir and $datadir with append_feats.sh,
  # then compute CMVN statistics for it.  The step is guarded by the marker
  # $appended_dir/.done.  The marker is a regular file created by `touch`, so
  # it must be tested with -f; a -d test is never true for it and would make
  # this step rerun on every invocation.
  if [ ! -f "$appended_dir/.done" ]; then
    steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
      "$data_bnf_dir" "$datadir" "$appended_dir" \
      "$exp_dir/append_hires_mfcc_bnf" "$dump_bnf_dir" || exit 1;
    steps/compute_cmvn_stats.sh "$appended_dir" \
      "$exp_dir/make_cmvn_mfcc_bnf" "$dump_bnf_dir" || exit 1;
    touch "$appended_dir/.done"
  else
    echo "$0 Skip feature appending; you can force to run this step by deleting $appended_dir/.done"
  fi
  
  # Train the tri5b system on the appended features unless its marker
  # $exp_dir/tri5b/.done is newer than $data_bnf_dir/.done (a missing tri5b
  # marker also triggers training).
  # NOTE(review): the alignment directory is hard-coded as tri5_ali_sp rather
  # than built from ${suffix}; confirm this is intended when speed_perturb is
  # false.
  if ! [ $exp_dir/tri5b/.done -nt $data_bnf_dir/.done ]; then
    steps/train_lda_mllt.sh \
      --splice-opts "--left-context=1 --right-context=1" --dim 60 \
      --boost-silence $boost_sil --cmd "$train_cmd" \
      $numLeavesMLLT $numGaussMLLT \
      $appended_dir data/$lang/lang $exp_dir/tri5_ali_sp \
      $exp_dir/tri5b ;
    touch $exp_dir/tri5b/.done
  fi
  
  # Train the tri6 system from tri5b unless its marker $exp_dir/tri6/.done is
  # newer than $exp_dir/tri5b/.done (a missing tri6 marker also triggers
  # training).
  if ! [ $exp_dir/tri6/.done -nt $exp_dir/tri5b/.done ]; then
    steps/train_sat.sh \
      --boost-silence $boost_sil --cmd "$train_cmd" \
      $numLeavesSAT $numGaussSAT \
      $appended_dir data/$lang/lang $exp_dir/tri5b $exp_dir/tri6
    touch $exp_dir/tri6/.done
  fi

  # All stages finished (or were skipped) without error.
  exit 0;