Blame view

egs/aishell2/s5/local/run_gmm.sh 3.69 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
  #!/bin/bash
  # Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
  #           2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
  #           2018 Emotech LTD (Author: Xuechen LIU)
  # Apache 2.0
  
  # Abort as soon as an unguarded command returns a non-zero status.
  set -e

  # Defaults. Both are assigned before utils/parse_options.sh is sourced
  # below (presumably so that --stage / --nj can override them -- confirm
  # in utils/parse_options.sh).
  stage=1   # compared against the "[ $stage -le N ]" guard of every stage
  nj=20     # number of jobs

  # Source the command configuration, the optional path setup, and finally
  # the option parser, in that order.
  . ./cmd.sh
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  . ./utils/parse_options.sh
  
  # nj for dev and test: one job per line of the set's spk2utt file
  # (presumably one line per speaker -- confirm against the data prep step).
  #
  # Fail early, with a clear message, when a list is missing or empty.
  # The previous "$(wc -l FILE | awk ... || exit 1;)" form only checked awk's
  # status inside the substitution, so a failing wc left the job count empty
  # and the script carried on.
  for spk2utt in data/dev/spk2utt data/test/spk2utt; do
    if [ ! -s "$spk2utt" ]; then
      echo "$0: missing or empty $spk2utt" >&2
      exit 1
    fi
  done
  # A single command per substitution, so its exit status is the one checked.
  dev_nj=$(awk 'END {print NR}' data/dev/spk2utt) || exit 1
  test_nj=$(awk 'END {print NR}' data/test/spk2utt) || exit 1
  
  # Stage 1: feature extraction and training subsets.
  if [ $stage -le 1 ]; then
    # Directory handed to the feature scripts as the storage location;
    # it should live on a disk with plenty of free space.
    feat_dir=mfcc
    for part in train dev test; do
      part_data=data/$part
      part_log=exp/make_mfcc/$part
      steps/make_mfcc_pitch.sh --pitch-config conf/pitch.conf \
        --cmd "$train_cmd" --nj $nj \
        "$part_data" "$part_log" "$feat_dir" || exit 1;
      steps/compute_cmvn_stats.sh "$part_data" "$part_log" "$feat_dir" || exit 1;
      utils/fix_data_dir.sh "$part_data" || exit 1;
    done

    # Carve two subsets out of the training data for a fast startup:
    # data/train_100k first, then data/train_300k.
    utils/subset_data_dir.sh data/train 100000 data/train_100k
    utils/subset_data_dir.sh data/train 300000 data/train_300k
  fi
  
  # Stage 2: mono system (train, decode dev/test, align).
  if [ $stage -le 2 ]; then
    mono_dir=exp/mono
    mono_graph=$mono_dir/graph

    # Train on the 100k subset.
    steps/train_mono.sh --cmd "$train_cmd" --nj $nj \
      data/train_100k data/lang "$mono_dir" || exit 1;

    # Build the decoding graph, then decode dev followed by test.
    utils/mkgraph.sh data/lang_test "$mono_dir" "$mono_graph" || exit 1;
    for part in dev test; do
      # Name of the variable (dev_nj / test_nj) holding this set's job count;
      # it is dereferenced below with bash indirect expansion.
      part_nj=${part}_nj
      steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${!part_nj} \
        "$mono_graph" data/$part "$mono_dir/decode_$part"
    done

    # Align the 300k subset with the mono model.
    steps/align_si.sh --cmd "$train_cmd" --nj $nj \
      data/train_300k data/lang "$mono_dir" "${mono_dir}_ali" || exit 1;
  fi
  
  # Stage 3: tri1 system (train on mono alignments, decode dev/test, align).
  if [ $stage -le 3 ]; then
    tri1_dir=exp/tri1
    tri1_graph=$tri1_dir/graph

    # Train on the 300k subset, starting from the exp/mono_ali alignments.
    steps/train_deltas.sh --cmd "$train_cmd" \
      4000 32000 data/train_300k data/lang exp/mono_ali "$tri1_dir" || exit 1;

    # Build the decoding graph, then decode dev followed by test.
    utils/mkgraph.sh data/lang_test "$tri1_dir" "$tri1_graph" || exit 1;
    for part in dev test; do
      # Name of the variable (dev_nj / test_nj) holding this set's job count;
      # it is dereferenced below with bash indirect expansion.
      part_nj=${part}_nj
      steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${!part_nj} \
        "$tri1_graph" data/$part "$tri1_dir/decode_$part"
    done

    # Align the full training set with the tri1 model.
    steps/align_si.sh --cmd "$train_cmd" --nj $nj \
      data/train data/lang "$tri1_dir" "${tri1_dir}_ali" || exit 1;
  fi
  
  # Stage 4: tri2 system (train on tri1 alignments, decode dev/test, align).
  if [ $stage -le 4 ]; then
    # training on the full training set, starting from exp/tri1_ali
    # (7000 / 56000 are presumably the tree-leaf and Gaussian budgets --
    # confirm in steps/train_deltas.sh)
    steps/train_deltas.sh --cmd "$train_cmd" \
     7000 56000 data/train data/lang exp/tri1_ali exp/tri2 || exit 1;

    # decoding
    # mkgraph now carries the same explicit "|| exit 1" as the graph-building
    # step of every other stage in this script, so a failure here exits with
    # status 1 like the others.
    utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1;
    steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${dev_nj} \
      exp/tri2/graph data/dev exp/tri2/decode_dev
    steps/decode.sh --cmd "$decode_cmd" --config conf/decode.conf --nj ${test_nj} \
      exp/tri2/graph data/test exp/tri2/decode_test

    # alignment of the full training set with the tri2 model
    steps/align_si.sh --cmd "$train_cmd" --nj $nj \
      data/train data/lang exp/tri2 exp/tri2_ali || exit 1;
  fi
  
  # Stage 5: tri3 system (train on tri2 alignments, decode dev/test, align
  # both the training set and the dev set).
  if [ $stage -le 5 ]; then
    # training [LDA+MLLT], starting from exp/tri2_ali
    steps/train_lda_mllt.sh --cmd "$train_cmd" \
     10000 80000 data/train data/lang exp/tri2_ali exp/tri3 || exit 1;

    # decoding
    utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1;
    steps/decode.sh --cmd "$decode_cmd" --nj ${dev_nj} --config conf/decode.conf \
      exp/tri3/graph data/dev exp/tri3/decode_dev
    steps/decode.sh --cmd "$decode_cmd" --nj ${test_nj} --config conf/decode.conf \
      exp/tri3/graph data/test exp/tri3/decode_test

    # alignment of the full training set
    steps/align_si.sh --cmd "$train_cmd" --nj $nj \
      data/train data/lang exp/tri3 exp/tri3_ali || exit 1;

    # alignment of the dev set
    # Use dev_nj (derived from data/dev/spk2utt) like every other step that
    # processes data/dev in this script, instead of the training job count.
    # NOTE(review): the training nj presumably can exceed the number of dev
    # speakers and break the per-job data split -- confirm against
    # utils/split_data.sh.
    steps/align_si.sh --cmd "$train_cmd" --nj ${dev_nj} \
      data/dev data/lang exp/tri3 exp/tri3_ali_dev || exit 1;
  fi
  
  # Report success and leave with status 0.
  printf '%s\n' "local/run_gmm.sh succeeded"
  exit 0