Blame view

egs/spanish_dimex100/s5/run.sh 2.9 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
  #!/bin/bash
  # Kaldi recipe for the DIMEx100 Spanish corpus: downloads the corpus,
  # prepares data/lang/LM, trains mono -> deltas -> LDA+MLLT -> MMI models,
  # and decodes the test set. Run from the s5 directory.
  
  . ./path.sh || exit 1
  . ./cmd.sh || exit 1
  
  ########
  # Config
  ########
  
  # NOTE(review): these override whatever cmd.sh just defined, forcing local
  # execution via run.pl — presumably intentional for a single-machine setup.
  train_cmd="utils/run.pl"
  decode_cmd="utils/run.pl"
  
  # Directory the corpus archive unpacks into (also passed to local/*_prep.sh).
  CORPUS_DIR="CorpusDimex100"
  
  N_HMM=2000 # leaves (number of tied context-dependent HMM states)
  N_GAUSSIANS=11000
  
  
  #################
  # Download corpus
  #################
  
  echo
  echo Downloading corpus
  echo
  if [ ! -d "$CORPUS_DIR" ]; then
    # -O pins the output name and -c resumes a partial file: without them a
    # re-run after an interrupted download or failed unzip re-fetches the
    # whole DVD archive into a duplicate DVDCorpusDimex100.zip.1.
    wget -c -O DVDCorpusDimex100.zip \
      http://turing.iimas.unam.mx/~luis/DIME/DIMEx100/DVD/DVDCorpusDimex100.zip || exit 1;
    # -o: overwrite without prompting, so a re-run cannot hang on a
    # "replace file?" interactive question.
    unzip -o DVDCorpusDimex100.zip || exit 1;
  fi
  
  
  ##################
  # Data preparation
  ##################
  
  echo
  echo Data preparation
  echo
  # Start from a clean slate; data/, exp/ and mfcc/ are all regenerated below.
  rm -rf data exp mfcc
  # Abort on failure (matches the `|| exit 1` convention used for the other
  # critical steps in this script); otherwise later stages fail confusingly.
  local/data_prep.sh "$CORPUS_DIR" || exit 1
  utils/fix_data_dir.sh "data/train" || exit 1
  utils/fix_data_dir.sh "data/test" || exit 1
  
  
  #####################
  # Features generation
  #####################
  
  echo
  echo Features generation
  echo
  # MFCC extraction, then CMVN stats, for both train and test sets.
  # Each step now aborts on failure, consistent with the training section.
  steps/make_mfcc.sh --cmd "$train_cmd" "data/train" "exp/make_mfcc/train" mfcc || exit 1
  steps/make_mfcc.sh --cmd "$train_cmd" "data/test"  "exp/make_mfcc/test"  mfcc || exit 1
  
  steps/compute_cmvn_stats.sh "data/train" "exp/make_mfcc/train" mfcc || exit 1
  steps/compute_cmvn_stats.sh "data/test" "exp/make_mfcc/test" mfcc || exit 1
  
  # Sanity-check the data dirs before training starts.
  utils/validate_data_dir.sh "data/train" || exit 1
  utils/validate_data_dir.sh "data/test" || exit 1
  
  
  #######################
  # Lang data preparation
  #######################
  
  echo
  echo Language data preparation
  echo
  rm -rf data/local/dict
  # Abort on failure so a broken dict does not reach prepare_lang.
  local/lang_prep.sh "$CORPUS_DIR" || exit 1
  # "<UNK>" is the lexicon entry used for out-of-vocabulary words.
  utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1
  utils/fix_data_dir.sh "data/train" || exit 1
  utils/fix_data_dir.sh "data/test" || exit 1
  
  
  ############################
  # Language model preparation
  ############################
  
  echo
  echo Language model preparation
  echo
  # Abort on failure, consistent with the rest of the recipe.
  local/lm_prep.sh || exit 1
  
  
  #######################
  # Training and Decoding
  #######################
  
  echo
  echo Training
  echo
  # utils/subset_data_dir.sh --first data/train 500 data/train_500
  
  # Training and aligning: monophone system, then delta-feature triphones
  # trained on its alignments.
  steps/train_mono.sh --cmd "$train_cmd" data/train data/lang exp/mono || exit 1
  steps/align_si.sh --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_aligned || exit 1
  # Pass --cmd like every sibling step (was omitted; the default run.pl is
  # the same runner here, so behavior is unchanged, but config is consistent).
  steps/train_deltas.sh --cmd "$train_cmd" "$N_HMM" "$N_GAUSSIANS" data/train data/lang exp/mono_aligned exp/tri1 || exit 1
  steps/align_si.sh --cmd "$train_cmd" data/train data/lang exp/tri1 exp/tri1_aligned || exit 1
  
  # train tri2b [LDA+MLLT]
  steps/train_lda_mllt.sh --cmd "$train_cmd" "$N_HMM" "$N_GAUSSIANS" data/train data/lang exp/tri1_aligned exp/tri2b || exit 1;
  # Graph build was previously unchecked; a failure here makes decoding fail.
  utils/mkgraph.sh data/lang exp/tri2b exp/tri2b/graph || exit 1
  steps/align_si.sh --cmd "$train_cmd" data/train data/lang exp/tri2b exp/tri2b_aligned || exit 1
  
  #  Do MMI on top of LDA+MLLT.
  steps/make_denlats.sh --cmd "$train_cmd" data/train data/lang exp/tri2b exp/tri2b_denlats || exit 1;
  steps/train_mmi.sh --cmd "$train_cmd" --boost 0.05 data/train data/lang exp/tri2b_aligned exp/tri2b_denlats exp/tri2b_mmi_b0.05 || exit 1;
  
  
  
  # Decoding
  echo
  echo Decoding
  echo
  # Decode the test set with the MMI-trained model over the tri2b graph.
  steps/decode.sh --config conf/decode.config --cmd "$decode_cmd" exp/tri2b/graph data/test exp/tri2b_mmi_b0.05/decode_test || exit 1
  
  # Summary: report the best WER found in every decode directory.
  # "$x" is quoted (SC2086); directories with unusual names no longer break it.
  for x in exp/*/decode*; do
    [ -d "$x" ] && grep WER "$x"/wer_* | utils/best_wer.sh
  done