run.sh
#!/bin/bash
# Trains on 11 hours of speech from CTELL{ONE,TWO,THREE,FOUR,FIVE}.
# Uses the QCRI vowelized Arabic lexicon.
# Converts the Buckwalter encoding to UTF-8.
. ./cmd.sh
. ./path.sh
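# Set --stage on the command line to resume a partial run, e.g. ./run.sh --stage 9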
stage=0
. ./utils/parse_options.sh
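# Exit immediately on errors, failed pipelines and unset variables.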
set -e
set -o pipefail
set -u
# Do not change tmpdir, other scripts under local depend on it
tmpdir=data/local/tmp
# The speech corpus is on openslr.org
speech="http://www.openslr.org/resources/46/Tunisian_MSA.tar.gz"
# We use the QCRI lexicon.
lex="http://alt.qcri.org/resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2"
# We train the lm on subtitles.
subs_src="http://opus.nlpl.eu/download.php?f=OpenSubtitles2018/mono/OpenSubtitles2018.ar.gz"
if [ $stage -le 1 ]; then
# Downloads the speech, lexicon and subtitles archives to this script's directory.
local/tamsa_download.sh $speech
local/qcri_lexicon_download.sh $lex
local/subs_download.sh $subs_src
fi
# preparation stages will store files under data/
# Delete the entire data directory when restarting.
if [ $stage -le 2 ]; then
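# Create the Kaldi data directories (wav.scp, text, utt2spk, spk2utt) for train, devtest and test.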
local/prepare_data.sh
fi
if [ $stage -le 3 ]; then
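# Convert the QCRI lexicon from Buckwalter transliteration to UTF-8 Arabic script.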
mkdir -p $tmpdir/dict
local/qcri_buckwalter2utf8.sh > $tmpdir/dict/qcri_utf8.txt
fi
if [ $stage -le 4 ]; then
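# Build data/local/dict (lexicon.txt and phone lists) from the UTF-8 lexicon.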
local/prepare_dict.sh $tmpdir/dict/qcri_utf8.txt
fi
if [ $stage -le 5 ]; then
# prepare the lang directory
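# "<UNK>" is the symbol used for out-of-vocabulary words.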
utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang
fi
if [ $stage -le 6 ]; then
echo "Preparing the subs data for lm training."
local/subs_prepare_data.pl
fi
if [ $stage -le 7 ]; then
echo "lm training."
local/prepare_lm.sh $tmpdir/subs/lm/in_vocabulary.txt
fi
if [ $stage -le 8 ]; then
echo "Making grammar fst."
utils/format_lm.sh \
data/lang data/local/lm/trigram.arpa.gz data/local/dict/lexicon.txt \
data/lang_test
fi
if [ $stage -le 9 ]; then
# extract acoustic features
for fld in devtest train test; do
steps/make_mfcc.sh data/$fld exp/make_mfcc/$fld mfcc
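# fix_data_dir.sh removes utterances with missing features or transcripts and keeps the files consistent.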
utils/fix_data_dir.sh data/$fld
steps/compute_cmvn_stats.sh data/$fld exp/make_mfcc mfcc
utils/fix_data_dir.sh data/$fld
done
fi
if [ $stage -le 10 ]; then
echo "$0: monophone training"
steps/train_mono.sh data/train data/lang exp/mono
fi
if [ $stage -le 11 ]; then
# monophone evaluation
(
# make decoding graph for monophones
utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph
# test monophones
for x in devtest test; do
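# Use one decoding job per speaker.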
nspk=$(wc -l < data/$x/spk2utt)
steps/decode.sh --nj $nspk exp/mono/graph data/$x exp/mono/decode_${x}
done
) &
fi
if [ $stage -le 12 ]; then
# align with monophones
steps/align_si.sh data/train data/lang exp/mono exp/mono_ali
fi
if [ $stage -le 13 ]; then
echo "$0: Starting triphone training in exp/tri1"
steps/train_deltas.sh \
--boost-silence 1.25 1000 6000 data/train data/lang exp/mono_ali exp/tri1
fi
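# Wait for the background monophone decoding jobs to finish.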
wait
if [ $stage -le 14 ]; then
# test the context-dependent (tri1) GMM-HMM models
# make decoding graphs for tri1
(
utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
# decode test data with tri1 models
for x in devtest test; do
nspk=$(wc -l < data/$x/spk2utt)
steps/decode.sh --nj $nspk exp/tri1/graph data/$x exp/tri1/decode_${x}
done
) &
fi
if [ $stage -le 15 ]; then
# align with triphones
steps/align_si.sh data/train data/lang exp/tri1 exp/tri1_ali
fi
if [ $stage -le 16 ]; then
echo "$0: Starting (lda_mllt) triphone training in exp/tri2b"
steps/train_lda_mllt.sh \
--splice-opts "--left-context=3 --right-context=3" 500 5000 \
data/train data/lang exp/tri1_ali exp/tri2b
fi
wait
if [ $stage -le 17 ]; then
(
# make decoding FSTs for tri2b models
utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph
# decode test with tri2b models
for x in devtest test; do
nspk=$(wc -l < data/$x/spk2utt)
steps/decode.sh --nj $nspk exp/tri2b/graph data/$x exp/tri2b/decode_${x}
done
) &
fi
if [ $stage -le 18 ]; then
# align with lda and mllt adapted triphones
steps/align_si.sh \
--use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali
fi
if [ $stage -le 19 ]; then
echo "$0: Starting (SAT) triphone training in exp/tri3b"
steps/train_sat.sh 800 8000 data/train data/lang exp/tri2b_ali exp/tri3b
fi
if [ $stage -le 20 ]; then
(
# make decoding graphs for SAT models
utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph
# decode test sets with tri3b models
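# decode_fmllr.sh estimates per-speaker fMLLR transforms before the final decoding pass.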
for x in devtest test; do
nspk=$(wc -l < data/$x/spk2utt)
steps/decode_fmllr.sh --nj $nspk exp/tri3b/graph data/$x exp/tri3b/decode_${x}
done
) &
fi
if [ $stage -le 21 ]; then
# align with tri3b models
echo "$0: Starting exp/tri3b_ali"
steps/align_fmllr.sh data/train data/lang exp/tri3b exp/tri3b_ali
fi
if [ $stage -le 22 ]; then
# train and test chain models
local/chain/run_tdnn.sh
fi