run.sh 5.82 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162


#!/bin/bash

# Load the recipe configuration into the current shell.
# $train_cmd and $decode_cmd, used throughout this script, are not defined in
# this file -- presumably cmd.sh sets them (confirm against cmd.sh).
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
. ./path.sh # so python3 is on the path if not on the system (we made a link to utils/).

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.


# Corpus preparation: download the corpus, build parallel lists of sound files
# and text files, and divide the data into train, dev and test sets.
local/sprak_data_prep.sh || exit 1
utils/fix_data_dir.sh data/train || exit 1

# Dictionary preparation: text normalisation, the dict folder and the LM data
# transcriptions.
# NOTE(review): unlike the other local/ scripts called in this file, this one
# has no .sh suffix -- confirm that the file name is correct.
local/copy_dict || exit 1

# Build data/lang from data/local/dict, using data/local/lang_tmp as scratch.
# "<UNK>" is presumably the out-of-vocabulary entry -- confirm against the
# usage message of prepare_lang.sh.
utils/prepare_lang.sh \
  data/local/dict "<UNK>" \
  data/local/lang_tmp data/lang || exit 1

# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features. Every feature command below writes to this one
# directory, so changing this assignment relocates all feature files.
mfccdir=mfcc


# Extract mfccs
# Each call passes exactly three positional arguments: data dir, log dir and
# feature dir.
# "$train_cmd" is quoted so that a command containing options or spaces is
# still handed to --cmd as a single value.
# p was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh because some
# wave files are corrupt
# Will return a warning message because of the corrupt audio files, but compute them anyway
# If this step fails and prints a partial diff, rerun from sprak_data_prep.sh

steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" data/test exp/make_mfcc/test "$mfccdir" || exit 1;
steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" data/train exp/make_mfcc/train "$mfccdir" || exit 1;


# Compute cepstral mean and variance normalisation
steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test "$mfccdir" || exit 1;
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train "$mfccdir" || exit 1;


# Repair data set (remove corrupt data points with corrupt audio)

utils/fix_data_dir.sh data/test || exit 1;
utils/fix_data_dir.sh data/train || exit 1;


# Language model training with irstlm.
# Creates the 4g dictionary and, importantly, G.fst; stdout and stderr of the
# script both go to data/local/4g.log.
# 3g variant, left disabled (it was run in the background):
#local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "3g" data/lang data/local/train3_lm &> data/local/3g.log &
local/train_irstlm.sh \
  data/local/transcript_lm/transcripts.uniq 4 "4g" \
  data/lang data/local/train4_lm > data/local/4g.log 2>&1 || exit 1

# Speed test: keep only 120 utterances per speaker of the test set.
utils/subset_data_dir.sh --per-spk \
  data/test 120 data/test120_p_spk || exit 1


# Monophone system, trained on data/train. Once this has run, the alignment
# between frames and phones can be inspected with the show-alignments command.
steps/train_mono.sh \
  --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/mono || exit 1

# Build the decoding graph; the language model must have been created by now.
utils/mkgraph.sh \
  data/lang_test_4g exp/mono exp/mono/graph_4g || exit 1

# Decode the reduced test set; the graph must have been constructed by now.
steps/decode.sh \
  --config conf/decode.config --nj 10 --cmd "$decode_cmd" \
  exp/mono/graph_4g data/test120_p_spk exp/mono/decode || exit 1

# Align the training data with the monophone system.
steps/align_si.sh \
  --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/mono exp/mono_ali || exit 1

# tri1: first triphone pass, trained on the monophone alignments.
# Variant with silence boosting, left disabled:
# steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
steps/train_deltas.sh \
  --cmd "$train_cmd" \
  5800 96000 \
  data/train data/lang exp/mono_ali exp/tri1 || exit 1

# Build the tri1 decoding graph.
utils/mkgraph.sh \
  data/lang_test_4g exp/tri1 exp/tri1/graph_4g || exit 1

# Decode the reduced test set with tri1.
steps/decode.sh \
  --config conf/decode.config --nj 10 --cmd "$decode_cmd" \
  exp/tri1/graph_4g data/test120_p_spk exp/tri1/decode_test120_p_spk || exit 1

# Align the training data with tri1.
steps/align_si.sh \
  --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali || exit 1


# tri2a: deltas + delta-deltas, trained on the tri1 alignments.
steps/train_deltas.sh \
  --cmd "$train_cmd" \
  7500 125000 \
  data/train data/lang exp/tri1_ali exp/tri2a || exit 1

# Build the tri2a decoding graph.
utils/mkgraph.sh \
  data/lang_test_4g exp/tri2a exp/tri2a/graph_4g || exit 1

# Decode the reduced test set with tri2a.
steps/decode.sh \
  --nj 10 --cmd "$decode_cmd" \
  exp/tri2a/graph_4g data/test120_p_spk exp/tri2a/decode_test120_p_spk || exit 1


# tri2b: trained by train_lda_mllt.sh on the tri1 alignments, splicing five
# frames of context on each side.
steps/train_lda_mllt.sh \
  --cmd "$train_cmd" \
  --splice-opts "--left-context=5 --right-context=5" \
  7500 125000 \
  data/train data/lang exp/tri1_ali exp/tri2b || exit 1

# Build the tri2b decoding graph and decode the reduced test set.
utils/mkgraph.sh \
  data/lang_test_4g exp/tri2b exp/tri2b/graph_4g || exit 1
steps/decode.sh \
  --nj 10 --cmd "$decode_cmd" \
  exp/tri2b/graph_4g data/test120_p_spk exp/tri2b/decode_test120_p_spk || exit 1

# Align the training data with tri2b, passing --use-graphs true.
steps/align_si.sh \
  --nj 10 --cmd "$train_cmd" --use-graphs true \
  data/train data/lang exp/tri2b exp/tri2b_ali || exit 1


# tri3b: LDA + MLLT + SAT, trained on the tri2b alignments.
steps/train_sat.sh \
  --cmd "$train_cmd" \
  7500 125000 \
  data/train data/lang exp/tri2b_ali exp/tri3b || exit 1

# Build the tri3b decoding graph with the 4-gram language model.
utils/mkgraph.sh \
  data/lang_test_4g exp/tri3b exp/tri3b/graph_4g || exit 1

# Decode the reduced test set with decode_fmllr.sh.
steps/decode_fmllr.sh \
  --cmd "$decode_cmd" --nj 10 \
  exp/tri3b/graph_4g data/test120_p_spk exp/tri3b/decode_test120_p_spk || exit 1

# RNN reranking is disabled for now: it is not important for the main recipe.
## Train RNN for reranking
#local/sprak_train_rnnlms.sh data/local/dict data/dev/transcripts.uniq data/local/rnnlms/g_c380_d1k_h100_v130k
## Consumes a lot of memory! Do not run in parallel
#local/sprak_run_rnnlms_tri3b.sh data/lang_test_3g data/local/rnnlms/g_c380_d1k_h100_v130k data/test1k exp/tri3b/decode_3g_test1k

# Align the training data with tri3b.
steps/align_fmllr.sh \
  --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b exp/tri3b_ali || exit 1

# tri4a: another SAT system, trained on data/train from the tri3b alignments.
steps/train_sat.sh \
  --cmd "$train_cmd" \
  13000 300000 \
  data/train data/lang exp/tri3b_ali exp/tri4a || exit 1

# Build the tri4a decoding graph and decode the reduced test set.
utils/mkgraph.sh \
  data/lang_test_4g exp/tri4a exp/tri4a/graph_4g || exit 1
steps/decode_fmllr.sh \
  --nj 10 --cmd "$decode_cmd" \
  exp/tri4a/graph_4g data/test120_p_spk exp/tri4a/decode_test120_p_spk || exit 1


# Align the training data with tri4a; these alignments are used to train the
# nnets.
steps/align_fmllr.sh \
  --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/tri4a exp/tri4a_ali || exit 1

## Works
# Neural network stage, run with the arguments "4g" and "test120_p_spk".
local/sprak_run_nnet_cpu.sh 4g test120_p_spk || exit 1


# Getting results [see RESULTS file]
#######################################
# Print the best WER for every decoding directory found under exp/.
# Globals:   none
# Arguments: none
# Outputs:   for each directory matching exp/*/decode*, whatever
#            utils/best_wer.sh prints for the WER lines of its wer_* files
# Returns:   0 when the last match is skipped; otherwise the status of the
#            last grep | best_wer.sh pipeline
#######################################
report_best_wer() {
  local decode_dir
  for decode_dir in exp/*/decode*; do
    # Skip anything that is not a directory, including the literal pattern
    # that is left behind when nothing matches. An explicit "if" (rather than
    # "[ -d ] && ...") keeps a skipped entry from producing a failure status.
    if [ -d "$decode_dir" ]; then
      # Quoted so that directory names containing spaces are handled.
      grep WER "$decode_dir"/wer_* | utils/best_wer.sh
    fi
  done
}
report_best_wer