run_sgmm_multiling.sh 5.43 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112


#!/bin/bash

# Multilingual setup for SGMMs.
# Caution: this is just a stub, intended to show some others what to do, it
# is not functional yet.  Several helper scripts/programs referred to below
# (e.g. gmm-convert, convert-tree, merge-tree, gmm-merge) are described in the
# comments as things that still need to be written.
#
# All paths (utils/, steps/, data/, exp/, ../../wsj/s5) are relative to the
# current directory, so this must be run from the recipe directory.

# We treat the WSJ setup as the "other language"-- in fact it's the same language,
# of course, but we treat the phones there as a distinct set.
# The only important thing is that the WSJ data has the same sample rate as the
# RM data.

# $train_cmd and $decode_cmd are used below but are not set anywhere in this
# script.  Kaldi recipes conventionally define them in ./cmd.sh, so pick them
# up from there unless the caller already provided them in the environment.
if [ -z "${train_cmd:-}" ] || [ -z "${decode_cmd:-}" ]; then
  if [ -f ./cmd.sh ]; then
    . ./cmd.sh
  fi
fi

mkdir -p data_ml exp_ml || exit 1; # ml stands for "multilingual"

# add the prefix to all the words and phones:
utils/add_lang_prefix.sh data/lang rm: data_ml/lang_rm || exit 1;

utils/add_lang_prefix.sh ../../wsj/s5/data/lang wsj: data_ml/lang_wsj || exit 1;

# add the prefix to all the words, utterance-ids, and speaker-ids.
utils/add_data_prefix.sh data/train rm: data_ml/train_rm || exit 1;

utils/add_data_prefix.sh ../../wsj/s5/data/train_si284 wsj: data_ml/train_si284_wsj || exit 1;


# Merge the "lang" directories.  This will change the phones.txt and words.txt,
# to incorporate all the symbols in the original setups.  The merged directory
# is data_ml/lang, which is the name every later stage of this script uses.
utils/merge_lang.sh data_ml/lang_rm data_ml/lang_wsj data_ml/lang || exit 1;

utils/combine_data.sh data_ml/train_rm data_ml/train_si284_wsj data_ml/train || exit 1;

# The calls to utils/convert_models.sh below will convert the RM LDA+MLLT
# system and the WSJ system to use the new, merged "lang" directory.
# This script converts the models in the directory to use the new integer values
# for the phones, as in data_ml/lang.
# Everything else will be copied.  The only thing changed in the models is
# the transition-ids.  We'll need a program call like
#  gmm-convert <phone-map-file> <model-in> <model-out>
# where each line of phone-map-file has two fields, (phone-in phone-out).
# This will just affect the transition model, by mapping all the phone-ids.
# We'll also need a program
#  convert-tree <phone-map-file> <tree-in> <tree-out>

utils/convert_models.sh exp/tri2b data_ml/lang_rm exp_ml/tri2b_rm data_ml/lang || exit 1;

# The WSJ recipe lives in ../../wsj/s5 (same root as the WSJ data used above).
utils/convert_models.sh ../../wsj/s5/exp/tri4b data_ml/lang_wsj exp_ml/tri4b_wsj data_ml/lang || exit 1;

# Re-do the alignment of the RM tri2b setup with the converted models
# (this avoids the hassle of converting the alignment.)
steps/align_si.sh --nj 8 --cmd "$train_cmd"  data_ml/train_rm data_ml/lang exp_ml/tri2b_rm \
    exp_ml/tri2b_rm_ali || exit 1;

# Now, starting from those alignments train an RM system with the same LDA+MLLT
# matrix as the WSJ system.  The training script takes this from the alignment directory,
# so it's sufficient to put it there:
cp exp_ml/tri4b_wsj/final.mat exp_ml/tri2b_rm_ali/final.mat || exit 1;

# Train an LDA+MLLT+SAT system for RM that uses the same LDA+MLLT transforms as for WSJ.
# This has to use the prefixed data and the merged lang directory, since the
# alignments in exp_ml/tri2b_rm_ali were produced with those.
steps/train_sat.sh 1800 9000 data_ml/train_rm data_ml/lang exp_ml/tri2b_rm_ali exp_ml/tri3b_rm || exit 1;

# Now merge the RM and WSJ models.  This will create trees and transition-models
# that handle the two (disjoint) sets of phones that the RM and WSJ models
# contain.  We'll need a program "merge-tree" and a program "gmm-merge".  The
# "merge-tree" program will need, for each tree, a record of which sets of
# phones it was supposed to handle, since this is not recorded in the tree
# itself-- we can get this from the transition models which do record this.
# probably the "merge-tree" program will have usage:
# merge-tree <tree1> <phone-set-1> <tree2> <phone-set-2> ... <tree-out>
# where the phone-set-n's will probably be filenames that contain lists of
# the phones.
# The "gmm-merge" program will have the usage:
# gmm-merge <model1> <model2> ... <model-out>

# The inputs are the model directories created above under exp_ml/, and the
# merged model goes to exp_ml/tri4b, which the alignment step below reads.
steps/merge_models.sh exp_ml/tri3b_rm exp_ml/tri4b_wsj exp_ml/tri4b || exit 1;

steps/align_fmllr.sh --nj 32 --cmd "$train_cmd" data_ml/train data_ml/lang exp_ml/tri4b \
  exp_ml/tri4b_ali || exit 1;


steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 600 \
   data_ml/train data_ml/lang exp_ml/tri4b_ali exp_ml/ubm5a || exit 1;

# Use slightly larger SGMM parameters than the WSJ setup.
steps/train_sgmm2.sh --cmd "$train_cmd" \
  15000 30000 data_ml/train data_ml/lang exp_ml/tri4b_ali \
    exp_ml/ubm5a/final.ubm exp_ml/sgmm2_5a || exit 1;


# This convert_models.sh script will also have the effect of subsetting
# the model, because some of the phones are undefined in the destination.
# We should make sure that the programs "gmm-convert" and "convert-tree"
# accept a phone map that does not map all of the phones we have-- it would
# delete those phones.  The --reduce option to the script would be passed
# into those programs, and confirm to them that that's "really" what we want
# to do.
utils/convert_models.sh --reduce true exp_ml/sgmm2_5a data_ml/lang exp/sgmm2_5c_ml data/lang || exit 1;

# Build the decoding graphs and decode, in a background subshell.
# A failed graph build aborts the subshell (nothing can be decoded without
# it); the individual decodes stay independent of each other, but any failure
# is remembered and reported through the subshell's exit status.
# NOTE(review): the lang_test_*, test_dev93/test_eval92 and exp/tri4b/decode_*
# names below look like they were carried over from the WSJ recipe -- confirm
# that they exist in this setup.
(
  decode_status=0

  utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5c_ml exp/sgmm2_5c_ml/graph_tgpr || exit 1;
  steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
    exp/sgmm2_5c_ml/graph_tgpr data/test_dev93 exp/sgmm2_5c_ml/decode_tgpr_dev93 || decode_status=1
  steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
    exp/sgmm2_5c_ml/graph_tgpr data/test_eval92 exp/sgmm2_5c_ml/decode_tgpr_eval92 || decode_status=1

  utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm2_5c_ml exp/sgmm2_5c_ml/graph_bd_tgpr || exit 1;
  steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
    exp/sgmm2_5c_ml/graph_bd_tgpr data/test_dev93 exp/sgmm2_5c_ml/decode_bd_tgpr_dev93 || decode_status=1
  steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
    exp/sgmm2_5c_ml/graph_bd_tgpr data/test_eval92 exp/sgmm2_5c_ml/decode_bd_tgpr_eval92 || decode_status=1

  exit $decode_status
) &

# Do not return before the background decoding has finished, and propagate
# its exit status so that failures are not silently lost.
wait $! || exit 1;