egs/wsj/s5/steps/nnet3/make_tdnn_configs.py

  #!/usr/bin/env python
  
  # This script is deprecated; please use ../xconfig_to_configs.py instead.
  
  # We're using python 3.x style print, but want it to work in python 2.x.
  from __future__ import print_function
  import os, argparse, sys
  
  
  parser = argparse.ArgumentParser(description="Writes config files and variables "
                                   "for TDNN creation and training",
                                   epilog="See steps/nnet3/train_tdnn.sh for an example.")
  parser.add_argument("--splice-indexes", type=str,
                      help="Splice indexes at each hidden layer, e.g. '-3,-2,-1,0,1,2,3 0 -2,2 0 -4,4 0 -8,8'")
  parser.add_argument("--feat-dim", type=int,
                      help="Raw feature dimension, e.g. 13")
  parser.add_argument("--ivector-dim", type=int,
                      help="iVector dimension, e.g. 100", default=0)
  parser.add_argument("--include-log-softmax", type=str,
                      help="add the final softmax layer ", default="true", choices = ["false", "true"])
  parser.add_argument("--final-layer-normalize-target", type=float,
                      help="RMS target for final layer (set to <1 if final layer learns too fast",
                      default=1.0)
  parser.add_argument("--pnorm-input-dim", type=int,
                      help="input dimension to p-norm nonlinearities")
  parser.add_argument("--pnorm-output-dim", type=int,
                      help="output dimension of p-norm nonlinearities")
  parser.add_argument("--relu-dim", type=int,
                      help="dimension of ReLU nonlinearities")
  parser.add_argument("--use-presoftmax-prior-scale", type=str,
                      help="if true, a presoftmax-prior-scale is added",
                      choices=['true', 'false'], default = "true")
  parser.add_argument("--num-targets", type=int,
                      help="number of network targets (e.g. num-pdf-ids/num-leaves)")
  parser.add_argument("config_dir",
                      help="Directory to write config files and variables");
  
  print(' '.join(sys.argv))
  
  args = parser.parse_args()
  
  if not os.path.exists(args.config_dir):
      os.makedirs(args.config_dir)
  
  ## Check arguments.
  if args.splice_indexes is None:
      sys.exit("--splice-indexes argument is required")
  if args.feat_dim is None or not (args.feat_dim > 0):
      sys.exit("--feat-dim argument is required")
  if args.num_targets is None or not (args.num_targets > 0):
      sys.exit("--num-targets argument is required")
  if args.relu_dim is not None:
      if args.pnorm_input_dim is not None or args.pnorm_output_dim is not None:
          sys.exit("--relu-dim argument not compatible with "
                   "--pnorm-input-dim or --pnorm-output-dim options")
      nonlin_input_dim = args.relu_dim
      nonlin_output_dim = args.relu_dim
  else:
      # Check for None explicitly before checking positivity: comparing None
      # with an integer raises a TypeError in python 3.x.
      if (args.pnorm_input_dim is None or args.pnorm_input_dim <= 0 or
              args.pnorm_output_dim is None or args.pnorm_output_dim <= 0):
          sys.exit("--relu-dim not set, so expected --pnorm-input-dim and "
                   "--pnorm-output-dim to be provided.")
      nonlin_input_dim = args.pnorm_input_dim
      nonlin_output_dim = args.pnorm_output_dim
  
  use_presoftmax_prior_scale = (args.use_presoftmax_prior_scale == "true")
  
  ## Work out splice_array e.g. splice_array = [ [ -3,-2,...3 ], [0], [-2,2], .. [ -8,8 ] ]
  splice_array = []
  left_context = 0
  right_context = 0
  split1 = args.splice_indexes.split()  # we already checked the argument is present.
  if len(split1) < 1:
      sys.exit("invalid --splice-indexes argument, too short: "
               + args.splice_indexes)
  try:
      for string in split1:
          split2 = string.split(",")
          if len(split2) < 1:
              sys.exit("invalid --splice-indexes argument, too-short element: "
                       + args.splice_indexes)
          int_list = []
          for int_str in split2:
              int_list.append(int(int_str))
          if int_list != sorted(int_list):
              sys.exit("elements of --splice-indexes must be sorted: "
                       + args.splice_indexes)
          left_context += -int_list[0]
          right_context += int_list[-1]
          splice_array.append(int_list)
  except ValueError as e:
      sys.exit("invalid --splice-indexes argument " + args.splice_indexes + str(e))
  left_context = max(0, left_context)
  right_context = max(0, right_context)
  num_hidden_layers = len(splice_array)
  input_dim = len(splice_array[0]) * args.feat_dim + args.ivector_dim
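
  # Worked example (using the made-up values from the example invocation
  # above): --splice-indexes "-2,-1,0,1,2 -1,2 -3,3 0" gives
  #   splice_array      = [[-2,-1,0,1,2], [-1,2], [-3,3], [0]]
  #   left_context      = 2 + 1 + 3 + 0 = 6
  #   right_context     = 2 + 2 + 3 + 0 = 7
  #   num_hidden_layers = 4
  #   input_dim         = 5 * 40 + 100 = 300  (with --feat-dim 40, --ivector-dim 100)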
  
  f = open(args.config_dir + "/vars", "w")
  print('left_context={}'.format(left_context), file=f)
  print('right_context={}'.format(right_context), file=f)
  # the initial l/r contexts are actually not needed.
  # print('initial_left_context=' + str(splice_array[0][0]), file=f)
  # print('initial_right_context=' + str(splice_array[0][-1]), file=f)
  print('num_hidden_layers={}'.format(num_hidden_layers), file=f)
  f.close()
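
  # With the made-up example values above, the vars file would read:
  #   left_context=6
  #   right_context=7
  #   num_hidden_layers=4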
  
  f = open(args.config_dir + "/init.config", "w")
  print('# Config file for initializing neural network prior to', file=f)
  print('# preconditioning matrix computation', file=f)
  print('input-node name=input dim={}'.format(args.feat_dim), file=f)
  splices = [('Offset(input, {0})'.format(n) if n != 0 else 'input') for n in splice_array[0]]
  if args.ivector_dim > 0:
      print('input-node name=ivector dim={}'.format(args.ivector_dim), file=f)
      splices.append('ReplaceIndex(ivector, t, 0)')
  # example of next line:
  # output-node name=output input="Append(Offset(input, -3), Offset(input, -2), Offset(input, -1), ... , Offset(input, 3), ReplaceIndex(ivector, t, 0))"
  print('output-node name=output input=Append({0})'.format(", ".join(splices)), file=f)
  f.close()
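
  # With the made-up example values above, init.config would contain:
  #   input-node name=input dim=40
  #   input-node name=ivector dim=100
  #   output-node name=output input=Append(Offset(input, -2), Offset(input, -1), input, Offset(input, 1), Offset(input, 2), ReplaceIndex(ivector, t, 0))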
  
  for l in range(1, num_hidden_layers + 1):
      f = open(args.config_dir + "/layer{0}.config".format(l), "w")
      print('# Config file for layer {0} of the network'.format(l), file=f)
      if l == 1:
          print('component name=lda type=FixedAffineComponent matrix={0}/lda.mat'.
                format(args.config_dir), file=f)
      cur_dim = (nonlin_output_dim * len(splice_array[l-1]) if l > 1 else input_dim)
  
      print('# Note: param-stddev in next component defaults to 1/sqrt(input-dim).', file=f)
      print('component name=affine{0} type=NaturalGradientAffineComponent '
            'input-dim={1} output-dim={2} bias-stddev=0'.
          format(l, cur_dim, nonlin_input_dim), file=f)
      if args.relu_dim is not None:
          print('component name=nonlin{0} type=RectifiedLinearComponent dim={1}'.
                format(l, args.relu_dim), file=f)
      else:
          print('# In nnet3 framework, p in P-norm is always 2.', file=f)
          print('component name=nonlin{0} type=PnormComponent input-dim={1} output-dim={2}'.
                format(l, args.pnorm_input_dim, args.pnorm_output_dim), file=f)
      print('component name=renorm{0} type=NormalizeComponent dim={1} target-rms={2}'.format(
          l, nonlin_output_dim,
          (1.0 if l < num_hidden_layers else args.final_layer_normalize_target)), file=f)
      print('component name=final-affine type=NaturalGradientAffineComponent '
            'input-dim={0} output-dim={1} param-stddev=0 bias-stddev=0'.format(
            nonlin_output_dim, args.num_targets), file=f)
      # Re-declaring the final components (and their component-nodes below) for
      # l > 1 is not really necessary, as they will already exist, but it
      # doesn't hurt and makes the structure clearer.
      if args.include_log_softmax == "true":
          if use_presoftmax_prior_scale:
              print('component name=final-fixed-scale type=FixedScaleComponent '
                    'scales={0}/presoftmax_prior_scale.vec'.format(
                      args.config_dir), file=f)
          print('component name=final-log-softmax type=LogSoftmaxComponent dim={0}'.format(
                  args.num_targets), file=f)
      print('# Now for the network structure', file=f)
      if l == 1:
          splices = [ ('Offset(input, {0})'.format(n) if n != 0 else 'input') for n in splice_array[l-1] ]
          if args.ivector_dim > 0: splices.append('ReplaceIndex(ivector, t, 0)')
          orig_input='Append({0})'.format(', '.join(splices))
          # e.g. orig_input = 'Append(Offset(input, -2), ... Offset(input, 2), ivector)'
          print('component-node name=lda component=lda input={0}'.format(orig_input),
                file=f)
          cur_input='lda'
      else:
          # e.g. cur_input = 'Append(Offset(renorm1, -2), renorm1, Offset(renorm1, 2))'
          splices = [ ('Offset(renorm{0}, {1})'.format(l-1, n) if n !=0 else 'renorm{0}'.format(l-1))
                      for n in splice_array[l-1] ]
          cur_input='Append({0})'.format(', '.join(splices))
      print('component-node name=affine{0} component=affine{0} input={1}'.
            format(l, cur_input), file=f)
      print('component-node name=nonlin{0} component=nonlin{0} input=affine{0}'.
            format(l), file=f)
      print('component-node name=renorm{0} component=renorm{0} input=nonlin{0}'.
            format(l), file=f)
  
      print('component-node name=final-affine component=final-affine input=renorm{0}'.
            format(l), file=f)
  
      if args.include_log_softmax == "true":
          if use_presoftmax_prior_scale:
              print('component-node name=final-fixed-scale component=final-fixed-scale input=final-affine',
                    file=f)
              print('component-node name=final-log-softmax component=final-log-softmax '
                    'input=final-fixed-scale', file=f)
          else:
              print('component-node name=final-log-softmax component=final-log-softmax '
                    'input=final-affine', file=f)
          print('output-node name=output input=final-log-softmax', file=f)
      else:
          print('output-node name=output input=final-affine', file=f)
      f.close()
  
  
  # ## Write file $config_dir/init.config to initialize the network, prior to computing the LDA matrix.
  # ##will look like this, if we have iVectors:
  # input-node name=input dim=13
  # input-node name=ivector dim=100
  # output-node name=output input="Append(Offset(input, -3), Offset(input, -2), Offset(input, -1), ... , Offset(input, 3), ReplaceIndex(ivector, t, 0))"
  
  # ## Write file $config_dir/layer1.config that adds the LDA matrix, assumed to be in the config directory as
  # ## lda.mat, the first hidden layer, and the output layer.
  # component name=lda type=FixedAffineComponent matrix=$config_dir/lda.mat
  # component name=affine1 type=NaturalGradientAffineComponent input-dim=$lda_input_dim output-dim=$pnorm_input_dim bias-stddev=0
  # component name=nonlin1 type=PnormComponent input-dim=$pnorm_input_dim output-dim=$pnorm_output_dim
  # component name=renorm1 type=NormalizeComponent dim=$pnorm_output_dim
  # component name=final-affine type=NaturalGradientAffineComponent input-dim=$pnorm_output_dim output-dim=$num_leaves param-stddev=0 bias-stddev=0
  # component name=final-log-softmax type=LogSoftmaxComponent dim=$num_leaves
  # # InputOf(output) says use the same Descriptor of the current "output" node.
  # component-node name=lda component=lda input=InputOf(output)
  # component-node name=affine1 component=affine1 input=lda
  # component-node name=nonlin1 component=nonlin1 input=affine1
  # component-node name=renorm1 component=renorm1 input=nonlin1
  # component-node name=final-affine component=final-affine input=renorm1
  # component-node name=final-log-softmax component=final-log-softmax input=final-affine
  # output-node name=output input=final-log-softmax
  
  
  # ## Write file $config_dir/layer2.config that adds the second hidden layer.
  # component name=affine2 type=NaturalGradientAffineComponent input-dim=$spliced_dim output-dim=$pnorm_input_dim bias-stddev=0
  # # ($spliced_dim here is 2 * $pnorm_output_dim, since affine2's input appends two Offsets of renorm1.)
  # component name=nonlin2 type=PnormComponent input-dim=$pnorm_input_dim output-dim=$pnorm_output_dim
  # component name=renorm2 type=NormalizeComponent dim=$pnorm_output_dim
  # component name=final-affine type=NaturalGradientAffineComponent input-dim=$pnorm_output_dim output-dim=$num_leaves param-stddev=0 bias-stddev=0
  # component-node name=affine2 component=affine2 input=Append(Offset(renorm1, -2), Offset(renorm1, 2))
  # component-node name=nonlin2 component=nonlin2 input=affine2
  # component-node name=renorm2 component=renorm2 input=nonlin2
  # component-node name=final-affine component=final-affine input=renorm2
  # component-node name=final-log-softmax component=final-log-softmax input=final-affine
  # output-node name=output input=final-log-softmax
  
  
  # ## ... etc.  In this example it would go up to $config_dir/layer5.config.