Blame view

egs/wsj/s5/steps/nnet2/make_multisplice_configs.py 8.12 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  #!/usr/bin/env python
  # Copyright 2014  Johns Hopkins University (Authors: Daniel Povey and Vijayaditya Peddinti).  Apache 2.0.
  
  # Creates the nnet.config and hidden_*.config files used in train_pnorm_multisplice.sh
  # Parses the splice string to generate relevant variables for get_egs.sh, get_lda.sh and nnet/hidden.config files
  
  from __future__ import division
  from __future__ import print_function
  import re, argparse, sys, math, warnings
  
  # returns the set of frame indices required to perform the convolution
  # between sequences with frame indices in x and y
  def get_convolution_index_set(x, y):
    """Return the distinct pairwise sums of x and y in ascending order.

    Each element of x is added to each element of y; duplicate sums are
    collapsed and the result is returned as a sorted list. If either input
    is empty the result is an empty list.
    """
    pairwise_sums = {left + right for left in x for right in y}
    return sorted(pairwise_sums)
  
  def parse_splice_string(splice_string):
    """Parse a multisplice specification string.

    The string is split on the keyword 'layer'; every piece after the first
    must look like '<layer_index>/<offset>:<offset>:...', for example
    'layer0/-2:-1:0:1:2 layer2/-3:2'.

    Returns a two-element list [contexts, context_variables] where
      contexts is a dict mapping each integer layer index to its
        colon-joined offsets, and
      context_variables is the text assigning nnet_left_context,
        nnet_right_context, first_left_context and first_right_context
        (all as absolute values).

    Raises:
      Exception: if an entry is malformed, if the layer-0 offsets are not
        contiguous, or if the layer-0 left context is positive or the
        right context is negative.
      AssertionError: if a layer index is negative.
    """
    split_on_layer = splice_string.split('layer')
    print(split_on_layer)
    # Anything before the first 'layer' keyword carries no specification.
    layerwise_splice_indexes = split_on_layer[1:]
    contexts = {}
    first_right_context = 0 # default value
    first_left_context = 0 # default value
    nnet_frame_indexes = [0] # frame indexes required by the network
                             # at the initial layer (will be used in
                             # determining the context for get_egs.sh)
    for cur_splice_indexes in layerwise_splice_indexes:
      # Only the parsing itself can raise ValueError (wrong number of '/'
      # separated fields, or a non-integer token), so only that is guarded.
      try:
        layer_index, frame_indexes = cur_splice_indexes.split("/")
        frame_indexes = [int(x) for x in frame_indexes.split(':')]
        layer_index = int(layer_index)
      except ValueError:
        # Report the argument itself; the global 'params' only exists when
        # this file is executed as a script.
        raise Exception('Unknown format in splice_indexes variable: {0}'.format(splice_string))
      if layer_index < 0:
        # Raised explicitly (instead of via an assert statement) so that the
        # check is not stripped under 'python -O'; the exception type is
        # kept for backward compatibility.
        raise AssertionError('layer index must be non-negative, got {0}'.format(layer_index))
      if layer_index == 0:
        first_left_context = min(frame_indexes)
        first_right_context = max(frame_indexes)
        if frame_indexes != list(range(first_left_context, first_right_context+1)):
          raise Exception('Currently the first splice component just accepts contiguous context.')
        if first_left_context > 0 or first_right_context < 0:
          raise Exception("""get_lda.sh script does not support postive left-context or negative right context.
            left context provided is %d and right context provided is %d.""" % (first_left_context, first_right_context))
      # convolve the current splice indices with the splice indices until last layer
      nnet_frame_indexes = get_convolution_index_set(frame_indexes, nnet_frame_indexes)
      contexts[layer_index] = ":".join([str(x) for x in frame_indexes])
    print(nnet_frame_indexes)
    max_left_context = min(nnet_frame_indexes)
    max_right_context = max(nnet_frame_indexes)
    context_variables = (' nnet_left_context={0};\n'
                         ' nnet_right_context={1}\n'
                         ' first_left_context={2};\n'
                         ' first_right_context={3}\n').format(
                             abs(max_left_context), abs(max_right_context),
                             abs(first_left_context), abs(first_right_context))
    return [contexts, context_variables]
  
  def create_config_files(output_dir, params):
    """Write the vars, nnet.config and hidden_<i>.config files to output_dir.

    Args:
      output_dir: directory in which the files are created.
      params: object with the attributes read below (splice_indexes,
        pnorm_input_dim, pnorm_output_dim, num_hidden_layers,
        total_input_dim, ivector_dim, lda_mat, lda_dim,
        online_preconditioning_opts, initial_learning_rate, bias_stddev,
        num_targets), e.g. the argparse namespace built in __main__.

    Files written:
      vars: the context variables returned by parse_splice_string.
      nnet.config: the initial network (splice, fixed affine, one hidden
        layer, final affine and softmax).
      hidden_<i>.config for i in 1..num_hidden_layers-1: one hidden layer
        each, preceded by a SpliceComponent when a context was given for
        layer i.

    Raises:
      Exception: if a context is given for a layer index that is not below
        num_hidden_layers, or if no context is given for layer 0.
    """
    pnorm_p = 2
    pnorm_input_dim = params.pnorm_input_dim
    pnorm_output_dim = params.pnorm_output_dim
    contexts, context_variables = parse_splice_string(params.splice_indexes)
    with open("{0}/vars".format(output_dir), "w") as var_file:
      var_file.write(context_variables)

    # Explicit checks rather than assert statements, so they still run
    # under 'python -O'.
    if contexts and max(contexts) >= params.num_hidden_layers:
      raise Exception("""Splice string provided is {2}.
      Number of hidden layers {0}, is less than the number of context specifications provided.
      Splicing is supported only until layer {1}.""".format(params.num_hidden_layers, params.num_hidden_layers - 1, params.splice_indexes))
    if 0 not in contexts:
      # Also covers a splice string with no layer entries at all, which
      # would otherwise fail inside max() on an empty dict.
      raise Exception('A splice layer is expected to be the first layer. Provide a context for the first layer.')

    stddev = 1.0/math.sqrt(pnorm_input_dim)
    # With equal input and output dims a pnorm would not reduce the
    # dimension, so a rectified linear component is emitted instead.
    use_relu = (pnorm_input_dim == pnorm_output_dim)
    if use_relu:
      first_nonlinearity = "RectifiedLinearComponent dim={0}".format(pnorm_input_dim)
    else:
      first_nonlinearity = "PnormComponent input-dim={0} output-dim={1} p={2}".format(pnorm_input_dim, pnorm_output_dim, pnorm_p)

    nnet_config = [
      "SpliceComponent input-dim={0} context={1} const-component-dim={2}".format(params.total_input_dim, contexts[0], params.ivector_dim),
      "FixedAffineComponent matrix={0}".format(params.lda_mat),
      "AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} learning-rate={3} param-stddev={4} bias-stddev={5}".format(params.lda_dim, pnorm_input_dim, params.online_preconditioning_opts, params.initial_learning_rate, stddev, params.bias_stddev),
      first_nonlinearity,
      "NormalizeComponent dim={0}".format(pnorm_output_dim),
      "AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} learning-rate={3} param-stddev=0 bias-stddev=0".format(pnorm_output_dim, params.num_targets, params.online_preconditioning_opts, params.initial_learning_rate),
      "SoftmaxComponent dim={0}".format(params.num_targets),
    ]
    with open("{0}/nnet.config".format(output_dir), "w") as nnet_config_file:
      nnet_config_file.write("\n".join(nnet_config))

    for i in range(1, params.num_hidden_layers): #just run till num_hidden_layers-1 since we do not add splice before the final affine transform
      lines = []
      context_len = 1
      if i in contexts:
        # Adding the splice component as a context is provided
        lines.append("SpliceComponent input-dim=%d context=%s " % (pnorm_output_dim, contexts[i]))
        # Splicing multiplies the affine input dim by the number of offsets.
        context_len = len(contexts[i].split(":"))
      # Add the hidden layer, which is a composition of an affine component, pnorm component and normalization component
      lines.append("AffineComponentPreconditionedOnline input-dim=%d output-dim=%d %s learning-rate=%f param-stddev=%f bias-stddev=%f"
          % (pnorm_output_dim*context_len, pnorm_input_dim, params.online_preconditioning_opts, params.initial_learning_rate, stddev, params.bias_stddev))
      if use_relu:
        lines.append("RectifiedLinearComponent dim=%d" % (pnorm_input_dim))
        warnings.warn("Using the RectifiedLinearComponent, in place of the PnormComponent as pnorm_input_dim == pnorm_output_dim")
      else:
        lines.append("PnormComponent input-dim=%d output-dim=%d p=%d" % (pnorm_input_dim, pnorm_output_dim, pnorm_p))
      lines.append("NormalizeComponent dim={0}".format(pnorm_output_dim))
      with open("{0}/hidden_{1}.config".format(output_dir, i), 'w') as out_file:
        out_file.write("\n".join(lines))
  
  
  if __name__ == "__main__":
    # Echo the full command line for the calling script's log.
    print(" ".join(sys.argv))
    parser = argparse.ArgumentParser()
    parser.add_argument('--splice-indexes', type=str, help='string specifying the indexes for the splice layers throughout the network')
    parser.add_argument('--total-input-dim', type=int, help='dimension of the input to the network')
    parser.add_argument('--ivector-dim', type=int, help='dimension of the ivector portion of the neural network input')
    parser.add_argument('--lda-mat', type=str, help='lda-matrix used after the first splice component')
    parser.add_argument('--lda-dim', type=str, help='dimension of the lda output')
    parser.add_argument('--pnorm-input-dim', type=int, help='dimension of input to pnorm layer')
    parser.add_argument('--pnorm-output-dim', type=int, help='dimension of output of pnorm layer')
    parser.add_argument('--online-preconditioning-opts', type=str, help='extra options for the AffineComponentPreconditionedOnline component')
    parser.add_argument('--initial-learning-rate', type=float, help='')
    parser.add_argument('--num-targets', type=int, help='#targets for the neural network ')
    parser.add_argument('--num-hidden-layers', type=int, help='#hidden layers in the neural network ')
    parser.add_argument('--bias-stddev', type=float, help='standard deviation of r.v. used for bias component initialization')
    parser.add_argument("mode", type=str, help="contexts|configs")
    parser.add_argument("output_dir", type=str, help="output directory to store the files")
    params = parser.parse_args()

    print(params)
    if params.mode == "contexts":
      # Only the context variables are written in this mode; the per-layer
      # contexts returned alongside them are not used.
      [context, context_variables] = parse_splice_string(params.splice_indexes)
      # The with-statement closes the file even if the write fails.
      with open("{0}/vars".format(params.output_dir), "w") as var_file:
        var_file.write(context_variables)
    elif params.mode == "configs":
      create_config_files(params.output_dir, params)
    else:
      raise Exception("mode has to be in the set {contexts, configs}")