# egs/wsj/s5/steps/nnet2/make_multisplice_configs.py
#!/usr/bin/env python
# Copyright 2014 Johns Hopkins University
# (Authors: Daniel Povey and Vijayaditya Peddinti).  Apache 2.0.

# Creates the nnet.config and hidden_*.config files used in
# train_pnorm_multisplice.sh.  Parses the splice string to generate relevant
# variables for get_egs.sh, get_lda.sh and the nnet/hidden.config files.

from __future__ import division
from __future__ import print_function

import argparse
import math
import re
import sys
import warnings


def get_convolution_index_set(x, y):
    """Return the sorted set of frame indices required to perform the
    convolution between sequences with frame indices in x and y
    (i.e. all pairwise sums xi + yj, de-duplicated and sorted)."""
    return sorted(set(xi + yj for xi in x for yj in y))


def parse_splice_string(splice_string):
    """Parse a splice specification such as 'layer0/-2:-1:0:1:2 layer2/-4:4'.

    Returns a two-element list [contexts, vars_string] where:
      contexts    -- dict mapping layer index -> colon-separated context string
      vars_string -- shell-style variable assignments (nnet_left_context,
                     nnet_right_context, first_left_context,
                     first_right_context) consumed by get_egs.sh / get_lda.sh.

    Raises Exception on a malformed splice string, on a non-contiguous
    first-layer context, or on a first-layer context that does not span 0.
    """
    layerwise_splice_indexes = splice_string.split('layer')[1:]
    print(splice_string.split('layer'))  # log the parsed pieces (kept from original)
    contexts = {}
    first_right_context = 0  # default value
    first_left_context = 0   # default value
    # Frame indexes required by the network at the input layer; these are
    # accumulated layer by layer and determine the context for get_egs.sh.
    nnet_frame_indexes = [0]
    try:
        for cur_splice_indexes in layerwise_splice_indexes:
            layer_index, frame_indexes = cur_splice_indexes.split("/")
            frame_indexes = [int(x) for x in frame_indexes.split(':')]
            layer_index = int(layer_index)
            if layer_index < 0:
                # Explicit check (the original used assert, which vanishes
                # under python -O).
                raise Exception('Layer indexes in the splice string must be >= 0.')
            if layer_index == 0:
                first_left_context = min(frame_indexes)
                first_right_context = max(frame_indexes)
                # get_lda.sh splices one contiguous window of frames, so the
                # first splice component must be contiguous.
                if frame_indexes != list(range(first_left_context,
                                               first_right_context + 1)):
                    raise Exception('Currently the first splice component just accepts contiguous context.')
                if not (first_left_context <= 0 and first_right_context >= 0):
                    raise Exception(
                        "get_lda.sh script does not support positive left-context or "
                        "negative right context. left context provided is %d and "
                        "right context provided is %d."
                        % (first_left_context, first_right_context))
            # Convolve the current splice indices with the accumulated splice
            # indices of the previous layers.
            nnet_frame_indexes = get_convolution_index_set(frame_indexes,
                                                           nnet_frame_indexes)
            contexts[layer_index] = ":".join(str(x) for x in frame_indexes)
    except ValueError:
        # split()/int() raise ValueError on malformed input.  NOTE: the
        # original formatted the global 'params' here, which is a NameError
        # when this module is imported; use the function's own argument.
        raise Exception('Unknown format in splice_indexes variable: {0}'.format(splice_string))
    print(nnet_frame_indexes)  # log the total frame context of the network
    max_left_context = min(nnet_frame_indexes)
    max_right_context = max(nnet_frame_indexes)
    # The exact spacing/semicolons of this string are relied on by the shell
    # scripts that source the 'vars' file -- do not reformat.
    return [contexts,
            ' nnet_left_context={0}; nnet_right_context={1} first_left_context={2}; first_right_context={3} '.format(
                abs(max_left_context), abs(max_right_context),
                abs(first_left_context), abs(first_right_context))]


def create_config_files(output_dir, params):
    """Write vars, nnet.config and hidden_*.config into output_dir.

    params is the argparse namespace built in __main__ (splice_indexes,
    total_input_dim, ivector_dim, lda_mat, lda_dim, pnorm_input_dim,
    pnorm_output_dim, online_preconditioning_opts, initial_learning_rate,
    num_targets, num_hidden_layers, bias_stddev).
    """
    pnorm_p = 2
    pnorm_input_dim = params.pnorm_input_dim
    pnorm_output_dim = params.pnorm_output_dim
    contexts, context_variables = parse_splice_string(params.splice_indexes)

    with open("{0}/vars".format(output_dir), "w") as var_file:
        var_file.write(context_variables)

    if max(contexts.keys()) >= params.num_hidden_layers:
        raise Exception(
            "Splice string provided is {2}. Number of hidden layers {0}, is less "
            "than the number of context specifications provided. "
            "Splicing is supported only until layer {1}.".format(
                params.num_hidden_layers, params.num_hidden_layers - 1,
                params.splice_indexes))

    if 0 not in contexts:
        # The original caught KeyError around the list build; the only
        # possible KeyError there is a missing first-layer context.
        raise Exception('A splice layer is expected to be the first layer. '
                        'Provide a context for the first layer.')

    stddev = 1.0 / math.sqrt(pnorm_input_dim)
    nnet_config = [
        "SpliceComponent input-dim={0} context={1} const-component-dim={2}".format(
            params.total_input_dim, contexts[0], params.ivector_dim),
        "FixedAffineComponent matrix={0}".format(params.lda_mat),
        "AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} "
        "learning-rate={3} param-stddev={4} bias-stddev={5}".format(
            params.lda_dim, pnorm_input_dim, params.online_preconditioning_opts,
            params.initial_learning_rate, stddev, params.bias_stddev),
        # Pnorm needs input-dim != output-dim; otherwise fall back to ReLU.
        ("PnormComponent input-dim={0} output-dim={1} p={2}".format(
            pnorm_input_dim, pnorm_output_dim, pnorm_p)
         if pnorm_input_dim != pnorm_output_dim
         else "RectifiedLinearComponent dim={0}".format(pnorm_input_dim)),
        "NormalizeComponent dim={0}".format(pnorm_output_dim),
        "AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} "
        "learning-rate={3} param-stddev=0 bias-stddev=0".format(
            pnorm_output_dim, params.num_targets,
            params.online_preconditioning_opts, params.initial_learning_rate),
        "SoftmaxComponent dim={0}".format(params.num_targets)]
    with open("{0}/nnet.config".format(output_dir), "w") as nnet_config_file:
        # One component per line: Kaldi's config parser is line-oriented, so
        # joining with spaces (as the original did) yields an unparseable file.
        nnet_config_file.write("\n".join(nnet_config))

    # Only run until num_hidden_layers-1, since no splice is added before the
    # final affine transform.
    for i in range(1, params.num_hidden_layers):
        lines = []
        context_len = 1
        if i in contexts:
            # Add the splice component, as a context was provided for layer i.
            lines.append("SpliceComponent input-dim=%d context=%s "
                         % (pnorm_output_dim, contexts[i]))
            context_len = len(contexts[i].split(":"))
        # The hidden layer is a composition of an affine component, a pnorm
        # (or ReLU) component and a normalization component.
        lines.append("AffineComponentPreconditionedOnline input-dim=%d "
                     "output-dim=%d %s learning-rate=%f param-stddev=%f "
                     "bias-stddev=%f"
                     % (pnorm_output_dim * context_len, pnorm_input_dim,
                        params.online_preconditioning_opts,
                        params.initial_learning_rate, stddev,
                        params.bias_stddev))
        if pnorm_input_dim != pnorm_output_dim:
            lines.append("PnormComponent input-dim=%d output-dim=%d p=%d"
                         % (pnorm_input_dim, pnorm_output_dim, pnorm_p))
        else:
            lines.append("RectifiedLinearComponent dim=%d" % pnorm_input_dim)
            warnings.warn("Using the RectifiedLinearComponent, in place of the "
                          "PnormComponent as pnorm_input_dim == pnorm_output_dim")
        lines.append("NormalizeComponent dim={0}".format(pnorm_output_dim))
        with open("{0}/hidden_{1}.config".format(output_dir, i), 'w') as out_file:
            out_file.write("\n".join(lines))  # one component per line


if __name__ == "__main__":
    print(" ".join(sys.argv))
    parser = argparse.ArgumentParser()
    parser.add_argument('--splice-indexes', type=str,
                        help='string specifying the indexes for the splice layers throughout the network')
    parser.add_argument('--total-input-dim', type=int,
                        help='dimension of the input to the network')
    parser.add_argument('--ivector-dim', type=int,
                        help='dimension of the ivector portion of the neural network input')
    parser.add_argument('--lda-mat', type=str,
                        help='lda-matrix used after the first splice component')
    # NOTE(review): kept as str (it is only interpolated into config text),
    # though an int would be the natural type -- confirm against callers.
    parser.add_argument('--lda-dim', type=str,
                        help='dimension of the lda output')
    parser.add_argument('--pnorm-input-dim', type=int,
                        help='dimension of input to pnorm layer')
    parser.add_argument('--pnorm-output-dim', type=int,
                        help='dimension of output of pnorm layer')
    parser.add_argument('--online-preconditioning-opts', type=str,
                        help='extra options for the AffineComponentPreconditionedOnline component')
    parser.add_argument('--initial-learning-rate', type=float, help='')
    parser.add_argument('--num-targets', type=int,
                        help='#targets for the neural network ')
    parser.add_argument('--num-hidden-layers', type=int,
                        help='#hidden layers in the neural network ')
    parser.add_argument('--bias-stddev', type=float,
                        help='standard deviation of r.v. used for bias component initialization')
    parser.add_argument("mode", type=str, help="contexts|configs")
    parser.add_argument("output_dir", type=str,
                        help="output directory to store the files")
    params = parser.parse_args()
    print(params)

    if params.mode == "contexts":
        context, context_variables = parse_splice_string(params.splice_indexes)
        with open("{0}/vars".format(params.output_dir), "w") as var_file:
            var_file.write(context_variables)
    elif params.mode == "configs":
        create_config_files(params.output_dir, params)
    else:
        raise Exception("mode has to be in the set {contexts, configs}")