make_cnn_proto.py
#!/usr/bin/env python
# Copyright 2014 Brno University of Technology (author: Katerina Zmolikova, Karel Vesely)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Generated Nnet prototype, to be initialized by 'nnet-initialize'.
from __future__ import division
from __future__ import print_function
import math, random, sys
from optparse import OptionParser
###
### Parse options
###
usage="%prog [options] <feat-dim> <num-leaves> <num-hidden-layers> <num-hidden-neurons> >nnet-proto-file"
parser = OptionParser(usage)
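# Example invocation (illustrative; the 40-dim filterbank and 3-dim pitch below are
# assumptions, not requirements): with --delta-order=2 and --splice=5 the spliced
# input has feat-dim = (40+3)*(2+1)*(2*5+1) = 1419, so one might run
#   make_cnn_proto.py --pitch-dim=3 1419 >nnet.proto
# and pass the resulting prototype to 'nnet-initialize'.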
parser.add_option('--activation-type', dest='activation_type',
                  help='Select type of activation function: (<Sigmoid>|<Tanh>) [default: %default]',
                  default='<Sigmoid>', type='string')
parser.add_option('--num-filters1', dest='num_filters1',
                  help='Number of filters in first convolutional layer [default: %default]',
                  default=128, type='int')
parser.add_option('--num-filters2', dest='num_filters2',
                  help='Number of filters in second convolutional layer [default: %default]',
                  default=256, type='int')
parser.add_option('--pool-size', dest='pool_size',
                  help='Size of pooling [default: %default]',
                  default=3, type='int')
parser.add_option('--pool-step', dest='pool_step',
                  help='Step of pooling [default: %default]',
                  default=3, type='int')
parser.add_option('--pool-type', dest='pool_type',
                  help='Type of pooling (Max | Average) [default: %default]',
                  default='Max', type='string')
parser.add_option('--pitch-dim', dest='pitch_dim',
                  help='Number of features representing pitch [default: %default]',
                  default=0, type='int')
parser.add_option('--delta-order', dest='delta_order',
                  help='Order of delta features [default: %default]',
                  default=2, type='int')
parser.add_option('--splice', dest='splice',
                  help='Length of splice [default: %default]',
                  default=5, type='int')
parser.add_option('--patch-step1', dest='patch_step1',
                  help='Patch step of first convolutional layer [default: %default]',
                  default=1, type='int')
parser.add_option('--patch-dim1', dest='patch_dim1',
                  help='Dim of convolutional kernel in 1st layer (freq. axis) [default: %default]',
                  default=8, type='int')
parser.add_option('--patch-dim2', dest='patch_dim2',
                  help='Dim of convolutional kernel in 2nd layer (freq. axis) [default: %default]',
                  default=4, type='int')
parser.add_option('--dir', dest='protodir',
                  help='Directory where network prototypes will be saved [default: %default]',
                  default='.', type='string')
parser.add_option('--num-pitch-neurons', dest='num_pitch_neurons',
                  help='Number of neurons in layers processing pitch features [default: %default]',
                  default=200, type='int')
(o,args) = parser.parse_args()
if len(args) != 1:
    parser.print_help()
    sys.exit(1)

feat_dim = int(args[0])
### End parse options
feat_raw_dim = feat_dim // (o.delta_order+1) // (o.splice*2+1) - o.pitch_dim # number of raw feats, without deltas, splicing and pitch
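# Worked example (illustrative dims): with a 40-dim filterbank, 3-dim pitch,
# delta_order=2 and splice=5, feat_dim = (40+3)*3*11 = 1419 and
# feat_raw_dim = 1419//3//11 - 3 = 40. The integer division must be exact,
# i.e. feat_dim must be a multiple of (delta_order+1)*(splice*2+1).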
# Check
assert(feat_dim > 0)
assert(o.pool_type == 'Max' or o.pool_type == 'Average')
###
### Print prototype of the network
###
# Begin the prototype
print("<NnetProto>")
# Convolutional part of network
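# Structure of the convolutional front-end (see the component strings below):
#   conv1 -> pooling -> Rescale -> AddShift -> activation
#   -> conv2 -> Rescale -> AddShift -> activation
# (pooling is applied only after the first convolutional layer).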
num_patch1 = 1 + (feat_raw_dim - o.patch_dim1) // o.patch_step1
num_pool = 1 + (num_patch1 - o.pool_size) // o.pool_step
patch_dim2 = o.patch_dim2
patch_step2 = o.patch_step1
patch_stride2 = num_pool # same as the per-filter output count of layer 1
num_patch2 = 1 + (num_pool - patch_dim2) // patch_step2
inputdim_of_cnn = feat_dim
outputdim_of_cnn = o.num_filters2*num_patch2
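# With the default options and an assumed feat_raw_dim of 40 (illustration only):
#   num_patch1 = 1 + (40-8)//1 = 33  -> conv1 output dim 128*33 = 4224
#   num_pool   = 1 + (33-3)//3 = 11  -> pooling output dim 128*11 = 1408
#   num_patch2 = 1 + (11-4)//1 = 8   -> conv2 output dim 256*8 = 2048 (= outputdim_of_cnn)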
convolution_proto = ''
convolution_proto += "<ConvolutionalComponent> <InputDim> %d <OutputDim> %d <PatchDim> %d <PatchStep> %d <PatchStride> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <MaxNorm> %f\n" % \
(feat_raw_dim * (o.delta_order+1) * (o.splice*2+1), o.num_filters1 * num_patch1, o.patch_dim1, o.patch_step1, feat_raw_dim, -1.0, 2.0, 0.02, 30) #~8x11x3 = 264 inputs
convolution_proto += "<%sPoolingComponent> <InputDim> %d <OutputDim> %d <PoolSize> %d <PoolStep> %d <PoolStride> %d\n" % \
(o.pool_type, o.num_filters1*num_patch1, o.num_filters1*num_pool, o.pool_size, o.pool_step, o.num_filters1)
convolution_proto += "<Rescale> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
(o.num_filters1*num_pool, o.num_filters1*num_pool, 1)
convolution_proto += "<AddShift> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
(o.num_filters1*num_pool, o.num_filters1*num_pool, 0)
convolution_proto += "%s <InputDim> %d <OutputDim> %d\n" % \
(o.activation_type, o.num_filters1*num_pool, o.num_filters1*num_pool)
convolution_proto += "<ConvolutionalComponent> <InputDim> %d <OutputDim> %d <PatchDim> %d <PatchStep> %d <PatchStride> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <MaxNorm> %f\n" % \
(o.num_filters1*num_pool, outputdim_of_cnn, patch_dim2, patch_step2, patch_stride2, -2.0, 4.0, 0.1, 50) #~4x128 = 512 inputs
convolution_proto += "<Rescale> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
(outputdim_of_cnn, outputdim_of_cnn, 1)
convolution_proto += "<AddShift> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
(outputdim_of_cnn, outputdim_of_cnn, 0)
convolution_proto += "%s <InputDim> %d <OutputDim> %d\n" % \
(o.activation_type, outputdim_of_cnn, outputdim_of_cnn)
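# Under the same illustrative assumptions (feat_raw_dim=40, default options),
# the first two component lines above would read roughly:
#   <ConvolutionalComponent> <InputDim> 1320 <OutputDim> 4224 <PatchDim> 8 <PatchStep> 1 <PatchStride> 40 ...
#   <MaxPoolingComponent> <InputDim> 4224 <OutputDim> 1408 <PoolSize> 3 <PoolStep> 3 <PoolStride> 128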
if (o.pitch_dim > 0):
    # convolutional part
    f_conv = open('%s/nnet.proto.convolution' % o.protodir, 'w')
    f_conv.write('<NnetProto>\n')
    f_conv.write(convolution_proto)
    f_conv.write('</NnetProto>\n')
    f_conv.close()
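    # The convolutional sub-network is written to a separate nested prototype file,
    # so that the <ParallelComponent> printed below can reference it by filename.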
    # pitch part
    f_pitch = open('%s/nnet.proto.pitch' % o.protodir, 'w')
    f_pitch.write('<NnetProto>\n')
    f_pitch.write('<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f\n' % \
        ((o.pitch_dim * (o.delta_order+1) * (o.splice*2+1)), o.num_pitch_neurons, -2, 4, 0.02))
    f_pitch.write('%s <InputDim> %d <OutputDim> %d\n' % \
        (o.activation_type, o.num_pitch_neurons, o.num_pitch_neurons))
    f_pitch.write('<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f\n' % \
        (o.num_pitch_neurons, o.num_pitch_neurons, -2, 4, 0.1))
    f_pitch.write('%s <InputDim> %d <OutputDim> %d\n' % \
        (o.activation_type, o.num_pitch_neurons, o.num_pitch_neurons))
    f_pitch.write('</NnetProto>\n')
    f_pitch.close()
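    # What follows wires the two sub-networks together: the <Copy> component
    # reorders each spliced (raw+pitch) block so that all raw-feature blocks
    # come first and all pitch blocks last, and the <ParallelComponent> feeds
    # the first part to the convolutional proto and the remainder to the pitch
    # proto. With the illustrative dims above (feat_raw_dim=40, pitch_dim=3,
    # 33 spliced blocks of 43 dims each), the <BuildVector> begins
    # '1:1:40 44:1:83 ...' and continues with '41:1:43 84:1:86 ...'.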
    # parallel part
    vector = ''
    for i in range(1, inputdim_of_cnn, feat_raw_dim + o.pitch_dim):
        vector += '%d:1:%d ' % (i, i + feat_raw_dim - 1)
    for i in range(feat_raw_dim+1, inputdim_of_cnn + 1, feat_raw_dim + o.pitch_dim):
        vector += '%d:1:%d ' % (i, i + o.pitch_dim - 1)
    print('<Copy> <InputDim> %d <OutputDim> %d <BuildVector> %s </BuildVector>' % \
        (inputdim_of_cnn, inputdim_of_cnn, vector))
    print('<ParallelComponent> <InputDim> %d <OutputDim> %d <NestedNnetProto> %s %s </NestedNnetProto>' % \
        (inputdim_of_cnn, o.num_pitch_neurons + outputdim_of_cnn, '%s/nnet.proto.convolution' % o.protodir, '%s/nnet.proto.pitch' % o.protodir))
else: # no pitch
    print(convolution_proto)
# We are done!
sys.exit(0)