gen_mlp_init.py
3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/python -u
# ./gen_mlp_init.py
# script generating NN initialization
#
# author: Karel Vesely
#
import math, random
import sys
from optparse import OptionParser
def _build_parser():
    """Build the command-line parser for the initialization generator.

    Numeric options are kept as plain strings here and converted with int()
    in main(). optparse's own 'int' type treats a leading zero as octal,
    which would silently change the meaning of values such as '010'.
    """
    parser = OptionParser()
    parser.add_option('--dim', dest='dim', help='d1:d2:d3 layer dimensions in the network')
    parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights', action='store_true', default=False)
    parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (defaultall 0.0)', action='store_true', default=False)
    parser.add_option('--inputscale', dest='inputscale', help='scale the weights by 3/sqrt(Ninputs)', action='store_true', default=False)
    parser.add_option('--normalized', dest='normalized', help='Generate normalized weights according to X.Glorot paper, U[-x,x] x=sqrt(6)/(sqrt(dim_in+dim_out))', action='store_true', default=False)
    parser.add_option('--activation', dest='activation', help='activation type tag (def. <sigmoid>)', default='<sigmoid>')
    parser.add_option('--linBNdim', dest='linBNdim', help='dim of linear bottleneck (sigmoids will be omitted, bias will be zero)',default=0)
    parser.add_option('--linOutput', dest='linOutput', help='generate MLP with linear output', action='store_true', default=False)
    parser.add_option('--seed', dest='seedval', help='seed for random generator',default=0)
    return parser


def _int_option(parser, flag, raw):
    """Convert an option value to int, reporting bad input as a usage error.

    parser.error() prints the usage message and exits with status 2, so a
    malformed value no longer ends in a bare ValueError traceback.
    """
    try:
        return int(raw)
    except ValueError:
        parser.error('%s expects an integer, got %r' % (flag, raw))


def _parse_dims(parser, spec):
    """Turn a 'd1:d2:d3' string into a list of positive layer sizes."""
    try:
        dims = [int(field) for field in spec.split(':')]
    except ValueError:
        parser.error('--dim expects colon-separated integers, got %r' % spec)
    # A layer with zero or negative width cannot be initialized: the scaling
    # factors below divide by sqrt() of these sizes.
    if min(dims) < 1:
        parser.error('--dim layer sizes must be positive, got %r' % spec)
    return dims


def _weight_sampler(options, n_in, n_out):
    """Return a zero-argument callable that draws one weight for a layer.

    The distribution is chosen once per layer instead of once per element.
    Precedence is: --normalized, then --gauss, then uniform; --inputscale
    only modifies the latter two. The arithmetic is kept in the same order as
    the per-element expressions it replaces so the drawn values are unchanged.
    """
    if options.normalized:
        bound = math.sqrt(6.0) / math.sqrt(n_out + n_in)
        return lambda: random.random() * 2.0 * bound - bound
    if options.gauss:
        if options.inputscale:
            scale = 3 / math.sqrt(n_in)
            return lambda: scale * random.gauss(0.0, 1.0)
        return lambda: 0.1 * random.gauss(0.0, 1.0)
    if options.inputscale:
        root_in = math.sqrt(n_in)
        return lambda: (random.random() - 0.5) * 2 * 3 / root_in
    return lambda: random.random() / 5.0 - 0.1


def _bias_fields(options, n_out, is_bottleneck, is_last):
    """Return the bias vector entries of a layer as strings.

    Biases are '0.0' for bottleneck layers, for the last layer and whenever
    --negbias is off; only in the remaining case are random values drawn, so
    the random stream is consumed exactly when a random bias is emitted.
    """
    if options.negbias and not is_bottleneck and not is_last:
        return [str(random.random() / 5.0 - 4.1) for _ in range(n_out)]
    return ['0.0'] * n_out


def main(argv=None):
    """Print a randomly initialized MLP description to standard output.

    argv is the list of command-line arguments (without the program name);
    None makes optparse read sys.argv[1:].

    For every pair of consecutive sizes in --dim the output contains an
    '<affinetransform> OUT IN' header, the weight matrix between '[' and ']'
    lines (one row per output unit), the bias vector on a single
    '[ ... ]' line, and then an activation tag line: '<softmax>' after the
    last layer unless --linOutput is given, the --activation tag after other
    layers. No tag is printed after a layer whose output size equals
    --linBNdim.

    Exits with status 1 (after printing help) when --dim is missing and with
    status 2 when a numeric argument is malformed.
    """
    parser = _build_parser()
    options, _ = parser.parse_args(argv)
    if options.dim is None:
        parser.print_help()
        sys.exit(1)

    # A seed of 0 (the default) leaves the generator with its default seeding.
    seed = _int_option(parser, '--seed', options.seedval)
    if seed != 0:
        random.seed(seed)

    bottleneck_dim = _int_option(parser, '--linBNdim', options.linBNdim)
    dims = _parse_dims(parser, options.dim)
    last_layer = len(dims) - 2

    # Every line is assembled as one string and printed with a single
    # argument, which yields the same text under the Python 2 print statement
    # and the Python 3 print function.
    for layer, (n_in, n_out) in enumerate(zip(dims, dims[1:])):
        is_last = layer == last_layer
        # The bottleneck is recognized purely by its output dimension.
        is_bottleneck = bottleneck_dim == n_out

        print('<affinetransform> %d %d' % (n_out, n_in))

        # weight matrix, one row per output unit
        draw = _weight_sampler(options, n_in, n_out)
        print('[')
        for _ in range(n_out):
            print(' '.join(str(draw()) for _ in range(n_in)))
        print(']')

        # bias vector
        print(' '.join(['['] + _bias_fields(options, n_out, is_bottleneck, is_last) + [']']))

        # activation tag (omitted after a linear bottleneck)
        if not is_bottleneck:
            if is_last:
                if not options.linOutput:
                    print('<softmax> %d %d' % (n_out, n_out))
            else:
                print('%s %d %d' % (options.activation, n_out, n_out))


if __name__ == '__main__':
    main()