-
Notifications
You must be signed in to change notification settings - Fork 9
/
deconv.py
457 lines (380 loc) · 17.4 KB
/
deconv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
# Authors: Francesco Visin, Kyle Kastner
# License: BSD 3-Clause
from collections import Iterable
import numpy
import theano.tensor as T
from theano.sandbox.cuda.basic_ops import gpu_contiguous
from lasagne import init
from lasagne import nonlinearities
from lasagne.utils import as_tuple
from lasagne.layers import get_output, get_output_shape, Layer
def conv_output_length(input_length, filter_size, stride, pad=0):
    """Return the size of a convolution's output along a single axis.

    Only one axis is considered, which is the 1D case. For convolutions
    over several spatial axes, call this function once per axis.

    Parameters
    ----------
    input_length : int or None
        Size of the input along the axis. ``None`` is returned unchanged.
    filter_size : int
        Size of the filter along the axis.
    stride : int
        Step between two successive filter positions.
    pad : int, 'valid', 'full' or 'same' (default: 0)
        Implicit zero-padding applied to the input.

        An integer pads both borders with that many zeros, so the
        unstrided output size is
        ``input_length + 2 * pad - filter_size + 1``.

        ``'valid'`` behaves like ``0``: the filter is only applied where it
        fully overlaps the input.

        ``'full'`` gives an unstrided output size of
        ``input_length + filter_size - 1`` (every position where filter and
        input overlap by at least one element).

        ``'same'`` gives an unstrided output size equal to
        ``input_length``.

    Returns
    -------
    int or None
        The unstrided output size divided by `stride`, rounded up, or
        ``None`` when `input_length` is ``None``.

    Raises
    ------
    ValueError
        If `pad` is neither an integer nor one of the supported strings.
    """
    if input_length is None:
        return None

    if isinstance(pad, int):
        unstrided = input_length + 2 * pad - filter_size + 1
    elif pad == 'valid':
        unstrided = input_length - filter_size + 1
    elif pad == 'full':
        unstrided = input_length + filter_size - 1
    elif pad == 'same':
        unstrided = input_length
    else:
        raise ValueError('Invalid pad: {0}'.format(pad))

    # Integer form of ceil(unstrided / stride).
    return (unstrided + stride - 1) // stride
class BaseConvLayer(Layer):
    """
    lasagne.layers.BaseConvLayer(incoming, num_filters, filter_size,
    stride=1, pad=0, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True,
    n=None, **kwargs)

    Abstract base for `n`-dimensional convolutional layers.

    The class stores the convolution configuration, creates the weight and
    (optional) bias parameters, infers the output shape, and applies bias
    and nonlinearity on top of :meth:`convolve`. The convolution itself is
    not implemented here, so only subclasses that override
    :meth:`convolve` can be used in a network.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape,
        laid out as
        ``(batch_size, num_input_channels, <n spatial dimensions>)``.

    num_filters : int
        Number of learnable filters, i.e. number of output channels.

    filter_size : int or iterable of int
        Filter size; a single integer is used for all `n` spatial axes.

    stride : int or iterable of int
        Stride of the convolution; a single integer is used for all `n`
        spatial axes.

    pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
        Implicit zero-padding of the input.

        An integer (or one integer per spatial axis) pads both borders of
        the axis symmetrically by that amount.

        ``'valid'`` is stored as a padding of ``0`` on every axis.

        ``'full'`` and ``'same'`` are stored as given and interpreted by
        the output shape computation and by :meth:`convolve`. ``'same'``
        is rejected when any filter dimension is even.

    untie_biases : bool (default: False)
        If ``False``, `b` has shape ``(num_filters,)`` and each bias is
        shared across all positions of its channel. If ``True``, `b` has
        shape ``(num_filters, <n output spatial dimensions>)``, i.e. one
        bias per output position.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights, of the
        shape returned by :meth:`get_W_shape`.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases, or
        ``None`` for a layer without biases.

    nonlinearity : callable or None
        Elementwise function applied to the biased activations. ``None``
        selects the identity, making the layer linear.

    flip_filters : bool (default: True)
        Stored on the layer for use by :meth:`convolve` implementations:
        whether filters are flipped (convolution) or not (correlation).

    n : int or None
        Number of spatial dimensions. ``None`` infers it from the input
        shape; an explicit value must match the input shape.

    **kwargs
        Any additional keyword arguments are passed to the `Layer`
        superclass.

    Attributes
    ----------
    W : Theano shared variable or expression
        The filter weights.

    b : Theano shared variable or expression, or None
        The biases, or ``None`` if the layer has none.

    Raises
    ------
    ValueError
        If `n` is given and the input shape does not have ``n + 2``
        dimensions.
    NotImplementedError
        If ``pad='same'`` is combined with an even filter size.
    """
    def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0,
                 untie_biases=False,
                 W=init.GlorotUniform(), b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify, flip_filters=True,
                 n=None, **kwargs):
        super(BaseConvLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                             else nonlinearity)

        # Everything after (batchsize, channels) is a spatial dimension.
        spatial_dims = len(self.input_shape) - 2
        if n is None:
            n = spatial_dims
        elif n != spatial_dims:
            raise ValueError("Tried to create a %dD convolution layer with "
                             "input shape %r. Expected %d input dimensions "
                             "(batchsize, channels, %d spatial dimensions)." %
                             (n, self.input_shape, n+2, n))

        self.n = n
        self.num_filters = num_filters
        self.untie_biases = untie_biases
        self.flip_filters = flip_filters
        self.filter_size = as_tuple(filter_size, n, int)
        self.stride = as_tuple(stride, n, int)

        if pad == 'same' and any(size % 2 == 0 for size in self.filter_size):
            raise NotImplementedError(
                '`same` padding requires odd filter size.')
        if pad in ('full', 'same'):
            self.pad = pad
        elif pad == 'valid':
            self.pad = as_tuple(0, n)
        else:
            self.pad = as_tuple(pad, n, int)

        # W is registered before b so the parameter order stays W, b.
        self.W = self.add_param(W, self.get_W_shape(), name="W")
        if b is None:
            self.b = None
        else:
            bias_shape = (num_filters,)
            if self.untie_biases:
                # One bias per output position; relies on the configuration
                # attributes set above to compute the output shape.
                bias_shape = bias_shape + self.output_shape[2:]
            self.b = self.add_param(b, bias_shape, name="b",
                                    regularizable=False)

    def get_W_shape(self):
        """Return the shape of the weight tensor `W`.

        Returns
        -------
        tuple of int
            ``(num_filters, num_input_channels) + filter_size``.
        """
        return (self.num_filters, self.input_shape[1]) + self.filter_size

    def get_output_shape_for(self, input_shape):
        """Return the output shape for the given `input_shape`.

        The batch size is kept, the channel axis becomes `num_filters`, and
        each spatial size is computed with :func:`conv_output_length`.
        """
        if isinstance(self.pad, tuple):
            per_axis_pad = self.pad
        else:
            # A string mode applies to every spatial axis.
            per_axis_pad = (self.pad,) * self.n
        spatial = tuple(
            conv_output_length(length, size, step, padding)
            for length, size, step, padding
            in zip(input_shape[2:], self.filter_size,
                   self.stride, per_axis_pad))
        return (input_shape[0], self.num_filters) + spatial

    def get_output_for(self, input, **kwargs):
        """Convolve `input`, add the bias (if any), apply the nonlinearity."""
        result = self.convolve(input, **kwargs)
        if self.b is not None:
            if self.untie_biases:
                # Prepend a broadcastable batch axis to the bias.
                bias = T.shape_padleft(self.b, 1)
            else:
                # Broadcast the per-channel bias over batch and space.
                bias = self.b.dimshuffle(('x', 0) + ('x',) * self.n)
            result = result + bias
        return self.nonlinearity(result)

    def convolve(self, input, **kwargs):
        """
        Symbolically convolve `input` with ``self.W``. Subclasses must
        override this method; the base implementation always raises.

        Parameters
        ----------
        input : Theano tensor
            The input minibatch to convolve
        **kwargs
            Any additional keyword arguments from :meth:`get_output_for`

        Returns
        -------
        Theano tensor
            `input` convolved according to the configuration of this layer,
            without any bias or nonlinearity applied.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("BaseConvLayer does not implement the "
                                  "convolve() method. You will want to "
                                  "use a subclass such as Conv2DLayer.")
class TransposeConv2DLayer(BaseConvLayer):
    """An upsampling Layer that transposes a convolution.

    This layer upsamples its input using the transpose of a convolution,
    also known as fractional convolution in some contexts. It is
    implemented as the gradient of a 2D convolution with respect to its
    inputs.

    Parameters
    ----------
    incoming, num_filters, filter_size, stride, pad, untie_biases, W, b,
    nonlinearity, flip_filters, **kwargs
        Forwarded to :class:`BaseConvLayer`. `nonlinearity` defaults to
        ``None`` here, which the base class turns into the identity.
        `num_filters` is the number of channels of the (upsampled) output.

    in_shape : tuple/list of len 4, or None
        The shape of the tensor fed to this layer:
        ``(batchsize, channels, rows, cols)``. The first two entries are
        expected to be None, int or Constant and the last two Tensor, int
        or Constant. If ``None``, the first two entries come from
        Lasagne's shape inference on `incoming` and the last two are the
        symbolic sizes of `incoming`'s output. The channel entry is used
        to build the shape of `W`.

    Notes
    -----
    Expects the input to be in format: batchsize, channels, rows, cols
    """
    def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0,
                 untie_biases=False, W=init.GlorotUniform(),
                 b=init.Constant(0.), nonlinearity=None, flip_filters=True,
                 in_shape=None, **kwargs):
        if in_shape is None:
            tensor_shape = get_output(incoming).shape
            in_shape = get_output_shape(
                incoming)[:2] + (tensor_shape[2],) + (tensor_shape[3],)
        # Must be set before the base constructor runs: it calls
        # get_W_shape(), which reads self.in_shape.
        self.in_shape = in_shape
        super(TransposeConv2DLayer, self).__init__(
            incoming, num_filters, filter_size, stride, pad, untie_biases, W,
            b, nonlinearity, flip_filters, **kwargs)

    def get_W_shape(self):
        """Get the shape of the weight matrix `W`.

        The first two axes are swapped with respect to
        :class:`BaseConvLayer`: the channels of this layer's input come
        first, followed by `num_filters`.

        Returns
        -------
        tuple of int
            The shape of the weight matrix.
        """
        num_input_channels = self.in_shape[1]
        return (num_input_channels, self.num_filters) + self.filter_size

    def get_output_shape_for(self, input_shape):
        """Return the (upsampled) output shape for `input_shape`.

        Spatial sizes that are ``None`` stay ``None``.
        """
        # Work axis by axis so that a string mode ('full', 'same') reaches
        # t_conv_out_size as a string rather than as a tuple of strings,
        # which it would try to use as numbers.
        pad = self.pad if isinstance(self.pad, tuple) else (self.pad,) * self.n
        spatial = tuple(
            None if size is None
            else t_conv_out_size(size, filter_size, stride, axis_pad)
            for size, filter_size, stride, axis_pad
            in zip(input_shape[2:], self.filter_size, self.stride, pad))
        return (input_shape[0], self.num_filters) + spatial

    def get_output_for(self, input, **kwargs):
        """Apply the transposed convolution, the bias and the nonlinearity."""
        conved = self.convolve(input, **kwargs)
        if self.b is None:
            activation = conved
        elif self.untie_biases:
            # Prepend a broadcastable batch axis to the 3D bias.
            activation = conved + self.b.dimshuffle('x', 0, 1, 2)
        else:
            activation = conved + self.b.dimshuffle(('x', 0) + ('x',) * self.n)
        return self.nonlinearity(activation)

    def convolve(self, input, **kwargs):
        """
        Symbolically apply the transposed convolution of `input` with
        ``self.W``.

        Parameters
        ----------
        input : Theano tensor
            The input minibatch to convolve
        **kwargs
            Any additional keyword arguments from :meth:`get_output_for`

        Returns
        -------
        Theano tensor
            `input` convolved according to the configuration of this layer,
            without any bias or nonlinearity applied.
        """
        # Symbolic entries cannot be part of the static filter shape.
        kshp = [None if isinstance(el, T.TensorVariable) else
                el for el in self.get_W_shape()]
        in_shape = self.in_shape
        # Spatial sizes may stay symbolic: conv2d_grad_wrt_inputs accepts
        # tensors for the last two entries of `input_shape`.
        out_spatial = t_conv_out_size(in_shape[2:], self.filter_size,
                                      self.stride, self.pad)
        out_shape = [in_shape[0], self.num_filters] + list(out_spatial)
        # Theano names the 'same' mode 'half' and rejects 'same'.
        border_mode = 'half' if self.pad == 'same' else self.pad
        return T.nnet.abstract_conv.conv2d_grad_wrt_inputs(
            output_grad=input,
            filters=self.W,
            input_shape=out_shape,
            filter_shape=kshp,
            border_mode=border_mode,
            subsample=self.stride,
            filter_flip=self.flip_filters)
class Unpool2DLayer(Layer):
    """
    This layer performs unpooling over the last two dimensions
    of a 4D tensor, by repeating every entry along axes 2 and 3.

    Parameters
    ----------
    incoming
        Passed to the `Layer` superclass.
    ds : iterable of 2 int
        Repetition factor for axis 2 and axis 3. Both factors must be
        equal; a plain integer is not accepted.
    **kwargs
        Any additional keyword arguments are passed to the `Layer`
        superclass.

    Raises
    ------
    ValueError
        If `ds` is an integer, does not have exactly two entries, or its
        two entries differ.
    """
    def __init__(self, incoming, ds, **kwargs):
        super(Unpool2DLayer, self).__init__(incoming, **kwargs)
        if isinstance(ds, int):
            raise ValueError('ds must have len == 2')
        ds = tuple(ds)
        if len(ds) != 2:
            raise ValueError('ds must have len == 2')
        if ds[0] != ds[1]:
            raise ValueError('ds should be symmetric (I am lazy)')
        self.ds = ds

    def get_output_shape_for(self, input_shape):
        """Scale the two spatial sizes of `input_shape` by `ds`.

        A spatial size of ``None`` is kept as ``None`` instead of being
        multiplied.
        """
        output_shape = list(input_shape)
        for axis, factor in zip((2, 3), self.ds):
            if output_shape[axis] is not None:
                output_shape[axis] = output_shape[axis] * factor
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        """Repeat `input` ``ds[0]`` times along axis 2 and ``ds[1]`` times
        along axis 3."""
        return input.repeat(self.ds[0], axis=2).repeat(self.ds[1], axis=3)
def t_conv_out_size(input_size, filter_size, stride, pad):
    """Computes the length of the output of a transposed convolution

    The result is ``(input_size - 1) * stride + filter_size - 2 * pad``,
    computed elementwise with numpy so that several axes can be handled in
    one call.

    Parameters
    ----------
    input_size : int, Iterable or Theano tensor
        The size of the input of the transposed convolution. If ``None``,
        ``None`` is returned.
    filter_size : int, Iterable or Theano tensor
        The size of the filter
    stride : int, Iterable or Theano tensor
        The stride of the transposed convolution
    pad : int, Iterable, Theano tensor or string
        The padding of the transposed convolution. The strings are
        translated to a per-side padding: ``'valid'`` to ``0``, ``'full'``
        to ``filter_size - 1`` and ``'same'`` to ``filter_size // 2``.

    Returns
    -------
    numpy value or None
        The output size(s), or ``None`` if `input_size` is ``None``.

    Raises
    ------
    ValueError
        If `pad` is a string other than ``'valid'``, ``'full'`` or
        ``'same'``.
    """
    if input_size is None:
        return None
    input_size = numpy.array(input_size)
    filter_size = numpy.array(filter_size)
    stride = numpy.array(stride)

    if isinstance(pad, str):
        if pad == 'valid':
            pad = 0
        elif pad == 'full':
            pad = filter_size - 1
        elif pad == 'same':
            # Same per-side padding as the forward convolution. Using it in
            # the general formula keeps the result stride-aware; for
            # stride 1 and odd filters it equals input_size.
            pad = filter_size // 2
        else:
            # Fail loudly instead of reaching the return with no result.
            raise ValueError('Invalid pad: {0}'.format(pad))
    else:
        pad = numpy.array(pad)

    return (input_size - 1) * stride + filter_size - 2 * pad