From 078dd05b734d5330001043c9fb5c9c59521e73c8 Mon Sep 17 00:00:00 2001 From: Feiyu Chan Date: Thu, 9 Apr 2020 09:42:28 +0800 Subject: [PATCH] add Conv2D/Conv2DTranspose/Conv3D/Conv3DTranspose in paddle.nn.layer (#23488) * add Conv/ConvTranspose layers in paddle.nn.layer, test=develop * add example code in docstring, test=develop * update unittests to work with test_runner, test=develop --- python/paddle/fluid/dygraph/nn.py | 11 +- .../tests/unittests/test_conv2d_layer.py | 247 ++++++ .../unittests/test_conv2d_transpose_layer.py | 268 ++++++ .../tests/unittests/test_conv3d_layer.py | 250 ++++++ .../unittests/test_conv3d_transpose_layer.py | 268 ++++++ python/paddle/nn/__init__.py | 4 +- python/paddle/nn/functional/__init__.py | 1 + python/paddle/nn/layer/__init__.py | 7 +- python/paddle/nn/layer/conv.py | 827 +++++++++++++++++- 9 files changed, 1867 insertions(+), 16 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_conv2d_layer.py create mode 100644 python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py create mode 100644 python/paddle/fluid/tests/unittests/test_conv3d_layer.py create mode 100644 python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 51eaccc058..03fe763aae 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -651,12 +651,11 @@ class Conv3DTranspose(layers.Layer): ] + self._filter_size self.weight = self.create_parameter( dtype=self._dtype, shape=filter_shape, attr=self._param_attr) - if self._bias_attr: - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) def forward(self, input): pre_bias = self._helper.create_variable_for_type_inference( diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py new file mode 100644 index 0000000000..64653ce2e7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py @@ -0,0 +1,247 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
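+
+# The cases below build the same convolution three ways (fluid.layers.conv2d,
+# paddle.nn.functional.conv2d, and the new nn.Conv2D layer) and check that
+# the three results agree numerically.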
+ +import numpy as np +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class Conv2DTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + batch_size=4, + spartial_shape=(16, 16), + num_channels=6, + num_filters=8, + filter_size=3, + padding=0, + stride=1, + dilation=1, + groups=1, + act=None, + no_bias=False, + use_cudnn=True, + data_format="NCHW", + dtype="float32"): + super(Conv2DTestCase, self).__init__(methodName) + self.batch_size = batch_size + self.num_channels = num_channels + self.num_filters = num_filters + self.spartial_shape = spartial_shape + self.filter_size = filter_size + + self.padding = padding + self.stride = stride + self.dilation = dilation + self.groups = groups + self.act = act + self.no_bias = no_bias + self.use_cudnn = use_cudnn + self.data_format = data_format + self.dtype = dtype + + def setUp(self): + self.channel_last = self.data_format == "NHWC" + if self.channel_last: + input_shape = (self.batch_size, ) + self.spartial_shape + ( + self.num_channels, ) + else: + input_shape = (self.batch_size, self.num_channels + ) + self.spartial_shape + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] * 2 + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = (self.num_filters, self.num_channels + // self.groups) + tuple(filter_size) + self.weight = np.random.uniform( + -1, 1, size=weight_shape).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform( + -1, 1, size=(self.num_filters, )).astype(self.dtype) + else: + self.bias = None + + def fluid_layer(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, -1, -1,self.num_channels) \ + if self.channel_last else (-1, self.num_channels, -1, -1) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + weight_attr = I.NumpyArrayInitializer(self.weight) + if self.bias is None: + bias_attr = False + else: + bias_attr = I.NumpyArrayInitializer(self.bias) + y_var = fluid.layers.conv2d( + x_var, + self.num_filters, + self.filter_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + param_attr=weight_attr, + bias_attr=bias_attr, + use_cudnn=self.use_cudnn, + act=self.act, + data_format=self.data_format) + feed_dict = {"input": self.input} + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def functional(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, -1, -1,self.num_channels) \ + if self.channel_last else (-1, self.num_channels, -1, -1) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + w_var = fluid.data( + "weight", self.weight_shape, dtype=self.dtype) + b_var = fluid.data( + "bias", (self.num_filters, ), dtype=self.dtype) + y_var = F.conv2d( + x_var, + w_var, + b_var if not self.no_bias else None, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + act=self.act, + use_cudnn=self.use_cudnn, + data_format=self.data_format) + feed_dict = {"input": self.input, "weight": self.weight} + if self.bias is not None: + feed_dict["bias"] = self.bias + exe = fluid.Executor(place) + 
exe.run(start)
+                y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def paddle_nn_layer(self):
+        x_var = dg.to_variable(self.input)
+        conv = nn.Conv2D(
+            self.num_channels,
+            self.num_filters,
+            self.filter_size,
+            padding=self.padding,
+            stride=self.stride,
+            dilation=self.dilation,
+            groups=self.groups,
+            act=self.act,
+            use_cudnn=self.use_cudnn,
+            data_format=self.data_format,
+            dtype=self.dtype)
+        conv.weight.set_value(self.weight)
+        if not self.no_bias:
+            conv.bias.set_value(self.bias)
+        y_var = conv(x_var)
+        y_np = y_var.numpy()
+        return y_np
+
+    def _test_equivalence(self, place):
+        result1 = self.fluid_layer(place)
+        result2 = self.functional(place)
+        with dg.guard(place):
+            result3 = self.paddle_nn_layer()
+        np.testing.assert_array_almost_equal(result1, result2)
+        np.testing.assert_array_almost_equal(result2, result3)
+
+    def runTest(self):
+        place = fluid.CPUPlace()
+        self._test_equivalence(place)
+
+        if fluid.core.is_compiled_with_cuda():
+            place = fluid.CUDAPlace(0)
+            self._test_equivalence(place)
+
+
+class Conv2DErrorTestCase(Conv2DTestCase):
+    def runTest(self):
+        place = fluid.CPUPlace()
+        with dg.guard(place):
+            with self.assertRaises(ValueError):
+                self.paddle_nn_layer()
+
+
+def add_cases(suite):
+    suite.addTest(Conv2DTestCase(methodName='runTest'))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest', stride=[1, 2], dilation=2))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest', stride=2, dilation=(2, 1)))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest', padding="same", no_bias=True, act="sigmoid"))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest', filter_size=(3, 3), padding='valid'))
+    suite.addTest(Conv2DTestCase(methodName='runTest', padding=(2, 3)))
+    suite.addTest(Conv2DTestCase(methodName='runTest', padding=[1, 2, 2, 1]))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest', padding=[[0, 0], [0, 0], [1, 2], [2, 1]]))
+    suite.addTest(Conv2DTestCase(methodName='runTest', data_format="NHWC"))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest',
+            data_format="NHWC",
+            padding=[[0, 0], [1, 1], [2, 2], [0, 0]]))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest', groups=2, padding="valid"))
+    suite.addTest(
+        Conv2DTestCase(
+            methodName='runTest',
+            num_filters=6,
+            num_channels=3,
+            groups=3,
+            use_cudnn=False,
+            act="sigmoid",
+            padding="valid"))
+
+
+def add_error_cases(suite):
+    suite.addTest(
+        Conv2DErrorTestCase(
+            methodName='runTest', use_cudnn="not_valid"))
+    suite.addTest(
+        Conv2DErrorTestCase(
+            methodName='runTest', num_channels=5, groups=2))
+
+
+def load_tests(loader, standard_tests, pattern):
+    suite = unittest.TestSuite()
+    add_cases(suite)
+    add_error_cases(suite)
+    return suite
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py
new file mode 100644
index 0000000000..989836d599
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py
@@ -0,0 +1,268 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class Conv2DTransposeTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + batch_size=4, + spartial_shape=(16, 16), + num_channels=6, + num_filters=8, + filter_size=3, + output_size=None, + padding=0, + stride=1, + dilation=1, + groups=1, + act=None, + no_bias=False, + use_cudnn=True, + data_format="NCHW", + dtype="float32"): + super(Conv2DTransposeTestCase, self).__init__(methodName) + self.batch_size = batch_size + self.num_channels = num_channels + self.num_filters = num_filters + self.spartial_shape = spartial_shape + self.filter_size = filter_size + self.output_size = output_size + + self.padding = padding + self.stride = stride + self.dilation = dilation + self.groups = groups + self.act = act + self.no_bias = no_bias + self.use_cudnn = use_cudnn + self.data_format = data_format + self.dtype = dtype + + def setUp(self): + self.channel_last = self.data_format == "NHWC" + if self.channel_last: + input_shape = (self.batch_size, ) + self.spartial_shape + ( + self.num_channels, ) + else: + input_shape = (self.batch_size, self.num_channels + ) + self.spartial_shape + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] * 2 + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = (self.num_channels, self.num_filters + // self.groups) + tuple(filter_size) + self.weight = np.random.uniform( + -1, 1, size=weight_shape).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform( + -1, 1, size=(self.num_filters, )).astype(self.dtype) + else: + self.bias = None + + def fluid_layer(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, -1, -1,self.num_channels) \ + if self.channel_last else (-1, self.num_channels, -1, -1) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + weight_attr = I.NumpyArrayInitializer(self.weight) + if self.bias is None: + bias_attr = False + else: + bias_attr = I.NumpyArrayInitializer(self.bias) + y_var = fluid.layers.conv2d_transpose( + x_var, + self.num_filters, + filter_size=self.filter_size, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + param_attr=weight_attr, + bias_attr=bias_attr, + use_cudnn=self.use_cudnn, + act=self.act, + data_format=self.data_format) + feed_dict = {"input": self.input} + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def functional(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, -1, -1,self.num_channels) \ + if self.channel_last else (-1, self.num_channels, -1, -1) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + w_var = 
fluid.data(
+                    "weight", self.weight_shape, dtype=self.dtype)
+                b_var = fluid.data(
+                    "bias", (self.num_filters, ), dtype=self.dtype)
+                y_var = F.conv2d_transpose(
+                    x_var,
+                    w_var,
+                    None if self.no_bias else b_var,
+                    output_size=self.output_size,
+                    padding=self.padding,
+                    stride=self.stride,
+                    dilation=self.dilation,
+                    groups=self.groups,
+                    act=self.act,
+                    use_cudnn=self.use_cudnn,
+                    data_format=self.data_format)
+                feed_dict = {"input": self.input, "weight": self.weight}
+                if self.bias is not None:
+                    feed_dict["bias"] = self.bias
+                exe = fluid.Executor(place)
+                exe.run(start)
+                y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def paddle_nn_layer(self):
+        x_var = dg.to_variable(self.input)
+        conv = nn.Conv2DTranspose(
+            self.num_channels,
+            self.num_filters,
+            self.filter_size,
+            output_size=self.output_size,
+            padding=self.padding,
+            stride=self.stride,
+            dilation=self.dilation,
+            groups=self.groups,
+            act=self.act,
+            use_cudnn=self.use_cudnn,
+            data_format=self.data_format,
+            dtype=self.dtype)
+        conv.weight.set_value(self.weight)
+        if not self.no_bias:
+            conv.bias.set_value(self.bias)
+        y_var = conv(x_var)
+        y_np = y_var.numpy()
+        return y_np
+
+    def _test_equivalence(self, place):
+        result1 = self.fluid_layer(place)
+        result2 = self.functional(place)
+        with dg.guard(place):
+            result3 = self.paddle_nn_layer()
+        np.testing.assert_array_almost_equal(result1, result2)
+        np.testing.assert_array_almost_equal(result2, result3)
+
+    def runTest(self):
+        place = fluid.CPUPlace()
+        self._test_equivalence(place)
+
+        if fluid.core.is_compiled_with_cuda():
+            place = fluid.CUDAPlace(0)
+            self._test_equivalence(place)
+
+
+class Conv2DTransposeErrorTestCase(Conv2DTransposeTestCase):
+    def runTest(self):
+        place = fluid.CPUPlace()
+        with dg.guard(place):
+            with self.assertRaises(ValueError):
+                self.paddle_nn_layer()
+
+
+def add_cases(suite):
+    suite.addTest(Conv2DTransposeTestCase(methodName='runTest', act="relu"))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', stride=[1, 2], no_bias=True, dilation=2))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest',
+            filter_size=(3, 3),
+            output_size=[20, 36],
+            stride=[1, 2],
+            dilation=2))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', stride=2, dilation=(2, 1)))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', padding="valid"))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', filter_size=1, padding=(2, 3)))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', padding=[1, 2, 2, 1]))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', padding=[[0, 0], [0, 0], [1, 2], [2, 1]]))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', data_format="NHWC"))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest',
+            data_format="NHWC",
+            padding=[[0, 0], [1, 1], [2, 2], [0, 0]]))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest', groups=2, padding="valid"))
+    suite.addTest(
+        Conv2DTransposeTestCase(
+            methodName='runTest',
+            num_filters=6,
+            num_channels=3,
+            groups=3,
+            use_cudnn=False,
+            act="sigmoid",
+            padding="valid"))
+
+
+def add_error_cases(suite):
+    suite.addTest(
+        Conv2DTransposeErrorTestCase(
+            methodName='runTest', use_cudnn="not_valid"))
+    suite.addTest(
+        Conv2DTransposeErrorTestCase(
+            methodName='runTest', num_channels=5, groups=2))
+    suite.addTest(
+ Conv2DTransposeErrorTestCase( + methodName='runTest', output_size="not_valid")) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py new file mode 100644 index 0000000000..cf582c6210 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -0,0 +1,250 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class Conv3DTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + batch_size=4, + spartial_shape=(8, 8, 8), + num_channels=6, + num_filters=8, + filter_size=3, + padding=0, + stride=1, + dilation=1, + groups=1, + act=None, + no_bias=False, + use_cudnn=True, + data_format="NCDHW", + dtype="float32"): + super(Conv3DTestCase, self).__init__(methodName) + self.batch_size = batch_size + self.num_channels = num_channels + self.num_filters = num_filters + self.spartial_shape = spartial_shape + self.filter_size = filter_size + + self.padding = padding + self.stride = stride + self.dilation = dilation + self.groups = groups + self.act = act + self.no_bias = no_bias + self.use_cudnn = use_cudnn + self.data_format = data_format + self.dtype = dtype + + def setUp(self): + self.channel_last = self.data_format == "NDHWC" + if self.channel_last: + input_shape = (self.batch_size, ) + self.spartial_shape + ( + self.num_channels, ) + else: + input_shape = (self.batch_size, self.num_channels + ) + self.spartial_shape + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] * 3 + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = (self.num_filters, self.num_channels + // self.groups) + tuple(filter_size) + self.weight = np.random.uniform( + -1, 1, size=weight_shape).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform( + -1, 1, size=(self.num_filters, )).astype(self.dtype) + else: + self.bias = None + + def fluid_layer(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, -1, -1, -1, self.num_channels) \ + if self.channel_last else (-1, self.num_channels, -1, -1, -1) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + weight_attr = I.NumpyArrayInitializer(self.weight) + if self.bias is None: + bias_attr = False + else: + bias_attr = I.NumpyArrayInitializer(self.bias) + y_var = fluid.layers.conv3d( + x_var, + self.num_filters, + self.filter_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + 
groups=self.groups,
+                    param_attr=weight_attr,
+                    bias_attr=bias_attr,
+                    use_cudnn=self.use_cudnn,
+                    act=self.act,
+                    data_format=self.data_format)
+                feed_dict = {"input": self.input}
+                exe = fluid.Executor(place)
+                exe.run(start)
+                y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def functional(self, place):
+        main = fluid.Program()
+        start = fluid.Program()
+        with fluid.unique_name.guard():
+            with fluid.program_guard(main, start):
+                input_shape = (-1, -1, -1, -1, self.num_channels) \
+                    if self.channel_last else (-1, self.num_channels, -1, -1, -1)
+                x_var = fluid.data("input", input_shape, dtype=self.dtype)
+                w_var = fluid.data(
+                    "weight", self.weight_shape, dtype=self.dtype)
+                b_var = fluid.data(
+                    "bias", (self.num_filters, ), dtype=self.dtype)
+                y_var = F.conv3d(
+                    x_var,
+                    w_var,
+                    None if self.no_bias else b_var,
+                    padding=self.padding,
+                    stride=self.stride,
+                    dilation=self.dilation,
+                    groups=self.groups,
+                    act=self.act,
+                    use_cudnn=self.use_cudnn,
+                    data_format=self.data_format)
+                feed_dict = {"input": self.input, "weight": self.weight}
+                if self.bias is not None:
+                    feed_dict["bias"] = self.bias
+                exe = fluid.Executor(place)
+                exe.run(start)
+                y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def paddle_nn_layer(self):
+        x_var = dg.to_variable(self.input)
+        conv = nn.Conv3D(
+            self.num_channels,
+            self.num_filters,
+            self.filter_size,
+            padding=self.padding,
+            stride=self.stride,
+            dilation=self.dilation,
+            groups=self.groups,
+            act=self.act,
+            use_cudnn=self.use_cudnn,
+            data_format=self.data_format,
+            dtype=self.dtype)
+        conv.weight.set_value(self.weight)
+        if not self.no_bias:
+            conv.bias.set_value(self.bias)
+        y_var = conv(x_var)
+        y_np = y_var.numpy()
+        return y_np
+
+    def _test_equivalence(self, place):
+        result1 = self.fluid_layer(place)
+        result2 = self.functional(place)
+        with dg.guard(place):
+            result3 = self.paddle_nn_layer()
+        np.testing.assert_array_almost_equal(result1, result2)
+        np.testing.assert_array_almost_equal(result2, result3)
+
+    def runTest(self):
+        place = fluid.CPUPlace()
+        self._test_equivalence(place)
+
+        if fluid.core.is_compiled_with_cuda():
+            place = fluid.CUDAPlace(0)
+            self._test_equivalence(place)
+
+
+class Conv3DErrorTestCase(Conv3DTestCase):
+    def runTest(self):
+        place = fluid.CPUPlace()
+        with dg.guard(place):
+            with self.assertRaises(ValueError):
+                self.paddle_nn_layer()
+
+
+def add_cases(suite):
+    suite.addTest(Conv3DTestCase(methodName='runTest'))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest', stride=[1, 2, 1], dilation=2))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest', stride=2, dilation=(2, 1, 2)))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest', padding="same", no_bias=True))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest', filter_size=(3, 2, 3), padding='valid'))
+    suite.addTest(Conv3DTestCase(methodName='runTest', padding=(2, 3, 1)))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest', padding=[1, 2, 2, 1, 2, 3]))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest',
+            padding=[[0, 0], [0, 0], [1, 2], [2, 1], [2, 2]]))
+    suite.addTest(Conv3DTestCase(methodName='runTest', data_format="NDHWC"))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest',
+            data_format="NDHWC",
+            padding=[[0, 0], [1, 1], [3, 3], [2, 2], [0, 0]]))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest', groups=2, padding="valid"))
+    suite.addTest(
+        Conv3DTestCase(
+            methodName='runTest',
+            num_filters=6,
+            num_channels=3,
+            
groups=3, + use_cudnn=False, + act="sigmoid", + padding="valid")) + + +def add_error_cases(suite): + suite.addTest( + Conv3DErrorTestCase( + methodName='runTest', use_cudnn="not_valid")) + suite.addTest( + Conv3DErrorTestCase( + methodName='runTest', num_channels=5, groups=2)) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py new file mode 100644 index 0000000000..acaf33467d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -0,0 +1,268 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class Conv3DTransposeTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + batch_size=2, + spartial_shape=(8, 8, 8), + num_channels=6, + num_filters=8, + filter_size=3, + output_size=None, + padding=0, + stride=1, + dilation=1, + groups=1, + act=None, + no_bias=False, + use_cudnn=True, + data_format="NCDHW", + dtype="float32"): + super(Conv3DTransposeTestCase, self).__init__(methodName) + self.batch_size = batch_size + self.num_channels = num_channels + self.num_filters = num_filters + self.spartial_shape = spartial_shape + self.filter_size = filter_size + self.output_size = output_size + + self.padding = padding + self.stride = stride + self.dilation = dilation + self.groups = groups + self.act = act + self.no_bias = no_bias + self.use_cudnn = use_cudnn + self.data_format = data_format + self.dtype = dtype + + def setUp(self): + self.channel_last = self.data_format == "NDHWC" + if self.channel_last: + input_shape = (self.batch_size, ) + self.spartial_shape + ( + self.num_channels, ) + else: + input_shape = (self.batch_size, self.num_channels + ) + self.spartial_shape + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] * 3 + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = (self.num_channels, self.num_filters + // self.groups) + tuple(filter_size) + self.weight = np.random.uniform( + -1, 1, size=weight_shape).astype(self.dtype) + if self.no_bias: + self.bias = None + else: + self.bias = np.random.uniform( + -1, 1, size=(self.num_filters, )).astype(self.dtype) + + def fluid_layer(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, -1, -1, -1, self.num_channels) \ + if self.channel_last else (-1, self.num_channels, -1, -1, -1) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + weight_attr = 
I.NumpyArrayInitializer(self.weight)
+                if self.bias is None:
+                    bias_attr = False
+                else:
+                    bias_attr = I.NumpyArrayInitializer(self.bias)
+                y_var = fluid.layers.conv3d_transpose(
+                    x_var,
+                    self.num_filters,
+                    filter_size=self.filter_size,
+                    output_size=self.output_size,
+                    padding=self.padding,
+                    stride=self.stride,
+                    dilation=self.dilation,
+                    groups=self.groups,
+                    param_attr=weight_attr,
+                    bias_attr=bias_attr,
+                    use_cudnn=self.use_cudnn,
+                    act=self.act,
+                    data_format=self.data_format)
+                feed_dict = {"input": self.input}
+                exe = fluid.Executor(place)
+                exe.run(start)
+                y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def functional(self, place):
+        main = fluid.Program()
+        start = fluid.Program()
+        with fluid.unique_name.guard():
+            with fluid.program_guard(main, start):
+                input_shape = (-1, -1, -1, -1, self.num_channels) \
+                    if self.channel_last else (-1, self.num_channels, -1, -1, -1)
+                x_var = fluid.data("input", input_shape, dtype=self.dtype)
+                w_var = fluid.data(
+                    "weight", self.weight_shape, dtype=self.dtype)
+                b_var = fluid.data(
+                    "bias", (self.num_filters, ), dtype=self.dtype)
+                y_var = F.conv3d_transpose(
+                    x_var,
+                    w_var,
+                    None if self.no_bias else b_var,
+                    output_size=self.output_size,
+                    padding=self.padding,
+                    stride=self.stride,
+                    dilation=self.dilation,
+                    groups=self.groups,
+                    act=self.act,
+                    use_cudnn=self.use_cudnn,
+                    data_format=self.data_format)
+                feed_dict = {"input": self.input, "weight": self.weight}
+                if self.bias is not None:
+                    feed_dict["bias"] = self.bias
+                exe = fluid.Executor(place)
+                exe.run(start)
+                y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def paddle_nn_layer(self):
+        x_var = dg.to_variable(self.input)
+        conv = nn.Conv3DTranspose(
+            self.num_channels,
+            self.num_filters,
+            self.filter_size,
+            output_size=self.output_size,
+            padding=self.padding,
+            stride=self.stride,
+            dilation=self.dilation,
+            groups=self.groups,
+            act=self.act,
+            use_cudnn=self.use_cudnn,
+            data_format=self.data_format,
+            dtype=self.dtype)
+        conv.weight.set_value(self.weight)
+        if not self.no_bias:
+            conv.bias.set_value(self.bias)
+        y_var = conv(x_var)
+        y_np = y_var.numpy()
+        return y_np
+
+    def _test_equivalence(self, place):
+        result1 = self.fluid_layer(place)
+        result2 = self.functional(place)
+        with dg.guard(place):
+            result3 = self.paddle_nn_layer()
+        np.testing.assert_array_almost_equal(result1, result2)
+        np.testing.assert_array_almost_equal(result2, result3)
+
+    def runTest(self):
+        place = fluid.CPUPlace()
+        self._test_equivalence(place)
+
+        if fluid.core.is_compiled_with_cuda():
+            place = fluid.CUDAPlace(0)
+            self._test_equivalence(place)
+
+
+class Conv3DTransposeErrorTestCase(Conv3DTransposeTestCase):
+    def runTest(self):
+        place = fluid.CPUPlace()
+        with dg.guard(place):
+            with self.assertRaises(ValueError):
+                self.paddle_nn_layer()
+
+
+def add_cases(suite):
+    suite.addTest(Conv3DTransposeTestCase(methodName='runTest', act="tanh"))
+    suite.addTest(
+        Conv3DTransposeTestCase(
+            methodName='runTest', stride=[1, 2, 1], dilation=2, no_bias=True))
+    suite.addTest(
+        Conv3DTransposeTestCase(
+            methodName='runTest',
+            output_size=[12, 19, 12],
+            stride=[1, 2, 1],
+            dilation=2))
+    suite.addTest(
+        Conv3DTransposeTestCase(
+            methodName='runTest', stride=2, dilation=(2, 1, 2)))
+    suite.addTest(
+        Conv3DTransposeTestCase(
+            methodName='runTest', padding="valid"))
+    suite.addTest(
+        Conv3DTransposeTestCase(
+            
methodName='runTest', filter_size=1, padding=(2, 3, 1))) + suite.addTest( + Conv3DTransposeTestCase( + methodName='runTest', padding=[1, 2, 2, 3, 2, 1])) + suite.addTest( + Conv3DTransposeTestCase( + methodName='runTest', + padding=[[0, 0], [0, 0], [2, 3], [1, 2], [2, 1]])) + suite.addTest( + Conv3DTransposeTestCase( + methodName='runTest', data_format="NDHWC")) + suite.addTest( + Conv3DTransposeTestCase( + methodName='runTest', + data_format="NDHWC", + padding=[[0, 0], [1, 1], [2, 2], [3, 3], [0, 0]])) + suite.addTest( + Conv3DTransposeTestCase( + methodName='runTest', groups=2, padding="valid")) + suite.addTest( + Conv3DTransposeTestCase( + methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + use_cudnn=False, + act="sigmoid", + padding="valid")) + + +def add_error_cases(suite): + suite.addTest( + Conv3DTransposeErrorTestCase( + methodName='runTest', use_cudnn="not_valid")) + suite.addTest( + Conv3DTransposeErrorTestCase( + methodName='runTest', num_channels=5, groups=2)) + suite.addTest( + Conv3DTransposeErrorTestCase( + methodName='runTest', output_size="not_valid")) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 3fd7da1ec1..1155484fdc 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -14,7 +14,7 @@ # TODO: import all neural network related api under this directory, # including layers, linear, conv, rnn etc. -__all__ = [] +# __all__ = [] # TODO: define alias in nn directory # from .clip import ErrorClipByValue #DEFINE_ALIAS @@ -58,6 +58,8 @@ __all__ = [] # from .layer.loss import MSELoss #DEFINE_ALIAS from .layer.loss import L1Loss #DEFINE_ALIAS from .layer import loss #DEFINE_ALIAS +from .layer import conv #DEFINE_ALIAS +from .layer.conv import Conv2D, Conv2DTranspose, Conv3D, Conv3DTranspose #DEFINE_ALIAS # from .layer.loss import NLLLoss #DEFINE_ALIAS # from .layer.loss import BCELoss #DEFINE_ALIAS # from .layer.learning_rate import CosineDecay #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 9e517726eb..347ccc0e37 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -17,6 +17,7 @@ # __all__ = [ ] # TODO: define alias in functional directory +from . import conv from .conv import conv2d #DEFINE_ALIAS from .conv import conv2d_transpose #DEFINE_ALIAS from .conv import conv3d #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 54e5c8e980..8e6eeec089 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define activation functions of neural network +# TODO: define activation functions of neural network from . import loss -__all__ = [loss] +from . import conv + +from .loss import * +from .conv import * diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 7577e3ee3c..330841f094 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -12,10 +12,823 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define classes of convolutional neural network
-# __all__ = ['Conv2D',
-#            'Conv2DTranspose',
-#            'Conv3D',
-#            'Conv3DTranspose',
-#            'TreeConv',
-#            'Conv1D']
+# TODO: define classes of convolutional neural network
+__all__ = [
+    'Conv2D',
+    'Conv2DTranspose',
+    'Conv3D',
+    'Conv3DTranspose',
+    # 'TreeConv',
+    # 'Conv1D'
+]
+
+import numpy as np
+
+from ...fluid.dygraph import layers
+from ...fluid.initializer import Normal
+from .. import functional as F
+from ...fluid.layers import utils
+from ..functional.conv import _update_padding_nd
+
+
+def _get_default_param_initializer(num_channels, filter_size):
+    filter_elem_num = num_channels * np.prod(filter_size)
+    std = (2.0 / filter_elem_num)**0.5
+    return Normal(0.0, std, 0)
+
+
+class Conv2D(layers.Layer):
+    """
+    This interface is used to construct a callable object of the ``Conv2D`` class.
+    For more details, refer to code examples.
+    The convolution2D layer calculates the output based on the input, filter
+    and the strides, paddings, dilations, groups parameters. Input and
+    output are in NCHW format, where N is batch size, C is the number of
+    feature maps, H is the height of the feature map, and W is the width of the feature map.
+    Filter's shape is [MCHW], where M is the number of output feature maps,
+    C is the number of input feature maps, H is the height of the filter,
+    and W is the width of the filter. If groups is greater than 1,
+    C equals the number of input feature maps divided by groups.
+    Please refer to UFLDL's `convolution
+    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
+    for more details.
+    If a bias attribute and activation type are provided, bias is added to the
+    output of the convolution, and the corresponding activation function is
+    applied to the final result.
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = \\sigma (W \\ast X + b)
+
+    Where:
+
+    * :math:`X`: Input value, a ``Tensor`` with NCHW format.
+    * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
+    * :math:`\\ast`: Convolution operation.
+    * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
+    * :math:`\\sigma`: Activation function.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+    Example:
+
+        - Input:
+
+          Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+          Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
+
+        - Output:
+
+          Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+        Where
+
+        .. math::
+
+            H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
+            W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
+
+    Parameters:
+        num_channels(int): The number of channels in the input image.
+        num_filters(int): The number of filters. It is the same as the number
+            of output feature maps.
+        filter_size (int or tuple): The filter size. If filter_size is a tuple,
+            it must contain two integers, (filter_size_H, filter_size_W).
+            Otherwise, the filter will be a square.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
+            1. a string in ['valid', 'same'].
+            2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding` on both sides
+            3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
+            4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
+            5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0).
+            The default value is 0.
+        stride (int or tuple, optional): The stride size. If stride is a tuple, it must
+            contain two integers, (stride_H, stride_W). Otherwise, the
+            stride_H = stride_W = stride. Default: 1.
+        dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must
+            contain two integers, (dilation_H, dilation_W). Otherwise, the
+            dilation_H = dilation_W = dilation. Default: 1.
+        groups (int, optional): The groups number of the Conv2d Layer. According to grouped
+            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+            the first half of the filters is only connected to the first half
+            of the input channels, while the second half of the filters is only
+            connected to the second half of the input channels. Default: 1.
+        param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
+            of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
+            and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
+        bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, conv2d
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized zero. Default: None.
+        use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: True.
+        act (str, optional): Activation type, if it is set to None, activation is not appended.
+            Default: None.
+        data_format (str, optional): Data format that specifies the layout of input.
+            It can be "NCHW" or "NHWC". Default: "NCHW".
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
+
+    Attribute:
+        **weight** (Parameter): the learnable weights of the filters of this layer.
+
+        **bias** (Parameter or None): the learnable bias of this layer.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: if ``use_cudnn`` is not a bool value.
+
+    Examples:
+        .. code-block:: python
+
+          import numpy as np
+          from paddle import fluid
+          import paddle.fluid.dygraph as dg
+          from paddle import nn
+
+          x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32')
+          place = fluid.CPUPlace()
+          with dg.guard(place):
+              x_var = dg.to_variable(x)
+              conv = nn.Conv2D(4, 6, (3, 3))
+              y_var = conv(x_var)
+              y_np = y_var.numpy()
+              print(y_np.shape)
+
+          # (2, 6, 6, 6)
+    """
+
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 padding=0,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 param_attr=None,
+                 bias_attr=None,
+                 use_cudnn=True,
+                 act=None,
+                 data_format="NCHW",
+                 dtype='float32'):
+        super(Conv2D, self).__init__()
+        assert param_attr is not False, "param_attr should not be False here."
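+        # Note: the constructor below only records hyperparameters and
+        # creates the parameters; weight has shape
+        # [num_filters, num_channels // groups, *filter_size] (MCHW) and
+        # bias has shape [num_filters]. `padding` is stored as given and is
+        # resolved later by F.conv2d in forward().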
+
+        self._num_channels = num_channels
+        self._num_filters = num_filters
+        self._groups = groups
+        if num_channels % groups != 0:
+            raise ValueError("num_channels must be divisible by groups.")
+        self._act = act
+        self._data_format = data_format
+        self._dtype = dtype
+        if not isinstance(use_cudnn, bool):
+            raise ValueError("use_cudnn should be True or False")
+        self._use_cudnn = use_cudnn
+
+        self._filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
+        self._stride = utils.convert_to_list(stride, 2, 'stride')
+        self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
+        channel_last = (data_format == "NHWC")
+        self._padding = padding  # leave it to F.conv2d
+
+        self._param_attr = param_attr
+        self._bias_attr = bias_attr
+
+        num_filter_channels = num_channels // groups
+        filter_shape = [self._num_filters, num_filter_channels
+                        ] + self._filter_size
+
+        self.weight = self.create_parameter(
+            attr=self._param_attr,
+            shape=filter_shape,
+            dtype=self._dtype,
+            default_initializer=_get_default_param_initializer(
+                self._num_channels, self._filter_size))
+        self.bias = self.create_parameter(
+            attr=self._bias_attr,
+            shape=[self._num_filters],
+            dtype=self._dtype,
+            is_bias=True)
+
+    def forward(self, input):
+        out = F.conv2d(
+            input,
+            self.weight,
+            bias=self.bias,
+            padding=self._padding,
+            stride=self._stride,
+            dilation=self._dilation,
+            groups=self._groups,
+            use_cudnn=self._use_cudnn,
+            act=self._act,
+            data_format=self._data_format)
+        return out
+
+
+class Conv2DTranspose(layers.Layer):
+    """
+    This interface is used to construct a callable object of the ``Conv2DTranspose`` class.
+    For more details, refer to code examples.
+    The convolution2D transpose layer calculates the output based on the input,
+    filter, and dilations, strides, paddings. Input and output
+    are in NCHW format, where N is batch size, C is the number of feature maps,
+    H is the height of the feature map, and W is the width of the feature map.
+    Filter's shape is [MCHW], where M is the number of input feature maps,
+    C is the number of output feature maps, H is the height of the filter,
+    and W is the width of the filter. If groups is greater than 1,
+    C equals the number of output feature maps divided by groups.
+    If a bias attribute and activation type are provided, bias is added to
+    the output of the convolution, and the corresponding activation function
+    is applied to the final result.
+    For details of the convolution transpose layer, please refer to the following
+    explanation and references:
+    `conv2dtranspose <https://arxiv.org/pdf/1603.07285.pdf>`_ .
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = \sigma (W \\ast X + b)
+
+    Where:
+
+    * :math:`X`: Input value, a ``Tensor`` with NCHW format.
+    * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
+    * :math:`\\ast`: Convolution operation.
+    * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
+    * :math:`\\sigma`: Activation function.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+    Example:
+
+        - Input:
+
+          Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+          Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
+
+        - Output:
+
+          Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+        Where
+
+        .. math::
+
+            H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
+            W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
+            H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
+            W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
+
+    Parameters:
+        num_channels(int): The number of channels in the input image.
+        num_filters(int): The number of filters. It is the same as the number
+            of output feature maps.
+        filter_size(int or tuple): The filter size. If filter_size is a tuple,
+            it must contain two integers, (filter_size_H, filter_size_W).
+            Otherwise, the filter will be a square.
+        output_size(int or tuple, optional): The output image size. If output size is a
+            tuple, it must contain two integers, (image_H, image_W). If None,
+            filter_size, padding, and stride are used to calculate output_size.
+            If output_size and filter_size are specified at the same time, they
+            should follow the formula above. Default: None.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
+            1. a string in ['valid', 'same'].
+            2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding` on both sides
+            3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
+            4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
+            5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0).
+            The default value is 0.
+        stride(int or tuple, optional): The stride size. If stride is a tuple, it must
+            contain two integers, (stride_H, stride_W). Otherwise, the
+            stride_H = stride_W = stride. Default: 1.
+        dilation(int or tuple, optional): The dilation size. If dilation is a tuple, it must
+            contain two integers, (dilation_H, dilation_W). Otherwise, the
+            dilation_H = dilation_W = dilation. Default: 1.
+        groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
+            grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+            when group=2, the first half of the filters is only connected to the
+            first half of the input channels, while the second half of the
+            filters is only connected to the second half of the input channels.
+            Default: 1.
+        param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
+            of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with Xavier. Default: None.
+        bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d_transpose.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, conv2d_transpose
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized zero. Default: None.
+        use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: True.
+        act (str, optional): Activation type, if it is set to None, activation is not appended.
+            Default: None.
+        data_format (str, optional): Data format that specifies the layout of input.
+            It can be "NCHW" or "NHWC". Default: "NCHW".
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
+
+    Attribute:
+        **weight** (Parameter): the learnable weights of filters of this layer.
+
+        **bias** (Parameter or None): the learnable bias of this layer.
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+          import numpy as np
+          from paddle import fluid
+          import paddle.fluid.dygraph as dg
+          from paddle import nn
+
+          x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32')
+          place = fluid.CPUPlace()
+          with dg.guard(place):
+              x_var = dg.to_variable(x)
+              conv = nn.Conv2DTranspose(4, 6, (3, 3))
+              y_var = conv(x_var)
+              y_np = y_var.numpy()
+              print(y_np.shape)
+
+          # (2, 6, 10, 10)
+    """
+
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 output_size=None,
+                 padding=0,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 param_attr=None,
+                 bias_attr=None,
+                 use_cudnn=True,
+                 act=None,
+                 data_format="NCHW",
+                 dtype='float32'):
+        super(Conv2DTranspose, self).__init__()
+        assert param_attr is not False, "param_attr should not be False in conv2d_transpose."
+        self._param_attr = param_attr
+        self._bias_attr = bias_attr
+        self._act = act
+        self._groups = groups
+        self._num_channels = num_channels
+        self._num_filters = num_filters
+        self._use_cudnn = use_cudnn
+        self._data_format = data_format
+        self._dtype = dtype
+
+        self._stride = utils.convert_to_list(stride, 2, 'stride')
+        self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
+        self._filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
+        if output_size is None:
+            self._output_size = output_size
+        elif isinstance(output_size, (list, tuple, int)):
+            self._output_size = utils.convert_to_list(output_size, 2,
+                                                      'output_size')
+        else:
+            raise ValueError(
+                "output_size should be int, or list[int] or tuple[int]")
+        self._padding = padding
+
+        filter_shape = [self._num_channels, num_filters // groups
+                        ] + self._filter_size
+        self.weight = self.create_parameter(
+            dtype=self._dtype, shape=filter_shape, attr=self._param_attr)
+        self.bias = self.create_parameter(
+            attr=self._bias_attr,
+            shape=[self._num_filters],
+            dtype=self._dtype,
+            is_bias=True)
+
+    def forward(self, input):
+        out = F.conv2d_transpose(
+            input,
+            self.weight,
+            bias=self.bias,
+            output_size=self._output_size,
+            padding=self._padding,
+            stride=self._stride,
+            dilation=self._dilation,
+            groups=self._groups,
+            use_cudnn=self._use_cudnn,
+            act=self._act,
+            data_format=self._data_format)
+        return out
+
+
+class Conv3D(layers.Layer):
+    """
+    **Convolution3D Layer**
+
+    The convolution3D layer calculates the output based on the input, filter
+    and strides, paddings, dilations, groups parameters. Input(Input) and
+    Output(Output) are multidimensional tensors with a shape of
+    :math:`[N, C, D, H, W]` . Where N is batch size, C is the number of
+    channels, D is the depth of the feature, H is the height of the feature,
+    and W is the width of the feature. Convolution3D is similar to Convolution2D
+    but adds one dimension (depth). If a bias attribute and activation type are
+    provided, bias is added to the output of the convolution, and the
+    corresponding activation function is applied to the final result.
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = \sigma (W \\ast X + b)
+
+    In the above equation:
+
+    * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
+    * :math:`W`: Filter value, a tensor with MCDHW format.
+    * :math:`\\ast`: Convolution operation.
+    * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+    * :math:`\\sigma`: Activation function.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+    Example:
+
+        - Input:
+
+          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+          Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
+
+        - Output:
+          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+        Where
+
+        .. math::
+
+            D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
+            H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
+            W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
+
+    Parameters:
+        num_channels(int): The number of channels in the input image.
+        num_filters(int): The number of filters. It is the same as the number of output image channels.
+        filter_size (int|tuple, optional): The filter size. If filter_size is a tuple,
+            it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
+            Otherwise, the filter will be a square, filter_size_depth = filter_size_height
+            = filter_size_width = filter_size.
+        stride (int|tuple, optional): The stride size. If stride is a tuple, it must
+            contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
+            stride_D = stride_H = stride_W = stride. The default value is 1.
+        padding (int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
+            1. a string in ['valid', 'same'].
+            2. an int, which means each spatial dimension (depth, height, width) is zero padded by size of `padding` on both sides
+            3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
+            4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
+            5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0).
+            The default value is 0.
+        dilation (int|tuple, optional): The dilation size. If dilation is a tuple, it must
+            contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
+            dilation_D = dilation_H = dilation_W = dilation. The default value is 1.
+        groups (int, optional): The groups number of the Conv3d Layer. According to grouped
+            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+            the first half of the filters is only connected to the first half
+            of the input channels, while the second half of the filters is only
+            connected to the second half of the input channels. The default value is 1.
+        param_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
+            of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
+            will create ParamAttr as param_attr.
If it is set to None, the parameter + is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is + :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None. + bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv3d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. The default value is None. + use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. The default value is True. + act (str, optional): Activation type, if it is set to None, activation is not appended. + The default value is None. + data_format (str, optional): Data format that specifies the layout of input. + It can be "NCDHW" or "NDHWC". Default: "NCDHW". + dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". + + Attribute: + **weight** (Parameter): the learnable weights of filters of this layer. + + **bias** (Parameter): the learnable bias of this layer. + + Returns: + None. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. + + Examples: + .. code-block:: python + + import numpy as np + from paddle import fluid + import paddle.fluid.dygraph as dg + from paddle import nn + + x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32') + place = fluid.CPUPlace() + with dg.guard(place): + x_var = dg.to_variable(x) + conv = nn.Conv3D(4, 6, (3, 3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) + + # (2, 6, 6, 6, 6) + """ + + def __init__(self, + num_channels, + num_filters, + filter_size, + padding=0, + stride=1, + dilation=1, + groups=1, + param_attr=None, + bias_attr=None, + use_cudnn=True, + act=None, + data_format="NCDHW", + dtype='float32'): + super(Conv3D, self).__init__() + assert param_attr is not False, "param_attr should not be False here." 
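+        # Note: as in Conv2D, hyperparameters are stored as given; the
+        # default weight initializer below is a He-style Normal(0, std)
+        # with std = sqrt(2 / (num_channels * prod(filter_size))), and
+        # `padding` is resolved by F.conv3d in forward().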
+    """
+
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 padding=0,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 param_attr=None,
+                 bias_attr=None,
+                 use_cudnn=True,
+                 act=None,
+                 data_format="NCDHW",
+                 dtype='float32'):
+        super(Conv3D, self).__init__()
+        assert param_attr is not False, "param_attr should not be False here."
+        self._num_channels = num_channels
+        self._num_filters = num_filters
+        self._groups = groups
+        self._act = act
+        self._use_cudnn = use_cudnn
+        self._dtype = dtype
+        self._data_format = data_format
+
+        self._stride = utils.convert_to_list(stride, 3, 'stride')
+        self._dilation = utils.convert_to_list(dilation, 3, 'dilation')
+        self._filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
+        channel_last = (data_format == "NDHWC")
+        self._padding = padding
+
+        self._param_attr = param_attr
+        self._bias_attr = bias_attr
+
+        if num_channels % groups != 0:
+            raise ValueError("num_channels must be divisible by groups.")
+        num_filter_channels = num_channels // groups
+
+        # each filter convolves num_channels // groups input channels
+        filter_shape = [num_filters, num_filter_channels] + self._filter_size
+
+        # default: Normal(0.0, (2.0 / filter_elem_num) ** 0.5), as documented above
+        self.weight = self.create_parameter(
+            attr=self._param_attr,
+            shape=filter_shape,
+            dtype=self._dtype,
+            default_initializer=_get_default_param_initializer(
+                self._num_channels, self._filter_size))
+
+        self.bias = self.create_parameter(
+            attr=self._bias_attr,
+            shape=[self._num_filters],
+            dtype=self._dtype,
+            is_bias=True)
+
+    def forward(self, input):
+        out = F.conv3d(
+            input,
+            self.weight,
+            bias=self.bias,
+            padding=self._padding,
+            stride=self._stride,
+            dilation=self._dilation,
+            groups=self._groups,
+            use_cudnn=self._use_cudnn,
+            act=self._act,
+            data_format=self._data_format)
+        return out
+
+
+class Conv3DTranspose(layers.Layer):
+    """
+    **Convolution3D transpose layer**
+
+    The convolution3D transpose layer calculates the output based on the input,
+    filter, and dilations, strides, paddings. Input(Input) and output(Output)
+    are in NCDHW or NDHWC format, where N is batch size, C is the number of
+    channels, D is the depth of the feature, H is the height of the feature,
+    and W is the width of the feature. Parameters (dilations, strides,
+    paddings) are three elements, which represent depth, height and width,
+    respectively. For details of the convolution transpose layer, please refer
+    to the following explanation and references
+    `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
+    If a bias attribute and activation type are provided, bias is added to
+    the output of the convolution, and the corresponding activation function
+    is applied to the final result.
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = \\sigma (W \\ast X + b)
+
+    In the above equation:
+
+    * :math:`X`: Input value, a tensor with NCDHW format.
+    * :math:`W`: Filter value, a tensor with MCDHW format.
+    * :math:`\\ast`: Convolution operation.
+    * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+    * :math:`\\sigma`: Activation function.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+    Example:
+
+        - Input:
+
+            Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+            Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
+
+        - Output:
+
+            Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+        Where
+
+        .. math::
+
+            D^\\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
+            H^\\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
+            W^\\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\\\
+            D_{out} &\\in [ D^\\prime_{out}, D^\\prime_{out} + strides[0] ] \\\\
+            H_{out} &\\in [ H^\\prime_{out}, H^\\prime_{out} + strides[1] ] \\\\
+            W_{out} &\\in [ W^\\prime_{out}, W^\\prime_{out} + strides[2] ]
+
+    **Note**:
+
+        The conv3d_transpose can be seen as the backward of the conv3d.
+        For conv3d, when stride > 1, conv3d maps multiple input shapes to the
+        same output shape; so for conv3d_transpose, when stride > 1, one input
+        shape maps to multiple output shapes. If output_size is None,
+        :math:`D_{out} = D^\\prime_{out}, H_{out} = H^\\prime_{out}, W_{out} = W^\\prime_{out}`;
+        else, the :math:`D_{out}` of the output size must be between
+        :math:`D^\\prime_{out}` and :math:`D^\\prime_{out} + strides[0]`, the
+        :math:`H_{out}` of the output size must be between :math:`H^\\prime_{out}`
+        and :math:`H^\\prime_{out} + strides[1]`, and the :math:`W_{out}` of the
+        output size must be between :math:`W^\\prime_{out}` and
+        :math:`W^\\prime_{out} + strides[2]`.
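+
+        For example, with an 8 x 8 x 8 input, a 3 x 3 x 3 filter, zero
+        padding, unit stride and unit dilation,
+
+        .. math::
+
+            D^\\prime_{out} = (8 - 1) * 1 - 2 * 0 + 1 * (3 - 1) + 1 = 10
+
+        and, since output_size is None in the example below, the output shape
+        is exactly :math:`(N, C_{out}, 10, 10, 10)`.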
+
+    Parameters:
+        num_channels(int): The number of channels in the input image.
+        num_filters(int): The number of filters. It is the same as the number
+            of channels of the output feature map.
+        filter_size(int|tuple): The filter size. If filter_size is a tuple,
+            it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
+            Otherwise, the filter will be a square.
+        output_size(int|tuple, optional): The output image size. If output_size is a
+            tuple, it must contain three integers, (image_D, image_H, image_W).
+            If it is None, filter_size, padding, and stride are used to
+            calculate the output size. If output_size and filter_size are
+            specified at the same time, they should follow the formula above.
+            Default: None.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
+            1. a string in ['valid', 'same'].
+            2. an int, which means each spatial dimension (depth, height, width) is zero-padded by size of `padding`.
+            3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side of each spatial dimension. It has the form [pad_d1, pad_d2, ...].
+            4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
+            5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0).
+            The default value is 0.
+        stride(int|tuple, optional): The stride size. It means the stride in transposed convolution.
+            If stride is a tuple, it must contain three integers, (stride_depth, stride_height,
+            stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
+            The default value is 1.
+        dilation(int|tuple, optional): The dilation size. If dilation is a tuple, it must
+            contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise,
+            dilation_D = dilation_H = dilation_W = dilation. The default value is 1.
+        groups(int, optional): The number of groups of the Conv3D transpose layer. Inspired by
+            grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+            when group=2, the first half of the filters is only connected to the
+            first half of the input channels, while the second half of the
+            filters is only connected to the second half of the input channels.
+            The default value is 1.
+        param_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
+            of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with Xavier. The default value is None.
+        bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, conv3d_transpose
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized to zero. The default value is None.
+        use_cudnn(bool, optional): Use cudnn kernel or not; it is valid only when the cudnn
+            library is installed. The default value is True.
+        act (str, optional): Activation type. If it is set to None, activation is not appended.
+            The default value is None.
+        data_format (str, optional): Data format that specifies the layout of input.
+            It can be "NCDHW" or "NDHWC". Default: "NCDHW".
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
+
+    Attribute:
+        **weight** (Parameter): the learnable weights of filters of this layer.
+
+        **bias** (Parameter): the learnable bias of this layer.
+
+    Returns:
+        None.
+
+    Raises:
+        ValueError: If the shapes of input, filter_size, stride, padding and
+            groups mismatch.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy as np
+            from paddle import fluid
+            import paddle.fluid.dygraph as dg
+            from paddle import nn
+
+            x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32')
+            place = fluid.CPUPlace()
+            with dg.guard(place):
+                x_var = dg.to_variable(x)
+                conv = nn.Conv3DTranspose(4, 6, (3, 3, 3))
+                y_var = conv(x_var)
+                y_np = y_var.numpy()
+                print(y_np.shape)
+
+            # (2, 6, 10, 10, 10)
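+
+        A further sketch, assuming stride behaves as in the formula above.
+        With stride 2 and output_size left as None, each spatial dimension
+        takes its minimal value
+        :math:`D^\\prime_{out} = (8 - 1) * 2 - 2 * 0 + 1 * (3 - 1) + 1 = 17`:
+
+        .. code-block:: python
+
+            import numpy as np
+            from paddle import fluid
+            import paddle.fluid.dygraph as dg
+            from paddle import nn
+
+            x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32')
+            place = fluid.CPUPlace()
+            with dg.guard(place):
+                x_var = dg.to_variable(x)
+                # stride=2 roughly doubles each spatial dimension
+                conv = nn.Conv3DTranspose(4, 6, (3, 3, 3), stride=2)
+                y_var = conv(x_var)
+                print(y_var.numpy().shape)
+
+            # (2, 6, 17, 17, 17)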
+    """
+
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 output_size=None,
+                 padding=0,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 param_attr=None,
+                 bias_attr=None,
+                 use_cudnn=True,
+                 act=None,
+                 data_format="NCDHW",
+                 dtype='float32'):
+        super(Conv3DTranspose, self).__init__()
+        if not isinstance(use_cudnn, bool):
+            raise ValueError("use_cudnn should be True or False")
+        assert param_attr is not False, "param_attr should not be False in conv3d_transpose."
+        self._num_channels = num_channels
+        self._num_filters = num_filters
+        self._groups = groups
+        self._use_cudnn = use_cudnn
+        self._act = act
+        self._dtype = dtype
+        self._data_format = data_format
+
+        self._stride = utils.convert_to_list(stride, 3, 'stride')
+        self._dilation = utils.convert_to_list(dilation, 3, 'dilation')
+        self._filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
+        channel_last = (data_format == "NDHWC")
+        self._padding = padding
+        if output_size is None:
+            self._output_size = output_size
+        elif isinstance(output_size, (list, tuple, int)):
+            self._output_size = utils.convert_to_list(output_size, 3,
+                                                      'output_size')
+        else:
+            raise ValueError(
+                "output_size should be int, list[int] or tuple[int]")
+
+        self._param_attr = param_attr
+        self._bias_attr = bias_attr
+
+        # transposed convolution weight layout:
+        # (in_channels, out_channels // groups, D_f, H_f, W_f)
+        filter_shape = [num_channels, num_filters // groups] + self._filter_size
+        self.weight = self.create_parameter(
+            dtype=self._dtype, shape=filter_shape, attr=self._param_attr)
+        self.bias = self.create_parameter(
+            attr=self._bias_attr,
+            shape=[self._num_filters],
+            dtype=self._dtype,
+            is_bias=True)
+
+    def forward(self, input):
+        out = F.conv3d_transpose(
+            input,
+            self.weight,
+            bias=self.bias,
+            output_size=self._output_size,
+            padding=self._padding,
+            stride=self._stride,
+            dilation=self._dilation,
+            groups=self._groups,
+            use_cudnn=self._use_cudnn,
+            act=self._act,
+            data_format=self._data_format)
+        return out
--
GitLab