diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index b44aa4ce4f893720ef55a7daf1d7b1e757c7480c..25e887ba6675e6c28bcd44c3b57c2ea571c075e3 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -37,6 +37,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { auto filter_dims = ctx->GetInputDim("Filter"); std::vector<int> output_size = ctx->Attrs().Get<std::vector<int>>("output_size"); + std::vector<int> output_padding = + ctx->Attrs().Get<std::vector<int>>("output_padding"); std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations"); @@ -78,6 +80,12 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { platform::errors::InvalidArgument( "The Attr(output_size) and Attr(stride) of Op(conv_transpose) " "should be the same.")); + if (output_padding.size()) + PADDLE_ENFORCE_EQ( + output_padding.size(), strides.size(), + platform::errors::InvalidArgument( + "The Attr(output_padding) and Attr(stride) of Op(conv_transpose) " + "should be the same.")); const int64_t C = (data_layout != DataLayout::kNHWC ? in_dims[1] @@ -136,6 +144,27 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { infer_shape + strides[i])); } output_shape.push_back(output_size[i]); + } else if (output_padding.size()) { + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_GE( + output_padding[i], 0, + platform::errors::InvalidArgument( + "output_padding of Op(ConvTransposeOp) should not be " + "less than 0. But received output_padding = " + "[%s], whose dim %d is less than 0", + framework::make_ddim(output_padding), i)); + PADDLE_ENFORCE_LT( + output_padding[i], std::max(strides[i], dilations[i]), + platform::errors::InvalidArgument( + "output_padding of Op(ConvTransposeOp) should be less " + "than the maximum of stride and dilation. But received " + "output_padding = [%s], " + "whose dim %d is not less than the maximum of stride (%d) " + "and dilation (%d)", + framework::make_ddim(output_padding), i, strides[i], + dilations[i])); + } + output_shape.push_back((infer_shape + output_padding[i])); } else { output_shape.push_back(infer_shape); } @@ -223,10 +252,14 @@ void Conv2DTransposeOpMaker::Make() { "The format of output tensor is X (one-dimensional) of size equal" "to the number of output channels. Only used with MKL-DNN.") .AsDispensable(); - AddOutput("Output", "(Tensor) The output tensor of convolution transpose operator. 
" "The format of output tensor is the same as input tensor."); + AddAttr>("output_padding", + "(vector default: []), Additional size added " + "to one side of each dimension in the output " + "shape") + .SetDefault({}); AddAttr>("output_size", "(vector default: []), the " "size of the output tensor") @@ -338,6 +371,11 @@ void Conv3DTransposeOpMaker::Make() { "Where N is batch size, C is " "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); + AddAttr>("output_padding", + "(vector default: []), Additional size added " + "to one side of each dimension in the output " + "shape") + .SetDefault({}); AddAttr>("output_size", "(vector default: []), the " "size of the output tensor") diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py index 989836d5993af5620a7b5fbd86c07b028e419fc4..ba450b345b8a309f5d7ff1e7a5c149809f55f46c 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py @@ -29,13 +29,12 @@ class Conv2DTransposeTestCase(unittest.TestCase): num_filters=8, filter_size=3, output_size=None, + output_padding=0, padding=0, stride=1, dilation=1, groups=1, - act=None, no_bias=False, - use_cudnn=True, data_format="NCHW", dtype="float32"): super(Conv2DTransposeTestCase, self).__init__(methodName) @@ -45,14 +44,13 @@ class Conv2DTransposeTestCase(unittest.TestCase): self.spartial_shape = spartial_shape self.filter_size = filter_size self.output_size = output_size + self.output_padding = output_padding self.padding = padding self.stride = stride self.dilation = dilation self.groups = groups - self.act = act self.no_bias = no_bias - self.use_cudnn = use_cudnn self.data_format = data_format self.dtype = dtype @@ -93,6 +91,7 @@ class Conv2DTransposeTestCase(unittest.TestCase): bias_attr = False else: bias_attr = I.NumpyArrayInitializer(self.bias) + y_var = fluid.layers.conv2d_transpose( x_var, self.num_filters, @@ -104,8 +103,6 @@ class Conv2DTransposeTestCase(unittest.TestCase): groups=self.groups, param_attr=weight_attr, bias_attr=bias_attr, - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) @@ -125,17 +122,22 @@ class Conv2DTransposeTestCase(unittest.TestCase): "weight", self.weight_shape, dtype=self.dtype) b_var = fluid.data( "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv2d_transpose( + + if self.output_padding != 0: + output_size = None + else: + output_size = self.output_size + + y_var = F.conv_transpose2d( x_var, w_var, None if self.no_bias else b_var, - output_size=self.output_size, + output_size=output_size, padding=self.padding, + output_padding=self.output_padding, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: @@ -147,32 +149,38 @@ class Conv2DTransposeTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv2DTranspose( + + if self.output_padding != 0: + output_size = None + else: + output_size = self.output_size + + conv = nn.ConvTranspose2d( self.num_channels, self.num_filters, self.filter_size, - output_size=self.output_size, padding=self.padding, + output_padding=self.output_padding, stride=self.stride, 
dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, - data_format=self.data_format, - dtype=self.dtype) + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) - y_var = conv(x_var) + y_var = conv(x_var, output_size) y_np = y_var.numpy() return y_np def _test_equivalence(self, place): place = fluid.CPUPlace() + result1 = self.fluid_layer(place) result2 = self.functional(place) + with dg.guard(place): result3 = self.paddle_nn_layer() + np.testing.assert_array_almost_equal(result1, result2) np.testing.assert_array_almost_equal(result2, result3) @@ -194,7 +202,7 @@ class Conv2DTransposeErrorTestCase(Conv2DTransposeTestCase): def add_cases(suite): - suite.addTest(Conv2DTransposeTestCase(methodName='runTest', act="relu")) + suite.addTest(Conv2DTransposeTestCase(methodName='runTest')) suite.addTest( Conv2DTransposeTestCase( methodName='runTest', stride=[1, 2], no_bias=True, dilation=2)) @@ -211,9 +219,6 @@ def add_cases(suite): suite.addTest( Conv2DTransposeTestCase( methodName='runTest', padding="valid")) - suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', padding='valid')) suite.addTest( Conv2DTransposeTestCase( methodName='runTest', filter_size=1, padding=(2, 3))) @@ -240,15 +245,22 @@ def add_cases(suite): num_filters=6, num_channels=3, groups=3, - use_cudnn=False, - act="sigmoid", padding="valid")) + suite.addTest( + Conv2DTransposeTestCase( + methodName='runTest', + num_filters=6, + num_channels=3, + spartial_shape=(7, 7), + filter_size=[5, 5], + groups=1, + padding=2, + stride=2, + output_size=[14, 14], + output_padding=[1, 1], )) def add_error_cases(suite): - suite.addTest( - Conv2DTransposeErrorTestCase( - methodName='runTest', use_cudnn="not_valid")) suite.addTest( Conv2DTransposeErrorTestCase( methodName='runTest', num_channels=5, groups=2)) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index f4418150e8a69d795ff544073b6ba6dd7431e44b..913db51da500b6c324abfab61744dfc1947bf7a5 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -77,8 +77,13 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): output_size = attrs['output_size'] out_h = output_size[0] + pad_h_0 + pad_h_1 out_w = output_size[1] + pad_w_0 + pad_w_1 - - out = np.zeros((in_n, out_c, out_h, out_w), dtype=input_.dtype) + out_pad_h = 0 + out_pad_w = 0 + if 'output_padding' in attrs: + out_pad_h = attrs['output_padding'][0] + out_pad_w = attrs['output_padding'][1] + out = np.zeros( + (in_n, out_c, out_h + out_pad_h, out_w + out_pad_w), dtype=input_.dtype) for n in range(in_n): for i in range(in_h): @@ -99,7 +104,8 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): out[n, g * f_out_c + k, i1:i2:dilations[0], j1:j2: dilations[1]] += tmp_out - out = out[:, :, pad_h_0:out_h - pad_h_1, pad_w_0:out_w - pad_w_1] + out = out[:, :, pad_h_0:out_h - pad_h_1 + out_pad_h, pad_w_0:out_w - pad_w_1 + + out_pad_w] if attrs['data_format'] == 'NHWC': out = np.transpose(out, [0, 2, 3, 1]) return out @@ -114,6 +120,7 @@ class TestConv2dTransposeOp(OpTest): self.use_cudnn = False self.use_mkldnn = False self.output_size = None + self.output_padding = [] self.data_format = "NCHW" self.pad = [0, 0] self.padding_algorithm = "EXPLICIT" @@ -138,6 +145,9 @@ class TestConv2dTransposeOp(OpTest): if self.output_size is not None: 
self.attrs['output_size'] = self.output_size + if len(self.output_padding) > 0: + self.attrs['output_padding'] = self.output_padding + output = conv2dtranspose_forward_naive(input_, filter_, self.attrs).astype(self.dtype) @@ -290,6 +300,18 @@ class TestWithEvenUpsample(TestConv2dTransposeOp): self.filter_size = [f_c, 6, 5, 5] +class TestWithEvenUpsampleOutputPadding(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [2, 2] + self.stride = [2, 2] + self.groups = 1 + self.dilations = [1, 1] + self.output_padding = [1, 1] + self.input_size = [2, 3, 7, 7] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 5, 5] + + class Test_NHWC(TestConv2dTransposeOp): def init_test_case(self): self.pad = [0, 0] @@ -375,6 +397,19 @@ class TestWithEvenUpsample_NHWC(TestConv2dTransposeOp): self.data_format = 'NHWC' +class TestWithEvenUpsample_NHWC_output_padding(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [2, 2] + self.stride = [2, 2] + self.groups = 1 + self.dilations = [1, 1] + self.output_padding = [1, 1] + self.input_size = [2, 7, 7, 3] # NHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 5, 5] + self.data_format = 'NHWC' + + # ------------ test_cudnn ------------ @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py index acaf33467dbfc1c580ab3a36f08d0c2a26d7c239..e30f0cd3ecd0b872efa53c85e0666e4a6fb00a88 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -33,9 +33,7 @@ class Conv3DTransposeTestCase(unittest.TestCase): stride=1, dilation=1, groups=1, - act=None, no_bias=False, - use_cudnn=True, data_format="NCDHW", dtype="float32"): super(Conv3DTransposeTestCase, self).__init__(methodName) @@ -50,9 +48,7 @@ class Conv3DTransposeTestCase(unittest.TestCase): self.stride = stride self.dilation = dilation self.groups = groups - self.act = act self.no_bias = no_bias - self.use_cudnn = use_cudnn self.data_format = data_format self.dtype = dtype @@ -104,8 +100,6 @@ class Conv3DTransposeTestCase(unittest.TestCase): groups=self.groups, param_attr=weight_attr, bias_attr=bias_attr, - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) @@ -125,7 +119,7 @@ class Conv3DTransposeTestCase(unittest.TestCase): "weight", self.weight_shape, dtype=self.dtype) b_var = fluid.data( "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv3d_transpose( + y_var = F.conv_transpose3d( x_var, w_var, None if self.no_bias else b_var, @@ -134,8 +128,6 @@ class Conv3DTransposeTestCase(unittest.TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: @@ -147,23 +139,19 @@ class Conv3DTransposeTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv3DTranspose( + conv = nn.ConvTranspose3d( self.num_channels, self.num_filters, self.filter_size, - output_size=self.output_size, padding=self.padding, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, - data_format=self.data_format, - dtype=self.dtype) + data_format=self.data_format) 
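As a quick sanity check on the new output_padding fixtures above: the even-upsample configuration (7x7 input, 5x5 kernel, stride 2, padding 2, output_padding 1) lands on the same 14x14 shape that the layer test requests via output_size=[14, 14]. An illustrative calculation, not part of the patch:

.. code-block:: python

    # Per-dimension shape rule enforced by the new InferShape branch,
    # assuming symmetric padding:
    # out = (in - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1 + output_padding
    in_size, kernel, stride, pad, dilation, out_pad = 7, 5, 2, 2, 1, 1
    out = (in_size - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1 + out_pad
    assert out == 14
    # output_padding must stay below max(stride, dilation), as the C++ check enforces
    assert 0 <= out_pad < max(stride, dilation)
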
conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) - y_var = conv(x_var) + y_var = conv(x_var, self.output_size) y_np = y_var.numpy() return y_np @@ -194,7 +182,7 @@ class Conv3DTransposeErrorTestCase(Conv3DTransposeTestCase): def add_cases(suite): - suite.addTest(Conv3DTransposeTestCase(methodName='runTest', act="tanh")) + suite.addTest(Conv3DTransposeTestCase(methodName='runTest')) suite.addTest( Conv3DTransposeTestCase( methodName='runTest', stride=[1, 2, 1], dilation=2, no_bias=True)) @@ -240,15 +228,10 @@ def add_cases(suite): num_filters=6, num_channels=3, groups=3, - use_cudnn=False, - act="sigmoid", padding="valid")) def add_error_cases(suite): - suite.addTest( - Conv3DTransposeErrorTestCase( - methodName='runTest', use_cudnn="not_valid")) suite.addTest( Conv3DTransposeErrorTestCase( methodName='runTest', num_channels=5, groups=2)) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py index 21986f1b98d869289ddb34a65316aca57c83f9d9..1fb07bf4345909deb5485a89232270336658ae8b 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py @@ -37,8 +37,6 @@ class TestFunctionalConv2D(TestCase): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" def prepare(self): @@ -90,8 +88,6 @@ class TestFunctionalConv2D(TestCase): param_attr=I.NumpyArrayInitializer(self.weight), bias_attr=False if self.no_bias else I.NumpyArrayInitializer(self.bias), - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) exe = fluid.Executor(self.place) exe.run(start) @@ -115,7 +111,7 @@ class TestFunctionalConv2D(TestCase): "weight", self.weight.shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv2d_transpose( + y = F.conv_transpose2d( x, weight, None if self.no_bias else bias, @@ -124,9 +120,7 @@ class TestFunctionalConv2D(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -140,7 +134,7 @@ class TestFunctionalConv2D(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv2d_transpose( + y = F.conv_transpose2d( x, weight, bias, @@ -148,10 +142,8 @@ class TestFunctionalConv2D(TestCase): padding=self.padding, stride=self.stride, dilation=self.dilation, - act=self.act, groups=self.groups, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) out = y.numpy() return out @@ -189,8 +181,6 @@ class TestFunctionalConv2DError(TestCase): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" def test_exception(self): @@ -225,7 +215,7 @@ class TestFunctionalConv2DError(TestCase): "weight", self.weight_shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv2d_transpose( + y = F.conv_transpose2d( x, weight, None if self.no_bias else bias, @@ -234,9 +224,7 @@ class TestFunctionalConv2DError(TestCase): stride=self.stride, 
dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) class TestFunctionalConv2DCase2(TestFunctionalConv2D): @@ -249,8 +237,6 @@ class TestFunctionalConv2DCase2(TestFunctionalConv2D): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -264,8 +250,6 @@ class TestFunctionalConv2DCase3(TestFunctionalConv2D): self.dilation = 1 self.groups = 1 self.no_bias = True - self.act = None - self.use_cudnn = True self.data_format = "NCHW" @@ -279,8 +263,6 @@ class TestFunctionalConv2DCase4(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -294,8 +276,6 @@ class TestFunctionalConv2DCase5(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -309,8 +289,6 @@ class TestFunctionalConv2DCase6(TestFunctionalConv2D): self.dilation = (2, 1) self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -324,8 +302,6 @@ class TestFunctionalConv2DCase7(TestFunctionalConv2D): self.dilation = 1 self.groups = 4 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NHWC" @@ -340,8 +316,6 @@ class TestFunctionalConv2DCase8(TestFunctionalConv2D): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -355,8 +329,6 @@ class TestFunctionalConv2DCase9(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -370,8 +342,6 @@ class TestFunctionalConv2DCase10(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -385,8 +355,6 @@ class TestFunctionalConv2DCase11(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -400,8 +368,6 @@ class TestFunctionalConv2DCase12(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -415,8 +381,6 @@ class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -430,8 +394,6 @@ class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -445,8 +407,6 @@ class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -460,23 +420,6 @@ class TestFunctionalConv2DErrorCase5(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True - self.data_format = "NCHW" - - -class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 4 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = 
False - self.act = "sigmoid" - self.use_cudnn = "not_valid" self.data_format = "NCHW" @@ -491,8 +434,6 @@ class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -506,8 +447,6 @@ class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "not_valid" @@ -521,8 +460,6 @@ class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py index f8e7818315fa077df4d8ad0d6d3f76b47501b5e9..7441f7cb915e8b1fdd2155fff79e145fb6a00c0f 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py @@ -38,7 +38,6 @@ class TestFunctionalConv3DTranspose(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" def prepare(self): @@ -90,7 +89,6 @@ class TestFunctionalConv3DTranspose(TestCase): param_attr=I.NumpyArrayInitializer(self.weight), bias_attr=False if self.no_bias else I.NumpyArrayInitializer(self.bias), - use_cudnn=self.use_cudnn, act=self.act, data_format=self.data_format) exe = fluid.Executor(self.place) @@ -115,7 +113,7 @@ class TestFunctionalConv3DTranspose(TestCase): "weight", self.weight.shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv3d_transpose( + y = F.conv_transpose3d( x, weight, None if self.no_bias else bias, @@ -124,9 +122,9 @@ class TestFunctionalConv3DTranspose(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + if self.act == 'sigmoid': + y = F.sigmoid(y) exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -140,7 +138,7 @@ class TestFunctionalConv3DTranspose(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv3d_transpose( + y = F.conv_transpose3d( x, weight, bias, @@ -148,10 +146,10 @@ class TestFunctionalConv3DTranspose(TestCase): padding=self.padding, stride=self.stride, dilation=self.dilation, - act=self.act, groups=self.groups, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + if self.act == 'sigmoid': + y = F.sigmoid(y) out = y.numpy() return out @@ -190,7 +188,6 @@ class TestFunctionalConv3DTransposeError(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" def test_exception(self): @@ -225,7 +222,7 @@ class TestFunctionalConv3DTransposeError(TestCase): "weight", self.weight_shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv3d_transpose( + y = F.conv_transpose3d( x, weight, None if self.no_bias else bias, @@ -234,9 +231,9 @@ class TestFunctionalConv3DTransposeError(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - 
act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + if self.act == 'sigmoid': + y = F.sigmoid(y) class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): @@ -250,7 +247,6 @@ class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -265,7 +261,6 @@ class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -280,7 +275,6 @@ class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = True self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -295,7 +289,6 @@ class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -310,7 +303,6 @@ class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose): self.groups = 4 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NDHWC" @@ -326,7 +318,6 @@ class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -341,7 +332,6 @@ class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -356,7 +346,6 @@ class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -371,7 +360,6 @@ class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -386,7 +374,6 @@ class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -402,7 +389,6 @@ class TestFunctionalConv3DTransposeErrorCase2( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -418,7 +404,6 @@ class TestFunctionalConv3DTransposeErrorCase3( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -434,7 +419,6 @@ class TestFunctionalConv3DTransposeErrorCase4( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -450,23 +434,6 @@ class TestFunctionalConv3DTransposeErrorCase5( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True - self.data_format = "NCDHW" - - -class TestFunctionalConv3DTransposeErrorCase6( - TestFunctionalConv3DTransposeError): - def setUp(self): - self.in_channels = 4 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = "not_valid" self.data_format = "NCDHW" @@ -483,7 +450,6 @@ class TestFunctionalConv3DTransposeErrorCase7( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -499,7 +465,6 @@ class 
TestFunctionalConv3DTransposeErrorCase8( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "not_valid" @@ -515,7 +480,6 @@ class TestFunctionalConv3DTransposeErrorCase9( self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 8c7c677366cc984b768a3d9b23b5616b5593ac38..6b449c259b4d4a1fd4c1ab93019b57961ffdf057 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -94,9 +94,9 @@ from .layer.common import Dropout3D #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS from .layer.conv import Conv2D #DEFINE_ALIAS -from .layer.conv import Conv2DTranspose #DEFINE_ALIAS +from .layer.conv import ConvTranspose2d #DEFINE_ALIAS from .layer.conv import Conv3D #DEFINE_ALIAS -from .layer.conv import Conv3DTranspose #DEFINE_ALIAS +from .layer.conv import ConvTranspose3d #DEFINE_ALIAS # from .layer.conv import TreeConv #DEFINE_ALIAS # from .layer.conv import Conv1D #DEFINE_ALIAS from .layer.extension import RowConv #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 9d790ae8883e2046f4caa173f37809699ae22d94..2a941c9aed8b382cff59ad5c50ccbd82f12e9c40 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -70,9 +70,9 @@ from .common import unfold #DEFINE_ALIAS from .common import assign #DEFINE_ALIAS from .common import interpolate #DEFINE_ALIAS from .conv import conv2d #DEFINE_ALIAS -from .conv import conv2d_transpose #DEFINE_ALIAS +from .conv import conv_transpose2d #DEFINE_ALIAS from .conv import conv3d #DEFINE_ALIAS -from .conv import conv3d_transpose #DEFINE_ALIAS +from .conv import conv_transpose3d #DEFINE_ALIAS from .extension import add_position_encoding #DEFINE_ALIAS # from .extension import autoincreased_step_counter #DEFINE_ALIAS from .extension import continuous_value_model #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 2a519718258856fe1f4462422a36dccae7066ad1..fc0656c89dba948c4b5e8f40cd634430e9ff72b6 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -13,9 +13,10 @@ # limitations under the License. from __future__ import print_function -__all__ = ['conv2d', 'conv2d_transpose', 'conv3d', 'conv3d_transpose'] +__all__ = ['conv2d', 'conv_transpose2d', 'conv3d', 'conv_transpose3d'] import numpy as np +from ...device import get_cudnn_version from ...fluid.framework import Variable, in_dygraph_mode from ...fluid import core, dygraph_utils from ...fluid.layers import nn, utils @@ -323,21 +324,18 @@ def conv2d(input, return out -def conv2d_transpose(input, +def conv_transpose2d(x, weight, bias=None, - output_size=None, - padding=0, stride=1, - dilation=1, + padding=0, + output_padding=0, groups=1, - use_cudnn=True, - act=None, + dilation=1, data_format='NCHW', + output_size=None, name=None): """ - :alias_main: paddle.nn.functional.conv2d_transpose - :alias: paddle.nn.functional.conv2d_transpose,paddle.nn.functional.conv.conv2d_transpose The convolution2D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. 
Input(Input) and output(Output) @@ -350,6 +348,7 @@ def conv2d_transpose(input, If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. + See more detail in :ref:`api_nn_conv_ConvTranspose2d` . For each input :math:`X`, the equation is: @@ -398,18 +397,15 @@ def conv2d_transpose(input, conv2d_transpose can compute the kernel size automatically. Args: - input(Variable): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format, + x(Tensor): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format, whose data type is float32 or float64. - weight(Variable): The convolution kernel, a Tensor with shape [C, M/g, kH, kW], + weight(Tensor): The convolution kernel, a Tensor with shape [C, M/g, kH, kW], where M is the number of output channels(filters), g is the number of groups, kH is the height of the kernel, and kW is the width of the kernel. - bias(Variable, optional): The bias, a Tensor with shape [M, ]. - output_size(int|tuple|list, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_height, image_width). None if use - filter_size, padding, and stride to calculate output_size. - If output_size is specified, output_size and filter_size (weight)'s shape - should follow the formula above. Default: None. output_size and filter_size - should not be None at the same time. + bias(Tensor, optional): The bias, a Tensor with shape [M, ]. + stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain two integers, (stride_height, stride_width). + Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, either 'VALID' or 'SAME' supported, which is the padding algorithm. @@ -421,10 +417,9 @@ def conv2d_transpose(input, when `data_format` is `'NHWC'`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). - Otherwise, stride_height = stride_width = stride. Default: stride = 1. - dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points. + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. + dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by @@ -433,10 +428,12 @@ def conv2d_transpose(input, first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: groups = 1. - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. 
+ output_size(int|tuple|list, optional): The output image size. If output size is a + tuple, it must contain two integers, (image_height, image_width). None if use + filter_size, padding, and stride to calculate output_size. + If output_size is specified, output_size and filter_size (weight)'s shape + should follow the formula above. Default: None. output_size and filter_size + should not be None at the same time. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: @@ -446,20 +443,17 @@ def conv2d_transpose(input, None by default. Returns: - A Variable holding Tensor representing the conv2d_transpose, whose + A Tensor representing the conv_transpose2d, whose data type is the same with input and shape is (num_batches, channels, out_h, - out_w) or (num_batches, out_h, out_w, channels). If act is None, the tensor variable - storing the transposed convolution result, and if act is not None, the - tensor variable storing transposed convolution and non-linearity activation - result. + out_w) or (num_batches, out_h, out_w, channels). The tensor variable storing + transposed convolution result. Raises: - ValueError: If the type of `use_cudnn` is not bool. ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. - ValueError: If `output_size` and filter_size are None at the same time. + ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 4-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. ShapeError: If the dimension size of input minus the size of `stride` is not 2. @@ -469,28 +463,23 @@ Examples: .. code-block:: python - from paddle import fluid - import paddle.nn.functional as F - import paddle.fluid.dygraph as dg import numpy as np + import paddle + import paddle.nn.functional as F x = np.random.randn(2, 3, 8, 8).astype(np.float32) w = np.random.randn(3, 6, 3, 3).astype(np.float32) - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - w_var = dg.to_variable(w) - y_var = F.conv2d_transpose(x_var, w_var, act="relu") - y_np = y_var.numpy() + paddle.disable_static() + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv_transpose2d(x_var, w_var) + y_np = y_var.numpy() print(y_np.shape) # (2, 6, 10, 10) """ - if not isinstance(use_cudnn, bool): - raise ValueError("Attr(use_cudnn) should be True or False. " - "Received Attr(use_cudnn): {}.".format(use_cudnn)) if data_format not in ['NCHW', 'NHWC']: raise ValueError( "Attr(data_format) of conv2d_transpose got wrong value: " @@ -498,48 +487,65 @@ data_format)) channel_last = (data_format == "NHWC") channel_dim = -1 if channel_last else 1 - num_channels = input.shape[channel_dim] + num_channels = x.shape[channel_dim] if num_channels < 0: raise ValueError("The channel dimmention of the input({}) " "should be defined. 
Received: {}.".format( - input.shape, num_channels)) + x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," "received: the channel of input is {}, the shape of input is {}" - ", the groups is {}".format(num_channels, input.shape, groups)) + ", the groups is {}".format(num_channels, x.shape, groups)) + + cudnn_version = get_cudnn_version() + + use_cudnn = True if (core.is_compiled_with_cuda() and + cudnn_version is not None) else False # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) stride = utils.convert_to_list(stride, 2, 'stride') dilation = utils.convert_to_list(dilation, 2, 'dilation') + if output_size is None: output_size = [] - elif isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 2, 'output_size') else: - raise ValueError("output_size should be int, or list, tuple of ints") + if output_padding != 0: + raise ValueError('output_padding option is mutually exclusive with ' + 'output_size') + if isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 2, 'output_size') + else: + raise ValueError( + "output_size should be int, or list, tuple of ints") + + if output_padding == 0: + output_padding = [] + else: + output_padding = utils.convert_to_list(output_padding, 2, + 'output_padding') op_type = 'conv2d_transpose' num_filters = weight.shape[1] - if (num_channels == groups and num_filters == 1 and not use_cudnn): + if (num_channels == groups and num_filters == 1): op_type = 'depthwise_conv2d_transpose' + use_cudnn = False if in_dygraph_mode(): - attrs = ('output_size', output_size, 'strides', stride, 'paddings', - padding, 'padding_algorithm', padding_algorithm, 'dilations', - dilation, 'groups', groups, 'use_cudnn', use_cudnn, - 'data_format', data_format) - pre_bias = getattr(core.ops, op_type)(input, weight, *attrs) + attrs = ('output_padding', output_padding, 'output_size', output_size, + 'strides', stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'dilations', dilation, 'groups', groups, + 'use_cudnn', use_cudnn, 'data_format', data_format) + pre_bias = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = dygraph_utils._append_activation_in_dygraph( - pre_act, act, use_cudnn=use_cudnn) + out = pre_bias else: - inputs = {'Input': [input], 'Filter': [weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { + 'output_padding': output_padding, 'output_size': output_size, 'strides': stride, 'paddings': padding, @@ -549,20 +555,19 @@ def conv2d_transpose(input, 'use_cudnn': use_cudnn, 'data_format': data_format } - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'conv2d_transpose') helper = LayerHelper(op_type, **locals()) - dtype = helper.input_dtype() - pre_bias = helper.create_variable_for_type_inference(dtype) + pre_bias = helper.create_variable_for_type_inference(x.dtype) outputs = {"Output": [pre_bias]} helper.append_op( type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = 
helper.append_activation(pre_act) + out = pre_bias + return out @@ -782,23 +787,19 @@ def conv3d(input, return out -def conv3d_transpose(input, +def conv_transpose3d(x, weight, bias=None, - output_size=None, - padding=0, stride=1, - dilation=1, + padding=0, + output_padding=0, groups=1, - use_cudnn=True, - act=None, + dilation=1, data_format='NCDHW', + output_size=None, name=None): """ - :alias_main: paddle.nn.functional.conv3d_transpose - :alias: paddle.nn.functional.conv3d_transpose,paddle.nn.functional.conv.conv3d_transpose - - The convolution3D transpose layer calculates the output based on the input, + The convolution3d transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels, D is the depth of the feature, H is the height of the feature, and W @@ -809,6 +810,7 @@ def conv3d_transpose(input, If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. + See more detail in :ref:`api_nn_conv_ConvTranspose3d` . For each input :math:`X`, the equation is: @@ -861,17 +863,16 @@ def conv3d_transpose(input, conv3d_transpose can compute the kernel size automatically. Args: - input(Variable): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type + x(Tensor): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type of input is float32 or float64. - weight (Variable): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW], + weight (Tensor): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW], where M is the number of filters(output channels), g is the number of groups, kD, kH, kW are the filter's depth, height and width respectively. - bias (Variable, optional): The bias, a Tensor of shape [M, ]. - output_size(int|tuple, optional): The output image size. If output size is a - tuple, it must contain three integers, (image_depth, image_height, image_width). This - parameter only works when filter_size is None. If output_size and filter_size are - specified at the same time, They should follow the formula above. Default: None. - Output_size and filter_size should not be None at the same time. + bias (Tensor, optional): The bias, a Tensor of shape [M, ]. + stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. + Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding` @@ -882,11 +883,9 @@ def conv3d_transpose(input, when `data_format` is `'NDHWC'`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, - stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. - Default: stride = 1. 
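Note that output_size and output_padding are mutually exclusive in the new implementation (a ValueError is raised further below); a minimal sketch of that failure mode, with argument values chosen only for illustration:

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    x = paddle.to_tensor(np.random.randn(2, 3, 8, 8, 8).astype(np.float32))
    w = paddle.to_tensor(np.random.randn(3, 6, 3, 3, 3).astype(np.float32))
    try:
        F.conv_transpose3d(
            x, w, stride=2, output_size=[17, 17, 17], output_padding=1)
    except ValueError as e:
        print(e)  # output_padding option is mutually exclusive with output_size
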
- dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points. + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. + dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. @@ -896,32 +895,32 @@ def conv3d_transpose(input, first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: groups=1 - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. + output_size(int|list|tuple, optional): The output image size. If output size is a + tuple, it must contain three integers, (image_depth, image_height, image_width). This + parameter only works when filter_size is None. If output_size and filter_size are + specified at the same time, They should follow the formula above. Default: None. + Output_size and filter_size should not be None at the same time. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. Returns: - A Variable holding Tensor representing the conv3d_transpose, whose data + A Tensor representing the conv_transpose3d, whose data type is the same with input and shape is (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels). If act is None, the tensor variable storing the transposed convolution result, and if act is not None, the tensor variable storing transposed convolution and non-linearity activation result. Raises: - ValueError: If the type of `use_cudnn` is not bool. ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. - ValueError: If `output_size` and filter_size are None at the same time. + ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 5-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. ShapeError: If the dimension size of input minus the size of `stride` is not 2. @@ -930,29 +929,26 @@ def conv3d_transpose(input, Examples: .. 
code-block:: python + + import numpy as np - from paddle import fluid + import paddle import paddle.nn.functional as F - import paddle.fluid.dygraph as dg - import numpy as np x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32) w = np.random.randn(3, 6, 3, 3, 3).astype(np.float32) - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - w_var = dg.to_variable(w) - y_var = F.conv3d_transpose(x_var, w_var, act="relu") - y_np = y_var.numpy() + paddle.disable_static() + + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv_transpose3d(x_var, w_var) + y_np = y_var.numpy() print(y_np.shape) # (2, 6, 10, 10, 10) """ # entry checks - if not isinstance(use_cudnn, bool): - raise ValueError("Attr(use_cudnn) should be True or False. " - "Received Attr(use_cudnn): {}.".format(use_cudnn)) if data_format not in ["NCDHW", "NDHWC"]: raise ValueError( "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " @@ -960,12 +956,12 @@ def conv3d_transpose(input, channel_last = (data_format == "NDHWC") channel_dim = -1 if channel_last else 1 - num_channels = input.shape[channel_dim] + num_channels = x.shape[channel_dim] num_filters = weight.shape[1] if num_channels < 0: raise ValueError( "The channel dimmention of the input({}) should be defined. " - "Received: {}.".format(input.shape, num_channels)) + "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( "The number of input channels must be divisible by Attr(groups). " @@ -977,29 +973,45 @@ def conv3d_transpose(input, dilation = utils.convert_to_list(dilation, 3, 'dilation') if output_size is None: output_size = [] - elif isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 3, 'output_size') else: - raise ValueError("output_size should be int, or list, tuple of ints") + if output_padding != 0: + raise ValueError('output_padding option is mutually exclusive with ' + 'output_size') + if isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 3, 'output_size') + else: + raise ValueError( + "output_size should be int, or list, tuple of ints") + + if output_padding == 0: + output_padding = [] + else: + output_padding = utils.convert_to_list(output_padding, 3, + 'output_padding') + + cudnn_version = get_cudnn_version() + + #TODO(LielinJiang): whether to use cudnn according to the version of cudnn + use_cudnn = True if (core.is_compiled_with_cuda() and + cudnn_version is not None) else False op_type = 'conv3d_transpose' data_format_ = "NHWC" if channel_last else "NCHW" if in_dygraph_mode(): - attrs = ('output_size', output_size, 'paddings', padding, - "padding_algorithm", padding_algorithm, 'strides', stride, - 'dilations', dilation, 'groups', groups, 'use_cudnn', - use_cudnn, "data_format", data_format_) - pre_bias = getattr(core.ops, op_type)(input, weight, *attrs) + attrs = ('output_padding', output_padding, 'output_size', output_size, + 'paddings', padding, "padding_algorithm", padding_algorithm, + 'strides', stride, 'dilations', dilation, 'groups', groups, + 'use_cudnn', use_cudnn, "data_format", data_format_) + pre_bias = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = dygraph_utils._append_activation_in_dygraph( - pre_act, act, use_cudnn=use_cudnn) + out = pre_bias else: - inputs = {'Input': [input], 'Filter': 
[weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { + 'output_padding': output_padding, 'output_size': output_size, 'paddings': padding, "padding_algorithm": padding_algorithm, @@ -1010,19 +1022,17 @@ def conv3d_transpose(input, "data_format": data_format_ } helper = LayerHelper(op_type, **locals()) - dtype = helper.input_dtype() - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], 'conv3d') + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'conv3d') - pre_bias = helper.create_variable_for_type_inference(dtype) + pre_bias = helper.create_variable_for_type_inference(x.dtype) outputs = {"Output": [pre_bias]} helper.append_op( type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = helper.append_activation(pre_act) + out = pre_bias return out diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index f86051d768dd71ff016b31c1c5fc3efe501a5a69..8442bac7a305ed76b8c999783c734b753b205984 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -58,9 +58,9 @@ from .common import Dropout3D #DEFINE_ALIAS from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS from .conv import Conv2D #DEFINE_ALIAS -from .conv import Conv2DTranspose #DEFINE_ALIAS +from .conv import ConvTranspose2d #DEFINE_ALIAS from .conv import Conv3D #DEFINE_ALIAS -from .conv import Conv3DTranspose #DEFINE_ALIAS +from .conv import ConvTranspose3d #DEFINE_ALIAS # from .conv import TreeConv #DEFINE_ALIAS # from .conv import Conv1D #DEFINE_ALIAS from .extension import RowConv #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 9fb6c9ebc2e404ab477630aae99a6b43d683b20b..2e0cfb96244d21ab84b0c6ac1a6a8dcebdfded53 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -16,9 +16,9 @@ __all__ = [ 'Conv2D', - 'Conv2DTranspose', + 'ConvTranspose2d', 'Conv3D', - 'Conv3DTranspose', + 'ConvTranspose3d', # 'TreeConv', # 'Conv1D' ] @@ -38,6 +38,51 @@ def _get_default_param_initializer(num_channels, filter_size): return Normal(0.0, std, 0) +class _ConvNd(layers.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + transposed, + dims, + stride=1, + padding=0, + padding_mode='zeros', + output_padding=0, + dilation=1, + groups=1, + weight_attr=None, + bias_attr=None, + data_format="NCHW"): + super(_ConvNd, self).__init__() + assert weight_attr is not False, "weight_attr should not be False in Conv." 
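+        # Weight layout chosen below: a transposed convolution stores its
+        # filter as [in_channels, out_channels // groups, *kernel_size],
+        # while a regular convolution stores
+        # [out_channels, in_channels // groups, *kernel_size].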
+ self._param_attr = weight_attr + self._bias_attr = bias_attr + self._groups = groups + self._in_channels = in_channels + self._out_channels = out_channels + self._data_format = data_format + + self._stride = utils.convert_to_list(stride, dims, 'stride') + self._dilation = utils.convert_to_list(dilation, dims, 'dilation') + self._kernel_size = utils.convert_to_list(kernel_size, dims, + 'kernel_size') + self._padding = padding + self.output_padding = output_padding + + if transposed: + filter_shape = [self._in_channels, out_channels // groups + ] + self._kernel_size + else: + filter_shape = [out_channels, in_channels // groups + ] + self._kernel_size + + self.weight = self.create_parameter( + shape=filter_shape, attr=self._param_attr) + self.bias = self.create_parameter( + attr=self._bias_attr, shape=[self._out_channels], is_bias=True) + + class Conv2D(layers.Layer): """ :alias_main: paddle.nn.Conv2D @@ -236,12 +281,9 @@ class Conv2D(layers.Layer): return out -class Conv2DTranspose(layers.Layer): +class ConvTranspose2d(_ConvNd): """ - :alias_main: paddle.nn.Conv2DTranspose - :alias: paddle.nn.Conv2DTranspose,paddle.nn.layer.Conv2DTranspose,paddle.nn.layer.conv.Conv2DTranspose - - This interface is used to construct a callable object of the ``Conv2DTranspose`` class. + This interface is used to construct a callable object of the ``ConvTranspose2d`` class. For more details, refer to code examples. The convolution2D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input and output @@ -294,17 +336,13 @@ class Conv2DTranspose(layers.Layer): W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ) Parameters: - num_channels(int): The number of channels in the input image. - num_filters(int): The number of the filter. It is as same as the output - feature map. - filter_size(int or tuple): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - output_size(int or tuple, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use - filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. + in_channels(int): The number of channels in the input image. + out_channels(int): The number of channels produced by the convolution. + kernel_size(int|list|uple): The kernel size. If kernel_size is a tuple, + it must contain two integers, (kernel_size_H, kernel_size_W). + Otherwise, the kernel will be a square. + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` on both sides @@ -312,10 +350,10 @@ class Conv2DTranspose(layers.Layer): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. 
Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - stride(int or tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: 1. - dilation(int or tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: 1. groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by @@ -324,113 +362,98 @@ class Conv2DTranspose(layers.Layer): first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: 1. - param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) + weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose will create ParamAttr as weight_attr. If the Initializer of the weight_attr is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d_transpose. + bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose. If it is set to False, no bias will be added to the output units. If it is set to None or one attribute of ParamAttr, conv2d_transpose will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW". - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". Attribute: **weight** (Parameter): the learnable weights of filters of this layer. **bias** (Parameter or None): the learnable bias of this layer. - Returns: - None + Shape: + - x: :math:`(N, C_{in}, H_{in}, W_{in})` + + - output: :math:`(N, C_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\ + W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\ Examples: ..
code-block:: python import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - from paddle import nn + import paddle + import paddle.nn as nn x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - conv = nn.Conv2DTranspose(4, 6, (3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) + + paddle.disable_static() + + x_var = paddle.to_tensor(x) + conv = nn.ConvTranspose2d(4, 6, (3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 10, 10) """ def __init__(self, - num_channels, - num_filters, - filter_size, - output_size=None, - padding=0, + in_channels, + out_channels, + kernel_size, stride=1, + padding=0, + output_padding=0, dilation=1, groups=1, - param_attr=None, + weight_attr=None, bias_attr=None, - use_cudnn=True, - act=None, - data_format="NCHW", - dtype='float32'): - super(Conv2DTranspose, self).__init__() - assert param_attr is not False, "param_attr should not be False in conv2d_transpose." - self._param_attr = param_attr - self._bias_attr = bias_attr - self._act = act - self._groups = groups - self._num_channels = num_channels - self._num_filters = num_filters - self._use_cudnn = use_cudnn - self._data_format = data_format - self._dtype = dtype - - self._stride = utils.convert_to_list(stride, 2, 'stride') - self._dilation = utils.convert_to_list(dilation, 2, 'dilation') - self._filter_size = utils.convert_to_list(filter_size, 2, 'filter_size') + data_format="NCHW"): + super(ConvTranspose2d, self).__init__( + in_channels, + out_channels, + kernel_size, + True, + 2, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x, output_size=None): if output_size is None: - self._output_size = output_size - elif isinstance(output_size, (list, tuple, int)): - self._output_size = utils.convert_to_list(output_size, 2, - 'output_size') + output_padding = self.output_padding else: - raise ValueError( - "output_size should be int, ot list[int] or tuple[int]") - self._padding = padding - - filter_shape = [self._num_channels, num_filters // groups - ] + self._filter_size - self.weight = self.create_parameter( - dtype=self._dtype, shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + output_padding = 0 - def forward(self, input): - out = F.conv2d_transpose( - input, + out = F.conv_transpose2d( + x, self.weight, bias=self.bias, - output_size=self._output_size, padding=self._padding, + output_padding=output_padding, stride=self._stride, dilation=self._dilation, groups=self._groups, - use_cudnn=self._use_cudnn, - act=self._act, + output_size=output_size, data_format=self._data_format) return out @@ -628,10 +651,8 @@ class Conv3D(layers.Layer): return out -class Conv3DTranspose(layers.Layer): +class ConvTranspose3d(_ConvNd): """ - :alias_main: paddle.nn.Conv3DTranspose - :alias: paddle.nn.Conv3DTranspose,paddle.nn.layer.Conv3DTranspose,paddle.nn.layer.conv.Conv3DTranspose **Convlution3D transpose layer** @@ -664,52 +685,30 @@ class Conv3DTranspose(layers.Layer): Example: - - Input: - - Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - - Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)` - - - Output: - - Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` - - 
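Before the 3D variant, a quick numeric check of the ConvTranspose2d shape formulas above. This is a plain-Python sketch (the helper name `conv_transpose_out_dim` is ours); output_padding simply adds to one side of each spatial dimension:

    def conv_transpose_out_dim(in_dim, kernel, stride=1, padding=0, dilation=1,
                               output_padding=0):
        # H'_out = (H_in - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1
        out = (in_dim - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1
        return out + output_padding

    assert conv_transpose_out_dim(8, 3) == 10   # matches the (2, 6, 10, 10) example above
    assert conv_transpose_out_dim(8, 3, stride=2) == 17
    assert conv_transpose_out_dim(8, 3, stride=2, output_padding=1) == 18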
Where - - .. math:: - - D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\ - H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\ - W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\\\ - D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\\\ - H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\\\ - **Note**: - The conv3d_transpose can be seen as the backward of the conv3d. For conv3d, + The conv_transpose3d can be seen as the backward of the conv3d. For conv3d, when stride > 1, conv3d maps multiple input shapes to the same output shape, - so for conv3d_transpose, when stride > 1, input shape maps multiple output shape. + so for conv_transpose3d, when stride > 1, one input shape maps to multiple output shapes. If output_size is None, :math:`D_{out} = D^\prime_{out}, H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`; else, the :math:`D_{out}` of the output size must be between :math:`D^\prime_{out}` and :math:`D^\prime_{out} + strides[0]`, the :math:`H_{out}` of the output size must be between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must be between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`, - conv3d_transpose can compute the kernel size automatically. + conv_transpose3d can compute the kernel size automatically. Parameters: - num_channels(int): The number of channels in the input image. - num_filters(int): The number of the filter. It is as same as the output - image channel. - filter_size(int|tuple): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - output_size(int or tuple, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use - filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. + in_channels(int): The number of channels in the input image. + out_channels(int): The number of channels produced by the convolution. + kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, + it must contain three integers, (kernel_size_D, kernel_size_H, kernel_size_W). + Otherwise, the kernel will be a cube. + stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. + The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means each spatial dimension (depth, height, width) is zero padded by size of `padding` @@ -717,11 +716,9 @@ class Conv3DTranspose(layers.Layer): 4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input.
Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, - stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. - The default value is 1. - dilation(int|tuple, optional): The dilation size. If dilation is a tuple, it must + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. + dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by @@ -730,7 +727,7 @@ class Conv3DTranspose(layers.Layer): first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. The default value is 1. - param_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights + weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose will create ParamAttr as weight_attr. If the Initializer of the weight_attr is not set, the parameter is initialized with Xavier. The default value is None. @@ -739,10 +736,11 @@ class Conv3DTranspose(layers.Layer): If it is set to None or one attribute of ParamAttr, conv3d_transpose will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. The default value is None. - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. The default value is True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - The default value is None. + output_size(int|list|tuple, optional): The output image size. If output size is a + tuple, it must contain three integers, (image_D, image_H, image_W). If output_size + is None, kernel_size, padding, and stride are used to calculate the output size. + If output_size and kernel_size are specified at the same time, they + should follow the formula above. Default: None. data_format (str, optional): Data format that specifies the layout of input. It can be "NCDHW" or "NDHWC". Default: "NCDHW". @@ -751,8 +749,19 @@ **bias** (Parameter): the learnable bias of this layer. - Returns: - None. + Shape: + - x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` + + - output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\ + H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\ + W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel_size[2] - 1) + 1 \\\\ + Raises: ValueError: If the shapes of input, filter_size, stride, padding and @@ -762,86 +771,64 @@ class Conv3DTranspose(layers.Layer): ..
code-block:: python import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - from paddle import nn + import paddle + import paddle.nn as nn x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32') - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - conv = nn.Conv3DTranspose(4, 6, (3, 3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) + + paddle.disable_static() + + x_var = paddle.to_tensor(x) + conv = nn.ConvTranspose3d(4, 6, (3, 3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 10, 10, 10) """ def __init__(self, - num_channels, - num_filters, - filter_size, - output_size=None, - padding=0, + in_channels, + out_channels, + kernel_size, stride=1, + padding=0, + output_padding=0, dilation=1, groups=1, - param_attr=None, + weight_attr=None, bias_attr=None, - use_cudnn=True, - act=None, - data_format="NCDHW", - dtype='float32'): - super(Conv3DTranspose, self).__init__() - if not isinstance(use_cudnn, bool): - raise ValueError("use_cudnn should be True or False") - assert param_attr is not False, "param_attr should not be False in conv3d_transpose." - self._num_channels = num_channels - self._num_filters = num_filters - self._groups = groups - self._use_cudnn = use_cudnn - self._act = act - self._dtype = dtype - self._data_format = data_format - - self._stride = utils.convert_to_list(stride, 3, 'stride') - self._dilation = utils.convert_to_list(dilation, 3, 'dilation') - self._filter_size = utils.convert_to_list(filter_size, 3, 'filter_size') - channel_last = (data_format == "NDHWC") - self._padding = padding + data_format="NCDHW"): + super(ConvTranspose3d, self).__init__( + in_channels, + out_channels, + kernel_size, + True, + 3, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x, output_size=None): if output_size is None: - self._output_size = output_size - elif isinstance(output_size, (list, tuple, int)): - self._output_size = utils.convert_to_list(output_size, 3, - 'output_size') + output_padding = self.output_padding else: - raise ValueError( - "output_size should be int, ot list[int] or tuple[int]") - - self._param_attr = param_attr - self._bias_attr = bias_attr + output_padding = 0 - filter_shape = [num_channels, num_filters // groups] + self._filter_size - self.weight = self.create_parameter( - dtype=self._dtype, shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) - - def forward(self, input): - out = F.conv3d_transpose( - input, + out = F.conv_transpose3d( + x, self.weight, bias=self.bias, - output_size=self._output_size, padding=self._padding, + output_padding=output_padding, stride=self._stride, dilation=self._dilation, groups=self._groups, - use_cudnn=self._use_cudnn, - act=self._act, + output_size=output_size, data_format=self._data_format) return out
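To round this off, a hedged usage sketch of the reworked call path: output_padding set at construction applies only when no output_size is passed to forward(), which otherwise takes precedence (both forwards above zero out output_padding in that case). This assumes a build of this branch where the renamed layers are importable:

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()

    x = paddle.to_tensor(
        np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32'))

    # output_padding takes effect: (8 - 1) * 2 + (3 - 1) + 1 + 1 = 18
    conv = nn.ConvTranspose3d(4, 6, 3, stride=2, output_padding=1)
    print(conv(x).shape)  # [2, 6, 18, 18, 18]

    # An explicit output_size overrides output_padding; 17 lies in the
    # valid interval [17, 17 + stride).
    print(conv(x, output_size=[17, 17, 17]).shape)  # [2, 6, 17, 17, 17]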