From 253f618ac712cbae46b7e3a4e81bf4540e4527fb Mon Sep 17 00:00:00 2001
From: Dun <randonlang@gmail.com>
Date: Wed, 19 Sep 2018 11:10:41 +0800
Subject: [PATCH] loosen the restriction of output_size in conv2d_transpose
 (#12292)

* loosen the restriction of output_size in conv2d_transpose

* test and docs

* fix code style

* fix ci test error

* bug fix

* fix python3 issue
---
 paddle/fluid/operators/conv_transpose_op.cc   | 30 ++++++++++++++++---
 python/paddle/fluid/layers/nn.py              | 21 +++++++++----
 .../unittests/test_conv2d_transpose_op.py     | 28 +++++++++++++++++
 3 files changed, 70 insertions(+), 9 deletions(-)
diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc
index eeb98ee44f2..a916dd3496f 100644
--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -29,6 +29,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
 
   auto in_dims = ctx->GetInputDim("Input");
   auto filter_dims = ctx->GetInputDim("Filter");
+  std::vector<int> output_size =
+      ctx->Attrs().Get<std::vector<int>>("output_size");
   std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
   std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
   std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
@@ -42,6 +44,10 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
   PADDLE_ENFORCE(in_dims.size() - strides.size() == 2U,
                  "ConvTransposeOp input dimension and strides dimension should "
                  "be consistent.");
+  if (output_size.size())
+    PADDLE_ENFORCE_EQ(output_size.size(), strides.size(),
+                      "ConvTransposeOp output_size dimension and strides "
+                      "dimension should be the same.");
   PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
                     "ConvTransposeOp paddings dimension and strides "
                     "dimension should be the same.");
@@ -55,8 +61,17 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
   std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
   for (size_t i = 0; i < strides.size(); ++i) {
     auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
-    output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] +
-                           filter_extent);
+    auto infer_shape =
+        (in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + filter_extent;
+    if (output_size.size()) {
+      PADDLE_ENFORCE((output_size[i] >= infer_shape &&
+                      output_size[i] < infer_shape + strides[i]),
+                     "ConvTransposeOp output_size should be "
+                     "in appropriate range.");
+      output_shape.push_back(output_size[i]);
+    } else {
+      output_shape.push_back(infer_shape);
+    }
   }
   ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
 }
@@ -103,6 +118,10 @@ void Conv2DTransposeOpMaker::Make() {
   AddOutput("Output",
             "(Tensor) The output tensor of convolution transpose operator. "
             "The format of output tensor is also NCHW.");
+  AddAttr<std::vector<int>>("output_size",
+                            "(vector<int> default: []), the "
+                            "size of the output tensor")
+      .SetDefault({});
   AddAttr<int>("groups",
                "(int default:1), the groups number of the convolution "
                "transpose operator. ")
@@ -192,7 +211,10 @@ void Conv3DTransposeOpMaker::Make() {
             "Where N is batch size, C is "
             "the number of channels, D is the depth of the feature, H is the "
             "height of the feature, and W is the width of the feature.");
-
+  AddAttr<std::vector<int>>("output_size",
+                            "(vector<int> default: []), the "
+                            "size of the output tensor")
+      .SetDefault({});
   AddAttr<std::vector<int>>(
       "dilations",
       "(vector<int> default:{1, 1, 1}), the "
@@ -247,7 +269,7 @@ Parameters(strides, paddings) are three elements. These three elements represent
 depth, height and width, respectively.
 The input(X) size and output(Out) size may be different.
 
-Example:   
+Example:
   Input:
        Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
        Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index a1a966be2c0..b0b3b0ad6f2 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2342,16 +2342,20 @@ def conv2d_transpose(input,
 
         .. math::
 
-           H_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
-           W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
+           H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
+           W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
+           H_{out} \in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
+           W_{out} \in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
 
     Args:
         input(Variable): The input image with [N, C, H, W] format.
         num_filters(int): The number of the filter. It is as same as the output
             image channel.
         output_size(int|tuple|None): The output image size. If output size is a
-            tuple, it must contain two integers, (image_H, image_W). This
-            parameter only works when filter_size is None.
+            tuple, it must contain two integers, (image_H, image_W). Pass None
+            to compute output_size from filter_size, padding, and stride.
+            If output_size and filter_size are both specified, they must
+            satisfy the formula above.
         filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
             it must contain two integers, (filter_size_H, filter_size_W).
             Otherwise, the filter will be a square. None if use output size to
@@ -2429,7 +2433,13 @@ def conv2d_transpose(input,
     else:
         filter_size = utils.convert_to_list(filter_size, 2,
                                             'conv2d_transpose.filter_size')
-
+    if output_size is None:
+        output_size = []  # empty attr: the op infers the output size itself
+    elif isinstance(output_size, (list, tuple, int)):
+        output_size = utils.convert_to_list(output_size, 2, 'output_size')
+    else:
+        raise ValueError("output_size should be int, list or tuple")
+    padding = utils.convert_to_list(padding, 2, 'padding')
     groups = 1 if groups is None else groups
     filter_shape = [input_channel, num_filters // groups] + filter_size
     img_filter = helper.create_parameter(
@@ -2442,6 +2452,7 @@ def conv2d_transpose(input,
                 'Filter': [img_filter]},
         outputs={'Output': pre_bias},
         attrs={
+            'output_size': output_size,
             'strides': stride,
             'paddings': padding,
             'dilations': dilation,
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
index 2a320e735bd..5bb769b1689 100644
--- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
@@ -35,6 +35,10 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
     d_bolck_w = dilations[1] * (f_w - 1) + 1
     out_h = (in_h - 1) * stride[0] + d_bolck_h
     out_w = (in_w - 1) * stride[1] + d_bolck_w
+    if 'output_size' in attrs:
+        output_size = attrs['output_size']
+        out_h = output_size[0] + 2 * pad[0]
+        out_w = output_size[1] + 2 * pad[1]
 
     out = np.zeros((in_n, out_c, out_h, out_w))
 
@@ -65,6 +69,7 @@ class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
         self.use_cudnn = False
+        self.output_size = None
         self.init_op_type()
         self.init_test_case()
 
@@ -80,6 +85,8 @@ class TestConv2dTransposeOp(OpTest):
             'use_cudnn': self.use_cudnn,
             'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
+        if self.output_size is not None:
+            self.attrs['output_size'] = self.output_size
 
         output = conv2dtranspose_forward_naive(input_, filter_,
                                                self.attrs).astype('float32')
@@ -192,6 +199,18 @@ class TestWithDilation(TestConv2dTransposeOp):
         self.filter_size = [f_c, 6, 3, 3]
 
 
+class TestWithEvenUpsample(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]  # inferred size is 13; 14 < 13 + stride
+        self.input_size = [2, 3, 7, 7]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 5, 5]
+
+
 # ------------ test_cudnn ------------
 @unittest.skipIf(not core.is_compiled_with_cuda(),
                  "core is not compiled with CUDA")
@@ -265,6 +284,15 @@ class TestDepthwiseConvTranspose(TestConv2dTransposeOp):
         self.op_type = "depthwise_conv2d_transpose"
 
 
+# ------------ test_cudnn (explicit output_size) ------------
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample(TestWithEvenUpsample):
+    def init_op_type(self):
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+
 # Please Don't remove the following code.
 # Currently, CI use cudnn V5.0 which not support dilation conv.
 # class TestCUDNNWithDilation(TestWithDilation):
-- 
GitLab