Commit 253f618a authored by D Dun, committed by whs

loosen the restriction of output_size in conv2d_transpose (#12292)

* loosen the restriction of output_size in conv2d_transpose

* test and docs

* fix code style

* fix ci test error

* bug fix

* fix python3 issue
Parent cec94cab
@@ -29,6 +29,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> output_size =
ctx->Attrs().Get<std::vector<int>>("output_size");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations"); std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
@@ -42,6 +44,10 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(in_dims.size() - strides.size() == 2U,
                 "ConvTransposeOp input dimension and strides dimension should "
                 "be consistent.");
  if (output_size.size())
    PADDLE_ENFORCE_EQ(output_size.size(), strides.size(),
                      "ConvTransposeOp output_size dimension and strides "
                      "dimension should be the same.");
  PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
                    "ConvTransposeOp paddings dimension and strides "
                    "dimension should be the same.");
@@ -55,8 +61,17 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
  for (size_t i = 0; i < strides.size(); ++i) {
    auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
    auto infer_shape =
        (in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + filter_extent;
    if (output_size.size()) {
      PADDLE_ENFORCE((output_size[i] >= infer_shape &&
                      output_size[i] < infer_shape + strides[i]),
                     "ConvTransposeOp output_size should be "
                     "in appropriate range.");
      output_shape.push_back(output_size[i]);
    } else {
      output_shape.push_back(infer_shape);
    }
  }
  ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
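In effect, the new check accepts any output_size in the half-open interval [infer_shape, infer_shape + stride) per spatial dimension. A minimal Python sketch of that range computation (function and parameter names are illustrative, not part of the patch):

```python
def valid_output_size_range(in_size, stride, padding, dilation, kernel):
    """Mirrors the InferShape logic above for one spatial dimension."""
    filter_extent = dilation * (kernel - 1) + 1
    infer_shape = (in_size - 1) * stride - 2 * padding + filter_extent
    # output_size must satisfy: infer_shape <= output_size < infer_shape + stride
    return range(infer_shape, infer_shape + stride)

# A 7-wide input with stride 2, padding 2, and a 5-wide kernel admits 13 or 14.
print(list(valid_output_size_range(7, 2, 2, 1, 5)))  # [13, 14]
```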
@@ -103,6 +118,10 @@ void Conv2DTransposeOpMaker::Make() {
AddOutput("Output", AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator. " "(Tensor) The output tensor of convolution transpose operator. "
"The format of output tensor is also NCHW."); "The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("output_size",
"(vector<int> default: []), the "
"size of the output tensor")
.SetDefault({});
AddAttr<int>("groups", AddAttr<int>("groups",
"(int default:1), the groups number of the convolution " "(int default:1), the groups number of the convolution "
"transpose operator. ") "transpose operator. ")
@@ -192,7 +211,10 @@ void Conv3DTransposeOpMaker::Make() {
"Where N is batch size, C is " "Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the " "the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature."); "height of the feature, and W is the width of the feature.");
AddAttr<std::vector<int>>("output_size",
"(vector<int> default: []), the "
"size of the output tensor")
.SetDefault({});
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"dilations", "dilations",
"(vector<int> default:{1, 1, 1}), the " "(vector<int> default:{1, 1, 1}), the "
......
@@ -2342,16 +2342,20 @@ def conv2d_transpose(input,
    .. math::

       H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
       W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
       H_{out} \in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
       W_{out} \in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
    Args:
        input(Variable): The input image with [N, C, H, W] format.
        num_filters(int): The number of the filter. It is the same as the
            output image channel.
        output_size(int|tuple|None): The output image size. If output size is a
            tuple, it must contain two integers, (image_H, image_W). If
            output_size is None, it is calculated from filter_size, padding,
            and stride. If output_size and filter_size are specified at the
            same time, they should follow the formula above.
        filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square. None if use output size to
@@ -2429,7 +2433,13 @@ def conv2d_transpose(input,
    else:
        filter_size = utils.convert_to_list(filter_size, 2,
                                            'conv2d_transpose.filter_size')

    if output_size is None:
        output_size = []
    elif isinstance(output_size, (list, tuple, int)):
        output_size = utils.convert_to_list(output_size, 2, 'output_size')
    else:
        raise ValueError("output_size should be int, list or tuple")

    padding = utils.convert_to_list(padding, 2, 'padding')
    groups = 1 if groups is None else groups
    filter_shape = [input_channel, num_filters // groups] + filter_size
    img_filter = helper.create_parameter(
@@ -2442,6 +2452,7 @@ def conv2d_transpose(input,
            'Filter': [img_filter]},
        outputs={'Output': pre_bias},
        attrs={
            'output_size': output_size,
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
......
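With the restriction loosened, output_size and filter_size can now be passed together, as long as output_size lands in the interval above. A hedged usage sketch (layer names and shapes are illustrative and assume the fluid.layers API of this era, not text from the patch):

```python
import paddle.fluid as fluid

data = fluid.layers.data(name='data', shape=[3, 7, 7], dtype='float32')
# Stride 2, padding 2, and a 5x5 kernel on a 7x7 input infer size 13; any
# size in [13, 15) is accepted, so an even 14x14 output is now expressible.
out = fluid.layers.conv2d_transpose(
    input=data,
    num_filters=6,
    output_size=[14, 14],
    filter_size=5,
    stride=2,
    padding=2)
```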
@@ -35,6 +35,10 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
    d_bolck_w = dilations[1] * (f_w - 1) + 1

    out_h = (in_h - 1) * stride[0] + d_bolck_h
    out_w = (in_w - 1) * stride[1] + d_bolck_w
    if 'output_size' in attrs:
        output_size = attrs['output_size']
        out_h = output_size[0] + 2 * pad[0]
        out_w = output_size[1] + 2 * pad[1]

    out = np.zeros((in_n, out_c, out_h, out_w))
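Note that out_h and out_w here include the 2 * pad margins: the naive reference accumulates into the full padded canvas, and the padding is presumably cropped off after the accumulation loop, so the requested output_size has to be widened by the padding on both sides.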
@@ -65,6 +69,7 @@ class TestConv2dTransposeOp(OpTest):
    def setUp(self):
        # init as conv transpose
        self.use_cudnn = False
        self.output_size = None
        self.init_op_type()
        self.init_test_case()
@@ -80,6 +85,8 @@ class TestConv2dTransposeOp(OpTest):
            'use_cudnn': self.use_cudnn,
            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
        }
        if self.output_size is not None:
            self.attrs['output_size'] = self.output_size

        output = conv2dtranspose_forward_naive(input_, filter_,
                                               self.attrs).astype('float32')
@@ -192,6 +199,18 @@ class TestWithDilation(TestConv2dTransposeOp):
        self.filter_size = [f_c, 6, 3, 3]


class TestWithEvenUpsample(TestConv2dTransposeOp):
    def init_test_case(self):
        self.pad = [2, 2]
        self.stride = [2, 2]
        self.groups = 1
        self.dilations = [1, 1]
        self.output_size = [14, 14]
        self.input_size = [2, 3, 7, 7]  # NCHW
        f_c = self.input_size[1]
        self.filter_size = [f_c, 6, 5, 5]
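The numbers in this test line up with the new range check: infer_shape = (7 - 1) * 2 - 2 * 2 + (5 - 1) + 1 = 13, so any output_size in [13, 15) is legal, and 14 exercises exactly the even-sized upsampling that the old restriction could not express.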
# ------------ test_cudnn ------------
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
@@ -265,6 +284,15 @@ class TestDepthwiseConvTranspose(TestConv2dTransposeOp):
self.op_type = "depthwise_conv2d_transpose" self.op_type = "depthwise_conv2d_transpose"
# ------------ test_cudnn ------------
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestCUDNNWithEvenUpsample(TestWithEvenUpsample):
    def init_op_type(self):
        self.use_cudnn = True
        self.op_type = "conv2d_transpose"
# Please Don't remove the following code.
# Currently, CI use cudnn V5.0 which not support dilation conv.
# class TestCUDNNWithDilation(TestWithDilation):
......