Commit 253f618a authored by D Dun, committed by whs

loosen the restriction of output_size in conv2d_transpose (#12292)

* loosen the restriction of output_size in conv2d_transpose

* test and docs

* fix code style

* fix ci test error

* bug fix

* fix python3 issue
Parent cec94cab
@@ -29,6 +29,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  std::vector<int> output_size =
      ctx->Attrs().Get<std::vector<int>>("output_size");
  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
@@ -42,6 +44,10 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(in_dims.size() - strides.size() == 2U,
                 "ConvTransposeOp input dimension and strides dimension should "
                 "be consistent.");
  if (output_size.size())
    PADDLE_ENFORCE_EQ(output_size.size(), strides.size(),
                      "ConvTransposeOp output_size dimension and strides "
                      "dimension should be the same.");
  PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
                    "ConvTransposeOp paddings dimension and strides "
                    "dimension should be the same.");
@@ -55,8 +61,17 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
  for (size_t i = 0; i < strides.size(); ++i) {
    auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
    auto infer_shape =
        (in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + filter_extent;
    if (output_size.size()) {
      PADDLE_ENFORCE((output_size[i] >= infer_shape &&
                      output_size[i] < infer_shape + strides[i]),
                     "ConvTransposeOp output_size should be "
                     "in appropriate range.");
      output_shape.push_back(output_size[i]);
    } else {
      output_shape.push_back(infer_shape);
    }
  }
  ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
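For readers skimming the diff, the check above can be restated in Python (a minimal sketch of the same rule, not part of the patch; the helper name is made up):

```python
def infer_output_dim(in_dim, filter_dim, stride, padding, dilation,
                     output_dim=None):
    """Restate ConvTransposeOp::InferShape for one spatial dimension.

    The inferred size is the smallest legal output; any requested size
    below infer + stride maps back to the same input size under the
    forward convolution, so it is accepted.
    """
    filter_extent = dilation * (filter_dim - 1) + 1
    infer = (in_dim - 1) * stride - 2 * padding + filter_extent
    if output_dim is None:
        return infer
    if not (infer <= output_dim < infer + stride):
        raise ValueError("output_size should be in [%d, %d)"
                         % (infer, infer + stride))
    return output_dim
```

The upper bound is strict because at infer_shape + strides[i] the forward convolution would already produce a larger input, so only sizes in [infer_shape, infer_shape + strides[i]) round-trip to the original input size.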
@@ -103,6 +118,10 @@ void Conv2DTransposeOpMaker::Make() {
  AddOutput("Output",
            "(Tensor) The output tensor of convolution transpose operator. "
            "The format of output tensor is also NCHW.");
  AddAttr<std::vector<int>>("output_size",
                            "(vector<int> default: []), the "
                            "size of the output tensor")
      .SetDefault({});
  AddAttr<int>("groups",
               "(int default:1), the groups number of the convolution "
               "transpose operator. ")
@@ -192,7 +211,10 @@ void Conv3DTransposeOpMaker::Make() {
           "Where N is batch size, C is "
           "the number of channels, D is the depth of the feature, H is the "
           "height of the feature, and W is the width of the feature.");
  AddAttr<std::vector<int>>("output_size",
                            "(vector<int> default: []), the "
                            "size of the output tensor")
      .SetDefault({});
  AddAttr<std::vector<int>>(
      "dilations",
      "(vector<int> default:{1, 1, 1}), the "
@@ -247,7 +269,7 @@ Parameters(strides, paddings) are three elements. These three elements represent
depth, height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$
...
@@ -2342,16 +2342,20 @@ def conv2d_transpose(input,
    .. math::

       H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
       W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
       H_{out} \in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
       W_{out} \in [ W^\prime_{out}, W^\prime_{out} + strides[1] )

    Args:
        input(Variable): The input image with [N, C, H, W] format.
        num_filters(int): The number of the filter. It is the same as the
            output image channel.
        output_size(int|tuple|None): The output image size. If output size is a
            tuple, it must contain two integers, (image_H, image_W). If None,
            the output size is calculated from filter_size, padding, and stride.
            If output_size and filter_size are specified at the same time, they
            should follow the formula above.
        filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square. None if use output size to
@@ -2429,7 +2433,13 @@ def conv2d_transpose(input,
    else:
        filter_size = utils.convert_to_list(filter_size, 2,
                                            'conv2d_transpose.filter_size')

    if output_size is None:
        output_size = []
    elif isinstance(output_size, list) or isinstance(output_size, int):
        output_size = utils.convert_to_list(output_size, 2, 'output_size')
    else:
        raise ValueError("output_size should be list or int")
    padding = utils.convert_to_list(padding, 2, 'padding')
    groups = 1 if groups is None else groups
    filter_shape = [input_channel, num_filters // groups] + filter_size
    img_filter = helper.create_parameter(
@@ -2442,6 +2452,7 @@ def conv2d_transpose(input,
            'Filter': [img_filter]},
        outputs={'Output': pre_bias},
        attrs={
            'output_size': output_size,
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
...
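To make the loosened Python API concrete, here is a hedged usage sketch (Fluid-era layers API as it existed around this change; the shapes are illustrative and mirror the new test case below):

```python
import paddle.fluid as fluid

data = fluid.layers.data(name='data', shape=[3, 7, 7], dtype='float32')
# Previously output_size was honored only when filter_size was None;
# with this patch both can be given, as long as each output dimension
# falls in [H'_out, H'_out + stride).
out = fluid.layers.conv2d_transpose(
    input=data,
    num_filters=6,
    output_size=[14, 14],  # inferred size is 13; 13 <= 14 < 13 + 2, so valid
    filter_size=5,
    stride=2,
    padding=2)
```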
@@ -35,6 +35,10 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
    d_bolck_w = dilations[1] * (f_w - 1) + 1

    out_h = (in_h - 1) * stride[0] + d_bolck_h
    out_w = (in_w - 1) * stride[1] + d_bolck_w
    if 'output_size' in attrs:
        output_size = attrs['output_size']
        out_h = output_size[0] + 2 * pad[0]
        out_w = output_size[1] + 2 * pad[1]

    out = np.zeros((in_n, out_c, out_h, out_w))
@@ -65,6 +69,7 @@ class TestConv2dTransposeOp(OpTest):
    def setUp(self):
        # init as conv transpose
        self.use_cudnn = False
        self.output_size = None
        self.init_op_type()
        self.init_test_case()
@@ -80,6 +85,8 @@ class TestConv2dTransposeOp(OpTest):
            'use_cudnn': self.use_cudnn,
            'data_format': 'AnyLayout'  # TODO(dzhwinter): should be fixed later
        }
        if self.output_size is not None:
            self.attrs['output_size'] = self.output_size

        output = conv2dtranspose_forward_naive(input_, filter_,
                                               self.attrs).astype('float32')
@@ -192,6 +199,18 @@ class TestWithDilation(TestConv2dTransposeOp):
        self.filter_size = [f_c, 6, 3, 3]


class TestWithEvenUpsample(TestConv2dTransposeOp):
    def init_test_case(self):
        self.pad = [2, 2]
        self.stride = [2, 2]
        self.groups = 1
        self.dilations = [1, 1]
        self.output_size = [14, 14]
        self.input_size = [2, 3, 7, 7]  # NCHW
        f_c = self.input_size[1]
        self.filter_size = [f_c, 6, 5, 5]


# ------------ test_cudnn ------------
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
@@ -265,6 +284,15 @@ class TestDepthwiseConvTranspose(TestConv2dTransposeOp):
        self.op_type = "depthwise_conv2d_transpose"


# ------------ test_cudnn ------------
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestCUDNNWithEvenUpsample(TestWithEvenUpsample):
    def init_op_type(self):
        self.use_cudnn = True
        self.op_type = "conv2d_transpose"


# Please don't remove the following code.
# Currently, CI uses cuDNN v5.0, which does not support dilation conv.
# class TestCUDNNWithDilation(TestWithDilation):
...
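As a quick sanity check on TestWithEvenUpsample (a verification sketch, not part of the test file), the chosen output_size of 14 sits inside the valid window:

```python
# input 7x7, stride 2, padding 2, dilation 1, 5x5 filter
filter_extent = 1 * (5 - 1) + 1                # 5
infer = (7 - 1) * 2 - 2 * 2 + filter_extent    # 13
assert infer <= 14 < infer + 2                 # 14 is in [13, 15)
```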