From 4a544762a276985575b14821e0f1334cdb2b4c6c Mon Sep 17 00:00:00 2001
From: zhaoyuchen2018 <45989343+zhaoyuchen2018@users.noreply.github.com>
Date: Tue, 12 Nov 2019 05:30:54 -0600
Subject: [PATCH] Add asymmetric padding support for conv fusion. (#21041)

* Add asymmetric padding support for conv fusion.

test=develop
reference: pr/20042

* Fix eigen build link error

* Change back file mode

* Use math function & add more checks.
---
 .../fluid/operators/fused/conv_fusion_op.cc   |  83 +++++++-
 ...conv_fusion_op.cu.cc => conv_fusion_op.cu} | 104 +++++++++-
 .../tests/unittests/test_conv2d_fusion_op.py  | 188 +++++++++++++++++-
 3 files changed, 359 insertions(+), 16 deletions(-)
 rename paddle/fluid/operators/fused/{conv_fusion_op.cu.cc => conv_fusion_op.cu} (71%)

diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cc b/paddle/fluid/operators/fused/conv_fusion_op.cc
index 30dd35db9e..096d48d730 100644
--- a/paddle/fluid/operators/fused/conv_fusion_op.cc
+++ b/paddle/fluid/operators/fused/conv_fusion_op.cc
@@ -73,15 +73,85 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
     std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
     std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
+    std::string padding_algorithm =
+        ctx->Attrs().Get<std::string>("padding_algorithm");
+    int groups = ctx->Attrs().Get<int>("groups");
 
-    std::vector<int64_t> oshape({in_dims[0], filter_dims[0]});
-    for (size_t i = 0; i < strides.size(); ++i) {
-      oshape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                      dilations[i], paddings[i], strides[i]));
+    framework::DDim in_data_dims;
+    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
+
+    PADDLE_ENFORCE_EQ(
+        in_dims.size() == 4 || in_dims.size() == 5, true,
+        "ShapeError: Conv_fusion input should be a 4-D or 5-D tensor. "
+        "But received: a %u-D tensor, "
+        "the shape of Conv_fusion input is [%s]",
+        in_dims.size(), in_dims);
+
+    PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(),
+                      "ShapeError: Conv_fusion input dimension and filter "
+                      "dimension should be equal. "
+                      "But received: the shape of Conv_fusion input is [%s], "
+                      "the input dimension of Conv_fusion is [%d], "
+                      "the shape of filter is [%s], the filter dimension of "
+                      "Conv_fusion is [%d]",
+                      in_dims, in_dims.size(), filter_dims, filter_dims.size());
+
+    int in_sub_stride_size = in_dims.size() - strides.size();
+    PADDLE_ENFORCE_EQ(
+        in_dims.size() - strides.size() == 2U, true,
+        "ShapeError: the dimension of input minus the dimension of "
+        "stride must be equal to 2. "
+        "But received: the dimension of input minus the dimension "
+        "of stride is [%d], the "
+        "input dimension of Conv_fusion is [%d], the shape of Conv_fusion "
+        "input is [%s], the stride "
+        "dimension of Conv_fusion is [%d]",
+        in_sub_stride_size, in_dims.size(), in_dims, strides.size());
+
+    const auto input_channels = in_dims[1];
+
+    PADDLE_ENFORCE_EQ(
+        input_channels, filter_dims[1] * groups,
+        "ShapeError: The number of input channels should be equal to filter "
+        "channels * groups. But received: the input channels is [%d], the "
+        "shape of input is [%s], the filter channel is [%d], the shape of "
+        "filter is [%s], the groups is [%d]",
+        in_dims[1], in_dims, filter_dims[1], filter_dims, groups);
+    PADDLE_ENFORCE_EQ(
+        filter_dims[0] % groups, 0,
+        "ShapeError: The number of output channels should be divisible by "
+        "groups. But received: the output channels is [%d], the shape of "
+        "filter is [%s] (the first dimension of filter is the output "
+        "channel), the groups is [%d]",
+        filter_dims[0], filter_dims, groups);
+
+    framework::DDim filter_data_dims =
+        framework::slice_ddim(filter_dims, 2, filter_dims.size());
+    std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
+    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                             in_data_dims, strides, ksize);
+
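+    // With the paddings resolved above, each spatial dimension i of the
+    // output follows
+    //   out_i = (in_i + pad_begin_i + pad_end_i
+    //            - (dilation_i * (ksize_i - 1) + 1)) / stride_i + 1,
+    // which is what the asymmetric-padding ConvOutputSize overload computes
+    // below; -1 is pushed instead when the static shape is still unknown.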
+    std::vector<int64_t> output_shape({in_dims[0]});
+    output_shape.push_back(filter_dims[0]);
+
+    for (size_t i = 0; i < in_data_dims.size(); ++i) {
+      if ((!ctx->IsRuntime()) &&
+          (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
+        output_shape.push_back(-1);
+      } else {
+        output_shape.push_back(
+            ConvOutputSize(in_data_dims[i], filter_dims[i + 2], dilations[i],
+                           paddings[2 * i], paddings[2 * i + 1], strides[i]));
+      }
     }
+
     PADDLE_ENFORCE_EQ(ctx->HasOutput("Output"), true,
                       "Output(Output) of ConvOp should not be null.");
-    ctx->SetOutputDim("Output", framework::make_ddim(oshape));
+    ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
+
     std::vector<int> channels =
         ctx->Attrs().Get<std::vector<int>>("split_channels");
     if (channels.size()) {
@@ -90,7 +160,8 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
       std::vector<framework::DDim> oshapes;
       oshapes.reserve(channels.size());
       for (size_t i = 0; i < channels.size(); ++i) {
-        oshapes.push_back({oshape[0], channels[i], oshape[2], oshape[3]});
+        oshapes.push_back(
+            {output_shape[0], channels[i], output_shape[2], output_shape[3]});
       }
       ctx->SetOutputsDim("Outputs", oshapes);
     }
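The UpdatePaddingAndDilation call above is what turns the new padding_algorithm attribute into a concrete (pad_begin, pad_end) pair per spatial dimension before ConvOutputSize runs. Below is a minimal sketch of that resolution, assuming the usual SAME/VALID conventions and that paddings already stores begin/end pairs; the helper name ResolvePadding and its signature are illustrative only, not Paddle's actual API:

    // Illustrative sketch, not Paddle's helper: resolve per-dimension
    // (pad_begin, pad_end) pairs from the padding_algorithm attribute.
    #include <algorithm>
    #include <string>
    #include <vector>

    void ResolvePadding(const std::string& algo, const std::vector<int>& in,
                        const std::vector<int>& ksize,
                        const std::vector<int>& strides,
                        std::vector<int>* paddings, std::vector<int>* dilations) {
      for (size_t i = 0; i < in.size(); ++i) {
        if (algo == "SAME") {
          int out = (in[i] + strides[i] - 1) / strides[i];  // ceil(in / stride)
          int total = std::max((out - 1) * strides[i] + ksize[i] - in[i], 0);
          (*paddings)[2 * i] = total / 2;              // pad_begin
          (*paddings)[2 * i + 1] = total - total / 2;  // pad_end
          (*dilations)[i] = 1;                         // SAME ignores dilation
        } else if (algo == "VALID") {
          (*paddings)[2 * i] = 0;                      // no padding at all
          (*paddings)[2 * i + 1] = 0;
        }
        // "EXPLICIT": keep the user-supplied, possibly asymmetric, pair as-is.
      }
    }

Note that SAME can itself produce an asymmetric pair whenever the total padding is odd, which is one reason the CUDA kernel below has to normalize the padding before handing it to cuDNN.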
diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu.cc b/paddle/fluid/operators/fused/conv_fusion_op.cu
similarity index 71%
rename from paddle/fluid/operators/fused/conv_fusion_op.cu.cc
rename to paddle/fluid/operators/fused/conv_fusion_op.cu
index 0e2f3e1d88..17edccb4ea 100644
--- a/paddle/fluid/operators/fused/conv_fusion_op.cu.cc
+++ b/paddle/fluid/operators/fused/conv_fusion_op.cu
@@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/conv_cudnn_helper.h"
 #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
+#include "paddle/fluid/operators/conv_op.h"
+#include "paddle/fluid/operators/math/padding.h"
 #include "paddle/fluid/platform/cudnn_helper.h"
 
 DECLARE_int64(cudnn_exhaustive_search_times);
@@ -44,6 +47,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_NOT_NULL(bias, "The bias should not be null.");
     auto* residual = ctx.Input<Tensor>("ResidualData");
     auto* output = ctx.Output<Tensor>("Output");
+    output->mutable_data<T>(ctx.GetPlace());
 
     std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
     std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
@@ -55,11 +59,96 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     bool exhaustive_search =
         FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
 
-    const T* input_data = input->data<T>();
+    // const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
     const T* bias_data = bias->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    // T* output_data = output->mutable_data<T>(ctx.GetPlace());
+
+    const std::string padding_algorithm =
+        ctx.Attr<std::string>("padding_algorithm");
+    const std::string data_format = ctx.Attr<std::string>("data_format");
+
+    Tensor transformed_input_channel(input->type());
+    Tensor transformed_output(output->type());
+    T* output_data = nullptr;
+
+    transformed_input_channel = *input;
+    transformed_output = *output;
+    output_data = transformed_output.data<T>();
     const T* residual_data = residual ? residual->data<T>() : output_data;
+    // update padding and dilation
+    auto in_dims = transformed_input_channel.dims();
+    auto filter_dims = filter->dims();
+    framework::DDim in_data_dims;
+    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
+
+    framework::DDim filter_data_dims =
+        framework::slice_ddim(filter_dims, 2, filter_dims.size());
+    std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
+    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                             in_data_dims, strides, ksize);
+
+    int data_dim = strides.size();  // 2d or 3d
+    bool is_sys_pad = math::IsSymmetricPadding(paddings, data_dim);
+
+    Tensor transformed_input;
+    std::vector<int> padding_common(data_dim, 0);
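+    // cuDNN's convolution descriptor accepts a single padding value per
+    // spatial dimension. If any dimension ends up with pad_begin != pad_end,
+    // explicitly pad the input tensor by the difference and hand the common
+    // (minimum) padding to cuDNN; otherwise the input can be used unchanged.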
+    if (!is_sys_pad) {
+      std::vector<int> padding_diff(data_dim);
+      std::vector<int> new_input_shape_vec(data_dim + 2);
+      new_input_shape_vec[0] = transformed_input_channel.dims()[0];
+      new_input_shape_vec[1] = transformed_input_channel.dims()[1];
+
+      std::vector<int> input_pad(transformed_input_channel.dims().size() * 2,
+                                 0);
+      for (size_t i = 0; i < data_dim; ++i) {
+        padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
+        padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
+        new_input_shape_vec[i + 2] =
+            transformed_input_channel.dims()[i + 2] + padding_diff[i];
+        input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
+        input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
+      }
+      framework::DDim new_input_shape(
+          framework::make_ddim(new_input_shape_vec));
+      transformed_input.Resize(new_input_shape);
+      auto& dev_ctx =
+          ctx.template device_context<paddle::platform::CUDADeviceContext>();
+
+      transformed_input =
+          ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
+              new_input_shape, dev_ctx);
+      const int rank = transformed_input_channel.dims().size();
+      T pad_value(0.0);
+      switch (rank) {
+        case 4: {
+          math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
+              ctx, input_pad, transformed_input_channel, pad_value,
+              &transformed_input);
+        } break;
+        case 5: {
+          math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
+              ctx, input_pad, transformed_input_channel, pad_value,
+              &transformed_input);
+        } break;
+        default:
+          PADDLE_THROW("ConvOp only supports tensors with 4 or 5 dimensions.");
+      }
+
+    } else {
+      transformed_input = transformed_input_channel;
+      if (paddings.size() == data_dim) {
+        for (size_t i = 0; i < data_dim; ++i) {
+          padding_common[i] = paddings[i];
+        }
+      } else {
+        for (size_t i = 0; i < data_dim; ++i) {
+          padding_common[i] = paddings[2 * i];
+        }
+      }
+    }
+
+    const T* input_data = transformed_input.data<T>();
 
     // ------------------- cudnn descriptors ---------------------
     ScopedTensorDescriptor input_desc;
@@ -74,18 +163,19 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     }
 
     cudnnConvolutionDescriptor_t cudnn_conv_desc =
-        conv_desc.descriptor<T>(paddings, strides, dilations);
+        conv_desc.descriptor<T>(padding_common, strides, dilations);
     CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
         cudnn_conv_desc, groups));
 
     cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-        layout, framework::vectorize<int>(input->dims()));
+        layout, framework::vectorize<int>(transformed_input.dims()));
     cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-        layout, framework::vectorize<int>(output->dims()));
+        layout, framework::vectorize<int>(transformed_output.dims()));
     cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
         layout, framework::vectorize<int>(filter->dims()));
     // Now only support NCHW
-    std::vector<int> bias_dim = {1, static_cast<int>(output->dims()[1]), 1, 1};
+    std::vector<int> bias_dim = {
+        1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
     cudnnTensorDescriptor_t cudnn_bias_desc =
         bias_desc.descriptor<T>(layout, bias_dim);
     cudnnActivationDescriptor_t cudnn_act_desc =
@@ -109,7 +199,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
         cudnn_conv_desc, CUDNN_DEFAULT_MATH));
 
-    auto x_dims = framework::vectorize(input->dims());
+    auto x_dims = framework::vectorize(transformed_input.dims());
     auto f_dims = framework::vectorize(filter->dims());
     if (!exhaustive_search) {
       CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py
index 3a302f2c41..94ea62d793 100644
--- a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py
@@ -23,23 +23,47 @@ from op_test import OpTest
 from test_conv2d_op import conv2d_forward_naive
 
 
+def create_test_padding_SAME_class(parent):
+    class TestPaddingSAMECase(parent):
+        def init_paddings(self):
+            self.pad = [0, 0]
+            self.padding_algorithm = "SAME"
+
+    cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
+    TestPaddingSAMECase.__name__ = cls_name
+    globals()[cls_name] = TestPaddingSAMECase
+
+
+def create_test_padding_VALID_class(parent):
+    class TestPaddingVALIDCase(parent):
+        def init_paddings(self):
+            self.pad = [1, 1]
+            self.padding_algorithm = "VALID"
+
+    cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
+    TestPaddingVALIDCase.__name__ = cls_name
+    globals()[cls_name] = TestPaddingVALIDCase
+
+
 class TestConv2dFusionOp(OpTest):
     def setUp(self):
         self.op_type = "conv2d_fusion"
         self.exhaustive_search = False
-        self.data_format = "AnyLayout"
+        self.data_format = "NCHW"
         self.dtype = np.float32
         self.activation = 'relu'
         self.add_bias = True
         self.add_residual_data = True
         self.channels = None
         self.outputs = None
+        self.padding_algorithm = "EXPLICIT"
 
         self.init_group()
         self.init_dilation()
self.init_test_case() self.init_bias_residual() self.init_activation() + self.init_paddings() self.set_search_method() conv2d_param = { @@ -52,7 +76,9 @@ class TestConv2dFusionOp(OpTest): filter = np.random.random(self.filter_size).astype(self.dtype) self.output, _, _, _, _ = conv2d_forward_naive( - input, filter, self.groups, conv2d_param) + input, filter, self.groups, conv2d_param, self.padding_algorithm, + self.data_format) + self.output = self.output.astype(self.dtype) self.inputs = { @@ -84,7 +110,8 @@ class TestConv2dFusionOp(OpTest): 'data_format': self.data_format, 'exhaustive_search': self.exhaustive_search, 'activation': self.activation, - 'split_channels': self.channels + 'split_channels': self.channels, + 'padding_algorithm': self.padding_algorithm } self.outputs = {'Output': self.output} @@ -127,6 +154,10 @@ class TestConv2dFusionOp(OpTest): def set_outputs(self): pass + def init_paddings(self): + self.pad = [0, 0] + self.padding_algorithm = "EXPLICIT" + class TestWithoutResidual(TestConv2dFusionOp): def init_bias_residual(self): @@ -186,5 +217,156 @@ class TestMultipleOutputs(TestConv2dFusionOp): self.outputs['Outputs'] = [('out1', out1), ('out2', out2)] +class TestAsyPadding(TestConv2dFusionOp): + def init_paddings(self): + self.pad = [0, 0, 1, 2] + self.padding_algorithm = "EXPLICIT" + + +class TestWithPad_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_paddings(self): + self.pad = [2, 1, 3, 2] + self.padding_algorithm = "EXPLICIT" + + +class TestWithStride_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_paddings(self): + self.pad = [2, 1, 3, 2] + self.padding_algorithm = "EXPLICIT" + + +class TestWith1x1_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_group(self): + self.groups = 3 + + def init_paddings(self): + self.pad = [2, 2, 4, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithGroup_AsyPadding(TestConv2dFusionOp): + def init_group(self): + self.groups = 3 + + +class TestWithDepthWise3x3_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [3, 4, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [8, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + + def init_group(self): + self.groups = 4 + + def init_paddings(self): + self.pad = [1, 3, 2, 1] + self.padding_algorithm = "EXPLICIT" + + +class TestWithDepthWise5x5_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 4, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [8, f_c, 5, 5] + + def init_group(self): + self.groups = 4 + + def init_paddings(self): + self.pad = [0, 1, 1, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithDepthWise7x7_AsyPadding(TestConv2dFusionOp): + 
def init_test_case(self): + self.stride = [2, 2] + self.input_size = [2, 8, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [16, f_c, 7, 7] + + def init_group(self): + self.groups = 8 + + def init_paddings(self): + self.pad = [1, 3, 4, 1] + self.padding_algorithm = "EXPLICIT" + + +class TestWithDilation_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + + def init_group(self): + self.groups = 3 + + def init_paddings(self): + self.pad = [0, 1, 3, 0] + self.padding_algorithm = "EXPLICIT" + + +class TestWithInput1x1Filter1x1_AsyPadding(TestConv2dFusionOp): + def init_test_case(self): + self.stride = [1, 1] + self.input_size = [2, 3, 1, 1] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_group(self): + self.groups = 3 + + def init_paddings(self): + self.pad = [0, 3, 4, 0] + self.padding_algorithm = "EXPLICIT" + + +create_test_padding_SAME_class(TestAsyPadding) +create_test_padding_SAME_class(TestWithPad_AsyPadding) +create_test_padding_SAME_class(TestWithStride_AsyPadding) +create_test_padding_SAME_class(TestWithGroup_AsyPadding) +create_test_padding_SAME_class(TestWithInput1x1Filter1x1_AsyPadding) + +create_test_padding_VALID_class(TestAsyPadding) +create_test_padding_VALID_class(TestWithPad_AsyPadding) +create_test_padding_VALID_class(TestWithStride_AsyPadding) +create_test_padding_VALID_class(TestWithGroup_AsyPadding) +create_test_padding_VALID_class(TestWithInput1x1Filter1x1_AsyPadding) + if __name__ == '__main__': unittest.main() -- GitLab