From 389734525fb50599e3ba459d24db78caa4a79790 Mon Sep 17 00:00:00 2001
From: xiongkun
Date: Thu, 1 Sep 2022 14:09:23 +0800
Subject: [PATCH] [XPU] Tranfer xpu: conv2d into phi. (#45612)

* tranfer xpu: conv2d into phi

* tranfer xpu: conv2d into phi test=kunlun

* test=kunlun

* test=kunlun

* test=kunlun
---
 paddle/fluid/operators/conv_op_xpu.cc      | 261 ---------------------
 paddle/phi/kernels/conv_kernel.h           |   2 +-
 paddle/phi/kernels/xpu/conv_grad_kernel.cc | 194 +++++++++++++++
 paddle/phi/kernels/xpu/conv_kernel.cc      | 149 ++++++++++++
 4 files changed, 344 insertions(+), 262 deletions(-)
 delete mode 100644 paddle/fluid/operators/conv_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/conv_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/xpu/conv_kernel.cc

diff --git a/paddle/fluid/operators/conv_op_xpu.cc b/paddle/fluid/operators/conv_op_xpu.cc
deleted file mode 100644
index 638983ea26b..00000000000
--- a/paddle/fluid/operators/conv_op_xpu.cc
+++ /dev/null
@@ -1,261 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/operators/conv_op.h"
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#ifdef PADDLE_WITH_XPU
-namespace paddle {
-namespace operators {
-
-template <typename DeviceContext, typename T>
-class GemmConvXPUKernel : public framework::OpKernel<T> {
-  using XPUT = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *input = context.Input<Tensor>("Input");
-    // The filter will be reshaped in the calculations,
-    // so here use an assignment operation,
-    // that avoids modifying the variable in the Scope.
-    Tensor filter = *context.Input<Tensor>("Filter");
-    Tensor *output = context.Output<Tensor>("Output");
-    output->mutable_data<T>(context.GetPlace());
-    int groups = context.Attr<int>("groups");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
-    const std::string data_format = context.Attr<std::string>("data_format");
-    const std::string padding_algorithm =
-        context.Attr<std::string>("padding_algorithm");
-
-    PADDLE_ENFORCE_EQ(
-        data_format == "NDHWC",
-        false,
-        platform::errors::InvalidArgument(
-            ("XPU does not support data_format is NDHWC in conv op.")));
-
-    framework::DDim in_data_dims =
-        phi::slice_ddim(input->dims(), 2, input->dims().size());
-    framework::DDim filter_data_dims =
-        phi::slice_ddim(filter.dims(), 2, filter.dims().size());
-    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-    UpdatePaddingAndDilation(
-        &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
-
-    int batch_size = static_cast<int>(input->dims()[0]);
-    int img_c = static_cast<int>(input->dims()[1]);
-    int img_h = static_cast<int>(input->dims()[2]);
-    int img_w = static_cast<int>(input->dims()[3]);
-    int f = static_cast<int>(filter.dims()[0]);
-    bool is_nchw = true;
-    if (data_format == "NHWC") {
-      img_c = static_cast<int>(input->dims()[3]);
-      img_h = static_cast<int>(input->dims()[1]);
-      img_w = static_cast<int>(input->dims()[2]);
-      is_nchw = false;
-    }
-
-    const XPUT *input_data = reinterpret_cast<const XPUT *>(input->data<T>());
-    const XPUT *filter_data = reinterpret_cast<const XPUT *>(filter.data<T>());
-    XPUT *output_data = reinterpret_cast<XPUT *>(output->data<T>());
-
-    auto &dev_ctx = context.template device_context<DeviceContext>();
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-
-    XPUT *filter_data_tmp;
-    const XPUT *filter_data_ptr = filter_data;
-    if (data_format == "NHWC") {
-      filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
-      PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
-      std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
-      int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
-                                   filter_data,
-                                   filter_data_tmp,
-                                   filter_shape,
-                                   {0, 2, 3, 1});
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
-      filter_data_ptr = reinterpret_cast<const XPUT *>(filter_data_tmp);
-    }
-
-    int r = xpu::conv2d<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
-                                                   input_data,
-                                                   filter_data_ptr,
-                                                   output_data,
-                                                   batch_size,
-                                                   img_c,
-                                                   img_h,
-                                                   img_w,
-                                                   f,
-                                                   ksize,
-                                                   strides,
-                                                   paddings,
-                                                   dilations,
-                                                   groups,
-                                                   nullptr,
-                                                   nullptr,
-                                                   nullptr,
-                                                   is_nchw);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d");
-  }
-};
-
-template <typename DeviceContext, typename T>
-class GemmConvGradXPUKernel : public framework::OpKernel<T> {
-  using XPUT = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *input = context.Input<Tensor>("Input");
-    const Tensor *output_grad =
-        context.Input<Tensor>(framework::GradVarName("Output"));
-    Tensor *input_grad =
-        context.Output<Tensor>(framework::GradVarName("Input"));
-    Tensor *filter_grad =
-        context.Output<Tensor>(framework::GradVarName("Filter"));
-    // The filter and filter_grad will be reshaped in the calculations,
-    // so here use an assignment operation,
-    // that avoids modifying the variable in the Scope.
-    Tensor filter = *context.Input<Tensor>("Filter");
-    if (!input_grad && !filter_grad) return;
-    int groups = context.Attr<int>("groups");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
-    const std::string data_format = context.Attr<std::string>("data_format");
-    const std::string padding_algorithm =
-        context.Attr<std::string>("padding_algorithm");
-
-    PADDLE_ENFORCE_EQ(
-        data_format == "NDHWC",
-        false,
-        platform::errors::InvalidArgument(
-            ("XPU doesn't support data_format is NDHWC in conv grad op.")));
-
-    framework::DDim in_data_dims =
-        phi::slice_ddim(input->dims(), 2, input->dims().size());
-    framework::DDim filter_data_dims =
-        phi::slice_ddim(filter.dims(), 2, filter.dims().size());
-    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-    std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
-    UpdatePaddingAndDilation(
-        &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
-
-    int batch_size = static_cast<int>(input->dims()[0]);
-    int img_c = static_cast<int>(input->dims()[1]);
-    int img_h = static_cast<int>(input->dims()[2]);
-    int img_w = static_cast<int>(input->dims()[3]);
-    int f = static_cast<int>(filter.dims()[0]);
-    bool is_nchw = true;
-    if (data_format == "NHWC") {
-      img_c = static_cast<int>(input->dims()[3]);
-      img_h = static_cast<int>(input->dims()[1]);
-      img_w = static_cast<int>(input->dims()[2]);
-      is_nchw = false;
-    }
-
-    const XPUT *input_data = reinterpret_cast<const XPUT *>(input->data<T>());
-    const XPUT *filter_data = reinterpret_cast<const XPUT *>(filter.data<T>());
-    const XPUT *output_grad_data =
-        reinterpret_cast<const XPUT *>(output_grad->data<T>());
-    XPUT *input_grad_data = nullptr;
-    if (input_grad) {
-      input_grad->mutable_data<T>(context.GetPlace());
-      input_grad_data = reinterpret_cast<XPUT *>(input_grad->data<T>());
-    }
-    XPUT *filter_grad_data = nullptr;
-    if (filter_grad) {
-      filter_grad->mutable_data<T>(context.GetPlace());
-      filter_grad_data = reinterpret_cast<XPUT *>(filter_grad->data<T>());
-    }
-    auto &dev_ctx = context.template device_context<DeviceContext>();
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-
-    XPUT *filter_data_tmp;
-    XPUT *filter_grad_data_tmp;
-    const XPUT *filter_data_ptr = filter_data;
-    XPUT *filter_grad_data_ptr = filter_grad_data;
-    if (data_format == "NHWC") {
-      filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
-      PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
-      int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
-                                   filter_data,
-                                   filter_data_tmp,
-                                   filter_shape,
-                                   {0, 2, 3, 1});
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
-      filter_data_ptr = reinterpret_cast<const XPUT *>(filter_data_tmp);
-
-      if (filter_grad_data != nullptr) {
-        filter_grad_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
-        PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp);
-        filter_grad_data_ptr = filter_grad_data_tmp;
-      }
-    }
-    int r = xpu::conv2d_grad<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
-                                                        input_data,
-                                                        filter_data_ptr,
-                                                        output_grad_data,
-                                                        input_grad_data,
-                                                        filter_grad_data_ptr,
-                                                        batch_size,
-                                                        img_c,
-                                                        img_h,
-                                                        img_w,
-                                                        f,
-                                                        ksize,
-                                                        strides,
-                                                        paddings,
-                                                        dilations,
-                                                        groups,
-                                                        nullptr,
-                                                        nullptr,
-                                                        nullptr,
-                                                        nullptr,
-                                                        nullptr,
-                                                        is_nchw);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_grad");
-
-    if ((filter_grad_data_ptr != nullptr) && (data_format == "NHWC")) {
-      std::vector<int> filter_shape_fhwc = {
-          filter_shape[0], filter_shape[2], filter_shape[3], filter_shape[1]};
-      int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
-                                   filter_grad_data_ptr,
-                                   filter_grad_data,
-                                   filter_shape_fhwc,
-                                   {0, 3, 1, 2});
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
-    }
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    conv2d,
-    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext,
-                           paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    conv2d_grad,
-    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext,
-                               paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    depthwise_conv2d,
-    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>);
-REGISTER_OP_XPU_KERNEL(
-    depthwise_conv2d_grad,
-    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
-#endif
diff --git a/paddle/phi/kernels/conv_kernel.h b/paddle/phi/kernels/conv_kernel.h
index 508b3a42a21..a105fe794f9 100644
--- a/paddle/phi/kernels/conv_kernel.h
+++ b/paddle/phi/kernels/conv_kernel.h
@@ -24,7 +24,7 @@ void ConvKernel(const Context& dev_ctx,
                 const DenseTensor& filter,
                 const std::vector<int>& strides,
                 const std::vector<int>& paddings,
-                const std::string& paddding_algorithm,
+                const std::string& padding_algorithm,
                 int groups,
                 const std::vector<int>& dilations,
                 const std::string& data_format,
diff --git a/paddle/phi/kernels/xpu/conv_grad_kernel.cc b/paddle/phi/kernels/xpu/conv_grad_kernel.cc
new file mode 100644
index 00000000000..da86474b96b
--- /dev/null
+++ b/paddle/phi/kernels/xpu/conv_grad_kernel.cc
@@ -0,0 +1,194 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/conv_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ConvGradKernel(const Context& dev_ctx,
+                    const DenseTensor& input,
+                    const DenseTensor& filter,
+                    const DenseTensor& out_grad,
+                    const std::vector<int>& strides,
+                    const std::vector<int>& paddings_t,
+                    const std::string& padding_algorithm,
+                    int groups,
+                    const std::vector<int>& dilations_t,
+                    const std::string& data_format,
+                    bool use_addto,
+                    int workspace_size_MB,
+                    bool exhaustive_search,
+                    DenseTensor* input_grad,
+                    DenseTensor* filter_grad) {
+  using XPUT = typename XPUTypeTrait<T>::Type;
+  std::vector<int> paddings = paddings_t;
+  std::vector<int> dilations = dilations_t;
+  // The filter and filter_grad will be reshaped in the calculations,
+  // so here use an assignment operation,
+  // that avoids modifying the variable in the Scope.
+  if (!input_grad && !filter_grad) return;
+  PADDLE_ENFORCE_EQ(
+      data_format == "NDHWC",
+      false,
+      phi::errors::InvalidArgument(
+          ("XPU doesn't support data_format is NDHWC in conv grad op.")));
+
+  phi::DDim in_data_dims =
+      phi::slice_ddim(input.dims(), 2, input.dims().size());
+  phi::DDim filter_data_dims =
+      phi::slice_ddim(filter.dims(), 2, filter.dims().size());
+  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
+  std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
+  UpdatePaddingAndDilation(
+      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
+
+  int batch_size = static_cast<int>(input.dims()[0]);
+  int img_c = static_cast<int>(input.dims()[1]);
+  int img_h = static_cast<int>(input.dims()[2]);
+  int img_w = static_cast<int>(input.dims()[3]);
+  int f = static_cast<int>(filter.dims()[0]);
+  bool is_nchw = true;
+  if (data_format == "NHWC") {
+    img_c = static_cast<int>(input.dims()[3]);
+    img_h = static_cast<int>(input.dims()[1]);
+    img_w = static_cast<int>(input.dims()[2]);
+    is_nchw = false;
+  }
+
+  const XPUT* input_data = reinterpret_cast<const XPUT*>(input.data<T>());
+  const XPUT* filter_data = reinterpret_cast<const XPUT*>(filter.data<T>());
+  const XPUT* output_grad_data =
+      reinterpret_cast<const XPUT*>(out_grad.data<T>());
+  XPUT* input_grad_data = nullptr;
+  if (input_grad) {
+    dev_ctx.template Alloc<T>(input_grad);
+    input_grad_data = reinterpret_cast<XPUT*>(input_grad->data<T>());
+  }
+  XPUT* filter_grad_data = nullptr;
+  if (filter_grad) {
+    dev_ctx.template Alloc<T>(filter_grad);
+    filter_grad_data = reinterpret_cast<XPUT*>(filter_grad->data<T>());
+  }
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+
+  XPUT* filter_data_tmp;
+  XPUT* filter_grad_data_tmp;
+  const XPUT* filter_data_ptr = filter_data;
+  XPUT* filter_grad_data_ptr = filter_grad_data;
+  if (data_format == "NHWC") {
+    filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
+    PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
+    int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
+                                 filter_data,
+                                 filter_data_tmp,
+                                 filter_shape,
+                                 {0, 2, 3, 1});
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
+    filter_data_ptr = reinterpret_cast<const XPUT*>(filter_data_tmp);
+
+    if (filter_grad_data != nullptr) {
+      filter_grad_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
+      PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp);
+      filter_grad_data_ptr = filter_grad_data_tmp;
+    }
+  }
+  int r = xpu::conv2d_grad<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
+                                                      input_data,
+                                                      filter_data_ptr,
+                                                      output_grad_data,
+                                                      input_grad_data,
+                                                      filter_grad_data_ptr,
+                                                      batch_size,
+                                                      img_c,
+                                                      img_h,
+                                                      img_w,
+                                                      f,
+                                                      ksize,
+                                                      strides,
+                                                      paddings,
+                                                      dilations,
+                                                      groups,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr,
+                                                      is_nchw);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_grad");
+
+  if ((filter_grad_data_ptr != nullptr) && (data_format == "NHWC")) {
+    std::vector<int> filter_shape_fhwc = {
+        filter_shape[0], filter_shape[2], filter_shape[3], filter_shape[1]};
+    int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
+                                 filter_grad_data_ptr,
+                                 filter_grad_data,
+                                 filter_shape_fhwc,
+                                 {0, 3, 1, 2});
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
+  }
+}
+
+template <typename T, typename Context>
+void DepthwiseConvGradKernel(const Context& dev_ctx,
+                             const DenseTensor& input,
+                             const DenseTensor& filter,
+                             const DenseTensor& out_grad,
+                             const std::vector<int>& strides,
+                             const std::vector<int>& paddings,
+                             const std::string& paddding_algorithm,
+                             int groups,
+                             const std::vector<int>& dilations,
+                             const std::string& data_format,
+                             bool use_addto,
+                             int workspace_size_MB,
+                             bool exhaustive_search,
+                             bool fuse_relu,
+                             DenseTensor* input_grad,
+                             DenseTensor* filter_grad) {
+  ConvGradKernel<T, Context>(dev_ctx,
+                             input,
+                             filter,
+                             out_grad,
+                             strides,
+                             paddings,
+                             paddding_algorithm,
+                             groups,
+                             dilations,
+                             data_format,
+                             use_addto,
+                             workspace_size_MB,
+                             exhaustive_search,
+                             input_grad,
+                             filter_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(conv2d_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::ConvGradKernel,
+                   float,
+                   phi::dtype::float16) {}
+
+PD_REGISTER_KERNEL(depthwise_conv2d_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::DepthwiseConvGradKernel,
+                   float) {}
diff --git a/paddle/phi/kernels/xpu/conv_kernel.cc b/paddle/phi/kernels/xpu/conv_kernel.cc
new file mode 100644
index 00000000000..287faf4cdb9
--- /dev/null
+++ b/paddle/phi/kernels/xpu/conv_kernel.cc
@@ -0,0 +1,149 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/conv_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ConvKernel(const Context& dev_ctx,
+                const DenseTensor& input,
+                const DenseTensor& filter,
+                const std::vector<int>& strides,
+                const std::vector<int>& paddings_t,
+                const std::string& padding_algorithm,
+                int groups,
+                const std::vector<int>& dilations_t,
+                const std::string& data_format,
+                bool use_addto,
+                int workspace_size_MB,
+                bool exhaustive_search,
+                DenseTensor* out) {
+  using XPUT = typename XPUTypeTrait<T>::Type;
+  std::vector<int> paddings = paddings_t;
+  std::vector<int> dilations = dilations_t;
+  // The filter will be reshaped in the calculations,
+  // so here use an assignment operation,
+  // that avoids modifying the variable in the Scope.
+  dev_ctx.template Alloc<T>(out);
+
+  PADDLE_ENFORCE_EQ(
+      data_format == "NDHWC",
+      false,
+      phi::errors::InvalidArgument(
+          ("XPU does not support data_format is NDHWC in conv op.")));
+
+  phi::DDim in_data_dims =
+      phi::slice_ddim(input.dims(), 2, input.dims().size());
+  phi::DDim filter_data_dims =
+      phi::slice_ddim(filter.dims(), 2, filter.dims().size());
+  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
+  UpdatePaddingAndDilation(
+      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
+
+  int batch_size = static_cast<int>(input.dims()[0]);
+  int img_c = static_cast<int>(input.dims()[1]);
+  int img_h = static_cast<int>(input.dims()[2]);
+  int img_w = static_cast<int>(input.dims()[3]);
+  int f = static_cast<int>(filter.dims()[0]);
+  bool is_nchw = true;
+  if (data_format == "NHWC") {
+    img_c = static_cast<int>(input.dims()[3]);
+    img_h = static_cast<int>(input.dims()[1]);
+    img_w = static_cast<int>(input.dims()[2]);
+    is_nchw = false;
+  }
+
+  const XPUT* input_data = reinterpret_cast<const XPUT*>(input.data<T>());
+  const XPUT* filter_data = reinterpret_cast<const XPUT*>(filter.data<T>());
+  XPUT* output_data = reinterpret_cast<XPUT*>(out->data<T>());
+
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+
+  XPUT* filter_data_tmp;
+  const XPUT* filter_data_ptr = filter_data;
+  if (data_format == "NHWC") {
+    filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
+    PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
+    std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
+    int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
+                                 filter_data,
+                                 filter_data_tmp,
+                                 filter_shape,
+                                 {0, 2, 3, 1});
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
+    filter_data_ptr = reinterpret_cast<const XPUT*>(filter_data_tmp);
+  }
+
+  int r = xpu::conv2d<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
+                                                 input_data,
+                                                 filter_data_ptr,
+                                                 output_data,
+                                                 batch_size,
+                                                 img_c,
+                                                 img_h,
+                                                 img_w,
+                                                 f,
+                                                 ksize,
+                                                 strides,
+                                                 paddings,
+                                                 dilations,
+                                                 groups,
+                                                 nullptr,
+                                                 nullptr,
+                                                 nullptr,
+                                                 is_nchw);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d");
+}
+
+template <typename T, typename Context>
+void DepthwiseConvKernel(const Context& dev_ctx,
+                         const DenseTensor& input,
+                         const DenseTensor& filter,
+                         const std::vector<int>& strides,
+                         const std::vector<int>& paddings,
+                         const std::string& paddding_algorithm,
+                         int groups,
+                         const std::vector<int>& dilations,
+                         const std::string& data_format,
+                         bool use_addto,
+                         int workspace_size_MB,
+                         bool exhaustive_search,
+                         bool fuse_relu,
+                         DenseTensor* out) {
+  ConvKernel<T, Context>(dev_ctx,
+                         input,
+                         filter,
+                         strides,
+                         paddings,
+                         paddding_algorithm,
+                         groups,
+                         dilations,
+                         data_format,
+                         use_addto,
+                         workspace_size_MB,
+                         exhaustive_search,
+                         out);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    conv2d, XPU, ALL_LAYOUT, phi::ConvKernel, float, phi::dtype::float16) {}
+PD_REGISTER_KERNEL(
+    depthwise_conv2d, XPU, ALL_LAYOUT, phi::DepthwiseConvKernel, float) {}
--
GitLab