From 8add11a015194d68f1d3d48603f98bca4e3aa16a Mon Sep 17 00:00:00 2001
From: Leo Guo <58431564+ZibinGuo@users.noreply.github.com>
Date: Thu, 8 Sep 2022 19:16:46 +0800
Subject: [PATCH] Migrate roi_align and roi_align_grad to phi. test=kunlun
 (#45858)

---
 paddle/fluid/operators/roi_align_op_xpu.cc     | 278 ------------------
 .../phi/kernels/xpu/roi_align_grad_kernel.cc   | 114 +++++++
 paddle/phi/kernels/xpu/roi_align_kernel.cc     | 149 ++++++++++
 3 files changed, 263 insertions(+), 278 deletions(-)
 delete mode 100644 paddle/fluid/operators/roi_align_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/roi_align_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/xpu/roi_align_kernel.cc

diff --git a/paddle/fluid/operators/roi_align_op_xpu.cc b/paddle/fluid/operators/roi_align_op_xpu.cc
deleted file mode 100644
index 740285fb65..0000000000
--- a/paddle/fluid/operators/roi_align_op_xpu.cc
+++ /dev/null
@@ -1,278 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-using LoDTensor = framework::LoDTensor;
-
-template <typename DeviceContext, typename T>
-class XPUROIAlignOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<Tensor>("X");
-    auto* rois = ctx.Input<LoDTensor>("ROIs");
-    auto* out = ctx.Output<Tensor>("Out");
-
-    auto pooled_height = ctx.Attr<int>("pooled_height");
-    auto pooled_width = ctx.Attr<int>("pooled_width");
-    auto spatial_scale = ctx.Attr<float>("spatial_scale");
-    auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
-    auto aligned = ctx.Attr<bool>("aligned");
-
-    const auto& in_dims = in->dims();
-    int batch_size = in_dims[0];
-    int channels = in_dims[1];
-    int height = in_dims[2];
-    int width = in_dims[3];
-
-    int rois_num = rois->dims()[0];
-
-    if (rois_num == 0) return;
-
-    Tensor roi_batch_id_list;
-    roi_batch_id_list.Resize({rois_num});
-    auto cplace = platform::CPUPlace();
-    int* roi_batch_id_data = roi_batch_id_list.mutable_data<int>(cplace);
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    auto xplace = ctx.GetPlace();
-    int rois_batch_size = 0;
-    int* cpu_lod = nullptr;
-    if (ctx.HasInput("RoisNum")) {
-      auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
-      rois_batch_size = rois_num_t->numel();
-      PADDLE_ENFORCE_EQ(
-          rois_batch_size,
-          batch_size,
-          platform::errors::InvalidArgument(
-              "The rois_batch_size and imgs "
-              "batch_size must be the same. But received rois_batch_size = %d, "
-              "batch_size = %d",
-              rois_batch_size,
-              batch_size));
-
-      std::vector<int> rois_num_list(rois_batch_size);
-      memory::Copy(cplace,
-                   rois_num_list.data(),
-                   xplace,
-                   rois_num_t->data<int>(),
-                   sizeof(int) * rois_batch_size);
-      cpu_lod = new int[rois_batch_size + 1];
-      cpu_lod[0] = 0;
-      for (int i = 0; i < rois_batch_size; i++) {
-        cpu_lod[i + 1] = cpu_lod[i] + rois_num_list[i];
-      }
-    } else {
-      auto lod = rois->lod();
-      PADDLE_ENFORCE_EQ(
-          lod.empty(),
-          false,
-          platform::errors::InvalidArgument("Input(ROIs) in ROIAlignOp does "
-                                            "not contain LoD information."));
-      auto rois_lod = lod.back();
-      rois_batch_size = rois_lod.size() - 1;
-      PADDLE_ENFORCE_EQ(
-          rois_batch_size,
-          batch_size,
-          platform::errors::InvalidArgument(
-              "The batch size of rois and batch size "
-              "of images must be the same. But received rois batch size = %d, "
-              "and images batch size = %d",
-              rois_batch_size,
-              batch_size));
-      int rois_num_with_lod = rois_lod[rois_batch_size];
-      PADDLE_ENFORCE_EQ(
-          rois_num,
-          rois_num_with_lod,
-          platform::errors::InvalidArgument(
-              "The actual number of rois and the number of rois "
-              "provided from Input(RoIsLoD) in RoIAlign must be the same."
-              " But received actual number of rois is %d, and the number "
-              "of rois from RoIsLoD is %d",
-              rois_num,
-              rois_num_with_lod));
-      for (int n = 0; n < rois_batch_size; ++n) {
-        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-          roi_batch_id_data[i] = n;
-        }
-      }
-      cpu_lod = new int[rois_batch_size + 1];
-      for (int i = 0; i < rois_batch_size + 1; i++) {
-        cpu_lod[i] = rois_lod[i];
-      }
-    }
-
-    int* roi_id_data = nullptr;
-    int r = xpu_malloc(reinterpret_cast<void**>(&roi_id_data),
-                       (rois_batch_size + 1) * sizeof(int));
-    PADDLE_ENFORCE_EQ(r,
-                      xpu::Error_t::SUCCESS,
-                      platform::errors::External("no enough memory in xpu"));
-    memory::Copy(xplace,
-                 roi_id_data,
-                 cplace,
-                 cpu_lod,
-                 (rois_batch_size + 1) * sizeof(int));
-    delete[] cpu_lod;
-    r = xpu::roi_align<T, int>(dev_ctx.x_context(),
-                               in->data<T>(),
-                               out->mutable_data<T>(ctx.GetPlace()),
-                               rois->data<T>(),
-                               roi_id_data,
-                               batch_size,
-                               channels,
-                               height,
-                               width,
-                               out->dims()[0],
-                               pooled_height,
-                               pooled_width,
-                               spatial_scale,
-                               sampling_ratio,
-                               true,
-                               aligned);
-    PADDLE_ENFORCE_EQ(r,
-                      xpu::Error_t::SUCCESS,
-                      platform::errors::External(
-                          "The roi_align XPU OP return wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
-    if (dev_ctx.x_context()->xpu_stream) {
-      dev_ctx.Wait();
-    }
-    xpu_free(roi_id_data);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class XPUROIAlignGradOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<Tensor>("X");
-    auto* rois = ctx.Input<LoDTensor>("ROIs");
-
-    auto* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* in_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-
-    auto pooled_height = ctx.Attr<int>("pooled_height");
-    auto pooled_width = ctx.Attr<int>("pooled_width");
-    auto spatial_scale = ctx.Attr<float>("spatial_scale");
-    auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
-    auto aligned = ctx.Attr<bool>("aligned");
-
-    int rois_num = rois->dims()[0];
-    int channels = in->dims()[1];
-    int height = in->dims()[2];
-    int width = in->dims()[3];
-
-    if (!in_grad) {
-      return;
-    }
-    Tensor roi_batch_id_list;
-    roi_batch_id_list.Resize({rois_num});
-    auto cplace = platform::CPUPlace();
-
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    auto xplace = ctx.GetPlace();
-
-    int rois_batch_size = 0;
-    int* cpu_lod = nullptr;
-    if (ctx.HasInput("RoisNum")) {
-      auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
-      rois_batch_size = rois_num_t->numel();
-      std::vector<int> rois_num_list(rois_batch_size);
-      memory::Copy(cplace,
-                   rois_num_list.data(),
-                   xplace,
-                   rois_num_t->data<int>(),
-                   sizeof(int) * rois_batch_size);
-      cpu_lod = new int[rois_batch_size + 1];
-      cpu_lod[0] = 0;
-      for (int i = 0; i < rois_batch_size; i++) {
-        cpu_lod[i + 1] = cpu_lod[i] + rois_num_list[i];
-      }
-    } else {
-      auto rois_lod = rois->lod().back();
-      rois_batch_size = rois_lod.size() - 1;
-      cpu_lod = new int[rois_batch_size + 1];
-      for (int i = 0; i < rois_batch_size + 1; i++) {
-        cpu_lod[i] = rois_lod[i];
-      }
-    }
-    int* roi_id_data = nullptr;
-    int r = xpu_malloc(reinterpret_cast<void**>(&roi_id_data),
-                       (rois_batch_size + 1) * sizeof(int));
-    PADDLE_ENFORCE_EQ(r,
-                      xpu::Error_t::SUCCESS,
-                      platform::errors::External("no enough memory in xpu"));
-    memory::Copy(xplace,
-                 roi_id_data,
-                 cplace,
-                 cpu_lod,
-                 (rois_batch_size + 1) * sizeof(int));
-    in_grad->mutable_data<T>(ctx.GetPlace());
-
-    int output_grad_size = out_grad->numel();
-
-    delete[] cpu_lod;
-    if (output_grad_size > 0) {
-      r = xpu::roi_align_grad<T, int>(dev_ctx.x_context(),
-                                      out_grad->data<T>(),
-                                      in_grad->data<T>(),
-                                      rois->data<T>(),
-                                      roi_id_data,
-                                      in->dims()[0],
-                                      channels,
-                                      height,
-                                      width,
-                                      out_grad->dims()[0],
-                                      pooled_height,
-                                      pooled_width,
-                                      spatial_scale,
-                                      sampling_ratio,
-                                      true,
-                                      aligned);
-      PADDLE_ENFORCE_EQ(
-          r,
-          xpu::Error_t::SUCCESS,
-          platform::errors::External(
-              "The roi_align_grad XPU OP return wrong value[%d %s]",
-              r,
-              XPUAPIErrorMsg[r]));
-    }
-    if (dev_ctx.x_context()->xpu_stream) {
-      dev_ctx.Wait();
-    }
-    xpu_free(roi_id_data);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    roi_align,
-    ops::XPUROIAlignOpKernel<paddle::platform::XPUDeviceContext, float>);
-REGISTER_OP_XPU_KERNEL(
-    roi_align_grad,
-    ops::XPUROIAlignGradOpKernel<paddle::platform::XPUDeviceContext, float>);
-
-#endif
diff --git a/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc b/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc
new file mode 100644
index 0000000000..7f2b08538a
--- /dev/null
+++ b/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc
@@ -0,0 +1,114 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/roi_align_kernel.h"
+
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void RoiAlignGradKernel(const Context& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& boxes,
+                        const paddle::optional<DenseTensor>& boxes_num,
+                        const DenseTensor& out_grad,
+                        int pooled_height,
+                        int pooled_width,
+                        float spatial_scale,
+                        int sampling_ratio,
+                        bool aligned,
+                        DenseTensor* dx) {
+  int rois_num = boxes.dims()[0];
+  int channels = x.dims()[1];
+  int height = x.dims()[2];
+  int width = x.dims()[3];
+
+  if (!dx) {
+    return;
+  }
+  DenseTensor roi_batch_id_list;
+  roi_batch_id_list.Resize({rois_num});
+  auto cplace = phi::CPUPlace();
+  auto xplace = dev_ctx.GetPlace();
+
+  int rois_batch_size = 0;
+  int* cpu_lod = nullptr;
+  if (boxes_num) {
+    rois_batch_size = boxes_num->numel();
+    std::vector<int> rois_num_list(rois_batch_size);
+    paddle::memory::Copy(cplace,
+                         rois_num_list.data(),
+                         xplace,
+                         boxes_num->data<int>(),
+                         sizeof(int) * rois_batch_size);
+    cpu_lod = new int[rois_batch_size + 1];
+    cpu_lod[0] = 0;
+    for (int i = 0; i < rois_batch_size; i++) {
+      cpu_lod[i + 1] = cpu_lod[i] + rois_num_list[i];
+    }
+  } else {
+    auto rois_lod = boxes.lod().back();
+    rois_batch_size = rois_lod.size() - 1;
+    cpu_lod = new int[rois_batch_size + 1];
+    for (int i = 0; i < rois_batch_size + 1; i++) {
+      cpu_lod[i] = rois_lod[i];
+    }
+  }
+  int* roi_id_data = nullptr;
+  int r = xpu_malloc(reinterpret_cast<void**>(&roi_id_data),
+                     (rois_batch_size + 1) * sizeof(int));
+  PADDLE_ENFORCE_XPU_SUCCESS(r);
+  paddle::memory::Copy(xplace,
+                       roi_id_data,
+                       cplace,
+                       cpu_lod,
+                       (rois_batch_size + 1) * sizeof(int));
+  dev_ctx.template Alloc<T>(dx);
+
+  int output_grad_size = out_grad.numel();
+
+  delete[] cpu_lod;
+  if (output_grad_size > 0) {
+    r = xpu::roi_align_grad<T, int>(dev_ctx.x_context(),
+                                    out_grad.data<T>(),
+                                    dx->data<T>(),
+                                    boxes.data<T>(),
+                                    roi_id_data,
+                                    x.dims()[0],
+                                    channels,
+                                    height,
+                                    width,
+                                    out_grad.dims()[0],
+                                    pooled_height,
+                                    pooled_width,
+                                    spatial_scale,
+                                    sampling_ratio,
+                                    true,
+                                    aligned);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "roi_align_grad");
+  }
+  if (dev_ctx.x_context()->xpu_stream) {
+    dev_ctx.Wait();
+  }
+  xpu_free(roi_id_data);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    roi_align_grad, XPU, ALL_LAYOUT, phi::RoiAlignGradKernel, float) {}
diff --git a/paddle/phi/kernels/xpu/roi_align_kernel.cc b/paddle/phi/kernels/xpu/roi_align_kernel.cc
new file mode 100644
index 0000000000..dacb676693
--- /dev/null
+++ b/paddle/phi/kernels/xpu/roi_align_kernel.cc
@@ -0,0 +1,149 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/roi_align_kernel.h"
+
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void RoiAlignKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& boxes,
+                    const paddle::optional<DenseTensor>& boxes_num,
+                    int pooled_height,
+                    int pooled_width,
+                    float spatial_scale,
+                    int sampling_ratio,
+                    bool aligned,
+                    DenseTensor* out) {
+  const auto& in_dims = x.dims();
+  int batch_size = in_dims[0];
+  int channels = in_dims[1];
+  int height = in_dims[2];
+  int width = in_dims[3];
+
+  int rois_num = boxes.dims()[0];
+
+  if (rois_num == 0) return;
+
+  DenseTensor roi_batch_id_list;
+  roi_batch_id_list.Resize({rois_num});
+  auto cplace = phi::CPUPlace();
+  int* roi_batch_id_data =
+      dev_ctx.template HostAlloc<int>(&roi_batch_id_list);
+  auto xplace = dev_ctx.GetPlace();
+  int rois_batch_size = 0;
+  int* cpu_lod = nullptr;
+  if (boxes_num) {
+    rois_batch_size = boxes_num->numel();
+    PADDLE_ENFORCE_EQ(
+        rois_batch_size,
+        batch_size,
+        errors::InvalidArgument(
+            "The rois_batch_size and imgs "
+            "batch_size must be the same. But received rois_batch_size = %d, "
+            "batch_size = %d",
+            rois_batch_size,
+            batch_size));
+
+    std::vector<int> rois_num_list(rois_batch_size);
+    paddle::memory::Copy(cplace,
+                         rois_num_list.data(),
+                         xplace,
+                         boxes_num->data<int>(),
+                         sizeof(int) * rois_batch_size);
+    cpu_lod = new int[rois_batch_size + 1];
+    cpu_lod[0] = 0;
+    for (int i = 0; i < rois_batch_size; i++) {
+      cpu_lod[i + 1] = cpu_lod[i] + rois_num_list[i];
+    }
+  } else {
+    auto lod = boxes.lod();
+    PADDLE_ENFORCE_EQ(lod.empty(),
+                      false,
+                      errors::InvalidArgument("Input(ROIs) in ROIAlignOp does "
+                                              "not contain LoD information."));
+    auto rois_lod = lod.back();
+    rois_batch_size = rois_lod.size() - 1;
+    PADDLE_ENFORCE_EQ(
+        rois_batch_size,
+        batch_size,
+        errors::InvalidArgument(
+            "The batch size of rois and batch size "
+            "of images must be the same. But received rois batch size = %d, "
+            "and images batch size = %d",
+            rois_batch_size,
+            batch_size));
+    int rois_num_with_lod = rois_lod[rois_batch_size];
+    PADDLE_ENFORCE_EQ(
+        rois_num,
+        rois_num_with_lod,
+        errors::InvalidArgument(
+            "The actual number of rois and the number of rois "
+            "provided from Input(RoIsLoD) in RoIAlign must be the same."
+ " But received actual number of rois is %d, and the number " + "of rois from RoIsLoD is %d", + rois_num, + rois_num_with_lod)); + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } + } + cpu_lod = new int[rois_batch_size + 1]; + for (int i = 0; i < rois_batch_size + 1; i++) { + cpu_lod[i] = rois_lod[i]; + } + } + + int* roi_id_data = nullptr; + int r = xpu_malloc(reinterpret_cast(&roi_id_data), + (rois_batch_size + 1) * sizeof(int)); + PADDLE_ENFORCE_XPU_SUCCESS(r); + paddle::memory::Copy(xplace, + roi_id_data, + cplace, + cpu_lod, + (rois_batch_size + 1) * sizeof(int)); + delete[] cpu_lod; + r = xpu::roi_align(dev_ctx.x_context(), + x.data(), + dev_ctx.template Alloc(out), + boxes.data(), + roi_id_data, + batch_size, + channels, + height, + width, + out->dims()[0], + pooled_height, + pooled_width, + spatial_scale, + sampling_ratio, + true, + aligned); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "roi_align_grad"); + if (dev_ctx.x_context()->xpu_stream) { + dev_ctx.Wait(); + } + xpu_free(roi_id_data); +} + +} // namespace phi + +PD_REGISTER_KERNEL(roi_align, XPU, ALL_LAYOUT, phi::RoiAlignKernel, float) {} -- GitLab