diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index b13a74ae72d3a0e0d52ecc08deaeb2b1599d255e..d965e1ace5fc3182f79e5e92906f0ee448bce24d 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -41,7 +41,7 @@ endif()
 if(WITH_XPU)
   detection_library(iou_similarity_op SRCS iou_similarity_op.cc
                     iou_similarity_op_xpu.cc)
-  detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc)
+  detection_library(prior_box_op SRCS prior_box_op.cc)
   detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc)
 elseif(WITH_MLU)
   detection_library(iou_similarity_op SRCS iou_similarity_op.cc
diff --git a/paddle/fluid/operators/detection/prior_box_op_xpu.cc b/paddle/fluid/operators/detection/prior_box_op_xpu.cc
deleted file mode 100644
index c52f64fb2c89788ab7047f5169f2ebaf33b8abfe..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/detection/prior_box_op_xpu.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/fluid/operators/detection/prior_box_op.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename K>
-class PriorBoxOpXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<framework::Tensor>("Input");
-    auto* image = ctx.Input<framework::Tensor>("Image");
-    auto* boxes = ctx.Output<framework::Tensor>("Boxes");
-    auto* vars = ctx.Output<framework::Tensor>("Variances");
-
-    auto min_sizes = ctx.Attr<std::vector<float>>("min_sizes");
-    auto max_sizes = ctx.Attr<std::vector<float>>("max_sizes");
-    auto input_aspect_ratio = ctx.Attr<std::vector<float>>("aspect_ratios");
-    auto variances = ctx.Attr<std::vector<float>>("variances");
-    auto flip = ctx.Attr<bool>("flip");
-    auto clip = ctx.Attr<bool>("clip");
-    auto min_max_aspect_ratios_order =
-        ctx.Attr<bool>("min_max_aspect_ratios_order");
-
-    std::vector<float> aspect_ratios;
-    ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);
-
-    K step_w = static_cast<K>(ctx.Attr<float>("step_w"));
-    K step_h = static_cast<K>(ctx.Attr<float>("step_h"));
-    K offset = static_cast<K>(ctx.Attr<float>("offset"));
-
-    auto img_width = image->dims()[3];
-    auto img_height = image->dims()[2];
-
-    auto feature_width = input->dims()[3];
-    auto feature_height = input->dims()[2];
-
-    K step_width, step_height;
-    if (step_w == 0 || step_h == 0) {
-      step_width = static_cast<K>(img_width) / feature_width;
-      step_height = static_cast<K>(img_height) / feature_height;
-    } else {
-      step_width = step_w;
-      step_height = step_h;
-    }
-
-    int num_priors = aspect_ratios.size() * min_sizes.size();
-    if (max_sizes.size() > 0) {
-      num_priors += max_sizes.size();
-    }
-
-    boxes->mutable_data<K>(ctx.GetPlace());
-    vars->mutable_data<K>(ctx.GetPlace());
-
-    const auto& dev_ctx =
-        ctx.template device_context<platform::XPUDeviceContext>();
-    auto boxes_data = boxes->data<K>();
-    auto vars_data = vars->data<K>();
-    xpu::VectorParam<float> aspect_ratios_param{
-        aspect_ratios.data(), static_cast<int>(aspect_ratios.size()), nullptr};
-    xpu::VectorParam<float> min_sizes_param{
-        min_sizes.data(), static_cast<int>(min_sizes.size()),
nullptr};
-    xpu::VectorParam<float> max_sizes_param{
-        max_sizes.data(), static_cast<int>(max_sizes.size()), nullptr};
-
-    int ret = xpu::gen_prior_box(dev_ctx.x_context(),
-                                 boxes_data,
-                                 aspect_ratios_param,
-                                 min_sizes_param,
-                                 max_sizes_param,
-                                 feature_height,
-                                 feature_width,
-                                 img_height,
-                                 img_width,
-                                 offset,
-                                 step_height,
-                                 step_width,
-                                 clip,
-                                 min_max_aspect_ratios_order);
-    PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gen_prior_box");
-
-    int box_num = feature_height * feature_width * num_priors;
-    int vlen = variances.size();
-    std::vector<K> var_cpu(vlen * box_num);
-    for (int i = 0; i < box_num; ++i) {
-      std::copy(variances.begin(), variances.end(), var_cpu.begin() + i * vlen);
-    }
-    ret = xpu_memcpy(vars_data,
-                     var_cpu.data(),
-                     var_cpu.size() * sizeof(K),
-                     XPUMemcpyKind::XPU_HOST_TO_DEVICE);
-    PADDLE_ENFORCE_XPU_SUCCESS(ret);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(prior_box, ops::PriorBoxOpXPUKernel<float, float>);
-
-#endif
diff --git a/paddle/fluid/operators/softmax_op_xpu.cc b/paddle/fluid/operators/softmax_op_xpu.cc
deleted file mode 100644
index 9c415a5f4291b40450277f1a05c1586dbdac5abf..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/softmax_op_xpu.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/kernels/funcs/axis_utils.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-using DDim = framework::DDim;
-
-template <typename DeviceContext, typename T>
-class SoftmaxXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<Tensor>("X");
-    auto* out = context.Output<Tensor>("Out");
-    const int rank = x->dims().size();
-    int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
-
-    // allocate memory on device.
-    out->mutable_data<T>(context.GetPlace());
-
-    std::vector<int> x_dims;
-    for (int i = 0; i < rank; i++) {
-      x_dims.push_back(x->dims()[i]);
-    }
-    if (axis < 0) {
-      axis += rank;
-    }
-
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-
-    int r = XPU_SUCCESS;
-    auto version = platform::get_xpu_version(context.GetPlace().GetDeviceId());
-    if (version == phi::backends::xpu::XPUVersion::XPU1) {
-      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-      XPUType* clip_x_data_l3 = RAII_GUARD.alloc_l3_or_gm<XPUType>(x->numel());
-      r = xpu::clip_v2(dev_ctx.x_context(),
-                       reinterpret_cast<const XPUType*>(x->data<T>()),
-                       clip_x_data_l3,
-                       x->numel(),
-                       static_cast<XPUType>(-1e20),
-                       static_cast<XPUType>(1e20));
-      PADDLE_ENFORCE_EQ(r,
-                        XPU_SUCCESS,
-                        platform::errors::External(
-                            "XPU API(clip_v2) return wrong value[%d %s]",
-                            r,
-                            XPUAPIErrorMsg[r]));
-      r = xpu::softmax<XPUType>(dev_ctx.x_context(),
-                                clip_x_data_l3,
-                                reinterpret_cast<XPUType*>(out->data<T>()),
-                                x_dims,
-                                axis);
-      PADDLE_ENFORCE_EQ(
-          r,
-          XPU_SUCCESS,
-          platform::errors::External("XPU API(softmax2d_forward) return wrong "
-                                     "value[%d %s]",
-                                     r,
-                                     XPUAPIErrorMsg[r]));
-    } else {
-      r = xpu::softmax<XPUType>(dev_ctx.x_context(),
-                                reinterpret_cast<const XPUType*>(x->data<T>()),
-                                reinterpret_cast<XPUType*>(out->data<T>()),
-                                x_dims,
-                                axis);
-      PADDLE_ENFORCE_EQ(
-          r,
-          XPU_SUCCESS,
-          platform::errors::External("XPU API(softmax2d_forward) return wrong "
-                                     "value[%d %s]",
-                                     r,
XPUAPIErrorMsg[r]));
-    }
-  }
-};
-
-template <typename DeviceContext, typename T>
-class SoftmaxGradXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* out = context.Input<Tensor>("Out");
-    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
-    const int rank = dx->dims().size();
-    int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
-
-    // allocate memory on device.
-    dx->mutable_data<T>(context.GetPlace());
-
-    std::vector<int> x_dims;
-    for (int i = 0; i < rank; i++) {
-      x_dims.push_back(dx->dims()[i]);
-    }
-    if (axis < 0) {
-      axis += rank;
-    }
-
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    int r = xpu::softmax_grad<XPUType>(
-        dev_ctx.x_context(),
-        reinterpret_cast<const XPUType*>(out->data<T>()),
-        reinterpret_cast<const XPUType*>(dout->data<T>()),
-        reinterpret_cast<XPUType*>(dx->data<T>()),
-        x_dims,
-        axis);
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External("XPU API(softmax2d_backward) return wrong "
-                                   "value[%d %s]",
-                                   r,
-                                   XPUAPIErrorMsg[r]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(
-    softmax,
-    ops::SoftmaxXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::SoftmaxXPUKernel<paddle::platform::XPUDeviceContext,
-                          paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    softmax_grad,
-    ops::SoftmaxGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::SoftmaxGradXPUKernel<paddle::platform::XPUDeviceContext,
-                              paddle::platform::float16>);
-
-#endif  // PADDLE_WITH_XPU
diff --git a/paddle/phi/kernels/xpu/prior_box_kernel.cc b/paddle/phi/kernels/xpu/prior_box_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0c6413a2e49bf326c8874dbabb04d7d325b11949
--- /dev/null
+++ b/paddle/phi/kernels/xpu/prior_box_kernel.cc
@@ -0,0 +1,112 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/prior_box_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void PriorBoxKernel(const Context& ctx,
+                    const DenseTensor& input,
+                    const DenseTensor& image,
+                    const std::vector<float>& min_sizes,
+                    const std::vector<float>& aspect_ratios,
+                    const std::vector<float>& variances,
+                    const std::vector<float>& max_sizes,
+                    bool flip,
+                    bool clip,
+                    float step_w,
+                    float step_h,
+                    float offset,
+                    bool min_max_aspect_ratios_order,
+                    DenseTensor* out,
+                    DenseTensor* var) {
+  std::vector<float> new_aspect_ratios;
+  ExpandAspectRatios(aspect_ratios, flip, &new_aspect_ratios);
+
+  T new_step_w = static_cast<T>(step_w);
+  T new_step_h = static_cast<T>(step_h);
+  T new_offset = static_cast<T>(offset);
+
+  auto img_width = image.dims()[3];
+  auto img_height = image.dims()[2];
+
+  auto feature_width = input.dims()[3];
+  auto feature_height = input.dims()[2];
+
+  T step_width, step_height;
+  if (new_step_w == 0 || new_step_h == 0) {
+    step_width = static_cast<T>(img_width) / feature_width;
+    step_height = static_cast<T>(img_height) / feature_height;
+  } else {
+    step_width = new_step_w;
+    step_height = new_step_h;
+  }
+
+  int num_priors = new_aspect_ratios.size() * min_sizes.size();
+  if (max_sizes.size() > 0) {
+    num_priors += max_sizes.size();
+  }
+
+  ctx.template Alloc<T>(out);
+  ctx.template Alloc<T>(var);
+
+  auto boxes_data = out->data<T>();
+  auto var_data = var->data<T>();
+  xpu::VectorParam<float> aspect_ratios_param{
+      new_aspect_ratios.data(),
static_cast<int>(new_aspect_ratios.size()),
+      nullptr};
+  xpu::VectorParam<float> min_sizes_param{
+      min_sizes.data(), static_cast<int>(min_sizes.size()), nullptr};
+  xpu::VectorParam<float> max_sizes_param{
+      max_sizes.data(), static_cast<int>(max_sizes.size()), nullptr};
+
+  int ret = xpu::gen_prior_box(ctx.x_context(),
+                               boxes_data,
+                               aspect_ratios_param,
+                               min_sizes_param,
+                               max_sizes_param,
+                               feature_height,
+                               feature_width,
+                               img_height,
+                               img_width,
+                               new_offset,
+                               step_height,
+                               step_width,
+                               clip,
+                               min_max_aspect_ratios_order);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gen_prior_box");
+
+  int box_num = feature_height * feature_width * num_priors;
+  int vlen = variances.size();
+  std::vector<T> var_cpu(vlen * box_num);
+  for (int i = 0; i < box_num; ++i) {
+    std::copy(variances.begin(), variances.end(), var_cpu.begin() + i * vlen);
+  }
+  ctx.Wait();
+  ret = xpu_memcpy(var_data,
+                   var_cpu.data(),
+                   var_cpu.size() * sizeof(T),
+                   XPUMemcpyKind::XPU_HOST_TO_DEVICE);
+  PADDLE_ENFORCE_XPU_SUCCESS(ret);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(prior_box, XPU, ALL_LAYOUT, phi::PriorBoxKernel, float) {}
diff --git a/paddle/phi/kernels/xpu/softmax_grad_kernel.cc b/paddle/phi/kernels/xpu/softmax_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8d5d855e52aa94d44c7b2de283d3ffbc10999d68
--- /dev/null
+++ b/paddle/phi/kernels/xpu/softmax_grad_kernel.cc
@@ -0,0 +1,60 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + +#include "paddle/phi/kernels/softmax_grad_kernel.h" +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/axis_utils.h" + +namespace phi { + +template +void SoftmaxGradKernel(const Context& dev_ctx, + const DenseTensor& out, + const DenseTensor& out_grad, + int axis, + DenseTensor* x_grad) { + using XPUType = typename XPUTypeTrait::Type; + const int rank = x_grad->dims().size(); + const int calc_axis = phi::funcs::CanonicalAxis(axis, rank); + + // allocate memory on device. + dev_ctx.template Alloc(x_grad); + if (x_grad->numel() == 0) { + return; + } + + std::vector x_dims; + for (int i = 0; i < rank; i++) { + x_dims.push_back(x_grad->dims()[i]); + } + + int r = xpu::softmax_grad( + dev_ctx.x_context(), + reinterpret_cast(out.data()), + reinterpret_cast(out_grad.data()), + reinterpret_cast(x_grad->data()), + x_dims, + calc_axis); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "softmax_grad"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(softmax_grad, + XPU, + ALL_LAYOUT, + phi::SoftmaxGradKernel, + float, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/xpu/softmax_kernel.cc b/paddle/phi/kernels/xpu/softmax_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..60b1c52ca5047f7d8e6cfd0266d14fe6902a7374 --- /dev/null +++ b/paddle/phi/kernels/xpu/softmax_kernel.cc @@ -0,0 +1,74 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/softmax_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/axis_utils.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void SoftmaxKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   int axis,
+                   DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  const int rank = x.dims().size();
+  const int calc_axis = phi::funcs::CanonicalAxis(axis, rank);
+
+  // allocate memory on device.
+  dev_ctx.template Alloc<T>(out);
+  if (out->numel() == 0) {
+    return;
+  }
+
+  std::vector<int> x_dims;
+  for (int i = 0; i < rank; i++) {
+    x_dims.push_back(x.dims()[i]);
+  }
+
+  int r = XPU_SUCCESS;
+  auto version =
+      phi::backends::xpu::get_xpu_version(dev_ctx.GetPlace().GetDeviceId());
+  if (version == phi::backends::xpu::XPUVersion::XPU1) {
+    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+    XPUType* clip_x_data_l3 = RAII_GUARD.alloc_l3_or_gm<XPUType>(x.numel());
+    r = xpu::clip_v2(dev_ctx.x_context(),
+                     reinterpret_cast<const XPUType*>(x.data<T>()),
+                     clip_x_data_l3,
+                     x.numel(),
+                     static_cast<XPUType>(-1e20),
+                     static_cast<XPUType>(1e20));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    r = xpu::softmax<XPUType>(dev_ctx.x_context(),
+                              clip_x_data_l3,
+                              reinterpret_cast<XPUType*>(out->data<T>()),
+                              x_dims,
+                              calc_axis);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softmax");
+  } else {
+    r = xpu::softmax<XPUType>(dev_ctx.x_context(),
+                              reinterpret_cast<const XPUType*>(x.data<T>()),
+                              reinterpret_cast<XPUType*>(out->data<T>()),
+                              x_dims,
+                              calc_axis);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softmax");
+  }
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    softmax, XPU, ALL_LAYOUT, phi::SoftmaxKernel, float, phi::dtype::float16) {}