move gelu/gelu_grad/generate_proposals_v2 kernel to phi (#45471)

* move xpu kernel to phi
* delete fluid file
* fix compile
* add guard, test=kunlun
* xpu set constant
* fix xpu error, test=kunlun

move gelu/gelu_grad/generate_proposals_v2 kernel to phi (#45471)
* move xpu kernel to phi
* delete fluid file
* fix compile
* add guard, test=kunlun
* xpu set constant
* fix xpu error, test=kunlun
8b24c795 · Leo Chen · GitHub · c857841e · 8b24c795 · c857841e
7 changed files
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -42,8 +42,7 @@ if(WITH_XPU)
  detection_library(iou_similarity_op SRCS iou_similarity_op.cc
                    iou_similarity_op_xpu.cc)
  detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc)
-  detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc
+  detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc)
-                    generate_proposals_v2_op_xpu.cc)
 elseif(WITH_MLU)
  detection_library(iou_similarity_op SRCS iou_similarity_op.cc
                    iou_similarity_op_mlu.cc)

--- a/paddle/fluid/operators/gelu_op_xpu.cc
+++ b/paddle/fluid/operators/gelu_op_xpu.cc
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <memory>
-#include <string>
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-namespace paddle {
-namespace operators {
-using Tensor = framework::Tensor;
-template <typename DeviceContext, typename T>
-class GeluXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<Tensor>("X");
-    auto* out = ctx.Output<Tensor>("Out");
-    auto place = ctx.GetPlace();
-    const XPUType* x_data = reinterpret_cast<const XPUType*>(x->data<T>());
-    XPUType* y_data = reinterpret_cast<XPUType*>(out->mutable_data<T>(place));
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    int r = xpu::gelu<XPUType>(dev_ctx.x_context(), x_data, y_data, x->numel());
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External(
-            "XPU gelu kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
-  }
-};
-template <typename DeviceContext, typename T>
-class GeluGradXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<Tensor>("X");
-    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto place = ctx.GetPlace();
-    const XPUType* x_data = reinterpret_cast<const XPUType*>(x->data<T>());
-    const XPUType* dout_data =
-        reinterpret_cast<const XPUType*>(dout->data<T>());
-    XPUType* dx_data = reinterpret_cast<XPUType*>(dx->mutable_data<T>(place));
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    int r = xpu::gelu_grad<XPUType>(dev_ctx.x_context(),
-                                    x_data,
-                                    nullptr,
-                                    dout_data,
-                                    dx_data,
-                                    dout->numel());
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU gelu_grad kernel return wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    gelu,
-    ops::GeluXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::GeluXPUKernel<paddle::platform::XPUDeviceContext,
-                       paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    gelu_grad,
-    ops::GeluGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::GeluGradXPUKernel<paddle::platform::XPUDeviceContext,
-                           paddle::platform::float16>);
--- a/paddle/phi/kernels/funcs/math_function.h
+++ b/paddle/phi/kernels/funcs/math_function.h
@@ -53,6 +53,22 @@ struct SetConstant {
                  T num);
 };
+#ifdef PADDLE_WITH_XPU
+template <typename T>  // Partial specialization of SetConstant for XPUContext.
+struct SetConstant<XPUContext, T> {
+  void operator()(const XPUContext& context,
+                  paddle::framework::Tensor* tensor,
+                  T num);  // Declaration only; the body is not part of this hunk.
+};
+template <typename T>  // Same interface, specialized for paddle::platform::XPUDeviceContext.
+struct SetConstant<paddle::platform::XPUDeviceContext, T> {
+  void operator()(const paddle::platform::XPUDeviceContext& context,
+                  paddle::framework::Tensor* tensor,
+                  T num);  // Declaration only; the body is not part of this hunk.
+};
+#endif  // PADDLE_WITH_XPU
 template <typename Place>
 void set_constant_with_place(const paddle::platform::DeviceContext& context,
                             paddle::framework::Tensor* tensor,

--- a/paddle/phi/kernels/funcs/math_function_impl.h
+++ b/paddle/phi/kernels/funcs/math_function_impl.h
@@ -27,20 +27,27 @@ using paddle::framework::To32BitIndex;
 template <typename DeviceContext, typename T>
 void SetConstant<DeviceContext, T>::operator()(
    const DeviceContext& context, paddle::framework::Tensor* tensor, T num) {
-  bool xpu_place = false;
+  auto t = paddle::framework::EigenVector<T>::Flatten(*tensor);  // flattened EigenVector over *tensor
+  t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));  // assign num to every element on the context's Eigen device
+}
 #ifdef PADDLE_WITH_XPU
-  if (paddle::platform::is_xpu_place(context.GetPlace())) {
+template <typename T>  // Definition of the SetConstant<XPUContext, T> call operator.
-    xpu_place = true;
+void SetConstant<XPUContext, T>::operator()(const XPUContext& context,
-    phi::VisitDataType(
+                                            paddle::framework::Tensor* tensor,
-        tensor->dtype(),
+                                            T num) {
-        TensorSetConstantXPU<T>(tensor, num, context.GetPlace()));
+  phi::VisitDataType(tensor->dtype(),  // hands the tensor's dtype and the functor below to phi::VisitDataType
-  }
+                     TensorSetConstantXPU<T>(tensor, num, context.GetPlace()));
-#endif
+}
-  if (!xpu_place) {
+template <typename T>  // Same body as above, for paddle::platform::XPUDeviceContext.
-    auto t = paddle::framework::EigenVector<T>::Flatten(*tensor);
+void SetConstant<paddle::platform::XPUDeviceContext, T>::operator()(
-    t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+    const paddle::platform::XPUDeviceContext& context,
-  }
+    paddle::framework::Tensor* tensor,
+    T num) {
+  phi::VisitDataType(tensor->dtype(),  // hands the tensor's dtype and the functor below to phi::VisitDataType
+                     TensorSetConstantXPU<T>(tensor, num, context.GetPlace()));
 }
+#endif  // PADDLE_WITH_XPU
 template <typename DeviceContext, typename T, int Rank>
 void Transpose<DeviceContext, T, Rank>::operator()(

--- a/paddle/phi/kernels/xpu/gelu_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/gelu_grad_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/gelu_grad_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/kernel_registry.h"
+namespace phi {
+template <typename T, typename Context>  // XPU gelu_grad kernel: fills x_grad via xpu::gelu_grad.
+void GeluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out_grad,
+                    bool approximate,  // NOTE(review): never read in this body; confirm the XPU path has no approximate mode.
+                    DenseTensor* x_grad) {
+  using XPUType = typename XPUTypeTrait<T>::Type;  // element type used for the xpu:: call
+  dev_ctx.template Alloc<T>(x_grad);  // allocate x_grad before its data pointer is taken below
+  int r = xpu::gelu_grad<XPUType>(
+      dev_ctx.x_context(),
+      reinterpret_cast<const XPUType*>(x.data<T>()),
+      nullptr,  // NOTE(review): presumably an optional forward-output pointer; confirm against the XDNN API.
+      reinterpret_cast<const XPUType*>(out_grad.data<T>()),
+      reinterpret_cast<XPUType*>(x_grad->data<T>()),
+      x_grad->numel());  // element count is taken from the output tensor
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "gelu_grad");  // checks the status code r returned by xpu::gelu_grad
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(gelu_grad,  // kernel name passed to the registration macro
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::GeluGradKernel,
+                   float,
+                   phi::dtype::float16) {}  // dtypes listed: float and phi::dtype::float16; body left empty
--- a/paddle/phi/kernels/xpu/gelu_kernel.cc
+++ b/paddle/phi/kernels/xpu/gelu_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/gelu_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/kernel_registry.h"
+namespace phi {
+template <typename T, typename Context>  // XPU gelu kernel: fills out via xpu::gelu.
+void GeluKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                bool approximate,  // NOTE(review): never read in this body; confirm the XPU path has no approximate mode.
+                DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;  // element type used for the xpu:: call
+  dev_ctx.template Alloc<T>(out);  // allocate out before its data pointer is taken below
+  int r = xpu::gelu<XPUType>(dev_ctx.x_context(),
+                             reinterpret_cast<const XPUType*>(x.data<T>()),
+                             reinterpret_cast<XPUType*>(out->data<T>()),
+                             out->numel());  // element count is taken from the output tensor
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "gelu");  // checks the status code r returned by xpu::gelu
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(  // passes phi::GeluKernel to the registration macro under the name gelu
+    gelu, XPU, ALL_LAYOUT, phi::GeluKernel, float, phi::dtype::float16) {}  // dtypes listed: float and phi::dtype::float16
--- a/paddle/fluid/operators/detection/generate_proposals_v2_op_xpu.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_v2_op_xpu.cc