From 750abc2cf6ced0caa38213e44fb1564684a4c8d7 Mon Sep 17 00:00:00 2001
From: Aurelius84 <zhangliujie@baidu.com>
Date: Wed, 31 Aug 2022 13:57:13 +0800
Subject: [PATCH] [XPU] Migrate argsort and arg_max XPU kernels into Phi (#45576)

* [XPU] Migrate argsort and arg_max XPU kernels into Phi

* test=kunlun

* test=kunlun
---
 paddle/fluid/operators/arg_max_op_xpu.cc      |  79 --------
 paddle/phi/kernels/xpu/arg_min_max_kernel.cc  |  67 +++++++
 .../kernels/xpu/argsort_kernel.cc}            | 189 ++++++++----------
 3 files changed, 154 insertions(+), 181 deletions(-)
 delete mode 100644 paddle/fluid/operators/arg_max_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/arg_min_max_kernel.cc
 rename paddle/{fluid/operators/argsort_op_xpu.cc => phi/kernels/xpu/argsort_kernel.cc} (58%)
diff --git a/paddle/fluid/operators/arg_max_op_xpu.cc b/paddle/fluid/operators/arg_max_op_xpu.cc
deleted file mode 100644
index 1077a73a827..00000000000
--- a/paddle/fluid/operators/arg_max_op_xpu.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/fluid/operators/arg_min_max_op_base.h"
-
-namespace paddle {
-namespace operators {
-using Tensor = framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class ArgMaxXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<framework::LoDTensor>("X");
-    auto* out = ctx.Output<framework::LoDTensor>("Out");
-    auto dtype = ctx.Attr<int>("dtype");
-    PADDLE_ENFORCE_EQ(
-        (dtype < 0 || dtype == 2 || dtype == 3),
-        true,
-        platform::errors::InvalidArgument(
-            "The attribute of dtype in xpu argmin/argmax must be [%s] or [%s], "
-            "but "
-            "received [%s]",
-            paddle::framework::DataTypeToString(
-                framework::proto::VarType::INT64),
-            paddle::framework::DataTypeToString(
-                framework::proto::VarType::INT32),
-            paddle::framework::DataTypeToString(
-                static_cast<framework::proto::VarType::Type>(dtype))));
-
-    out->template mutable_data<int64_t>(ctx.GetPlace());
-    auto axis = ctx.Attr<int64_t>("axis");
-    const bool& flatten = ctx.Attr<bool>("flatten");
-    framework::DDim x_dims;
-    if (flatten) {
-      x_dims = phi::make_ddim({x->numel()});
-      // if flatten, the axis just as 0
-      axis = 0;
-    } else {
-      x_dims = x->dims();
-      if (axis < 0) axis += x_dims.size();
-    }
-    auto xdims_vec = phi::vectorize<int>(x_dims);
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    int r = xpu::argmax(dev_ctx.x_context(),
-                        x->data<T>(),
-                        out->data<int64_t>(),
-                        xdims_vec,
-                        axis);
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU argmax kernel return wrong value[%d %s].",
-                          r,
-                          XPUAPIErrorMsg[r]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    arg_max, ops::ArgMaxXPUKernel<paddle::platform::XPUDeviceContext, float>);
-
-#endif
diff --git a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc
new file mode 100644
index 00000000000..a48e2155a25
--- /dev/null
+++ b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/arg_min_max_kernel.h"
+
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/ddim.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+// XPU implementation of the argmax kernel.
+//
+// Checks that dtype is negative, 2 or 3, allocates out as int64_t, and calls
+// xpu::argmax on x along axis, or along axis 0 of the flattened shape when
+// flatten is true. keepdims is not read by this function.
+template <typename T, typename Context>
+void ArgMaxKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const Scalar& axis,
+                  bool keepdims,
+                  bool flatten,
+                  int dtype,
+                  DenseTensor* out) {
+  PADDLE_ENFORCE_EQ(
+      (dtype < 0 || dtype == 2 || dtype == 3),
+      true,
+      errors::InvalidArgument(
+          "The attribute of dtype in xpu argmin/argmax must be [%s] or [%s], "
+          "but "
+          "received [%s]",
+          DataType::INT64,
+          DataType::INT32,
+          dtype));
+  dev_ctx.template Alloc<int64_t>(out);
+
+  int axis_val = axis.to<int>();
+  // A flattened input is treated as a single dimension of numel() elements.
+  const DDim x_dims = flatten ? phi::make_ddim({x.numel()}) : x.dims();
+  if (flatten) {
+    // Only axis 0 exists after flattening.
+    axis_val = 0;
+  } else if (axis_val < 0) {
+    // A negative axis counts back from the last dimension.
+    axis_val += x_dims.size();
+  }
+
+  const auto xdims_vec = phi::vectorize<int>(x_dims);
+  int r = xpu::argmax(dev_ctx.x_context(),
+                      x.data<T>(),
+                      out->data<int64_t>(),
+                      xdims_vec,
+                      axis_val);
+  PADDLE_ENFORCE_EQ(
+      r,
+      XPU_SUCCESS,
+      errors::External("XPU argmax kernel return wrong value[%d %s].",
+                       r,
+                       XPUAPIErrorMsg[r]));
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(arg_max, XPU, ALL_LAYOUT, phi::ArgMaxKernel, float) {}
diff --git a/paddle/fluid/operators/argsort_op_xpu.cc b/paddle/phi/kernels/xpu/argsort_kernel.cc
similarity index 58%
rename from paddle/fluid/operators/argsort_op_xpu.cc
rename to paddle/phi/kernels/xpu/argsort_kernel.cc
index 95837841cce..80db142e15d 100644
--- a/paddle/fluid/operators/argsort_op_xpu.cc
+++ b/paddle/phi/kernels/xpu/argsort_kernel.cc
@@ -1,23 +1,23 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/argsort_kernel.h"
+
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
 
 const int XPU_SORT_MAX_SIZE = 16384;
 
@@ -34,9 +34,9 @@ static inline void xpu_argsort(xpu::Context* ctx,
   PADDLE_ENFORCE_EQ(
       ret,
       XPU_SUCCESS,
-      platform::errors::External("XPU sort kernel return wrong value[%d %s].",
-                                 ret,
-                                 XPUAPIErrorMsg[ret]));
+      errors::External("XPU sort kernel return wrong value[%d %s].",
+                       ret,
+                       XPUAPIErrorMsg[ret]));
 }
 
 template <typename T>
@@ -46,12 +46,12 @@ static inline void xpu_transpose(xpu::Context* ctx,
                                  const std::vector<int>& xshape,
                                  const std::vector<int>& permute) {
   int ret = xpu::transpose(ctx, x, y, xshape, permute);
-  PADDLE_ENFORCE_EQ(ret,
-                    XPU_SUCCESS,
-                    platform::errors::External(
-                        "XPU transpose kernel return wrong value[%d %s]",
-                        ret,
-                        XPUAPIErrorMsg[ret]));
+  PADDLE_ENFORCE_EQ(
+      ret,
+      XPU_SUCCESS,
+      errors::External("XPU transpose kernel return wrong value[%d %s]",
+                       ret,
+                       XPUAPIErrorMsg[ret]));
 }
 
 template <typename TX, typename TY>
@@ -60,9 +60,9 @@ static inline void xpu_cast(xpu::Context* ctx, const TX* x, TY* y, int len) {
   PADDLE_ENFORCE_EQ(
       ret,
       XPU_SUCCESS,
-      platform::errors::External("XPU cast kernel return wrong value[%d %s]",
-                                 ret,
-                                 XPUAPIErrorMsg[ret]));
+      errors::External("XPU cast kernel return wrong value[%d %s]",
+                       ret,
+                       XPUAPIErrorMsg[ret]));
 }
 
 template <typename T,
@@ -179,82 +179,77 @@ struct XPUArgsort<int64_t, true, true> {
   }
 };
 
-template <typename T>
-class ArgsortXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<framework::Tensor>("X");
-    auto* output = ctx.Output<framework::Tensor>("Out");
-    auto* indices = ctx.Output<framework::Tensor>("Indices");
-    int axis = ctx.Attr<int>("axis");
-    bool descending = ctx.Attr<bool>("descending");
-
-    auto in_dims = input->dims();
-    axis = (axis < 0) ? (in_dims.size() + axis) : axis;
-    int n = in_dims[axis];
-
-    PADDLE_ENFORCE_LT(
-        n,
-        XPU_SORT_MAX_SIZE,
-        platform::errors::InvalidArgument(
-            "The axis dimension of Input should less than %d, but got %d.",
-            XPU_SORT_MAX_SIZE,
-            in_dims[axis]));
-
-    auto input_data = input->data<T>();
-    auto output_data = output->mutable_data<T>(ctx.GetPlace());
-    auto indices_data = indices->mutable_data<int64_t>(ctx.GetPlace());
-
-    auto& dev_ctx =
-        ctx.template device_context<paddle::platform::XPUDeviceContext>();
-    int len_before = phi::product(phi::slice_ddim(in_dims, 0, axis));
-    int len_after =
-        phi::product(phi::slice_ddim(in_dims, axis + 1, in_dims.size()));
-    bool int64_need_cast =
-        (std::is_same<T, int64_t>::value && n > (XPU_SORT_MAX_SIZE / 2))
-            ? true
-            : false;
-    bool index_need_cast = (n > (XPU_SORT_MAX_SIZE / 2)) ? true : false;
-    std::vector<int> permute_vec{0, 2, 1};
-    std::vector<int> data_shape{len_before, n, len_after};
-
-    if (int64_need_cast) {
-      XPUArgsort<T, true, true>()(dev_ctx.x_context(),
+// XPU implementation of the argsort kernel.
+//
+// Normalizes a negative `axis`, requires the length `n` of that axis to be
+// below XPU_SORT_MAX_SIZE, allocates `output` (T) and `indices` (int64_t)
+// through `dev_ctx`, and passes the data, described as the 3-D shape
+// {len_before, n, len_after}, to an XPUArgsort specialization.
+template <typename T, typename Context>
+void ArgsortKernel(const Context& dev_ctx,
+                   const DenseTensor& input,
+                   int axis,
+                   bool descending,
+                   DenseTensor* output,
+                   DenseTensor* indices) {
+  auto in_dims = input.dims();
+  // A negative axis counts back from the last dimension.
+  axis = (axis < 0) ? (in_dims.size() + axis) : axis;
+  const int n = in_dims[axis];
+
+  PADDLE_ENFORCE_LT(
+      n,
+      XPU_SORT_MAX_SIZE,
+      errors::InvalidArgument(
+          "The axis dimension of Input should be less than %d, but got %d.",
+          XPU_SORT_MAX_SIZE,
+          in_dims[axis]));
+
+  auto input_data = input.data<T>();
+  auto output_data = dev_ctx.template Alloc<T>(output);
+  auto indices_data = dev_ctx.template Alloc<int64_t>(indices);
+
+  // Collapse the dimensions before and after the sort axis.
+  int len_before = phi::product(phi::slice_ddim(in_dims, 0, axis));
+  int len_after =
+      phi::product(phi::slice_ddim(in_dims, axis + 1, in_dims.size()));
+  std::vector<int> permute_vec{0, 2, 1};
+  std::vector<int> data_shape{len_before, n, len_after};
+
+  // Pick the XPUArgsort specialization: lengths above half the limit set the
+  // index-cast flag, and int64_t data of such length sets both flags.
+  const bool index_need_cast = n > (XPU_SORT_MAX_SIZE / 2);
+  const bool int64_need_cast =
+      std::is_same<T, int64_t>::value && index_need_cast;
+
+  if (int64_need_cast) {
+    XPUArgsort<T, true, true>()(dev_ctx.x_context(),
+                                input_data,
+                                output_data,
+                                indices_data,
+                                data_shape,
+                                permute_vec,
+                                descending);
+  } else if (index_need_cast) {
+    XPUArgsort<T, false, true>()(dev_ctx.x_context(),
+                                 input_data,
+                                 output_data,
+                                 indices_data,
+                                 data_shape,
+                                 permute_vec,
+                                 descending);
+  } else {
+    XPUArgsort<T, false, false>()(dev_ctx.x_context(),
                                   input_data,
                                   output_data,
                                   indices_data,
                                   data_shape,
                                   permute_vec,
                                   descending);
-    } else if (index_need_cast) {
-      XPUArgsort<T, false, true>()(dev_ctx.x_context(),
-                                   input_data,
-                                   output_data,
-                                   indices_data,
-                                   data_shape,
-                                   permute_vec,
-                                   descending);
-    } else {
-      XPUArgsort<T, false, false>()(dev_ctx.x_context(),
-                                    input_data,
-                                    output_data,
-                                    indices_data,
-                                    data_shape,
-                                    permute_vec,
-                                    descending);
-    }
   }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
+}
 
-REGISTER_OP_XPU_KERNEL(argsort,
-                       ops::ArgsortXPUKernel<float>,
-                       ops::ArgsortXPUKernel<int>,
-                       ops::ArgsortXPUKernel<int64_t>);
+}  // namespace phi
 
-#endif
+PD_REGISTER_KERNEL(
+    argsort, XPU, ALL_LAYOUT, phi::ArgsortKernel, float, int, int64_t) {}
-- 
GitLab