[PHI kernels] Bind XPU kernels (#53336)

* bind sparse_coo_tensor, reduce_max/max_int32, range/arange_int32, equal_bool, scatter_grad_float32, nearest_interp_int64 kernels
* add more unit tests; modify compilation logic of xpu sparse kernels

[PHI kernels] Bind XPU kernels (#53336)
* bind sparse_coo_tensor, reduce_max/max_int32, range/arange_int32, equal_bool, scatter_grad_float32, nearest_interp_int64 kernels
* add more unit tests; modify compilation logic of xpu sparse kernels
7e9c87c5 · RuohengMa · GitHub · 14c642cb · 7e9c87c5 · 7e9c87c5
13 changed files
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -261,7 +261,8 @@ XPUOpMap& get_kl2_ops() {
       XPUKernelSet({phi::DataType::INT64,
                     phi::DataType::INT32,
                     phi::DataType::FLOAT16,
-                     phi::DataType::FLOAT32})},
+                     phi::DataType::FLOAT32,
+                     phi::DataType::BOOL})},
      {"exp_grad", XPUKernelSet({phi::DataType::FLOAT32})},
      {"exp", XPUKernelSet({phi::DataType::FLOAT32})},
      {"expand_as_v2",
@@ -513,7 +514,9 @@ XPUOpMap& get_kl2_ops() {
      {"multi_encoder_xpu",
       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
      {"nearest_interp_v2",
-       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT16,
+                     phi::DataType::INT64})},
      {"nearest_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
      {"not_equal",
       XPUKernelSet({phi::DataType::INT64,
@@ -545,7 +548,10 @@ XPUOpMap& get_kl2_ops() {
      {"prelu_grad",
       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
      {"prod_raw", XPUKernelSet({phi::DataType::FLOAT32})},
-      {"range", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT64})},
+      {"range",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::INT64,
+                     phi::DataType::INT32})},
      {"randperm",
       XPUKernelSet({phi::DataType::INT32,
                     phi::DataType::INT64,
@@ -555,7 +561,8 @@ XPUOpMap& get_kl2_ops() {
      {"reciprocal_grad",
       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
      {"reduce_max_grad", XPUKernelSet({phi::DataType::FLOAT32})},
-      {"reduce_max", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"reduce_max",
+       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32})},
      {"reduce_mean_grad", XPUKernelSet({phi::DataType::FLOAT32})},
      {"reduce_mean",
       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
@@ -615,6 +622,8 @@ XPUOpMap& get_kl2_ops() {
       XPUKernelSet({phi::DataType::INT64,
                     phi::DataType::INT32,
                     phi::DataType::FLOAT32})},
+      {"scatter_grad",
+       XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})},
      {"scatter_nd_add", XPUKernelSet({phi::DataType::FLOAT32})},
      {"sampling_id",
       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT64})},
@@ -673,6 +682,14 @@ XPUOpMap& get_kl2_ops() {
       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
      {"softplus", XPUKernelSet({phi::DataType::FLOAT32})},
      {"softplus_grad", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"sparse_coo_tensor",
+       XPUKernelSet({phi::DataType::FLOAT64,
+                     phi::DataType::INT64,
+                     phi::DataType::INT32,
+                     phi::DataType::UINT8,
+                     phi::DataType::INT16,
+                     phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT16})},
      {"split",
       XPUKernelSet({phi::DataType::FLOAT32,
                     phi::DataType::FLOAT16,

--- a/paddle/phi/kernels/CMakeLists.txt
+++ b/paddle/phi/kernels/CMakeLists.txt
@@ -184,8 +184,14 @@ else()
    "fusion/cpu/*.cc")
 endif()
-file(GLOB kernel_xpu "xpu/*.cc" "legacy/xpu/*.cc" "selected_rows/xpu/*.cc"
+file(
-     "fusion/xpu/*.cc")
+  GLOB
+  kernel_xpu
+  "xpu/*.cc"
+  "legacy/xpu/*.cc"
+  "selected_rows/xpu/*.cc"
+  "fusion/xpu/*.cc"
+  "sparse/xpu/*.cc")
 if(WITH_MKLDNN)
  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} get_kerneltype_forvar_utils)

--- a/paddle/phi/kernels/reduce_max_kernel.cc
+++ b/paddle/phi/kernels/reduce_max_kernel.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -62,5 +62,5 @@ PD_REGISTER_KERNEL(
 #endif
 #if defined(PADDLE_WITH_XPU)
-PD_REGISTER_KERNEL(max, XPU, ALL_LAYOUT, phi::MaxKernel, float) {}
+PD_REGISTER_KERNEL(max, XPU, ALL_LAYOUT, phi::MaxKernel, float, int) {}
 #endif
--- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
+++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

--- a/paddle/phi/kernels/sparse/xpu/sparse_utils_kernel.cc
+++ b/paddle/phi/kernels/sparse/xpu/sparse_utils_kernel.cc
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
+#include "paddle/phi/core/kernel_registry.h"
+PD_REGISTER_KERNEL(sparse_coo_tensor,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooTensorKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {}
--- a/paddle/phi/kernels/xpu/compare_kernel.cc
+++ b/paddle/phi/kernels/xpu/compare_kernel.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -119,7 +119,8 @@ PD_REGISTER_KERNEL(less_than_raw,
                     int,                                 \
                     int64_t,                             \
                     float,                               \
-                     phi::dtype::float16) {               \
+                     phi::dtype::float16,                 \
+                     bool) {                              \
    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); \
  }                                                       \
  PD_REGISTER_KERNEL(name##_raw,                          \
@@ -129,7 +130,8 @@ PD_REGISTER_KERNEL(less_than_raw,
                     int,                                 \
                     int64_t,                             \
                     float,                               \
-                     phi::dtype::float16) {               \
+                     phi::dtype::float16,                 \
+                     bool) {                              \
    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); \
  }

--- a/paddle/phi/kernels/xpu/interpolate_kernel.cc
+++ b/paddle/phi/kernels/xpu/interpolate_kernel.cc
@@ -238,7 +238,8 @@ PD_REGISTER_KERNEL(nearest_interp,
                   ALL_LAYOUT,
                   phi::NearestInterpKernel,
                   phi::dtype::float16,
-                   float) {
+                   float,
+                   int64_t) {
  kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND);
  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);

--- a/paddle/phi/kernels/xpu/reduce_max_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_max_kernel.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -49,4 +49,4 @@ void MaxRawKernel(const Context& dev_ctx,
 }  // namespace phi
-PD_REGISTER_KERNEL(max_raw, XPU, ALL_LAYOUT, phi::MaxRawKernel, float) {}
+PD_REGISTER_KERNEL(max_raw, XPU, ALL_LAYOUT, phi::MaxRawKernel, float, int) {}
--- a/paddle/phi/kernels/xpu/scatter_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/scatter_grad_kernel.cc
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/scatter_grad_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "glog/logging.h"
+namespace phi {
+// Backward kernel of `scatter` for the XPU backend.
+//
+// Validates the dtype of `index`, allocates whichever of the two gradient
+// outputs were requested, and forwards the work to xpu::scatter_grad.
+//
+// Parameters:
+//   ctx          - device context; provides Alloc<T>() and x_context().
+//   index        - scatter indices; dtype must be INT32 or INT64.
+//   updates      - part of the kernel signature; not read by this function.
+//   out_grad     - gradient of the forward output. Its dims are passed to
+//                  xpu::scatter_grad as the shape argument.
+//   overwrite    - forwarded unchanged to xpu::scatter_grad.
+//   x_grad       - optional output; allocated here when non-null, otherwise a
+//                  null data pointer is handed to xpu::scatter_grad.
+//   updates_grad - optional output; handled the same way as x_grad.
+//
+// Errors:
+//   phi::errors::InvalidArgument when `index` is neither INT32 nor INT64.
+//   The status returned by xpu::scatter_grad is checked with
+//   PADDLE_ENFORCE_XDNN_SUCCESS.
+template <typename T, typename Context>
+void ScatterGradKernel(const Context &ctx,
+                       const DenseTensor &index,
+                       const DenseTensor &updates,
+                       const DenseTensor &out_grad,
+                       bool overwrite,
+                       DenseTensor *x_grad,
+                       DenseTensor *updates_grad) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  const auto &index_type = index.dtype();
+  bool index_type_match =
+      index_type == phi::DataType::INT32 || index_type == phi::DataType::INT64;
+  // The two literals are concatenated by the compiler, so the separating
+  // space has to live inside one of them.
+  PADDLE_ENFORCE_EQ(index_type_match,
+                    true,
+                    phi::errors::InvalidArgument(
+                        "scatter_op index holds the wrong type, it holds [%s], "
+                        "but desires to be [%s] or [%s]",
+                        index_type,
+                        phi::DataType::INT32,
+                        phi::DataType::INT64));
+
+  // Either output may be absent; a missing output is passed on as nullptr.
+  T *x_grad_data = nullptr;
+  T *updates_grad_data = nullptr;
+  if (x_grad != nullptr) {
+    ctx.template Alloc<T>(x_grad);
+    x_grad_data = x_grad->data<T>();
+  }
+  if (updates_grad != nullptr) {
+    ctx.template Alloc<T>(updates_grad);
+    updates_grad_data = updates_grad->data<T>();
+  }
+
+  // Copy the dims of out_grad into the vector form taken by xpu::scatter_grad.
+  std::vector<int64_t> x_grad_shape;
+  DDim out_dims = out_grad.dims();
+  x_grad_shape.reserve(out_dims.size());
+  for (int i = 0; i < out_dims.size(); i++) {
+    x_grad_shape.push_back(out_dims[i]);
+  }
+
+  // Same narrowing as before, now spelled out explicitly.
+  int index_size = static_cast<int>(index.numel());
+
+  // `r` is assigned on every path: the enforce above leaves INT32 and INT64
+  // as the only possible index dtypes, so the second branch is a plain else.
+  int r;
+  if (index_type == phi::DataType::INT32) {
+    auto index_data = const_cast<int *>(index.data<int>());
+    xpu::VectorParam<int> indices{nullptr, index_size, index_data};
+    r = xpu::scatter_grad<XPUType, int>(
+        ctx.x_context(),
+        reinterpret_cast<const XPUType *>(out_grad.data<T>()),
+        indices,
+        reinterpret_cast<XPUType *>(x_grad_data),
+        reinterpret_cast<XPUType *>(updates_grad_data),
+        x_grad_shape,
+        overwrite);
+  } else {
+    // index_type == phi::DataType::INT64
+    auto index_data = const_cast<int64_t *>(index.data<int64_t>());
+    xpu::VectorParam<int64_t> indices{nullptr, index_size, index_data};
+    r = xpu::scatter_grad<XPUType, int64_t>(
+        ctx.x_context(),
+        reinterpret_cast<const XPUType *>(out_grad.data<T>()),
+        indices,
+        reinterpret_cast<XPUType *>(x_grad_data),
+        reinterpret_cast<XPUType *>(updates_grad_data),
+        x_grad_shape,
+        overwrite);
+  }
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "scatter grad");
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(scatter_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::ScatterGradKernel,
+                   float,
+                   phi::dtype::float16) {}
--- a/test/xpu/test_compare_op_xpu.py
+++ b/test/xpu/test_compare_op_xpu.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#   Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -415,6 +415,33 @@ class XPUTestEqualOp(XPUOpTestWrapper):
            self.x_shape = [11, 17]
            self.y_shape = []
+    class EqualOpTestCaseBoolBase(TestCompareOpBase):
+        def config(self):
+            self.dtype = np.bool_
+            self.op_type = 'equal'
+            self.compute = np.equal
+            self.choices = [True, False]
+            self.set_shape()
+        def set_shape(self):
+            self.x_shape = [11, 17]
+            self.y_shape = [11, 17]
+        def set_case(self):
+            self.x = np.random.choice(self.choices, self.x_shape)
+            self.y = np.random.choice(self.choices, self.y_shape)
+            self.result = self.compute(self.x, self.y)
+    class EqualOpTestCaseBool1(EqualOpTestCaseBoolBase):
+        def set_shape(self):
+            self.x_shape = [11, 17]
+            self.y_shape = [1]
+    class EqualOpTestCaseBool2(EqualOpTestCaseBoolBase):
+        def set_shape(self):
+            self.x_shape = [1]
+            self.y_shape = [11, 17]
 support_types = get_xpu_op_support_types('equal')
 for stype in support_types:

--- a/test/xpu/test_reduce_max_op_xpu.py
+++ b/test/xpu/test_reduce_max_op_xpu.py
-#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#   Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -45,7 +45,8 @@ class XPUTestReduceMaxOp(XPUOpTestWrapper):
                'keep_dim': self.keep_dim,
                'dim': self.axis,
            }
-            self.inputs = {'X': np.random.random(self.shape).astype("float32")}
+            self.dtype = self.in_type
+            self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)}
            if self.attrs['reduce_all']:
                self.outputs = {'Out': self.inputs['X'].max()}
            else:
@@ -74,6 +75,27 @@ class XPUTestReduceMaxOp(XPUOpTestWrapper):
            self.reduce_all = False
            self.keep_dim = True
+    class XPUTestReduceMaxCase2(XPUTestReduceMaxBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (1,)
+            self.reduce_all = False
+            self.keep_dim = True
+    class XPUTestReduceMaxCase3(XPUTestReduceMaxBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (1,)
+            self.reduce_all = False
+            self.keep_dim = False
+    class XPUTestReduceMaxCase4(XPUTestReduceMaxBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (1,)
+            self.reduce_all = True
+            self.keep_dim = False
 support_types = get_xpu_op_support_types('reduce_max')
 for stype in support_types:

--- a/test/xpu/test_scatter_op_xpu.py
+++ b/test/xpu/test_scatter_op_xpu.py
-#  Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#  Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -139,11 +139,14 @@ class XPUTestScatterOp(XPUOpTestWrapper):
            self.op_type = "scatter"
            self.place = paddle.XPUPlace(0)
            self.dtype = self.in_type
-            self.__class__.no_need_check_grad = True
+            self.__class__.no_need_check_grad = False
        def test_check_output(self):
            self.check_output_with_place(self.place)
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
 support_types = get_xpu_op_support_types('scatter')
 for stype in support_types:

--- a/test/xpu/test_sparse_utils_op_xpu.py
+++ b/test/xpu/test_sparse_utils_op_xpu.py
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import numpy as np
+import paddle
+class TestSparseCreate(unittest.TestCase):
+    def test_create_coo_by_tensor(self):
+        indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
+        values = [1, 2, 3, 4, 5]
+        dense_shape = [3, 4]
+        dense_indices = paddle.to_tensor(indices)
+        dense_elements = paddle.to_tensor(values, dtype='float32')
+        coo = paddle.sparse.sparse_coo_tensor(
+            dense_indices, dense_elements, dense_shape, stop_gradient=False
+        )
+        assert np.array_equal(indices, coo.indices().numpy())
+        assert np.array_equal(values, coo.values().numpy())
+    def test_create_coo_by_np(self):
+        indices = [[0, 1, 2], [1, 2, 0]]
+        values = [1.0, 2.0, 3.0]
+        dense_shape = [3, 3]
+        coo = paddle.sparse.sparse_coo_tensor(indices, values, dense_shape)
+        assert np.array_equal(3, coo.nnz())
+        assert np.array_equal(indices, coo.indices().numpy())
+        assert np.array_equal(values, coo.values().numpy())
+    def test_place(self):
+        indices = [[0, 1], [0, 1]]
+        values = [1.0, 2.0]
+        dense_shape = [2, 2]
+        coo = paddle.sparse.sparse_coo_tensor(indices, values, dense_shape)
+        assert coo.place.is_xpu_place()
+        assert coo.values().place.is_xpu_place()
+        assert coo.indices().place.is_xpu_place()
+    def test_dtype(self):
+        indices = [[0, 1], [0, 1]]
+        values = [1.0, 2.0]
+        dense_shape = [2, 2]
+        indices = paddle.to_tensor(indices, dtype='int32')
+        values = paddle.to_tensor(values, dtype='float32')
+        coo = paddle.sparse.sparse_coo_tensor(
+            indices, values, dense_shape, dtype='float64'
+        )
+        assert coo.dtype == paddle.float64
+    def test_create_coo_no_shape(self):
+        indices = [[0, 1], [0, 1]]
+        values = [1.0, 2.0]
+        indices = paddle.to_tensor(indices, dtype='int32')
+        values = paddle.to_tensor(values, dtype='float32')
+        coo = paddle.sparse.sparse_coo_tensor(indices, values)
+        assert [2, 2] == coo.shape
+if __name__ == "__main__":
+    unittest.main()