From ac2a94c768be26d11e36270652f154deb2ea4749 Mon Sep 17 00:00:00 2001 From: houj04 <35131887+houj04@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:28:58 +0800 Subject: [PATCH] [XPU] add cumsum op. test=kunlun (#47585) * [XPU] add cumsum op. test=kunlun * try to fix linker. test=kunlun * try to fix linker. test=kunlun * try to fix linker. test=kunlun * debug. test=kunlun * update xpu.cmake. remove unnecessary codes. test=kunlun. --- cmake/external/xpu.cmake | 4 +- .../fluid/platform/device/xpu/xpu2_op_list.h | 4 + paddle/phi/kernels/xpu/cum_kernel.cc | 75 +++++++++++++ .../tests/unittests/xpu/test_cumsum_op_xpu.py | 100 ++++++++++++++++++ python/setup.py.in | 8 +- 5 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 paddle/phi/kernels/xpu/cum_kernel.cc create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index d409ac062a..bd86e5ee5b 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so") if(NOT DEFINED XPU_BASE_URL) set(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev") - set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221016") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221104") else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() @@ -19,7 +19,7 @@ endif() if(NOT DEFINED XPU_XDNN_BASE_URL) set(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev") - set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20221016") + set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20221103") else() set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}") endif() diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index 07337dc747..692f0ab377 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -123,6 +123,10 
@@ XPUOpMap& get_kl2_ops() { XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"conv2d_transpose", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"cumsum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::INT32, XPUPlace()), + pOpKernelType(vartype::INT64, XPUPlace())})}, {"deformable_conv_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"deformable_conv", diff --git a/paddle/phi/kernels/xpu/cum_kernel.cc b/paddle/phi/kernels/xpu/cum_kernel.cc new file mode 100644 index 0000000000..17eca40086 --- /dev/null +++ b/paddle/phi/kernels/xpu/cum_kernel.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "paddle/phi/kernels/cum_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void CumsumKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const Scalar& axis,
+                  bool flatten,
+                  bool exclusive,
+                  bool reverse,
+                  DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  dev_ctx.template Alloc<T>(out);
+
+  // prepare for call xdnn api
+  std::vector<int> x_shape = phi::vectorize<int>(x.dims());
+  int axis_as_int = axis.to<int>();
+
+  if (flatten) {
+    // flatten to 1-dim vector
+    x_shape = {static_cast<int>(x.numel())};
+    axis_as_int = 0;
+  } else {
+    // not flatten
+    // check axis_as_int
+    auto out_dims = out->dims();
+
+    PADDLE_ENFORCE_EQ(
+        axis_as_int < out_dims.size() && axis_as_int >= (0 - out_dims.size()),
+        true,
+        phi::errors::OutOfRange(
+            "Attr(axis) is out of range, It's expected "
+            "to be in range of [-%d, %d]. But received Attr(axis) = %d.",
+            out_dims.size(),
+            out_dims.size() - 1,
+            axis_as_int));
+    if (axis_as_int < 0) {
+      axis_as_int += out_dims.size();
+    }
+  }
+
+  // template <typename T> DLL_EXPORT int cumsum(Context* ctx, const T* x, T* y,
+  // const std::vector<int>& xshape, bool reverse, bool exclusive, int axis);
+  int r = cumsum(dev_ctx.x_context(),
+                 reinterpret_cast<const XPUType*>(x.data<T>()),
+                 reinterpret_cast<XPUType*>(out->data<T>()),
+                 x_shape,
+                 reverse,
+                 exclusive,
+                 axis_as_int);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    cumsum, XPU, ALL_LAYOUT, phi::CumsumKernel, float, int, int64_t) {}
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py
new file mode 100644
index 0000000000..5d15ddcff6
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import sys
+
+sys.path.append("..")
+
+import paddle
+import paddle.fluid.core as core
+
+from op_test_xpu import XPUOpTest
+from xpu.get_test_cover_info import (
+    create_test_class,
+    get_xpu_op_support_types,
+    XPUOpTestWrapper,
+)
+
+paddle.enable_static()
+
+
+class XPUTestCumsumOP(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'cumsum'
+        self.use_dynamic_create_class = False
+
+    class TestCumsumOPBase(XPUOpTest):
+        def setUp(self):
+            self.place = paddle.XPUPlace(0)
+            self.xpu_version = core.get_xpu_device_version(0)
+            self.init_dtype()
+            self.set_case()
+
+        def set_case(self):
+            self.op_type = 'cumsum'
+            self.init_config()
+
+            self.data = np.random.uniform(
+                -100.0, 100.0, self.input_shape
+            ).astype(self.dtype)
+            reference_out = np.cumsum(self.data, axis=self.axis)
+            self.inputs = {
+                'X': self.data,
+            }
+            self.attrs = {
+                'use_xpu': True,
+                'axis': self.axis,
+                'flatten': True if self.axis is None else False,
+            }
+            self.outputs = {'Out': reference_out}
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def init_config(self):
+            self.input_shape = (2, 5)
+            self.axis = None
+
+    class XPUTestCumsum1(TestCumsumOPBase):
+        def init_config(self):
+            self.input_shape = [2, 768]
+            self.axis = 0
+
+    class XPUTestCumsum2(TestCumsumOPBase):
+        def init_config(self):
+            self.input_shape = [3, 8, 4096]
+            self.axis = 1
+
+    class XPUTestCumsum3(TestCumsumOPBase):
+        def init_config(self):
+            self.input_shape = [1024]
+            self.axis = 0
+
+    class XPUTestCumsum4(TestCumsumOPBase):
+        def init_config(self):
+            self.input_shape = [2, 2, 255]
+            self.axis = -1
+
+
+support_types = get_xpu_op_support_types('cumsum')
+for stype in support_types:
+    create_test_class(globals(), XPUTestCumsumOP, stype)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/setup.py.in b/python/setup.py.in
index b58d7b052d..e596e55b55 100755
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -570,9 +570,11 @@ if '${WITH_XPU}' == 'ON':
         if os.system(command) != 0:
             raise Exception("patch ${XPU_API_LIB} failed, command: %s" % command)
         shutil.copy('${XPU_API_LIB}', libs_path)
-        shutil.copy('${XPU_RT_LIB}', libs_path)
-        package_data['paddle.libs']+=['${XPU_API_LIB_NAME}',
-                                      '${XPU_RT_LIB_NAME}']
+        package_data['paddle.libs']+=['${XPU_API_LIB_NAME}']
+        xpu_rt_lib_list = glob.glob('${XPU_RT_LIB}*')
+        for xpu_rt_lib_file in xpu_rt_lib_list:
+            shutil.copy(xpu_rt_lib_file, libs_path)
+            package_data['paddle.libs']+=[os.path.basename(xpu_rt_lib_file)]
     if '${WITH_XPU_BKCL}' == 'ON':
         shutil.copy('${XPU_BKCL_LIB}', libs_path)
-- 
GitLab