From 26b845e21fa5847fec5a48069ead38c918081d89 Mon Sep 17 00:00:00 2001
From: Zhangjingyu06 <92561254+Zhangjingyu06@users.noreply.github.com>
Date: Fri, 31 Dec 2021 21:59:24 +0800
Subject: [PATCH] [XPU]add split op for kunlun2,*test=kunlun (#38277)

* [XPU]add split op for kunlun2,*test=kunlun

* [XPU]add split op for kunlun2,*test=kunlun

* [XPU]add split op for kunlun,*test=kunlun

Co-authored-by: QingshuChen
---
 paddle/fluid/operators/split_op_xpu.cc             |  65 +++++++++
 .../fluid/platform/device/xpu/xpu1_op_list.h       |   2 +
 .../fluid/platform/device/xpu/xpu2_op_list.h       |   2 +
 .../tests/unittests/xpu/test_split_op_xpu.py       | 127 ++++++++++++++++++
 4 files changed, 196 insertions(+)
 create mode 100644 paddle/fluid/operators/split_op_xpu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py

diff --git a/paddle/fluid/operators/split_op_xpu.cc b/paddle/fluid/operators/split_op_xpu.cc
new file mode 100644
index 0000000000..bd8c691487
--- /dev/null
+++ b/paddle/fluid/operators/split_op_xpu.cc
@@ -0,0 +1,65 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/operators/split_op.h"
+#include <string>
+#include <vector>
+#include "paddle/fluid/platform/device/xpu/xpu_header.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename DeviceContext, typename T>
+class SplitXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("X");
+    auto output = ctx.MultiOutput<Tensor>("Out");
+    int num = ctx.Attr<int>("num");
+    std::vector<int> sections = ctx.Attr<std::vector<int>>("sections");
+    int axis = ctx.Attr<int>("axis");
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    auto in_dims = input->dims();
+
+    auto input_shape = framework::vectorize<int>(in_dims);
+    std::vector<int> split_lists;
+    std::vector<T*> out_ptrs;
+    auto outs_number = output.size();
+    std::vector<framework::DDim> outs_dims =
+        UpdateOutsDims(true, true, in_dims, num, sections, axis, outs_number);
+    for (size_t i = 0; i < output.size(); ++i) {
+      output[i]->Resize(outs_dims[i]);
+      out_ptrs.push_back(output[i]->mutable_data<T>(ctx.GetPlace()));
+      split_lists.push_back(output[i]->dims()[axis]);
+    }
+
+    int r = xpu::split<T>(dev_ctx.x_context(), input->data<T>(), out_ptrs,
+                          input_shape, split_lists, axis);
+    PADDLE_ENFORCE_EQ(
+        r, XPU_SUCCESS,
+        platform::errors::External("XPU split kernel return wrong value[%d %s]",
+                                   r, XPUAPIErrorMsg[r]));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(
+    split, ops::SplitXPUKernel<paddle::platform::XPUDeviceContext, float>,
+    ops::SplitXPUKernel<paddle::platform::XPUDeviceContext, int>);
+#endif
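Note on the kernel above: the output extents along `axis` are computed on the host (UpdateOutsDims) and handed to xpu::split as split_lists. A minimal NumPy sketch of that shape arithmetic, assuming the same (4, 5, 6) input the unit tests below use; `equal_parts`, `uneven_parts`, and the helper variables are illustrative names, not Paddle API:

    import numpy as np

    x = np.random.random((4, 5, 6)).astype('float32')

    # num=3 along axis 2: three equal (4, 5, 2) slices, mirroring the
    # `num` attribute path of the kernel.
    equal_parts = np.split(x, 3, axis=2)

    # sections=[2, 1, -1] along axis 2: explicit sizes, with the single -1
    # inferred from what remains (6 - 2 - 1 = 3), as UpdateOutsDims does.
    sections = [2, 1, -1]
    inferred = x.shape[2] - sum(s for s in sections if s > 0)
    sizes = [inferred if s == -1 else s for s in sections]
    offsets = np.cumsum(sizes)[:-1]         # split points: [2, 3]
    uneven_parts = np.split(x, offsets, axis=2)

    print([p.shape for p in equal_parts])   # [(4, 5, 2), (4, 5, 2), (4, 5, 2)]
    print([p.shape for p in uneven_parts])  # [(4, 5, 2), (4, 5, 1), (4, 5, 3)]

The cumsum bridges a small mismatch in conventions: the op's `sections` attribute carries per-output sizes, while np.split expects cumulative split points.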
diff --git a/paddle/fluid/platform/device/xpu/xpu1_op_list.h b/paddle/fluid/platform/device/xpu/xpu1_op_list.h
index a08e6a70c9..cf8c321a58 100644
--- a/paddle/fluid/platform/device/xpu/xpu1_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu1_op_list.h
@@ -267,6 +267,8 @@ XPUOpMap& get_kl1_ops() {
       {"softmax_with_cross_entropy_grad",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"split", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                              pOpKernelType(vartype::INT32, XPUPlace())})},
       {"sqrt_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"square_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index b4ad88ce6a..142685a64e 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -310,6 +310,8 @@ XPUOpMap& get_kl2_ops() {
       {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                                 pOpKernelType(vartype::FP16, XPUPlace())})},
+      {"split", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                              pOpKernelType(vartype::INT32, XPUPlace())})},
       {"squeeze2_grad",
        XPUKernelSet({pOpKernelType(vartype::FP64, XPUPlace()),
                      pOpKernelType(vartype::INT64, XPUPlace()),
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py
new file mode 100644
index 0000000000..20fd837ece
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+sys.path.append("..")
+import unittest
+import numpy as np
+import paddle.fluid.core as core
+from op_test import OpTest
+from op_test_xpu import XPUOpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+
+
+# test with attr(num)
+class TestSplitOp(XPUOpTest):
+    def initDefaultParameters(self):
+        self.dtype = 'float32'
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = []
+        self.num = 3
+        self.indices_or_sections = 3
+
+    def setUp(self):
+        self.__class__.op_type = 'split'
+        self.use_xpu = True
+        self.use_mkldnn = False
+        self.initDefaultParameters()
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections,
+            'num': self.num
+        }
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place)
+
+
+# unknown sections
+class TestSplitOp_2(XPUOpTest):
+    def initDefaultParameters(self):
+        self.dtype = 'float32'
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = [2, 1, -1]
+        self.num = 0
+        self.indices_or_sections = [2, 3]
+
+    def setUp(self):
+        self.__class__.op_type = 'split'
+        self.use_xpu = True
+        self.use_mkldnn = False
+        self.initDefaultParameters()
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections,
+            'num': self.num
+        }
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place)
+
+
+# test with int32
+class TestSplitOp_5(XPUOpTest):
+    def initDefaultParameters(self):
+        self.dtype = 'int32'
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = []
+        self.num = 3
+        self.indices_or_sections = 3
+
+    def setUp(self):
+        self.__class__.op_type = 'split'
+        self.use_xpu = True
+        self.use_mkldnn = False
+        self.initDefaultParameters()
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections,
+            'num': self.num
+        }
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab
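Beyond the OpTest cases, a quick end-to-end sketch of exercising the new kernel through the public paddle.split API. This assumes a Paddle build with XPU support and a Kunlun card visible as device 0 (both are assumptions about the runtime environment, not part of the patch):

    import paddle

    paddle.set_device('xpu:0')  # assumed device string; dispatches to the XPU kernels registered above

    # INT32 is one of the two dtypes registered for the XPU split kernel.
    x = paddle.arange(24, dtype='int32').reshape([2, 3, 4])

    # An int exercises the `num` attribute path: two equal halves along axis 2.
    a, b = paddle.split(x, num_or_sections=2, axis=2)
    print(a.shape, b.shape)  # [2, 3, 2] [2, 3, 2]

    # A list exercises the `sections` path, including an inferred -1 size.
    c, d = paddle.split(x, num_or_sections=[1, -1], axis=1)
    print(c.shape, d.shape)  # [2, 1, 4] [2, 2, 4]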