From ac3dc0bbad72999d9c48f1494dee22a325809b95 Mon Sep 17 00:00:00 2001
From: joeqiao12 <45232181+joeqiao12@users.noreply.github.com>
Date: Tue, 25 Jan 2022 16:02:00 +0800
Subject: [PATCH] [MLU]add mlu kernel for split and concat (#39020)

* [MLU]add mlu kernel for concat and split op

* delete device_context DEPS
---
 paddle/fluid/operators/concat_op_mlu.cc       |  85 +++++++
 paddle/fluid/operators/split_op_mlu.cc        |  88 +++++++
 .../fluid/platform/device/mlu/CMakeLists.txt  |   2 +-
 .../tests/unittests/mlu/test_concat_op_mlu.py | 223 +++++++++++++++++
 .../tests/unittests/mlu/test_split_op_mlu.py  | 234 ++++++++++++++++++
 5 files changed, 631 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/operators/concat_op_mlu.cc
 create mode 100644 paddle/fluid/operators/split_op_mlu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py
 create mode 100644 python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py

diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc
new file mode 100644
index 00000000000..f7a1cae72be
--- /dev/null
+++ b/paddle/fluid/operators/concat_op_mlu.cc
@@ -0,0 +1,85 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/concat_op.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class ConcatMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto ins = ctx.MultiInput<framework::LoDTensor>("X");
+    framework::LoDTensor* out = ctx.Output<framework::LoDTensor>("Out");
+    PADDLE_ENFORCE_NOT_NULL(ins[0],
+                            platform::errors::NotFound(
+                                "The first input tensor is not initialized."));
+    auto axis = ctx.Attr<int>("axis");
+    auto ins_size = ins.size();
+    bool need_resize_out_dims = false;
+    if (ctx.HasInput("AxisTensor")) {
+      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
+      axis = GetDataFromTensor<int>(axis_tensor)[0];
+      need_resize_out_dims = true;
+    }
+    axis = ComputeAxis(static_cast<int64_t>(axis),
+                       static_cast<int64_t>(ins[0]->dims().size()));
+
+    if (need_resize_out_dims) {
+      const size_t n = ins.size();
+      std::vector<framework::DDim> ins_dims(n);
+      for (size_t i = 0; i < n; i++) {
+        ins_dims[i] = ins[i]->dims();
+      }
+
+      framework::DDim out_dims = ComputeAndCheckShape(true, ins_dims, axis);
+      out->Resize(out_dims);
+    }
+    const int axis_t = axis;
+    const int ins_size_t = ins_size;
+    auto place = ctx.GetPlace();
+    out->mutable_data<T>(place);
+
+    // prepare CNNL tensor descriptors and data pointers
+    // init ins tensors
+    std::vector<const void*> inputs;
+    std::vector<MLUCnnlTensorDesc> input_descs;
+    std::vector<cnnlTensorDescriptor_t> desc_vector;
+    for (size_t i = 0; i < ins_size; i++) {
+      input_descs.emplace_back(MLUCnnlTensorDesc(
+          *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->type())));
+      desc_vector.push_back(input_descs.back().get());
+      inputs.push_back(GetBasePtr(ins[i]));
+    }
+    // init out tensors
+    MLUCnnlTensorDesc output_desc(*out, CNNL_LAYOUT_ARRAY,
+                                  ToCnnlDataType(out->type()));
+
+    // launch the CNNL concat kernel
+    MLUCnnl::Concat(ctx, ins_size_t, axis_t, desc_vector.data(), inputs.data(),
+                    output_desc.get(), GetBasePtr(out));
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_MLU_KERNEL(concat, ops::ConcatMLUKernel<float>,
+                       ops::ConcatMLUKernel<paddle::platform::float16>,
+                       ops::ConcatMLUKernel<int64_t>,
+                       ops::ConcatMLUKernel<bool>, ops::ConcatMLUKernel<int>,
+                       ops::ConcatMLUKernel<uint8_t>);
diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc
new file mode 100644
index 00000000000..c569c9bf091
--- /dev/null
+++ b/paddle/fluid/operators/split_op_mlu.cc
@@ -0,0 +1,88 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/split_op.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+class SplitMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    // init parameter
+    auto* in = ctx.Input<framework::Tensor>("X");
+    auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
+    int num = ctx.Attr<int>("num");
+    std::vector<int> sections = ctx.Attr<std::vector<int>>("sections");
+    int axis = ctx.Attr<int>("axis");
+    auto in_dims = in->dims();
+    auto out_size = outs.size();
+    auto num_tensor = num == 0 ? out_size : num;
+
+    bool need_resize_outs_dims = false;
+    if (ctx.HasInput("AxisTensor")) {
+      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
+      axis = GetDataFromTensor<int>(axis_tensor)[0];
+      need_resize_outs_dims = true;
+    }
+    auto sections_tensor_list =
+        ctx.MultiInput<framework::Tensor>("SectionsTensorList");
+    if (sections_tensor_list.size() > 0) {
+      sections = GetDataFromTensorList<int>(sections_tensor_list);
+      need_resize_outs_dims = true;
+    }
+    if (need_resize_outs_dims) {
+      std::vector<framework::DDim> outs_dims =
+          UpdateOutsDims(true, true, in_dims, num, sections, axis, out_size);
+      for (size_t j = 0; j < outs.size(); ++j) {
+        outs[j]->Resize(outs_dims[j]);
+      }
+    }
+
+    // init out tensors
+    std::vector<void*> vct_tensor;
+    std::vector<MLUCnnlTensorDesc> output_descs;
+    std::vector<cnnlTensorDescriptor_t> desc_vector;
+    auto place = ctx.GetPlace();
+    for (size_t i = 0; i < outs.size(); i++) {
+      outs[i]->mutable_data<T>(ctx.GetPlace());
+      output_descs.emplace_back(MLUCnnlTensorDesc(
+          *outs[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(outs[i]->type())));
+      desc_vector.push_back(output_descs.back().get());
+      vct_tensor.push_back(GetBasePtr(outs[i]));
+    }
+    // init in tensors
+    MLUCnnlTensorDesc input_desc(*in, CNNL_LAYOUT_ARRAY,
+                                 ToCnnlDataType(in->type()));
+
+    // launch the CNNL split kernel
+    MLUCnnl::Split(ctx, num_tensor, axis, input_desc.get(), GetBasePtr(in),
+                   desc_vector.data(), vct_tensor.data());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_MLU_KERNEL(split, ops::SplitMLUKernel<float>,
+                       ops::SplitMLUKernel<int64_t>, ops::SplitMLUKernel<int>,
+                       ops::SplitMLUKernel<bool>,
+                       ops::SplitMLUKernel<plat::float16>);
diff --git a/paddle/fluid/platform/device/mlu/CMakeLists.txt b/paddle/fluid/platform/device/mlu/CMakeLists.txt
index 9ef4439f39b..a4584f54637 100644
--- a/paddle/fluid/platform/device/mlu/CMakeLists.txt
+++ b/paddle/fluid/platform/device/mlu/CMakeLists.txt
@@ -5,6 +5,6 @@ IF(WITH_MLU)
   cc_library(mlu_stream SRCS mlu_stream.cc DEPS boost mlu_info stream_callback_manager)
-  cc_library(mlu_device_context SRCS device_context.cc DEPS mlu_stream )
+  cc_library(mlu_device_context SRCS device_context.cc DEPS mlu_stream eigen3)
   cc_test(mlu_device_context_test SRCS device_context_test.cc DEPS mlu_device_context)
 ENDIF()
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py
new file mode 100644
index 00000000000..3bfa96b7001
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py
@@ -0,0 +1,223 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest, skip_check_grad_ci
+import paddle
+import paddle.fluid as fluid
+
+paddle.enable_static()
+SEED = 2021
+
+
+class TestConcatOp(OpTest):
+    def setUp(self):
+        self.set_mlu()
+        self.op_type = "concat"
+        self.place = paddle.device.MLUPlace(0)
+        self.init_dtype()
+        self.init_test_data()
+
+        self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]}
+        self.attrs = {'axis': self.axis}
+        if self.axis < 0:
+            self.actual_axis = self.axis + len(self.x0.shape)
+            self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0
+        else:
+            self.actual_axis = self.axis
+
+        self.outputs = {
+            'Out': np.concatenate(
+                (self.x0, self.x1, self.x2), axis=self.actual_axis)
+        }
+
+    def set_mlu(self):
+        self.__class__.use_mlu = True
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        self.check_grad_with_place(self.place, ['x0', 'x2'], 'Out')
+        self.check_grad_with_place(self.place, ['x1'], 'Out')
+        self.check_grad_with_place(self.place, ['x2'], 'Out')
+
+    def init_test_data(self):
+        self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)
+        self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
+        self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
+        self.axis = 0
+
+
+class TestConcatOp2(TestConcatOp):
+    def init_test_data(self):
+        self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x2 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.axis = 1
+
+
+@skip_check_grad_ci(
+    reason="The function 'check_grad' for large inputs is too slow.")
+class TestConcatOp3(TestConcatOp):
+    def init_test_data(self):
+        self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype)
+        self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype)
+        self.x2 = np.random.random((1, 128, 170, 256)).astype(self.dtype)
+        self.axis = 1
+
+    def test_check_grad(self):
+        pass
+
+
+@skip_check_grad_ci(
+    reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015."
+)
+class TestConcatOp4(TestConcatOp):
+    def init_test_data(self):
+        self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x2 = np.random.random((0, 3, 4, 5)).astype(self.dtype)
+        self.axis = 0
+
+    def test_check_grad(self):
+        pass
+
+
+class TestConcatOp5(TestConcatOp):
+    def init_test_data(self):
+        self.x0 = np.random.random((5, 1, 4, 5)).astype(self.dtype)
+        self.x1 = np.random.random((5, 2, 4, 5)).astype(self.dtype)
+        self.x2 = np.random.random((5, 3, 4, 5)).astype(self.dtype)
+        self.axis = -3
+
+
+#----------------Concat Fp16----------------
+def create_test_fp16(parent):
+    class TestConcatFp16(parent):
+        def init_dtype(self):
+            self.dtype = np.float16
+
+    cls_name = "{0}_{1}".format(parent.__name__, "Fp16")
+    TestConcatFp16.__name__ = cls_name
+    globals()[cls_name] = TestConcatFp16
+
+
+create_test_fp16(TestConcatOp)
+create_test_fp16(TestConcatOp2)
+create_test_fp16(TestConcatOp3)
+create_test_fp16(TestConcatOp4)
+create_test_fp16(TestConcatOp5)
+
+
+#----------------Concat Int64----------------
+def create_test_int64(parent):
+    class TestConcatInt64(parent):
+        def init_dtype(self):
+            self.dtype = np.int64
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}".format(parent.__name__, "Int64")
+    TestConcatInt64.__name__ = cls_name
+    globals()[cls_name] = TestConcatInt64
+
+
+create_test_int64(TestConcatOp)
+create_test_int64(TestConcatOp2)
+create_test_int64(TestConcatOp3)
+create_test_int64(TestConcatOp4)
+create_test_int64(TestConcatOp5)
+
+
+#----------------Concat Int32----------------
+def create_test_int32(parent):
+    class TestConcatInt32(parent):
+        def init_dtype(self):
+            self.dtype = np.int32
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}".format(parent.__name__, "Int32")
+    TestConcatInt32.__name__ = cls_name
+    globals()[cls_name] = TestConcatInt32
+
+
+create_test_int32(TestConcatOp)
+create_test_int32(TestConcatOp2)
+create_test_int32(TestConcatOp3)
+create_test_int32(TestConcatOp4)
+create_test_int32(TestConcatOp5)
+
+
+#----------------Concat AxisTensor----------------
+def create_test_AxisTensor(parent):
+    class TestConcatAxisTensor(parent):
+        def setUp(self):
+            self.op_type = "concat"
+            self.init_dtype()
+            self.init_test_data()
+
+            self.inputs = {
+                'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)],
+                'AxisTensor': np.array([self.axis]).astype("int32")
+            }
+            self.attrs = {}
+
+            if self.axis < 0:
+                self.actual_axis = self.axis + len(self.x0.shape)
+                self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0
+            else:
+                self.actual_axis = self.axis
+
+            self.outputs = {
+                'Out': np.concatenate(
+                    (self.x0, self.x1, self.x2), axis=self.actual_axis)
+            }
+
+            self.place = paddle.device.MLUPlace(0)
+            self.__class__.use_mlu = True
+
+        def init_test_data(self):
+            self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)
+            self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
+            self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
+            self.axis = 0
+
+        def init_dtype(self):
+            self.dtype = np.float32
+
+    cls_name = "{0}_{1}".format(parent.__name__, "AxisTensor")
+    TestConcatAxisTensor.__name__ = cls_name
+    globals()[cls_name] = TestConcatAxisTensor
+
+
+create_test_AxisTensor(TestConcatOp)
+create_test_AxisTensor(TestConcatOp2)
+create_test_AxisTensor(TestConcatOp3)
+create_test_AxisTensor(TestConcatOp4)
+create_test_AxisTensor(TestConcatOp5)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py
new file mode 100644
index 00000000000..b8363545d22
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py
@@ -0,0 +1,234 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+
+paddle.enable_static()
+SEED = 2021
+
+
+class TestCase1(OpTest):
+    def setUp(self):
+        self.set_mlu()
+        self.set_example()
+        self.op_type = "split"
+        self.place = paddle.device.MLUPlace(0)
+        ipt = self.x.astype(self.dtype)
+        axis = self.axis if isinstance(self.axis, int) else int(self.axis[0])
+        tmp_outs = np.split(
+            ipt, axis=axis, indices_or_sections=self.num_or_sections)
+        tmp_outs = [o.astype(self.dtype) for o in tmp_outs]
+        self.outputs = {'Out': []}
+        self.outs = []
+        for i, o in enumerate(tmp_outs):
+            self.outputs["Out"].append((str(i), o))
+            self.outs.append(str(i))
+
+        self.attrs = {"axis": self.axis, "num": self.num_or_sections}
+        self.inputs = {}
+        self.inputs.update({'X': ipt.astype(self.dtype)})
+
+    def set_mlu(self):
+        self.__class__.use_mlu = True
+        self.__class__.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def set_example(self):
+        self.dtype = "float32"
+        self.x = np.random.random((2, 4, 6))
+        self.axis = 1
+        self.num_or_sections = 2
+
+
+class TestCase2(TestCase1):
+    def set_example(self):
+        self.dtype = "float32"
+        self.x = np.random.random((20, 4, 50))
+        self.axis = 0
+        self.num_or_sections = 4
+
+
+class TestCase4(TestCase1):
+    def set_example(self):
+        self.dtype = "float16"
+        self.x = np.random.random((4, 50, 20))
+        self.axis = 2
+        self.num_or_sections = 4
+
+
+# Test Sections
+class TestCase5(TestCase1):
+    def set_example(self):
+        super().set_example()
+        self.x = np.random.random((2, 10, 4))
+        self.axis = 1
+        self.num_or_sections = [2, 4, 8]
+
+    def setUp(self):
+        super().setUp()
+        self.attrs.update({"sections": [2, 2, 4, 2], "num": 0})
+
+
+class API_TestSplit(unittest.TestCase):
+    def test_out(self):
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            data = fluid.layers.data('data', shape=[-1, 10], dtype='float32')
+            x0, x1 = paddle.split(data, num_or_sections=(3, 7), axis=1)
+            place = fluid.MLUPlace(0)
+            exe = fluid.Executor(place)
+            input1 = np.random.random([1, 10]).astype('float32')
+            r0, r1 = exe.run(feed={"data": input1}, fetch_list=[x0, x1])
+            ex_x0, ex_x1 = np.split(input1, (3, ), axis=1)
+            self.assertTrue(np.allclose(ex_x0, r0))
+            self.assertTrue(np.allclose(ex_x1, r1))
+
+
+class API_TestSplit2(unittest.TestCase):
+    def test_out(self):
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            data = fluid.layers.data('data', shape=[-1, 10], dtype='float32')
+            x0, x1 = paddle.split(data, num_or_sections=2, axis=1)
+            place = fluid.MLUPlace(0)
+            exe = fluid.Executor(place)
+            input1 = np.random.random([1, 10]).astype('float32')
+            r0, r1 = exe.run(feed={"data": input1}, fetch_list=[x0, x1])
+            ex_x0, ex_x1 = np.split(input1, 2, axis=1)
+            self.assertTrue(np.allclose(ex_x0, r0))
+            self.assertTrue(np.allclose(ex_x1, r1))
+
+
+class API_TestDygraphSplit(unittest.TestCase):
+    def test_out1(self):
+        with fluid.dygraph.guard(paddle.MLUPlace(0)):
+            input_1 = np.random.random([4, 6, 6]).astype("int32")
+            # input is a variable which shape is [4, 6, 6]
+            input = fluid.dygraph.to_variable(input_1)
+            x0, x1, x2 = paddle.split(input, num_or_sections=3, axis=1)
+            x0_out = x0.numpy()
+            x1_out = x1.numpy()
+            x2_out = x2.numpy()
+            ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
+            self.assertTrue(np.allclose(ex_x0, x0_out))
+            self.assertTrue(np.allclose(ex_x1, x1_out))
+            self.assertTrue(np.allclose(ex_x2, x2_out))
+
+    def test_out2(self):
+        with fluid.dygraph.guard(paddle.MLUPlace(0)):
+            input_1 = np.random.random([4, 6, 6]).astype("int32")
+            # input is a variable which shape is [4, 6, 6]
+            input = fluid.dygraph.to_variable(input_1)
+            x0, x1, x2 = paddle.split(input, num_or_sections=[1, 2, 3], axis=1)
+            x0_out = x0.numpy()
+            x1_out = x1.numpy()
+            x2_out = x2.numpy()
+            ex_x0, ex_x1, ex_x2 = np.split(input_1, (1, 3), axis=1)
+            self.assertTrue(np.allclose(ex_x0, x0_out))
+            self.assertTrue(np.allclose(ex_x1, x1_out))
+            self.assertTrue(np.allclose(ex_x2, x2_out))
+
+
+# attr(axis) is Tensor
+class TestSplitOp_AxisTensor(OpTest):
+    def setUp(self):
+        self._set_op_type()
+        self.dtype = self.get_dtype()
+        self.init_data()
+        self.inputs = {
+            'X': self.x,
+            'AxisTensor': np.array([self.axis]).astype("int32")
+        }
+        self.attrs = {'sections': self.sections, 'num': self.num}
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+
+    def init_data(self):
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = []
+        self.num = 3
+        self.indices_or_sections = 3
+
+    def get_dtype(self):
+        return "float"
+
+    def _set_op_type(self):
+        self.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class TestSplitOp_SectionsTensor(OpTest):
+    def setUp(self):
+        self._set_op_type()
+        self.dtype = self.get_dtype()
+        self.init_data()
+        self.inputs = {'X': self.x}
+
+        sections_tensor = []
+        for index, ele in enumerate(self.sections):
+            sections_tensor.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs['SectionsTensorList'] = sections_tensor
+
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections_infer,
+            'num': self.num
+        }
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+
+    def init_data(self):
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 1
+        self.sections = [2, 1, 2]
+        self.sections_infer = [-1, -1, -1]
+        self.num = 0
+        self.indices_or_sections = [2, 3]
+
+    def get_dtype(self):
+        return "float"
+
+    def _set_op_type(self):
+        self.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
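
A minimal usage sketch, not part of the patch itself: it mirrors the dygraph cases in test_split_op_mlu.py and assumes a WITH_MLU build of Paddle with at least one MLU device available, so that paddle.concat and paddle.split dispatch to the kernels added above.

import numpy as np
import paddle
import paddle.fluid as fluid

# Run eagerly on the first MLU card; concat/split below are served by the
# ConcatMLUKernel and SplitMLUKernel registered in this patch.
with fluid.dygraph.guard(paddle.MLUPlace(0)):
    x0 = fluid.dygraph.to_variable(np.random.random((1, 4, 50)).astype("float32"))
    x1 = fluid.dygraph.to_variable(np.random.random((2, 4, 50)).astype("float32"))

    out = paddle.concat([x0, x1], axis=0)                  # shape [3, 4, 50]
    parts = paddle.split(out, num_or_sections=3, axis=0)   # three [1, 4, 50] pieces
    print(out.shape, [p.shape for p in parts])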