未验证 提交 ac3dc0bb 编写于 作者: J joeqiao12 提交者: GitHub

[MLU]add mlu kernel for split and concat (#39020)

* [MLU]add mlu kernel for concat and split op

* delete device_context DEPS
上级 20e23e1b
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
/**
 * MLU kernel for the `concat` operator.
 *
 * Joins every tensor of the multi-input "X" along one axis and writes the
 * result into "Out" by calling MLUCnnl::Concat.
 *
 * @tparam T element type the kernel is registered for.
 */
template <typename T>
class ConcatMLUKernel : public framework::OpKernel<T> {
 public:
  /**
   * Runs one concatenation.
   *
   * The axis is read from the "axis" attribute. When the optional
   * "AxisTensor" input is present, its first element replaces the attribute
   * and the output tensor is resized here from the input shapes.
   *
   * @param ctx execution context giving access to inputs, outputs and
   *            attributes of the operator.
   */
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto ins = ctx.MultiInput<framework::LoDTensor>("X");
    framework::LoDTensor* out = ctx.Output<framework::LoDTensor>("Out");
    PADDLE_ENFORCE_NOT_NULL(ins[0],
                            platform::errors::NotFound(
                                "The first input tensor is not initialized."));
    const size_t ins_size = ins.size();

    auto axis = ctx.Attr<int>("axis");
    bool need_resize_out_dims = false;
    if (ctx.HasInput("AxisTensor")) {
      // A runtime axis overrides the attribute; the output shape then has to
      // be recomputed below.
      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
      axis = GetDataFromTensor<int>(axis_tensor)[0];
      need_resize_out_dims = true;
    }
    // ComputeAxis receives the raw axis and the rank of the first input
    // (presumably to resolve negative axes -- confirm in concat_op.h).
    axis = ComputeAxis(static_cast<int64_t>(axis),
                       static_cast<int64_t>(ins[0]->dims().size()));

    if (need_resize_out_dims) {
      std::vector<framework::DDim> ins_dims(ins_size);
      for (size_t i = 0; i < ins_size; ++i) {
        ins_dims[i] = ins[i]->dims();
      }
      framework::DDim out_dims = ComputeAndCheckShape(true, ins_dims, axis);
      out->Resize(out_dims);
    }

    auto place = ctx.GetPlace();
    out->mutable_data<T>(place);

    // Collect one CNNL descriptor and one data pointer per input. The
    // vectors are sized up front so that filling them never reallocates.
    std::vector<const void*> inputs;
    std::vector<MLUCnnlTensorDesc> input_descs;
    std::vector<cnnlTensorDescriptor_t> desc_vector;
    inputs.reserve(ins_size);
    input_descs.reserve(ins_size);
    desc_vector.reserve(ins_size);
    for (size_t i = 0; i < ins_size; ++i) {
      input_descs.emplace_back(MLUCnnlTensorDesc(
          *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->type())));
      desc_vector.push_back(input_descs.back().get());
      inputs.push_back(GetBasePtr(ins[i]));
    }

    // Descriptor for the output tensor.
    MLUCnnlTensorDesc output_desc(*out, CNNL_LAYOUT_ARRAY,
                                  ToCnnlDataType(out->type()));

    // The input count is narrowed to int explicitly for the Concat call.
    MLUCnnl::Concat(ctx, static_cast<int>(ins_size), axis, desc_vector.data(),
                    inputs.data(), output_desc.get(), GetBasePtr(out));
  }
};
} // namespace operators
} // namespace paddle
// Registers ConcatMLUKernel as the MLU kernel of the `concat` op, once for
// each element type listed in the macro arguments.
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(concat, ops::ConcatMLUKernel<float>,
ops::ConcatMLUKernel<paddle::platform::float16>,
ops::ConcatMLUKernel<int64_t>,
ops::ConcatMLUKernel<bool>, ops::ConcatMLUKernel<int>,
ops::ConcatMLUKernel<uint8_t>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/split_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
/**
 * MLU kernel for the `split` operator.
 *
 * Splits input "X" along one axis into the tensors of the multi-output "Out"
 * by calling MLUCnnl::Split.
 *
 * @tparam T element type the kernel is registered for.
 */
template <typename T>
class SplitMLUKernel : public framework::OpKernel<T> {
 public:
  /**
   * Runs one split.
   *
   * "num", "sections" and "axis" are read from the attributes. The optional
   * inputs "AxisTensor" and "SectionsTensorList" replace the axis and the
   * sections respectively; when either is present the output shapes are
   * recomputed here before memory is allocated.
   *
   * @param ctx execution context giving access to inputs, outputs and
   *            attributes of the operator.
   */
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<framework::Tensor>("X");
    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
    int num = ctx.Attr<int>("num");
    std::vector<int> sections = ctx.Attr<std::vector<int>>("sections");
    int axis = ctx.Attr<int>("axis");
    auto in_dims = in->dims();
    auto out_size = outs.size();
    // num == 0 means "use the number of outputs". The cast keeps the count a
    // plain int instead of letting the ternary promote it to size_t.
    const int num_tensor = num == 0 ? static_cast<int>(out_size) : num;

    bool need_resize_outs_dims = false;
    if (ctx.HasInput("AxisTensor")) {
      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
      axis = GetDataFromTensor(axis_tensor)[0];
      need_resize_outs_dims = true;
    }
    auto sections_tensor_list =
        ctx.MultiInput<framework::Tensor>("SectionsTensorList");
    if (sections_tensor_list.size() > 0) {
      sections = GetDataFromTensorList(sections_tensor_list);
      need_resize_outs_dims = true;
    }

    if (need_resize_outs_dims) {
      std::vector<framework::DDim> outs_dims =
          UpdateOutsDims(true, true, in_dims, num, sections, axis, out_size);
      for (size_t j = 0; j < outs.size(); ++j) {
        outs[j]->Resize(outs_dims[j]);
      }
    }

    // Allocate every output and collect one CNNL descriptor and one data
    // pointer per output. The vectors are sized up front so that filling
    // them never reallocates.
    std::vector<void*> vct_tensor;
    std::vector<MLUCnnlTensorDesc> output_descs;
    std::vector<cnnlTensorDescriptor_t> desc_vector;
    vct_tensor.reserve(out_size);
    output_descs.reserve(out_size);
    desc_vector.reserve(out_size);
    auto place = ctx.GetPlace();
    for (size_t i = 0; i < outs.size(); ++i) {
      outs[i]->mutable_data<T>(place);
      output_descs.emplace_back(MLUCnnlTensorDesc(
          *outs[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(outs[i]->type())));
      desc_vector.push_back(output_descs.back().get());
      vct_tensor.push_back(GetBasePtr(outs[i]));
    }

    // Descriptor for the input tensor.
    MLUCnnlTensorDesc input_desc(*in, CNNL_LAYOUT_ARRAY,
                                 ToCnnlDataType(in->type()));

    MLUCnnl::Split(ctx, num_tensor, axis, input_desc.get(), GetBasePtr(in),
                   desc_vector.data(), vct_tensor.data());
  }
};
} // namespace operators
} // namespace paddle
// Registers SplitMLUKernel as the MLU kernel of the `split` op, once for each
// element type listed in the macro arguments.
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(split, ops::SplitMLUKernel<float>,
ops::SplitMLUKernel<int64_t>, ops::SplitMLUKernel<int>,
ops::SplitMLUKernel<bool>,
ops::SplitMLUKernel<plat::float16>);
......@@ -5,6 +5,6 @@ IF(WITH_MLU)
cc_library(mlu_stream SRCS mlu_stream.cc DEPS boost mlu_info stream_callback_manager)
cc_library(mlu_device_context SRCS device_context.cc DEPS mlu_stream )
cc_library(mlu_device_context SRCS device_context.cc DEPS mlu_stream eigen3)
cc_test(mlu_device_context_test SRCS device_context_test.cc DEPS mlu_device_context)
ENDIF()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest, skip_check_grad_ci
import paddle
import paddle.fluid as fluid
paddle.enable_static()
SEED = 2021
class TestConcatOp(OpTest):
    """Compares the MLU `concat` op with `np.concatenate` on three inputs.

    Subclasses vary the case by overriding `init_dtype` and `init_test_data`.
    """

    def set_mlu(self):
        self.__class__.use_mlu = True

    def init_dtype(self):
        self.dtype = np.float32

    def init_test_data(self):
        # Inputs differ only in their leading dimension; they are joined there.
        self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)
        self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
        self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
        self.axis = 0

    def setUp(self):
        self.set_mlu()
        self.op_type = "concat"
        self.place = paddle.device.MLUPlace(0)
        self.init_dtype()
        self.init_test_data()

        self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]}
        self.attrs = {'axis': self.axis}

        # A negative axis is shifted by the rank of x0 and clamped at zero
        # before it is handed to numpy for the reference result.
        if self.axis >= 0:
            self.actual_axis = self.axis
        else:
            self.actual_axis = max(self.axis + len(self.x0.shape), 0)

        expected = np.concatenate(
            (self.x0, self.x1, self.x2), axis=self.actual_axis)
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        # Gradients are checked for three different input selections in turn.
        for wanted in (['x0', 'x2'], ['x1'], ['x2']):
            self.check_grad_with_place(self.place, wanted, 'Out')
class TestConcatOp2(TestConcatOp):
    """Three identically shaped 4-D inputs joined along axis 1."""

    def init_test_data(self):
        shape = (2, 3, 4, 5)
        self.x0, self.x1, self.x2 = [
            np.random.random(shape).astype(self.dtype) for _ in range(3)
        ]
        self.axis = 1
@skip_check_grad_ci(
    reason="The function 'check_grad' for large inputs is too slow.")
class TestConcatOp3(TestConcatOp):
    """Large 4-D inputs joined along axis 1; the gradient check is a no-op."""

    def init_test_data(self):
        # Only the size of dimension 1 differs between the three inputs.
        channel_sizes = (256, 128, 128)
        self.x0, self.x1, self.x2 = [
            np.random.random((1, channels, 170, 256)).astype(self.dtype)
            for channels in channel_sizes
        ]
        self.axis = 1

    def test_check_grad(self):
        pass
@skip_check_grad_ci(
    reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015."
)
class TestConcatOp4(TestConcatOp):
    """Joins along axis 0 where the last input has a leading dimension of 0."""

    def init_test_data(self):
        leading_dims = (2, 2, 0)
        self.x0, self.x1, self.x2 = [
            np.random.random((rows, 3, 4, 5)).astype(self.dtype)
            for rows in leading_dims
        ]
        self.axis = 0

    def test_check_grad(self):
        pass
class TestConcatOp5(TestConcatOp):
    """Uses a negative axis (-3) on 4-D inputs that differ in dimension 1."""

    def init_test_data(self):
        second_dims = (1, 2, 3)
        self.x0, self.x1, self.x2 = [
            np.random.random((5, width, 4, 5)).astype(self.dtype)
            for width in second_dims
        ]
        self.axis = -3
#----------------Concat Fp16----------------
def create_test_fp16(parent):
    """Create a float16 variant of `parent` and publish it in module globals.

    The generated class is stored under the name ``<parent name>_Fp16``.
    """

    class TestConcatFp16(parent):
        def init_dtype(self):
            self.dtype = np.float16

    generated_name = parent.__name__ + "_" + "Fp16"
    TestConcatFp16.__name__ = generated_name
    globals()[generated_name] = TestConcatFp16
# Generate a float16 variant for each of the five concat test classes.
create_test_fp16(TestConcatOp)
create_test_fp16(TestConcatOp2)
create_test_fp16(TestConcatOp3)
create_test_fp16(TestConcatOp4)
create_test_fp16(TestConcatOp5)
#----------------Concat Int64----------------
def create_test_int64(parent):
class TestConcatInt64(parent):
def init_dtype(self):
self.dtype = np.int64
def test_check_grad(self):
pass
cls_name = "{0}_{1}".format(parent.__name__, "Int64")
TestConcatInt64.__name__ = cls_name
globals()[cls_name] = TestConcatInt64
# Generate an int64 variant for each of the five concat test classes.
create_test_int64(TestConcatOp)
create_test_int64(TestConcatOp2)
create_test_int64(TestConcatOp3)
create_test_int64(TestConcatOp4)
create_test_int64(TestConcatOp5)
#----------------Concat Int32----------------
def create_test_int32(parent):
class TestConcatInt32(parent):
def init_dtype(self):
self.dtype = np.int32
def test_check_grad(self):
pass
cls_name = "{0}_{1}".format(parent.__name__, "Int32")
TestConcatInt32.__name__ = cls_name
globals()[cls_name] = TestConcatInt32
# Generate an int32 variant for each of the five concat test classes.
create_test_int32(TestConcatOp)
create_test_int32(TestConcatOp2)
create_test_int32(TestConcatOp3)
create_test_int32(TestConcatOp4)
create_test_int32(TestConcatOp5)
#----------------Concat AxisTensor----------------
def create_test_AxisTensor(parent):
    """Create a variant of `parent` that passes the axis as an input tensor.

    The generated class feeds the axis through the `AxisTensor` input (an
    int32 array holding one value) instead of the `axis` attribute, and is
    stored in module globals under the name ``<parent name>_AxisTensor``.

    NOTE(review): the generated class defines its own `init_test_data` and
    `init_dtype`, so the shapes, axis and dtype of `parent` are not used.
    Confirm whether that is intended.
    """

    class TestConcatAxisTensor(parent):
        def setUp(self):
            self.op_type = "concat"
            # init_dtype() stores the dtype on self and returns nothing, so
            # its result must not be assigned: doing so would reset
            # self.dtype to None and the inputs would fall back to float64.
            self.init_dtype()
            self.init_test_data()

            self.inputs = {
                'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)],
                'AxisTensor': np.array([self.axis]).astype("int32")
            }
            # The axis travels through the AxisTensor input only.
            self.attrs = {}

            # A negative axis is shifted by the rank of x0 and clamped at
            # zero before it is handed to numpy for the reference result.
            if self.axis < 0:
                self.actual_axis = max(self.axis + len(self.x0.shape), 0)
            else:
                self.actual_axis = self.axis

            self.outputs = {
                'Out': np.concatenate(
                    (self.x0, self.x1, self.x2), axis=self.actual_axis)
            }

            self.place = paddle.device.MLUPlace(0)
            self.__class__.use_mlu = True

        def init_test_data(self):
            self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)
            self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
            self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
            self.axis = 0

        def init_dtype(self):
            self.dtype = np.float32

    cls_name = "{0}_{1}".format(parent.__name__, "AxisTensor")
    TestConcatAxisTensor.__name__ = cls_name
    globals()[cls_name] = TestConcatAxisTensor
# Generate an AxisTensor variant for each of the five concat test classes.
create_test_AxisTensor(TestConcatOp)
create_test_AxisTensor(TestConcatOp2)
create_test_AxisTensor(TestConcatOp3)
create_test_AxisTensor(TestConcatOp4)
create_test_AxisTensor(TestConcatOp5)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
paddle.enable_static()
SEED = 2021
class TestCase1(OpTest):
    """Compares the MLU `split` op with `np.split`.

    Subclasses vary the case by overriding `set_example`.
    """

    def set_mlu(self):
        self.__class__.use_mlu = True
        self.__class__.op_type = "split"

    def set_example(self):
        self.dtype = "float32"
        self.x = np.random.random((2, 4, 6))
        self.axis = 1
        self.num_or_sections = 2

    def setUp(self):
        self.set_mlu()
        self.set_example()
        self.op_type = "split"
        self.place = paddle.device.MLUPlace(0)

        ipt = self.x.astype(self.dtype)
        # The axis is either a plain int or a sequence whose first item is used.
        if isinstance(self.axis, int):
            axis = self.axis
        else:
            axis = int(self.axis[0])

        # Reference pieces from numpy, cast to the dtype under test.
        pieces = [
            piece.astype(self.dtype)
            for piece in np.split(ipt, self.num_or_sections, axis=axis)
        ]
        # Outputs are named "0", "1", ... in split order.
        self.outs = [str(index) for index in range(len(pieces))]
        self.outputs = {'Out': list(zip(self.outs, pieces))}

        self.attrs = {"axis": self.axis, "num": self.num_or_sections}
        self.inputs = {'X': ipt.astype(self.dtype)}

    def test_check_output(self):
        self.check_output_with_place(self.place)
class TestCase2(TestCase1):
    """Splits a (20, 4, 50) float32 input into 4 parts along axis 0."""

    def set_example(self):
        self.axis = 0
        self.num_or_sections = 4
        self.dtype = "float32"
        self.x = np.random.random((20, 4, 50))
class TestCase4(TestCase1):
    """Splits a (4, 50, 20) float16 input into 4 parts along axis 2."""

    def set_example(self):
        self.axis = 2
        self.num_or_sections = 4
        self.dtype = "float16"
        self.x = np.random.random((4, 50, 20))
# Test Sections
class TestCase5(TestCase1):
    """Splits by explicit section sizes instead of an equal-part count."""

    def setUp(self):
        super().setUp()
        # Replace the "num" attribute set by the parent and add the section
        # sizes that correspond to the numpy split indices [2, 4, 8].
        self.attrs["sections"] = [2, 2, 4, 2]
        self.attrs["num"] = 0

    def set_example(self):
        super().set_example()
        self.x = np.random.random((2, 10, 4))
        self.axis = 1
        # Passed to np.split as split indices along axis 1.
        self.num_or_sections = [2, 4, 8]
class API_TestSplit(unittest.TestCase):
    """Static-graph check of `paddle.split` with explicit section sizes."""

    def test_out(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            data = fluid.layers.data('data', shape=[-1, 10], dtype='float32')
            x0, x1 = paddle.split(data, num_or_sections=(3, 7), axis=1)
            exe = fluid.Executor(fluid.MLUPlace(0))
            input1 = np.random.random([1, 10]).astype('float32')
            fetched = exe.run(feed={"data": input1}, fetch_list=[x0, x1])
            r0, r1 = fetched
            # Sections (3, 7) correspond to a single numpy split index of 3.
            ex_x0, ex_x1 = np.split(input1, (3, ), axis=1)
            for expected, actual in ((ex_x0, r0), (ex_x1, r1)):
                self.assertTrue(np.allclose(expected, actual))
class API_TestSplit2(unittest.TestCase):
    """Static-graph check of `paddle.split` into two equal parts."""

    def test_out(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            data = fluid.layers.data('data', shape=[-1, 10], dtype='float32')
            x0, x1 = paddle.split(data, num_or_sections=2, axis=1)
            exe = fluid.Executor(fluid.MLUPlace(0))
            input1 = np.random.random([1, 10]).astype('float32')
            fetched = exe.run(feed={"data": input1}, fetch_list=[x0, x1])
            r0, r1 = fetched
            ex_x0, ex_x1 = np.split(input1, 2, axis=1)
            for expected, actual in ((ex_x0, r0), (ex_x1, r1)):
                self.assertTrue(np.allclose(expected, actual))
class API_TestDygraphSplit(unittest.TestCase):
def test_out1(self):
with fluid.dygraph.guard(paddle.MLUPlace(0)):
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable which shape is [4, 6, 6]
input = fluid.dygraph.to_variable(input_1)
x0, x1, x2 = paddle.split(input, num_or_sections=3, axis=1)
x0_out = x0.numpy()
x1_out = x1.numpy()
x2_out = x2.numpy()
ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
self.assertTrue(np.allclose(ex_x0, x0_out))
self.assertTrue(np.allclose(ex_x1, x1_out))
self.assertTrue(np.allclose(ex_x2, x2_out))
def test_out2(self):
with fluid.dygraph.guard(paddle.MLUPlace(0)):
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable which shape is [4, 6, 6]
input = fluid.dygraph.to_variable(input_1)
x0, x1, x2 = paddle.split(input, num_or_sections=[1, 2, 3], axis=1)
x0_out = x0.numpy()
x1_out = x1.numpy()
x2_out = x2.numpy()
ex_x0, ex_x1, ex_x2 = np.split(input_1, (1, 3), axis=1)
self.assertTrue(np.allclose(ex_x0, x0_out))
self.assertTrue(np.allclose(ex_x1, x1_out))
self.assertTrue(np.allclose(ex_x2, x2_out))
# attr(axis) is Tensor
class TestSplitOp_AxisTensor(OpTest):
    """Checks MLU `split` when the axis is supplied as an input tensor.

    NOTE(review): get_dtype() returns "float", which NumPy treats as float64;
    confirm that this is the dtype intended for the MLU kernel.
    """

    def _set_op_type(self):
        self.op_type = "split"

    def get_dtype(self):
        return "float"

    def init_data(self):
        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
        self.axis = 2
        self.sections = []
        self.num = 3
        self.indices_or_sections = 3

    def setUp(self):
        self._set_op_type()
        self.dtype = self.get_dtype()
        self.init_data()

        # The axis is fed as a one-element int32 array, not as an attribute.
        axis_tensor = np.array([self.axis]).astype("int32")
        self.inputs = {'X': self.x, 'AxisTensor': axis_tensor}
        self.attrs = {'sections': self.sections, 'num': self.num}

        pieces = np.split(self.x, self.indices_or_sections, self.axis)
        self.outputs = {
            'Out': [('out%d' % index, pieces[index])
                    for index in range(len(pieces))]
        }

        self.place = paddle.device.MLUPlace(0)
        self.__class__.use_mlu = True

    def test_check_output(self):
        self.check_output_with_place(self.place)
class TestSplitOp_SectionsTensor(OpTest):
    """Checks MLU `split` when section sizes are supplied as input tensors."""

    def _set_op_type(self):
        self.op_type = "split"

    def get_dtype(self):
        return "float"

    def init_data(self):
        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
        self.axis = 1
        self.sections = [2, 1, 2]
        self.sections_infer = [-1, -1, -1]
        self.num = 0
        self.indices_or_sections = [2, 3]

    def setUp(self):
        self._set_op_type()
        self.dtype = self.get_dtype()
        self.init_data()

        # One single-element int32 tensor per section size, named "x0", "x1", ...
        sections_tensor = [
            ("x" + str(index), np.ones((1)).astype('int32') * ele)
            for index, ele in enumerate(self.sections)
        ]
        self.inputs = {'X': self.x}
        self.inputs['SectionsTensorList'] = sections_tensor

        self.attrs = {
            'axis': self.axis,
            'sections': self.sections_infer,
            'num': self.num
        }

        pieces = np.split(self.x, self.indices_or_sections, self.axis)
        self.outputs = {
            'Out': [('out%d' % index, pieces[index])
                    for index in range(len(pieces))]
        }

        self.place = paddle.device.MLUPlace(0)
        self.__class__.use_mlu = True

    def test_check_output(self):
        self.check_output_with_place(self.place)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册