未验证 提交 5f1eb839 编写于 作者: A Aganlengzi 提交者: GitHub

[NPU] concat supports dtype int64 for model deepfm (#36327)

* [NPU] modify for model deepfm

* [NPU] remove precision control from unit tests

* [NPU] add more unit tests

* revert elementwise_mul related modification

* [NPU] add more unit tests for concat
上级 6920afeb
...@@ -122,8 +122,14 @@ namespace ops = paddle::operators; ...@@ -122,8 +122,14 @@ namespace ops = paddle::operators;
REGISTER_OP_NPU_KERNEL(concat, ops::ConcatNPUKernel<float>, REGISTER_OP_NPU_KERNEL(concat, ops::ConcatNPUKernel<float>,
ops::ConcatNPUKernel<paddle::platform::float16>, ops::ConcatNPUKernel<paddle::platform::float16>,
#ifdef PADDLE_WITH_ASCEND_INT64
ops::ConcatNPUKernel<int64_t>,
#endif
ops::ConcatNPUKernel<int>); ops::ConcatNPUKernel<int>);
REGISTER_OP_NPU_KERNEL(concat_grad, ops::ConcatGradNPUKernel<float>, REGISTER_OP_NPU_KERNEL(concat_grad, ops::ConcatGradNPUKernel<float>,
ops::ConcatGradNPUKernel<paddle::platform::float16>, ops::ConcatGradNPUKernel<paddle::platform::float16>,
#ifdef PADDLE_WITH_ASCEND_INT64
ops::ConcatGradNPUKernel<int64_t>,
#endif
ops::ConcatGradNPUKernel<int>); ops::ConcatGradNPUKernel<int>);
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import unittest import unittest
import sys import sys
sys.path.append("..") sys.path.append("..")
from op_test import OpTest from op_test import OpTest, skip_check_grad_ci
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -26,7 +26,7 @@ paddle.enable_static() ...@@ -26,7 +26,7 @@ paddle.enable_static()
SEED = 2021 SEED = 2021
class TestConcat(OpTest): class TestConcatOp(OpTest):
def setUp(self): def setUp(self):
self.set_npu() self.set_npu()
self.op_type = "concat" self.op_type = "concat"
...@@ -56,54 +56,161 @@ class TestConcat(OpTest): ...@@ -56,54 +56,161 @@ class TestConcat(OpTest):
def test_check_output(self): def test_check_output(self):
self.check_output_with_place(self.place) self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad_with_place(self.place, ['x0', 'x2'], 'Out')
self.check_grad_with_place(self.place, ['x1'], 'Out')
self.check_grad_with_place(self.place, ['x2'], 'Out')
def init_test_data(self): def init_test_data(self):
self.x0 = np.random.random((1, 4, 50)).astype(self.dtype) self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)
self.x1 = np.random.random((2, 4, 50)).astype(self.dtype) self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
self.x2 = np.random.random((3, 4, 50)).astype(self.dtype) self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
self.axis = 0 self.axis = 0
class TestConcatOp2(TestConcatOp):
    """Concat case with three equally shaped 4-D inputs joined along axis 1."""

    def init_test_data(self):
        """Fill ``x0``..``x2`` with random (2, 3, 4, 5) arrays and set ``axis``."""
        shape = (2, 3, 4, 5)
        # The generator is consumed left to right, so the random draws happen
        # in x0, x1, x2 order.
        self.x0, self.x1, self.x2 = (
            np.random.random(shape).astype(self.dtype) for _ in range(3))
        self.axis = 1
@skip_check_grad_ci(
reason="The function 'check_grad' for large inputs is too slow.")
class TestConcatOp3(TestConcatOp):
def init_test_data(self):
self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype)
self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype)
self.x2 = np.random.random((1, 128, 170, 256)).astype(self.dtype)
self.axis = 1
def test_check_grad(self): def test_check_grad(self):
self.check_grad_with_place(self.place, ['x0', 'x2'], 'Out') pass
self.check_grad_with_place(self.place, ['x1'], 'Out')
self.check_grad_with_place(self.place, ['x2'], 'Out')
@skip_check_grad_ci(
    reason="This test will meet fetch error when there is a null grad. "
    "The detailed information is in PR#17015.")
class TestConcatOp4(TestConcatOp):
    """Concat case along axis 0 where the last input has a zero-sized dim."""

    def init_test_data(self):
        """Create two (2, 3, 4, 5) inputs and one empty (0, 3, 4, 5) input."""
        shapes = ((2, 3, 4, 5), (2, 3, 4, 5), (0, 3, 4, 5))
        self.x0, self.x1, self.x2 = [
            np.random.random(shape).astype(self.dtype) for shape in shapes
        ]
        self.axis = 0

    def test_check_grad(self):
        """Intentionally empty: the gradient check is skipped for this case."""
class TestConcatOp5(TestConcatOp):
    """Concat case using a negative axis (-3) on 4-D inputs."""

    def init_test_data(self):
        """Create inputs that differ only in dim 1 (sizes 1, 2, 3)."""
        shapes = [(5, width, 4, 5) for width in (1, 2, 3)]
        self.x0, self.x1, self.x2 = [
            np.random.random(shape).astype(self.dtype) for shape in shapes
        ]
        self.axis = -3
#----------------Concat Fp16----------------
def create_test_fp16(parent):
    """Create a float16 variant of ``parent`` and register it in this module.

    The generated class subclasses ``parent``, overrides ``init_dtype`` so
    that ``self.dtype`` is ``np.float16``, and is stored in the module
    globals under the name ``"<parent name>_Fp16"``.

    Args:
        parent: Test case class to derive the float16 variant from.
    """

    class TestConcatFp16(parent):
        def init_dtype(self):
            self.dtype = np.float16

    cls_name = "{0}_{1}".format(parent.__name__, "Fp16")
    TestConcatFp16.__name__ = cls_name
    # unittest builds test ids from __qualname__; without this every
    # generated class would report as
    # "create_test_fp16.<locals>.TestConcatFp16", hiding which parent failed.
    TestConcatFp16.__qualname__ = cls_name
    globals()[cls_name] = TestConcatFp16
# Register an Fp16 variant for each concat test case, in declaration order.
for _parent in (TestConcatOp, TestConcatOp2, TestConcatOp3, TestConcatOp4,
                TestConcatOp5):
    create_test_fp16(_parent)
# Drop the loop variable so no extra module-level alias of a TestCase remains.
del _parent
#----------------Concat Int64----------------
def create_test_int64(parent):
    """Create an int64 variant of ``parent`` and register it in this module.

    The generated class subclasses ``parent``, overrides ``init_dtype`` so
    that ``self.dtype`` is ``np.int64``, turns ``test_check_grad`` into a
    no-op, and is stored in the module globals as ``"<parent name>_Int64"``.

    The parents build their inputs with
    ``np.random.random(shape).astype(self.dtype)``. Those samples lie in
    [0, 1) and all truncate to 0 for an integer dtype, which would make the
    output comparison trivially pass. ``init_test_data`` is therefore
    overridden to replace ``x0``..``x2`` with random integers of the same
    shapes.

    Args:
        parent: Test case class to derive from. It must provide
            ``init_test_data`` that sets ``x0``, ``x1`` and ``x2``.
    """

    class TestConcatInt64(parent):
        def init_dtype(self):
            self.dtype = np.int64

        def init_test_data(self):
            # Let the parent choose shapes and axis, then swap in data that
            # is not all zeros. This generates the data twice, which is
            # accepted for the sake of a single point of change.
            super(TestConcatInt64, self).init_test_data()
            for attr in ("x0", "x1", "x2"):
                shape = getattr(self, attr).shape
                values = np.random.randint(-100, 100, size=shape)
                setattr(self, attr, values.astype(self.dtype))

        def test_check_grad(self):
            # Gradient check is intentionally skipped for the integer variant.
            pass

    cls_name = "{0}_{1}".format(parent.__name__, "Int64")
    TestConcatInt64.__name__ = cls_name
    # unittest builds test ids from __qualname__; without this every
    # generated class would report as
    # "create_test_int64.<locals>.TestConcatInt64".
    TestConcatInt64.__qualname__ = cls_name
    globals()[cls_name] = TestConcatInt64
# Register an Int64 variant for each concat test case, in declaration order.
for _parent in (TestConcatOp, TestConcatOp2, TestConcatOp3, TestConcatOp4,
                TestConcatOp5):
    create_test_int64(_parent)
# Drop the loop variable so no extra module-level alias of a TestCase remains.
del _parent
class TestConcatAPIWithLoDTensorArray(unittest.TestCase):
"""
Test concat api when the input(x) is a LoDTensorArray.
"""
class TestConcatFP16(OpTest):
def setUp(self): def setUp(self):
self.set_npu() self.set_npu()
self.op_type = "concat"
self.place = paddle.NPUPlace(0) self.place = paddle.NPUPlace(0)
self.init_dtype() self.axis = 1
self.init_test_data() self.iter_num = 3
self.input_shape = [2, 3]
self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]} self.x = np.random.random(self.input_shape).astype("float32")
self.attrs = {'axis': self.axis}
if self.axis < 0: def set_program(self, use_fluid_api):
self.actual_axis = self.axis + len(self.x0.shape) paddle.enable_static()
self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0 if use_fluid_api:
self.program = fluid.Program()
with fluid.program_guard(self.program):
input = fluid.layers.assign(self.x)
tensor_array = fluid.layers.create_array(dtype='float32')
zero = fluid.layers.fill_constant(
shape=[1], value=0, dtype="int64")
for i in range(self.iter_num):
fluid.layers.array_write(input, zero + i, tensor_array)
self.out_var = fluid.layers.concat(tensor_array, axis=self.axis)
else: else:
self.actual_axis = self.axis self.program = paddle.static.Program()
with paddle.static.program_guard(self.program):
input = paddle.assign(self.x)
tensor_array = fluid.layers.create_array(
dtype='float32'
) # Api create_array is not supported in paddle 2.0 yet.
zero = paddle.zeros(shape=[1], dtype="int64")
self.outputs = { for i in range(self.iter_num):
'Out': np.concatenate( # Api array_write is not supported in paddle 2.0 yet.
(self.x0, self.x1, self.x2), axis=self.actual_axis) fluid.layers.array_write(input, zero + i, tensor_array)
}
self.out_var = paddle.concat(tensor_array, axis=self.axis)
def set_npu(self): def set_npu(self):
self.__class__.use_npu = True self.__class__.use_npu = True
self.__class__.no_need_check_grad = True
def init_dtype(self):
self.dtype = np.float16
def test_check_output(self): def test_fluid_api(self):
self.check_output_with_place(self.place) self._run_static_mode(use_fluid_api=True)
def init_test_data(self): def test_paddle_api(self):
self.x0 = np.random.random((1, 4, 50)).astype(self.dtype) self._run_static_mode(use_fluid_api=False)
self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
self.x2 = np.random.random((3, 4, 50)).astype(self.dtype) def _run_static_mode(self, use_fluid_api):
self.axis = 0 self.set_program(use_fluid_api)
self.assertTrue(self.out_var.shape[self.axis] == -1)
exe = fluid.Executor(self.place)
res = exe.run(self.program, fetch_list=self.out_var)
self.assertTrue(
np.array_equal(
res[0],
np.concatenate(
[self.x] * self.iter_num, axis=self.axis)))
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册