[NPU] concat supports dtype int64 for model deepfm (#36327)

* [NPU] modify for model deepfm
* [NPU] unit test: delete precision control
* [NPU] add more unit tests
* revert elementwise_mul-related modification
* [NPU] add more unit tests for concat

[NPU] concat supports dtype int64 for model deepfm (#36327)
* [NPU] modify for model deepfm
* [NPU] unit test: delete precision control
* [NPU] add more unit tests
* revert elementwise_mul-related modification
* [NPU] add more unit tests for concat
5f1eb839 · Aganlengzi · GitHub · 6920afeb · 5f1eb839 · 5f1eb839
Showing 145 additions and 32 deletions

paddle/fluid/operators/concat_op_npu.cc paddle/fluid/operators/concat_op_npu.cc +6 -0

python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py ...on/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py +139 -32

未找到文件。
--- a/paddle/fluid/operators/concat_op_npu.cc
+++ b/paddle/fluid/operators/concat_op_npu.cc
@@ -122,8 +122,14 @@ namespace ops = paddle::operators;

 REGISTER_OP_NPU_KERNEL(concat, ops::ConcatNPUKernel<float>,
                       ops::ConcatNPUKernel<paddle::platform::float16>,
+#ifdef PADDLE_WITH_ASCEND_INT64  // the int64 kernel below is registered only when this macro is defined
+                       ops::ConcatNPUKernel<int64_t>,
+#endif  // PADDLE_WITH_ASCEND_INT64
                       ops::ConcatNPUKernel<int>);

 REGISTER_OP_NPU_KERNEL(concat_grad, ops::ConcatGradNPUKernel<float>,
                       ops::ConcatGradNPUKernel<paddle::platform::float16>,
+#ifdef PADDLE_WITH_ASCEND_INT64  // the int64 grad kernel below is registered only when this macro is defined
+                       ops::ConcatGradNPUKernel<int64_t>,
+#endif  // PADDLE_WITH_ASCEND_INT64
                       ops::ConcatGradNPUKernel<int>);
--- a/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py
@@ -18,7 +18,7 @@ import numpy as np
 import unittest
 import sys
 sys.path.append("..")
-from op_test import OpTest
+from op_test import OpTest, skip_check_grad_ci
 import paddle
 import paddle.fluid as fluid

@@ -26,7 +26,7 @@ paddle.enable_static()
 SEED = 2021


-class TestConcat(OpTest):
+class TestConcatOp(OpTest):
    def setUp(self):
        self.set_npu()
        self.op_type = "concat"
@@ -56,54 +56,161 @@ class TestConcat(OpTest):
    def test_check_output(self):
        self.check_output_with_place(self.place)  # forward-output check on the device stored in self.place

+    def test_check_grad(self):
+        self.check_grad_with_place(self.place, ['x0', 'x2'], 'Out')  # x0 and x2 together against 'Out' (argument meaning per OpTest — TODO confirm)
+        self.check_grad_with_place(self.place, ['x1'], 'Out')  # x1 alone
+        self.check_grad_with_place(self.place, ['x2'], 'Out')  # x2 alone
+
    def init_test_data(self):
        self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)  # the three inputs differ only in dim 0 (1, 2, 3)
        self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
        self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
        self.axis = 0  # concatenate along the dimension in which the inputs differ

+
+class TestConcatOp2(TestConcatOp):  # variant: three identically shaped 4-D inputs
+    def init_test_data(self):
+        self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x2 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.axis = 1  # concatenate along a non-leading axis
+
+
+@skip_check_grad_ci(
+    reason="The function 'check_grad' for large inputs is too slow.")
+class TestConcatOp3(TestConcatOp):  # variant: large 4-D inputs, forward check only
+    def init_test_data(self):
+        self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype)  # inputs differ only in dim 1 (256, 128, 128)
+        self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype)
+        self.x2 = np.random.random((1, 128, 170, 256)).astype(self.dtype)
+        self.axis = 1
+
     def test_check_grad(self):
-        self.check_grad_with_place(self.place, ['x0', 'x2'], 'Out')
-        self.check_grad_with_place(self.place, ['x1'], 'Out')
-        self.check_grad_with_place(self.place, ['x2'], 'Out')
+        pass  # overrides the parent's gradient check with a no-op; see the decorator's reason string
+
+
+@skip_check_grad_ci(
+    reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015."
+)
+class TestConcatOp4(TestConcatOp):  # variant: one input has zero size along the concat axis
+    def init_test_data(self):
+        self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype)
+        self.x2 = np.random.random((0, 3, 4, 5)).astype(self.dtype)  # empty along dim 0
+        self.axis = 0
+
+    def test_check_grad(self):
+        pass  # overrides the parent's gradient check with a no-op; see the decorator's reason string
+
+
+class TestConcatOp5(TestConcatOp):  # variant: negative axis
+    def init_test_data(self):
+        self.x0 = np.random.random((5, 1, 4, 5)).astype(self.dtype)  # inputs differ only in dim 1 (1, 2, 3)
+        self.x1 = np.random.random((5, 2, 4, 5)).astype(self.dtype)
+        self.x2 = np.random.random((5, 3, 4, 5)).astype(self.dtype)
+        self.axis = -3  # for these 4-D inputs, -3 addresses dim 1
+
+
+#----------------Concat Fp16----------------
+def create_test_fp16(parent):  # derive a float16 variant of the test class `parent` and publish it in this module
+    class TestConcatFp16(parent):
+        def init_dtype(self):
+            self.dtype = np.float16  # only the dtype changes; everything else is inherited from parent
+
+    cls_name = "{0}_{1}".format(parent.__name__, "Fp16")  # e.g. "TestConcatOp_Fp16"
+    TestConcatFp16.__name__ = cls_name  # give each generated class a distinct name
+    globals()[cls_name] = TestConcatFp16  # bind it at module level under that name
+
+
+create_test_fp16(TestConcatOp)  # each call binds a "<Parent>_Fp16" class in module globals
+create_test_fp16(TestConcatOp2)
+create_test_fp16(TestConcatOp3)
+create_test_fp16(TestConcatOp4)
+create_test_fp16(TestConcatOp5)
+
+
+#----------------Concat Int64----------------
+def create_test_int64(parent):  # derive an int64 variant of the test class `parent` and publish it in this module
+    class TestConcatInt64(parent):
+        def init_dtype(self):
+            self.dtype = np.int64  # NOTE(review): parents build inputs with np.random.random (values in [0, 1)), so the int64 cast yields all zeros — confirm this is intended

+        def test_check_grad(self):
+            pass  # gradient check is replaced by a no-op for the int64 variants
+
+    cls_name = "{0}_{1}".format(parent.__name__, "Int64")  # e.g. "TestConcatOp_Int64"
+    TestConcatInt64.__name__ = cls_name  # give each generated class a distinct name
+    globals()[cls_name] = TestConcatInt64  # bind it at module level under that name
+
+
+create_test_int64(TestConcatOp)  # each call binds a "<Parent>_Int64" class in module globals
+create_test_int64(TestConcatOp2)
+create_test_int64(TestConcatOp3)
+create_test_int64(TestConcatOp4)
+create_test_int64(TestConcatOp5)
+
+
+class TestConcatAPIWithLoDTensorArray(unittest.TestCase):
+    """
+    Test concat api when the input(x) is a LoDTensorArray.
+    """

-class TestConcatFP16(OpTest):
    def setUp(self):
        self.set_npu()
-        self.op_type = "concat"
        self.place = paddle.NPUPlace(0)
-        self.init_dtype()
-        self.init_test_data()
-
-        self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]}
-        self.attrs = {'axis': self.axis}
-        if self.axis < 0:
-            self.actual_axis = self.axis + len(self.x0.shape)
-            self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0
+        self.axis = 1  # concatenation axis used by both API variants
+        self.iter_num = 3  # how many times the input is written into the tensor array
+        self.input_shape = [2, 3]
+        self.x = np.random.random(self.input_shape).astype("float32")  # the single tensor that is written repeatedly
+
+    def set_program(self, use_fluid_api):  # builds self.program and self.out_var with either the fluid or the paddle API
+        paddle.enable_static()
+        if use_fluid_api:
+            self.program = fluid.Program()
+            with fluid.program_guard(self.program):
+                input = fluid.layers.assign(self.x)
+                tensor_array = fluid.layers.create_array(dtype='float32')
+                zero = fluid.layers.fill_constant(
+                    shape=[1], value=0, dtype="int64")
+
+                for i in range(self.iter_num):
+                    fluid.layers.array_write(input, zero + i, tensor_array)  # same tensor written at index i
+
+                self.out_var = fluid.layers.concat(tensor_array, axis=self.axis)
        else:
-            self.actual_axis = self.axis
+            self.program = paddle.static.Program()
+            with paddle.static.program_guard(self.program):
+                input = paddle.assign(self.x)
+                tensor_array = fluid.layers.create_array(
+                    dtype='float32'
+                )  # Api create_array is not supported in paddle 2.0 yet.
+                zero = paddle.zeros(shape=[1], dtype="int64")

-        self.outputs = {
-            'Out': np.concatenate(
-                (self.x0, self.x1, self.x2), axis=self.actual_axis)
-        }
+                for i in range(self.iter_num):
+                    # Api array_write is not supported in paddle 2.0 yet.
+                    fluid.layers.array_write(input, zero + i, tensor_array)
+
+                self.out_var = paddle.concat(tensor_array, axis=self.axis)

    def set_npu(self):
        self.__class__.use_npu = True
-        self.__class__.no_need_check_grad = True
-
-    def init_dtype(self):
-        self.dtype = np.float16

-    def test_check_output(self):
-        self.check_output_with_place(self.place)
-
-    def init_test_data(self):
-        self.x0 = np.random.random((1, 4, 50)).astype(self.dtype)
-        self.x1 = np.random.random((2, 4, 50)).astype(self.dtype)
-        self.x2 = np.random.random((3, 4, 50)).astype(self.dtype)
-        self.axis = 0
+    def test_fluid_api(self):
+        self._run_static_mode(use_fluid_api=True)
+
+    def test_paddle_api(self):
+        self._run_static_mode(use_fluid_api=False)
+
+    def _run_static_mode(self, use_fluid_api):  # shared driver: build the program, run it, compare with NumPy
+        self.set_program(use_fluid_api)
+        self.assertTrue(self.out_var.shape[self.axis] == -1)  # static shape along the concat axis must be reported as -1
+        exe = fluid.Executor(self.place)
+        res = exe.run(self.program, fetch_list=self.out_var)  # NOTE(review): fetch_list receives a single variable, not a list — confirm Executor.run accepts this
+        self.assertTrue(
+            np.array_equal(
+                res[0],
+                np.concatenate(  # expected: self.x repeated iter_num times along self.axis
+                    [self.x] * self.iter_num, axis=self.axis)))


 if __name__ == '__main__':