[NPU] fix fp16 (PART II) (#40537)

[NPU] fix fp16 (PART II)

[NPU] fix fp16 (PART II) (#40537)
[NPU] fix fp16 (PART II)
1a13fa0f · furnace · GitHub · ef4ef154 · 1a13fa0f · 1a13fa0f
19 changed file
--- a/paddle/fluid/operators/conv_op_npu.cc
+++ b/paddle/fluid/operators/conv_op_npu.cc
@@ -356,7 +356,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {

    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
    if (filter_grad) {
-      filter_grad->mutable_data<T>(ctx.GetPlace());
+      filter_grad->mutable_data<float>(ctx.GetPlace());
      std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());

      const auto& runner = NpuOpRunner(

--- a/paddle/fluid/operators/pad_op_npu.cc
+++ b/paddle/fluid/operators/pad_op_npu.cc
@@ -90,5 +90,5 @@ namespace plat = paddle::platform;
 REGISTER_OP_NPU_KERNEL(pad, ops::PadNPUKernel<plat::float16>,
                       ops::PadNPUKernel<float>, ops::PadNPUKernel<int>);

-REGISTER_OP_NPU_KERNEL(pad_grad, ops::PadNPUKernel<plat::float16>,
+REGISTER_OP_NPU_KERNEL(pad_grad, ops::PadGradNPUKernel<plat::float16>,
                       ops::PadGradNPUKernel<float>);
--- a/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
@@ -22,4 +22,5 @@ if (WITH_ASCEND_CL)
    set_tests_properties(test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200)
    set_tests_properties(test_conv2d_op_npu PROPERTIES TIMEOUT 300)
    set_tests_properties(test_matmulv2_op_npu PROPERTIES TIMEOUT 300)
+    set_tests_properties(test_elementwise_add_op_npu PROPERTIES TIMEOUT 200)
 endif()
--- a/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_depthwise_conv_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_depthwise_conv_npu.py
@@ -132,36 +132,50 @@ class TestDepthwiseConvNPU(OpTest):
        self.check_output_with_place(self.place, atol=1e-2)

    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
        if self.dilations[0] == 1 and self.dilations[1] == 1:
-            self.check_grad_with_place(
-                self.place, {'Input', 'Filter'},
-                'Output',
-                max_relative_error=0.03,
-                numeric_place=paddle.CPUPlace())
+            if self.dtype == np.float16:
+                self.check_grad_with_place(
+                    self.place, {'Input', 'Filter'},
+                    'Output',
+                    max_relative_error=0.9)
+            else:
+                self.check_grad_with_place(
+                    self.place, {'Input', 'Filter'},
+                    'Output',
+                    max_relative_error=0.03,
+                    numeric_place=paddle.CPUPlace())

    def test_check_grad_no_filter(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            self.place, ['Input'],
-            'Output',
-            no_grad_set=set(['Filter']),
-            max_relative_error=0.03,
-            numeric_place=paddle.CPUPlace())
-
-    def test_check_grad_no_input(self):
-        if self.dtype == np.float16:
-            return
-        if self.dilations[0] == 1 and self.dilations[1] == 1:
            self.check_grad_with_place(
-                self.place, ['Filter'],
+                self.place, ['Input'],
                'Output',
-                no_grad_set=set(['Input']),
+                no_grad_set=set(['Filter']),
+                max_relative_error=0.9)
+        else:
+            self.check_grad_with_place(
+                self.place, ['Input'],
+                'Output',
+                no_grad_set=set(['Filter']),
                max_relative_error=0.03,
                numeric_place=paddle.CPUPlace())

+    def test_check_grad_no_input(self):
+        if self.dilations[0] == 1 and self.dilations[1] == 1:
+            if self.dtype == np.float16:
+                self.check_grad_with_place(
+                    self.place, ['Filter'],
+                    'Output',
+                    no_grad_set=set(['Input']),
+                    max_relative_error=0.9)
+            else:
+                self.check_grad_with_place(
+                    self.place, ['Filter'],
+                    'Output',
+                    no_grad_set=set(['Input']),
+                    max_relative_error=0.03,
+                    numeric_place=paddle.CPUPlace())
+
    def init_data_format(self):
        self.data_format = "NCHW"

@@ -267,32 +281,46 @@ class TestDepthwiseConvNPU_Padding(OpTest):

    def test_check_grad(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            self.place, {'Input', 'Filter'},
-            'Output',
-            max_relative_error=0.03,
-            numeric_place=paddle.CPUPlace())
+            self.check_grad_with_place(
+                self.place, {'Input', 'Filter'},
+                'Output',
+                max_relative_error=1.2)
+        else:
+            self.check_grad_with_place(
+                self.place, {'Input', 'Filter'},
+                'Output',
+                max_relative_error=0.03,
+                numeric_place=paddle.CPUPlace())

    def test_check_grad_no_filter(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            self.place, ['Input'],
-            'Output',
-            max_relative_error=0.03,
-            no_grad_set=set(['Filter']),
-            numeric_place=paddle.CPUPlace())
+            self.check_grad_with_place(
+                self.place, ['Input'],
+                'Output',
+                max_relative_error=0.7,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad_with_place(
+                self.place, ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']),
+                numeric_place=paddle.CPUPlace())

    def test_check_grad_no_input(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            self.place, ['Filter'],
-            'Output',
-            max_relative_error=0.03,
-            no_grad_set=set(['Input']),
-            numeric_place=paddle.CPUPlace())
+            self.check_grad_with_place(
+                self.place, ['Filter'],
+                'Output',
+                max_relative_error=0.8,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad_with_place(
+                self.place, ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']),
+                numeric_place=paddle.CPUPlace())

    def init_data_format(self):
        self.data_format = "NCHW"

--- a/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_npu.py
@@ -127,8 +127,6 @@ class TestConv2DOp(OpTest):
        self.check_output_with_place(fluid.NPUPlace(0), atol=1e-2)

    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            fluid.NPUPlace(0), {'Input', 'Filter'},
            'Output',
@@ -136,8 +134,6 @@ class TestConv2DOp(OpTest):
            numeric_place=paddle.CPUPlace())

    def test_check_grad_no_filter(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            fluid.NPUPlace(0), ['Input'],
            'Output',
@@ -146,8 +142,6 @@ class TestConv2DOp(OpTest):
            numeric_place=paddle.CPUPlace())

    def test_check_grad_no_input(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            fluid.NPUPlace(0), ['Filter'],
            'Output',
@@ -276,10 +270,13 @@ class TestConv2DOp_v2(OpTest):
    def set_npu(self):
        self.__class__.use_npu = True

+    def init_dtype(self):
+        self.dtype = np.float32
+
    def setUp(self):
        self.set_npu()
        self.op_type = "conv2d"
-        self.dtype = np.float32
+        self.init_dtype()
        self.init_kernel_type()
        self.init_group()
        self.init_dilation()
@@ -320,31 +317,45 @@ class TestConv2DOp_v2(OpTest):

    def test_check_grad(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            paddle.NPUPlace(0), {'Input', 'Filter'},
-            'Output',
-            max_relative_error=0.02,
-            numeric_place=paddle.CPUPlace())
+            self.check_grad_with_place(
+                paddle.NPUPlace(0), {'Input', 'Filter'},
+                'Output',
+                max_relative_error=1.1)
+        else:
+            self.check_grad_with_place(
+                paddle.NPUPlace(0), {'Input', 'Filter'},
+                'Output',
+                max_relative_error=0.02,
+                numeric_place=paddle.CPUPlace())

    def test_check_grad_no_filter(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            paddle.NPUPlace(0), ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']),
-            numeric_place=paddle.CPUPlace())
+            self.check_grad_with_place(
+                paddle.NPUPlace(0), ['Input'],
+                'Output',
+                max_relative_error=0.99,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad_with_place(
+                paddle.NPUPlace(0), ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']),
+                numeric_place=paddle.CPUPlace())

    def test_check_grad_no_input(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            paddle.NPUPlace(0), ['Filter'],
-            'Output',
-            no_grad_set=set(['Input']),
-            numeric_place=paddle.CPUPlace())
+            self.check_grad_with_place(
+                paddle.NPUPlace(0), ['Filter'],
+                'Output',
+                max_relative_error=0.99,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad_with_place(
+                paddle.NPUPlace(0), ['Filter'],
+                'Output',
+                no_grad_set=set(['Input']),
+                numeric_place=paddle.CPUPlace())

    def init_test_case(self):
        self.pad = [0, 0]

--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_add_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_add_op_npu.py
@@ -65,36 +65,59 @@ class TestElementwiseAddOp(OpTest):
        self.check_output_with_place(self.place)

    def test_check_grad_normal(self):
-        if self.dtype == np.float16 or self.dtype == np.int64:
+        if self.dtype == np.int64:
            return

-        self.check_grad_with_place(
-            self.place,
-            ['X', 'Y'],
-            'Out',
-            max_relative_error=0.006, )
+        if self.dtype == np.float16:
+            self.check_grad_with_place(
+                self.place,
+                ['X', 'Y'],
+                'Out',
+                max_relative_error=0.15, )
+        else:
+            self.check_grad_with_place(
+                self.place,
+                ['X', 'Y'],
+                'Out',
+                max_relative_error=0.006, )

    def test_check_grad_ingore_x(self):
-        if self.dtype == np.float16 or self.dtype == np.int64:
+        if self.dtype == np.int64:
            return

-        self.check_grad_with_place(
-            self.place,
-            ['Y'],
-            'Out',
-            no_grad_set=set("X"),
-            max_relative_error=0.006, )
+        if self.dtype == np.float16:
+            self.check_grad_with_place(
+                self.place,
+                ['Y'],
+                'Out',
+                no_grad_set=set("X"),
+                max_relative_error=0.92, )
+        else:
+            self.check_grad_with_place(
+                self.place,
+                ['Y'],
+                'Out',
+                no_grad_set=set("X"),
+                max_relative_error=0.006, )

    def test_check_grad_ingore_y(self):
-        if self.dtype == np.float16 or self.dtype == np.int64:
+        if self.dtype == np.int64:
            return

-        self.check_grad_with_place(
-            self.place,
-            ['X'],
-            'Out',
-            no_grad_set=set("Y"),
-            max_relative_error=0.006, )
+        if self.dtype == np.float16:
+            self.check_grad_with_place(
+                self.place,
+                ['X'],
+                'Out',
+                no_grad_set=set("Y"),
+                max_relative_error=0.8, )
+        else:
+            self.check_grad_with_place(
+                self.place,
+                ['X'],
+                'Out',
+                no_grad_set=set("Y"),
+                max_relative_error=0.006, )


 class TestFP16ElementwiseAddOp(TestElementwiseAddOp):

--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py
@@ -116,19 +116,13 @@ class TestElementwiseMaxOp(OpTest):
        self.check_output_with_place(self.place)

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            self.place, ['Y'], 'Out', no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            self.place, ['X'], 'Out', no_grad_set=set("Y"))

@@ -213,15 +207,11 @@ class TestElementwiseMaxOp_broadcast_2(TestElementwiseMaxOp):
        self.out = np.maximum(self.x, self.y.reshape(1, 1, 100))

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])

    def test_check_grad_ingore_x(self):
-        if self.dtype == np.float16:
-            return
        _, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['Y'],
@@ -230,8 +220,6 @@ class TestElementwiseMaxOp_broadcast_2(TestElementwiseMaxOp):
            user_defined_grads=[dy])

    def test_check_grad_ingore_y(self):
-        if self.dtype == np.float16:
-            return
        dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['X'],

--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py
@@ -64,32 +64,41 @@ class TestElementwiseMinOp(OpTest):

    def test_check_grad_normal(self):
        if self.dtype == np.float16:
-            return
-
-        self.check_grad_with_place(
-            self.place,
-            ['X', 'Y'],
-            'Out', )
+            self.check_grad_with_place(
+                self.place, ['X', 'Y'], 'Out', max_relative_error=0.5)
+        else:
+            self.check_grad_with_place(
+                self.place,
+                ['X', 'Y'],
+                'Out', )

    def test_check_grad_ingore_x(self):
        if self.dtype == np.float16:
-            return
-
-        self.check_grad_with_place(
-            self.place,
-            ['Y'],
-            'Out',
-            no_grad_set=set("X"), )
+            self.check_grad_with_place(
+                self.place, ['Y'],
+                'Out',
+                no_grad_set=set("X"),
+                max_relative_error=0.9)
+        else:
+            self.check_grad_with_place(
+                self.place,
+                ['Y'],
+                'Out',
+                no_grad_set=set("X"), )

    def test_check_grad_ingore_y(self):
        if self.dtype == np.float16:
-            return
-
-        self.check_grad_with_place(
-            self.place,
-            ['X'],
-            'Out',
-            no_grad_set=set("Y"), )
+            self.check_grad_with_place(
+                self.place, ['X'],
+                'Out',
+                no_grad_set=set("Y"),
+                max_relative_error=0.1)
+        else:
+            self.check_grad_with_place(
+                self.place,
+                ['X'],
+                'Out',
+                no_grad_set=set("Y"), )


 class TestElementwiseMinOpFp16(TestElementwiseMinOp):

--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py
@@ -114,8 +114,6 @@ class TestElementwisePow(OpTest):
        self.out = np.power(self.x, self.y)

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
@@ -184,8 +182,6 @@ class TestElementwisePowOp_broadcast_0(TestElementwisePow):
        self.out = np.power(self.x, self.y)

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
@@ -218,8 +214,6 @@ class TestElementwisePowOp_broadcast_1(TestElementwisePow):
        self.out = np.power(self.x, self.y.reshape(1, 100, 1))

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
@@ -252,8 +246,6 @@ class TestElementwisePowOp_broadcast_2(TestElementwisePow):
        self.out = np.power(self.x, self.y.reshape(100, 1, 1))

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
        self.check_grad_with_place(
            self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])

--- a/python/paddle/fluid/tests/unittests/npu/test_expand_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_expand_op_npu.py
@@ -34,7 +34,7 @@ class TestExpand(OpTest):

        self.init_dtype()
        np.random.seed(SEED)
-        x = np.random.randn(3, 1, 7).astype(self.dtype)
+        x = np.random.randn(30, 1, 7).astype(self.dtype)
        out = np.tile(x, [1, 10, 1])

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
@@ -50,12 +50,8 @@ class TestExpand(OpTest):
    def test_check_output(self):
        self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')


 class TestExpandV2(TestExpand):
@@ -66,7 +62,7 @@ class TestExpandV2(TestExpand):

        self.init_dtype()
        np.random.seed(SEED)
-        x = np.random.randn(3, 1, 7).astype(self.dtype)
+        x = np.random.randn(30, 1, 7).astype(self.dtype)
        out = np.tile(x, [1, 10, 1])
        expand_times = np.array([1, 10, 1]).astype(np.int32)

@@ -145,7 +141,7 @@ class TestExpand_expand_times_all_one(TestExpand):

        self.init_dtype()
        np.random.seed(SEED)
-        x = np.random.randn(3, 1, 7).astype(self.dtype)
+        x = np.random.randn(30, 1, 7).astype(self.dtype)
        out = np.tile(x, [1, 1, 1])

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}

--- a/python/paddle/fluid/tests/unittests/npu/test_huber_loss_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_huber_loss_op_npu.py
@@ -81,13 +81,9 @@ class TestHuberLossOp(OpTest):
        self.check_output_with_place(self.place)

    def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            self.place, ['Y'],
            'Out',
@@ -95,8 +91,6 @@ class TestHuberLossOp(OpTest):
            no_grad_set=set("residual"))

    def test_check_grad_ingore_y(self):
-        if self.dtype == np.float16:
-            return
        self.check_grad_with_place(
            self.place, ['X'],
            'Out',

--- a/python/paddle/fluid/tests/unittests/npu/test_label_smooth_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_label_smooth_op_npu.py
@@ -78,8 +78,10 @@ class TestLabelSmoothOp(OpTest):

    def test_check_grad(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(self.place, ['X'], 'Out')
+            self.check_grad_with_place(
+                self.place, ['X'], 'Out', max_relative_error=0.5)
+        else:
+            self.check_grad_with_place(self.place, ['X'], 'Out')


 class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp):

--- a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py
@@ -63,8 +63,10 @@ class TestLeadyRelu(OpTest):

    def test_check_grad(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(self.place, ['X'], 'Out')
+            self.check_grad_with_place(
+                self.place, ['X'], 'Out', max_relative_error=0.006)
+        else:
+            self.check_grad_with_place(self.place, ['X'], 'Out')


 class TestLeadyReluFP16(TestLeadyRelu):

--- a/python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py
@@ -63,9 +63,13 @@ class TestLogSoftmaxNPUOp(OpTest):

    def test_check_grad(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(
-            self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad])
+            self.check_grad_with_place(
+                self.place, ['X'], ['Out'],
+                user_defined_grads=[self.x_grad],
+                max_relative_error=0.02)
+        else:
+            self.check_grad_with_place(
+                self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad])


 def test_class(op_type, typename):

--- a/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py
@@ -77,8 +77,10 @@ class TestLookupTableV2(OpTest):

    def test_check_grad(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(self.place, ['W'], 'Out')
+            self.check_grad_with_place(
+                self.place, ['W'], 'Out', max_relative_error=0.01)
+        else:
+            self.check_grad_with_place(self.place, ['W'], 'Out')


 class TestLookupTableV2FP16(TestLookupTableV2):

--- a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_v2_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_v2_op_npu.py
@@ -39,10 +39,11 @@ class TestNearestInterpOp(OpTest):
        self.set_npu()
        self.out_size = None
        self.actual_shape = None
+        self.init_dtype()
        self.data_layout = 'NCHW'
        self.init_test_case()
        self.op_type = "nearest_interp_v2"
-        input_np = np.random.random(self.input_shape).astype("float32")
+        input_np = np.random.random(self.input_shape).astype(self.dtype)

        if self.data_layout == "NCHW":
            in_h = self.input_shape[2]
@@ -95,8 +96,21 @@ class TestNearestInterpOp(OpTest):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
-        self.check_grad_with_place(
-            self.place, ['X'], 'Out', in_place=True, max_relative_error=0.006)
+        if self.dtype == np.float16:
+            self.check_grad_with_place(
+                self.place, ['X'],
+                'Out',
+                in_place=True,
+                max_relative_error=0.02)
+        else:
+            self.check_grad_with_place(
+                self.place, ['X'],
+                'Out',
+                in_place=True,
+                max_relative_error=0.006)
+
+    def init_dtype(self):
+        self.dtype = np.float32

    def init_test_case(self):
        self.interp_method = 'nearest'
@@ -108,6 +122,11 @@ class TestNearestInterpOp(OpTest):
        self.align_corners = False


+class TestNearestNeighborInterpFP16(TestNearestInterpOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestNearestNeighborInterpCase1(TestNearestInterpOp):
    def init_test_case(self):
        self.interp_method = 'nearest'

--- a/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py
@@ -50,9 +50,10 @@ class TestPadOp(OpTest):

    def test_check_grad_normal(self):
        if self.dtype == np.float16:
-            return
-
-        self.check_grad_with_place(self.place, ['X'], 'Out')
+            self.check_grad_with_place(
+                self.place, ['X'], 'Out', max_relative_error=0.6)
+        else:
+            self.check_grad_with_place(self.place, ['X'], 'Out')

    def set_npu(self):
        self.__class__.use_npu = True

--- a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py
@@ -34,11 +34,12 @@ class TestRelu(OpTest):

        self.init_dtype()
        np.random.seed(SEED)
-        x = np.random.rand(3, 2).astype(self.dtype)
-        out = x

-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.attrs = {}
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        # The same reason with TestAbs
+        x[np.abs(x) < 0.005] = 0.02
+        out = np.maximum(x, 0)
+        self.inputs = {'X': x}
        self.outputs = {'Out': out}

    def set_npu(self):
@@ -50,32 +51,18 @@ class TestRelu(OpTest):
    def test_check_output(self):
        self.check_output_with_place(self.place)

+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad_with_place(
+                self.place, ['X'], 'Out', max_relative_error=0.006)
+        else:
+            self.check_grad_with_place(self.place, ['X'], 'Out')

-class TestReluFp16(OpTest):
-    def setUp(self):
-        self.set_npu()
-        self.op_type = "relu"
-        self.place = paddle.NPUPlace(0)
-
-        self.init_dtype()
-        np.random.seed(SEED)
-        x = np.random.rand(3, 2).astype(self.dtype)
-        out = x
-
-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.attrs = {}
-        self.outputs = {'Out': out}
-
-    def set_npu(self):
-        self.__class__.use_npu = True
-        self.__class__.no_need_check_grad = True

+class TestReluFp16(TestRelu):
    def init_dtype(self):
        self.dtype = np.float16

-    def test_check_output(self):
-        self.check_output_with_place(self.place, atol=1e-5)
-

 class TestReluNeg(OpTest):
    def setUp(self):

--- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -58,12 +58,17 @@ class TestSliceOp(OpTest):
        self.place = paddle.NPUPlace(0)

    def test_check_output(self):
-        self.check_output_with_place(self.place)
+        if self.dtype == np.float16:
+            self.check_output_with_place(self.place)
+        else:
+            self.check_output_with_place(self.place)

    def test_check_grad_normal(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(self.place, ['Input'], 'Out')
+            self.check_grad_with_place(
+                self.place, ['Input'], 'Out', max_relative_error=0.02)
+        else:
+            self.check_grad_with_place(self.place, ['Input'], 'Out')


 class TestSliceOp2(TestSliceOp):
@@ -347,8 +352,10 @@ class TestSliceOpDecsDim(OpTest):

    def test_check_grad_normal(self):
        if self.dtype == np.float16:
-            return
-        self.check_grad_with_place(self.place, ['Input'], 'Out')
+            self.check_grad_with_place(
+                self.place, ['Input'], 'Out', max_relative_error=0.5)
+        else:
+            self.check_grad_with_place(self.place, ['Input'], 'Out')


 class TestSliceOpDecsDimFp16(TestSliceOpDecsDim):