From aaa71ea43cdef2ba1297cbe8f6b10b1ef651dc5e Mon Sep 17 00:00:00 2001
From: furnace <34057289+windstamp@users.noreply.github.com>
Date: Fri, 18 Mar 2022 16:34:46 +0800
Subject: [PATCH] [NPU] fix fp16 (PART I) (#40259)

[NPU] fix fp16 (PART I)
---
 paddle/fluid/operators/batch_norm_op_npu.cc        | 12 +++++------
 .../unittests/npu/test_batch_norm_op_npu.py        | 20 ++++++++++++++++---
 .../tests/unittests/npu/test_cos_op_npu.py         |  2 --
 .../unittests/npu/test_dropout_op_npu.py           |  2 --
 .../tests/unittests/npu/test_exp_op_npu.py         |  2 --
 .../unittests/npu/test_hard_sigmoid_op_npu.py      |  3 ---
 .../unittests/npu/test_hard_swish_op_npu.py        |  2 --
 .../tests/unittests/npu/test_log_op_npu.py         |  8 ++------
 .../tests/unittests/npu/test_norm_op_npu.py        |  3 ---
 .../tests/unittests/npu/test_p_norm_op_npu.py      |  2 --
 .../tests/unittests/npu/test_pool2d_op_npu.py      |  3 ---
 .../unittests/npu/test_reciprocal_op_npu.py        |  2 --
 .../tests/unittests/npu/test_relu6_op_npu.py       |  2 --
 .../unittests/npu/test_sigmoid_op_npu.py           |  2 --
 .../test_softmax_with_cross_entropy_op_npu.py      |  2 --
 .../tests/unittests/npu/test_sqrt_op_npu.py        | 11 +++++-----
 .../tests/unittests/npu/test_square_op_npu.py      |  2 --
 .../tests/unittests/npu/test_tanh_op_npu.py        | 11 +++++-----
 18 files changed, 35 insertions(+), 56 deletions(-)

diff --git a/paddle/fluid/operators/batch_norm_op_npu.cc b/paddle/fluid/operators/batch_norm_op_npu.cc
index a70b6e99116..ae03ecbcb16 100644
--- a/paddle/fluid/operators/batch_norm_op_npu.cc
+++ b/paddle/fluid/operators/batch_norm_op_npu.cc
@@ -76,10 +76,10 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> {
     auto *variance_out = ctx.Output<Tensor>("VarianceOut");
     auto *saved_mean = ctx.Output<Tensor>("SavedMean");
     auto *saved_variance = ctx.Output<Tensor>("SavedVariance");
-    mean_out->mutable_data<T>(ctx.GetPlace());
-    variance_out->mutable_data<T>(ctx.GetPlace());
-    saved_mean->mutable_data<T>(ctx.GetPlace());
-    saved_variance->mutable_data<T>(ctx.GetPlace());
+    mean_out->mutable_data<float>(ctx.GetPlace());
+    variance_out->mutable_data<float>(ctx.GetPlace());
+    saved_mean->mutable_data<float>(ctx.GetPlace());
+    saved_variance->mutable_data<float>(ctx.GetPlace());

     // if MomentumTensor is set, use MomentumTensor value, momentum
     // is only used in this training branch
@@ -170,8 +170,8 @@ class NPUBatchNormGradOpKernel : public framework::OpKernel<T> {
     auto stream = ctx.template device_context<platform::NPUDeviceContext>().stream();
     if (d_scale && d_bias) {
-      d_scale->mutable_data<T>(ctx.GetPlace());
-      d_bias->mutable_data<T>(ctx.GetPlace());
+      d_scale->mutable_data<float>(ctx.GetPlace());
+      d_bias->mutable_data<float>(ctx.GetPlace());
       if (use_global_stats) {
         const auto *running_mean = ctx.Input<Tensor>("Mean");
         const auto *running_variance = ctx.Input<Tensor>("Variance");
diff --git a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py
index 877f9904f34..e01b2b691a2 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py
@@ -144,6 +144,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
     def setUp(self):
         self.set_npu()
+        self.init_dtype()
         self.use_mkldnn = False
         self.fuse_with_relu = False
         self.data_formats = ["NCHW", "NHWC"]
@@ -153,6 +154,9 @@ class TestBatchNormOpTraining(unittest.TestCase):
         self.init_kernel_type()
         self.init_test_case()

+    def init_dtype(self):
+        self.dtype = np.float32
+
     def init_test_case(self):
         self.use_global_stats = False
         self.no_grad_set = set()
@@ -210,11 +214,16 @@ class TestBatchNormOpTraining(unittest.TestCase):
             scale_shape = [c]

             np.random.seed(123)
-            x = np.random.random_sample(shape).astype(np.float32)
+            x = np.random.random_sample(shape).astype(self.dtype)
             scale = np.random.random_sample(scale_shape).astype(np.float32)
             bias = np.random.random_sample(scale_shape).astype(np.float32)
             mean, variance = self.set_mean_variance(scale_shape, x, data_layout)
-            y_grad = np.random.random_sample(shape).astype(np.float32)
+
+            if self.dtype == np.float16:
+                mean = mean.astype(np.float32)
+                variance = variance.astype(np.float32)
+
+            y_grad = np.random.random_sample(shape).astype(self.dtype)
             momentum_var = np.array([momentum]).astype(np.float32)

             y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
@@ -275,7 +284,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
                 inputs=inputs,
                 outputs=outputs,
                 attrs=attrs)
-            block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)
+            block.create_var(name='y@GRAD', dtype=self.dtype, shape=y.shape)

             # generate backward op_desc
             grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
@@ -320,6 +329,11 @@ class TestBatchNormOpTraining(unittest.TestCase):
         pass


+class TestFP16BatchNormOpTraining(TestBatchNormOpTraining):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining):
     def init_test_case(self):
         self.use_global_stats = False
diff --git a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py
index 9b29fc812fa..a4769442b08 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py
@@ -51,8 +51,6 @@ class TestCos(OpTest):
         self.check_output_with_place(self.place, atol=1e-7)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')


diff --git a/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py
index bd9022f56a3..fea8502f2d7 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py
@@ -56,8 +56,6 @@ class TestDropoutOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')


diff --git a/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py
index ccd5f0649d8..6be2fe0086b 100755
--- a/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py
@@ -50,8 +50,6 @@ class TestExpNPUOP(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def init_dtype(self):
diff --git a/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py
index d7aafccc88c..f1d89cb8d56 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py
@@ -59,9 +59,6 @@ class TestNPUHardSigmoid(OpTest):
         self.check_output_with_place(self.place, atol=1e-5)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def set_npu(self):
diff --git a/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py
index 32042ba83a9..9495cdb8a55 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py
@@ -66,8 +66,6 @@ class TestHardSwishNPU(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         # There is a problem that precision of grad result using float32
         # can't satisfy the default precision requirement
         # when compared with numeric_grads, but the results on
diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py
index 9534431e99a..5da3cb0ce56 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py
@@ -50,12 +50,8 @@ class TestLog(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')


 class TestLogFp16(OpTest):
diff --git a/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py
index 2c41f09ff51..8e28b3fe413 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py
@@ -54,9 +54,6 @@ class TestNPUNormOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.006)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
index 3b75cba60b1..a7ca4edc524 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
@@ -51,8 +51,6 @@ class TestPnormOp(OpTest):
         self.check_output_with_place(paddle.NPUPlace(0))

     def test_check_grad(self):
-        if self.dtype == "float16":
-            return
         self.check_grad_with_place(
             paddle.NPUPlace(0), ['X'], 'Out', user_defined_grads=self.gradient)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py
index 2b8550a88de..4822abc3b25 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py
@@ -67,9 +67,6 @@ def create_test_fp16_class(parent):
             self.use_cudnn = False
             self.dtype = np.float16

-        def test_check_grad(self):
-            return
-
     cls_name = "{0}_{1}".format(parent.__name__, "Fp16Op")
     TestFp16Case.__name__ = cls_name
     globals()[cls_name] = TestFp16Case
diff --git a/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py
index e8f5de005d4..899d4ef43bd 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py
@@ -40,8 +40,6 @@ class TestNPUReciprocal(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.01)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py
index 601a351c015..b1cb5e02a73 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py
@@ -56,8 +56,6 @@ class TestRelu6(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def init_dtype(self):
diff --git a/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py
index 4516b25b59d..489f8bfb116 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py
@@ -44,8 +44,6 @@ class TestNPUSigmoid(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.01)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
index 8d78ee6a97e..f0ca7788345 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
@@ -87,8 +87,6 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         # fp32 has low precision, cpu and npu both need to relax the max_relative_error if using fp32
         self.check_grad_with_place(
             self.place, ['Logits'],
diff --git a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py
index acb99746d23..24b34fa625c 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py
@@ -50,12 +50,11 @@ class TestSqrt(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)
+        else:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)


 class TestSqrtFp16(OpTest):
diff --git a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py
index caf55b4850f..170f6b6ca4f 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py
@@ -51,8 +51,6 @@ class TestSquare(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')


diff --git a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py
index 55be94da2b7..375eef12291 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py
@@ -50,12 +50,11 @@ class TestTanh(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)
+        else:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)


 class TestTanhFp16(OpTest):
--
GitLab
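
The test-side changes above all follow one pattern: the base test picks its dtype through an init_dtype() hook, an FP16 subclass overrides only that hook, and the old "if self.dtype == np.float16: return" early exits in test_check_grad are removed so the fp16 kernels are actually exercised (with a looser tolerance where needed). The sketch below restates that pattern as a standalone file; it assumes Paddle's OpTest harness (op_test.py under python/paddle/fluid/tests/unittests) and an NPU build are available, and the op name ("exp") and class names are illustrative, not taken from the patch.

import numpy as np
import paddle
from op_test import OpTest  # Paddle's unit-test harness (assumed importable)

paddle.enable_static()


class TestExampleActivationNPU(OpTest):
    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "exp"  # illustrative op; any op with an NPU kernel works
        self.init_dtype()

        x = np.random.uniform(0.1, 1.0, [11, 17]).astype(self.dtype)
        self.inputs = {'X': x}
        self.outputs = {'Out': np.exp(x)}

    def set_npu(self):
        self.__class__.use_npu = True

    def init_dtype(self):
        # Base class runs in fp32; the fp16 variant only overrides this hook.
        self.dtype = np.float32

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        # No fp16 early return: fp16 just tolerates a larger relative error.
        if self.dtype == np.float16:
            self.check_grad_with_place(
                self.place, ['X'], 'Out', max_relative_error=0.01)
        else:
            self.check_grad_with_place(self.place, ['X'], 'Out')


class TestExampleActivationNPUFP16(TestExampleActivationNPU):
    def init_dtype(self):
        self.dtype = np.float16

The batch-norm kernel change is the C++ side of the same idea: the saved/running statistics and the scale/bias gradients are allocated as float even when the input tensor is float16.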