diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc
index 4f3da27f4a67379624f5b5a66840bbc0cbac4f17..2622fe134c2a7f33d3a901d9b73c1f1e5aeff1a9 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc
@@ -63,15 +63,22 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
 
-    Tensor y_power(y->type());
-    y_power.mutable_data<T>(y->dims(), place);
-    const auto& runner_y_power = NpuOpRunner(
-        "Power", {*y}, {y_power}, {{"power", static_cast<float>(-1)}});
-    runner_y_power.Run(stream);
-
     if (dx) {
       dx->mutable_data<T>(place);
 
+      Tensor tensor_one(y->type());
+      tensor_one.mutable_data<T>({1}, place);
+      FillNpuTensorWithConstant<T>(&tensor_one, static_cast<T>(1.0));
+
+      // Use `Div` CANN OP to achieve `1/y` instead of `Power` CANN OP.
+      // Because `Power` will cause precision overflow, that is, `float_status`
+      // will be set to 1.
+      Tensor y_div(y->type());
+      y_div.mutable_data<T>(y->dims(), place);
+      const auto& runner_one_div_y =
+          NpuOpRunner("Div", {tensor_one, *y}, {y_div}, {});
+      runner_one_div_y.Run(stream);
+
       Tensor tensor_zeros(x->type());
       tensor_zeros.mutable_data<T>(x->dims(), place);
       const auto& runner_tensor_zeros =
@@ -100,7 +107,7 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
       Tensor x_grad_w(x->type());
       x_grad_w.mutable_data<T>(x->dims(), place);
       const auto& runner_x_grad_w =
-          NpuOpRunner("Mul", {x_nozero_f, y_power}, {x_grad_w}, {});
+          NpuOpRunner("Mul", {x_nozero_f, y_div}, {x_grad_w}, {});
       runner_x_grad_w.Run(stream);
 
       const auto& runner_x_grad =
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py
index ee72ee74065e3b1dd417a5d3adac8711a2464cba..07c22868d5accf21d5d3aadfa685fe3625766be9 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py
@@ -21,6 +21,7 @@ sys.path.append("..")
 from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
+from paddle.fluid.core import ops
 
 paddle.enable_static()
 SEED = 2021
@@ -173,5 +174,30 @@ class TestElementwiseDivNet(unittest.TestCase):
         self.assertTrue(np.allclose(npu_loss, cpu_loss))
 
 
+class TestFloatStatus(unittest.TestCase):
+    def test_overflow(self):
+        paddle.disable_static()
+        paddle.set_device('npu')
+
+        flag = paddle.zeros([8])
+        ops.clear_float_status(flag, flag)
+        self.assertEqual(flag.numpy().sum(), 0.0)
+
+        x = paddle.to_tensor([12.564], stop_gradient=False)
+        y = paddle.to_tensor([2.], stop_gradient=False)
+        z = x / y
+        out = 32768. * z
+
+        ops.get_float_status(flag, flag)
+        self.assertEqual(flag.numpy().sum(), 0.0)
+
+        out.sum().backward()
+
+        ops.get_float_status(flag, flag)
+        self.assertEqual(flag.numpy().sum(), 0.0)
+
+        paddle.enable_static()
+
+
 if __name__ == '__main__':
     unittest.main()