diff --git a/paddle/fluid/operators/npu_op_runner.h b/paddle/fluid/operators/npu_op_runner.h index 5506ddd89692b5c2811bf48acc8e020090c447e7..cfc933c7a76fa77dca3bf368a3e55cc1c7485bea 100644 --- a/paddle/fluid/operators/npu_op_runner.h +++ b/paddle/fluid/operators/npu_op_runner.h @@ -90,6 +90,9 @@ aclrtStream GetCurrentNPUStream(int device_id = -1); template void FillNpuTensorWithConstant(Tensor *tensor, T val) { + // NOTE(zhiqiu): we found that power sometimes returns 0 when val is small + // like 1e-8. + constexpr float MIN_PRECISION_FOR_POWER = 1e-3; PADDLE_ENFORCE_EQ( tensor->IsInitialized(), true, platform::errors::InvalidArgument("The tensor should be initialized.")); @@ -97,7 +100,8 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) { platform::is_npu_place(tensor->place()), true, platform::errors::InvalidArgument("The tensor should be on NPUPlace.")); // do async for better performance - if (typeid(float) == typeid(T) || typeid(platform::float16) == typeid(T)) { + if ((typeid(float) == typeid(T) || typeid(platform::float16) == typeid(T)) && + static_cast(val) > MIN_PRECISION_FOR_POWER) { Tensor tmp(tensor->type()); tmp.Resize(tensor->dims()); tmp.mutable_data(tensor->place());