diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu index 514e5aa3e5d5610d11b0604c569dff0d877bc8c6..49e07944c83b67d9db13bae85d21fe8108e29bde 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cu +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cu @@ -174,15 +174,19 @@ __device__ T BlockReduce(T value) { __device__ void BlockReduceNumNanInfAndWrite(const int64_t num_nan, const int64_t num_inf, + const int64_t num_zero, int64_t offset, int64_t* num_nan_ptr, - int64_t* num_inf_ptr) { + int64_t* num_inf_ptr, + int64_t* num_zero_ptr) { int64_t block_num_nan = BlockReduce(num_nan); int64_t block_num_inf = BlockReduce(num_inf); + int64_t block_num_zero = BlockReduce(num_zero); if (threadIdx.x == 0) { num_nan_ptr[offset] = block_num_nan; num_inf_ptr[offset] = block_num_inf; + num_zero_ptr[offset] = block_num_zero; } } @@ -233,6 +237,7 @@ __global__ void FindNanInfAndBlockMaxMin(const T* value_ptr, const int64_t numel, int64_t* block_num_nan_ptr, int64_t* block_num_inf_ptr, + int64_t* block_num_zero_ptr, MT* tensor_block_max_ptr, MT* tensor_block_min_ptr, MT* tensor_block_mean_ptr) { @@ -240,6 +245,7 @@ __global__ void FindNanInfAndBlockMaxMin(const T* value_ptr, int64_t num_nan = 0; int64_t num_inf = 0; + int64_t num_zero = 0; MT max_value = static_cast(i < numel ? value_ptr[i] : value_ptr[0]); MT min_value = static_cast(i < numel ? value_ptr[i] : value_ptr[0]); @@ -256,10 +262,18 @@ __global__ void FindNanInfAndBlockMaxMin(const T* value_ptr, } else if (isinf(value)) { num_inf += 1; } + if (value == static_cast(0)) { + num_zero += 1; + } } - BlockReduceNumNanInfAndWrite( - num_nan, num_inf, blockIdx.x, block_num_nan_ptr, block_num_inf_ptr); + BlockReduceNumNanInfAndWrite(num_nan, + num_inf, + num_zero, + blockIdx.x, + block_num_nan_ptr, + block_num_inf_ptr, + block_num_zero_ptr); BlockReduceMaxMinAndWrite(max_value, min_value, @@ -273,6 +287,7 @@ __global__ void FindNanInfAndBlockMaxMin(const T* value_ptr, template __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr, const int64_t* block_num_inf_ptr, + const int64_t* block_num_zero_ptr, const MT* tensor_block_max_ptr, const MT* tensor_block_min_ptr, const MT* tensor_block_mean_ptr, @@ -283,11 +298,13 @@ __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr, if (blockIdx.x == 0 && threadIdx.x == 0) { int64_t num_nan = 0; int64_t num_inf = 0; + int64_t num_zero = 0; // numel_max_min <= 128 for (int64_t i = 0; i < numel_max_min; ++i) { num_nan += block_num_nan_ptr[i]; num_inf += block_num_inf_ptr[i]; + num_zero += block_num_zero_ptr[i]; } MT max_value = static_cast(0); @@ -314,6 +331,7 @@ __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr, numel, num_nan, num_inf, + num_zero, max_value, min_value, mean_value, @@ -451,11 +469,12 @@ void TensorCheckerVisitor::apply( int64_t numel_max_min = blocks; - phi::DenseTensor block_num_nan_inf; - block_num_nan_inf.Resize({static_cast(2 * numel_max_min)}); + phi::DenseTensor block_num_nan_inf_zero; + block_num_nan_inf_zero.Resize({static_cast(3 * numel_max_min)}); int64_t* block_num_nan_ptr = - dev_ctx->template Alloc(&block_num_nan_inf); + dev_ctx->template Alloc(&block_num_nan_inf_zero); int64_t* block_num_inf_ptr = block_num_nan_ptr + numel_max_min; + int64_t* block_num_zero_ptr = block_num_inf_ptr + numel_max_min; phi::DenseTensor tensor_block_max_min; tensor_block_max_min.Resize({static_cast(3 * numel_max_min)}); @@ -468,6 +487,7 @@ void TensorCheckerVisitor::apply( tensor.numel(), block_num_nan_ptr, block_num_inf_ptr, + block_num_zero_ptr, tensor_block_max_ptr, tensor_block_min_ptr, tensor_block_mean_ptr); @@ -476,6 +496,7 @@ void TensorCheckerVisitor::apply( FindGlobalMaxMinAndPrint <<<1, 1, 0, dev_ctx->stream()>>>(block_num_nan_ptr, block_num_inf_ptr, + block_num_zero_ptr, tensor_block_max_ptr, tensor_block_min_ptr, tensor_block_mean_ptr, diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.h b/paddle/fluid/framework/details/nan_inf_utils_detail.h index fee2a52b428b2be97db67d43b816efe068fae862..e83fc6c2dd18e53b3ed3351526da98ab643d0591 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.h +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.h @@ -69,6 +69,7 @@ HOSTDEVICE void PrintForDifferentLevel(const char* debug_info, int64_t numel, int64_t num_nan, int64_t num_inf, + int64_t num_zero, MT max_value, MT min_value, MT mean_value, @@ -76,26 +77,31 @@ HOSTDEVICE void PrintForDifferentLevel(const char* debug_info, if (num_nan > 0 || num_inf > 0) { printf( "[PRECISION] [ERROR] in %s, numel=%lld, num_nan=%lld, " - "num_inf=%lld, max=%e, min=%e, mean=%e\n", + "num_inf=%lld, num_zero=%lld, max=%e, min=%e, mean=%e\n", debug_info, - static_cast(numel), // NOLINT - static_cast(num_nan), // NOLINT - static_cast(num_inf), // NOLINT + static_cast(numel), // NOLINT + static_cast(num_nan), // NOLINT + static_cast(num_inf), // NOLINT + static_cast(num_zero), // NOLINT static_cast(max_value), static_cast(min_value), static_cast(mean_value)); if (check_nan_inf_level == 0) { #if defined(__NVCC__) || defined(__HIPCC__) PADDLE_ENFORCE(false, - "There are NAN or INF (num_nan=%ld, num_inf=%lld) in %s.", - static_cast(num_nan), // NOLINT - static_cast(num_inf), // NOLINT + "There are NAN or INF (num_nan=%ld, num_inf=%lld, " + "num_zero=%lld) in %s.", + static_cast(num_nan), // NOLINT + static_cast(num_inf), // NOLINT + static_cast(num_zero), // NOLINT debug_info); #else PADDLE_THROW(platform::errors::PreconditionNotMet( - "There are NAN or INF (num_nan=%lld, num_inf=%lld) in %s.", - static_cast(num_nan), // NOLINT - static_cast(num_inf), // NOLINT + "There are NAN or INF (num_nan=%lld, num_inf=%lld, num_zero=%lld) in " + "%s.", + static_cast(num_nan), // NOLINT + static_cast(num_inf), // NOLINT + static_cast(num_zero), // NOLINT debug_info)); #endif } @@ -114,6 +120,7 @@ void PrintForDifferentLevelFile(const char* debug_info, int64_t numel, int64_t num_nan, int64_t num_inf, + int64_t num_zero, MT max_value, MT min_value, MT mean_value, @@ -136,9 +143,10 @@ void PrintForDifferentLevelFile(const char* debug_info, if (num_nan > 0 || num_inf > 0) { outfile << "[PRECISION] [ERROR] in " << debug_info - << ", numel=" << static_cast(numel) // NOLINT - << ", num_nan=" << static_cast(num_nan) // NOLINT - << ", num_inf=" << static_cast(num_inf) // NOLINT + << ", numel=" << static_cast(numel) // NOLINT + << ", num_nan=" << static_cast(num_nan) // NOLINT + << ", num_inf=" << static_cast(num_inf) // NOLINT + << ", num_zero=" << static_cast(num_zero) // NOLINT << ", max=" << static_cast(max_value) << ", min=" << static_cast(min_value) << ", mean=" << static_cast(mean_value) << std::endl; @@ -200,6 +208,7 @@ static void CheckNanInfCpuImpl(const T* value_ptr, std::vector thread_num_nan(num_threads, 0); std::vector thread_num_inf(num_threads, 0); + std::vector thread_num_zero(num_threads, 0); std::vector thread_min_value(num_threads, static_cast(value_ptr[0])); std::vector thread_max_value(num_threads, static_cast(value_ptr[0])); std::vector thread_mean_value(num_threads, static_cast(0)); @@ -230,17 +239,22 @@ static void CheckNanInfCpuImpl(const T* value_ptr, } else if (std::isinf(value)) { thread_num_inf[tid] += 1; } + if (value == 0) { + thread_num_zero[tid] += 1; + } } } int64_t num_nan = 0; int64_t num_inf = 0; + int64_t num_zero = 0; MT min_value = thread_min_value[0]; MT max_value = thread_max_value[0]; MT mean_value = static_cast(0); for (int i = 0; i < num_threads; ++i) { num_nan += thread_num_nan[i]; num_inf += thread_num_inf[i]; + num_zero += thread_num_zero[i]; min_value = std::min(thread_min_value[i], min_value); max_value = std::max(thread_max_value[i], max_value); mean_value += thread_mean_value[i]; @@ -254,6 +268,7 @@ static void CheckNanInfCpuImpl(const T* value_ptr, numel, num_nan, num_inf, + num_zero, max_value, min_value, mean_value, @@ -266,6 +281,7 @@ static void CheckNanInfCpuImpl(const T* value_ptr, numel, num_nan, num_inf, + num_zero, max_value, min_value, mean_value,