// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #ifdef PADDLE_WITH_MKLML #include #endif #include #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/enforce.h" #ifdef _WIN32 #include #include #define MKDIR(path) _mkdir(path) #else #include #define MKDIR(path) mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) #endif namespace phi { namespace funcs { template ::value, bool> = true> HOSTDEVICE bool NeedPrint(MT max_value, MT min_value, int check_nan_inf_level) { if (check_nan_inf_level >= 3) { return true; } else if (check_nan_inf_level >= 2) { MT fp16_max = static_cast(std::numeric_limits::max()); return max_value > fp16_max || min_value < -fp16_max; } return false; } template ::value, bool> = true> HOSTDEVICE bool NeedPrint(MT max_value UNUSED, MT min_value UNUSED, int check_nan_inf_level) { if (check_nan_inf_level >= 3) { return true; } return false; } HOSTDEVICE static void PrintAndThrowError(const char* debug_info, int64_t num_nan, int64_t num_inf, int64_t num_zero) { #if !defined(__HIPCC__) && !defined(__CUDA_ARCH__) PADDLE_THROW(phi::errors::PreconditionNotMet( "There are NAN or INF (num_nan=%lld, num_inf=%lld, num_zero=%lld) in " "%s.", static_cast(num_nan), // NOLINT static_cast(num_inf), // NOLINT static_cast(num_zero), // NOLINT debug_info)); #endif } template HOSTDEVICE void PrintForDifferentLevel(const char* debug_info, int64_t numel, int64_t num_nan, int64_t num_inf, int64_t num_zero, MT max_value, MT min_value, MT mean_value, int check_nan_inf_level) { if (num_nan > 0 || num_inf > 0) { printf( "[PRECISION] [ERROR] in %s, numel=%lld, num_nan=%lld, " "num_inf=%lld, num_zero=%lld, max=%e, min=%e, mean=%e\n", debug_info, static_cast(numel), // NOLINT static_cast(num_nan), // NOLINT static_cast(num_inf), // NOLINT static_cast(num_zero), // NOLINT static_cast(max_value), static_cast(min_value), static_cast(mean_value)); if (check_nan_inf_level == 0) { PrintAndThrowError(debug_info, num_nan, num_inf, num_zero); } } else if (NeedPrint(max_value, min_value, check_nan_inf_level)) { printf( "[PRECISION] in %s, numel=%lld, num_zero=%lld, max=%e, min=%e, " "mean=%e\n", debug_info, static_cast(numel), // NOLINT static_cast(num_zero), // NOLINT static_cast(max_value), static_cast(min_value), static_cast(mean_value)); } } template void WriteToFileForDifferentLevel(const char* debug_info, int64_t numel, int64_t num_nan, int64_t num_inf, int64_t num_zero, MT max_value, MT min_value, MT mean_value, int check_nan_inf_level, const std::string& log_name, const std::string output_dir) { MKDIR(output_dir.c_str()); std::string filename = output_dir + "worker_" + log_name; std::ofstream outfile(filename, std::ios::app); PADDLE_ENFORCE_EQ( outfile.is_open(), true, phi::errors::Unavailable("Fail to open output file %s, please check the " "specified output_dir (%s).", filename, output_dir)); if (num_nan > 0 || num_inf > 0) { outfile << "[PRECISION] [ERROR] in " << debug_info << ", numel=" << static_cast(numel) // NOLINT << ", num_nan=" << static_cast(num_nan) // NOLINT << ", num_inf=" << static_cast(num_inf) // NOLINT << ", num_zero=" << static_cast(num_zero) // NOLINT << std::scientific << std::setprecision(6) << ", max=" << static_cast(max_value) << ", min=" << static_cast(min_value) << ", mean=" << static_cast(mean_value) << std::endl; } else if (phi::funcs::NeedPrint( max_value, min_value, check_nan_inf_level)) { outfile << "[PRECISION] in " << debug_info << ", numel=" << static_cast(numel) // NOLINT << ", num_zero=" << static_cast(num_zero) // NOLINT << std::scientific << std::setprecision(6) << ", max=" << static_cast(max_value) << ", min=" << static_cast(min_value) << ", mean=" << static_cast(mean_value) << std::endl; } outfile.close(); } template inline std::string GetCpuHintString(const std::string& op_type, const std::string& var_name, const phi::Place& place, int device_id = -1) { std::string dtype_str; phi::DataType dtype = phi::CppTypeToDataType::Type(); if (dtype == DataType::FLOAT32) { dtype_str = "fp32"; } else if (dtype == DataType::FLOAT64) { dtype_str = "fp64"; } else if (dtype == DataType::FLOAT16) { dtype_str = "fp16"; } else if (dtype == DataType::BFLOAT16) { dtype_str = "bf16"; } std::stringstream ss; if (place.GetType() == phi::AllocationType::GPU) { ss << "[device=gpu:" << device_id << ", "; } else { ss << "[device=cpu, "; } ss << "op=" << op_type << ", tensor=" << var_name << ", dtype=" << dtype_str << "]"; return ss.str(); } template < typename T, std::enable_if_t>::value && !std::is_same>::value, bool> = true> static void CheckNumericsCpuImpl(const T* value_ptr, const int64_t numel, const std::string& cpu_hint_str, const int check_nan_inf_level, const std::string log_name = "cpu", const std::string output_dir = "") { using MT = typename phi::dtype::template MPTypeTrait::Type; #ifdef _OPENMP // Use maximum 4 threads to collect the nan and inf information. int num_threads = std::max(omp_get_num_threads(), 1); num_threads = std::min(num_threads, 4); #else int num_threads = 1; #endif std::vector thread_num_nan(num_threads, 0); std::vector thread_num_inf(num_threads, 0); std::vector thread_num_zero(num_threads, 0); std::vector thread_min_value(num_threads, static_cast(value_ptr[0])); std::vector thread_max_value(num_threads, static_cast(value_ptr[0])); std::vector thread_mean_value(num_threads, static_cast(0)); #ifdef _OPENMP #pragma omp parallel num_threads(num_threads) #endif { #ifdef _OPENMP int64_t tid = omp_get_thread_num(); int64_t chunk_size = (numel + num_threads - 1) / num_threads; int64_t begin = tid * chunk_size; int64_t end = chunk_size + begin > numel ? numel : chunk_size + begin; #else int64_t tid = 0; int64_t begin = 0; int64_t end = numel; #endif for (int64_t i = begin; i < end; ++i) { MT value = static_cast(value_ptr[i]); thread_min_value[tid] = std::min(thread_min_value[tid], value); thread_max_value[tid] = std::max(thread_max_value[tid], value); thread_mean_value[tid] += value / static_cast(numel); if (std::isnan(value)) { thread_num_nan[tid] += 1; } else if (std::isinf(value)) { thread_num_inf[tid] += 1; } if (value == 0) { thread_num_zero[tid] += 1; } } } int64_t num_nan = 0; int64_t num_inf = 0; int64_t num_zero = 0; MT min_value = thread_min_value[0]; MT max_value = thread_max_value[0]; MT mean_value = static_cast(0); for (int i = 0; i < num_threads; ++i) { num_nan += thread_num_nan[i]; num_inf += thread_num_inf[i]; num_zero += thread_num_zero[i]; min_value = std::min(thread_min_value[i], min_value); max_value = std::max(thread_max_value[i], max_value); mean_value += thread_mean_value[i]; } // Write log to file if (output_dir.size() > 0) { WriteToFileForDifferentLevel(cpu_hint_str.c_str(), numel, num_nan, num_inf, num_zero, max_value, min_value, mean_value, check_nan_inf_level, log_name, output_dir); } else { PrintForDifferentLevel(cpu_hint_str.c_str(), numel, num_nan, num_inf, num_zero, max_value, min_value, mean_value, check_nan_inf_level); } } template < typename T, std::enable_if_t>::value || std::is_same>::value, bool> = true> void CheckNumericsCpuImpl(const T* value_ptr, const int64_t numel, const std::string& cpu_hint_str, const int check_nan_inf_level, const std::string log_name = "cpu", const std::string output_dir = "") { using RealType = typename T::value_type; RealType real_sum = 0.0f, imag_sum = 0.0f; #ifdef _OPENMP #pragma omp parallel for reduction(+ : real_sum) reduction(+ : imag_sum) #endif for (int64_t i = 0; i < numel; ++i) { T value = value_ptr[i]; real_sum += (value.real - value.real); imag_sum += (value.imag - value.imag); } if (std::isnan(real_sum) || std::isinf(real_sum) || std::isnan(imag_sum) || std::isinf(imag_sum)) { // hot fix for compile failed in gcc4.8 // here also need print detail info of nan or inf later PADDLE_THROW(phi::errors::PreconditionNotMet("There are NAN or INF in %s.", cpu_hint_str)); } } } // namespace funcs } // namespace phi