// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <fstream>
#include <string>
#include <vector>

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"

#ifdef _WIN32
#include <direct.h>
#include <io.h>
#define MKDIR(path) _mkdir(path)
#else
#include <sys/stat.h>
#define MKDIR(path) mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

DECLARE_int32(check_nan_inf_level);

namespace paddle {
namespace framework {
namespace details {

void SetNanInfDebugPath(const std::string& nan_inf_path);

std::string GetNanPath();

// For float tensors, level 2 additionally reports values that would overflow
// in fp16; level 3 always prints the statistics.
template <typename T,
          typename MT,
          std::enable_if_t<std::is_same<T, float>::value, bool> = true>
HOSTDEVICE bool NeedPrint(MT max_value,
                          MT min_value,
                          int check_nan_inf_level) {
  if (check_nan_inf_level >= 3) {
    return true;
  } else if (check_nan_inf_level >= 2) {
    MT fp16_max =
        static_cast<MT>(std::numeric_limits<phi::dtype::float16>::max());
    return max_value > fp16_max || min_value < -fp16_max;
  }
  return false;
}

template <typename T,
          typename MT,
          std::enable_if_t<!std::is_same<T, float>::value, bool> = true>
HOSTDEVICE bool NeedPrint(MT max_value,
                          MT min_value,
                          int check_nan_inf_level) {
  if (check_nan_inf_level >= 3) {
    return true;
  }
  return false;
}

template <typename T, typename MT>
HOSTDEVICE void PrintForDifferentLevel(const char* debug_info,
                                       int64_t numel,
                                       int64_t num_nan,
                                       int64_t num_inf,
                                       MT max_value,
                                       MT min_value,
                                       MT mean_value,
                                       int check_nan_inf_level) {
  if (num_nan > 0 || num_inf > 0) {
    printf(
        "[PRECISION] [ERROR] in %s, numel=%lld, num_nan=%lld, "
        "num_inf=%lld, max=%e, min=%e, mean=%e\n",
        debug_info,
        static_cast<long long>(numel),    // NOLINT
        static_cast<long long>(num_nan),  // NOLINT
        static_cast<long long>(num_inf),  // NOLINT
        static_cast<float>(max_value),
        static_cast<float>(min_value),
        static_cast<float>(mean_value));
    if (check_nan_inf_level == 0) {
#if defined(__NVCC__) || defined(__HIPCC__)
      PADDLE_ENFORCE(false,
                     "There are NAN or INF (num_nan=%lld, num_inf=%lld) in %s.",
                     static_cast<long long>(num_nan),  // NOLINT
                     static_cast<long long>(num_inf),  // NOLINT
                     debug_info);
#else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
          "There are NAN or INF (num_nan=%lld, num_inf=%lld) in %s.",
          static_cast<long long>(num_nan),  // NOLINT
          static_cast<long long>(num_inf),  // NOLINT
          debug_info));
#endif
    }
  } else if (NeedPrint<T, MT>(max_value, min_value, check_nan_inf_level)) {
    printf("[PRECISION] in %s, numel=%lld, max=%e, min=%e, mean=%e\n",
           debug_info,
           static_cast<long long>(numel),  // NOLINT
           static_cast<float>(max_value),
           static_cast<float>(min_value),
           static_cast<float>(mean_value));
  }
}

template <typename T, typename MT>
void PrintForDifferentLevelFile(const char* debug_info,
                                int64_t numel,
                                int64_t num_nan,
                                int64_t num_inf,
                                MT max_value,
                                MT min_value,
                                MT mean_value,
                                int check_nan_inf_level,
                                const std::string& log_name) {
  int dev_id = 0;
#ifdef PADDLE_WITH_HIP
  hipGetDevice(&dev_id);
#elif defined(PADDLE_WITH_CUDA)
  cudaGetDevice(&dev_id);
#endif
  auto file_path = GetNanPath();
  MKDIR(file_path.c_str());
  std::string file_name = "worker_" + log_name + "." + std::to_string(dev_id);
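  // For example, log_name "cpu" on device 0 yields "worker_cpu.0"; the
  // concatenation below assumes GetNanPath() returns a directory prefix
  // that already ends with a path separator.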
  std::string path = file_path + file_name;
  std::ofstream outfile(path, std::ios::app);
  if (!outfile.is_open()) {
    return;
  }

  if (num_nan > 0 || num_inf > 0) {
    outfile << "[PRECISION] [ERROR] in " << debug_info
            << ", numel=" << static_cast<long long>(numel)      // NOLINT
            << ", num_nan=" << static_cast<long long>(num_nan)  // NOLINT
            << ", num_inf=" << static_cast<long long>(num_inf)  // NOLINT
            << ", max=" << static_cast<float>(max_value)
            << ", min=" << static_cast<float>(min_value)
            << ", mean=" << static_cast<float>(mean_value) << std::endl;
  } else if (NeedPrint<T, MT>(max_value, min_value, check_nan_inf_level)) {
    outfile << "[PRECISION] in " << debug_info
            << ", numel=" << static_cast<long long>(numel)  // NOLINT
            << ", max=" << static_cast<float>(max_value)
            << ", min=" << static_cast<float>(min_value)
            << ", mean=" << static_cast<float>(mean_value) << std::endl;
  }
  outfile.close();
}

template <typename T>
inline std::string GetCpuHintString(const std::string& op_type,
                                    const std::string& var_name,
                                    const phi::Place& place,
                                    int device_id = -1) {
  std::string dtype_str = DataTypeToString(DataTypeTrait<T>::DataType());
  if (dtype_str == "float") {
    dtype_str = "fp32";
  } else if (dtype_str == "double") {
    dtype_str = "fp64";
  } else if (dtype_str == "::paddle::platform::float16") {
    dtype_str = "fp16";
  } else if (dtype_str == "::paddle::platform::bfloat16") {
    dtype_str = "bf16";
  }

  std::stringstream ss;
  if (platform::is_gpu_place(place)) {
    ss << "[device=gpu:" << device_id << ", ";
  } else {
    ss << "[device=cpu, ";
  }
  ss << "op=" << op_type << ", tensor=" << var_name << ", dtype=" << dtype_str
     << "]";
  return ss.str();
}

template <
    typename T,
    std::enable_if_t<!std::is_same<T, phi::dtype::complex<float>>::value &&
                         !std::is_same<T, phi::dtype::complex<double>>::value,
                     bool> = true>
static void CheckNanInfCpuImpl(const T* value_ptr,
                               const int64_t numel,
                               const std::string& cpu_hint_str,
                               const std::string log_name = "cpu") {
  using MT = typename phi::dtype::template MPTypeTrait<T>::Type;

  // Skip empty tensors: value_ptr[0] is used to seed min/max below.
  if (numel <= 0) {
    return;
  }

#ifdef _OPENMP
  // Use at most 4 threads to collect the nan and inf information.
  // omp_get_max_threads() is queried because omp_get_num_threads() always
  // returns 1 outside of a parallel region.
  int num_threads = std::max(omp_get_max_threads(), 1);
  num_threads = std::min(num_threads, 4);
#else
  int num_threads = 1;
#endif

  std::vector<int64_t> thread_num_nan(num_threads, 0);
  std::vector<int64_t> thread_num_inf(num_threads, 0);
  std::vector<MT> thread_min_value(num_threads, static_cast<MT>(value_ptr[0]));
  std::vector<MT> thread_max_value(num_threads, static_cast<MT>(value_ptr[0]));
  std::vector<MT> thread_mean_value(num_threads, static_cast<MT>(0));

#ifdef _OPENMP
#pragma omp parallel num_threads(num_threads)
#endif
  {
#ifdef _OPENMP
    // Each thread scans one ceil(numel / num_threads)-sized chunk; the last
    // chunk is clipped to numel.
    int64_t tid = omp_get_thread_num();
    int64_t chunk_size = (numel + num_threads - 1) / num_threads;
    int64_t begin = tid * chunk_size;
    int64_t end = chunk_size + begin > numel ? numel : chunk_size + begin;
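    // Worked example: numel=10 with num_threads=4 gives chunk_size=3, so the
    // four threads cover [0,3), [3,6), [6,9), and [9,10).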
#else
    int64_t tid = 0;
    int64_t begin = 0;
    int64_t end = numel;
#endif
    for (int64_t i = begin; i < end; ++i) {
      MT value = static_cast<MT>(value_ptr[i]);

      thread_min_value[tid] = std::min(thread_min_value[tid], value);
      thread_max_value[tid] = std::max(thread_max_value[tid], value);
      thread_mean_value[tid] += value / static_cast<MT>(numel);

      if (std::isnan(value)) {
        thread_num_nan[tid] += 1;
      } else if (std::isinf(value)) {
        thread_num_inf[tid] += 1;
      }
    }
  }

  // Reduce the per-thread partial results into the final statistics.
  int64_t num_nan = 0;
  int64_t num_inf = 0;
  MT min_value = thread_min_value[0];
  MT max_value = thread_max_value[0];
  MT mean_value = static_cast<MT>(0);
  for (int i = 0; i < num_threads; ++i) {
    num_nan += thread_num_nan[i];
    num_inf += thread_num_inf[i];
    min_value = std::min(thread_min_value[i], min_value);
    max_value = std::max(thread_max_value[i], max_value);
    mean_value += thread_mean_value[i];
  }

  auto file_path = GetNanPath();
  // Write the log to a file when a debug path is set.
  if (file_path.size() > 0) {
    VLOG(4) << "[FLAGS_check_nan_inf_level=" << FLAGS_check_nan_inf_level
            << "]. Write log to " << file_path;
    PrintForDifferentLevelFile<T, MT>(cpu_hint_str.c_str(),
                                      numel,
                                      num_nan,
                                      num_inf,
                                      max_value,
                                      min_value,
                                      mean_value,
                                      FLAGS_check_nan_inf_level,
                                      log_name);
    return;
  }

  PrintForDifferentLevel<T, MT>(cpu_hint_str.c_str(),
                                numel,
                                num_nan,
                                num_inf,
                                max_value,
                                min_value,
                                mean_value,
                                FLAGS_check_nan_inf_level);
}

template <
    typename T,
    std::enable_if_t<std::is_same<T, phi::dtype::complex<float>>::value ||
                         std::is_same<T, phi::dtype::complex<double>>::value,
                     bool> = true>
void CheckNanInfCpuImpl(const T* value_ptr,
                        const int64_t numel,
                        const std::string& cpu_hint_str,
                        const std::string log_name = "cpu") {
  using RealType = typename T::value_type;

  RealType real_sum = 0.0f, imag_sum = 0.0f;

#ifdef _OPENMP
#pragma omp parallel for reduction(+ : real_sum) reduction(+ : imag_sum)
#endif
  for (int64_t i = 0; i < numel; ++i) {
    T value = value_ptr[i];
    // (x - x) is 0 for finite x and NaN when x is NaN or Inf, so a
    // non-finite sum means the tensor contains at least one NaN or Inf.
    real_sum += (value.real - value.real);
    imag_sum += (value.imag - value.imag);
  }

  if (std::isnan(real_sum) || std::isinf(real_sum) || std::isnan(imag_sum) ||
      std::isinf(imag_sum)) {
    // Hot fix for a compile failure on gcc 4.8; detailed NaN/Inf info for
    // complex tensors still needs to be printed here later.
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "There are NAN or INF in %s.", cpu_hint_str));
  }
}

template <typename DeviceContext>
struct TensorCheckerVisitor {
  TensorCheckerVisitor(const std::string& o,
                       const std::string& v,
                       const phi::DenseTensor& t,
                       const platform::Place& p)
      : op_type(o), var_name(v), tensor(t), place(p) {}

  template <typename T>
  void apply(
      typename std::enable_if<std::is_integral<T>::value>::type* = 0) const {
    VLOG(10) << var_name
             << " does not need to be checked: its type is not floating point";
  }

  template <typename T>
  void apply(
      typename std::enable_if<
          std::is_floating_point<T>::value ||
          std::is_same<T, ::paddle::platform::complex<float>>::value ||
          std::is_same<T, ::paddle::platform::complex<double>>::value>::type* =
          0) const;

  std::string op_type;
  std::string var_name;
  const phi::DenseTensor& tensor;
  const platform::Place& place;
};

template <typename DeviceContext>
void tensor_check(const std::string& op_type,
                  const std::string& var_name,
                  const phi::DenseTensor& tensor,
                  const platform::Place& place);

}  // namespace details
}  // namespace framework
}  // namespace paddle
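
// Usage sketch (hypothetical caller, not part of this header): checking a raw
// CPU buffer for NaN/Inf, with GetCpuHintString building the debug prefix.
//
//   std::vector<float> buf = {1.0f, 2.0f, std::nanf("")};
//   auto hint = paddle::framework::details::GetCpuHintString<float>(
//       "elementwise_add", "out", phi::CPUPlace());
//   paddle::framework::details::CheckNanInfCpuImpl(buf.data(), buf.size(),
//                                                  hint);
//
// With FLAGS_check_nan_inf_level=0 this throws as soon as any NaN or Inf is
// found; higher levels only print or log the tensor statistics.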