/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/core/enforce.h" #include #include #include #include #include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/common/scalar.h" #include "paddle/utils/blank.h" #ifdef PADDLE_WITH_CUDA #include "paddle/phi/core/external_error.pb.h" #endif // PADDLE_WITH_CUDA DECLARE_int32(call_stack_level); namespace egr { class EagerVariable; } namespace paddle { namespace framework { class VarDesc; class BlockDesc; using Attribute = paddle::variant, std::vector, std::vector, bool, std::vector, BlockDesc*, int64_t, std::vector, std::vector, std::vector, VarDesc*, std::vector, double, paddle::experimental::Scalar, std::vector>; using AttributeMap = std::unordered_map; } // namespace framework namespace imperative { class VariableWrapper; class SavedVariableWrapperList; class VarBase; namespace details { template struct NameVarMapTrait {}; template <> struct NameVarMapTrait { using Type = std::map>>; }; template <> struct NameVarMapTrait { using Type = std::map; }; template <> struct NameVarMapTrait { using Type = std::map>>; }; } // namespace details template using NameVarMap = typename details::NameVarMapTrait::Type; using NameVarBaseMap = NameVarMap; using NameVariableWrapperMap = NameVarMap; using NameTensorMap = NameVarMap; } // namespace imperative } // namespace paddle namespace phi { namespace enforce { int GetCallStackLevel() { return FLAGS_call_stack_level; } template static std::string ReplaceComplexTypeStr(std::string str, const std::string& type_name) { auto demangle_type_str = demangle(typeid(T).name()); size_t start_pos = 0; while ((start_pos = str.find(demangle_type_str, start_pos)) != std::string::npos) { str.replace(start_pos, demangle_type_str.length(), type_name); start_pos += type_name.length(); } return str; } #define __REPLACE_COMPLEX_TYPE_STR__(__TYPENAME, __STR) \ do { \ __STR = \ phi::enforce::ReplaceComplexTypeStr<__TYPENAME>(__STR, #__TYPENAME); \ } while (0) static std::string SimplifyDemangleStr(std::string str) { // the older is important, you have to put complex types in front __REPLACE_COMPLEX_TYPE_STR__(paddle::framework::AttributeMap, str); __REPLACE_COMPLEX_TYPE_STR__(paddle::framework::Attribute, str); __REPLACE_COMPLEX_TYPE_STR__(paddle::imperative::NameVariableWrapperMap, str); __REPLACE_COMPLEX_TYPE_STR__(paddle::imperative::NameVarBaseMap, str); __REPLACE_COMPLEX_TYPE_STR__(paddle::imperative::NameTensorMap, str); __REPLACE_COMPLEX_TYPE_STR__(std::string, str); return str; } std::string GetCurrentTraceBackString(bool for_signal) { std::ostringstream sout; if (!for_signal) { sout << "\n\n--------------------------------------\n"; sout << "C++ Traceback (most recent call last):"; sout << "\n--------------------------------------\n"; } #if !defined(_WIN32) && !defined(PADDLE_WITH_MUSL) static constexpr int TRACE_STACK_LIMIT = 100; void* call_stack[TRACE_STACK_LIMIT]; auto size = backtrace(call_stack, TRACE_STACK_LIMIT); auto symbols = backtrace_symbols(call_stack, size); Dl_info info; int idx = 0; // `for_signal` used to remove the stack trace introduced by // obtaining the error stack trace when the signal error occurred, // that is not related to the signal error self, remove it to // avoid misleading users and developers int end_idx = for_signal ? 2 : 0; for (int i = size - 1; i >= end_idx; --i) { if (dladdr(call_stack[i], &info) && info.dli_sname) { auto demangled = demangle(info.dli_sname); std::string path(info.dli_fname); // C++ traceback info are from core.so if (path.substr(path.length() - 3).compare(".so") == 0) { sout << paddle::string::Sprintf( "%-3d %s\n", idx++, SimplifyDemangleStr(demangled)); } } } free(symbols); #else sout << "Not support stack backtrace yet.\n"; #endif return sout.str(); } void ThrowWarnInternal(const std::string& msg) { LOG(WARNING) << "WARNING :" << msg; } std::string SimplifyErrorTypeFormat(const std::string& str) { std::ostringstream sout; size_t type_end_pos = str.find(":", 0); if (type_end_pos == std::string::npos) { sout << str; } else { // Remove "Error:", add "()"" sout << "(" << str.substr(0, type_end_pos - 5) << ")" << str.substr(type_end_pos + 1); } return sout.str(); } /**************************************************************************/ /**************************** NVIDIA ERROR ********************************/ #ifdef PADDLE_WITH_CUDA namespace details { template struct ExternalApiProtoType {}; #define DEFINE_EXTERNAL_API_PROTO_TYPE(type, proto_type) \ template <> \ struct ExternalApiProtoType { \ using Type = type; \ static constexpr const char* kTypeString = #proto_type; \ static constexpr phi::proto::ApiType kProtoType = \ phi::proto::ApiType::proto_type; \ } DEFINE_EXTERNAL_API_PROTO_TYPE(cudaError_t, CUDA); DEFINE_EXTERNAL_API_PROTO_TYPE(curandStatus_t, CURAND); DEFINE_EXTERNAL_API_PROTO_TYPE(cudnnStatus_t, CUDNN); DEFINE_EXTERNAL_API_PROTO_TYPE(cublasStatus_t, CUBLAS); DEFINE_EXTERNAL_API_PROTO_TYPE(cusparseStatus_t, CUSPARSE); DEFINE_EXTERNAL_API_PROTO_TYPE(cusolverStatus_t, CUSOLVER); DEFINE_EXTERNAL_API_PROTO_TYPE(cufftResult_t, CUFFT); DEFINE_EXTERNAL_API_PROTO_TYPE(CUresult, CU); #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) DEFINE_EXTERNAL_API_PROTO_TYPE(ncclResult_t, NCCL); #endif #undef DEFINE_EXTERNAL_API_PROTO_TYPE } // namespace details template inline const char* GetErrorMsgUrl(T status) { using __CUDA_STATUS_TYPE__ = decltype(status); phi::proto::ApiType proto_type = details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kProtoType; switch (proto_type) { case phi::proto::ApiType::CUDA: case phi::proto::ApiType::CU: return "https://docs.nvidia.com/cuda/cuda-runtime-api/" "group__CUDART__TYPES.html#group__CUDART__TYPES_" "1g3f51e3575c2178246db0a94a430e0038"; break; case phi::proto::ApiType::CURAND: return "https://docs.nvidia.com/cuda/curand/" "group__HOST.html#group__HOST_1gb94a31d5c165858c96b6c18b70644437"; break; case phi::proto::ApiType::CUDNN: return "https://docs.nvidia.com/deeplearning/cudnn/api/" "index.html#cudnnStatus_t"; break; case phi::proto::ApiType::CUBLAS: return "https://docs.nvidia.com/cuda/cublas/index.html#cublasstatus_t"; break; case phi::proto::ApiType::CUSOLVER: return "https://docs.nvidia.com/cuda/cusolver/" "index.html#cuSolverSPstatus"; break; case phi::proto::ApiType::NCCL: return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/" "types.html#ncclresult-t"; break; case phi::proto::ApiType::CUFFT: return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult"; case phi::proto::ApiType::CUSPARSE: return "https://docs.nvidia.com/cuda/cusparse/" "index.html#cusparseStatus_t"; break; default: return "Unknown type of External API, can't get error message URL!"; break; } } template std::string GetExternalErrorMsg(T status) { std::ostringstream sout; bool _initSucceed = false; phi::proto::ExternalErrorDesc externalError; if (externalError.ByteSizeLong() == 0) { std::string filePath; #if !defined(_WIN32) Dl_info info; if (dladdr(reinterpret_cast(GetCurrentTraceBackString), &info)) { std::string strModule(info.dli_fname); const size_t last_slash_idx = strModule.find_last_of("/"); std::string compare_path = strModule.substr(strModule.length() - 6); if (std::string::npos != last_slash_idx) { strModule.erase(last_slash_idx, std::string::npos); } // TODO(lizhiyu02): I don't know what the 'compare_path.compare("avx.so") // == 0' means, while // 'compare_path.find("dist-packages") != std::string::npos' means that // after using 'pip install paddle'. if (compare_path.compare("avx.so") == 0 || strModule.find("dist-packages") != std::string::npos) { filePath = strModule + "/../include/third_party/externalError/data/externalErrorMsg.pb"; } else { // Just for unittest filePath = strModule + "/../third_party/externalError/data/externalErrorMsg.pb"; } } #else char buf[512]; MEMORY_BASIC_INFORMATION mbi; HMODULE h_module = (::VirtualQuery(GetCurrentTraceBackString, &mbi, sizeof(mbi)) != 0) ? (HMODULE)mbi.AllocationBase : NULL; GetModuleFileName(h_module, buf, 512); std::string strModule(buf); const size_t last_slash_idx = strModule.find_last_of("\\"); std::string compare_path = strModule.substr(strModule.length() - 7); if (std::string::npos != last_slash_idx) { strModule.erase(last_slash_idx, std::string::npos); } if (strModule.find("dist-packages") != std::string::npos) { filePath = strModule + "\\..\\include\\third_" "party\\externalerror\\data\\externalErrorMsg.pb"; } else { filePath = strModule + "\\..\\..\\third_party" "\\externalerror\\data\\externalErrorMsg.pb"; } #endif std::ifstream fin(filePath, std::ios::in | std::ios::binary); _initSucceed = externalError.ParseFromIstream(&fin); } using __CUDA_STATUS_TYPE__ = decltype(status); phi::proto::ApiType proto_type = details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kProtoType; if (_initSucceed) { for (int i = 0; i < externalError.errors_size(); ++i) { if (proto_type == externalError.errors(i).type()) { for (int j = 0; j < externalError.errors(i).messages_size(); ++j) { if (status == externalError.errors(i).messages(j).code()) { sout << "\n [Hint: " << externalError.errors(i).messages(j).message() << "]"; return sout.str(); } } } } } sout << "\n [Hint: Please search for the error code(" << status << ") on website (" << GetErrorMsgUrl(status) << ") to get Nvidia's official solution and advice about " << details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kTypeString << " Error.]"; return sout.str(); } template std::string GetExternalErrorMsg(cudaError_t); template std::string GetExternalErrorMsg(curandStatus_t); template std::string GetExternalErrorMsg(cudnnStatus_t); template std::string GetExternalErrorMsg(cublasStatus_t); template std::string GetExternalErrorMsg(cusparseStatus_t); template std::string GetExternalErrorMsg(cusolverStatus_t); template std::string GetExternalErrorMsg(cufftResult_t); template std::string GetExternalErrorMsg(CUresult); #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) template std::string GetExternalErrorMsg(ncclResult_t); #endif #endif // PADDLE_WITH_CUDA } // namespace enforce } // namespace phi