diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc
index 897ca5fe5c5ece5652f236ab8a334a553ae4c9e5..0fce1eee7005dfc68ff46c73a2955c8be7a794fa 100644
--- a/paddle/phi/core/enforce.cc
+++ b/paddle/phi/core/enforce.cc
@@ -23,6 +23,10 @@ limitations under the License. */
 #include "paddle/phi/common/scalar.h"
 #include "paddle/utils/blank.h"
 
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/phi/core/external_error.pb.h"
+#endif  // PADDLE_WITH_CUDA
+
 DECLARE_int32(call_stack_level);
 
 namespace egr {
@@ -177,5 +181,191 @@ std::string SimplifyErrorTypeFormat(const std::string& str) {
   return sout.str();
 }
 
+/**************************************************************************/
+/**************************** NVIDIA ERROR ********************************/
+#ifdef PADDLE_WITH_CUDA
+
+namespace details {
+
+// Compile-time map from an NVIDIA status type to its phi::proto::ApiType tag
+// and printable name. The primary template is empty, so only the types
+// registered through DEFINE_EXTERNAL_API_PROTO_TYPE below can be looked up.
+template <typename T>
+struct ExternalApiProtoType {};
+
+#define DEFINE_EXTERNAL_API_PROTO_TYPE(type, proto_type)    \
+  template <>                                               \
+  struct ExternalApiProtoType<type> {                       \
+    using Type = type;                                      \
+    static constexpr const char* kTypeString = #proto_type; \
+    static constexpr phi::proto::ApiType kProtoType =       \
+        phi::proto::ApiType::proto_type;                    \
+  }
+
+DEFINE_EXTERNAL_API_PROTO_TYPE(cudaError_t, CUDA);
+DEFINE_EXTERNAL_API_PROTO_TYPE(curandStatus_t, CURAND);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cudnnStatus_t, CUDNN);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cublasStatus_t, CUBLAS);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cusparseStatus_t, CUSPARSE);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cusolverStatus_t, CUSOLVER);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cufftResult_t, CUFFT);
+DEFINE_EXTERNAL_API_PROTO_TYPE(CUresult, CU);
+
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
+DEFINE_EXTERNAL_API_PROTO_TYPE(ncclResult_t, NCCL);
+#endif
+
+#undef DEFINE_EXTERNAL_API_PROTO_TYPE
+
+// Returns true when `str` ends with `suffix`. Unlike
+// str.substr(str.length() - n), this cannot throw std::out_of_range when
+// `str` is shorter than the suffix.
+static bool HasSuffix(const std::string& str, const std::string& suffix) {
+  return str.size() >= suffix.size() &&
+         str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
+}
+
+}  // namespace details
+
+// Returns the NVIDIA documentation URL that lists the status codes of the
+// library `T` belongs to. Only the type of `status` is inspected.
+template <typename T>
+inline const char* GetErrorMsgUrl(T status) {
+  using __CUDA_STATUS_TYPE__ = decltype(status);
+  phi::proto::ApiType proto_type =
+      details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kProtoType;
+  switch (proto_type) {
+    case phi::proto::ApiType::CUDA:
+    case phi::proto::ApiType::CU:
+      return "https://docs.nvidia.com/cuda/cuda-runtime-api/"
+             "group__CUDART__TYPES.html#group__CUDART__TYPES_"
+             "1g3f51e3575c2178246db0a94a430e0038";
+    case phi::proto::ApiType::CURAND:
+      return "https://docs.nvidia.com/cuda/curand/"
+             "group__HOST.html#group__HOST_1gb94a31d5c165858c96b6c18b70644437";
+    case phi::proto::ApiType::CUDNN:
+      return "https://docs.nvidia.com/deeplearning/cudnn/api/"
+             "index.html#cudnnStatus_t";
+    case phi::proto::ApiType::CUBLAS:
+      return "https://docs.nvidia.com/cuda/cublas/index.html#cublasstatus_t";
+    case phi::proto::ApiType::CUSOLVER:
+      return "https://docs.nvidia.com/cuda/cusolver/"
+             "index.html#cuSolverSPstatus";
+    case phi::proto::ApiType::NCCL:
+      return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
+             "types.html#ncclresult-t";
+    case phi::proto::ApiType::CUFFT:
+      return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
+    case phi::proto::ApiType::CUSPARSE:
+      return "https://docs.nvidia.com/cuda/cusparse/"
+             "index.html#cusparseStatus_t";
+    default:
+      return "Unknown type of External API, can't get error message URL!";
+  }
+}
+
+// Builds the "[Hint: ...]" text appended to NVIDIA API error reports.
+//
+// The message is looked up by (API type, status code) in the
+// externalErrorMsg.pb file located relative to the module that contains
+// GetCurrentTraceBackString. If that file cannot be opened or parsed, or has
+// no entry for `status`, a generic hint pointing at the matching NVIDIA
+// documentation page (see GetErrorMsgUrl) is returned instead.
+template <typename T>
+std::string GetExternalErrorMsg(T status) {
+  std::ostringstream sout;
+  bool _initSucceed = false;
+  phi::proto::ExternalErrorDesc externalError;
+  // NOTE(review): `externalError` is a fresh local, so this check looks like
+  // it always passes and the file is re-parsed on every call -- confirm
+  // whether caching was intended.
+  if (externalError.ByteSizeLong() == 0) {
+    std::string filePath;
+#if !defined(_WIN32)
+    Dl_info info;
+    if (dladdr(reinterpret_cast<void*>(GetCurrentTraceBackString), &info)) {
+      std::string strModule(info.dli_fname);
+      // Test the suffix before the file name is stripped from the path.
+      const bool is_avx_module = details::HasSuffix(strModule, "avx.so");
+      const size_t last_slash_idx = strModule.find_last_of("/");
+      if (std::string::npos != last_slash_idx) {
+        strModule.erase(last_slash_idx, std::string::npos);
+      }
+      if (is_avx_module) {
+        filePath =
+            strModule +
+            "/../include/third_party/externalError/data/externalErrorMsg.pb";
+      } else {
+        filePath = strModule +
+                   "/../../third_party/externalError/data/externalErrorMsg.pb";
+      }
+    }
+#else
+    char buf[512] = {0};
+    MEMORY_BASIC_INFORMATION mbi;
+    HMODULE h_module =
+        (::VirtualQuery(GetCurrentTraceBackString, &mbi, sizeof(mbi)) != 0)
+            ? (HMODULE)mbi.AllocationBase
+            : NULL;
+    // Leave filePath empty when the module path cannot be obtained, so the
+    // generic hint below is used instead of reading an unfilled buffer.
+    if (GetModuleFileName(h_module, buf, 512) != 0) {
+      std::string strModule(buf);
+      const bool is_avx_module = details::HasSuffix(strModule, "avx.pyd");
+      const size_t last_slash_idx = strModule.find_last_of("\\");
+      if (std::string::npos != last_slash_idx) {
+        strModule.erase(last_slash_idx, std::string::npos);
+      }
+      if (is_avx_module) {
+        filePath = strModule +
+                   "\\..\\include\\third_"
+                   "party\\externalerror\\data\\externalErrorMsg.pb";
+      } else {
+        filePath =
+            strModule +
+            "\\..\\..\\third_party\\externalerror\\data\\externalErrorMsg.pb";
+      }
+    }
+#endif
+    std::ifstream fin(filePath, std::ios::in | std::ios::binary);
+    _initSucceed = fin.is_open() && externalError.ParseFromIstream(&fin);
+  }
+  using __CUDA_STATUS_TYPE__ = decltype(status);
+  phi::proto::ApiType proto_type =
+      details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kProtoType;
+  if (_initSucceed) {
+    for (const auto& api_errors : externalError.errors()) {
+      if (proto_type != api_errors.type()) continue;
+      for (const auto& entry : api_errors.messages()) {
+        if (status == entry.code()) {
+          sout << "\n [Hint: " << entry.message() << "]";
+          return sout.str();
+        }
+      }
+    }
+  }
+
+  sout << "\n [Hint: Please search for the error code(" << status
+       << ") on website (" << GetErrorMsgUrl(status)
+       << ") to get Nvidia's official solution and advice about "
+       << details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kTypeString
+       << " Error.]";
+  return sout.str();
+}
+
+template std::string GetExternalErrorMsg(cudaError_t);
+template std::string GetExternalErrorMsg(curandStatus_t);
+template std::string GetExternalErrorMsg(cudnnStatus_t);
+template std::string GetExternalErrorMsg(cublasStatus_t);
+template std::string GetExternalErrorMsg(cusparseStatus_t);
+template std::string GetExternalErrorMsg(cusolverStatus_t);
+template std::string GetExternalErrorMsg(cufftResult_t);
+template std::string GetExternalErrorMsg(CUresult);
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
+template std::string GetExternalErrorMsg(ncclResult_t);
+#endif
+
+#endif  // PADDLE_WITH_CUDA
+
 }  // namespace enforce
 }  // namespace phi
diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h
index 96006fe83a42b8092ce563768fe28498c8b0a7dd..d0b240e89417d2081fd27f784499002206c30542 100644
--- a/paddle/phi/core/enforce.h
+++ b/paddle/phi/core/enforce.h
@@ -33,8 +33,6 @@ limitations under the License. */
 #include
 #include
 #include
-
-#include "paddle/phi/core/external_error.pb.h"
 #endif  // PADDLE_WITH_CUDA
 
 #ifdef PADDLE_WITH_HIP
@@ -90,7 +88,6 @@ limitations under the License.
*/
 #endif  // PADDLE_WITH_HIP
 
 // Note: these headers for simplify demangle type string
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/type_defs.h"
 // Note: this header for simplify HIP and CUDA type string
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -615,162 +612,30 @@ namespace details {
 template
 struct ExternalApiType {};
 
-#define DEFINE_EXTERNAL_API_TYPE(type, success_value, proto_type) \
-  template <>                                                     \
-  struct ExternalApiType {                                        \
-    using Type = type;                                            \
-    static constexpr Type kSuccess = success_value;               \
-    static constexpr const char* kTypeString = #proto_type;       \
-    static constexpr phi::proto::ApiType kProtoType =             \
-        phi::proto::ApiType::proto_type;                          \
+#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \
+  template <>                                         \
+  struct ExternalApiType { /* NOTE(review): "<type>" looks stripped */ \
+    using Type = type;                                \
+    static constexpr Type kSuccess = success_value; /* success code */ \
   }
 
-DEFINE_EXTERNAL_API_TYPE(cudaError_t, cudaSuccess, CUDA);
-DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND);
-DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN);
-DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS);
-DEFINE_EXTERNAL_API_TYPE(cusparseStatus_t, CUSPARSE_STATUS_SUCCESS, CUSPARSE);
-DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER);
-DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS, CUFFT);
-DEFINE_EXTERNAL_API_TYPE(CUresult, CUDA_SUCCESS, CU);
+DEFINE_EXTERNAL_API_TYPE(cudaError_t, cudaSuccess);  // one entry per type
+DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS);
+DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS);
+DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS);
+DEFINE_EXTERNAL_API_TYPE(cusparseStatus_t, CUSPARSE_STATUS_SUCCESS);
+DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS);
+DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS);
+DEFINE_EXTERNAL_API_TYPE(CUresult, CUDA_SUCCESS);
 
 #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
-DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL);
+DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess);
 #endif
 
 }  // namespace details
 
 template
-inline const char* GetErrorMsgUrl(T status) {
-  using __CUDA_STATUS_TYPE__ = decltype(status);
-  phi::proto::ApiType proto_type =
-      details::ExternalApiType<__CUDA_STATUS_TYPE__>::kProtoType;
-  switch (proto_type) {
-    case phi::proto::ApiType::CUDA:
-    case phi::proto::ApiType::CU:
-      return "https://docs.nvidia.com/cuda/cuda-runtime-api/"
-             "group__CUDART__TYPES.html#group__CUDART__TYPES_"
-             "1g3f51e3575c2178246db0a94a430e0038";
-      break;
-    case phi::proto::ApiType::CURAND:
-      return "https://docs.nvidia.com/cuda/curand/"
-             "group__HOST.html#group__HOST_1gb94a31d5c165858c96b6c18b70644437";
-      break;
-    case phi::proto::ApiType::CUDNN:
-      return "https://docs.nvidia.com/deeplearning/cudnn/api/"
-             "index.html#cudnnStatus_t";
-      break;
-    case phi::proto::ApiType::CUBLAS:
-      return "https://docs.nvidia.com/cuda/cublas/index.html#cublasstatus_t";
-      break;
-    case phi::proto::ApiType::CUSOLVER:
-      return "https://docs.nvidia.com/cuda/cusolver/"
-             "index.html#cuSolverSPstatus";
-      break;
-    case phi::proto::ApiType::NCCL:
-      return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
-             "types.html#ncclresult-t";
-      break;
-    case phi::proto::ApiType::CUFFT:
-      return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
-    case phi::proto::ApiType::CUSPARSE:
-      return "https://docs.nvidia.com/cuda/cusparse/"
-             "index.html#cusparseStatus_t";
-      break;
-    default:
-      return "Unknown type of External API, can't get error message URL!";
-      break;
-  }
-}
-
-template
-inline std::string GetExternalErrorMsg(T status) {
-  std::ostringstream sout;
-  bool _initSucceed = false;
-  phi::proto::ExternalErrorDesc externalError;
-  if (externalError.ByteSizeLong() == 0) {
-    std::string filePath;
-#if !defined(_WIN32)
-    Dl_info info;
-    if (dladdr(reinterpret_cast(GetCurrentTraceBackString), &info)) {
-      std::string strModule(info.dli_fname);
-      const size_t last_slash_idx = strModule.find_last_of("/");
-      std::string compare_path = strModule.substr(strModule.length() - 6);
-      if (std::string::npos != last_slash_idx) {
-        strModule.erase(last_slash_idx, std::string::npos);
-      }
-      if (compare_path.compare("avx.so") == 0) {
-        filePath =
-            strModule +
-            "/../include/third_party/externalError/data/externalErrorMsg.pb";
-      } else {
-        filePath = strModule +
-                   "/../../third_party/externalError/data/externalErrorMsg.pb";
-      }
-    }
-#else
-    char buf[512];
-    MEMORY_BASIC_INFORMATION mbi;
-    HMODULE h_module =
-        (::VirtualQuery(GetCurrentTraceBackString, &mbi, sizeof(mbi)) != 0)
-            ? (HMODULE)mbi.AllocationBase
-            : NULL;
-    GetModuleFileName(h_module, buf, 512);
-    std::string strModule(buf);
-    const size_t last_slash_idx = strModule.find_last_of("\\");
-    std::string compare_path = strModule.substr(strModule.length() - 7);
-    if (std::string::npos != last_slash_idx) {
-      strModule.erase(last_slash_idx, std::string::npos);
-    }
-    if (compare_path.compare("avx.pyd") == 0) {
-      filePath = strModule +
-                 "\\..\\include\\third_"
-                 "party\\externalerror\\data\\externalErrorMsg.pb";
-    } else {
-      filePath =
-          strModule +
-          "\\..\\..\\third_party\\externalerror\\data\\externalErrorMsg.pb";
-    }
-#endif
-    std::ifstream fin(filePath, std::ios::in | std::ios::binary);
-    _initSucceed = externalError.ParseFromIstream(&fin);
-  }
-  using __CUDA_STATUS_TYPE__ = decltype(status);
-  phi::proto::ApiType proto_type =
-      details::ExternalApiType<__CUDA_STATUS_TYPE__>::kProtoType;
-  if (_initSucceed) {
-    for (int i = 0; i < externalError.errors_size(); ++i) {
-      if (proto_type == externalError.errors(i).type()) {
-        for (int j = 0; j < externalError.errors(i).messages_size(); ++j) {
-          if (status == externalError.errors(i).messages(j).code()) {
-            sout << "\n [Hint: "
-                 << externalError.errors(i).messages(j).message() << "]";
-            return sout.str();
-          }
-        }
-      }
-    }
-  }
-
-  sout << "\n [Hint: Please search for the error code(" << status
-       << ") on website (" << GetErrorMsgUrl(status)
-       << ") to get Nvidia's official solution and advice about "
-       << details::ExternalApiType<__CUDA_STATUS_TYPE__>::kTypeString
-       << " Error.]";
-  return sout.str();
-}
-
-template std::string GetExternalErrorMsg(cudaError_t);
-template std::string GetExternalErrorMsg(curandStatus_t);
-template std::string GetExternalErrorMsg(cudnnStatus_t);
-template std::string GetExternalErrorMsg(cublasStatus_t);
-template std::string GetExternalErrorMsg(cusparseStatus_t);
-template std::string GetExternalErrorMsg(cusolverStatus_t);
-template std::string GetExternalErrorMsg(cufftResult_t);
-template std::string GetExternalErrorMsg(CUresult);
-#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
-template std::string GetExternalErrorMsg(ncclResult_t);
-#endif
+std::string GetExternalErrorMsg(T status);  // defined in enforce.cc
 
 /*************** CUDA ERROR ***************/
 inline bool is_error(cudaError_t e) { return e != cudaSuccess; }