move some function of cuda error from enforce.h to enforce.cc (#52828)

e64ce0bb · zyfncg · GitHub · 802129b3 · e64ce0bb · e64ce0bb
显示空白变更内容
内联并排

Showing with 188 addition and 150 deletion

paddle/phi/core/enforce.cc paddle/phi/core/enforce.cc +173 -0

paddle/phi/core/enforce.h paddle/phi/core/enforce.h +15 -150

未找到文件。
--- a/paddle/phi/core/enforce.cc
+++ b/paddle/phi/core/enforce.cc
@@ -23,6 +23,10 @@ limitations under the License. */
 #include "paddle/phi/common/scalar.h"
 #include "paddle/utils/blank.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/phi/core/external_error.pb.h"
+#endif  // PADDLE_WITH_CUDA
 DECLARE_int32(call_stack_level);
 namespace egr {
@@ -177,5 +181,174 @@ std::string SimplifyErrorTypeFormat(const std::string& str) {
  return sout.str();
 }
+/**************************************************************************/
+/**************************** NVIDIA ERROR ********************************/
+#ifdef PADDLE_WITH_CUDA
+namespace details {
+template <typename T>
+struct ExternalApiProtoType {};
+#define DEFINE_EXTERNAL_API_PROTO_TYPE(type, proto_type)    \
+  template <>                                               \
+  struct ExternalApiProtoType<type> {                       \
+    using Type = type;                                      \
+    static constexpr const char* kTypeString = #proto_type; \
+    static constexpr phi::proto::ApiType kProtoType =       \
+        phi::proto::ApiType::proto_type;                    \
+  }
+DEFINE_EXTERNAL_API_PROTO_TYPE(cudaError_t, CUDA);
+DEFINE_EXTERNAL_API_PROTO_TYPE(curandStatus_t, CURAND);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cudnnStatus_t, CUDNN);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cublasStatus_t, CUBLAS);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cusparseStatus_t, CUSPARSE);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cusolverStatus_t, CUSOLVER);
+DEFINE_EXTERNAL_API_PROTO_TYPE(cufftResult_t, CUFFT);
+DEFINE_EXTERNAL_API_PROTO_TYPE(CUresult, CU);
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
+DEFINE_EXTERNAL_API_PROTO_TYPE(ncclResult_t, NCCL);
+#endif
+#undef DEFINE_EXTERNAL_API_PROTO_TYPE
+}  // namespace details
+template <typename T>
+inline const char* GetErrorMsgUrl(T status) {
+  using __CUDA_STATUS_TYPE__ = decltype(status);
+  phi::proto::ApiType proto_type =
+      details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kProtoType;
+  switch (proto_type) {
+    case phi::proto::ApiType::CUDA:
+    case phi::proto::ApiType::CU:
+      return "https://docs.nvidia.com/cuda/cuda-runtime-api/"
+             "group__CUDART__TYPES.html#group__CUDART__TYPES_"
+             "1g3f51e3575c2178246db0a94a430e0038";
+      break;
+    case phi::proto::ApiType::CURAND:
+      return "https://docs.nvidia.com/cuda/curand/"
+             "group__HOST.html#group__HOST_1gb94a31d5c165858c96b6c18b70644437";
+      break;
+    case phi::proto::ApiType::CUDNN:
+      return "https://docs.nvidia.com/deeplearning/cudnn/api/"
+             "index.html#cudnnStatus_t";
+      break;
+    case phi::proto::ApiType::CUBLAS:
+      return "https://docs.nvidia.com/cuda/cublas/index.html#cublasstatus_t";
+      break;
+    case phi::proto::ApiType::CUSOLVER:
+      return "https://docs.nvidia.com/cuda/cusolver/"
+             "index.html#cuSolverSPstatus";
+      break;
+    case phi::proto::ApiType::NCCL:
+      return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
+             "types.html#ncclresult-t";
+      break;
+    case phi::proto::ApiType::CUFFT:
+      return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
+    case phi::proto::ApiType::CUSPARSE:
+      return "https://docs.nvidia.com/cuda/cusparse/"
+             "index.html#cusparseStatus_t";
+      break;
+    default:
+      return "Unknown type of External API, can't get error message URL!";
+      break;
+  }
+}
+template <typename T>
+std::string GetExternalErrorMsg(T status) {
+  std::ostringstream sout;
+  bool _initSucceed = false;
+  phi::proto::ExternalErrorDesc externalError;
+  if (externalError.ByteSizeLong() == 0) {
+    std::string filePath;
+#if !defined(_WIN32)
+    Dl_info info;
+    if (dladdr(reinterpret_cast<void*>(GetCurrentTraceBackString), &info)) {
+      std::string strModule(info.dli_fname);
+      const size_t last_slash_idx = strModule.find_last_of("/");
+      std::string compare_path = strModule.substr(strModule.length() - 6);
+      if (std::string::npos != last_slash_idx) {
+        strModule.erase(last_slash_idx, std::string::npos);
+      }
+      if (compare_path.compare("avx.so") == 0) {
+        filePath =
+            strModule +
+            "/../include/third_party/externalError/data/externalErrorMsg.pb";
+      } else {
+        filePath = strModule +
+                   "/../../third_party/externalError/data/externalErrorMsg.pb";
+      }
+    }
+#else
+    char buf[512];
+    MEMORY_BASIC_INFORMATION mbi;
+    HMODULE h_module =
+        (::VirtualQuery(GetCurrentTraceBackString, &mbi, sizeof(mbi)) != 0)
+            ? (HMODULE)mbi.AllocationBase
+            : NULL;
+    GetModuleFileName(h_module, buf, 512);
+    std::string strModule(buf);
+    const size_t last_slash_idx = strModule.find_last_of("\\");
+    std::string compare_path = strModule.substr(strModule.length() - 7);
+    if (std::string::npos != last_slash_idx) {
+      strModule.erase(last_slash_idx, std::string::npos);
+    }
+    if (compare_path.compare("avx.pyd") == 0) {
+      filePath = strModule +
+                 "\\..\\include\\third_"
+                 "party\\externalerror\\data\\externalErrorMsg.pb";
+    } else {
+      filePath =
+          strModule +
+          "\\..\\..\\third_party\\externalerror\\data\\externalErrorMsg.pb";
+    }
+#endif
+    std::ifstream fin(filePath, std::ios::in | std::ios::binary);
+    _initSucceed = externalError.ParseFromIstream(&fin);
+  }
+  using __CUDA_STATUS_TYPE__ = decltype(status);
+  phi::proto::ApiType proto_type =
+      details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kProtoType;
+  if (_initSucceed) {
+    for (int i = 0; i < externalError.errors_size(); ++i) {
+      if (proto_type == externalError.errors(i).type()) {
+        for (int j = 0; j < externalError.errors(i).messages_size(); ++j) {
+          if (status == externalError.errors(i).messages(j).code()) {
+            sout << "\n  [Hint: "
+                 << externalError.errors(i).messages(j).message() << "]";
+            return sout.str();
+          }
+        }
+      }
+    }
+  }
+  sout << "\n  [Hint: Please search for the error code(" << status
+       << ") on website (" << GetErrorMsgUrl(status)
+       << ") to get Nvidia's official solution and advice about "
+       << details::ExternalApiProtoType<__CUDA_STATUS_TYPE__>::kTypeString
+       << " Error.]";
+  return sout.str();
+}
+template std::string GetExternalErrorMsg<cudaError_t>(cudaError_t);
+template std::string GetExternalErrorMsg<curandStatus_t>(curandStatus_t);
+template std::string GetExternalErrorMsg<cudnnStatus_t>(cudnnStatus_t);
+template std::string GetExternalErrorMsg<cublasStatus_t>(cublasStatus_t);
+template std::string GetExternalErrorMsg<cusparseStatus_t>(cusparseStatus_t);
+template std::string GetExternalErrorMsg<cusolverStatus_t>(cusolverStatus_t);
+template std::string GetExternalErrorMsg<cufftResult_t>(cufftResult_t);
+template std::string GetExternalErrorMsg<CUresult>(CUresult);
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
+template std::string GetExternalErrorMsg<ncclResult_t>(ncclResult_t);
+#endif
+#endif  // PADDLE_WITH_CUDA
 }  // namespace enforce
 }  // namespace phi
--- a/paddle/phi/core/enforce.h
+++ b/paddle/phi/core/enforce.h
@@ -33,8 +33,6 @@ limitations under the License. */
 #include <cusparse.h>
 #include <thrust/system/cuda/error.h>
 #include <thrust/system_error.h>
-#include "paddle/phi/core/external_error.pb.h"
 #endif  // PADDLE_WITH_CUDA
 #ifdef PADDLE_WITH_HIP
@@ -90,7 +88,6 @@ limitations under the License. */
 #endif  // PADDLE_WITH_HIP
 // Note: these headers for simplify demangle type string
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/type_defs.h"
 // Note: this header for simplify HIP and CUDA type string
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -615,162 +612,30 @@ namespace details {
 template <typename T>
 struct ExternalApiType {};
-#define DEFINE_EXTERNAL_API_TYPE(type, success_value, proto_type) \
+#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \
  template <>                                         \
  struct ExternalApiType<type> {                      \
    using Type = type;                                \
    static constexpr Type kSuccess = success_value;   \
-    static constexpr const char* kTypeString = #proto_type;       \
-    static constexpr phi::proto::ApiType kProtoType =             \
-        phi::proto::ApiType::proto_type;                          \
  }
-DEFINE_EXTERNAL_API_TYPE(cudaError_t, cudaSuccess, CUDA);
+DEFINE_EXTERNAL_API_TYPE(cudaError_t, cudaSuccess);
-DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND);
+DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS);
-DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN);
+DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS);
-DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS);
+DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS);
-DEFINE_EXTERNAL_API_TYPE(cusparseStatus_t, CUSPARSE_STATUS_SUCCESS, CUSPARSE);
+DEFINE_EXTERNAL_API_TYPE(cusparseStatus_t, CUSPARSE_STATUS_SUCCESS);
-DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER);
+DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS);
-DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS, CUFFT);
+DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS);
-DEFINE_EXTERNAL_API_TYPE(CUresult, CUDA_SUCCESS, CU);
+DEFINE_EXTERNAL_API_TYPE(CUresult, CUDA_SUCCESS);
 #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
-DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL);
+DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess);
 #endif
 }  // namespace details
 template <typename T>
-inline const char* GetErrorMsgUrl(T status) {
+std::string GetExternalErrorMsg(T status);
-  using __CUDA_STATUS_TYPE__ = decltype(status);
-  phi::proto::ApiType proto_type =
-      details::ExternalApiType<__CUDA_STATUS_TYPE__>::kProtoType;
-  switch (proto_type) {
-    case phi::proto::ApiType::CUDA:
-    case phi::proto::ApiType::CU:
-      return "https://docs.nvidia.com/cuda/cuda-runtime-api/"
-             "group__CUDART__TYPES.html#group__CUDART__TYPES_"
-             "1g3f51e3575c2178246db0a94a430e0038";
-      break;
-    case phi::proto::ApiType::CURAND:
-      return "https://docs.nvidia.com/cuda/curand/"
-             "group__HOST.html#group__HOST_1gb94a31d5c165858c96b6c18b70644437";
-      break;
-    case phi::proto::ApiType::CUDNN:
-      return "https://docs.nvidia.com/deeplearning/cudnn/api/"
-             "index.html#cudnnStatus_t";
-      break;
-    case phi::proto::ApiType::CUBLAS:
-      return "https://docs.nvidia.com/cuda/cublas/index.html#cublasstatus_t";
-      break;
-    case phi::proto::ApiType::CUSOLVER:
-      return "https://docs.nvidia.com/cuda/cusolver/"
-             "index.html#cuSolverSPstatus";
-      break;
-    case phi::proto::ApiType::NCCL:
-      return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
-             "types.html#ncclresult-t";
-      break;
-    case phi::proto::ApiType::CUFFT:
-      return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
-    case phi::proto::ApiType::CUSPARSE:
-      return "https://docs.nvidia.com/cuda/cusparse/"
-             "index.html#cusparseStatus_t";
-      break;
-    default:
-      return "Unknown type of External API, can't get error message URL!";
-      break;
-  }
-}
-template <typename T>
-inline std::string GetExternalErrorMsg(T status) {
-  std::ostringstream sout;
-  bool _initSucceed = false;
-  phi::proto::ExternalErrorDesc externalError;
-  if (externalError.ByteSizeLong() == 0) {
-    std::string filePath;
-#if !defined(_WIN32)
-    Dl_info info;
-    if (dladdr(reinterpret_cast<void*>(GetCurrentTraceBackString), &info)) {
-      std::string strModule(info.dli_fname);
-      const size_t last_slash_idx = strModule.find_last_of("/");
-      std::string compare_path = strModule.substr(strModule.length() - 6);
-      if (std::string::npos != last_slash_idx) {
-        strModule.erase(last_slash_idx, std::string::npos);
-      }
-      if (compare_path.compare("avx.so") == 0) {
-        filePath =
-            strModule +
-            "/../include/third_party/externalError/data/externalErrorMsg.pb";
-      } else {
-        filePath = strModule +
-                   "/../../third_party/externalError/data/externalErrorMsg.pb";
-      }
-    }
-#else
-    char buf[512];
-    MEMORY_BASIC_INFORMATION mbi;
-    HMODULE h_module =
-        (::VirtualQuery(GetCurrentTraceBackString, &mbi, sizeof(mbi)) != 0)
-            ? (HMODULE)mbi.AllocationBase
-            : NULL;
-    GetModuleFileName(h_module, buf, 512);
-    std::string strModule(buf);
-    const size_t last_slash_idx = strModule.find_last_of("\\");
-    std::string compare_path = strModule.substr(strModule.length() - 7);
-    if (std::string::npos != last_slash_idx) {
-      strModule.erase(last_slash_idx, std::string::npos);
-    }
-    if (compare_path.compare("avx.pyd") == 0) {
-      filePath = strModule +
-                 "\\..\\include\\third_"
-                 "party\\externalerror\\data\\externalErrorMsg.pb";
-    } else {
-      filePath =
-          strModule +
-          "\\..\\..\\third_party\\externalerror\\data\\externalErrorMsg.pb";
-    }
-#endif
-    std::ifstream fin(filePath, std::ios::in | std::ios::binary);
-    _initSucceed = externalError.ParseFromIstream(&fin);
-  }
-  using __CUDA_STATUS_TYPE__ = decltype(status);
-  phi::proto::ApiType proto_type =
-      details::ExternalApiType<__CUDA_STATUS_TYPE__>::kProtoType;
-  if (_initSucceed) {
-    for (int i = 0; i < externalError.errors_size(); ++i) {
-      if (proto_type == externalError.errors(i).type()) {
-        for (int j = 0; j < externalError.errors(i).messages_size(); ++j) {
-          if (status == externalError.errors(i).messages(j).code()) {
-            sout << "\n  [Hint: "
-                 << externalError.errors(i).messages(j).message() << "]";
-            return sout.str();
-          }
-        }
-      }
-    }
-  }
-  sout << "\n  [Hint: Please search for the error code(" << status
-       << ") on website (" << GetErrorMsgUrl(status)
-       << ") to get Nvidia's official solution and advice about "
-       << details::ExternalApiType<__CUDA_STATUS_TYPE__>::kTypeString
-       << " Error.]";
-  return sout.str();
-}
-template std::string GetExternalErrorMsg<cudaError_t>(cudaError_t);
-template std::string GetExternalErrorMsg<curandStatus_t>(curandStatus_t);
-template std::string GetExternalErrorMsg<cudnnStatus_t>(cudnnStatus_t);
-template std::string GetExternalErrorMsg<cublasStatus_t>(cublasStatus_t);
-template std::string GetExternalErrorMsg<cusparseStatus_t>(cusparseStatus_t);
-template std::string GetExternalErrorMsg<cusolverStatus_t>(cusolverStatus_t);
-template std::string GetExternalErrorMsg<cufftResult_t>(cufftResult_t);
-template std::string GetExternalErrorMsg<CUresult>(CUresult);
-#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
-template std::string GetExternalErrorMsg<ncclResult_t>(ncclResult_t);
-#endif
 /*************** CUDA ERROR ***************/
 inline bool is_error(cudaError_t e) { return e != cudaSuccess; }