提交 b3a3e6f6 编写于 作者: C Chen Weihang 提交者: Tao Luo

change cuda enforce & add example (#21142)

上级 37e0e7a9
...@@ -81,7 +81,8 @@ class CUDADeviceContextAllocator : public Allocator { ...@@ -81,7 +81,8 @@ class CUDADeviceContextAllocator : public Allocator {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
PADDLE_ENFORCE_CUDA_SUCCESS( PADDLE_ENFORCE_CUDA_SUCCESS(
cudaEventCreate(&event_, cudaEventDisableTiming), cudaEventCreate(&event_, cudaEventDisableTiming),
"Create event failed in CUDADeviceContextAllocator"); platform::errors::External(
"Create event failed in CUDADeviceContextAllocator"));
} }
~CUDADeviceContextAllocator() { ~CUDADeviceContextAllocator() {
......
...@@ -373,6 +373,10 @@ struct EOFException : public std::exception { ...@@ -373,6 +373,10 @@ struct EOFException : public std::exception {
inline bool is_error(cudaError_t e) { return e != cudaSuccess; } inline bool is_error(cudaError_t e) { return e != cudaSuccess; }
inline std::string build_ex_string(cudaError_t e, const std::string& msg) {
return msg;
}
inline void throw_on_error(cudaError_t e, const std::string& msg) { inline void throw_on_error(cudaError_t e, const std::string& msg) {
#ifndef REPLACE_ENFORCE_GLOG #ifndef REPLACE_ENFORCE_GLOG
throw thrust::system_error(e, thrust::cuda_category(), msg); throw thrust::system_error(e, thrust::cuda_category(), msg);
...@@ -385,6 +389,11 @@ inline bool is_error(curandStatus_t stat) { ...@@ -385,6 +389,11 @@ inline bool is_error(curandStatus_t stat) {
return stat != CURAND_STATUS_SUCCESS; return stat != CURAND_STATUS_SUCCESS;
} }
inline std::string build_ex_string(curandStatus_t stat,
const std::string& msg) {
return msg;
}
inline void throw_on_error(curandStatus_t stat, const std::string& msg) { inline void throw_on_error(curandStatus_t stat, const std::string& msg) {
#ifndef REPLACE_ENFORCE_GLOG #ifndef REPLACE_ENFORCE_GLOG
throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(), throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
...@@ -398,11 +407,15 @@ inline bool is_error(cudnnStatus_t stat) { ...@@ -398,11 +407,15 @@ inline bool is_error(cudnnStatus_t stat) {
return stat != CUDNN_STATUS_SUCCESS; return stat != CUDNN_STATUS_SUCCESS;
} }
inline std::string build_ex_string(cudnnStatus_t stat, const std::string& msg) {
return msg + "\n [" + platform::dynload::cudnnGetErrorString(stat) + "]";
}
inline void throw_on_error(cudnnStatus_t stat, const std::string& msg) { inline void throw_on_error(cudnnStatus_t stat, const std::string& msg) {
#ifndef REPLACE_ENFORCE_GLOG #ifndef REPLACE_ENFORCE_GLOG
throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) + msg); throw std::runtime_error(msg);
#else #else
LOG(FATAL) << platform::dynload::cudnnGetErrorString(stat) << msg; LOG(FATAL) << msg;
#endif #endif
} }
...@@ -410,31 +423,36 @@ inline bool is_error(cublasStatus_t stat) { ...@@ -410,31 +423,36 @@ inline bool is_error(cublasStatus_t stat) {
return stat != CUBLAS_STATUS_SUCCESS; return stat != CUBLAS_STATUS_SUCCESS;
} }
inline void throw_on_error(cublasStatus_t stat, const std::string& msg) { inline std::string build_ex_string(cublasStatus_t stat,
const std::string& msg) {
std::string err; std::string err;
if (stat == CUBLAS_STATUS_NOT_INITIALIZED) { if (stat == CUBLAS_STATUS_NOT_INITIALIZED) {
err = "CUBLAS: not initialized, "; err = "CUBLAS: not initialized.";
} else if (stat == CUBLAS_STATUS_ALLOC_FAILED) { } else if (stat == CUBLAS_STATUS_ALLOC_FAILED) {
err = "CUBLAS: alloc failed, "; err = "CUBLAS: alloc failed.";
} else if (stat == CUBLAS_STATUS_INVALID_VALUE) { } else if (stat == CUBLAS_STATUS_INVALID_VALUE) {
err = "CUBLAS: invalid value, "; err = "CUBLAS: invalid value.";
} else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) { } else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) {
err = "CUBLAS: arch mismatch, "; err = "CUBLAS: arch mismatch.";
} else if (stat == CUBLAS_STATUS_MAPPING_ERROR) { } else if (stat == CUBLAS_STATUS_MAPPING_ERROR) {
err = "CUBLAS: mapping error, "; err = "CUBLAS: mapping error.";
} else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) { } else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) {
err = "CUBLAS: execution failed, "; err = "CUBLAS: execution failed.";
} else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) { } else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) {
err = "CUBLAS: internal error, "; err = "CUBLAS: internal error.";
} else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) { } else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) {
err = "CUBLAS: not supported, "; err = "CUBLAS: not supported, ";
} else if (stat == CUBLAS_STATUS_LICENSE_ERROR) { } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) {
err = "CUBLAS: license error, "; err = "CUBLAS: license error.";
} }
return msg + "\n [" + err + "]";
}
inline void throw_on_error(cublasStatus_t stat, const std::string& msg) {
#ifndef REPLACE_ENFORCE_GLOG #ifndef REPLACE_ENFORCE_GLOG
throw std::runtime_error(err + msg); throw std::runtime_error(msg);
#else #else
LOG(FATAL) << err << msg; LOG(FATAL) << msg;
#endif #endif
} }
...@@ -443,11 +461,17 @@ inline bool is_error(ncclResult_t nccl_result) { ...@@ -443,11 +461,17 @@ inline bool is_error(ncclResult_t nccl_result) {
return nccl_result != ncclSuccess; return nccl_result != ncclSuccess;
} }
inline void throw_on_error(ncclResult_t stat, const std::string& msg) { inline std::string build_ex_string(ncclResult_t nccl_result,
const std::string& msg) {
return msg + "\n [" + platform::dynload::ncclGetErrorString(nccl_result) +
"]";
}
inline void throw_on_error(ncclResult_t nccl_result, const std::string& msg) {
#ifndef REPLACE_ENFORCE_GLOG #ifndef REPLACE_ENFORCE_GLOG
throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) + msg); throw std::runtime_error(msg);
#else #else
LOG(FATAL) << platform::dynload::ncclGetErrorString(stat) << msg; LOG(FATAL) << msg;
#endif #endif
} }
#endif // __APPLE__ and windows #endif // __APPLE__ and windows
...@@ -480,22 +504,25 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess); ...@@ -480,22 +504,25 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
#endif // PADDLE_WITH_CUDA #endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#define PADDLE_ENFORCE_CUDA_SUCCESS(COND, ...) \ #define PADDLE_ENFORCE_CUDA_SUCCESS(COND, ...) \
do { \ do { \
auto __cond__ = (COND); \ auto __cond__ = (COND); \
using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ using __CUDA_STATUS_TYPE__ = decltype(__cond__); \
constexpr auto __success_type__ = \ constexpr auto __success_type__ = \
::paddle::platform::details::CudaStatusType< \ ::paddle::platform::details::CudaStatusType< \
__CUDA_STATUS_TYPE__>::kSuccess; \ __CUDA_STATUS_TYPE__>::kSuccess; \
if (UNLIKELY(__cond__ != __success_type__)) { \ if (UNLIKELY(__cond__ != __success_type__)) { \
try { \ try { \
::paddle::platform::throw_on_error( \ ::paddle::platform::throw_on_error( \
__cond__, ::paddle::string::Sprintf(__VA_ARGS__)); \ __cond__, \
} catch (...) { \ ::paddle::platform::build_ex_string( \
throw ::paddle::platform::EnforceNotMet(std::current_exception(), \ __cond__, \
__FILE__, __LINE__); \ ::paddle::platform::ErrorSummary(__VA_ARGS__).ToString())); \
} \ } catch (...) { \
} \ throw ::paddle::platform::EnforceNotMet(std::current_exception(), \
__FILE__, __LINE__); \
} \
} \
} while (0) } while (0)
#undef DEFINE_CUDA_STATUS_TYPE #undef DEFINE_CUDA_STATUS_TYPE
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册