提交 3b5f3548 编写于 作者: S silingtong123 提交者: liuwei1031

Modify PADDLE_ENFORCE to PADDLE_ENFORCE_CUDA_SUCCESS (#19247)

* add PADDLE_ENFORCE_CUDA_SUCCESS, test=develop (#19211)

* test=develop,Modify PADDLE_ENFORCE to PADDLE_ENFORCE_CUDA_SUCCESS
上级 305bd25b
......@@ -31,23 +31,24 @@ template <>
struct CUBlas<float> {
template <typename... ARGS>
static void GEMM(ARGS... args) {
PADDLE_ENFORCE(platform::dynload::cublasSgemm(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasSgemm(args...));
}
template <typename... ARGS>
static void AXPY(ARGS... args) {
PADDLE_ENFORCE(platform::dynload::cublasSaxpy(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasSaxpy(args...));
}
template <typename... ARGS>
static void GEMV(ARGS... args) {
PADDLE_ENFORCE(platform::dynload::cublasSgemv(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasSgemv(args...));
}
template <typename... ARGS>
static void GEMM_STRIDED_BATCH(ARGS... args) {
#if CUDA_VERSION >= 8000
PADDLE_ENFORCE(platform::dynload::cublasSgemmStridedBatched(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cublasSgemmStridedBatched(args...));
#else
PADDLE_THROW("SgemmStridedBatched is not supported on cuda <= 7.5");
#endif
......@@ -69,7 +70,7 @@ struct CUBlas<float> {
VLOG(5) << "use_tensor_op_math: "
<< (dev_ctx->tensor_core_available() ? "True" : "False");
dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) {
PADDLE_ENFORCE(platform::dynload::cublasSgemmEx(
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasSgemmEx(
handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb,
beta, C, Ctype, ldc));
});
......@@ -83,23 +84,24 @@ template <>
struct CUBlas<double> {
template <typename... ARGS>
static void GEMM(ARGS... args) {
PADDLE_ENFORCE(platform::dynload::cublasDgemm(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasDgemm(args...));
}
template <typename... ARGS>
static void AXPY(ARGS... args) {
PADDLE_ENFORCE(platform::dynload::cublasDaxpy(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasDaxpy(args...));
}
template <typename... ARGS>
static void GEMV(ARGS... args) {
PADDLE_ENFORCE(platform::dynload::cublasDgemv(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasDgemv(args...));
}
template <typename... ARGS>
static void GEMM_STRIDED_BATCH(ARGS... args) {
#if CUDA_VERSION >= 8000
PADDLE_ENFORCE(platform::dynload::cublasDgemmStridedBatched(args...));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cublasDgemmStridedBatched(args...));
#else
PADDLE_THROW("DgemmStridedBatched is not supported on cuda <= 7.5");
#endif
......@@ -120,7 +122,7 @@ struct CUBlas<platform::float16> {
const float16 *alpha, const float16 *A, int lda,
const float16 *B, int ldb, const float16 *beta, float16 *C,
int ldc) {
PADDLE_ENFORCE(
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cublasHgemm(handle, transa, transb, m, n, k,
reinterpret_cast<const __half *>(alpha),
reinterpret_cast<const __half *>(A), lda,
......@@ -140,7 +142,7 @@ struct CUBlas<platform::float16> {
long long int strideC, // NOLINT
int batchCount) {
#if CUDA_VERSION >= 8000
PADDLE_ENFORCE(platform::dynload::cublasHgemmStridedBatched(
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasHgemmStridedBatched(
handle, transa, transb, m, n, k,
reinterpret_cast<const __half *>(alpha),
reinterpret_cast<const __half *>(A), lda, strideA,
......@@ -174,7 +176,7 @@ struct CUBlas<platform::float16> {
#endif // CUDA_VERSION >= 9000
dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) {
PADDLE_ENFORCE(platform::dynload::cublasGemmEx(
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasGemmEx(
handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb,
beta, C, Ctype, ldc, computeType, algo));
});
......@@ -356,7 +358,7 @@ void Blas<platform::CUDADeviceContext>::BatchedGEMM(
<< (use_tensor_op_math ? "True" : "False");
context_.TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) {
PADDLE_ENFORCE(platform::dynload::cublasGemmStridedBatchedEx(
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cublasGemmStridedBatchedEx(
handle, cuTransB, cuTransA, N, M, K, &alpha, B, CUDA_R_32F, ldb,
strideB, A, CUDA_R_32F, lda, strideA, &beta, C, CUDA_R_32F, ldc,
strideC, batchCount, CUDA_R_32F, algo));
......
......@@ -236,6 +236,31 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
#endif // __APPLE__ and windows
#endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
namespace details {
template <typename T>
struct CudaStatusType {};
#define DEFINE_CUDA_STATUS_TYPE(type, success_value) \
template <> \
struct CudaStatusType<type> { \
using Type = type; \
static constexpr Type kSuccess = success_value; \
}
DEFINE_CUDA_STATUS_TYPE(cudaError_t, cudaSuccess);
DEFINE_CUDA_STATUS_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS);
DEFINE_CUDA_STATUS_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS);
DEFINE_CUDA_STATUS_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS);
#if !defined(__APPLE__) && !defined(_WIN32)
DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
#endif
} // namespace details
#endif
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
......@@ -256,6 +281,28 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
} \
} while (0)
#ifdef PADDLE_WITH_CUDA
#define PADDLE_ENFORCE_CUDA_SUCCESS(COND, ...) \
do { \
auto __cond__ = (COND); \
using __CUDA_STATUS_TYPE__ = decltype(__cond__); \
constexpr auto __success_type__ = \
::paddle::platform::details::CudaStatusType< \
__CUDA_STATUS_TYPE__>::kSuccess; \
if (UNLIKELY(__cond__ != __success_type__)) { \
try { \
::paddle::platform::throw_on_error( \
__cond__, ::paddle::string::Sprintf(__VA_ARGS__)); \
} catch (...) { \
throw ::paddle::platform::EnforceNotMet(std::current_exception(), \
__FILE__, __LINE__); \
} \
} \
} while (0)
#undef DEFINE_CUDA_STATUS_TYPE
#endif
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
......
......@@ -253,3 +253,46 @@ TEST(EOF_EXCEPTION, THROW_EOF) {
}
EXPECT_TRUE(caught_eof);
}
#ifdef PADDLE_WITH_CUDA
template <typename T>
bool CheckCudaStatusSuccess(T value, const std::string& msg = "success") {
PADDLE_ENFORCE_CUDA_SUCCESS(value, msg);
return true;
}
template <typename T>
bool CheckCudaStatusFailure(
T value, const std::string& msg = "self-defined cuda status failed") {
try {
PADDLE_ENFORCE_CUDA_SUCCESS(value, msg);
return false;
} catch (paddle::platform::EnforceNotMet& error) {
std::string ex_msg = error.what();
return ex_msg.find(msg) != std::string::npos;
}
}
TEST(enforce, cuda_success) {
EXPECT_TRUE(CheckCudaStatusSuccess(cudaSuccess));
EXPECT_TRUE(CheckCudaStatusFailure(cudaErrorInvalidValue));
EXPECT_TRUE(CheckCudaStatusFailure(cudaErrorMemoryAllocation));
EXPECT_TRUE(CheckCudaStatusSuccess(CURAND_STATUS_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(CURAND_STATUS_VERSION_MISMATCH));
EXPECT_TRUE(CheckCudaStatusFailure(CURAND_STATUS_NOT_INITIALIZED));
EXPECT_TRUE(CheckCudaStatusSuccess(CUDNN_STATUS_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(CUDNN_STATUS_NOT_INITIALIZED));
EXPECT_TRUE(CheckCudaStatusFailure(CUDNN_STATUS_ALLOC_FAILED));
EXPECT_TRUE(CheckCudaStatusSuccess(CUBLAS_STATUS_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(CUBLAS_STATUS_NOT_INITIALIZED));
EXPECT_TRUE(CheckCudaStatusFailure(CUBLAS_STATUS_INVALID_VALUE));
#if !defined(__APPLE__) && !defined(_WIN32)
EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess));
EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError));
EXPECT_TRUE(CheckCudaStatusFailure(ncclSystemError));
#endif
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册