From 404c16763ada64389b067193267b75004d792321 Mon Sep 17 00:00:00 2001 From: WeiXin Date: Thu, 7 Jan 2021 14:24:04 +0800 Subject: [PATCH] Add detailed error message for curandStatus_t, cublasStatus_t, cusolverStatus_t (#30161) --- paddle/fluid/platform/enforce.h | 93 +++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 9ece502281..421f11dd0b 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -755,31 +755,37 @@ inline bool is_error(curandStatus_t stat) { inline const char* curandGetErrorString(curandStatus_t stat) { switch (stat) { case CURAND_STATUS_SUCCESS: - return "CURAND_STATUS_SUCCESS"; + return "`CURAND_STATUS_SUCCESS`. No errors."; case CURAND_STATUS_VERSION_MISMATCH: - return "CURAND_STATUS_VERSION_MISMATCH"; + return "`CURAND_STATUS_VERSION_MISMATCH`. Header file and linked library " + "version do not match."; case CURAND_STATUS_NOT_INITIALIZED: - return "CURAND_STATUS_NOT_INITIALIZED"; + return "`CURAND_STATUS_NOT_INITIALIZED`. Generator not initialized."; case CURAND_STATUS_ALLOCATION_FAILED: - return "CURAND_STATUS_ALLOCATION_FAILED"; + return "`CURAND_STATUS_ALLOCATION_FAILED`. Memory allocation failed."; case CURAND_STATUS_TYPE_ERROR: - return "CURAND_STATUS_TYPE_ERROR"; + return "`CURAND_STATUS_TYPE_ERROR`. Generator is wrong type."; case CURAND_STATUS_OUT_OF_RANGE: - return "CURAND_STATUS_OUT_OF_RANGE"; + return "`CURAND_STATUS_OUT_OF_RANGE`. Argument out of range."; case CURAND_STATUS_LENGTH_NOT_MULTIPLE: - return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; + return "`CURAND_STATUS_LENGTH_NOT_MULTIPLE`. Length requested is not a " + "multple of dimension."; case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: - return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; + return "`CURAND_STATUS_DOUBLE_PRECISION_REQUIRED`. GPU does not have " + "double precision required by MRG32k3a."; case CURAND_STATUS_LAUNCH_FAILURE: - return "CURAND_STATUS_LAUNCH_FAILURE"; + return "`CURAND_STATUS_LAUNCH_FAILURE`. Kernel launch failure."; case CURAND_STATUS_PREEXISTING_FAILURE: - return "CURAND_STATUS_PREEXISTING_FAILURE"; + return "`CURAND_STATUS_PREEXISTING_FAILURE`. Preexisting failure on " + "library entry."; case CURAND_STATUS_INITIALIZATION_FAILED: - return "CURAND_STATUS_INITIALIZATION_FAILED"; + return "`CURAND_STATUS_INITIALIZATION_FAILED`. Initialization of CUDA " + "failed."; case CURAND_STATUS_ARCH_MISMATCH: - return "CURAND_STATUS_ARCH_MISMATCH"; + return "`CURAND_STATUS_ARCH_MISMATCH`. Architecture mismatch, GPU does " + "not support requested feature."; case CURAND_STATUS_INTERNAL_ERROR: - return "CURAND_STATUS_INTERNAL_ERROR"; + return "`CURAND_STATUS_INTERNAL_ERROR`. Internal library error."; default: return "Unknown curand status"; } @@ -808,23 +814,37 @@ inline bool is_error(cublasStatus_t stat) { inline const char* cublasGetErrorString(cublasStatus_t stat) { switch (stat) { case CUBLAS_STATUS_NOT_INITIALIZED: - return "CUBLAS_STATUS_NOT_INITIALIZED"; + return "`CUBLAS_STATUS_NOT_INITIALIZED`. The cuBLAS library was not " + "initialized."; case CUBLAS_STATUS_ALLOC_FAILED: - return "CUBLAS_STATUS_ALLOC_FAILED"; + return "`CUBLAS_STATUS_ALLOC_FAILED`. Resource allocation failed inside " + "the cuBLAS library."; case CUBLAS_STATUS_INVALID_VALUE: - return "CUBLAS_STATUS_INVALID_VALUE"; + return "`CUBLAS_STATUS_INVALID_VALUE`. An unsupported value or parameter " + "was passed to the function (a negative vector size, for " + "example)."; case CUBLAS_STATUS_ARCH_MISMATCH: - return "CUBLAS_STATUS_ARCH_MISMATCH"; + return "`CUBLAS_STATUS_ARCH_MISMATCH`. The function requires a feature " + "absent from the device architecture; usually caused by the lack " + "of support for double precision."; case CUBLAS_STATUS_MAPPING_ERROR: - return "CUBLAS_STATUS_MAPPING_ERROR"; + return "`CUBLAS_STATUS_MAPPING_ERROR`. An access to GPU memory space " + "failed, which is usually caused by a failure to bind a texture."; case CUBLAS_STATUS_EXECUTION_FAILED: - return "CUBLAS_STATUS_EXECUTION_FAILED"; + return "`CUBLAS_STATUS_EXECUTION_FAILED`. The GPU program failed to " + "execute. This is often caused by a launch failure of the kernel " + "on the GPU, which can be caused by multiple reasons."; case CUBLAS_STATUS_INTERNAL_ERROR: - return "CUBLAS_STATUS_INTERNAL_ERROR"; + return "`CUBLAS_STATUS_INTERNAL_ERROR`. An internal cuBLAS operation " + "failed. This error is usually caused by a cudaMemcpyAsync() " + "failure."; case CUBLAS_STATUS_NOT_SUPPORTED: - return "CUBLAS_STATUS_NOT_SUPPORTED"; + return "`CUBLAS_STATUS_NOT_SUPPORTED`. The functionality requested is " + "not supported."; case CUBLAS_STATUS_LICENSE_ERROR: - return "CUBLAS_STATUS_LICENSE_ERROR"; + return "`CUBLAS_STATUS_LICENSE_ERROR`. The functionality requested " + "requires some license and an error was detected when trying to " + "check the current licensing."; default: return "Unknown cublas status"; } @@ -843,19 +863,34 @@ inline bool is_error(cusolverStatus_t stat) { inline const char* cusolverGetErrorString(cusolverStatus_t stat) { switch (stat) { case CUSOLVER_STATUS_NOT_INITIALIZED: - return "CUSOLVER_STATUS_NOT_INITIALIZED"; + return "`CUSOLVER_STATUS_NOT_INITIALIZED`. The cuSolver library was not " + "initialized. This is usually caused by the lack of a prior call, " + "an error in the CUDA Runtime API called by the cuSolver routine, " + "or an error in the hardware setup."; case CUSOLVER_STATUS_ALLOC_FAILED: - return "CUSOLVER_STATUS_ALLOC_FAILED"; + return "`CUSOLVER_STATUS_ALLOC_FAILED`. Resource allocation failed " + "inside the cuSolver library. This is usually caused by a " + "cudaMalloc() failure."; case CUSOLVER_STATUS_INVALID_VALUE: - return "CUSOLVER_STATUS_INVALID_VALUE"; + return "`CUSOLVER_STATUS_INVALID_VALUE`. An unsupported value or " + "parameter was passed to the function (a negative vector size, " + "for example)."; case CUSOLVER_STATUS_ARCH_MISMATCH: - return "CUSOLVER_STATUS_ARCH_MISMATCH"; + return "`CUSOLVER_STATUS_ARCH_MISMATCH`. The function requires a feature " + "absent from the device architecture; usually caused by the lack " + "of support for atomic operations or double precision."; case CUSOLVER_STATUS_EXECUTION_FAILED: - return "CUSOLVER_STATUS_EXECUTION_FAILED"; + return "`CUSOLVER_STATUS_EXECUTION_FAILED`. The GPU program failed to " + "execute. This is often caused by a launch failure of the kernel " + "on the GPU, which can be caused by multiple reasons."; case CUSOLVER_STATUS_INTERNAL_ERROR: - return "CUSOLVER_STATUS_INTERNAL_ERROR"; + return "`CUSOLVER_STATUS_INTERNAL_ERROR`. An internal cuSolver operation " + "failed. This error is usually caused by a cudaMemcpyAsync() " + "failure."; case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; + return "`CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED`. The matrix type is " + "not supported by this function. This is usually caused by " + "passing an invalid matrix descriptor to the function."; default: return "Unknown cusolver status"; } -- GitLab