未验证 提交 4c4d4ba5 编写于 作者: H Huihuang Zheng 提交者: GitHub

Modify CublasHandleHolder to Fix Random Unittest Failure. test=develop (#29617)

Modify CublasHandleHolder from using PADDLE_ENFORCE_CUDA_SUCCESS to PADDLE_RETRY_CUDA_SUCCESS to fix random unittest failure. We checked that the unittest log showed CUDA allocation error at this file, which may due to GPU not enough. We fixed similar failure in the past, so we applied PADDLE_RETRY_CUDA_SUCCESS here.
上级 6cfa59de
......@@ -78,11 +78,11 @@ namespace platform {
class CublasHandleHolder {
public:
CublasHandleHolder(cudaStream_t stream, cublasMath_t math_type) {
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasCreate(&handle_));
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasSetStream(handle_, stream));
PADDLE_RETRY_CUDA_SUCCESS(dynload::cublasCreate(&handle_));
PADDLE_RETRY_CUDA_SUCCESS(dynload::cublasSetStream(handle_, stream));
#if CUDA_VERSION >= 9000
if (math_type == CUBLAS_TENSOR_OP_MATH) {
PADDLE_ENFORCE_CUDA_SUCCESS(
PADDLE_RETRY_CUDA_SUCCESS(
dynload::cublasSetMathMode(handle_, CUBLAS_TENSOR_OP_MATH));
#if CUDA_VERSION >= 11000
} else if (math_type == CUBLAS_TF32_TENSOR_OP_MATH) {
......@@ -94,7 +94,7 @@ class CublasHandleHolder {
}
~CublasHandleHolder() PADDLE_MAY_THROW {
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasDestroy(handle_));
PADDLE_RETRY_CUDA_SUCCESS(dynload::cublasDestroy(handle_));
}
template <typename Callback>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册