From 4c4d4ba5e064e10aa30e41e72677d3fba76f85c8 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Thu, 17 Dec 2020 17:23:42 +0800 Subject: [PATCH] Modify CublasHandleHolder to Fix Random Unittest Failure. test=develop (#29617) Modify CublasHandleHolder from using PADDLE_ENFORCE_CUDA_SUCCESS to PADDLE_RETRY_CUDA_SUCCESS to fix random unittest failure. We checked that the unittest log showed a CUDA allocation error in this file, which may be due to insufficient GPU resources. We fixed similar failures in the past, so we applied PADDLE_RETRY_CUDA_SUCCESS here. --- paddle/fluid/platform/cuda_helper.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h index d6da830c9c..2a1f0b9ac5 100644 --- a/paddle/fluid/platform/cuda_helper.h +++ b/paddle/fluid/platform/cuda_helper.h @@ -78,11 +78,11 @@ namespace platform { class CublasHandleHolder { public: CublasHandleHolder(cudaStream_t stream, cublasMath_t math_type) { - PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasCreate(&handle_)); - PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasSetStream(handle_, stream)); + PADDLE_RETRY_CUDA_SUCCESS(dynload::cublasCreate(&handle_)); + PADDLE_RETRY_CUDA_SUCCESS(dynload::cublasSetStream(handle_, stream)); #if CUDA_VERSION >= 9000 if (math_type == CUBLAS_TENSOR_OP_MATH) { - PADDLE_ENFORCE_CUDA_SUCCESS( + PADDLE_RETRY_CUDA_SUCCESS( dynload::cublasSetMathMode(handle_, CUBLAS_TENSOR_OP_MATH)); #if CUDA_VERSION >= 11000 } else if (math_type == CUBLAS_TF32_TENSOR_OP_MATH) { @@ -94,7 +94,7 @@ class CublasHandleHolder { } ~CublasHandleHolder() PADDLE_MAY_THROW { - PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasDestroy(handle_)); + PADDLE_RETRY_CUDA_SUCCESS(dynload::cublasDestroy(handle_)); } template -- GitLab