From a1909affc6735164fd49d84fbfa85d9554238b11 Mon Sep 17 00:00:00 2001
From: Huihuang Zheng
Date: Wed, 9 Dec 2020 14:30:11 +0800
Subject: [PATCH] Fix Unit Test: Add Sleep Time for CUDA Retry (#29442)

Add Sleep Time for CUDA Retry, which is similar to our GPU retry logic. This is a try to avoid init GPU allocation random failure in unit test.
---
Review note: retry_sleep() is corrected relative to the originally posted
patch, whose non-Windows branch passed a millisecond count to sleep(), which
takes seconds. The blob ids on the "index" line are those of the original
commit.

 paddle/fluid/platform/enforce.h | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h
index 3e25d6897cd..944fd75b2a2 100644
--- a/paddle/fluid/platform/enforce.h
+++ b/paddle/fluid/platform/enforce.h
@@ -19,12 +19,13 @@ limitations under the License. */
 #endif  // __GNUC__

 #if !defined(_WIN32)
-#include <dlfcn.h>  // dladdr
-#else               // _WIN32
+#include <dlfcn.h>   // dladdr
+#include <unistd.h>  // sleep, usleep
+#else                // _WIN32
 #ifndef NOMINMAX
 #define NOMINMAX  // msvc max/min macro conflict with std::min/max
 #endif
-#include <windows.h>  // GetModuleFileName
+#include <windows.h>  // GetModuleFileName, Sleep
 #endif

 #ifdef PADDLE_WITH_CUDA
@@ -80,6 +81,9 @@ class ErrorSummary;
 }  // namespace platform
 }  // namespace paddle

+#ifdef PADDLE_WITH_CUDA
+DECLARE_int64(gpu_allocator_retry_time);
+#endif
 DECLARE_int32(call_stack_level);

 namespace paddle {
@@ -924,6 +928,22 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
     }                                                            \
   } while (0)

+// Pauses the calling thread for about `millisecond` milliseconds between
+// CUDA retries.
+inline void retry_sleep(unsigned millisecond) {
+#ifdef _WIN32
+  Sleep(millisecond);  // Win32 Sleep() takes milliseconds
+#else
+  if (millisecond < 1000) {
+    // usleep() takes microseconds and may reject values >= 1,000,000
+    usleep(millisecond * 1000);
+  } else {
+    // sleep() takes whole seconds; rounding down is fine for a retry delay
+    sleep(millisecond / 1000);
+  }
+#endif
+}
+
 #define PADDLE_RETRY_CUDA_SUCCESS(COND)                                 \
   do {                                                                  \
     auto __cond__ = (COND);                                             \
@@ -933,6 +953,7 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
         ::paddle::platform::details::CudaStatusType<                    \
             __CUDA_STATUS_TYPE__>::kSuccess;                            \
     while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
+      retry_sleep(FLAGS_gpu_allocator_retry_time);                      \
       __cond__ = (COND);                                                \
       ++retry_count;                                                    \
     }                                                                   \
-- 
GitLab