diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h
index 3e25d6897cd9c4fd6f8a9a858041ad1414a12e68..944fd75b2a219163da58c5f6781c698376099a79 100644
--- a/paddle/fluid/platform/enforce.h
+++ b/paddle/fluid/platform/enforce.h
@@ -19,12 +19,13 @@ limitations under the License. */
 #endif  // __GNUC__
 
 #if !defined(_WIN32)
-#include <dlfcn.h>  // dladdr
-#else               // _WIN32
+#include <dlfcn.h>   // dladdr
+#include <unistd.h>  // sleep, usleep
+#else                // _WIN32
 #ifndef NOMINMAX
 #define NOMINMAX  // msvc max/min macro conflict with std::min/max
 #endif
-#include <windows.h>  // GetModuleFileName
+#include <windows.h>  // GetModuleFileName, Sleep
 #endif
 
 #ifdef PADDLE_WITH_CUDA
@@ -80,6 +81,9 @@ class ErrorSummary;
 }  // namespace platform
 }  // namespace paddle
 
+#ifdef PADDLE_WITH_CUDA
+DECLARE_int64(gpu_allocator_retry_time);
+#endif
 DECLARE_int32(call_stack_level);
 
 namespace paddle {
@@ -924,6 +928,19 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
     }                                                      \
   } while (0)
 
+// Pauses the calling thread for roughly `millisecond` milliseconds; used as
+// the back-off between attempts in PADDLE_RETRY_CUDA_SUCCESS.
+inline void retry_sleep(unsigned millisecond) {
+#ifdef _WIN32
+  Sleep(millisecond);  // Win32 Sleep() takes milliseconds.
+#else
+  // POSIX sleep() takes whole seconds, and usleep() is only guaranteed to
+  // accept values below 1,000,000 us, so split the delay between the two.
+  sleep(millisecond / 1000);
+  usleep((millisecond % 1000) * 1000);
+#endif
+}
+
 #define PADDLE_RETRY_CUDA_SUCCESS(COND)                                 \
   do {                                                                  \
     auto __cond__ = (COND);                                             \
@@ -933,6 +950,7 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
           ::paddle::platform::details::CudaStatusType<                  \
               __CUDA_STATUS_TYPE__>::kSuccess;                          \
     while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
+      retry_sleep(FLAGS_gpu_allocator_retry_time);                      \
       __cond__ = (COND);                                                \
       ++retry_count;                                                    \
     }                                                                   \