From 756f4639800712209ca44a3ee0b9a00abe04cbac Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Mon, 26 Apr 2021 10:22:22 +0800
Subject: [PATCH] refine error msg when out of memory (#32527)

---
 paddle/fluid/memory/allocation/cuda_allocator.cc | 6 ++++--
 paddle/fluid/memory/detail/system_allocator.cc   | 7 ++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index c1b12f5c0ec..b1a45afa99d 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -54,6 +54,7 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   size_t avail, total, actual_avail, actual_total;
   bool is_limited = platform::RecordedCudaMemGetInfo(
       &avail, &total, &actual_avail, &actual_total, place_.device);
+  size_t allocated = total - avail;
 
   std::string err_msg;
   if (is_limited) {
@@ -68,13 +69,14 @@
 
   PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
       "\n\nOut of memory error on GPU %d. "
-      "Cannot allocate %s memory on GPU %d, "
+      "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
       "available memory is only %s.\n\n"
       "Please check whether there is any other process using GPU %d.\n"
       "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
       "2. If no, please decrease the batch size of your model. %s\n\n",
       place_.device, string::HumanReadableSize(size), place_.device,
-      string::HumanReadableSize(avail), place_.device, err_msg));
+      string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
+      place_.device, err_msg));
 }
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index c733ba5c68c..0d7065d8bfb 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -125,6 +125,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
     size_t avail, total, actual_avail, actual_total;
     bool is_limited = platform::RecordedCudaMemGetInfo(
         &avail, &total, &actual_avail, &actual_total, gpu_id_);
+    size_t allocated = total - avail;
 
     std::string err_msg;
     if (is_limited) {
@@ -139,7 +140,7 @@
 
     PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
         "\n\nOut of memory error on GPU %d. "
-        "Cannot allocate %s memory on GPU %d, "
+        "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
         "available memory is only %s.\n\n"
         "Please check whether there is any other process using GPU %d.\n"
         "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
@@ -150,8 +151,8 @@
         "      The command is "
         "`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.%s\n\n",
         gpu_id_, string::HumanReadableSize(size), gpu_id_,
-        string::HumanReadableSize(avail), gpu_id_,
-        FLAGS_fraction_of_gpu_memory_to_use, err_msg));
+        string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
+        gpu_id_, FLAGS_fraction_of_gpu_memory_to_use, err_msg));
   }
 }
 
--
GitLab