From 756f4639800712209ca44a3ee0b9a00abe04cbac Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Mon, 26 Apr 2021 10:22:22 +0800
Subject: [PATCH] refine error msg when out of memory (#32527)

---
 paddle/fluid/memory/allocation/cuda_allocator.cc | 6 ++++--
 paddle/fluid/memory/detail/system_allocator.cc   | 7 ++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index c1b12f5c0ec..b1a45afa99d 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -54,6 +54,7 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   size_t avail, total, actual_avail, actual_total;
   bool is_limited = platform::RecordedCudaMemGetInfo(
       &avail, &total, &actual_avail, &actual_total, place_.device);
+  size_t allocated = total - avail;
 
   std::string err_msg;
   if (is_limited) {
@@ -68,13 +69,14 @@
 
   PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
       "\n\nOut of memory error on GPU %d. "
-      "Cannot allocate %s memory on GPU %d, "
+      "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
       "available memory is only %s.\n\n"
       "Please check whether there is any other process using GPU %d.\n"
       "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
       "2. If no, please decrease the batch size of your model. %s\n\n",
       place_.device, string::HumanReadableSize(size), place_.device,
-      string::HumanReadableSize(avail), place_.device, err_msg));
+      string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
+      place_.device, err_msg));
 }
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index c733ba5c68c..0d7065d8bfb 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -125,6 +125,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
     size_t avail, total, actual_avail, actual_total;
     bool is_limited = platform::RecordedCudaMemGetInfo(
         &avail, &total, &actual_avail, &actual_total, gpu_id_);
+    size_t allocated = total - avail;
 
     std::string err_msg;
     if (is_limited) {
@@ -139,7 +140,7 @@
 
     PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
         "\n\nOut of memory error on GPU %d. "
-        "Cannot allocate %s memory on GPU %d, "
+        "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
         "available memory is only %s.\n\n"
         "Please check whether there is any other process using GPU %d.\n"
         "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
@@ -150,8 +151,8 @@
         "      The command is "
         "`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.%s\n\n",
         gpu_id_, string::HumanReadableSize(size), gpu_id_,
-        string::HumanReadableSize(avail), gpu_id_,
-        FLAGS_fraction_of_gpu_memory_to_use, err_msg));
+        string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
+        gpu_id_, FLAGS_fraction_of_gpu_memory_to_use, err_msg));
   }
 }
 
--
GitLab