未验证 提交 756f4639 编写于 作者: L Leo Chen 提交者: GitHub

Refine error message when out of memory (#32527)

上级 6c03ea5a
...@@ -54,6 +54,7 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) { ...@@ -54,6 +54,7 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
size_t avail, total, actual_avail, actual_total; size_t avail, total, actual_avail, actual_total;
bool is_limited = platform::RecordedCudaMemGetInfo( bool is_limited = platform::RecordedCudaMemGetInfo(
&avail, &total, &actual_avail, &actual_total, place_.device); &avail, &total, &actual_avail, &actual_total, place_.device);
size_t allocated = total - avail;
std::string err_msg; std::string err_msg;
if (is_limited) { if (is_limited) {
...@@ -68,13 +69,14 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) { ...@@ -68,13 +69,14 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted( PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
"\n\nOut of memory error on GPU %d. " "\n\nOut of memory error on GPU %d. "
"Cannot allocate %s memory on GPU %d, " "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
"available memory is only %s.\n\n" "available memory is only %s.\n\n"
"Please check whether there is any other process using GPU %d.\n" "Please check whether there is any other process using GPU %d.\n"
"1. If yes, please stop them, or start PaddlePaddle on another GPU.\n" "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
"2. If no, please decrease the batch size of your model. %s\n\n", "2. If no, please decrease the batch size of your model. %s\n\n",
place_.device, string::HumanReadableSize(size), place_.device, place_.device, string::HumanReadableSize(size), place_.device,
string::HumanReadableSize(avail), place_.device, err_msg)); string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
place_.device, err_msg));
} }
} // namespace allocation } // namespace allocation
......
...@@ -125,6 +125,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) { ...@@ -125,6 +125,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
size_t avail, total, actual_avail, actual_total; size_t avail, total, actual_avail, actual_total;
bool is_limited = platform::RecordedCudaMemGetInfo( bool is_limited = platform::RecordedCudaMemGetInfo(
&avail, &total, &actual_avail, &actual_total, gpu_id_); &avail, &total, &actual_avail, &actual_total, gpu_id_);
size_t allocated = total - avail;
std::string err_msg; std::string err_msg;
if (is_limited) { if (is_limited) {
...@@ -139,7 +140,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) { ...@@ -139,7 +140,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted( PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
"\n\nOut of memory error on GPU %d. " "\n\nOut of memory error on GPU %d. "
"Cannot allocate %s memory on GPU %d, " "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
"available memory is only %s.\n\n" "available memory is only %s.\n\n"
"Please check whether there is any other process using GPU %d.\n" "Please check whether there is any other process using GPU %d.\n"
"1. If yes, please stop them, or start PaddlePaddle on another GPU.\n" "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
...@@ -150,8 +151,8 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) { ...@@ -150,8 +151,8 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
" The command is " " The command is "
"`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.%s\n\n", "`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.%s\n\n",
gpu_id_, string::HumanReadableSize(size), gpu_id_, gpu_id_, string::HumanReadableSize(size), gpu_id_,
string::HumanReadableSize(avail), gpu_id_, string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
FLAGS_fraction_of_gpu_memory_to_use, err_msg)); gpu_id_, FLAGS_fraction_of_gpu_memory_to_use, err_msg));
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册