Unverified commit 756f4639, authored by Leo Chen, committed by GitHub

refine error msg when out of memory (#32527)

Parent 6c03ea5a
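The gist of the change: both error paths now compute `allocated = total - avail` from the recorded memory query and report that figure in the out-of-memory message, next to the requested size and the remaining available memory. Below is a minimal standalone sketch of that formatting logic, not the Paddle code itself; `HumanReadableSizeDemo` and the hard-coded byte counts are illustrative stand-ins for `string::HumanReadableSize` and the values returned by `RecordedCudaMemGetInfo`.

// Illustrative sketch only: shows how the refined OOM message is assembled
// from the total and available memory of a device.
#include <cstdio>
#include <string>

// Stand-in for string::HumanReadableSize (hypothetical, not the Paddle code).
std::string HumanReadableSizeDemo(double bytes) {
  const char* units[] = {"B", "KB", "MB", "GB", "TB"};
  int i = 0;
  while (bytes >= 1024.0 && i < 4) {
    bytes /= 1024.0;
    ++i;
  }
  char buf[32];
  std::snprintf(buf, sizeof(buf), "%.2f%s", bytes, units[i]);
  return buf;
}

int main() {
  int device = 0;
  size_t request = 2ULL << 30;       // hypothetical 2 GB allocation request
  size_t total = 16ULL << 30;        // hypothetical 16 GB device
  size_t avail = 1ULL << 30;         // hypothetical 1 GB still free
  size_t allocated = total - avail;  // the figure this commit adds to the message

  std::printf(
      "Out of memory error on GPU %d. "
      "Cannot allocate %s memory on GPU %d, "
      "%s memory has been allocated and available memory is only %s.\n",
      device, HumanReadableSizeDemo(request).c_str(), device,
      HumanReadableSizeDemo(allocated).c_str(),
      HumanReadableSizeDemo(avail).c_str());
  return 0;
}

With the example values above, the sketch prints: "Out of memory error on GPU 0. Cannot allocate 2.00GB memory on GPU 0, 15.00GB memory has been allocated and available memory is only 1.00GB."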
@@ -54,6 +54,7 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   size_t avail, total, actual_avail, actual_total;
   bool is_limited = platform::RecordedCudaMemGetInfo(
       &avail, &total, &actual_avail, &actual_total, place_.device);
+  size_t allocated = total - avail;
 
   std::string err_msg;
   if (is_limited) {
@@ -68,13 +69,14 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
       "\n\nOut of memory error on GPU %d. "
-      "Cannot allocate %s memory on GPU %d, "
+      "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
       "available memory is only %s.\n\n"
       "Please check whether there is any other process using GPU %d.\n"
       "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
       "2. If no, please decrease the batch size of your model. %s\n\n",
       place_.device, string::HumanReadableSize(size), place_.device,
-      string::HumanReadableSize(avail), place_.device, err_msg));
+      string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
+      place_.device, err_msg));
 }
 } // namespace allocation
@@ -125,6 +125,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
     size_t avail, total, actual_avail, actual_total;
     bool is_limited = platform::RecordedCudaMemGetInfo(
         &avail, &total, &actual_avail, &actual_total, gpu_id_);
+    size_t allocated = total - avail;
 
     std::string err_msg;
     if (is_limited) {
@@ -139,7 +140,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
     PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
         "\n\nOut of memory error on GPU %d. "
-        "Cannot allocate %s memory on GPU %d, "
+        "Cannot allocate %s memory on GPU %d, %s memory has been allocated and "
         "available memory is only %s.\n\n"
         "Please check whether there is any other process using GPU %d.\n"
         "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
@@ -150,8 +151,8 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
         " The command is "
         "`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.%s\n\n",
         gpu_id_, string::HumanReadableSize(size), gpu_id_,
-        string::HumanReadableSize(avail), gpu_id_,
-        FLAGS_fraction_of_gpu_memory_to_use, err_msg));
+        string::HumanReadableSize(allocated), string::HumanReadableSize(avail),
+        gpu_id_, FLAGS_fraction_of_gpu_memory_to_use, err_msg));
   }
 }