未验证 提交 f56967c4 编写于 作者: Z Zeng Jinle 提交者: GitHub

refine error message of gpu allocator, test=develop (#21008)

上级 bc8e600c
......@@ -105,6 +105,21 @@ bool CPUAllocator::UseGpu() const { return false; }
#ifdef PADDLE_WITH_CUDA
static void ClearCUDAOutOfMemoryError(cudaError_t* status) {
if (*status == cudaErrorMemoryAllocation) {
*status = cudaSuccess;
}
PADDLE_ENFORCE_CUDA_SUCCESS(*status);
*status = cudaGetLastError();
if (*status == cudaErrorMemoryAllocation) {
*status = cudaSuccess;
}
PADDLE_ENFORCE_CUDA_SUCCESS(*status);
}
void* GPUAllocator::Alloc(size_t* index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does.
......@@ -120,19 +135,19 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
gpu_alloc_size_ += size;
return p;
} else {
if (result == cudaErrorMemoryAllocation) {
result = cudaSuccess;
}
PADDLE_ENFORCE_CUDA_SUCCESS(result);
result = cudaGetLastError();
if (result == cudaErrorMemoryAllocation) {
result = cudaSuccess;
}
PADDLE_ENFORCE_CUDA_SUCCESS(result);
size_t avail, total;
platform::GpuMemoryUsage(&avail, &total);
ClearCUDAOutOfMemoryError(&result);
/**
* NOTE(zjl): Sometimes cudaMemGetInfo would raise OOM error
* if there is very little GPU memory left. In this case, we
* should consider the available GPU memory to be 0, and throw
* exception inside this function instead of throwing exception
* inside cudaMemGetInfo.
*/
size_t avail = 0, total = 0;
result = cudaMemGetInfo(&avail, &total);
if (result != cudaSuccess) avail = 0;
ClearCUDAOutOfMemoryError(&result);
PADDLE_THROW_BAD_ALLOC(
"\n\nOut of memory error on GPU %d. "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册