diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index 6a815a1b57db1d833781ca224f34e4559af9b9a5..509250debc2b2fd2e87078ab5f233ae2db6fd898 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include // for malloc and free #include // for mlock and munlock +#include // for std::max #include "gflags/gflags.h" @@ -28,7 +29,7 @@ limitations under the License. */ // of memory available to the system for paging. So, by default, we // should set false to use_pinned_memory. DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); - +DECLARE_double(fraction_of_gpu_memory_to_use); namespace paddle { namespace memory { namespace detail { @@ -77,45 +78,20 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) { // CUDA documentation doesn't explain if cudaMalloc returns nullptr // if size is 0. We just make sure it does. if (size <= 0) return nullptr; - - size_t available = 0; - size_t capacity = 0; - paddle::platform::GpuMemoryUsage(available, capacity); - - // Reserve memory for page tables, etc. - size_t reserving = 0.05 * capacity + paddle::platform::GpuMinChunkSize(); - size_t usable = available > reserving ? available - reserving : 0; - - // If remaining size no less than expected size, using general - // cudaMalloc to allocate GPU memory. - void* p = 0; - if (size <= usable) { - cudaError_t result = cudaMalloc(&p, size); - if (result == cudaSuccess) { - index = 0; - gpu_alloc_size_ += size; - return p; - } - } - - // If remaining size less than expected size or cudaMalloc failed, - // cudaMallocHost will be considered as a fallback allocator. - // - // NOTE: here, we use GpuMaxAllocSize() as the maximum memory size - // of host fallback allocation. Allocates too much would reduce - // the amount of memory available to the underlying system for paging. - usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_; - - if (size > usable) return nullptr; - - cudaError_t result = cudaMallocHost(&p, size); + void* p; + cudaError_t result = cudaMalloc(&p, size); if (result == cudaSuccess) { - index = 1; - fallback_alloc_size_ += size; + index = 0; + gpu_alloc_size_ += size; return p; + } else { + LOG(WARNING) + << "Cannot malloc " << size / 1024.0 / 1024.0 + << " MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use " + "environment variable to a lower value. Current value is " + << FLAGS_fraction_of_gpu_memory_to_use; + return nullptr; } - - return nullptr; } void GPUAllocator::Free(void* p, size_t size, size_t index) { diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index 4fa2eaed31c6e9368459c2da6f8b0667b453d58c..541eca5f39c2e6a4b464aec79fd8a920ab4c7732 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -73,19 +73,20 @@ size_t GpuMaxChunkSize() { size_t available = 0; GpuMemoryUsage(available, total); - - // Reserving the rest memory for page tables, etc. - size_t reserving = 0.05 * total; - + VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/" + << total / 1024 / 1024 << "M"; + size_t reserving = static_cast(0.05 * total); // If available less than minimum chunk size, no usable memory exists. available = - std::max(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(), - reserving) - - reserving; + std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(), + total - reserving); + + // Reserving the rest memory for page tables, etc. - size_t allocating = FLAGS_fraction_of_gpu_memory_to_use * total; + size_t allocating = static_cast(FLAGS_fraction_of_gpu_memory_to_use * + (total - reserving)); - PADDLE_ENFORCE_LT(allocating, available); + PADDLE_ENFORCE_LE(allocating, available); return allocating; }