未验证 提交 1b0c7d7c 编写于 作者: Y Yu Yang 提交者: GitHub

Simplify system_allocator and fix GPU_INFO (#6653)

上级 c13805e9
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include <stdlib.h> // for malloc and free #include <stdlib.h> // for malloc and free
#include <sys/mman.h> // for mlock and munlock #include <sys/mman.h> // for mlock and munlock
#include <algorithm> // for std::max
#include "gflags/gflags.h" #include "gflags/gflags.h"
...@@ -28,7 +29,7 @@ limitations under the License. */ ...@@ -28,7 +29,7 @@ limitations under the License. */
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory. // should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
DECLARE_double(fraction_of_gpu_memory_to_use);
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace detail { namespace detail {
...@@ -77,45 +78,20 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) { ...@@ -77,45 +78,20 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr // CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does. // if size is 0. We just make sure it does.
if (size <= 0) return nullptr; if (size <= 0) return nullptr;
void* p;
size_t available = 0;
size_t capacity = 0;
paddle::platform::GpuMemoryUsage(available, capacity);
// Reserve memory for page tables, etc.
size_t reserving = 0.05 * capacity + paddle::platform::GpuMinChunkSize();
size_t usable = available > reserving ? available - reserving : 0;
// If remaining size no less than expected size, using general
// cudaMalloc to allocate GPU memory.
void* p = 0;
if (size <= usable) {
cudaError_t result = cudaMalloc(&p, size); cudaError_t result = cudaMalloc(&p, size);
if (result == cudaSuccess) { if (result == cudaSuccess) {
index = 0; index = 0;
gpu_alloc_size_ += size; gpu_alloc_size_ += size;
return p; return p;
} } else {
} LOG(WARNING)
<< "Cannot malloc " << size / 1024.0 / 1024.0
// If remaining size less than expected size or cudaMalloc failed, << " MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use "
// cudaMallocHost will be considered as a fallback allocator. "environment variable to a lower value. Current value is "
// << FLAGS_fraction_of_gpu_memory_to_use;
// NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
// of host fallback allocation. Allocates too much would reduce
// the amount of memory available to the underlying system for paging.
usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_;
if (size > usable) return nullptr;
cudaError_t result = cudaMallocHost(&p, size);
if (result == cudaSuccess) {
index = 1;
fallback_alloc_size_ += size;
return p;
}
return nullptr; return nullptr;
}
} }
void GPUAllocator::Free(void* p, size_t size, size_t index) { void GPUAllocator::Free(void* p, size_t size, size_t index) {
......
...@@ -73,19 +73,20 @@ size_t GpuMaxChunkSize() { ...@@ -73,19 +73,20 @@ size_t GpuMaxChunkSize() {
size_t available = 0; size_t available = 0;
GpuMemoryUsage(available, total); GpuMemoryUsage(available, total);
VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
// Reserving the rest memory for page tables, etc. << total / 1024 / 1024 << "M";
size_t reserving = 0.05 * total; size_t reserving = static_cast<size_t>(0.05 * total);
// If available less than minimum chunk size, no usable memory exists. // If available less than minimum chunk size, no usable memory exists.
available = available =
std::max(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(), std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(),
reserving) - total - reserving);
reserving;
// Reserving the rest memory for page tables, etc.
size_t allocating = FLAGS_fraction_of_gpu_memory_to_use * total; size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use *
(total - reserving));
PADDLE_ENFORCE_LT(allocating, available); PADDLE_ENFORCE_LE(allocating, available);
return allocating; return allocating;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册