提交 58a9f9f7 编写于 作者: C chengduoZH

set the max size of cudapinned memory

上级 ab601c19
......@@ -125,11 +125,11 @@ bool GPUAllocator::UseGpu() const { return true; }
void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) {
if (size <= 0) return nullptr;
// NOTE: here, we use CpuMaxAllocSize()/2 as the maximum memory size
// NOTE: here, we use CUDAPinnedMaxAllocSize as the maximum memory size
// of host pinned allocation. Allocates too much would reduce
// the amount of memory available to the underlying system for paging.
size_t usable =
paddle::platform::CpuMaxAllocSize() / 2 - cuda_pinnd_alloc_size_;
paddle::platform::CUDAPinnedMaxAllocSize() - cuda_pinnd_alloc_size_;
if (size > usable) return nullptr;
......
......@@ -116,8 +116,8 @@ BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
static BuddyAllocator* ba = NULL;
if (ba == NULL) {
ba = new BuddyAllocator(new detail::CUDAPinnedAllocator,
platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize());
platform::CUDAPinnedMinChunkSize(),
platform::CUDAPinnedMaxChunkSize());
}
return ba;
}
......
......@@ -143,7 +143,7 @@ TEST(BuddyAllocator, GPUMultAlloc) {
size_t align(size_t size, paddle::platform::CUDAPinnedPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
size_t alignment = paddle::platform::CpuMinChunkSize();
size_t alignment = paddle::platform::CUDAPinnedMinChunkSize();
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
......
......@@ -27,6 +27,10 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
DEFINE_double(fraction_of_cuda_pinned_memory_to_use, 0.5,
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
namespace paddle {
namespace platform {
......@@ -62,5 +66,22 @@ size_t CpuMaxChunkSize() {
return CpuMaxAllocSize() / 32;
}
size_t CUDAPinnedMaxAllocSize() {
// For distributed systems, it requires configuring and limiting
// the fraction of memory to use.
return FLAGS_fraction_of_cuda_pinned_memory_to_use * CpuTotalPhysicalMemory();
}
size_t CUDAPinnedMinChunkSize() {
// Allow to allocate the minimum chunk size is 64 KB.
return 1 << 16;
}
size_t CUDAPinnedMaxChunkSize() {
// Allow to allocate the maximum chunk size is roughly 0.39% of CUDA_PINNED
// memory.
return CUDAPinnedMaxAllocSize() / 256;
}
} // namespace platform
} // namespace paddle
......@@ -22,11 +22,20 @@ namespace platform {
//! Get the maximum allocation size for a machine.
size_t CpuMaxAllocSize();
//! Get the maximum allocation size for a machine.
size_t CUDAPinnedMaxAllocSize();
//! Get the minimum chunk size for buddy allocator.
size_t CpuMinChunkSize();
//! Get the maximum chunk size for buddy allocator.
size_t CpuMaxChunkSize();
//! Get the minimum chunk size for buddy allocator.
size_t CUDAPinnedMinChunkSize();
//! Get the maximum chunk size for buddy allocator.
size_t CUDAPinnedMaxChunkSize();
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册