From b8f5922d88e5f7949eb9a469f761ad49981d677a Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 27 Jun 2017 16:32:24 -0700 Subject: [PATCH] Make CPUAllocator and GPUAllocator subclasses of SystemAllocator --- paddle/memory/detail/CMakeLists.txt | 6 +- paddle/memory/detail/system_allocator.h | 80 +++++-------------- paddle/memory/detail/system_allocator_test.cc | 57 +++++++------ 3 files changed, 59 insertions(+), 84 deletions(-) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index cd5622203ff..72d3749ad78 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,5 +1,7 @@ if(${WITH_GPU}) - nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog) + nv_library(system_allocator SRCS system_allocator.cc DEPS gflags) + nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) else(${WITH_GPU}) - cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog) + cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) + cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) endif(${WITH_GPU}) diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index f411019854e..184b383f7f7 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -14,76 +14,38 @@ limitations under the License. */ #pragma once -#include // for size_t -#include // for mlock and munlock -#include // for malloc and free - -#include -#include "paddle/platform/assert.h" -#include "paddle/platform/cuda.h" - -DEFINE_bool(uses_pinned_memory, false, - "If set, allocate cpu/gpu pinned memory."); +#include // for size_t namespace paddle { namespace memory { namespace detail { -// If uses_pinned_memory is true, CPUAllocator calls mlock, which -// returns pinned and locked memory as staging areas for data exchange -// between host and device. Allocates too much would reduce the amount -// of memory available to the system for paging. So, by default, we -// should set false to uses_pinned_memory. -class CPUAllocator { +// SystemAllocator is the parent class of CPUAllocator and +// GPUAllocator. A BuddyAllocator object uses a SystemAllocator* +// pointing to the underlying system allocator. An alternative to +// this class hierarchy is to pass a system allocator class to +// BuddyAllocator as a template parameter. This approach makes +// BuddyAllocator a class template, and it's very complicated +// algorithm would make the buddy_allocator.h messy. +class SystemAllocator { public: - static void* Alloc(size_t size) { - void* p = std::malloc(size); - if (p != nullptr && FLAGS_uses_pinned_memory) { - mlock(p, size); - } - return p; - } - - static void Free(void* p, size_t size) { - if (p != nullptr && FLAGS_uses_pinned_memory) { - munlock(p, size); - } - std::free(p); - } + virtual ~SystemAllocator() {} + virtual void* Alloc(size_t size) = 0; + virtual void Free(void* p, size_t size) = 0; }; -#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. - -// GPUAllocator calls cudaHostMalloc, which returns -// pinned and locked memory as staging areas for data exchange -// between host and device. Allocates too much would reduce the -// amount of memory available to the system for paging. So, by -// default, we should use GPUAllocator. -class GPUAllocator { +class CPUAllocator : public SystemAllocator { public: - static void* Alloc(size_t size) { - void* p = 0; - cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size) - : cudaMalloc(&p, size); - if (result != cudaSuccess) { - cudaGetLastError(); // clear error if there is any. - } - return result == cudaSuccess ? p : nullptr; - } - - static void Free(void* p, size_t size) { - // Purposefully allow cudaErrorCudartUnloading, because - // that is returned if you ever call cudaFree after the - // driver has already shutdown. This happens only if the - // process is terminating, in which case we don't care if - // cudaFree succeeds. - cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p); - if (err != cudaErrorCudartUnloading) { - platform::throw_on_error(err, "cudaFree{Host} failed"); - } - } + virtual void* Alloc(size_t size); + virtual void Free(void* p, size_t size); }; +#ifndef PADDLE_ONLY_CPU +class GPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size); + virtual void Free(void* p, size_t size); +}; #endif // PADDLE_ONLY_CPU } // namespace detail diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc index 829d3558ba4..c461d8ac626 100644 --- a/paddle/memory/detail/system_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -17,44 +17,55 @@ limitations under the License. */ #include #include -#include "glog/logging.h" +#include "gflags/gflags.h" #include "gtest/gtest.h" -template -void TestAllocator(void* p) { - p = Allocator::Alloc(1024); +DECLARE_bool(use_pinned_memory); - int* i = static_cast(p); - std::shared_ptr ptr(i, [](int* p) { Allocator::Free(p, 1024); }); +void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) { + bool freed = false; + { + void* p = a->Alloc(size); + if (size > 0) { + EXPECT_NE(p, nullptr); + } else { + EXPECT_EQ(p, nullptr); + } - EXPECT_NE(p, nullptr); + int* i = static_cast(p); + std::shared_ptr ptr(i, [&freed, a, size](void* p) { + freed = true; + a->Free(p, size); + }); + } + EXPECT_TRUE(freed); } TEST(CPUAllocator, NoLockMem) { - void* p = nullptr; - FLAGS_uses_pinned_memory = false; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = false; + paddle::memory::detail::CPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } TEST(CPUAllocator, LockMem) { - void* p = nullptr; - FLAGS_uses_pinned_memory = true; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = true; + paddle::memory::detail::CPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } #ifndef PADDLE_ONLY_CPU TEST(GPUAllocator, NoStaging) { - void* p = nullptr; - FLAGS_uses_pinned_memory = false; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = false; + paddle::memory::detail::GPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } TEST(GPUAllocator, Staging) { - void* p = nullptr; - FLAGS_uses_pinned_memory = true; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = true; + paddle::memory::detail::GPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } #endif // PADDLE_ONLY_CPU -- GitLab