Commit b8f5922d, authored by Yi Wang

Make CPUAllocator and GPUAllocator subclasses of SystemAllocator

Parent: 79373dab
CMakeLists.txt:

 if(${WITH_GPU})
-  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  nv_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
 else(${WITH_GPU})
-  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  cc_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
 endif(${WITH_GPU})
system_allocator.h:

@@ -14,76 +14,38 @@ limitations under the License. */

 #pragma once

 #include <stddef.h>  // for size_t
-#include <sys/mman.h>  // for mlock and munlock
-#include <cstdlib>     // for malloc and free
-
-#include <gflags/gflags.h>
-
-#include "paddle/platform/assert.h"
-#include "paddle/platform/cuda.h"
-
-DEFINE_bool(uses_pinned_memory, false,
-            "If set, allocate cpu/gpu pinned memory.");

 namespace paddle {
 namespace memory {
 namespace detail {

-// If uses_pinned_memory is true, CPUAllocator calls mlock, which
-// returns pinned and locked memory as staging areas for data exchange
-// between host and device.  Allocating too much would reduce the amount
-// of memory available to the system for paging.  So, by default,
-// uses_pinned_memory should be set to false.
-class CPUAllocator {
- public:
-  static void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      mlock(p, size);
-    }
-    return p;
-  }
-
-  static void Free(void* p, size_t size) {
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      munlock(p, size);
-    }
-    std::free(p);
-  }
-};
-
-#ifndef PADDLE_ONLY_CPU  // The following code is for CUDA.
-
-// GPUAllocator<staging=true> calls cudaMallocHost, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device.  Allocating too much would reduce the
-// amount of memory available to the system for paging.  So, by
-// default, we should use GPUAllocator<staging=false>.
-class GPUAllocator {
- public:
-  static void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size)
-                                                  : cudaMalloc(&p, size);
-    if (result != cudaSuccess) {
-      cudaGetLastError();  // clear the error, if any.
-    }
-    return result == cudaSuccess ? p : nullptr;
-  }
-
-  static void Free(void* p, size_t size) {
-    // Purposefully allow cudaErrorCudartUnloading, because it is
-    // returned when cudaFree is called after the driver has already
-    // shut down.  This happens only while the process is terminating,
-    // in which case we don't care whether cudaFree succeeds.
-    cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
-    if (err != cudaErrorCudartUnloading) {
-      platform::throw_on_error(err, "cudaFree{Host} failed");
-    }
-  }
-};
+// SystemAllocator is the parent class of CPUAllocator and
+// GPUAllocator.  A BuddyAllocator object uses a SystemAllocator*
+// pointing to the underlying system allocator.  An alternative to
+// this class hierarchy is to pass a system allocator class to
+// BuddyAllocator as a template parameter.  That approach would make
+// BuddyAllocator a class template, and its very complicated
+// algorithm would make buddy_allocator.h messy.
+class SystemAllocator {
+ public:
+  virtual ~SystemAllocator() {}
+  virtual void* Alloc(size_t size) = 0;
+  virtual void Free(void* p, size_t size) = 0;
+};
+
+class CPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
+};
+
+#ifndef PADDLE_ONLY_CPU
+class GPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
+};
 #endif  // PADDLE_ONLY_CPU

 }  // namespace detail
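The commit also adds system_allocator.cc (it appears in the CMake SRCS above), but that file is not shown in this excerpt. Below is a minimal sketch of what it presumably contains, assuming the old inline bodies moved over mostly unchanged, the flag was renamed to use_pinned_memory (as the test's DECLARE_bool below implies), and a size == 0 guard was added so that Alloc(0) returns nullptr as the new tests expect; the include path is likewise an assumption.

// Hypothetical reconstruction of system_allocator.cc -- not shown in
// the commit excerpt; bodies assumed moved from the old header.
#include "paddle/memory/detail/system_allocator.h"  // assumed path

#include <sys/mman.h>  // for mlock and munlock
#include <cstdlib>     // for malloc and free

#include "gflags/gflags.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cuda.h"

// Renamed from uses_pinned_memory; the test file declares this flag.
DEFINE_bool(use_pinned_memory, false,
            "If set, allocate cpu/gpu pinned memory.");

namespace paddle {
namespace memory {
namespace detail {

void* CPUAllocator::Alloc(size_t size) {
  // The new tests expect Alloc(0) == nullptr; malloc(0) may legally
  // return a unique non-null pointer, so guard explicitly (assumed).
  if (size == 0) return nullptr;
  void* p = std::malloc(size);
  if (p != nullptr && FLAGS_use_pinned_memory) {
    mlock(p, size);  // pin pages so they cannot be swapped out
  }
  return p;
}

void CPUAllocator::Free(void* p, size_t size) {
  if (p != nullptr && FLAGS_use_pinned_memory) {
    munlock(p, size);
  }
  std::free(p);
}

#ifndef PADDLE_ONLY_CPU
void* GPUAllocator::Alloc(size_t size) {
  if (size == 0) return nullptr;  // same zero-size guard (assumed)
  void* p = nullptr;
  cudaError_t result = FLAGS_use_pinned_memory ? cudaMallocHost(&p, size)
                                               : cudaMalloc(&p, size);
  if (result != cudaSuccess) {
    cudaGetLastError();  // clear the sticky error state
  }
  return result == cudaSuccess ? p : nullptr;
}

void GPUAllocator::Free(void* p, size_t size) {
  // Tolerate cudaErrorCudartUnloading: it is returned when cudaFree
  // runs after the driver has shut down, i.e. at process exit.
  cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
  if (err != cudaErrorCudartUnloading) {
    platform::throw_on_error(err, "cudaFree{Host} failed");
  }
}
#endif  // PADDLE_ONLY_CPU

}  // namespace detail
}  // namespace memory
}  // namespace paddle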
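The header comment motivates the virtual-base design: with an abstract SystemAllocator, a BuddyAllocator can hold a plain pointer chosen at runtime instead of becoming a class template. BuddyAllocator is not part of this excerpt; the skeleton below is a hypothetical illustration of that relationship only.

#include <cstddef>

#include "paddle/memory/detail/system_allocator.h"  // assumed path

namespace paddle {
namespace memory {
namespace detail {

// Deliberately simplified skeleton: it holds a SystemAllocator* and
// never needs to know which concrete subclass it talks to.
class BuddyAllocator {
 public:
  explicit BuddyAllocator(SystemAllocator* system) : system_(system) {}

  // A real buddy allocator would split and merge power-of-two blocks;
  // here we just forward, to show the ownership relation.
  void* Alloc(size_t size) { return system_->Alloc(size); }
  void Free(void* p, size_t size) { system_->Free(p, size); }

 private:
  SystemAllocator* system_;  // a CPUAllocator or GPUAllocator instance
};

}  // namespace detail
}  // namespace memory
}  // namespace paddle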
system_allocator_test.cc:

@@ -17,44 +17,55 @@ limitations under the License. */

 #include <memory>
 #include <vector>

-#include "glog/logging.h"
+#include "gflags/gflags.h"
 #include "gtest/gtest.h"

-template <typename Allocator>
-void TestAllocator(void* p) {
-  p = Allocator::Alloc(1024);
-
-  int* i = static_cast<int*>(p);
-  std::shared_ptr<int> ptr(i, [](int* p) { Allocator::Free(p, 1024); });
-
-  EXPECT_NE(p, nullptr);
+DECLARE_bool(use_pinned_memory);
+
+void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) {
+  bool freed = false;
+  {
+    void* p = a->Alloc(size);
+    if (size > 0) {
+      EXPECT_NE(p, nullptr);
+    } else {
+      EXPECT_EQ(p, nullptr);
+    }
+
+    int* i = static_cast<int*>(p);
+    std::shared_ptr<int> ptr(i, [&freed, a, size](void* p) {
+      freed = true;
+      a->Free(p, size);
+    });
+  }
+  EXPECT_TRUE(freed);
 }

 TEST(CPUAllocator, NoLockMem) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = false;
-  TestAllocator<paddle::memory::detail::CPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = false;
+  paddle::memory::detail::CPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }

 TEST(CPUAllocator, LockMem) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = true;
-  TestAllocator<paddle::memory::detail::CPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = true;
+  paddle::memory::detail::CPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }

 #ifndef PADDLE_ONLY_CPU
 TEST(GPUAllocator, NoStaging) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = false;
-  TestAllocator<paddle::memory::detail::GPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = false;
+  paddle::memory::detail::GPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }

 TEST(GPUAllocator, Staging) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = true;
-  TestAllocator<paddle::memory::detail::GPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = true;
+  paddle::memory::detail::GPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }
 #endif  // PADDLE_ONLY_CPU
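The reworked TestAllocator leans on std::shared_ptr's custom-deleter hook: the lambda captures freed, a, and size, and runs exactly once when the last reference disappears at the end of the inner scope, which is what EXPECT_TRUE(freed) verifies. A standalone sketch of the same idiom, independent of the commit:

#include <cstdlib>
#include <memory>

#include "gtest/gtest.h"

TEST(SharedPtrDeleter, RunsExactlyOnceAtScopeExit) {
  bool freed = false;
  {
    void* p = std::malloc(16);
    // The capturing lambda is stored inside the shared_ptr and runs
    // when the last reference goes away -- here, at end of scope.
    std::shared_ptr<void> guard(p, [&freed](void* q) {
      freed = true;
      std::free(q);
    });
    EXPECT_FALSE(freed);
  }
  EXPECT_TRUE(freed);
}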