Commit b8f5922d authored by Yi Wang

Make CPUAllocator and GPUAllocator subclasses of SystemAllocator

Parent 79373dab
if(${WITH_GPU})
-  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  nv_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
else(${WITH_GPU})
-  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  cc_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
endif(${WITH_GPU})
@@ -14,76 +14,38 @@ limitations under the License. */
#pragma once
-#include <stddef.h>    // for size_t
-#include <sys/mman.h>  // for mlock and munlock
-#include <cstdlib>     // for malloc and free
-
-#include <gflags/gflags.h>
-
-#include "paddle/platform/assert.h"
-#include "paddle/platform/cuda.h"
-
-DEFINE_bool(uses_pinned_memory, false,
-            "If set, allocate cpu/gpu pinned memory.");
+#include <stddef.h>  // for size_t
namespace paddle {
namespace memory {
namespace detail {
-// If uses_pinned_memory is true, CPUAllocator calls mlock, which
-// returns pinned and locked memory as staging areas for data exchange
-// between host and device. Allocating too much would reduce the amount
-// of memory available to the system for paging. So, by default,
-// uses_pinned_memory should be set to false.
-class CPUAllocator {
+// SystemAllocator is the parent class of CPUAllocator and
+// GPUAllocator. A BuddyAllocator object uses a SystemAllocator*
+// pointing to the underlying system allocator. An alternative to
+// this class hierarchy is to pass a system allocator class to
+// BuddyAllocator as a template parameter. That approach would make
+// BuddyAllocator a class template, and its very complicated
+// algorithm would make buddy_allocator.h messy.
+class SystemAllocator {
public:
-  static void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      mlock(p, size);
-    }
-    return p;
-  }
-
-  static void Free(void* p, size_t size) {
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      munlock(p, size);
-    }
-    std::free(p);
-  }
+  virtual ~SystemAllocator() {}
+  virtual void* Alloc(size_t size) = 0;
+  virtual void Free(void* p, size_t size) = 0;
};
-#ifndef PADDLE_ONLY_CPU  // The following code is for CUDA.
-
-// GPUAllocator<staging=true> calls cudaMallocHost, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device. Allocating too much would reduce the
-// amount of memory available to the system for paging. So, by
-// default, we should use GPUAllocator<staging=false>.
-class GPUAllocator {
+class CPUAllocator : public SystemAllocator {
public:
-  static void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size)
-                                                  : cudaMalloc(&p, size);
-    if (result != cudaSuccess) {
-      cudaGetLastError();  // clear error if there is any.
-    }
-    return result == cudaSuccess ? p : nullptr;
-  }
-
-  static void Free(void* p, size_t size) {
-    // Purposefully allow cudaErrorCudartUnloading, because
-    // that is returned if you ever call cudaFree after the
-    // driver has already shut down. This happens only if the
-    // process is terminating, in which case we don't care
-    // whether cudaFree succeeds.
-    cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
-    if (err != cudaErrorCudartUnloading) {
-      platform::throw_on_error(err, "cudaFree{Host} failed");
-    }
-  }
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
};
+#ifndef PADDLE_ONLY_CPU
+
+class GPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
+};
#endif // PADDLE_ONLY_CPU
} // namespace detail
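The new system_allocator.cc that the CMake rules above compile is not shown in this excerpt. Below is a plausible sketch of it, not the commit's actual file: it assumes the bodies of the deleted static methods move over essentially unchanged, that the flag is renamed use_pinned_memory (matching the DECLARE_bool in the updated test further down), and that Alloc returns nullptr for size <= 0 so the test's expectation for Alloc(0) can hold (malloc(0) may legally return a non-null pointer).

#include "paddle/memory/detail/system_allocator.h"

#include <sys/mman.h>  // for mlock and munlock
#include <cstdlib>     // for malloc and free

#include "gflags/gflags.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cuda.h"

// If use_pinned_memory is true, CPUAllocator calls mlock, which pins
// allocated pages in RAM; GPUAllocator uses cudaMallocHost for the same
// purpose. Pinning too much memory reduces what is left for paging.
DEFINE_bool(use_pinned_memory, false,
            "If set, allocate cpu/gpu pinned memory.");

namespace paddle {
namespace memory {
namespace detail {

void* CPUAllocator::Alloc(size_t size) {
  if (size <= 0) return nullptr;  // malloc(0) may return a non-null pointer
  void* p = std::malloc(size);
  if (p != nullptr && FLAGS_use_pinned_memory) {
    mlock(p, size);  // lock the pages so they cannot be swapped out
  }
  return p;
}

void CPUAllocator::Free(void* p, size_t size) {
  if (p != nullptr && FLAGS_use_pinned_memory) {
    munlock(p, size);
  }
  std::free(p);
}

#ifndef PADDLE_ONLY_CPU

void* GPUAllocator::Alloc(size_t size) {
  if (size <= 0) return nullptr;
  void* p = nullptr;
  cudaError_t result = FLAGS_use_pinned_memory ? cudaMallocHost(&p, size)
                                               : cudaMalloc(&p, size);
  if (result != cudaSuccess) {
    cudaGetLastError();  // clear the error flag if allocation failed
  }
  return result == cudaSuccess ? p : nullptr;
}

void GPUAllocator::Free(void* p, size_t size) {
  // Purposefully allow cudaErrorCudartUnloading: it is returned when
  // cudaFree runs after the driver has already shut down, which happens
  // only while the process is terminating.
  cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
  if (err != cudaErrorCudartUnloading) {
    platform::throw_on_error(err, "cudaFree{Host} failed");
  }
}

#endif  // PADDLE_ONLY_CPU

}  // namespace detail
}  // namespace memory
}  // namespace paddle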
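The comment on SystemAllocator above prefers runtime polymorphism over a template parameter. A minimal illustration of the two options; BuddyAllocator is not part of this diff, so the members shown here are hypothetical:

#include "paddle/memory/detail/system_allocator.h"

using paddle::memory::detail::SystemAllocator;

// Chosen design: BuddyAllocator is an ordinary class holding a
// SystemAllocator*, so any subclass can be injected at run time and the
// buddy algorithm can stay in a .cc file.
class BuddyAllocator {
 public:
  explicit BuddyAllocator(SystemAllocator* system) : system_(system) {}

 private:
  SystemAllocator* system_;  // a CPUAllocator or a GPUAllocator
};

// Rejected alternative: inject the allocator as a template parameter.
// BuddyAllocator would become a class template, forcing its complicated
// algorithm into the header.
template <typename SystemAllocatorT>
class TemplatedBuddyAllocator {
 private:
  SystemAllocatorT system_;
};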
@@ -17,44 +17,55 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "glog/logging.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
-template <typename Allocator>
-void TestAllocator(void* p) {
-  p = Allocator::Alloc(1024);
-
-  int* i = static_cast<int*>(p);
-  std::shared_ptr<int> ptr(i, [](int* p) { Allocator::Free(p, 1024); });
-}
+DECLARE_bool(use_pinned_memory);
+
+void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) {
+  bool freed = false;
+  {
+    void* p = a->Alloc(size);
+    if (size > 0) {
+      EXPECT_NE(p, nullptr);
+    } else {
+      EXPECT_EQ(p, nullptr);
+    }
+
+    int* i = static_cast<int*>(p);
+    std::shared_ptr<int> ptr(i, [&freed, a, size](void* p) {
+      freed = true;
+      a->Free(p, size);
+    });
+  }
+  EXPECT_TRUE(freed);
+}
TEST(CPUAllocator, NoLockMem) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = false;
-  TestAllocator<paddle::memory::detail::CPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = false;
+  paddle::memory::detail::CPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
}
TEST(CPUAllocator, LockMem) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = true;
-  TestAllocator<paddle::memory::detail::CPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = true;
+  paddle::memory::detail::CPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
}
#ifndef PADDLE_ONLY_CPU
TEST(GPUAllocator, NoStaging) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = false;
-  TestAllocator<paddle::memory::detail::GPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = false;
+  paddle::memory::detail::GPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
}
TEST(GPUAllocator, Staging) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = true;
-  TestAllocator<paddle::memory::detail::GPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = true;
+  paddle::memory::detail::GPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
}
#endif // PADDLE_ONLY_CPU
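Because use_pinned_memory is an ordinary gflags flag (defined in the library, declared in the test), it could also be toggled from the command line instead of being assigned inside each TEST body, assuming the test's main parses flags with gflags' ParseCommandLineFlags:

./system_allocator_test --use_pinned_memory=true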