Commit ce938ae5 authored by liaogang

FIX: Pinned memory

Parent db128c45
@@ -97,6 +97,7 @@ class BuddyAllocator {
   struct Block {
     size_t size;
     Block *left, *right;
+    size_t index;  // allocator id
   };
   ...
 };
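The new `index` field deserves a note: a buddy allocator can sit on top of more than one system allocator, and recording an allocator id in each block lets a block freed through the buddy system find its way back to the allocator that produced it. A minimal sketch of that idea follows; the `SystemAllocator` interface, the registry, and `ReturnToSystem` are hypothetical names for illustration, not part of this commit.

#include <cstddef>
#include <vector>

// Stand-in interface for the underlying system allocators (an assumption).
struct SystemAllocator {
  virtual void* Alloc(size_t size) = 0;
  virtual void Free(void* p, size_t size) = 0;
  virtual ~SystemAllocator() = default;
};

struct Block {
  size_t size;
  Block *left, *right;
  size_t index;  // allocator id
};

// Hypothetical registry of system allocators, indexed by Block::index.
std::vector<SystemAllocator*> allocators;

void ReturnToSystem(Block* b) {
  // The id stored at allocation time picks the allocator that owns b.
  allocators[b->index]->Free(b, b->size);
}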
......
-if(${WITH_GPU})
-  nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # nv_test links CUDA, but
-else(${WITH_GPU})
-  cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # cc_test doesn't.
-endif(${WITH_GPU})
+cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
@@ -14,20 +14,19 @@ limitations under the License. */
 #pragma once

-#include <malloc.h>  // for malloc and free
 #include <stddef.h>  // for size_t
+#include <cstdlib>   // for malloc and free

-#ifdef PADDLE_WITH_GPU
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-#endif  // PADDLE_WITH_GPU
+#ifndef _WIN32
+#include <sys/mman.h>  // for mlock and munlock
+#endif

 namespace paddle {
 namespace memory {
 namespace detail {

-// CPUAllocator<staging=true> calls cudaMallocHost, which returns
-// pinned and mlocked memory as staging areas for data exchange
+// CPUAllocator<staging=true> calls mlock, which pins and locks the
+// allocated memory as a staging area for data exchange
 // between host and device. Allocating too much would reduce the
 // amount of memory available to the system for paging. So, by
 // default, we should use CPUAllocator<staging=false>.
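One concrete reading of the warning in that comment: on Linux, pages pinned with mlock count against RLIMIT_MEMLOCK and are excluded from paging, so unbounded staging allocations starve the rest of the system. The following self-contained probe is my own sketch, not part of this commit; it prints the limit and shows the typical failure mode of mlock.

#include <sys/mman.h>      // mlock, munlock
#include <sys/resource.h>  // getrlimit, RLIMIT_MEMLOCK
#include <cstdio>
#include <cstdlib>

int main() {
  struct rlimit rl;
  if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
    std::printf("soft RLIMIT_MEMLOCK: %llu bytes\n",
                (unsigned long long)rl.rlim_cur);
  }
  const size_t size = 1 << 20;  // 1 MiB
  void* p = std::malloc(size);
  if (p != nullptr) {
    if (mlock(p, size) != 0) {
      std::perror("mlock");  // typically ENOMEM once the limit is exhausted
    } else {
      munlock(p, size);  // unpin before returning the memory to the heap
    }
    std::free(p);
  }
  return 0;
}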
@@ -35,33 +34,37 @@ template <bool staging>
 class CPUAllocator {
  public:
   void* Alloc(size_t size);
-  void Free(void* p);
+  void Free(void* p, size_t size);
 };

 template <>
 class CPUAllocator<false> {
  public:
-  void* Alloc(size_t size) { return malloc(size); }
-  void Free(void* p) { free(p); }
+  void* Alloc(size_t size) { return std::malloc(size); }
+  void Free(void* p, size_t size) { std::free(p); }
 };

-// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the
-// following specialization that depends on the CUDA library.
-#ifdef PADDLE_WITH_GPU
 template <>
 class CPUAllocator<true> {
  public:
   void* Alloc(size_t size) {
-    void* p;
-    if (cudaMallocHost(&p, size) != cudaSuccess) {
-      return NULL;
+    void* p = std::malloc(size);
+    if (p == nullptr) {
+      return p;
     }
+#ifndef _WIN32
+    mlock(p, size);
+#endif
     return p;
   }

-  void Free(void* p) { cudaFreeHost(p); }
+  void Free(void* p, size_t size) {
+#ifndef _WIN32
+    munlock(p, size);
+#endif
+    std::free(p);
+  }
 };
-#endif  // PADDLE_WITH_GPU

 }  // namespace detail
 }  // namespace memory
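With this change, Free() needs the allocation size: free()/std::free() can recover the size from the heap metadata, but munlock(2) takes an explicit address and length, so the caller must pass the size back. Giving the non-staging Free() the same signature keeps the two specializations interchangeable behind one template interface. A hedged usage sketch follows; the include path is an assumption inferred from the test's namespace, not stated in the diff.

#include <cstddef>
#include "paddle/memory/detail/cpu_allocator.h"  // assumed header path

void StagingRoundTrip() {
  paddle::memory::detail::CPUAllocator<true> a;
  const size_t size = 1 << 20;
  void* p = a.Alloc(size);  // malloc + mlock on non-Windows builds
  // ... stage host data here before a host-to-device copy ...
  a.Free(p, size);  // size is required so munlock can unpin the range
}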
......
@@ -19,20 +19,12 @@
 TEST(CPUAllocator, NonStaging) {
   paddle::memory::detail::CPUAllocator<false> a;
   void* p = a.Alloc(4096);
   EXPECT_NE(p, nullptr);
-  a.Free(p);
+  a.Free(p, 4096);
 }

-#ifdef PADDLE_WITH_GPU
 TEST(CPUAllocator, Staging) {
   paddle::memory::detail::CPUAllocator<true> a;
-  int devices;
-  if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) {
   void* p = a.Alloc(4096);
   EXPECT_NE(p, nullptr);
-    a.Free(p);
-  } else {
-    EXPECT_EQ(a.Alloc(4096), nullptr);
-  }
+  a.Free(p, 4096);
 }
-#endif  // PADDLE_WITH_GPU