Commit ce938ae5 authored by liaogang

FIX: Pinned memory

Parent db128c45
@@ -97,6 +97,7 @@ class BuddyAllocator {
   struct Block {
     size_t size;
     Block *left, *right;
+    size_t index;  // allocator id
   };
   ...
 };
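A note on the new field: `index` records which allocator produced a block, so a shared free path can hand the block back to its owner. A minimal sketch of that idea, assuming a hypothetical allocator table (`Allocator`, `allocators`, and `FreeBlock` are illustrative names, not part of this commit):

// Sketch: route a freed block back to the allocator identified by its index.
#include <cstddef>
#include <vector>

struct Block {
  size_t size;
  Block *left, *right;
  size_t index;  // allocator id
};

struct Allocator {
  virtual void Return(Block* b) = 0;  // hand a freed block back to its owner
  virtual ~Allocator() = default;
};

std::vector<Allocator*> allocators;  // one slot per allocator id (hypothetical)

void FreeBlock(Block* b) { allocators[b->index]->Return(b); }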
...
-if(${WITH_GPU})
-  nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # nv_test links CUDA, but
-else(${WITH_GPU})
-  cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # cc_test doesn't.
-endif(${WITH_GPU})
+cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
@@ -14,20 +14,19 @@ limitations under the License. */

 #pragma once

-#include <malloc.h>   // for malloc and free
 #include <stddef.h>   // for size_t
+#include <cstdlib>    // for malloc and free

-#ifdef PADDLE_WITH_GPU
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-#endif  // PADDLE_WITH_GPU
+#ifndef _WIN32
+#include <sys/mman.h>  // for mlock and munlock
+#endif

 namespace paddle {
 namespace memory {
 namespace detail {

-// CPUAllocator<staging=true> calls cudaMallocHost, which returns
-// pinned and mlocked memory as staging areas for data exchange
+// CPUAllocator<staging=true> calls mlock, which returns
+// pinned and locked memory as staging areas for data exchange
 // between host and device. Allocating too much would reduce the
 // amount of memory available to the system for paging. So, by
 // default, we should use CPUAllocator<staging=false>.
@@ -35,33 +34,37 @@ template <bool staging>
 class CPUAllocator {
  public:
   void* Alloc(size_t size);
-  void Free(void* p);
+  void Free(void* p, size_t size);
 };

 template <>
 class CPUAllocator<false> {
  public:
-  void* Alloc(size_t size) { return malloc(size); }
-  void Free(void* p) { free(p); }
+  void* Alloc(size_t size) { return std::malloc(size); }
+  void Free(void* p, size_t size) { std::free(p); }
 };

-// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the
-// following specialization that depends on the CUDA library.
-#ifdef PADDLE_WITH_GPU
 template <>
 class CPUAllocator<true> {
  public:
   void* Alloc(size_t size) {
-    void* p;
-    if (cudaMallocHost(&p, size) != cudaSuccess) {
-      return NULL;
+    void* p = std::malloc(size);
+    if (p == nullptr) {
+      return p;
     }
+#ifndef _WIN32
+    mlock(p, size);
+#endif
     return p;
   }
-  void Free(void* p) { cudaFreeHost(p); }
+  void Free(void* p, size_t size) {
+#ifndef _WIN32
+    munlock(p, size);
+#endif
+    std::free(p);
+  }
 };
-#endif  // PADDLE_WITH_GPU

 }  // namespace detail
 }  // namespace memory
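For context, the reason Free gained a size parameter: munlock(p, size) needs the allocation length, which the old malloc/free pair never exposed to the caller. Also note the new code ignores mlock's return value, so pinning can silently fail when RLIMIT_MEMLOCK is low. A minimal caller sketch follows; the header path and main function are illustrative, not part of this commit.

// Illustrative caller of the new staging allocator (hypothetical
// header path; adjust to wherever cpu_allocator.h actually lives).
#include <cstddef>
#include "paddle/memory/detail/cpu_allocator.h"

int main() {
  paddle::memory::detail::CPUAllocator<true> staging;
  const size_t kSize = 1 << 20;      // 1 MB staging buffer
  void* buf = staging.Alloc(kSize);  // std::malloc, then mlock on non-Windows
  if (buf != nullptr) {
    // ... use buf as a page-locked staging area for host<->device copies ...
    staging.Free(buf, kSize);        // munlock with the same size, then std::free
  }
  return 0;
}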
...
@@ -19,20 +19,12 @@ TEST(CPUAllocator, NonStaging) {
   paddle::memory::detail::CPUAllocator<false> a;
   void* p = a.Alloc(4096);
   EXPECT_NE(p, nullptr);
-  a.Free(p);
+  a.Free(p, 4096);
 }

-#ifdef PADDLE_WITH_GPU
 TEST(CPUAllocator, Staging) {
   paddle::memory::detail::CPUAllocator<true> a;
-
-  int devices;
-  if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) {
-    void* p = a.Alloc(4096);
-    EXPECT_NE(p, nullptr);
-    a.Free(p);
-  } else {
-    EXPECT_EQ(a.Alloc(4096), nullptr);
-  }
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
 }
-#endif  // PADDLE_WITH_GPU
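The staging test no longer needs the device-count guard because pinning now relies only on POSIX mlock, not on a CUDA device being present. As a standalone sketch of the underlying technique (plain POSIX, independent of Paddle and of this commit):

// Minimal POSIX pinning demo: lock a malloc'd page so it cannot be
// paged out, then unlock and free it. mlock may fail with EPERM or
// ENOMEM when RLIMIT_MEMLOCK is low, so check its return value.
#include <sys/mman.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

int main() {
  const size_t size = 4096;
  void* p = std::malloc(size);
  if (p == nullptr) return 1;
  if (mlock(p, size) != 0) {
    std::perror("mlock");  // pinning failed; memory is still usable, just pageable
  }
  // ... p is now resident in RAM; suitable as a staging area ...
  munlock(p, size);
  std::free(p);
  return 0;
}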