From ce938ae5f9baea2b2d136154ee9a696b394929e1 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 26 Jun 2017 23:32:46 +0800 Subject: [PATCH] FIX: Pinned memory --- paddle/memory/README.md | 1 + paddle/memory/detail/CMakeLists.txt | 6 +--- paddle/memory/detail/cpu_allocator.h | 39 ++++++++++++---------- paddle/memory/detail/cpu_allocator_test.cc | 16 +++------ 4 files changed, 27 insertions(+), 35 deletions(-) diff --git a/paddle/memory/README.md b/paddle/memory/README.md index e5f7880e4..96a331a48 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -97,6 +97,7 @@ class BuddyAllocator { struct Block { size_t size; Block* left, right; + size_t index; // allocator id }; ... }; diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index c425e9f94..fb8a11062 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,5 +1 @@ -if(${WITH_GPU}) - nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # nv_test links CUDA, but -else(${WITH_GPU}) - cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # cc_test doesn't. -endif(${WITH_GPU}) +cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index 0d8ea3f52..a487fecef 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -14,20 +14,19 @@ limitations under the License. */ #pragma once -#include // for malloc and free #include // for size_t +#include // for malloc and free -#ifdef PADDLE_WITH_GPU -#include -#include -#endif // PADDLE_WITH_GPU +#ifndef _WIN32 +#include // for mlock and munlock +#endif namespace paddle { namespace memory { namespace detail { -// CPUAllocator calls cudaMallocHost, which returns -// pinned and mlocked memory as staging areas for data exchange +// CPUAllocator calls mlock, which returns +// pinned and locked memory as staging areas for data exchange // between host and device. Allocates too much would reduce the // amount of memory available to the system for paging. So, by // default, we should use CPUAllocator. @@ -35,33 +34,37 @@ template class CPUAllocator { public: void* Alloc(size_t size); - void Free(void* p); + void Free(void* p, size_t size); }; template <> class CPUAllocator { public: - void* Alloc(size_t size) { return malloc(size); } - void Free(void* p) { free(p); } + void* Alloc(size_t size) { return std::malloc(size); } + void Free(void* p, size_t size) { std::free(p); } }; -// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the -// following specialization that depends on the CUDA library. -#ifdef PADDLE_WITH_GPU template <> class CPUAllocator { public: void* Alloc(size_t size) { - void* p; - if (cudaMallocHost(&p, size) != cudaSuccess) { - return NULL; + void* p = std::malloc(size); + if (p == nullptr) { + return p; } +#ifndef _WIN32 + mlock(p, size); +#endif return p; } - void Free(void* p) { cudaFreeHost(p); } + void Free(void* p, size_t size) { +#ifndef _WIN32 + munlock(p, size); +#endif + std::free(p); + } }; -#endif // PADDLE_WITH_GPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc index 464bc84e5..4e45266cd 100644 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -19,20 +19,12 @@ TEST(CPUAllocator, NonStaging) { paddle::memory::detail::CPUAllocator a; void* p = a.Alloc(4096); EXPECT_NE(p, nullptr); - a.Free(p); + a.Free(p, 4096); } -#ifdef PADDLE_WITH_GPU TEST(CPUAllocator, Staging) { paddle::memory::detail::CPUAllocator a; - - int devices; - if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) { - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p); - } else { - EXPECT_EQ(a.Alloc(4096), nullptr); - } + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); } -#endif // PADDLE_WITH_GPU -- GitLab