diff --git a/paddle/memory/README.md b/paddle/memory/README.md index e5f7880e4cad346da5399815f5e76b7b9b99bdea..96a331a486f57d3e030408fee182199bad5b38c2 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -97,6 +97,7 @@ class BuddyAllocator { struct Block { size_t size; Block* left, right; + size_t index; // allocator id }; ... }; diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index c425e9f947d07009a474c5dfd05f55c48f290fa0..fb8a11062da91a87723d445acfb592f715951f14 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,5 +1 @@ -if(${WITH_GPU}) - nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # nv_test links CUDA, but -else(${WITH_GPU}) - cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # cc_test doesn't. -endif(${WITH_GPU}) +cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index 0d8ea3f52b92f24ce4e3fc8f5c2a9fcbba035e8f..a487fecef49b7efe3503411b8a7f48bc4b044ac9 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -14,20 +14,19 @@ limitations under the License. */ #pragma once -#include // for malloc and free #include // for size_t +#include // for malloc and free -#ifdef PADDLE_WITH_GPU -#include -#include -#endif // PADDLE_WITH_GPU +#ifndef _WIN32 +#include // for mlock and munlock +#endif namespace paddle { namespace memory { namespace detail { -// CPUAllocator calls cudaMallocHost, which returns -// pinned and mlocked memory as staging areas for data exchange +// CPUAllocator calls mlock, which returns +// pinned and locked memory as staging areas for data exchange // between host and device. Allocates too much would reduce the // amount of memory available to the system for paging. So, by // default, we should use CPUAllocator. @@ -35,33 +34,37 @@ template class CPUAllocator { public: void* Alloc(size_t size); - void Free(void* p); + void Free(void* p, size_t size); }; template <> class CPUAllocator { public: - void* Alloc(size_t size) { return malloc(size); } - void Free(void* p) { free(p); } + void* Alloc(size_t size) { return std::malloc(size); } + void Free(void* p, size_t size) { std::free(p); } }; -// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the -// following specialization that depends on the CUDA library. -#ifdef PADDLE_WITH_GPU template <> class CPUAllocator { public: void* Alloc(size_t size) { - void* p; - if (cudaMallocHost(&p, size) != cudaSuccess) { - return NULL; + void* p = std::malloc(size); + if (p == nullptr) { + return p; } +#ifndef _WIN32 + mlock(p, size); +#endif return p; } - void Free(void* p) { cudaFreeHost(p); } + void Free(void* p, size_t size) { +#ifndef _WIN32 + munlock(p, size); +#endif + std::free(p); + } }; -#endif // PADDLE_WITH_GPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc index 464bc84e5c7b5066bff4f3444c686473fc925746..4e45266cd8ad8a2bfd8f2135d259691559bbcbf4 100644 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -19,20 +19,12 @@ TEST(CPUAllocator, NonStaging) { paddle::memory::detail::CPUAllocator a; void* p = a.Alloc(4096); EXPECT_NE(p, nullptr); - a.Free(p); + a.Free(p, 4096); } -#ifdef PADDLE_WITH_GPU TEST(CPUAllocator, Staging) { paddle::memory::detail::CPUAllocator a; - - int devices; - if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) { - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p); - } else { - EXPECT_EQ(a.Alloc(4096), nullptr); - } + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); } -#endif // PADDLE_WITH_GPU