From db128c4586c3c925a6c53a9ae770cb07cdbea1bf Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Sun, 25 Jun 2017 17:54:06 -0700 Subject: [PATCH] Pass cpu_allocator_test --- CMakeLists.txt | 2 +- cmake/generic.cmake | 4 ++++ paddle/memory/detail/CMakeLists.txt | 6 +++++- paddle/memory/detail/cpu_allocator.h | 13 +++++++++---- paddle/memory/detail/cpu_allocator_test.cc | 16 +++++++++++----- paddle/memory/memory.cc | 14 ++++++++++++-- paddle/memory/memory.h | 16 +++++++++++++--- 7 files changed, 55 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d7f2c7e..3c719d35e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ if(ANDROID) "Disable RDMA when cross-compiling for Android" FORCE) endif(ANDROID) -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") if (WITH_C_API AND WITH_PYTHON) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 69e8164a0..840155750 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -78,6 +78,10 @@ # # cc_test(example_test SRCS example_test.cc DEPS example glog gflags) +if(WITH_GPU) + add_definitions(-DPADDLE_WITH_GPU) +endif() + if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index fb8a11062..c425e9f94 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,5 @@ -cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) +if(${WITH_GPU}) + nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # nv_test links CUDA, but +else(${WITH_GPU}) + cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # cc_test doesn't. 
+endif(${WITH_GPU}) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index 8a872d380..0d8ea3f52 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -17,6 +17,11 @@ limitations under the License. */ #include <stdlib.h> // for malloc and free #include <stddef.h> // for size_t +#ifdef PADDLE_WITH_GPU +#include <cuda.h> +#include <cuda_runtime.h> +#endif // PADDLE_WITH_GPU + namespace paddle { namespace memory { namespace detail { @@ -40,9 +45,9 @@ public: void Free(void* p) { free(p); } }; -// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the +// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the // following specialization that depends on the CUDA library. -#ifdef WITH_GPU +#ifdef PADDLE_WITH_GPU template <> class CPUAllocator<true> { public: @@ -51,12 +56,12 @@ public: if (cudaMallocHost(&p, size) != cudaSuccess) { return NULL; } - return *p; + return p; } void Free(void* p) { cudaFreeHost(p); } }; -#endif // WITH_GPU +#endif // PADDLE_WITH_GPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc index 0aa33a22f..464bc84e5 100644 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -22,11 +22,17 @@ TEST(CPUAllocator, NonStaging) { a.Free(p); } -#ifdef WITH_GPU +#ifdef PADDLE_WITH_GPU TEST(CPUAllocator, Staging) { paddle::memory::detail::CPUAllocator<true> a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p); + + int devices; + if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) { + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p); + } else { + EXPECT_EQ(a.Alloc(4096), nullptr); + } } -#endif // WITH_GPU +#endif // PADDLE_WITH_GPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 5f1253ede..b61792373 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -19,7 +19,11 @@ namespace memory { template <> 
void* Alloc(CPUPlace, size_t size) { - return GetCPUBuddyAllocator()->Alloc(size); + return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size); +} + +void* AllocStaging(CPUPlace, size_t size) { + return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size); } template <> @@ -29,9 +33,14 @@ void* Alloc(GPUPlace pl, size_t size) { template <> void Free(CPUPlace, void* p) { - return GetCPUBuddyAllocator()->Free(p); + return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); +} + +void FreeStaging(CPUPlace, void* p) { + return GetCPUBuddyAllocator(true /*staging*/)->Free(p); } +#ifdef PADDLE_WITH_GPU template <> void* Alloc(GPUPlace pl, void* p) { return GetGPUBuddyAllocator(pl.device)->Free(p); } @@ -46,6 +55,7 @@ template <> size_t Alloc(GPUPlace pl) { return GetGPUBuddyAllocator(pl.device)->Used(); } +#endif // PADDLE_WITH_GPU } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index ae8ac6ca5..8c15a133b 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -19,9 +19,19 @@ limitations under the License. */ namespace paddle { namespace memory { -typename void* Alloc(Place, size_t); -typename void Free(Place, void*); -typename size_t Used(Place); +template <typename Place> +void* Alloc(Place, size_t); +template <typename Place> +void Free(Place, void*); +template <typename Place> +size_t Used(Place); + +// Staging memory means "pinned" host memory that can be mapped into +// the CUDA memory space and accessed by the device rapidly. Don't +// allocate too much staging memory; otherwise system performance will +// degrade because the OS cannot find enough swap memory space. +void* AllocStaging(CPUPlace, size_t); +void FreeStaging(CPUPlace, void*); } // namespace memory } // namespace paddle -- GitLab