Pass cpu_allocator_test

db128c45 · Yi Wang · 67481ca8 · db128c45 · db128c45 · db128c45
7 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,7 +71,7 @@ if(ANDROID)
        "Disable RDMA when cross-compiling for Android" FORCE)
 endif(ANDROID)

-set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
+set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
  "A path setting third party libraries download & build directories.")

 if (WITH_C_API AND WITH_PYTHON)

--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -78,6 +78,10 @@
 #
 #   cc_test(example_test SRCS example_test.cc DEPS example glog gflags)

+if(WITH_GPU)
+  add_definitions(-DPADDLE_WITH_GPU)
+endif()
+
 if(NOT APPLE)
    find_package(Threads REQUIRED)
    link_libraries(${CMAKE_THREAD_LIBS_INIT})

--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
-cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
+if(WITH_GPU)
+  nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # nv_test links CUDA, but
+else()
+  cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # cc_test doesn't.
+endif()
--- a/paddle/memory/detail/cpu_allocator.h
+++ b/paddle/memory/detail/cpu_allocator.h
@@ -17,6 +17,11 @@ limitations under the License. */
 #include <malloc.h>  // for malloc and free
 #include <stddef.h>  // for size_t

+#ifdef PADDLE_WITH_GPU
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#endif  // PADDLE_WITH_GPU
+
 namespace paddle {
 namespace memory {
 namespace detail {
@@ -40,9 +45,9 @@ public:
  void Free(void* p) { free(p); }
 };

-// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the
+// If PADDLE_WITH_GPU is undefined (CMake WITH_GPU is OFF), compilers skip the
 // following specialization that depends on the CUDA library.
-#ifdef WITH_GPU
+#ifdef PADDLE_WITH_GPU
 template <>
 class CPUAllocator<true> {
 public:
@@ -51,12 +56,12 @@ public:
    if (cudaMallocHost(&p, size) != cudaSuccess) {
      return NULL;
    }
-    return *p;
+    return p;
  }

  void Free(void* p) { cudaFreeHost(p); }
 };
-#endif  // WITH_GPU
+#endif  // PADDLE_WITH_GPU

 }  // namespace detail
 }  // namespace memory

--- a/paddle/memory/detail/cpu_allocator_test.cc
+++ b/paddle/memory/detail/cpu_allocator_test.cc
@@ -22,11 +22,17 @@ TEST(CPUAllocator, NonStaging) {
  a.Free(p);
 }

-#ifdef WITH_GPU
+#ifdef PADDLE_WITH_GPU
 TEST(CPUAllocator, Staging) {
  paddle::memory::detail::CPUAllocator<true> a;
+
+  int devices;
+  if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) {
    void* p = a.Alloc(4096);
    EXPECT_NE(p, nullptr);
    a.Free(p);
+  } else {
+    EXPECT_EQ(a.Alloc(4096), nullptr);
+  }
 }
-#endif  // WITH_GPU
+#endif  // PADDLE_WITH_GPU
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@@ -19,7 +19,11 @@ namespace memory {

 template <>
 void* Alloc<CPUPlace>(CPUPlace, size_t size) {
-  return GetCPUBuddyAllocator()->Alloc(size);
+  return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size);
+}
+
+void* AllocStaging(CPUPlace, size_t size) {
+  return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size);
 }

 template <>
@@ -29,9 +33,14 @@ void* Alloc<GPUPlace>(GPUPlace pl, size_t size) {

 template <>
 void Free<CPUPlace>(CPUPlace, void* p) {
-  return GetCPUBuddyAllocator()->Free(p);
+  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
+}
+
+void FreeStaging(CPUPlace, void* p) {
+  return GetCPUBuddyAllocator(true /*staging*/)->Free(p);  // pairs with AllocStaging
 }

+#ifdef PADDLE_WITH_GPU
 template <>
 void* Alloc<GPUPlace>(GPUPlace pl, void* p) {
  return GetGPUBuddyAllocator(pl.device)->Free(p);
@@ -46,6 +55,7 @@ template <>
 size_t Alloc<GPUPlace>(GPUPlace pl) {
  return GetGPUBuddyAllocator(pl.device)->Used();
 }
+#endif  // PADDLE_WITH_GPU

 }  // namespace memory
 }  // namespace paddle
--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@@ -19,9 +19,19 @@ limitations under the License. */
 namespace paddle {
 namespace memory {

-typename<typename paddle::framework::Place> void* Alloc(Place, size_t);
-typename<typename paddle::framework::Place> void Free(Place, void*);
-typename<typename paddle::framework::Place> size_t Used(Place);
+template <typename Place>
+void* Alloc(Place, size_t);  // specialized per place type in memory.cc
+template <typename Place>
+void Free(Place, void*);  // specialized per place type in memory.cc
+template <typename Place>
+size_t Used(Place);
+
+// Staging memory means "pinned" (page-locked) host memory that can be
+// mapped into the CUDA memory space and accessed by the device rapidly.
+// Don't allocate too much of it: it reduces the memory the OS has left
+// for paging, which degrades system performance.  Free via FreeStaging.
+void* AllocStaging(CPUPlace, size_t);
+void FreeStaging(CPUPlace, void*);

 }  // namespace memory
 }  // namespace paddle