From db128c4586c3c925a6c53a9ae770cb07cdbea1bf Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Sun, 25 Jun 2017 17:54:06 -0700 Subject: [PATCH] Pass cpu_allocator_test --- CMakeLists.txt | 2 +- cmake/generic.cmake | 4 ++++ paddle/memory/detail/CMakeLists.txt | 6 +++++- paddle/memory/detail/cpu_allocator.h | 13 +++++++++---- paddle/memory/detail/cpu_allocator_test.cc | 16 +++++++++++----- paddle/memory/memory.cc | 14 ++++++++++++-- paddle/memory/memory.h | 16 +++++++++++++--- 7 files changed, 55 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d7f2c7e..3c719d35e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ if(ANDROID) "Disable RDMA when cross-compiling for Android" FORCE) endif(ANDROID) -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") if (WITH_C_API AND WITH_PYTHON) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 69e8164a0..840155750 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -78,6 +78,10 @@ # # cc_test(example_test SRCS example_test.cc DEPS example glog gflags) +if(WITH_GPU) + add_definitions(-DPADDLE_WITH_GPU) +endif() + if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index fb8a11062..c425e9f94 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,5 @@ -cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) +if(${WITH_GPU}) + nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # nv_test links CUDA, but +else(${WITH_GPU}) + cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # cc_test doesn't. 
+endif(${WITH_GPU}) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index 8a872d380..0d8ea3f52 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -17,6 +17,11 @@ limitations under the License. */ #include <stdlib.h> // for malloc and free #include <stddef.h> // for size_t +#ifdef PADDLE_WITH_GPU +#include <cuda.h> +#include <cuda_runtime.h> +#endif // PADDLE_WITH_GPU + namespace paddle { namespace memory { namespace detail { @@ -40,9 +45,9 @@ public: void Free(void* p) { free(p); } }; -// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the +// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the // following specialization that depends on the CUDA library. -#ifdef WITH_GPU +#ifdef PADDLE_WITH_GPU template <> class CPUAllocator<true> { public: @@ -51,12 +56,12 @@ public: if (cudaMallocHost(&p, size) != cudaSuccess) { return NULL; } - return *p; + return p; } void Free(void* p) { cudaFreeHost(p); } }; -#endif // WITH_GPU +#endif // PADDLE_WITH_GPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc index 0aa33a22f..464bc84e5 100644 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -22,11 +22,17 @@ TEST(CPUAllocator, NonStaging) { a.Free(p); } -#ifdef WITH_GPU +#ifdef PADDLE_WITH_GPU TEST(CPUAllocator, Staging) { paddle::memory::detail::CPUAllocator<true> a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p); + + int devices; + if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) { + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p); + } else { + EXPECT_EQ(a.Alloc(4096), nullptr); + } } -#endif // WITH_GPU +#endif // PADDLE_WITH_GPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 5f1253ede..b61792373 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -19,7 +19,11 @@ namespace memory { template <> 
void* Alloc(CPUPlace, size_t size) { - return GetCPUBuddyAllocator()->Alloc(size); + return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size); +} + +void* AllocStaging(CPUPlace, size_t size) { + return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size); } template <> @@ -29,9 +33,14 @@ void* Alloc(GPUPlace pl, size_t size) { template <> void Free(CPUPlace, void* p) { - return GetCPUBuddyAllocator()->Free(p); + return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); +} + +void FreeStaging(CPUPlace, void* p) { + return GetCPUBuddyAllocator(true /*staging*/)->Free(p); } +#ifdef PADDLE_WITH_GPU template <> void* Alloc(GPUPlace pl, void* p) { return GetGPUBuddyAllocator(pl.device)->Free(p); } @@ -46,6 +55,7 @@ template <> size_t Alloc(GPUPlace pl) { return GetGPUBuddyAllocator(pl.device)->Used(); } +#endif // PADDLE_WITH_GPU } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index ae8ac6ca5..8c15a133b 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -19,9 +19,19 @@ limitations under the License. */ namespace paddle { namespace memory { -typename void* Alloc(Place, size_t); -typename void Free(Place, void*); -typename size_t Used(Place); +template <typename Place> +void* Alloc(Place, size_t); +template <typename Place> +void Free(Place, void*); +template <typename Place> +size_t Used(Place); + +// Staging memory means "pinned" host memory that can be mapped into +// the CUDA memory space and accessed by the device rapidly. Don't +// allocate too much staging memory; otherwise system performance will +// degrade because the OS cannot find enough swap memory space. +void* AllocStaging(CPUPlace, size_t); +void FreeStaging(CPUPlace, void*); } // namespace memory } // namespace paddle -- GitLab