From b8f5922d88e5f7949eb9a469f761ad49981d677a Mon Sep 17 00:00:00 2001
From: Yi Wang <yiwang01@baidu.com>
Date: Tue, 27 Jun 2017 16:32:24 -0700
Subject: [PATCH] Make CPUAllocator and GPUAllocator subclasses of
 SystemAllocator

---
 paddle/memory/detail/CMakeLists.txt           |  6 +-
 paddle/memory/detail/system_allocator.h       | 80 +++++--------------
 paddle/memory/detail/system_allocator_test.cc | 57 +++++++------
 3 files changed, 59 insertions(+), 84 deletions(-)
diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
index cd5622203ff..72d3749ad78 100644
--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -1,5 +1,7 @@
 if(${WITH_GPU})
-  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  nv_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
 else(${WITH_GPU})
-  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  cc_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
 endif(${WITH_GPU})
diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h
index f411019854e..184b383f7f7 100644
--- a/paddle/memory/detail/system_allocator.h
+++ b/paddle/memory/detail/system_allocator.h
@@ -14,76 +14,38 @@ limitations under the License. */
 
 #pragma once
 
-#include <stddef.h>    // for size_t
-#include <sys/mman.h>  // for mlock and munlock
-#include <cstdlib>     // for malloc and free
-
-#include <gflags/gflags.h>
-#include "paddle/platform/assert.h"
-#include "paddle/platform/cuda.h"
-
-DEFINE_bool(uses_pinned_memory, false,
-            "If set, allocate cpu/gpu pinned memory.");
+#include <stddef.h>  // for size_t
 
 namespace paddle {
 namespace memory {
 namespace detail {
 
-// If uses_pinned_memory is true, CPUAllocator calls mlock, which
-// returns pinned and locked memory as staging areas for data exchange
-// between host and device.  Allocates too much would reduce the amount
-// of memory available to the system for paging.  So, by default, we
-// should set false to uses_pinned_memory.
-class CPUAllocator {
+// SystemAllocator is the parent class of CPUAllocator and
+// GPUAllocator.  A BuddyAllocator object uses a SystemAllocator*
+// pointing to the underlying system allocator.  An alternative to
+// this class hierarchy is to pass a system allocator class to
+// BuddyAllocator as a template parameter.  This approach makes
+// BuddyAllocator a class template, and it's very complicated
+// algorithm would make the buddy_allocator.h messy.
+class SystemAllocator {
  public:
-  static void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      mlock(p, size);
-    }
-    return p;
-  }
-
-  static void Free(void* p, size_t size) {
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      munlock(p, size);
-    }
-    std::free(p);
-  }
+  virtual ~SystemAllocator() {}
+  virtual void* Alloc(size_t size) = 0;
+  virtual void Free(void* p, size_t size) = 0;
 };
 
-#ifndef PADDLE_ONLY_CPU  // The following code are for CUDA.
-
-// GPUAllocator<staging=true> calls cudaHostMalloc, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device.  Allocates too much would reduce the
-// amount of memory available to the system for paging.  So, by
-// default, we should use GPUAllocator<staging=false>.
-class GPUAllocator {
+class CPUAllocator : public SystemAllocator {
  public:
-  static void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size)
-                                                  : cudaMalloc(&p, size);
-    if (result != cudaSuccess) {
-      cudaGetLastError();  // clear error if there is any.
-    }
-    return result == cudaSuccess ? p : nullptr;
-  }
-
-  static void Free(void* p, size_t size) {
-    // Purposefully allow cudaErrorCudartUnloading, because
-    // that is returned if you ever call cudaFree after the
-    // driver has already shutdown. This happens only if the
-    // process is terminating, in which case we don't care if
-    // cudaFree succeeds.
-    cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
-    if (err != cudaErrorCudartUnloading) {
-      platform::throw_on_error(err, "cudaFree{Host} failed");
-    }
-  }
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
 };
 
+#ifndef PADDLE_ONLY_CPU
+class GPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
+};
 #endif  // PADDLE_ONLY_CPU
 
 }  // namespace detail
diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc
index 829d3558ba4..c461d8ac626 100644
--- a/paddle/memory/detail/system_allocator_test.cc
+++ b/paddle/memory/detail/system_allocator_test.cc
@@ -17,44 +17,55 @@ limitations under the License. */
 #include <memory>
 #include <vector>
 
-#include "glog/logging.h"
+#include "gflags/gflags.h"
 #include "gtest/gtest.h"
 
-template <typename Allocator>
-void TestAllocator(void* p) {
-  p = Allocator::Alloc(1024);
+DECLARE_bool(use_pinned_memory);
 
-  int* i = static_cast<int*>(p);
-  std::shared_ptr<int> ptr(i, [](int* p) { Allocator::Free(p, 1024); });
+void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) {
+  bool freed = false;
+  {
+    void* p = a->Alloc(size);
+    if (size > 0) {
+      EXPECT_NE(p, nullptr);
+    } else {
+      EXPECT_EQ(p, nullptr);
+    }
 
-  EXPECT_NE(p, nullptr);
+    int* i = static_cast<int*>(p);
+    std::shared_ptr<int> ptr(i, [&freed, a, size](void* p) {
+      freed = true;
+      a->Free(p, size);
+    });
+  }
+  EXPECT_TRUE(freed);
 }
 
 TEST(CPUAllocator, NoLockMem) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = false;
-  TestAllocator<paddle::memory::detail::CPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = false;
+  paddle::memory::detail::CPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }
 
 TEST(CPUAllocator, LockMem) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = true;
-  TestAllocator<paddle::memory::detail::CPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = true;
+  paddle::memory::detail::CPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }
 
 #ifndef PADDLE_ONLY_CPU
 TEST(GPUAllocator, NoStaging) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = false;
-  TestAllocator<paddle::memory::detail::GPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = false;
+  paddle::memory::detail::GPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }
 TEST(GPUAllocator, Staging) {
-  void* p = nullptr;
-  FLAGS_uses_pinned_memory = true;
-  TestAllocator<paddle::memory::detail::GPUAllocator>(p);
-  EXPECT_EQ(p, nullptr);
+  FLAGS_use_pinned_memory = true;
+  paddle::memory::detail::GPUAllocator a;
+  TestAllocator(&a, 2048);
+  TestAllocator(&a, 0);
 }
 #endif  // PADDLE_ONLY_CPU
-- 
GitLab