From 3e087f763e9c6c15a4f1d542fb3bdc327f7441c7 Mon Sep 17 00:00:00 2001
From: Yi Wang <yiwang01@baidu.com>
Date: Tue, 27 Jun 2017 16:48:25 -0700
Subject: [PATCH] Add buddy_allocator.cc and system_allocator.cc

---
 paddle/memory/detail/buddy_allocator.cc       | 35 ++++++++
 paddle/memory/detail/buddy_allocator.h        | 76 ++++++++--------
 paddle/memory/detail/system_allocator.cc      | 90 +++++++++++++++++++
 paddle/memory/detail/system_allocator_test.cc | 24 ++---
 4 files changed, 177 insertions(+), 48 deletions(-)
 create mode 100644 paddle/memory/detail/buddy_allocator.cc
 create mode 100644 paddle/memory/detail/system_allocator.cc
diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc
new file mode 100644
index 00000000000..895bf319d77
--- /dev/null
+++ b/paddle/memory/detail/buddy_allocator.cc
@@ -0,0 +1,35 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/memory/detail/system_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+BuddyAllocator::BuddyAllocator(size_t pool_size, size_t max_pools,
+                               SystemAllocator* system_allocator)
+    : pool_size_(pool_size),
+      max_pools_(max_pools),
+      system_allocator_(system_allocator) {
+  PADDLE_ASSERT(pool_size > 0);
+  PADDLE_ASSERT(max_pools > 0);
+  PADDLE_ASSERT(system_allocator != nullptr);
+}
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h
index 35e96fd5078..129b137ed73 100644
--- a/paddle/memory/detail/buddy_allocator.h
+++ b/paddle/memory/detail/buddy_allocator.h
@@ -1,16 +1,16 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+   http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
 
 #pragma once
 
@@ -20,34 +20,38 @@ namespace paddle {
 namespace memory {
 namespace detail {
 
-template<typename Allocator>
 class BuddyAllocator {
-  public:
-    // TODO(gangliao): This is a draft, add Buddy Allocator Algorithm soon
-    BuddyAllocator() {}
-    ~BuddyAllocator() {}
-
-  public:
-    void* Alloc(size_t size) {
-        return Allocator::Alloc(size); 
-    }
-    void Free(void*) {
-      // Because all info like size are stored in meta data,
-      // thus it's duplicate if add the parameter `size` in
-      // `Free(void*)` interface.
-    }
-    size_t Used();
+ public:
+  BuddyAllocator(size_t pool_size, size_t max_pools,
+                 SystemAllocator* system_allocator);
+  ~BuddyAllocator();
+
+  void* Alloc(size_t size);
+  void Free(void*);
+  size_t Used();
+
+ private:
+  struct Block {
+    size_t size_;
+    Block* left_;   // left buddy
+    Block* right_;  // right buddy
+  };
+
+  // Initially, there is only one pool.  If a Alloc founds not enough
+  // memory from that pool, and there has not been max_num_pools_,
+  // create a new pool by calling system_allocator_.Alloc(pool_size_).
+  std::vector<void*> pools_;
+
+  size_t pool_size_;      // the size of each pool;
+  size_t max_num_pools_;  // the size of all pools;
 
-  public:
-    BuddyAllocator(const BuddyAllocator&) = delete;
-    BuddyAllocator& operator=(const BuddyAllocator&) = delete;
+  SystemAllocator* system_allocator_;
 
-  private:
-    size_t min_alloc_size_;
-    size_t max_alloc_size_;
+  std::mutex mutex_;
 
-  private:
-    std::mutex mutex_;
+  // Disable copy and assignment.
+  BuddyAllocator(const BuddyAllocator&) = delete;
+  BuddyAllocator& operator=(const BuddyAllocator&) = delete;
 };
 
 BuddyAllocator<CPUAllocator>* GetCPUBuddyAllocator() {
@@ -63,16 +67,16 @@ BuddyAllocator<CPUAllocator>* GetCPUBuddyAllocator() {
 BuddyAllocator<GPUAllocator>* GetGPUBuddyAllocator(int gpu_id) {
   static BuddyAllocator<GPUAllocator>** as = NULL;
   if (as == NULL) {
-    int gpu_num = platform::GetDeviceCount(); 
+    int gpu_num = platform::GetDeviceCount();
     as = new BuddyAllocator<GPUAllocator>*[gpu_num];
     for (int gpu = 0; gpu < gpu_num; gpu++) {
-        as[gpu] = new BuddyAllocator<GPUAllocator>();
+      as[gpu] = new BuddyAllocator<GPUAllocator>();
     }
   }
   return as[gpu_id];
 }
 
-#endif  // PADDLE_ONLY_CPU 
+#endif  // PADDLE_ONLY_CPU
 
 }  // namespace detail
 }  // namespace memory
diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc
new file mode 100644
index 00000000000..50bec926f83
--- /dev/null
+++ b/paddle/memory/detail/system_allocator.cc
@@ -0,0 +1,90 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/system_allocator.h"
+
+#include <stdlib.h>    // for malloc and free
+#include <sys/mman.h>  // for mlock and munlock
+
+#include "gflags/gflags.h"
+#include "paddle/platform/assert.h"
+#include "paddle/platform/cuda.h"
+
+// If use_pinned_memory is true, CPUAllocator calls mlock, which
+// returns pinned and locked memory as staging areas for data exchange
+// between host and device.  Allocates too much would reduce the amount
+// of memory available to the system for paging.  So, by default, we
+// should set false to use_pinned_memory.
+DEFINE_bool(use_pinned_memory, false,
+            "If set, allocate cpu/gpu pinned memory.");
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+void* CPUAllocator::Alloc(size_t size) {
+  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
+  // malloc might not return nullptr if size is zero, but the returned
+  // pointer shall not be dereferenced -- so we make it nullptr.
+  if (size <= 0) return nullptr;
+
+  void* p = malloc(size);
+  if (p != nullptr && FLAGS_use_pinned_memory) {
+    mlock(p, size);
+  }
+  return p;
+}
+
+void CPUAllocator::Free(void* p, size_t size) {
+  if (p != nullptr && FLAGS_use_pinned_memory) {
+    munlock(p, size);
+  }
+  free(p);
+}
+
+#ifndef PADDLE_ONLY_CPU
+
+void* GPUAllocator::Alloc(size_t size) {
+  // CUDA documentation doesn't explain if cudaMalloc returns nullptr
+  // if size is 0.  We just make sure it does.
+  if (size <= 0) {
+    return nullptr;
+  }
+
+  void* p = 0;
+  cudaError_t result =
+      FLAGS_use_pinned_memory ? cudaMallocHost(&p, size) : cudaMalloc(&p, size);
+  if (result != cudaSuccess) {
+    cudaGetLastError();  // clear error if there is any.
+  }
+  return result == cudaSuccess ? p : nullptr;
+}
+
+void GPUAllocator::Free(void* p, size_t size) {
+  // Purposefully allow cudaErrorCudartUnloading, because
+  // that is returned if you ever call cudaFree after the
+  // driver has already shutdown. This happens only if the
+  // process is terminating, in which case we don't care if
+  // cudaFree succeeds.
+  cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
+  if (err != cudaErrorCudartUnloading) {
+    platform::throw_on_error(err, "cudaFree{Host} failed");
+  }
+}
+
+#endif  // PADDLE_ONLY_CPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc
index c461d8ac626..9bd5706a4e4 100644
--- a/paddle/memory/detail/system_allocator_test.cc
+++ b/paddle/memory/detail/system_allocator_test.cc
@@ -22,10 +22,10 @@ limitations under the License. */
 
 DECLARE_bool(use_pinned_memory);
 
-void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) {
+void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) {
   bool freed = false;
   {
-    void* p = a->Alloc(size);
+    void* p = a.Alloc(size);
     if (size > 0) {
       EXPECT_NE(p, nullptr);
     } else {
@@ -33,9 +33,9 @@ void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) {
     }
 
     int* i = static_cast<int*>(p);
-    std::shared_ptr<int> ptr(i, [&freed, a, size](void* p) {
+    std::shared_ptr<int> ptr(i, [&](void* p) {
       freed = true;
-      a->Free(p, size);
+      a.Free(p, size);
     });
   }
   EXPECT_TRUE(freed);
@@ -44,28 +44,28 @@ void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) {
 TEST(CPUAllocator, NoLockMem) {
   FLAGS_use_pinned_memory = false;
   paddle::memory::detail::CPUAllocator a;
-  TestAllocator(&a, 2048);
-  TestAllocator(&a, 0);
+  TestAllocator(a, 2048);
+  TestAllocator(a, 0);
 }
 
 TEST(CPUAllocator, LockMem) {
   FLAGS_use_pinned_memory = true;
   paddle::memory::detail::CPUAllocator a;
-  TestAllocator(&a, 2048);
-  TestAllocator(&a, 0);
+  TestAllocator(a, 2048);
+  TestAllocator(a, 0);
 }
 
 #ifndef PADDLE_ONLY_CPU
 TEST(GPUAllocator, NoStaging) {
   FLAGS_use_pinned_memory = false;
   paddle::memory::detail::GPUAllocator a;
-  TestAllocator(&a, 2048);
-  TestAllocator(&a, 0);
+  TestAllocator(a, 2048);
+  TestAllocator(a, 0);
 }
 TEST(GPUAllocator, Staging) {
   FLAGS_use_pinned_memory = true;
   paddle::memory::detail::GPUAllocator a;
-  TestAllocator(&a, 2048);
-  TestAllocator(&a, 0);
+  TestAllocator(a, 2048);
+  TestAllocator(a, 0);
 }
 #endif  // PADDLE_ONLY_CPU
-- 
GitLab