From 311b8f2f5b78003546cbd44c6d53739ebfcbfe96 Mon Sep 17 00:00:00 2001
From: Yu Yang <yuyang18@baidu.com>
Date: Sun, 30 Sep 2018 13:29:40 +0800
Subject: [PATCH] Refine Allocator facade

---
 paddle/fluid/memory/allocation/CMakeLists.txt |  3 +-
 .../memory/allocation/allocator_facade.cc     | 66 +++++++++++-----
 .../memory/allocation/allocator_facade.h      |  3 +
 .../allocation/auto_increment_allocator.cc    | 39 +++++++++
 .../allocation/auto_increment_allocator.h     | 79 +++++++++++++++++++
 5 files changed, 169 insertions(+), 21 deletions(-)
 create mode 100644 paddle/fluid/memory/allocation/auto_increment_allocator.cc
 create mode 100644 paddle/fluid/memory/allocation/auto_increment_allocator.h
diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 44a354cf223..84d22ac96ca 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -33,7 +33,7 @@ else ()
 endif()
 
 cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
-
+cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
 cc_library(allocator_facade SRCS allocator_facade.cc DEPS
         ${AllocatorFacadeDeps}
         cpu_allocator
@@ -41,6 +41,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
         best_fit_allocator
         naive_managed_allocator
         aligned_allocator
+        auto_increment_allocator
         cuda_device_guard)
 
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 2a5fd608bcc..260c787a740 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -17,6 +17,7 @@
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
+#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
 #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
@@ -33,6 +34,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+// TODO(yy): Dirty code here. This class should be configurable at runtime.
 class CPUManagedAllocator : public ManagedAllocator {
  public:
   CPUManagedAllocator()
@@ -56,24 +58,59 @@ class CPUManagedAllocator : public ManagedAllocator {
       return normal_allocator_->AllocateShared(size, attr);
     }
   }
+  bool IsAllocThreadSafe() const override { return true; }
 
  private:
   std::shared_ptr<ManagedAllocator> normal_allocator_;
   std::shared_ptr<ManagedAllocator> communication_allocator_;
 };
 
-class AllocatorFacadePrivate {
+// TODO(yy): Dirty code here. This class should be configurable at runtime.
+class CUDAManagedAllocator : public ManagedAllocator {
  public:
-  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
-  std::vector<std::unique_ptr<Allocation>> pre_allocations_;
-  std::vector<std::shared_ptr<Allocator>> holding_allocators_;
+  explicit CUDAManagedAllocator(int dev_id) {  // dev_id: CUDA device index
+    platform::CUDADeviceGuard guard(dev_id);
+    max_chunk_size_ = platform::GpuMaxChunkSize();
+    raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
+        new CUDAAllocator(platform::CUDAPlace(dev_id))));
+    default_allocator_ = std::make_shared<AutoIncrementAllocator>(
+        [this] { return BestFitAllocatorCreator(); });  // called on demand
+  }
 
-  ~AllocatorFacadePrivate() {
+  ~CUDAManagedAllocator() {
     // Specify destruct order.
-    pre_allocations_.clear();
-    allocators_.clear();
-    holding_allocators_.clear();
+    default_allocator_.reset();
+    chunks_.clear();
+    raw_allocator_.reset();
+  }
+
+  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
+    return default_allocator_->Allocate(size, attr);  // auto-growing pool
+  }
+  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
+    return default_allocator_->AllocateShared(size, attr);  // auto-growing pool
+  }
+
+  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {  // new chunk
+    chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
+    return NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
+        new LockedAllocator(std::unique_ptr<Allocator>(  // used concurrently
+            new BestFitAllocator(chunks_.back().get())))));
   }
+  bool IsAllocThreadSafe() const override { return true; }
+
+ private:
+  size_t max_chunk_size_;
+  std::vector<std::unique_ptr<Allocation>> chunks_;
+  std::shared_ptr<ManagedAllocator> raw_allocator_;
+  std::shared_ptr<ManagedAllocator> default_allocator_;
+};
+
+class AllocatorFacadePrivate {
+ public:
+  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
+
+  ~AllocatorFacadePrivate() {}
 
   AllocatorFacadePrivate() {
     InitCPUAllocator();
@@ -88,19 +125,8 @@ class AllocatorFacadePrivate {
   void InitCUDAAllocator() {
 #ifdef PADDLE_WITH_CUDA
     for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
-      platform::CUDADeviceGuard guard(dev_id);
-      auto cuda_allocator =
-          NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
-              new CUDAAllocator(platform::CUDAPlace(dev_id))));
-      auto allocation = cuda_allocator->Allocate(platform::GpuMaxChunkSize());
-      auto allocator = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
-          new LockedAllocator(std::unique_ptr<Allocator>(
-              new BestFitAllocator(allocation.get())))));
-
-      pre_allocations_.emplace_back(std::move(allocation));
-      holding_allocators_.emplace_back(cuda_allocator);
       allocators_[platform::CUDAPlace(dev_id)] =
-          std::make_shared<AlignedAllocator<64>>(std::move(allocator));
+          std::make_shared<CUDAManagedAllocator>(dev_id);
     }
 #endif
   }
diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h
index d780fb6e64b..a910e40badb 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -21,6 +21,9 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+// Allocator Facade is the interface exposed to other modules.
+// All the configuration or dirty code under development should
+// be hidden behind this facade.
 class AllocatorFacadePrivate;
 class AllocatorFacade {
  public:
diff --git a/paddle/fluid/memory/allocation/auto_increment_allocator.cc b/paddle/fluid/memory/allocation/auto_increment_allocator.cc
new file mode 100644
index 00000000000..1fac71b8321
--- /dev/null
+++ b/paddle/fluid/memory/allocation/auto_increment_allocator.cc
@@ -0,0 +1,39 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+// Serves the request from the first underlying allocator that can satisfy it.
+std::unique_ptr<Allocation> AutoIncrementAllocator::Allocate(
+    size_t size, Allocator::Attr attr) {
+  auto fn = [&](ManagedAllocator& a) { return a.Allocate(size, attr); };
+  return InvokeOrCreateUnderlyingAllocator(fn);
+}
+
+// Shared-ownership counterpart of Allocate().
+std::shared_ptr<Allocation> AutoIncrementAllocator::AllocateShared(
+    size_t size, Allocator::Attr attr) {
+  auto fn = [&](ManagedAllocator& a) { return a.AllocateShared(size, attr); };
+  return InvokeOrCreateUnderlyingAllocator(fn);
+}
+
+bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; }
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/auto_increment_allocator.h b/paddle/fluid/memory/allocation/auto_increment_allocator.h
new file mode 100644
index 00000000000..9fe370b08a7
--- /dev/null
+++ b/paddle/fluid/memory/allocation/auto_increment_allocator.h
@@ -0,0 +1,79 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <thread>  // NOLINT
+#include <vector>
+#include "paddle/fluid/memory/allocation/allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+// Round-robins over allocators built on demand by `creator_`; a new one is
+// appended only after every existing one has thrown BadAlloc.
+class AutoIncrementAllocator : public ManagedAllocator {
+ public:
+  using AllocatorCreator = std::function<std::shared_ptr<ManagedAllocator>()>;
+
+  template <typename Creator>
+  explicit AutoIncrementAllocator(Creator&& creator)
+      : creator_(std::forward<Creator>(creator)), prev_success_allocator_{0} {}
+  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;
+  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
+  bool IsAllocThreadSafe() const override;
+
+ private:
+  // Templated so `callback` can be inlined; shared state is read under mtx_.
+  template <typename Callback>
+  inline typename std::result_of<Callback(ManagedAllocator&)>::type
+  InvokeOrCreateUnderlyingAllocator(Callback callback) {
+    std::shared_ptr<ManagedAllocator> allocator;
+    size_t cur = 0, total = 0;
+    {  // Snapshot under the lock: a concurrent grow may reallocate the vector.
+      std::lock_guard<std::mutex> guard(mtx_);
+      total = underlying_allocators_.size();
+      cur = prev_success_allocator_;
+      if (total > 0) allocator = underlying_allocators_[cur];
+    }
+    for (size_t tried = 0; tried < total; ++tried) {
+      try {
+        auto res = callback(*allocator);
+        std::lock_guard<std::mutex> guard(mtx_);
+        prev_success_allocator_ = cur;
+        return res;
+      } catch (BadAlloc&) {  // any other exception type simply propagates
+        cur = (cur + 1) % total;
+        std::lock_guard<std::mutex> guard(mtx_);
+        allocator = underlying_allocators_[cur];
+      }
+    }
+    // Every existing allocator is exhausted: append a fresh one and use it.
+    std::lock_guard<std::mutex> guard(mtx_);
+    underlying_allocators_.emplace_back(creator_());
+    prev_success_allocator_ = underlying_allocators_.size() - 1;
+    return callback(*underlying_allocators_[prev_success_allocator_]);
+  }
+
+  AllocatorCreator creator_;
+  std::vector<AllocatorCreator::result_type> underlying_allocators_;
+  size_t prev_success_allocator_{0};
+  std::mutex mtx_;  // NOLINT
+};
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
-- 
GitLab