Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c20db635
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c20db635
编写于
3月 25, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
split PR
test=develop
上级
c75a8803
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
31 addition
and
1205 deletion
+31
-1205
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+0
-2
paddle/fluid/framework/inlined_vector.h
paddle/fluid/framework/inlined_vector.h
+0
-82
paddle/fluid/framework/inlined_vector_test.cc
paddle/fluid/framework/inlined_vector_test.cc
+0
-53
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+4
-13
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+6
-5
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+3
-52
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+1
-4
paddle/fluid/memory/allocation/allocator_strategy.h
paddle/fluid/memory/allocation/allocator_strategy.h
+1
-1
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
...fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+0
-134
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
.../fluid/memory/allocation/auto_growth_best_fit_allocator.h
+0
-87
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
.../allocation/auto_growth_best_fit_allocator_facade_test.cc
+0
-96
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
.../memory/allocation/auto_growth_best_fit_allocator_test.cc
+0
-70
paddle/fluid/memory/allocation/cpu_allocator.cc
paddle/fluid/memory/allocation/cpu_allocator.cc
+2
-2
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+1
-1
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+12
-6
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
...e/fluid/memory/allocation/multi_bin_buffered_allocator.cc
+0
-300
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
...le/fluid/memory/allocation/multi_bin_buffered_allocator.h
+0
-62
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
...id/memory/allocation/multi_bin_buffered_allocator_test.cc
+0
-170
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+0
-3
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
...cation/test_multi_bin_buffered_allocator_division_plan.cc
+0
-56
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+0
-3
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-3
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
c20db635
...
...
@@ -202,8 +202,6 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test
(
tuple_test SRCS tuple_test.cc
)
cc_test
(
inlined_vector_test SRCS inlined_vector_test.cc
)
if
(
NOT WIN32
)
cc_test
(
rw_lock_test SRCS rw_lock_test.cc
)
endif
(
NOT WIN32
)
...
...
paddle/fluid/framework/inlined_vector.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
T
,
size_t
N
>
class
InlinedVector
{
static_assert
(
N
>
0
,
"N must be larger than 0"
);
public:
inline
void
push_back
(
const
T
&
item
)
{
if
(
size_
<
N
)
{
head_
[
size_
]
=
item
;
}
else
{
tail_
.
emplace_back
(
item
);
}
++
size_
;
}
inline
void
pop_back
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to pop back element from empty vector."
);
if
(
size_
>
N
)
{
tail_
.
pop_back
();
}
--
size_
;
}
inline
const
T
&
back
()
const
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get back element of empty vector."
);
return
size_
<=
N
?
head_
[
size_
-
1
]
:
tail_
.
back
();
}
inline
T
&
back
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get back element of empty vector."
);
return
size_
<=
N
?
head_
[
size_
-
1
]
:
tail_
.
back
();
}
inline
bool
empty
()
const
{
return
size_
==
0
;
}
inline
size_t
size
()
const
{
return
size_
;
}
// This API can only be used in unittest
T
&
operator
[](
size_t
i
)
{
return
i
<
N
?
head_
[
i
]
:
tail_
[
i
-
N
];
}
const
T
&
operator
[](
size_t
i
)
const
{
return
i
<
N
?
head_
[
i
]
:
tail_
[
i
-
N
];
}
operator
std
::
vector
<
T
>
()
const
{
std
::
vector
<
T
>
ret
;
ret
.
reserve
(
size_
);
for
(
size_t
i
=
0
;
i
<
size_
;
++
i
)
{
ret
.
emplace_back
((
*
this
)[
i
]);
}
return
ret
;
}
private:
T
head_
[
N
];
size_t
size_
{
0
};
std
::
vector
<
T
>
tail_
;
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/inlined_vector_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <vector>
#include "gtest/gtest.h"
namespace
paddle
{
namespace
framework
{
TEST
(
inlined_stack
,
inlined_stack
)
{
size_t
max_num
=
10
;
InlinedVector
<
size_t
,
5
>
stack
;
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
.
size
(),
i
);
stack
.
push_back
(
i
);
ASSERT_EQ
(
stack
.
size
(),
i
+
1
);
}
std
::
vector
<
size_t
>
vec
=
stack
;
ASSERT_EQ
(
stack
.
size
(),
vec
.
size
());
for
(
size_t
i
=
0
;
i
<
vec
.
size
();
++
i
)
{
ASSERT_EQ
(
stack
[
i
],
vec
[
i
]);
}
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
[
i
],
i
);
}
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
.
back
(),
max_num
-
1
-
i
);
stack
.
pop_back
();
ASSERT_EQ
(
stack
.
size
(),
max_num
-
1
-
i
);
}
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
c20db635
...
...
@@ -3,18 +3,9 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library
(
best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator
)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator gflags
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator
)
cc_library
(
auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator
)
cc_test
(
auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator
)
if
(
NOT WIN32
)
cc_test
(
test_multi_bin_buffered_allocator_division_plan SRCS test_multi_bin_buffered_allocator_division_plan.cc DEPS multi_bin_buffered_allocator
)
endif
()
if
(
WITH_GPU
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
...
...
@@ -47,7 +38,7 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator
multi_bin_buffered_allocator auto_growth_best_fit_allocator
legacy_allocator zero_size_allocator
)
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
...
...
@@ -59,8 +50,8 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
c20db635
...
...
@@ -17,7 +17,6 @@
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -50,7 +49,9 @@ class Allocator;
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{}
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{
decorated_allocators_
.
reserve
(
8
);
}
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
...
...
@@ -80,8 +81,8 @@ class Allocation {
virtual
~
Allocation
();
private:
std
::
vector
<
Allocator
*>
DecoratedAllocators
()
const
{
return
static_cast
<
std
::
vector
<
Allocator
*>>
(
decorated_allocators_
)
;
const
std
::
vector
<
Allocator
*>&
DecoratedAllocators
()
const
{
return
decorated_allocators_
;
}
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
...
...
@@ -98,7 +99,7 @@ class Allocation {
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
framework
::
InlinedVector
<
Allocator
*
,
8
>
decorated_allocators_
;
std
::
vector
<
Allocator
*
>
decorated_allocators_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
c20db635
...
...
@@ -22,14 +22,12 @@
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
...
...
@@ -47,24 +45,18 @@ DEFINE_int64(
"The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0"
);
DEFINE_bool
(
enable_buffered_allocator
,
false
,
"Enable buffered_allocator"
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAndBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
,
bool
enable_buffered
)
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
if
(
enable_buffered
)
{
allocator
.
reset
(
new
MultiBinBufferedAllocator
(
allocator
));
}
return
allocator
;
}
...
...
@@ -134,8 +126,7 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
allocator
=
WrapRetryAndBufferedAllocator
(
allocator
,
retry_time_
,
FLAGS_enable_buffered_allocator
);
allocator
=
WrapRetryAllocator
(
allocator
,
retry_time_
);
return
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
move
(
allocator
));
}
...
...
@@ -219,13 +210,6 @@ class AllocatorFacadePrivate {
WrapZeroSizeAllocator
();
break
;
}
case
AllocatorStrategy
::
kAutoGrowthBestFit
:
{
InitAutoGrowthCPUAllocator
();
InitAutoGrowthCUDAAllocator
();
InitAutoGrowthCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported allocator strategy: %d"
,
static_cast
<
int
>
(
strategy
));
...
...
@@ -234,39 +218,6 @@ class AllocatorFacadePrivate {
}
private:
void
InitAutoGrowthCPUAllocator
()
{
auto
cpu_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUAllocator
>
());
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cpu_allocator
,
platform
::
CpuMaxChunkSize
(),
4096
);
}
void
InitAutoGrowthCUDAAllocator
()
{
#ifdef PADDLE_WITH_CUDA
int
dev_cnt
=
platform
::
GetCUDADeviceCount
();
for
(
int
dev_id
=
0
;
dev_id
<
dev_cnt
;
++
dev_id
)
{
auto
cuda_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CUDAAllocator
>
(
platform
::
CUDAPlace
(
dev_id
)));
auto
allocator
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cuda_allocator
,
platform
::
GpuMaxChunkSize
(),
4096
);
allocators_
[
platform
::
CUDAPlace
(
dev_id
)]
=
WrapRetryAndBufferedAllocator
(
allocator
,
FLAGS_gpu_allocator_retry_time
,
false
);
}
#endif
}
void
InitAutoGrowthCUDAPinnedAllocator
()
{
#ifdef PADDLE_WITH_CUDA
auto
cuda_pinned_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUPinnedAllocator
>
());
allocators_
[
platform
::
CUDAPinnedPlace
()]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cuda_pinned_allocator
,
platform
::
CUDAPinnedMaxChunkSize
(),
4096
);
#endif
}
void
InitLegacyAllocator
()
{
std
::
vector
<
platform
::
Place
>
places
{
platform
::
CPUPlace
()};
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
c20db635
...
...
@@ -20,8 +20,7 @@ DEFINE_string(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"naive_best_fit means the experimental best fit allocator. "
"auto_growth_best_fit means the experimental auto growth best fit "
"allocator. Enum in [legacy, naive_best_fit, auto_growth_best_fit]."
);
"allocator. Enum in [legacy, naive_best_fit]."
);
namespace
paddle
{
namespace
memory
{
...
...
@@ -32,8 +31,6 @@ static AllocatorStrategy GetStrategyFromFlag() {
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"naive_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
if
(
FLAGS_allocator_strategy
==
"auto_growth_best_fit"
)
{
return
AllocatorStrategy
::
kAutoGrowthBestFit
;
}
else
{
PADDLE_THROW
(
"Unsupported allocator strategy: %s"
,
FLAGS_allocator_strategy
);
...
...
paddle/fluid/memory/allocation/allocator_strategy.h
浏览文件 @
c20db635
...
...
@@ -18,7 +18,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
,
kAutoGrowthBestFit
};
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
};
extern
AllocatorStrategy
GetAllocatorStrategy
();
...
...
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include <algorithm>
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <unordered_map>
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
size_t
align
(
size_t
size
,
size_t
alignment
)
{
auto
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
alignment
-
remaining
;
}
AutoGrowthBestFitAllocator
::
AutoGrowthBestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
)
:
underlying_allocator_
(
underlying_allocator
),
chunk_size_
(
align
(
chunk_size
,
alignment
)),
alignment_
(
alignment
)
{}
Allocation
*
AutoGrowthBestFitAllocator
::
AllocateImpl
(
size_t
size
,
Attr
attr
)
{
size
=
align
(
size
,
alignment_
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
auto
iter
=
free_blocks_
.
lower_bound
(
std
::
make_pair
(
size
,
nullptr
));
BlockIt
block_it
;
if
(
iter
!=
free_blocks_
.
end
())
{
VLOG
(
2
)
<<
"Found "
<<
iter
->
second
->
size_
<<
" for "
<<
size
;
block_it
=
iter
->
second
;
free_blocks_
.
erase
(
iter
);
auto
*
chunk
=
block_it
->
chunk_
;
size_t
remaining_size
=
block_it
->
size_
-
size
;
if
(
remaining_size
==
0
)
{
block_it
->
is_free_
=
false
;
VLOG
(
2
)
<<
"Found and no remaining"
;
}
else
{
auto
remaining_free_block
=
chunk
->
blocks_
.
insert
(
block_it
,
Chunk
::
Block
(
block_it
->
ptr_
,
remaining_size
,
true
,
chunk
));
free_blocks_
.
emplace
(
std
::
make_pair
(
remaining_size
,
block_it
->
ptr_
),
remaining_free_block
);
block_it
->
ptr_
=
reinterpret_cast
<
uint8_t
*>
(
block_it
->
ptr_
)
+
remaining_size
;
block_it
->
size_
=
size
;
block_it
->
is_free_
=
false
;
VLOG
(
2
)
<<
"Found and remaining "
<<
remaining_size
;
}
}
else
{
size_t
alloc_size
=
size
;
if
(
!
underlying_allocator_exhaustive_
&&
chunk_size_
>
size
)
{
alloc_size
=
chunk_size_
;
}
try
{
chunks_
.
emplace_back
(
underlying_allocator_
->
Allocate
(
alloc_size
,
attr
));
}
catch
(
BadAlloc
&
ex
)
{
if
(
size
==
alloc_size
)
throw
ex
;
underlying_allocator_exhaustive_
=
true
;
alloc_size
=
size
;
chunks_
.
emplace_back
(
underlying_allocator_
->
Allocate
(
alloc_size
,
attr
));
}
auto
*
chunk
=
&
(
*
chunks_
.
rbegin
());
uint8_t
*
p
=
reinterpret_cast
<
uint8_t
*>
(
chunk
->
allocation_
->
ptr
());
auto
&
blocks
=
chunk
->
blocks_
;
size_t
remaining_size
=
alloc_size
-
size
;
if
(
remaining_size
>
0
)
{
blocks
.
emplace_back
(
p
,
remaining_size
,
true
,
chunk
);
free_blocks_
.
emplace
(
std
::
make_pair
(
remaining_size
,
p
),
--
(
blocks
.
end
()));
}
blocks
.
emplace_back
(
p
+
remaining_size
,
size
,
false
,
chunk
);
block_it
=
--
(
blocks
.
end
());
VLOG
(
2
)
<<
"Not found and allocate "
<<
alloc_size
<<
", and remaining "
<<
remaining_size
;
}
VLOG
(
2
)
<<
"After allocate, free blocks "
<<
free_blocks_
.
size
();
return
new
Chunk
::
BlockAllocation
(
block_it
);
}
void
AutoGrowthBestFitAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
auto
&
block_it
=
static_cast
<
Chunk
::
BlockAllocation
*>
(
allocation
)
->
block_it_
;
auto
&
blocks
=
block_it
->
chunk_
->
blocks_
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
block_it
->
is_free_
=
true
;
if
(
block_it
!=
blocks
.
begin
())
{
auto
prev_it
=
block_it
;
--
prev_it
;
if
(
prev_it
->
is_free_
)
{
free_blocks_
.
erase
(
std
::
make_pair
(
prev_it
->
size_
,
prev_it
->
ptr_
));
prev_it
->
size_
+=
block_it
->
size_
;
blocks
.
erase
(
block_it
);
block_it
=
prev_it
;
}
}
auto
next_it
=
block_it
;
++
next_it
;
if
(
next_it
!=
blocks
.
end
()
&&
next_it
->
is_free_
)
{
free_blocks_
.
erase
(
std
::
make_pair
(
next_it
->
size_
,
next_it
->
ptr_
));
block_it
->
size_
+=
next_it
->
size_
;
blocks
.
erase
(
next_it
);
}
free_blocks_
.
emplace
(
std
::
make_pair
(
block_it
->
size_
,
block_it
->
ptr_
),
block_it
);
VLOG
(
2
)
<<
"Combine "
<<
block_it
->
size_
<<
", "
<<
blocks
.
size
()
<<
", "
<<
free_blocks_
.
size
();
delete
allocation
;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
class
AutoGrowthBestFitAllocator
:
public
Allocator
{
public:
explicit
AutoGrowthBestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
);
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
using
AllocationList
=
std
::
list
<
AllocationPtr
>
;
using
AllocationListIt
=
AllocationList
::
iterator
;
struct
Chunk
{
struct
Block
{
Block
(
void
*
ptr
,
size_t
size
,
bool
is_free
,
Chunk
*
chunk
)
:
ptr_
(
ptr
),
size_
(
size
),
is_free_
(
is_free
),
chunk_
(
chunk
)
{}
void
*
ptr_
;
size_t
size_
;
bool
is_free_
;
Chunk
*
chunk_
;
// which chunk it is from
};
explicit
Chunk
(
AllocationPtr
allocation
)
:
allocation_
(
std
::
move
(
allocation
))
{}
AllocationPtr
allocation_
;
std
::
list
<
Block
>
blocks_
;
// std::mutex mtx_;
struct
BlockAllocation
:
public
Allocation
{
explicit
BlockAllocation
(
const
std
::
list
<
Block
>::
iterator
&
it
)
:
Allocation
(
it
->
ptr_
,
it
->
size_
,
it
->
chunk_
->
allocation_
->
place
()),
block_it_
(
it
)
{}
std
::
list
<
Block
>::
iterator
block_it_
;
};
};
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
using
BlockIt
=
std
::
list
<
Chunk
::
Block
>::
iterator
;
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
std
::
list
<
Chunk
>
chunks_
;
std
::
map
<
std
::
pair
<
size_t
,
void
*>
,
BlockIt
>
free_blocks_
;
size_t
chunk_size_
;
size_t
alignment_
;
bool
underlying_allocator_exhaustive_
{
false
};
mutable
std
::
mutex
mtx_
;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
inline
size_t
AlignTo
(
size_t
size
,
size_t
alignment
=
4096
)
{
auto
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
alignment
-
remaining
;
}
TEST
(
allocator
,
allocator
)
{
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
FLAGS_allocator_strategy
=
"auto_growth_best_fit"
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
size_t
size
=
1024
;
platform
::
Place
place
;
{
place
=
platform
::
CPUPlace
();
size
=
1024
;
auto
cpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
cpu_allocation
,
nullptr
);
ASSERT_NE
(
cpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cpu_allocation
->
place
(),
place
);
ASSERT_EQ
(
cpu_allocation
->
size
(),
AlignTo
(
size
));
}
#ifdef PADDLE_WITH_CUDA
{
place
=
platform
::
CUDAPlace
(
0
);
size
=
1024
;
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
AlignTo
(
size
));
}
{
// Allocate 2GB gpu memory
place
=
platform
::
CUDAPlace
(
0
);
size
=
2
*
static_cast
<
size_t
>
(
1
<<
30
);
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
AlignTo
(
size
));
}
{
place
=
platform
::
CUDAPinnedPlace
();
size
=
(
1
<<
20
);
auto
cuda_pinned_allocation
=
instance
.
Alloc
(
platform
::
CUDAPinnedPlace
(),
1
<<
20
);
ASSERT_NE
(
cuda_pinned_allocation
,
nullptr
);
ASSERT_NE
(
cuda_pinned_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cuda_pinned_allocation
->
place
(),
place
);
ASSERT_GE
(
cuda_pinned_allocation
->
size
(),
AlignTo
(
size
));
}
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <condition_variable> // NOLINT
#include <mutex> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <iostream>
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
TEST
(
allocator
,
auto_growth_best_fit_allocator
)
{
auto
cpu_allocator
=
std
::
make_shared
<
CPUAllocator
>
();
auto
allocator
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cpu_allocator
,
0
,
4096
);
std
::
mutex
mtx
;
std
::
condition_variable
cv
;
bool
flag
=
false
;
auto
thread_main
=
[
&
]
{
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mtx
);
cv
.
wait
(
lock
,
[
&
]
{
return
flag
;
});
}
for
(
size_t
i
=
10
;
i
>
0
;
--
i
)
{
allocator
->
Allocate
((
i
+
1
)
*
1000
);
}
};
std
::
vector
<
std
::
thread
>
ths
;
for
(
size_t
i
=
10
;
i
<
10
;
++
i
)
{
ths
.
emplace_back
(
thread_main
);
}
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mtx
);
flag
=
true
;
}
cv
.
notify_all
();
for
(
auto
&
th
:
ths
)
{
th
.
join
();
}
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cpu_allocator.cc
浏览文件 @
c20db635
...
...
@@ -35,9 +35,9 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
Allocation
*
CPUAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
void
*
p
;
#ifdef _WIN32
p
=
_aligned_malloc
(
size
,
4096
);
p
=
_aligned_malloc
(
size
,
kAlignment
);
#else
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
4096
,
size
),
0
,
"Alloc %ld error!"
,
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
kAlignment
,
size
),
0
,
"Alloc %ld error!"
,
size
);
#endif
return
new
Allocation
(
p
,
size
,
platform
::
CPUPlace
());
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
c20db635
...
...
@@ -33,7 +33,7 @@ namespace allocation {
// an open-sourced allocator into Paddle.
class
CPUAllocator
:
public
Allocator
{
public:
constexpr
static
size_t
kAlignment
=
64u
;
constexpr
static
size_t
kAlignment
=
4096UL
;
bool
IsAllocThreadSafe
()
const
override
;
protected:
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
c20db635
...
...
@@ -148,12 +148,18 @@ class GPUBuddyAllocatorList {
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
"You can set GFlags environment variable "
<<
"'FLAGS_fraction_of_gpu_memory_to_use' "
<<
"or 'FLAGS_initial_gpu_memory_in_mb' "
<<
"or 'FLAGS_reallocate_gpu_memory_in_mb' "
<<
"to change the memory size for GPU usage.
\n
"
<<
"Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
<<
FLAGS_fraction_of_gpu_memory_to_use
<<
". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
});
return
allocators_
[
dev_id
];
}
...
...
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <limits>
#include <mutex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include "paddle/fluid/platform/lock_guard_ptr.h"
DEFINE_double
(
buffered_allocator_excess_times
,
2
,
"Excess memory size times of buffered_allocator. BufferedAllocator"
" would try to reuse memory freed previously, but the size of freed"
" allocation may not be exactly the same as the requested. Here, we"
" use a flag to control the excess times of reused memory size. "
"Not quite sure what is the best excess times value."
);
DEFINE_string
(
buffered_allocator_division_plan_path
,
""
,
"The file path which "
"determines the memory size division plans of BufferedAllocator."
"If it is empty, use the default division plan. The file must be a "
"text file which each lines indicates the bound of division plan. "
"For example, if the text file has 3 lines, which are '500M', '1G', "
" '2G', the division plan would be [0, 500M), [500M, 1G), [1G, 2G) "
"and [2G, +inf). Allocation request whose requested memory size is "
"inside the last interval of division plan would be dispatched to "
" underlying_allocator directly without caching when freed."
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Returns a copy of `str` with leading/trailing whitespace removed and all
// remaining characters converted to upper case. Returns "" when `str` is
// empty or contains only whitespace.
static std::string TrimStringAndToUpperCase(const std::string &str) {
  // Convert to unsigned char before calling <cctype> functions: passing a
  // plain char with a negative value (possible for non-ASCII bytes) is
  // undefined behavior.
  auto not_space = [](char ch) {
    return std::isspace(static_cast<unsigned char>(ch)) == 0;
  };
  auto first_idx = static_cast<size_t>(
      std::find_if(str.begin(), str.end(), not_space) - str.begin());
  auto last_idx = static_cast<size_t>(
      std::find_if(str.rbegin(), str.rend(), not_space) - str.rbegin());
  if (first_idx == str.size() || last_idx == str.size()) return "";
  // last_idx was measured from the back; convert it into a forward offset
  // one past the last non-space character.
  last_idx = str.size() - last_idx;
  auto ret = str.substr(first_idx, last_idx - first_idx);
  std::for_each(ret.begin(), ret.end(), [](char &ch) {
    ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
  });
  return ret;
}

namespace {
// Per-line parsing status for division-plan files.
enum DivisionPlanFileStatus { kEOF, kException, kNormal };
}  // NOLINT

// Parses one division-plan entry such as "100b", "300.7K", "500.3m", "1.02gB"
// into a byte count. Suffixes are case-insensitive: optional K/M/G scale
// factor followed by an optional 'B'. *ret_code reports kEOF for a blank
// line, kException for a malformed entry, kNormal on success.
static size_t ParseStringToBytes(const std::string &original_str,
                                 DivisionPlanFileStatus *ret_code) {
  std::string str = TrimStringAndToUpperCase(original_str);
  if (str.empty()) {
    *ret_code = kEOF;
    return 0;
  }
  // Strip the optional trailing 'B' ("100B", "1GB").
  if (str.back() == 'B') {
    str.pop_back();
    if (str.empty()) {
      *ret_code = kException;
      return 0;
    }
  }
  size_t multiples = 1;
  switch (str.back()) {
    case 'G':
      multiples *= (static_cast<size_t>(1) << 30);
      break;
    case 'M':
      multiples *= (static_cast<size_t>(1) << 20);
      break;
    case 'K':
      multiples *= (static_cast<size_t>(1) << 10);
      break;
    default:
      break;
  }
  if (multiples != 1) {
    str.pop_back();
    if (str.empty()) {
      *ret_code = kException;
      return 0;
    }
  }
  str = TrimStringAndToUpperCase(str);
  double mem_val = -1.0;
  std::stringstream ss(str);
  // Require the whole numeric part to be consumed (ss.eof()) so that inputs
  // like "12x34M" are rejected instead of being silently parsed as "12".
  if (!(ss >> mem_val) || mem_val < 0 || !ss.eof()) {
    *ret_code = kException;
    return 0;
  }
  *ret_code = kNormal;
  return static_cast<size_t>(mem_val * multiples);
}
// Builds a human-readable representation of a division plan for logging,
// e.g. "[4.0KB, 8.0KB, 16.0KB]". Fixed: the separator is now only emitted
// between elements, so there is no trailing ", " before the closing bracket.
static std::string GetDebugStringOfPlan(const std::vector<size_t> &plan) {
  std::string ret("[");
  bool first = true;
  for (auto sz : plan) {
    if (!first) ret += ", ";
    first = false;
    ret += string::HumanReadableSize(sz);
  }
  return ret + "]";
}
// Reads a division plan from a text file containing one size per line
// (e.g. "500M", "1G"). Parsing stops at the first blank line or at end of
// file. Throws when the file is missing or a line is malformed.
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
    const std::string &filepath) {
  std::ifstream is(filepath.c_str());
  PADDLE_ENFORCE(is.good(), "File %s not exist", filepath);
  std::string str;
  std::vector<size_t> plan;
  size_t line_num = 1;
  // Test the stream's bool conversion instead of .good(): std::getline sets
  // eofbit when the final line has no trailing newline, and a .good() check
  // would silently drop that last (successfully read) line.
  while (std::getline(is, str)) {
    DivisionPlanFileStatus status;
    size_t ret = ParseStringToBytes(str, &status);
    if (status == kEOF) {
      break;
    }
    if (status == kException) {
      PADDLE_THROW(
          "Invalid format in line %d of file %s: '%s'. Only support B, KB, MB, "
          "GB.",
          line_num, filepath, str);
    }
    plan.push_back(ret);
    ++line_num;
  }
  return plan;
}
// Validates that the division plan is strictly increasing, then normalizes
// it in place: a leading 0 is inserted and a trailing SIZE_MAX removed so
// that the binary search in FindDivisionPlanBinIndex stays branch-free.
static void CheckAndModifyMemoryDivisionPlan(
    std::vector<size_t> *division_plan) {
  // Check whether the division plan is strictly sorted:
  // adjacent_find locates the first non-increasing adjacent pair, if any.
  bool is_strictly_sorted =
      std::adjacent_find(division_plan->begin(), division_plan->end(),
                         [](size_t prev, size_t cur) { return prev >= cur; }) ==
      division_plan->end();
  PADDLE_ENFORCE(is_strictly_sorted, "Divison plan must be stricted sorted");

  // Insert 0 to disivion plan for clean binary searching code
  if (division_plan->empty() || division_plan->front() != 0) {
    division_plan->insert(division_plan->begin(), 0);
  }

  // Remove MAX from disivion plan for clean binary searching code
  constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
  if (division_plan->back() == kSizeTypeMax) {
    division_plan->pop_back();
  }

  PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
}
static
std
::
vector
<
size_t
>
GetDefaultDivisionPlan
()
{
if
(
!
FLAGS_buffered_allocator_division_plan_path
.
empty
())
{
return
ReadBufferedAllocatorDivisionPlanFromFile
(
FLAGS_buffered_allocator_division_plan_path
);
}
// Default division plan is 4K, 8K, 16K, ..., 500M, 1G
constexpr
size_t
kMaxLogSize
=
30
;
std
::
vector
<
size_t
>
plan
;
for
(
size_t
i
=
12
;
i
<=
kMaxLogSize
;
++
i
)
{
plan
.
push_back
(
static_cast
<
size_t
>
(
1
)
<<
i
);
}
return
plan
;
}
// Returns the index i of the bin whose interval [bins[i], bins[i+1]) contains
// `size`. `bins` must be sorted and begin with 0 (guaranteed by
// CheckAndModifyMemoryDivisionPlan), so the subtraction never underflows.
inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
                                              size_t size) {
  auto pos = std::upper_bound(bins.begin(), bins.end(), size);
  return static_cast<size_t>(pos - bins.begin()) - 1;
}
// Scales a requested size by FLAGS_buffered_allocator_excess_times, giving
// the largest cached allocation size that may be reused for this request.
inline static size_t TolerantUpperSize(size_t size) {
  const double scaled = size * FLAGS_buffered_allocator_excess_times;
  return static_cast<size_t>(scaled);
}
// Constructs the allocator with the default division plan: powers of two
// from 4KB to 1GB, or the plan loaded from the file named by
// FLAGS_buffered_allocator_division_plan_path when that flag is set.
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
    std::shared_ptr<Allocator> underlying_allocator)
    : MultiBinBufferedAllocator(std::move(underlying_allocator),
                                GetDefaultDivisionPlan()) {}
// Constructs the allocator with an explicit division plan. The plan is
// normalized (leading 0 inserted, trailing SIZE_MAX removed), and one cache
// bin is created per interval except the last: sizes that fall into the
// final interval bypass the cache entirely.
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
    std::shared_ptr<Allocator> underlying_allocator,
    const std::vector<size_t> &division_plan)
    : underlying_allocator_(std::move(underlying_allocator)),
      division_plan_(division_plan) {
  CheckAndModifyMemoryDivisionPlan(&division_plan_);
  // N plan boundaries define N-1 cacheable bins.
  allocations_.resize(division_plan_.size() - 1);
  accumulated_cache_size_.assign(division_plan_.size() - 1, 0UL);
  mtx_.resize(division_plan_.size() - 1);
  // Per-bin mutexes are only created when the underlying allocator is
  // thread-safe; otherwise the entries stay null (presumably LockGuardPtr
  // treats a null mutex as "no locking needed" — confirm in lock_guard_ptr.h).
  if (underlying_allocator_->IsAllocThreadSafe()) {
    for (auto &mtx : mtx_) {
      mtx.reset(new std::mutex());
    }
  }
  VLOG(1) << "Division plan is: " << GetDebugStringOfPlan(division_plan_);
  VLOG(1) << "FLAGS_buffered_allocator_excess_times = "
          << FLAGS_buffered_allocator_excess_times;
}
// Intercepts a free: if the allocation's size falls into one of the cache
// bins it is stored (keyed by size) for later reuse instead of being
// released; allocations in the last division-plan interval go straight back
// to the underlying allocator.
void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
  auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size());
  if (bin_index < allocations_.size()) {
    // Cacheable: take over ownership and record the cached bytes.
    platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
    allocations_[bin_index].emplace(allocation->size(),
                                    AllocationPtr(allocation));
    accumulated_cache_size_[bin_index] += allocation->size();
  } else {
    // Size is in the last interval: not cached, free immediately.
    underlying_allocator_->Free(allocation);
  }
}
// Maybe we can design more flexible FreeCache strategy based on bin_index
// and require size.
//
// Releases every cached allocation back to the underlying allocator and
// returns the total number of bytes that were cached. Bins are processed
// from largest to smallest.
size_t MultiBinBufferedAllocator::ClearCache() {
  size_t accumulated_size = 0;
  // FIXME(zjl): free the largest first when there is no extra
  // Descending unsigned loop: terminates when i wraps around to SIZE_MAX
  // after decrementing past 0.
  for (size_t i = allocations_.size() - 1; i != static_cast<size_t>(-1); --i) {
    platform::LockGuardPtr<std::mutex> lock(mtx_[i]);
    // Clearing the multimap destroys the AllocationPtrs, which returns the
    // memory to the underlying allocator.
    allocations_[i].clear();
    accumulated_size += accumulated_cache_size_[i];
    accumulated_cache_size_[i] = 0;
  }
  return accumulated_size;
}
// Serves an allocation request, preferring the cache. First scans the cache
// bins starting at the bin for `size`, continuing into larger bins while the
// bin's lower bound is still within TolerantUpperSize(size); a cached entry
// is reused when its size is in [size, upper_size]. On a cache miss the
// request goes to the underlying allocator, and on BadAlloc the whole cache
// is cleared and the allocation retried until no cached bytes remain.
Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
  auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
  auto upper_size = TolerantUpperSize(size);

  for (; bin_index < allocations_.size() &&
         upper_size >= division_plan_[bin_index];
       ++bin_index) {
    auto &allocation = allocations_[bin_index];
    platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
    // Smallest cached entry that is at least `size`.
    auto it = allocation.lower_bound(size);
    if (it != allocation.end() && it->second->size() <= upper_size) {
      size_t sz = it->second->size();
      auto ret = std::move(it->second);
      allocation.erase(it);
      accumulated_cache_size_[bin_index] -= sz;
      VLOG(3) << "Allocate " << sz << "(required " << size
              << ") from cache directly";
      // Ownership transfers to the caller (raw pointer per Allocator API).
      return ret.release();
    }
  }

  // Cache miss: allocate from the underlying allocator, clearing the cache
  // and retrying on out-of-memory until nothing is left to reclaim.
  size_t retry_time = 1;
  while (true) {
    try {
      auto ret = underlying_allocator_->Allocate(size, attr).release();
      VLOG(2) << "Allocate " << size << " from underlying directly";
      return ret;
    } catch (BadAlloc &) {
      size_t actual_free_size = ClearCache();
      VLOG(1) << retry_time << "-th free " << actual_free_size
              << " bytes caches";
      // Nothing reclaimed: give up and propagate the failure.
      if (actual_free_size == 0) throw;
    }
    ++retry_time;
  }
}
void
UseMultiBinBufferedAllocatorGFlags
()
{}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Parses a division-plan text file (one size per line, e.g. "500M", "1G")
// into a list of byte counts. Throws when the file is missing or malformed.
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
    const std::string &filepath);

// A caching allocator that groups freed allocations into size bins defined
// by a "division plan" and reuses them for later requests, falling back to
// (and retrying against) the underlying allocator on cache misses.
class MultiBinBufferedAllocator : public Allocator {
 public:
  // Uses the default division plan (or the plan file named by
  // FLAGS_buffered_allocator_division_plan_path).
  explicit MultiBinBufferedAllocator(
      std::shared_ptr<Allocator> underlying_allocator);

  MultiBinBufferedAllocator(std::shared_ptr<Allocator> underlying_allocator,
                            const std::vector<size_t>& division_plan);

  // Thread-safe iff per-bin mutexes were created, which happens exactly when
  // the underlying allocator reported itself thread-safe.
  bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; }

  // Releases all cached allocations; returns the total bytes released.
  size_t ClearCache();

  // The normalized division plan (starts with 0).
  const std::vector<size_t>& DivisionPlan() const { return division_plan_; }

 protected:
  Allocation* AllocateImpl(size_t size, Attr attr) override;
  void FreeImpl(Allocation* allocation) override;

 private:
  std::shared_ptr<Allocator> underlying_allocator_;
  // Per-bin caches of freed allocations, keyed by allocation size.
  std::vector<std::multimap<size_t, AllocationPtr>> allocations_;
  // Per-bin running totals (bytes) of the cached allocations above.
  std::vector<size_t> accumulated_cache_size_;
  std::vector<size_t> division_plan_;
  // One mutex per bin; entries stay null when the underlying allocator is
  // not thread-safe.
  std::vector<std::unique_ptr<std::mutex>> mtx_;
};

// Referenced by other modules (e.g. pybind.cc); see the definition for the
// linkage rationale.
extern void UseMultiBinBufferedAllocatorGFlags();
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Builds a MultiBinBufferedAllocator backed by a BestFitAllocator over the
// given chunk, optionally wrapped in a LockedAllocator when `thread_safe`
// is requested.
inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
    Allocation *allocation, bool thread_safe) {
  std::shared_ptr<Allocator> allocator(new BestFitAllocator(allocation));
  if (thread_safe) {
    allocator.reset(new LockedAllocator(std::move(allocator)));
  }
  return std::make_shared<MultiBinBufferedAllocator>(allocator);
}
// IsAllocThreadSafe() must mirror the thread-safety of the underlying
// allocator chain (locked vs. unlocked BestFitAllocator).
TEST(buffered_allocator, thread_safety) {
  std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
  auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
  {
    auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
  }
  {
    auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
  }
}
// Minimal Allocation subclass: lets StubAllocator verify via dynamic_cast
// that it only ever frees allocations it created itself.
class StubAllocation : public Allocation {
 public:
  using Allocation::Allocation;
};
// Counting allocator: records how many allocations and frees reach it so
// tests can observe whether the buffered allocator served a request from
// its cache (count unchanged) or passed it through (count incremented).
class StubAllocator : public Allocator {
 public:
  void ResetCounter() {
    construct_count_ = 0;
    destruct_count_ = 0;
  }

  // Number of AllocateImpl calls since the last reset.
  size_t GetAllocCount() const { return construct_count_; }

  // Number of FreeImpl calls since the last reset.
  size_t GetFreeCount() const { return destruct_count_; }

 protected:
  void FreeImpl(Allocation *allocation) override {
    // Only StubAllocations created below may be freed here.
    auto *alloc = dynamic_cast<StubAllocation *>(allocation);
    PADDLE_ENFORCE_NOT_NULL(alloc);
    if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
    ++destruct_count_;
    delete allocation;
  }

  Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
    ++construct_count_;
    if (size == 0) {
      // Zero-sized requests are counted but carry no backing memory.
      return new StubAllocation(nullptr, 0, platform::CPUPlace());
    } else {
      return new StubAllocation(new uint8_t[size], size, platform::CPUPlace());
    }
  }

 private:
  size_t construct_count_ = 0;
  size_t destruct_count_ = 0;
};
// Expected counter values, typed as size_t to match the getters.
constexpr size_t kZero = 0;
constexpr size_t kOne = 1;
constexpr size_t kTwo = 2;

// Freed allocations must be cached (lazily freed) rather than returned to
// the underlying allocator — except those falling into the last
// division-plan interval, which bypass the cache.
TEST(buffered_allocator, lazy_free) {
  std::vector<int> original_alloc_size({1022, 1023, 1024, 1025, 1026});
  for (auto alloc_size : original_alloc_size) {
    auto stub_allocator = std::make_shared<StubAllocator>();
    auto *underlying_allocator = stub_allocator.get();
    auto allocator =
        std::make_shared<MultiBinBufferedAllocator>(stub_allocator);
    {
      // First allocation must hit the underlying allocator; freeing it
      // (x = nullptr) should populate the cache, not free the memory.
      underlying_allocator->ResetCounter();
      auto x = allocator->Allocate(alloc_size, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      x = nullptr;
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    {
      // 900 bytes fits the cached ~1KB block → no underlying allocation;
      // 2048 bytes does not fit → one new underlying allocation.
      underlying_allocator->ResetCounter();
      auto x = allocator->Allocate(900, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      auto y = allocator->Allocate(2048, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      x = nullptr;
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      y = nullptr;
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    {
      // ClearCache must report both cached blocks and free them through the
      // underlying allocator.
      underlying_allocator->ResetCounter();
      size_t cache_size = allocator->ClearCache();
      ASSERT_EQ(cache_size, static_cast<size_t>(alloc_size + 2048));
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
    }
    {
      // A request exactly at the last division-plan bound falls into the
      // final (uncached) interval; its free passes straight through.
      underlying_allocator->ResetCounter();
      auto p = allocator->Allocate(allocator->DivisionPlan().back(),
                                   allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    ASSERT_EQ(underlying_allocator->GetFreeCount(), kOne);
    {
      // One byte below the last bound is cacheable again: freeing it does
      // not reach the underlying allocator.
      underlying_allocator->ResetCounter();
      auto p = allocator->Allocate(allocator->DivisionPlan().back() - 1,
                                   allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
  }
}
// When the underlying allocator is exhausted, the buffered allocator must
// clear its cache and retry instead of failing immediately.
TEST(buffered_allocator, garbage_collection) {
  std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
  auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
  auto allocator = GetBufferedAllocator(chunk.get(), false);
  // Consume the whole 2048-byte chunk, then return both blocks to the cache.
  auto x1 = allocator->Allocate(1600, allocator->kDefault);
  auto x2 = allocator->Allocate(400, allocator->kDefault);
  x1 = nullptr;
  x2 = nullptr;
  // This request can only succeed after the cached blocks are reclaimed.
  auto x3 = allocator->Allocate(1600, allocator->kDefault);
  ASSERT_NE(x3, nullptr);
  ASSERT_NE(x3->ptr(), nullptr);
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
浏览文件 @
c20db635
...
...
@@ -22,8 +22,6 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_bool
(
enable_buffered_allocator
);
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
...
...
@@ -38,7 +36,6 @@ TEST(allocator, allocator) {
#endif
FLAGS_allocator_strategy
=
"naive_best_fit"
;
FLAGS_enable_buffered_allocator
=
true
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
...
...
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
DECLARE_string
(
buffered_allocator_division_plan_path
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// End-to-end check of the division-plan file parser: entries may use
// fractional values, case-insensitive K/M/G suffixes, and an optional
// trailing 'b'/'B'.
TEST(buffered_allocator, division_plan) {
  std::string path = "/tmp/buffered_allocator_divison_plan";
  FLAGS_buffered_allocator_division_plan_path = path;
  {
    // Write a plan file with one size per line.
    std::vector<std::string> plan(
        {"100b", "300.7K", "500.3m", "1.02gB", "2g", "4G"});
    std::ofstream os(path);
    for (auto &p : plan) {
      os << p << std::endl;
    }
    os.close();
  }
  auto plan = ReadBufferedAllocatorDivisionPlanFromFile(
      FLAGS_buffered_allocator_division_plan_path);
  ASSERT_EQ(plan.size(), 6UL);
  ASSERT_EQ(plan[0], 100UL);
  ASSERT_EQ(plan[1], static_cast<size_t>(300.7 * 1024));
  ASSERT_EQ(plan[2], static_cast<size_t>(500.3 * 1024 * 1024));
  ASSERT_EQ(plan[3], static_cast<size_t>(1.02 * 1024 * 1024 * 1024));
  ASSERT_EQ(plan[4], static_cast<size_t>(2.0 * 1024 * 1024 * 1024));
  ASSERT_EQ(plan[5], static_cast<size_t>(4.0 * 1024 * 1024 * 1024));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
c20db635
...
...
@@ -39,7 +39,6 @@ limitations under the License. */
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
...
...
@@ -135,8 +134,6 @@ PYBIND11_MODULE(core, m) {
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
paddle
::
memory
::
allocation
::
UseMultiBinBufferedAllocatorGFlags
();
m
.
doc
()
=
"C++ core of PaddlePaddle"
;
// using framework in this function. Since it is inside a function, it will
...
...
python/paddle/fluid/__init__.py
浏览文件 @
c20db635
...
...
@@ -129,9 +129,7 @@ def __bootstrap__():
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'memory_fraction_of_eager_deletion'
,
'allocator_strategy'
,
'enable_buffered_allocator'
,
'buffered_allocator_excess_times'
,
'buffered_allocator_division_plan_path'
,
'reader_queue_speed_test_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'inner_op_parallelism'
,
'enable_parallel_graph'
,
'multiple_of_cupti_buffer_size'
,
'enable_subgraph_optimize'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录