diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index 89ce628c5d51bda7e819a3a8e9ebdb3822a26f53..ca67765044c8819911756a11b37751e84e910e3c 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -22,41 +22,6 @@ namespace memory { namespace allocation { BufferedAllocator::BufferedAllocator(std::unique_ptr&& allocator) { - std::vector division_plan(8 * sizeof(size_t)); - for (size_t i = 0; i < 8 * sizeof(size_t); ++i) { - division_plan[i] = (static_cast(1) << i); - } - InitAndEnforceCheck(std::move(allocator), division_plan); -} - -BufferedAllocator::BufferedAllocator(std::unique_ptr&& allocator, - const std::vector& division_plan) { - InitAndEnforceCheck(std::move(allocator), division_plan); -} - -BufferedAllocator::~BufferedAllocator() { FlushImpl(); } - -void BufferedAllocator::FlushImpl() { - for (auto& v : allocations_) { - for (auto& pair : v) { - underlying_allocator_->FreeUniquePtr(std::move(pair.second)); - } - v.clear(); - } -} - -void BufferedAllocator::Flush() { - if (mtx_) { - std::lock_guard lock(*mtx_); - FlushImpl(); - } else { - FlushImpl(); - } -} - -void BufferedAllocator::InitAndEnforceCheck( - std::unique_ptr&& allocator, - const std::vector& division_plan) { underlying_allocator_.reset( dynamic_cast(allocator.release())); PADDLE_ENFORCE_NOT_NULL( @@ -65,141 +30,54 @@ void BufferedAllocator::InitAndEnforceCheck( if (underlying_allocator_->IsAllocThreadSafe()) { mtx_.reset(new std::mutex()); } - constexpr size_t kMax = std::numeric_limits::max(); - if (division_plan.empty()) { - division_plan_.assign({0, kMax}); - } else { - auto from = division_plan.front() == 0 ? division_plan.begin() + 1 - : division_plan.begin(); - auto to = division_plan.back() == kMax ? 
division_plan.end() - 1 - : division_plan.end(); - division_plan_.reserve(to - from + 2); - division_plan_.push_back(0); - division_plan_.insert(division_plan_.end(), from, to); - division_plan_.push_back(kMax); - for (size_t i = 1; i < division_plan_.size(); ++i) { - PADDLE_ENFORCE_LT(division_plan_[i - 1], division_plan_[i], - "Division plan must be strictly sorted"); - } - } - allocations_.resize(division_plan_.size() - 1); -} - -void BufferedAllocator::InsertAllocationImpl( - std::unique_ptr&& allocation) { - auto size = allocation->size(); - auto idx = GetListIndex(size); - allocations_[idx].emplace(size, std::move(allocation)); -} - -void BufferedAllocator::InsertAllocation( - std::unique_ptr&& allocation) { - if (mtx_) { - std::lock_guard lock(*mtx_); - InsertAllocationImpl(std::move(allocation)); - } else { - InsertAllocationImpl(std::move(allocation)); - } } -bool BufferedAllocator::Match(size_t actual_size, size_t requested_size) { - return (actual_size >> 1) < requested_size; -} - -size_t BufferedAllocator::GetListIndex(size_t size) { - auto it = - std::upper_bound(division_plan_.begin(), division_plan_.end(), size); - return static_cast(it - division_plan_.begin()) - 1; -} +BufferedAllocator::~BufferedAllocator() { FreeCache(-1UL); } -std::unique_ptr BufferedAllocator::RemoveAllocationImpl( - size_t size) { - auto idx = GetListIndex(size); - auto& allocation_map = allocations_[idx]; - auto it = allocation_map.lower_bound(size); - // Only remove allocation whose size is not more than twice of requested size - if (it != allocation_map.end()) { - if (Match(it->second->size(), size)) { - auto ret = std::move(it->second); - allocation_map.erase(it); - return ret; - } else { - return nullptr; - } - } else { - while (++idx < allocations_.size() && Match(division_plan_[idx], size)) { - auto& allocation_map = allocations_[idx]; - if (!allocation_map.empty()) { - auto it = allocation_map.begin(); - if (Match(it->second->size(), size)) { - auto ret = 
std::move(it->second); - allocation_map.erase(it); - return ret; - } else { - return nullptr; - } - } +std::unique_ptr BufferedAllocator::Allocate(size_t size, + Allocator::Attr attr) { + std::unique_ptr result; + { + platform::LockGuardPtr guard(mtx_); + auto it = allocations_.lower_bound(size); + if (it != allocations_.end() && it->first < size * 2) { + result = std::move(it->second); + allocations_.erase(it); } - return nullptr; } -} -std::unique_ptr BufferedAllocator::RemoveAllocation(size_t size) { - if (mtx_) { - std::lock_guard lock(*mtx_); - return RemoveAllocationImpl(size); - } else { - return RemoveAllocationImpl(size); + if (result) { + return result; } -} -std::unique_ptr BufferedAllocator::Allocate(size_t size, - Allocator::Attr attr) { - auto ret = RemoveAllocation(size); - if (!ret) { - try { - return underlying_allocator_->Allocate(size, attr); - } catch (BadAlloc&) { - // if allocation failed, try to free some memorys from buffers - FreeAllocations(size); - return underlying_allocator_->Allocate(size, attr); - } + try { + return underlying_allocator_->Allocate(size, attr); + } catch (BadAlloc&) { + FreeCache(size); + return underlying_allocator_->Allocate(size, attr); } - return ret; } -void BufferedAllocator::FreeAllocationsImpl(size_t size) { +void BufferedAllocator::FreeCache(size_t size) { + platform::LockGuardPtr guard(mtx_); if (UNLIKELY(size == 0)) return; size_t cur = 0; - for (auto& alloc_map : allocations_) { - // use reverse iterator to free large allocations first - while (!alloc_map.empty()) { - auto it = --(alloc_map.end()); - cur += it->second->size(); - underlying_allocator_->FreeUniquePtr(std::move(it->second)); - alloc_map.erase(it); - if (cur >= size) return; - } - } -} - -void BufferedAllocator::FreeAllocations(size_t size) { - if (mtx_) { - std::lock_guard lock(*mtx_); - FreeAllocationsImpl(size); - } else { - FreeAllocationsImpl(size); + while (!allocations_.empty()) { // free the largest + auto it = --allocations_.end(); + 
cur += it->second->size(); + underlying_allocator_->FreeUniquePtr(std::move(it->second)); + allocations_.erase(it); + if (cur >= size) return; } } void BufferedAllocator::FreeUniquePtr(std::unique_ptr allocation) { - InsertAllocation(std::move(allocation)); + platform::LockGuardPtr guard(mtx_); + allocations_.emplace(allocation->size(), std::move(allocation)); } -bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; } - -const std::vector& BufferedAllocator::GetDivisionPlan() const { - return division_plan_; +bool BufferedAllocator::IsAllocThreadSafe() const { + return this->underlying_allocator_->IsAllocThreadSafe(); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index 0fe6e5a19a84995a9d143f4c3803ff54b77a1f92..1284661df1aba9ba756799372a6cb8cca1e2bc8a 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -19,6 +19,7 @@ #include #include #include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/platform/lock_guard_ptr.h" namespace paddle { namespace memory { @@ -32,9 +33,6 @@ class BufferedAllocator : public UnmanagedAllocator { public: explicit BufferedAllocator(std::unique_ptr&& allocator); - BufferedAllocator(std::unique_ptr&& allocator, - const std::vector& division_plan); - ~BufferedAllocator(); std::unique_ptr Allocate( @@ -44,31 +42,14 @@ class BufferedAllocator : public UnmanagedAllocator { bool IsAllocThreadSafe() const override; - const std::vector& GetDivisionPlan() const; - - void Flush(); + // only used in unittest + inline void ClearCache() { FreeCache(-1UL); } private: - void InitAndEnforceCheck(std::unique_ptr&& allocator, - const std::vector& division_plan); - - void InsertAllocation(std::unique_ptr&& allocation); - void InsertAllocationImpl(std::unique_ptr&& allocation); - - static bool Match(size_t actual_size, size_t requested_size); - 
std::unique_ptr RemoveAllocation(size_t size); - std::unique_ptr RemoveAllocationImpl(size_t size); - - void FreeAllocations(size_t size); - void FreeAllocationsImpl(size_t size); - - void FlushImpl(); - - size_t GetListIndex(size_t size); + void FreeCache(size_t size); std::unique_ptr underlying_allocator_; - std::vector>> allocations_; - std::vector division_plan_; + std::multimap> allocations_; std::unique_ptr mtx_; }; diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index a9fb4f3926c7b9fab50abb9c6b9f25ddd44b093f..9445d305ce108ca4b5dc0239c0c1681917f51e72 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -124,7 +124,7 @@ TEST(buffered_allocator, lazy_free) { { underlying_allocator->ResetCounter(); - allocator->Flush(); + allocator->ClearCache(); ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero); ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo); } diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index 75686df4341aafc238b619c4d859ee001a6218d9..20f3bfbd3e8449d759360b1cc6c64ae3a141df2d 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -30,9 +30,10 @@ DEFINE_bool(init_allocated_mem, false, "during unit testing."); DECLARE_double(fraction_of_gpu_memory_to_use); -DEFINE_bool(use_legacy_allocator, true, - "Whether to use the legacy allocator. If the new allocators have" - "been well tested, we should remove these flag."); +DEFINE_string( + allocator_strategy, "legacy", + "The allocation strategy. Legacy means the original allocator of Fluid." + "New means the experimental allocators of Fluid. 
in [legacy, new]"); namespace paddle { namespace memory { @@ -274,15 +275,11 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const { #endif } -size_t memory_usage(const platform::Place& p) { - return boost::apply_visitor(Usage(), p); -} - class LegacyAllocation : public Allocation { public: using Allocation::Allocation; - ~LegacyAllocation() { + ~LegacyAllocation() final { boost::apply_visitor(FreeVisitor(this->ptr()), this->place()); } }; @@ -291,7 +288,7 @@ class LegacyAllocation : public Allocation { std::shared_ptr AllocShared(const platform::Place& place, size_t size, Allocator::Attr attr) { - if (FLAGS_use_legacy_allocator) { + if (FLAGS_allocator_strategy == "legacy") { void* p = boost::apply_visitor(legacy::AllocVisitor(size), place); return std::shared_ptr( new legacy::LegacyAllocation(p, size, place)); @@ -303,7 +300,7 @@ std::shared_ptr AllocShared(const platform::Place& place, std::unique_ptr Alloc(const platform::Place& place, size_t size, Allocator::Attr attr) { - if (FLAGS_use_legacy_allocator) { + if (FLAGS_allocator_strategy == "legacy") { void* p = boost::apply_visitor(legacy::AllocVisitor(size), place); return std::unique_ptr( new legacy::LegacyAllocation(p, size, place)); diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc index a08a9dbd0da46e73082cdd24c019e8d210d8bcc4..d7a048257f92c1c58c34decf1a93ff95f5f736c7 100644 --- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc +++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/reader/reader_op_registry.h" +#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/recordio/scanner.h" namespace paddle { namespace operators { @@ -33,11 +34,7 @@ class RecordIOFileReader : public framework::FileReader { protected: void ReadNextImpl(std::vector* out) override { - std::unique_ptr> guard; - if (ThreadSafe) { - guard.reset(new std::lock_guard(*mutex_)); - } - + platform::LockGuardPtr guard(mutex_); bool ok = framework::ReadFromRecordIO(&scanner_, dev_ctx_, out); if (!ok) { out->clear(); diff --git a/paddle/fluid/platform/lock_guard_ptr.h b/paddle/fluid/platform/lock_guard_ptr.h new file mode 100644 index 0000000000000000000000000000000000000000..220c538bc78101d397f08ce5617886b381169542 --- /dev/null +++ b/paddle/fluid/platform/lock_guard_ptr.h @@ -0,0 +1,55 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include // NOLINT +namespace paddle { +namespace platform { + +/** + * LockGuard for std::unique_ptr<LockType>. It will do nothing when guarded ptr + * is nullptr. + * + * The advantage of using `LockGuardPtr` instead of + * std::unique_ptr<std::lock_guard<LockType>> is that this type is totally a stack + * variable. There is no heap allocation at all. 
+ */ +template +class LockGuardPtr { + using LockGuardType = std::lock_guard; + + public: + class LockGuardDeleter { + public: + void operator()(LockGuardType* guard) { guard->~LockGuardType(); } + }; + + explicit LockGuardPtr(std::unique_ptr& lock_ptr) // NOLINT + : guard_ptr_(lock_ptr ? new (guard_buffer_) LockGuardType(*lock_ptr) + : nullptr) {} + + LockGuardPtr(const LockGuardPtr&) = delete; + LockGuardPtr& operator=(const LockGuardPtr&) = delete; + LockGuardPtr(LockGuardPtr&&) = delete; + LockGuardPtr& operator=(LockGuardPtr&&) = delete; + + private: + uint8_t guard_buffer_[sizeof(LockGuardType)]; + std::unique_ptr guard_ptr_; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index b18bd70005c07596c928377c6606d02cc962a0d7..32d433b698566b7cfafe83937afc8118a3c0c527 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -27,10 +27,12 @@ int main(int argc, char** argv) { new_argv.push_back(argv[i]); } #ifdef PADDLE_WITH_CUDA - new_argv.push_back(strdup("--tryfromenv=fraction_of_gpu_memory_to_use")); + new_argv.push_back( + strdup("--tryfromenv=fraction_of_gpu_memory_to_use,allocator_strategy")); #else - new_argv.push_back(strdup( - "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb")); + new_argv.push_back( + strdup("--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_" + "mb,allocator_strategy")); new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb")); #endif int new_argc = static_cast(new_argv.size()); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index ce792664920d44c3d9510e625d57f80b6559fc7f..a57c3287afa53a8fa8e671919ea54ff2447093fc 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -114,7 +114,7 @@ def __bootstrap__(): 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'init_allocated_mem', 
'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb', - 'use_legacy_allocator', 'reader_queue_speed_test_mode' + 'allocator_strategy', 'reader_queue_speed_test_mode' ] if core.is_compiled_with_dist(): read_env_flags.append('rpc_deadline')