diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h
index 800ed3c9de45aeaa7bee5be3478bbc40001cf921..6940250c3f9663bbb734d5a6eb78135aecbc3a3b 100644
--- a/paddle/fluid/framework/mixed_vector.h
+++ b/paddle/fluid/framework/mixed_vector.h
@@ -284,7 +284,7 @@ class Vector {
     bool IsInCPU() const { return flag_ & kDataInCPU; }
 
     mutable std::vector<T> cpu_;
-    mutable std::unique_ptr<memory::Allocation> gpu_;
+    mutable memory::AllocationPtr gpu_;
     mutable int flag_;
 
     mutable std::mutex mtx_;
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc
index 9af903a128dba301c47bbed47a30b4cc69012016..4122b3d709e095c08b4fb2667103649a03eee64f 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc
@@ -32,13 +32,10 @@ class StubAllocation : public Allocation {
 TEST(BestFitAllocator, test_allocation) {
   StubAllocation stub(4UL * 1024 * 1024 * 1024);
   BestFitAllocator allocator(&stub);
-  {
-    auto allocation = allocator.Allocate(64);
-    allocator.FreeUniquePtr(std::move(allocation));
-  }
+  { auto allocation = allocator.Allocate(64, allocator.kDefault); }
 
   {
-    auto allocation = allocator.Allocate(80);
+    auto allocation = allocator.Allocate(80, allocator.kDefault);
 
     {
       auto best_fit_allocation =
@@ -50,19 +47,18 @@ TEST(BestFitAllocator, test_allocation) {
       ASSERT_EQ(allocation->ptr(), nullptr);
     }
 
-    auto allocation2 = allocator.Allocate(60);
-    auto allocation3 = allocator.Allocate(90);
-    allocator.FreeUniquePtr(std::move(allocation2));
-    allocation2 = allocator.Allocate(30);
+    auto allocation2 = allocator.Allocate(60, allocator.kDefault);
+    auto allocation3 = allocator.Allocate(90, allocator.kDefault);
+    allocation2.reset();
+    allocation2 = allocator.Allocate(30, allocator.kDefault);
 
     {
       auto best_fit_allocation =
           dynamic_cast<BestFitAllocation*>(allocation2.get());
       ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
     }
-    allocator.FreeUniquePtr(std::move(allocation2));
-
-    allocation2 = allocator.Allocate(60);
+    allocation2.reset();
+    allocation2 = allocator.Allocate(60, allocator.kDefault);
 
     {
       auto best_fit_allocation =
@@ -70,23 +66,23 @@ TEST(BestFitAllocator, test_allocation) {
       ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
     }
 
-    allocator.FreeUniquePtr(std::move(allocation));
-    allocator.FreeUniquePtr(std::move(allocation2));
+    allocation.reset();
+    allocation2.reset();
 
-    allocation = allocator.Allocate(80 + 60);
+    allocation = allocator.Allocate(80 + 60, allocator.kDefault);
     {
       auto best_fit_allocation =
           dynamic_cast<BestFitAllocation*>(allocation.get());
       ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0);
     }
 
-    allocator.FreeUniquePtr(std::move(allocation));
+    allocation.reset();
 
-    allocation = allocator.Allocate(80);
-    allocation2 = allocator.Allocate(60);
-    allocator.FreeUniquePtr(std::move(allocation));
-    allocator.FreeUniquePtr(std::move(allocation3));
-    allocator.FreeUniquePtr(std::move(allocation2));
+    allocation = allocator.Allocate(80, allocator.kDefault);
+    allocation2 = allocator.Allocate(60, allocator.kDefault);
+    allocation = nullptr;
+    allocation2 = nullptr;
+    allocation3 = nullptr;
 
     ASSERT_EQ(allocator.NumFreeChunks(), 1U);
   }
@@ -94,7 +90,8 @@ TEST(BestFitAllocator, test_allocation) {
 
 TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
   CPUAllocator allocator;
-  auto global_allocation = allocator.Allocate(256UL * 1024 * 1024);
+  auto global_allocation =
+      allocator.Allocate(256UL * 1024 * 1024, allocator.kDefault);
 
   std::unique_ptr<Allocator> best_fit_allocator(
       new BestFitAllocator(global_allocation.get()));
@@ -109,8 +106,8 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
     for (size_t i = 0; i < 128; ++i) {
       size_t allocate_size = dist(engine);
 
-      auto allocation =
-          locked_allocator.Allocate(sizeof(size_t) * allocate_size);
+      auto allocation = locked_allocator.Allocate(
+          sizeof(size_t) * allocate_size, locked_allocator.kDefault);
 
       size_t* data = reinterpret_cast<size_t*>(allocation->ptr());
 
@@ -122,8 +119,6 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
       for (size_t j = 0; j < allocate_size; ++j) {
         ASSERT_EQ(data[j], j);
       }
-
-      locked_allocator.FreeUniquePtr(std::move(allocation));
     }
   };
   {
@@ -135,8 +130,6 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
       th.join();
     }
   }
-
-  allocator.FreeUniquePtr(std::move(global_allocation));
 }
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu
index a3dcb8b2aef58bbf0f74fd9011ee1efd15c4b638..eb200ffdcd67732b92497115986184158bebce8f 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu
+++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu
@@ -35,7 +35,8 @@ struct ForEachFill {
 TEST(BestFitAllocator, concurrent_cuda) {
   CUDAAllocator allocator(platform::CUDAPlace(0));
   // 256 MB
-  auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024);
+  auto cuda_allocation =
+      allocator.Allocate(256U * 1024 * 1024, allocator.kDefault);
   LockedAllocator concurrent_allocator(
       std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));
 
@@ -49,8 +50,8 @@ TEST(BestFitAllocator, concurrent_cuda) {
     for (size_t i = 0; i < 128; ++i) {
       size_t allocate_size = dist(engine);
 
-      auto allocation =
-          concurrent_allocator.Allocate(sizeof(size_t) * allocate_size);
+      auto allocation = concurrent_allocator.Allocate(
+          sizeof(size_t) * allocate_size, concurrent_allocator.kDefault);
 
       size_t* data = reinterpret_cast<size_t*>(allocation->ptr());
 
@@ -66,8 +67,7 @@ TEST(BestFitAllocator, concurrent_cuda) {
       for (size_t j = 0; j < allocate_size; ++j) {
         ASSERT_EQ(buf[j], j);
       }
-
-      concurrent_allocator.FreeUniquePtr(std::move(allocation));
+      allocation = nullptr;
     }
   };
 
@@ -80,7 +80,7 @@ TEST(BestFitAllocator, concurrent_cuda) {
       th.join();
     }
   }
-  allocator.FreeUniquePtr(std::move(cuda_allocation));
+  // No explicit free: cuda_allocation is released when it leaves scope.
 }
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
index 9445d305ce108ca4b5dc0239c0c1681917f51e72..f1a57ea2e9884571ff902d85bbddf13b59a6cb72 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
@@ -35,7 +35,7 @@ inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator(
 
 TEST(buffered_allocator, thread_safety) {
   std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
-  auto chunk = allocator->Allocate(1 << 20);
+  auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
   {
     auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
     ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
@@ -45,8 +45,6 @@ TEST(buffered_allocator, thread_safety) {
     auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
     ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
   }
-
-  allocator->FreeUniquePtr(std::move(chunk));
 }
 
 class StubAllocation : public Allocation {
@@ -54,27 +52,8 @@ class StubAllocation : public Allocation {
   using Allocation::Allocation;
 };
 
-class StubAllocator : public UnmanagedAllocator {
+class StubAllocator : public MannualFreeAllocator {
  public:
-  std::unique_ptr<Allocation> Allocate(size_t size,
-                                       Allocator::Attr attr) override {
-    ++construct_count_;
-    if (size == 0) {
-      return std::unique_ptr<Allocation>(
-          new StubAllocation(nullptr, 0, platform::CPUPlace()));
-    } else {
-      return std::unique_ptr<Allocation>(
-          new StubAllocation(new uint8_t[size], size, platform::CPUPlace()));
-    }
-  }
-
-  void FreeUniquePtr(std::unique_ptr<Allocation> allocation) {
-    StubAllocation *alloc = dynamic_cast<StubAllocation *>(allocation.get());
-    PADDLE_ENFORCE_NOT_NULL(alloc);
-    if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
-    ++destruct_count_;
-  }
-
   void ResetCounter() {
     construct_count_ = 0;
     destruct_count_ = 0;
@@ -84,6 +63,23 @@ class StubAllocator : public UnmanagedAllocator {
 
   size_t GetFreeCount() const { return destruct_count_; }
 
+ protected:
+  void Free(Allocation *allocation) override {  // counted via GetFreeCount()
+    auto *stub = dynamic_cast<StubAllocation *>(allocation);
+    PADDLE_ENFORCE_NOT_NULL(stub);  // only StubAllocation may be freed here
+    delete[] static_cast<uint8_t *>(stub->ptr());  // null payload: no-op
+    delete allocation;
+    ++destruct_count_;
+  }
+  Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
+    // Every request is counted, including zero-sized ones.
+    ++construct_count_;
+    // A zero-sized request carries no buffer, only a null payload.
+    uint8_t *payload = nullptr;
+    if (size != 0) payload = new uint8_t[size];
+    return new StubAllocation(payload, size, platform::CPUPlace());
+  }
+
  private:
   size_t construct_count_ = 0;
   size_t destruct_count_ = 0;
@@ -101,24 +97,24 @@ TEST(buffered_allocator, lazy_free) {
 
   {
     underlying_allocator->ResetCounter();
-    auto x = allocator->Allocate(1025);
+    auto x = allocator->Allocate(1025, allocator->kDefault);
     ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
     ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
-    allocator->FreeUniquePtr(std::move(x));
+    x = nullptr;
     ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
   }
 
   {
     underlying_allocator->ResetCounter();
-    auto x = allocator->Allocate(900);
+    auto x = allocator->Allocate(900, allocator->kDefault);
     ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
     ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
-    auto y = allocator->Allocate(2048);
+    auto y = allocator->Allocate(2048, allocator->kDefault);
     ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
     ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
-    allocator->FreeUniquePtr(std::move(x));
+    x = nullptr;
     ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
-    allocator->FreeUniquePtr(std::move(y));
+    y = nullptr;
     ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
   }
 
@@ -132,13 +128,13 @@ TEST(buffered_allocator, lazy_free) {
 
 TEST(buffered_allocator, garbage_collection) {
   std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
-  auto chunk = cpu_allocator->Allocate(2048);
+  auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
   auto allocator = GetBufferedAllocator(chunk.get(), false);
-  auto x1 = allocator->Allocate(1600);
-  auto x2 = allocator->Allocate(400);
-  allocator->FreeUniquePtr(std::move(x1));
-  allocator->FreeUniquePtr(std::move(x2));
-  auto x3 = allocator->Allocate(1600);
+  auto x1 = allocator->Allocate(1600, allocator->kDefault);
+  auto x2 = allocator->Allocate(400, allocator->kDefault);
+  x1 = nullptr;  // drop both handles before asking for 1600 bytes again
+  x2 = nullptr;
+  auto x3 = allocator->Allocate(1600, allocator->kDefault);  // must succeed
   ASSERT_NE(x3, nullptr);
   ASSERT_NE(x3->ptr(), nullptr);
 }
diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc
index c55742c7befd5738973c2f07a60d2f9d3b8afa52..a0ce2875cb8337a59ec03730e5cf66d2fc622001 100644
--- a/paddle/fluid/memory/allocation/retry_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc
@@ -32,7 +32,7 @@ TEST(RetryAllocator, RetryAllocator) {
   CPUAllocator cpu_allocator;
 
   size_t size = (1 << 20);
-  auto cpu_allocation = cpu_allocator.Allocate(size);
+  auto cpu_allocation = cpu_allocator.Allocate(size, cpu_allocator.kDefault);
 
   std::unique_ptr<BestFitAllocator> best_fit_allocator(
       new BestFitAllocator(cpu_allocation.get()));
@@ -44,15 +44,15 @@ TEST(RetryAllocator, RetryAllocator) {
   size_t extra_time = 2;
 
   // Reserve to perform more tests in the future
-  std::vector<std::shared_ptr<ManagedAllocator>> allocators;
+  std::vector<std::shared_ptr<Allocator>> allocators;
   {
     std::unique_ptr<BestFitAllocator> best_fit_allocator(
         new BestFitAllocator(cpu_allocation.get()));
     std::unique_ptr<LockedAllocator> locked_allocator(
         new LockedAllocator(std::move(best_fit_allocator)));
-    allocators.push_back(
-        RetryAllocator::Create(std::move(locked_allocator),
-                               (thread_num - 1) * (sleep_time + extra_time)));
+    allocators.push_back(std::make_shared<RetryAllocator>(
+        std::move(locked_allocator),
+        (thread_num - 1) * (sleep_time + extra_time)));
   }
 
   for (auto &allocator : allocators) {
@@ -91,8 +91,6 @@ TEST(RetryAllocator, RetryAllocator) {
                                     [val](void *p) { return p == val; });
     ASSERT_TRUE(is_all_equal);
   }
-
-  cpu_allocator.FreeUniquePtr(std::move(cpu_allocation));
 }
 
 }  // namespace allocation
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 0e7799833582962395fd25db32a5307458886063..9a9018cdea6a9dcdebe20fd0faef8ff3d4e0e2a1 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -110,7 +110,7 @@ class CudnnHolder {
   std::mutex& Mutex() { return mtx_; }
 
   cudnnHandle_t cudnn_handle_;
-  std::unique_ptr<memory::Allocation> workspace_;
+  memory::AllocationPtr workspace_;
 
   const cudaStream_t* stream_;  // not owned;
   const CUDAPlace place_;