diff --git a/paddle/fluid/framework/ir/lock_free_optimize_pass.h b/paddle/fluid/framework/ir/lock_free_optimize_pass.h
index d1718857a5d84304c3c02e74c7ca79c24f367f8c..9c923480bac26fb8c68768c8365b0f899959ec64 100644
--- a/paddle/fluid/framework/ir/lock_free_optimize_pass.h
+++ b/paddle/fluid/framework/ir/lock_free_optimize_pass.h
@@ -12,8 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifndef PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
-#define PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
+#pragma once

 #include
 #include
@@ -126,5 +125,3 @@ class LockFreeOptimizePass : public Pass {
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
-
-#endif  // PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc
index a6546cb452a1ae0939ca7a189b8a9ca45c876fd5..427d7bc9aeb15f4adb4a486c511630836bf2bb73 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc
@@ -81,8 +81,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                       const char* var_name) {
   auto x = scope->Var(var_name);
   auto tensor = x->GetMutable<LoDTensor>();
-  tensor->mutable_data(place, proto::VarType::FP32,
-                       ::paddle::memory::Allocator::kDefault, 1);
+  tensor->mutable_data(place, proto::VarType::FP32, 1);
 }

 void MainTest(bool convWithExistingBias) {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
index c46ffad036dab8a32cc997552db1d9883c5a6d6e..0a68944186773f84f734d81cf29dc5214d16e173 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -110,8 +110,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                       const char* var_name) {
   auto x = scope->Var(var_name);
   auto tensor = x->GetMutable<LoDTensor>();
-  tensor->mutable_data(place, proto::VarType::FP32,
-                       ::paddle::memory::Allocator::kDefault, 1);
+  tensor->mutable_data(place, proto::VarType::FP32, 1);
 }

 void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
index 94cb42633f4bfbf33576e16cc685ed67cf0d2f1a..057a790ccb3147c6e366322cdb62d4665c946b33 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
@@ -102,8 +102,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                       const char* var_name) {
   auto x = scope->Var(var_name);
   auto tensor = x->GetMutable<LoDTensor>();
-  tensor->mutable_data(place, proto::VarType::FP32,
-                       ::paddle::memory::Allocator::kDefault, 1);
+  tensor->mutable_data(place, proto::VarType::FP32, 1);
 }

 void MainTest(const ProgramDesc& prog, int removed_nodes_num) {
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index f216f3949425b4d2eef29692ee4207e66247954b..8e158e93063cb7620440b0af8433c0baa02eab22 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -377,12 +377,12 @@ class ExecutionContext {
   }

   template <typename T>
-  T& GetKernelConfig(int idx) const {
+  T& GetKernelConfig(size_t idx) const {
     PADDLE_ENFORCE(
         kernel_configs_ && kernel_configs_->size() > static_cast<size_t>(idx),
-        "%s selected kernel doesn't have kernel config %lu <= %d",
+        "%s selected kernel doesn't have kernel config %lu <= %lu",
         op_.Type().c_str(), kernel_configs_->size(), idx);
-    return *boost::get<std::shared_ptr<T>>(kernel_configs_->at(idx));
+    return *boost::get<std::shared_ptr<T>>((*kernel_configs_)[idx]);
   }

 private:
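Illustration, not part of the patch: a self-contained sketch of the pattern GetKernelConfig follows after this hunk, an unsigned index checked against the container size (so no mixed int/size_t comparison) and unchecked operator[] once the bound is enforced. GetConfig and its parameter names are hypothetical stand-ins, not Paddle APIs.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for ExecutionContext::GetKernelConfig after this
// patch: idx is size_t, so the bounds check compares like with like, and
// (*configs)[idx] skips the range check that at() would repeat.
template <typename T>
T& GetConfig(std::vector<T>* configs, std::size_t idx) {
  assert(configs != nullptr && configs->size() > idx);  // mirrors PADDLE_ENFORCE
  return (*configs)[idx];
}

int main() {
  std::vector<int> configs{7, 8, 9};
  return GetConfig(&configs, 1) == 8 ? 0 : 1;
}
```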
diff --git a/paddle/fluid/framework/operator_kernel_configs.h b/paddle/fluid/framework/operator_kernel_configs.h
index c520c222350ceeef246dae756a7157872ae087fa..a350b8957d91ea21375e1942af2968277b10833e 100644
--- a/paddle/fluid/framework/operator_kernel_configs.h
+++ b/paddle/fluid/framework/operator_kernel_configs.h
@@ -103,7 +103,7 @@ TAlgorithm AlgorithmsCache<TAlgorithm>::GetAlgorithm(
     ++search_times_;
     return algo;
   }
-  TAlgorithm algo;
+  TAlgorithm algo{};
   int64_t min = static_cast<int64_t>(INT_MAX);
   for (const auto& m : hash_) {
     if (m.first < min) {
diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc
index ea7f8c496a9fc3ff78fce06b69fb21e44e5be9ee..565b7d9d16cb4d048c57b841857390a3dea3ed7a 100644
--- a/paddle/fluid/framework/tensor.cc
+++ b/paddle/fluid/framework/tensor.cc
@@ -35,7 +35,6 @@ size_t Tensor::memory_size() const {
 }

 void* Tensor::mutable_data(platform::Place place, proto::VarType::Type type,
-                           memory::Allocator::Attr attr,
                            size_t requested_size) {
   type_ = type;
   PADDLE_ENFORCE_GE(numel(), 0,
@@ -50,18 +49,17 @@ void* Tensor::mutable_data(platform::Place place, proto::VarType::Type type,
   /* some versions of boost::variant don't have operator!= */
   if (holder_ == nullptr || !(holder_->place() == place) ||
       holder_->size() < size + offset_) {
-    holder_ = memory::AllocShared(place, size, attr);
+    holder_ = memory::AllocShared(place, size);
     offset_ = 0;
   }
   return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                  offset_);
 }

-void* Tensor::mutable_data(platform::Place place, memory::Allocator::Attr attr,
-                           size_t requested_size) {
+void* Tensor::mutable_data(platform::Place place, size_t requested_size) {
   PADDLE_ENFORCE(this->holder_ != nullptr,
                  "Cannot invoke mutable data if current hold nothing.");
-  return mutable_data(place, type_, attr, requested_size);
+  return mutable_data(place, type_, requested_size);
 }

 Tensor& Tensor::ShareDataWith(const Tensor& src) {
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 0fa76f943ec1417dc712771565f7ff2b263e6365..1ab75e3325740a30c9233b4cef660a869368112a 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -87,17 +87,12 @@ class Tensor {
    * @note If not exist, then allocation.
    */
   template <typename T>
-  T* mutable_data(platform::Place place,
-                  memory::Allocator::Attr attr = memory::Allocator::kDefault,
-                  size_t requested_size = 0);
+  T* mutable_data(platform::Place place, size_t requested_size = 0);

   void* mutable_data(platform::Place place, proto::VarType::Type type,
-                     memory::Allocator::Attr attr = memory::Allocator::kDefault,
                      size_t requested_size = 0);

-  void* mutable_data(platform::Place place,
-                     memory::Allocator::Attr attr = memory::Allocator::kDefault,
-                     size_t requested_size = 0);
+  void* mutable_data(platform::Place place, size_t requested_size = 0);

   /**
    * @brief Return a pointer to mutable memory block.
@@ -109,9 +104,7 @@ class Tensor {
    * @note If not exist, then allocation.
    */
   template <typename T>
-  T* mutable_data(DDim dims, platform::Place place,
-                  memory::Allocator::Attr attr = memory::Allocator::kDefault,
-                  size_t requested_size = 0);
+  T* mutable_data(DDim dims, platform::Place place, size_t requested_size = 0);

   /*!
Return the dimensions of the memory block. */ const DDim& dims() const; diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index 8dabecac8ab42ee0fb6b57048f3a1c8223d0b0b1..a4b1457ad567cf5f1f2788a5c24889c3066c84b0 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -49,20 +49,17 @@ inline T* Tensor::data() { template inline T* Tensor::mutable_data(DDim dims, platform::Place place, - memory::Allocator::Attr attr, size_t requested_size) { static_assert(std::is_pod::value, "T must be POD"); Resize(dims); - return mutable_data(place, attr, requested_size); + return mutable_data(place, requested_size); } template -inline T* Tensor::mutable_data(platform::Place place, - memory::Allocator::Attr attr, - size_t requested_size) { +inline T* Tensor::mutable_data(platform::Place place, size_t requested_size) { static_assert(std::is_pod::value, "T must be POD"); return reinterpret_cast( - mutable_data(place, DataTypeTrait::DataType, attr, requested_size)); + mutable_data(place, DataTypeTrait::DataType, requested_size)); } inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h index b536d4276e3b6236d0748eee588d345dd15c6954..7cedad3d66c8e68d7fe319fd6d27f074b924cbd6 100644 --- a/paddle/fluid/memory/allocation/aligned_allocator.h +++ b/paddle/fluid/memory/allocation/aligned_allocator.h @@ -89,9 +89,8 @@ class AlignedAllocator : public ThinAlignedAllocator { using ThinAlignedAllocator::ThinAlignedAllocator; protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { - auto raw_allocation = - underlying_allocator_->Allocate(size + kAlignment, attr); + Allocation* AllocateImpl(size_t size) override { + auto raw_allocation = underlying_allocator_->Allocate(size + kAlignment); return new AlignedAllocation(std::move(raw_allocation), size); } diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc index dc0f34fecd5cecf11325116c51728cb374ee9fc3..4998f3dbb9613abbf5ca67a3d43863d01483b79f 100644 --- a/paddle/fluid/memory/allocation/allocator.cc +++ b/paddle/fluid/memory/allocation/allocator.cc @@ -14,8 +14,6 @@ #include "paddle/fluid/memory/allocation/allocator.h" -#include - namespace paddle { namespace memory { namespace allocation { diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 1fcd9361805aa8a494684fdafc19013338092791..d31f37268d96af529801fe0fa1b28ec6e80aed76 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -146,42 +146,8 @@ class Allocation { }; // Base interface class of memory Allocator. -// To allocate a memory, allocator needs two parameters: -// 1. size of bytes. -// 2. Attribute of memory. -// NOTE: the attribute of memory might be ignored if the allocator does not -// care it. class Allocator { public: - enum Attr { - kDefault = 0, // Default attribute. Uses the fast or stablest allocation - // algorithm. - - kFixedHuge = 1, // The allocation may not be freed until the program - // ends. e.g., `Parameters` and `Momentum`. - - kFluxHuge = 2, // The allocation may create and freed frequently and the - // allocation is considerable huge. Like `activations` - // and gradients. - - kScratchpad = - 3, // The `Scratchpad` memory is allocated and freed very soon, - // usually within an operator or aux memory. 
- // Like CUDNN workspace, AUX memory in batch norm, etc. - // - // https://en.wikipedia.org/wiki/Scratchpad_memory - - kCrossDevice = - 4, // The memory used cross-device memory copy/communication. - // For example: - // 1. it can use an `pinned` memory for CPU-GPU - // communication. - // 2. it can use an `registered` memory for RDMA - // communication. - - NumOfAttrs = 5 // The number of all attributes. It is used internally. - }; - virtual ~Allocator() {} class AllocationDeleter { @@ -195,8 +161,8 @@ class Allocator { using AllocationPtr = std::unique_ptr; // Allocate an allocation. - inline AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault) { - auto ptr = AllocateImpl(size, attr); + inline AllocationPtr Allocate(size_t size) { + auto ptr = AllocateImpl(size); ptr->RegisterDecoratedAllocator(this); return AllocationPtr(ptr); } @@ -211,7 +177,7 @@ class Allocator { virtual bool IsAllocThreadSafe() const; protected: - virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; + virtual Allocation* AllocateImpl(size_t size) = 0; virtual void FreeImpl(Allocation* allocation); }; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 1ff719c9e726ac5bc3f39cd552fe5c53b22147a0..440b2475f1631ce5b0a1018ccd13849cc2568cd5 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -67,8 +67,8 @@ class CPUManagedAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { - return normal_allocator_->Allocate(size, attr).release(); + Allocation* AllocateImpl(size_t size) override { + return normal_allocator_->Allocate(size).release(); } private: @@ -101,11 +101,10 @@ class ChunkedAllocator : public Allocator { auto* cond_allocator = new ConditionalAllocator(); cond_allocator - ->AddAllocator( - [this](size_t size, Attr attr) { return size < max_chunk_size_; }, - default_allocator_) + ->AddAllocator([this](size_t size) { return size < max_chunk_size_; }, + default_allocator_) .AddAllocator( - [](size_t size, Attr attr) { + [](size_t size) { return true; // default case }, raw_allocator_); @@ -133,8 +132,8 @@ class ChunkedAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { - return default_allocator_->Allocate(size, attr).release(); + Allocation* AllocateImpl(size_t size) override { + return default_allocator_->Allocate(size).release(); } protected: @@ -263,7 +262,7 @@ class AllocatorFacadePrivate { explicit ZeroSizeAllocator(platform::Place place) : place_(place) {} protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { + Allocation* AllocateImpl(size_t size) override { return new Allocation(nullptr, 0, place_); } @@ -304,13 +303,13 @@ AllocatorFacade& AllocatorFacade::Instance() { } std::shared_ptr AllocatorFacade::AllocShared( - const platform::Place& place, size_t size, Allocator::Attr attr) { - return std::shared_ptr(Alloc(place, size, attr)); + const platform::Place& place, size_t size) { + return std::shared_ptr(Alloc(place, size)); } -AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, - Allocator::Attr attr) { - return m_->GetAllocator(place, size)->Allocate(size, attr); +AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, + size_t 
size) { + return m_->GetAllocator(place, size)->Allocate(size); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h index 16da30bec0d9f524bd076fe76d15c2fcfa7edd3a..64b6fe25c352e82d6320e26d95efb61f3cb4a5b1 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.h +++ b/paddle/fluid/memory/allocation/allocator_facade.h @@ -38,13 +38,11 @@ class AllocatorFacade { static AllocatorFacade& Instance(); // Allocate a shared allocation. - std::shared_ptr AllocShared( - const platform::Place& place, size_t size, - Allocator::Attr attr = Allocator::kDefault); + std::shared_ptr AllocShared(const platform::Place& place, + size_t size); // Allocate a unique allocation. - AllocationPtr Alloc(const platform::Place& place, size_t size, - Allocator::Attr attr = Allocator::kDefault); + AllocationPtr Alloc(const platform::Place& place, size_t size); // TODO(yy): Allocate a Copy-On-Write allocation? private: diff --git a/paddle/fluid/memory/allocation/auto_increment_allocator.cc b/paddle/fluid/memory/allocation/auto_increment_allocator.cc index c4785d2078601d7f9c5eeb7b902c7d1020340214..bafa82f18c7ee1f92ac4f0ad6634a06620f46c6a 100644 --- a/paddle/fluid/memory/allocation/auto_increment_allocator.cc +++ b/paddle/fluid/memory/allocation/auto_increment_allocator.cc @@ -34,14 +34,13 @@ std::shared_ptr AutoIncrementAllocator::CreateNewAllocator() { "bug."); return underlying_allocators_[old_size]; } -Allocation *AutoIncrementAllocator::AllocateImpl(size_t size, - Allocator::Attr attr) { +Allocation *AutoIncrementAllocator::AllocateImpl(size_t size) { auto cur = prev_success_allocator_.load(); size_t retry_count = allocator_num_.load(); size_t allocator_num = retry_count; while (retry_count-- > 0) { // until there retry count is zero try { - auto res = underlying_allocators_[cur]->Allocate(size, attr); + auto res = underlying_allocators_[cur]->Allocate(size); prev_success_allocator_ = cur; return res.release(); } catch (BadAlloc &) { @@ -61,7 +60,7 @@ Allocation *AutoIncrementAllocator::AllocateImpl(size_t size, // the newly created allocator by the first allocation request. 
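// (The loop below covers indices [allocator_num, allocator_num_): allocators
// that other threads appended after this call snapshotted allocator_num_;
// they are tried before paying for CreateNewAllocator().)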
for (cur = allocator_num; cur < allocator_num_; ++cur) { try { - auto ret = underlying_allocators_[cur]->Allocate(size, attr); + auto ret = underlying_allocators_[cur]->Allocate(size); prev_success_allocator_ = cur; return ret.release(); } catch (BadAlloc &) { @@ -70,7 +69,7 @@ Allocation *AutoIncrementAllocator::AllocateImpl(size_t size, } } // No suitable allocator - return CreateNewAllocator()->Allocate(size, attr).release(); + return CreateNewAllocator()->Allocate(size).release(); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/auto_increment_allocator.h b/paddle/fluid/memory/allocation/auto_increment_allocator.h index 382588f17a9748b1b0a356c0469c683f6c904778..068cda473d6e40bd0ac64a0b9e475336882d5edd 100644 --- a/paddle/fluid/memory/allocation/auto_increment_allocator.h +++ b/paddle/fluid/memory/allocation/auto_increment_allocator.h @@ -19,6 +19,7 @@ #include #include // NOLINT #include // NOLINT +#include #include #include "paddle/fluid/memory/allocation/allocator.h" @@ -60,7 +61,7 @@ class AutoIncrementAllocator : public Allocator { std::shared_ptr CreateNewAllocator(); protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation* AllocateImpl(size_t size) override; private: AllocatorCreator creator_; diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index d87dd9a4b6df288065389a335a9ddb4047dd096a..72ee4e5411c21e172166e71fb8baa961ae2a63af 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -140,7 +140,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) { InsertFreeNode(chunk_it); delete allocation; } -Allocation* BestFitAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation* BestFitAllocator::AllocateImpl(size_t size) { auto highest_set_bit = static_cast(HighestBitPos(size)); MapIt map_it; for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index c137438c0c35a575d366a1dfdf950262f711defa..64a552e4fd2af1f661e3174e5041ffc71f74fa2c 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -120,7 +120,7 @@ class BestFitAllocator : public Allocator { protected: void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation* AllocateImpl(size_t size) override; private: Allocation* allocation_; // not owned diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc index b274b05562b15856276b1c88d3504fda1ecafacc..7e5207e6345bbd8ec02fdc897466c269779e2830 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" +#include #include #include // NOLINT +#include #include #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" @@ -33,10 +35,10 @@ class StubAllocation : public Allocation { TEST(BestFitAllocator, test_allocation) { StubAllocation stub(4UL * 1024 * 1024 * 1024); BestFitAllocator allocator(&stub); - { auto allocation = allocator.Allocate(64, allocator.kDefault); } + { auto allocation = allocator.Allocate(64); } { - auto allocation = allocator.Allocate(80, allocator.kDefault); + auto allocation = allocator.Allocate(80); { auto best_fit_allocation = @@ -48,10 +50,10 @@ TEST(BestFitAllocator, test_allocation) { ASSERT_EQ(allocation->ptr(), nullptr); } - auto allocation2 = allocator.Allocate(60, allocator.kDefault); - auto allocation3 = allocator.Allocate(90, allocator.kDefault); + auto allocation2 = allocator.Allocate(60); + auto allocation3 = allocator.Allocate(90); allocation2.reset(); - allocation2 = allocator.Allocate(30, allocator.kDefault); + allocation2 = allocator.Allocate(30); { auto best_fit_allocation = @@ -59,7 +61,7 @@ TEST(BestFitAllocator, test_allocation) { ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80); } allocation2.reset(); - allocation2 = allocator.Allocate(60, allocator.kDefault); + allocation2 = allocator.Allocate(60); { auto best_fit_allocation = @@ -70,7 +72,7 @@ TEST(BestFitAllocator, test_allocation) { allocation.reset(); allocation2.reset(); - allocation = allocator.Allocate(80 + 60, allocator.kDefault); + allocation = allocator.Allocate(80 + 60); { auto best_fit_allocation = dynamic_cast(allocation.get()); @@ -79,8 +81,8 @@ TEST(BestFitAllocator, test_allocation) { allocation.reset(); - allocation = allocator.Allocate(80, allocator.kDefault); - allocation2 = allocator.Allocate(60, allocator.kDefault); + allocation = allocator.Allocate(80); + allocation2 = allocator.Allocate(60); allocation = nullptr; allocation2 = nullptr; allocation3 = nullptr; @@ -91,8 +93,7 @@ TEST(BestFitAllocator, test_allocation) { TEST(BestFitAllocator, test_concurrent_cpu_allocation) { CPUAllocator allocator; - auto global_allocation = - allocator.Allocate(256UL * 1024 * 1024, allocator.kDefault); + auto global_allocation = allocator.Allocate(256UL * 1024 * 1024); std::unique_ptr best_fit_allocator( new BestFitAllocator(global_allocation.get())); @@ -106,8 +107,8 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) { for (size_t i = 0; i < 128; ++i) { size_t allocate_size = dist(engine); - auto allocation = locked_allocator.Allocate( - sizeof(size_t) * allocate_size, locked_allocator.kDefault); + auto allocation = + locked_allocator.Allocate(sizeof(size_t) * allocate_size); size_t* data = reinterpret_cast(allocation->ptr()); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu index fdd5b43ad4aa8024efee314ca949445fefbef067..eb24ba84c886e3393cf36b6f764d7b33e76defeb 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include // NOLINT #include @@ -36,8 +37,7 @@ struct ForEachFill { TEST(BestFitAllocator, concurrent_cuda) { CUDAAllocator allocator(platform::CUDAPlace(0)); // 256 MB - auto cuda_allocation = - allocator.Allocate(256U * 1024 * 1024, allocator.kDefault); + auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024); LockedAllocator concurrent_allocator( std::unique_ptr(new BestFitAllocator(cuda_allocation.get()))); @@ -50,8 +50,8 @@ TEST(BestFitAllocator, concurrent_cuda) { for (size_t i = 0; i < 128; ++i) { size_t allocate_size = dist(engine); - auto allocation = concurrent_allocator.Allocate( - sizeof(size_t) * allocate_size, concurrent_allocator.kDefault); + auto allocation = + concurrent_allocator.Allocate(sizeof(size_t) * allocate_size); size_t* data = reinterpret_cast(allocation->ptr()); diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index 2f3e6205c3c1713756bce254c947f9cd500e3d46..d80616b7a8d39a5e1074ce240d9c4ddb069b212a 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -53,7 +53,7 @@ void BufferedAllocator::FreeImpl(Allocation *allocation) { allocations_.emplace(allocation->size(), AllocationPtr(allocation)); } -Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation *BufferedAllocator::AllocateImpl(size_t size) { { platform::LockGuardPtr guard(mtx_); auto it = allocations_.lower_bound(size); @@ -65,10 +65,10 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { } try { - return underlying_allocator_->Allocate(size, attr).release(); + return underlying_allocator_->Allocate(size).release(); } catch (BadAlloc &) { FreeCache(size); - return underlying_allocator_->Allocate(size, attr).release(); + return underlying_allocator_->Allocate(size).release(); } } diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index c728395705842d29a7b2a8441a7048a7e4bf5e6b..fd0996f7748ef407262dba7bca705af9b5fb9674 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -45,7 +45,7 @@ class BufferedAllocator : public Allocator { protected: void FreeImpl(Allocation *allocation) override; - Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation *AllocateImpl(size_t size) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index 854a117b0e7532962d5e0c95fd947527ac3b307a..e4825233d58c7386bc1b7456cdc5c11f03f6b90e 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -36,7 +36,7 @@ inline std::unique_ptr GetBufferedAllocator( TEST(buffered_allocator, thread_safety) { std::unique_ptr allocator(new CPUAllocator()); - auto chunk = allocator->Allocate(1 << 20, allocator->kDefault); + auto chunk = allocator->Allocate(1 << 20); { auto buf_allocator = GetBufferedAllocator(chunk.get(), true); ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true); @@ -72,7 +72,7 @@ class StubAllocator : public Allocator { ++destruct_count_; delete allocation; } - Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override { + Allocation *AllocateImpl(size_t size) override { ++construct_count_; if (size == 0) { return 
new StubAllocation(nullptr, 0, platform::CPUPlace()); @@ -98,7 +98,7 @@ TEST(buffered_allocator, lazy_free) { { underlying_allocator->ResetCounter(); - auto x = allocator->Allocate(1025, allocator->kDefault); + auto x = allocator->Allocate(1025); ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); x = nullptr; @@ -107,10 +107,10 @@ TEST(buffered_allocator, lazy_free) { { underlying_allocator->ResetCounter(); - auto x = allocator->Allocate(900, allocator->kDefault); + auto x = allocator->Allocate(900); ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero); ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - auto y = allocator->Allocate(2048, allocator->kDefault); + auto y = allocator->Allocate(2048); ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); x = nullptr; @@ -129,13 +129,13 @@ TEST(buffered_allocator, lazy_free) { TEST(buffered_allocator, garbage_collection) { std::unique_ptr cpu_allocator(new CPUAllocator()); - auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault); + auto chunk = cpu_allocator->Allocate(2048); auto allocator = GetBufferedAllocator(chunk.get(), false); - auto x1 = allocator->Allocate(1600, allocator->kDefault); - auto x2 = allocator->Allocate(400, allocator->kDefault); + auto x1 = allocator->Allocate(1600); + auto x2 = allocator->Allocate(400); x1 = nullptr; x2 = nullptr; - auto x3 = allocator->Allocate(1600, allocator->kDefault); + auto x3 = allocator->Allocate(1600); ASSERT_NE(x3, nullptr); ASSERT_NE(x3->ptr(), nullptr); } diff --git a/paddle/fluid/memory/allocation/conditional_allocator.cc b/paddle/fluid/memory/allocation/conditional_allocator.cc index 96a818e03e507c6de720344288312dc2af2ae647..373afb1bd6e1ff1582f8aa737ac1ff19309909de 100644 --- a/paddle/fluid/memory/allocation/conditional_allocator.cc +++ b/paddle/fluid/memory/allocation/conditional_allocator.cc @@ -13,14 +13,14 @@ // limitations under the License. 
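// Note on the hunk below: the AddAllocator predicate narrows from
// std::function<bool(size_t, Attr)> to std::function<bool(size_t)> (compare
// the lambda signatures updated in allocator_facade.cc above), so routing
// decisions can key only on the requested byte count.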
#include "paddle/fluid/memory/allocation/conditional_allocator.h" +#include namespace paddle { namespace memory { namespace allocation { ConditionalAllocator& ConditionalAllocator::AddAllocator( - std::function func, - std::shared_ptr allocator) { + std::function func, std::shared_ptr allocator) { underlying_allocators_.emplace_back(std::move(func), std::move(allocator)); return *this; } @@ -33,11 +33,10 @@ bool ConditionalAllocator::IsAllocThreadSafe() const { }); } -Allocation* ConditionalAllocator::AllocateImpl(size_t size, - Allocator::Attr attr) { +Allocation* ConditionalAllocator::AllocateImpl(size_t size) { for (auto& pair : underlying_allocators_) { - if (pair.first(size, attr)) { - return pair.second->Allocate(size, attr).release(); + if (pair.first(size)) { + return pair.second->Allocate(size).release(); } } throw BadAlloc("No suitable allocator"); diff --git a/paddle/fluid/memory/allocation/conditional_allocator.h b/paddle/fluid/memory/allocation/conditional_allocator.h index 94cba4432ed4f72c0a75da9b31d48611a8404ad3..61c3670803a3c5a87a5bbf640ec584b611d06140 100644 --- a/paddle/fluid/memory/allocation/conditional_allocator.h +++ b/paddle/fluid/memory/allocation/conditional_allocator.h @@ -14,6 +14,7 @@ #pragma once #include +#include #include #include #include "paddle/fluid/memory/allocation/allocator.h" @@ -28,13 +29,10 @@ namespace allocation { // For example: // // auto* cond_allocator = new ConditionalAllocator(); -// cond_allocator->AddAllocator([](size_t size, Attr attr){ +// cond_allocator->AddAllocator([](size_t size){ // // if size > 10 // return size > 10; -// }, allocator_a).AddAllocator([](size_t size, Attr attr){ -// // elif attr is kDefault -// return attr == kDefault; -// }, allocator_b).AddAllocator([](size_t size, Attr attr){ +// }, allocator_b).AddAllocator([](size_t size){ // // else // return true; // }, allocator_c); @@ -42,17 +40,17 @@ class ConditionalAllocator : public Allocator { public: ConditionalAllocator() = default; - ConditionalAllocator& AddAllocator(std::function func, + ConditionalAllocator& AddAllocator(std::function func, std::shared_ptr allocator); bool IsAllocThreadSafe() const override; protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation* AllocateImpl(size_t size) override; private: using AllocatorWithCond = - std::pair, std::shared_ptr>; + std::pair, std::shared_ptr>; std::vector underlying_allocators_; }; diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index 90c49c87a677aa38bce35774b3a7bb698e6f43e7..580cf1af56ab0ad2f096f9b6fefaff0ba0e501a0 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -32,7 +32,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) { delete allocation; } -Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation *CPUAllocator::AllocateImpl(size_t size) { void *p; #ifdef _WIN32 p = _aligned_malloc(size, kAlignment); diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index 3eb1416b0efa9327f2052e1f128359bc93f94986..058ff63381658da698841c839425dec000a748da 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -38,7 +38,7 @@ class CPUAllocator : public Allocator { protected: void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + 
Allocation* AllocateImpl(size_t size) override; }; } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 895a24a6a2a6b8e399ec2ace48136d1ef16c62f6..349c71cece16898da33d1dac3e979c4694b6f7b7 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -31,7 +31,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) { delete allocation; } -Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation* CUDAAllocator::AllocateImpl(size_t size) { platform::CUDADeviceGuard guard(place_.device); void* ptr; auto status = cudaMalloc(&ptr, size); diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 580a2d1df1d5997a27180740393741ec8973bf18..886f6e7a327f70068c6fabb6328f927bf71b2881 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -29,7 +29,7 @@ class CUDAAllocator : public Allocator { protected: void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation* AllocateImpl(size_t size) override; private: platform::CUDAPlace place_; diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc index 0f4a55cedb93f1bd0965493299160ed3b8e8f94d..9b6f7d421146883f8ddd336a6573df2f8293d1e2 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.cc +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -339,7 +339,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const { namespace allocation { LegacyMemMonitor GPUMemMonitor; -Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation *LegacyAllocator::AllocateImpl(size_t size) { void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_); auto *tmp_alloc = new Allocation(ptr, size, place_); platform::MemEvenRecorder::Instance().PushMemRecord( diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h index 27cd42ea35012f07ae7db79c46d767138ddaafff..c7efb5fd2e5a9b4292f83e6ecba1549fb293c56c 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.h +++ b/paddle/fluid/memory/allocation/legacy_allocator.h @@ -72,7 +72,7 @@ class LegacyAllocator : public Allocator { explicit LegacyAllocator(const platform::Place &p) : place_(p) {} protected: - Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation *AllocateImpl(size_t size) override; void FreeImpl(Allocation *allocation) override; private: diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc index e9ec39c893255fe297f38e68eedaa68f3e6496b0..a912807645bafee3c1cb63f03ff456418033b416 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ b/paddle/fluid/memory/allocation/locked_allocator.cc @@ -37,9 +37,9 @@ void LockedAllocator::FreeImpl(Allocation *allocation) { underlying_allocator_->Free(allocation); } -Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation *LockedAllocator::AllocateImpl(size_t size) { platform::LockGuardPtr guard(mtx_); - return underlying_allocator_->Allocate(size, attr).release(); + return underlying_allocator_->Allocate(size).release(); } } // namespace allocation diff --git 
a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h index b735ccef101417b3f880eb6dcdd9964cffbe875c..4af77e6e057f54d15dcb0248ba6cf36f6f00c2f1 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ b/paddle/fluid/memory/allocation/locked_allocator.h @@ -29,7 +29,7 @@ class LockedAllocator : public Allocator { protected: void FreeImpl(Allocation *allocation) override; - Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation *AllocateImpl(size_t size) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index 5a3d817211750d3e19e65344d1eab5a96800c674..35391167fe66b9b941e3a5359db452ced7995762 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -24,8 +24,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); delete allocation; } -Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, - Allocator::Attr attr) { +Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) { void *ptr; PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); return new Allocation(ptr, size, platform::CUDAPinnedPlace()); diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h index deeb55a8fb0396a312286f5c2692114e9e4afc8d..4f535ef33734a3c6f7048ae6538e4332e0c9e8e4 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.h +++ b/paddle/fluid/memory/allocation/pinned_allocator.h @@ -26,7 +26,7 @@ class CPUPinnedAllocator : public Allocator { protected: void FreeImpl(Allocation *allocation) override; - Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation *AllocateImpl(size_t size) override; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc index 167dd923dbbe9d04861c015c013c1211046be76c..bf14ed5db10fc475a7bbaa8bb6759f90c5a207de 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.cc +++ b/paddle/fluid/memory/allocation/retry_allocator.cc @@ -23,9 +23,9 @@ void RetryAllocator::FreeImpl(Allocation* allocation) { cv_.notify_all(); } -Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { +Allocation* RetryAllocator::AllocateImpl(size_t size) { auto alloc_func = [&, this]() { - return underlying_allocator_->Allocate(size, attr).release(); + return underlying_allocator_->Allocate(size).release(); }; // In fact, we can unify the code of allocation success and failure // But it would add lock even when allocation success at the first time diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h index 379f576d6e1ed8f256a0233b203423a487ee73e4..7840a834472c831f500622535f270fcf39732a67 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.h +++ b/paddle/fluid/memory/allocation/retry_allocator.h @@ -40,7 +40,7 @@ class RetryAllocator : public Allocator { protected: void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + Allocation* AllocateImpl(size_t size) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc index 
345b5f44d3de9b68017410156740886e08a81b15..4ac08d442d4bd3cb7edc4db020e5c3242b13b535 100644 --- a/paddle/fluid/memory/allocation/retry_allocator_test.cc +++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc @@ -32,7 +32,7 @@ TEST(RetryAllocator, RetryAllocator) { CPUAllocator cpu_allocator; size_t size = (1 << 20); - auto cpu_allocation = cpu_allocator.Allocate(size, cpu_allocator.kDefault); + auto cpu_allocation = cpu_allocator.Allocate(size); std::unique_ptr best_fit_allocator( new BestFitAllocator(cpu_allocation.get())); diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index e414ad657a9447142d6e3a42fc7efc86f01e9c9f..5884433aaff115c053b10848b32f8610fcb69747 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -21,13 +21,12 @@ limitations under the License. */ namespace paddle { namespace memory { std::shared_ptr AllocShared(const platform::Place& place, - size_t size, Allocator::Attr attr) { - return allocation::AllocatorFacade::Instance().AllocShared(place, size, attr); + size_t size) { + return allocation::AllocatorFacade::Instance().AllocShared(place, size); } -AllocationPtr Alloc(const platform::Place& place, size_t size, - Allocator::Attr attr) { - return allocation::AllocatorFacade::Instance().Alloc(place, size, attr); +AllocationPtr Alloc(const platform::Place& place, size_t size) { + return allocation::AllocatorFacade::Instance().Alloc(place, size); } } // namespace memory diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h index 916538b2a659d7d9503fdc337a4ba84fa21f77f9..6731203fccb67fc5ded018bbe2ca51878da1a4c3 100644 --- a/paddle/fluid/memory/malloc.h +++ b/paddle/fluid/memory/malloc.h @@ -23,12 +23,10 @@ using allocation::Allocation; using allocation::Allocator; using allocation::AllocationPtr; -extern std::shared_ptr AllocShared( - const platform::Place& place, size_t size, - Allocator::Attr attr = Allocator::kDefault); +extern std::shared_ptr AllocShared(const platform::Place& place, + size_t size); -extern AllocationPtr Alloc(const platform::Place& place, size_t size, - Allocator::Attr attr = Allocator::kDefault); +extern AllocationPtr Alloc(const platform::Place& place, size_t size); } // namespace memory } // namespace paddle diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc index 51141ceb2f178f8fb10e1f1d5669abbcdaa6bd7b..054deeaa710c8e058118b33662a15542678bf961 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu.cc +++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc @@ -136,7 +136,7 @@ class CUDNNConvOpKernel : public framework::OpKernel { } // ------------------- cudnn conv algorithm --------------------- - cudnnConvolutionFwdAlgo_t algo; + cudnnConvolutionFwdAlgo_t algo{}; bool half_float = false; #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) @@ -361,8 +361,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { int group_offset_out = o_c / groups * o_h * o_w * o_d; int group_offset_filter = filter->numel() / groups; // ------------------- cudnn backward algorithm --------------------- - cudnnConvolutionBwdDataAlgo_t data_algo; - cudnnConvolutionBwdFilterAlgo_t filter_algo; + cudnnConvolutionBwdDataAlgo_t data_algo{}; + cudnnConvolutionBwdFilterAlgo_t filter_algo{}; size_t workspace_size_in_bytes = 0, tmp_size = 0; size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { diff --git a/paddle/fluid/operators/cvm_op.h b/paddle/fluid/operators/cvm_op.h index 
77cb7e446b7bc8179dc4832fa55cce4754e06ced..c6140483ff5cb8108895546b6a01f058708231fd 100644 --- a/paddle/fluid/operators/cvm_op.h +++ b/paddle/fluid/operators/cvm_op.h @@ -73,8 +73,8 @@ class CVMOpKernel : public framework::OpKernel { } } else { auto lod = x->lod()[0]; - for (int i = 0; i < lod.size() - 1; ++i) { - for (int j = 0; j < lod[i + 1] - lod[i]; ++j) { + for (size_t i = 0; i < lod.size() - 1; ++i) { + for (size_t j = 0; j < lod[i + 1] - lod[i]; ++j) { CvmComputeKernel(use_cvm, item_size, &x_data, &y_data); } } @@ -113,7 +113,7 @@ class CVMGradOpKernel : public framework::OpKernel { auto lod = dx->lod()[0]; int seq_num = static_cast(lod.size()) - 1; for (int i = 0; i < seq_num; ++i) { - for (int j = 0; j < lod[i + 1] - lod[i]; ++j) { + for (size_t j = 0; j < lod[i + 1] - lod[i]; ++j) { CvmGradComputeKernel(use_cvm, item_size, *cvm_data, &dout_data, &dx_data); } diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index 8fd76e5406b1de79e0cfd738f969fa27c40ced0f..ba0b4ac010b75b916e2e9cfbc2d6d287a1790308 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -127,8 +127,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { nullptr, temp_storage_bytes, concat_scores.data(), keys_out, idx_in, idx_out, total_roi_num); // Allocate temporary storage - auto d_temp_storage = memory::Alloc(place, temp_storage_bytes, - memory::Allocator::kScratchpad); + auto d_temp_storage = memory::Alloc(place, temp_storage_bytes); // Run sorting operation // sort score to get corresponding index @@ -160,8 +159,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { nullptr, temp_storage_bytes, sorted_batch_id.data(), out_id_data, batch_idx_in, index_out_t.data(), real_post_num); // Allocate temporary storage - d_temp_storage = memory::Alloc(place, temp_storage_bytes, - memory::Allocator::kScratchpad); + d_temp_storage = memory::Alloc(place, temp_storage_bytes); // Run sorting operation // sort batch_id to get corresponding index diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu index 598510870a671468ba9b72438235f2dfec122401..f34866360f91b8e75d8e0e89425ba2b2e83af8af 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu @@ -140,8 +140,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel { target_lvls_data, keys_out, idx_in, idx_out, roi_num); // Allocate temporary storage - auto d_temp_storage = memory::Alloc(place, temp_storage_bytes, - memory::Allocator::kScratchpad); + auto d_temp_storage = memory::Alloc(place, temp_storage_bytes); // Run sorting operation // sort target level to get corresponding index diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index 2dfd9befdb7e536f388e439dc1449a709185509c..43deb5f9f3871b69ca46b7908c56c1236c1c5595 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -70,8 +70,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx, nullptr, temp_storage_bytes, keys_in, keys_out, idx_in, idx_out, num); // Allocate temporary storage auto place = boost::get(ctx.GetPlace()); - auto d_temp_storage = - 
memory::Alloc(place, temp_storage_bytes, memory::Allocator::kScratchpad); + auto d_temp_storage = memory::Alloc(place, temp_storage_bytes); // Run sorting operation cub::DeviceRadixSort::SortPairsDescending( diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.cc b/paddle/fluid/operators/distributed/sendrecvop_utils.cc index e5c96507e97267c3d0519a27a36cbac0336c7f28..9bd2c9928ccdb6416976b76e776fb22b28ea1f5d 100644 --- a/paddle/fluid/operators/distributed/sendrecvop_utils.cc +++ b/paddle/fluid/operators/distributed/sendrecvop_utils.cc @@ -15,6 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #endif +#include #include // NOLINT #include "paddle/fluid/framework/data_type.h" @@ -39,8 +40,7 @@ static TensorPayload GetCommunicationAllocationFromTensor( reinterpret_cast(ctx); auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type()); platform::CUDAPinnedPlace cuda_pinned; - auto result = memory::AllocShared( - cuda_pinned, copy_size, memory::allocation::Allocator::kCrossDevice); + auto result = memory::AllocShared(cuda_pinned, copy_size); memory::Copy(cuda_pinned, result->ptr(), boost::get(tensor.place()), diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 25bd6a9ead24601f43e8fe548f9b9bcdcc0ad12c..10d3b6ce4f79e7c59d7d3588b3d481d01ef04c46 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -234,9 +234,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { "same dimension sizes"); if (residual_param->format() != handler.GetDstFormat()) { - auto output_data = output->mutable_data( - ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, - handler.GetDstMemorySize()); + auto output_data = + output->mutable_data(ctx.GetPlace(), handler.GetDstMemorySize()); auto residual_data_tz = paddle::framework::vectorize2int(residual_param->dims()); auto residual_data_type = @@ -256,9 +255,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); } } else { - auto output_data = output->mutable_data( - ctx.GetPlace(), paddle::memory::Allocator::kDefault, - handler.GetDstMemorySize()); + auto output_data = + output->mutable_data(ctx.GetPlace(), handler.GetDstMemorySize()); dst_memory_p = handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); } @@ -893,8 +891,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { user_diff_dst_memory_p, pipeline); const size_t size = handler.GetDiffWeightsMemorySize(); - filter_grad_data = filter_grad->mutable_data( - ctx.GetPlace(), paddle::memory::Allocator::kDefault, size); + filter_grad_data = filter_grad->mutable_data(ctx.GetPlace(), size); auto diff_weights_memory_p = handler.AcquireDiffWeightsMemoryFromWeightsPrimitive( @@ -919,8 +916,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { pipeline); const size_t size = handler.GetDiffSourceMemorySize(); - input_grad_data = input_grad->mutable_data( - ctx.GetPlace(), paddle::memory::Allocator::kDefault, size); + input_grad_data = input_grad->mutable_data(ctx.GetPlace(), size); auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive( reinterpret_cast(input_grad_data)); diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 95494bce5a667142a5a850d3f0f44013fd8dd1b1..6d5982ab3f8ab65e3480dcf905dd8901759f90e0 100644 --- 
a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -188,9 +188,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { std::shared_ptr dst_memory_p; - auto output_data = output->mutable_data( - ctx.GetPlace(), paddle::memory::Allocator::kDefault, - handler.GetDstMemorySize()); + auto output_data = + output->mutable_data(ctx.GetPlace(), handler.GetDstMemorySize()); dst_memory_p = handler.AcquireDstMemoryFromPrimitive( platform::to_void_cast(output_data)); diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 764183f085ac0664a1f6c02a8bfbd1f4625b80ae..b525eaac3ef87f663a4a22c32017a3c5c3a38a20 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -198,8 +198,7 @@ class FCPrimitiveFactory { const ExecutionContext& ctx, Tensor* output) { auto dst_prim_desc = fc_prim_desc.dst_primitive_desc(); auto buffer_size = dst_prim_desc.get_size(); - T* output_data = output->mutable_data( - ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, buffer_size); + T* output_data = output->mutable_data(ctx.GetPlace(), buffer_size); output->set_format((memory::format)dst_prim_desc.desc().data.format); return memory(dst_prim_desc, to_void_cast(output_data)); } diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index f55c44402507fa30fa0ac554173d9896b703043e..caaf0e2c50c3ed3410ea8d886f1eaeea06f19446 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -167,8 +167,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { if (UNLIKELY(num_bytes == 0)) { return nullptr; } - auto buf = paddle::memory::Alloc(place_, num_bytes, - memory::Allocator::kScratchpad); + auto buf = paddle::memory::Alloc(place_, num_bytes); void* retv = buf->ptr(); { std::lock_guard lock(mtx_); @@ -232,8 +231,7 @@ void CudnnHolder::ReallocateWorkspace(size_t required_workspace_len) { PADDLE_ENFORCE(cudaStreamSynchronize(*stream_)); workspace_.reset(); } - workspace_ = paddle::memory::Alloc(place_, required_workspace_len, - paddle::memory::Allocator::kScratchpad); + workspace_ = paddle::memory::Alloc(place_, required_workspace_len); } CUDADeviceContext::CUDADeviceContext(CUDAPlace place) diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 87405fb5507d92ff53918287fcfc13ada9b3933e..fa36a49fb88f0e3cd49031d7e258f04fe5e4f18c 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -325,8 +325,7 @@ class TransposeMKLDNNHandler : public MKLDNNHandler { auto dst_mdp = mkldnn::memory::primitive_desc{ Axis2MemoryDesc(dims_, axis_), engine_}; - auto dst_data = output->mutable_data( - place, paddle::memory::Allocator::kDefault, dst_mdp.get_size()); + auto dst_data = output->mutable_data(place, dst_mdp.get_size()); mem_p = std::make_shared(dst_mdp, dst_data); dev_ctx_.SetBlob(local_key, mem_p); @@ -865,9 +864,8 @@ template static std::shared_ptr SetDstMemory( const framework::ExecutionContext& ctx, framework::Tensor* output, const std::shared_ptr& handler) { - T* output_data = output->mutable_data( - ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, - handler->GetDstMemorySize()); + T* output_data = + output->mutable_data(ctx.GetPlace(), handler->GetDstMemorySize()); std::shared_ptr dst_memory_p = handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); 
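// The byte count passed to mutable_data above comes from the primitive's
// destination descriptor via handler->GetDstMemorySize(); for blocked
// MKL-DNN formats it can exceed numel() * sizeof(T), which is why the
// explicit requested_size overload is used here.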
return dst_memory_p; @@ -898,9 +896,8 @@ static void SetDstMemoryHandler( const framework::ExecutionContext& ctx, framework::Tensor* output, const std::shared_ptr& handler, std::shared_ptr* dst_memory_p) { - T* output_data = output->mutable_data( - ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, - handler->GetDstMemorySize()); + T* output_data = + output->mutable_data(ctx.GetPlace(), handler->GetDstMemorySize()); (*dst_memory_p)->set_data_handle(to_void_cast(output_data)); } diff --git a/paddle/fluid/platform/temporary_allocator.cc b/paddle/fluid/platform/temporary_allocator.cc index fe2f528be347a7899daff45f0ec68942dd00c0a4..6177b024f0ccbeeae14106868e2fc5ca7b8789eb 100644 --- a/paddle/fluid/platform/temporary_allocator.cc +++ b/paddle/fluid/platform/temporary_allocator.cc @@ -90,8 +90,7 @@ void TemporaryAllocator::SetCallback(const std::function &callback) { callback_ = callback; } -alloc::Allocation *TemporaryAllocator::AllocateImpl( - size_t size, alloc::Allocator::Attr attr) { +alloc::Allocation *TemporaryAllocator::AllocateImpl(size_t size) { { // Find available allocation in temp_mem_map. std::unique_lock lock(mtx_); @@ -113,7 +112,7 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( } // If not find the the available allocation, get allocation from // AllocatorFacadeInstance. - auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); + auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size); VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; return temp_mem.release(); } diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h index 912d45eaf17fe8c05840995275dd3e2e688b38ef..41f0e4a80b735e6c4eabce864ac5a1dfe1d67ced 100644 --- a/paddle/fluid/platform/temporary_allocator.h +++ b/paddle/fluid/platform/temporary_allocator.h @@ -51,8 +51,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { protected: void FreeImpl(memory::allocation::Allocation *allocation) override; - memory::allocation::Allocation *AllocateImpl( - size_t size, memory::allocation::Allocator::Attr attr) override; + memory::allocation::Allocation *AllocateImpl(size_t size) override; private: platform::Place place_;
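For reference, a minimal sketch of what implementing the slimmed-down interface looks like after this patch: AllocateImpl takes only a byte count, with no Allocator::Attr hint. Allocation, Allocator, and MallocAllocator below are simplified stand-ins, not the real paddle::memory::allocation classes (which also carry a platform::Place and decorated-allocator bookkeeping).

```cpp
#include <cstddef>
#include <cstdlib>
#include <memory>
#include <new>

// Simplified stand-in for paddle::memory::allocation::Allocation.
class Allocation {
 public:
  Allocation(void* ptr, std::size_t size) : ptr_(ptr), size_(size) {}
  void* ptr() const { return ptr_; }
  std::size_t size() const { return size_; }

 private:
  void* ptr_;
  std::size_t size_;
};

// Simplified stand-in for paddle::memory::allocation::Allocator.
class Allocator {
 public:
  virtual ~Allocator() = default;

  // Public entry point: after this patch it forwards only the size.
  std::unique_ptr<Allocation> Allocate(std::size_t size) {
    return std::unique_ptr<Allocation>(AllocateImpl(size));
  }

 protected:
  // Single-parameter hook every subclass overrides; the Attr argument
  // (kDefault, kScratchpad, kCrossDevice, ...) is gone.
  virtual Allocation* AllocateImpl(std::size_t size) = 0;
};

class MallocAllocator : public Allocator {
 protected:
  Allocation* AllocateImpl(std::size_t size) override {
    void* p = std::malloc(size);
    if (p == nullptr) throw std::bad_alloc();
    return new Allocation(p, size);
  }
};

int main() {
  MallocAllocator allocator;
  auto allocation = allocator.Allocate(256);  // no second Attr argument
  std::free(allocation->ptr());
  return 0;
}
```

Call sites change the same way throughout the patch: memory::Alloc(place, size) and tensor->mutable_data(place, size) simply drop the trailing attribute argument.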