diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 0a46c83a2b3ad78d212fc05f0cedc6b50fe15265..09e4abc77f5734df79bfafe76b4eda705a724bd1 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -410,8 +410,8 @@ class ExecutionContext {
     auto tmp_allocation_ptr = memory::Alloc(dev_ctx, product(dim) * sizeof(T));
     auto& deleter = tmp_allocation_ptr.get_deleter();
     auto* allocation_ptr = tmp_allocation_ptr.release();
-    auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
-        allocation_ptr, deleter);
+    auto shared_allocation =
+        std::shared_ptr<pten::Allocation>(allocation_ptr, deleter);
 
     PADDLE_ENFORCE_GE(
         allocation_ptr->size(), framework::product(dim) * sizeof(T),
diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc
index f11b37825d4f03d0053b7636876676e2199eb4ae..6aa10a058081b888523f81cac926081f8d083e16 100644
--- a/paddle/fluid/framework/tensor.cc
+++ b/paddle/fluid/framework/tensor.cc
@@ -17,14 +17,6 @@ limitations under the License. */
 
 DECLARE_bool(use_stream_safe_cuda_allocator);
 
-namespace paddle {
-namespace memory {
-namespace allocation {
-class Allocation;
-}  // namespace allocation
-}  // namespace memory
-}  // namespace paddle
-
 namespace paddle {
 namespace framework {
 
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index e86009e9aafeac4d693b340191ef493a64ea4ee5..fcdb837bc80ce435b2a8fa6d4bddc9d3916109ce 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -32,14 +32,6 @@ limitations under the License. */
 
 #include "paddle/pten/core/dense_tensor.h"
 
-namespace paddle {
-namespace memory {
-namespace allocation {
-class Allocation;
-}  // namespace allocation
-}  // namespace memory
-}  // namespace paddle
-
 namespace paddle {
 
 namespace framework {
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index 7fd125834a0c31f759ded34d431581997a9fb151..5fd581220097b8a690546e7b6a6e7d01a9ba490b 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -151,8 +151,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index 46eba6a1e41bbe00114824d33a4433dbccba6154..11858e4166595dd756ec51384f6da2fe9277479e 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -183,8 +183,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
@@ -241,8 +240,7 @@ void TensorFromVector(const std::vector<T>& src,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
@@ -312,8 +310,7 @@ inline void TensorFromVector(const std::vector<bool>& src,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 01d4dbccd50eaf2c288110562784bdea5a66080b..2f2f4c0ead7604d185ab3ea1066bc9f88c66a2d0 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -223,9 +223,10 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
   auto t_place = tensor->place();
 
   paddle::framework::Tensor out;
-  auto mem_allocation = std::make_shared<paddle::memory::Allocation>(
-      static_cast<void *>(data), ele_num * sizeof(T),
-      paddle::platform::CPUPlace());
+  auto mem_allocation =
+      std::make_shared<paddle::memory::allocation::Allocation>(
+          static_cast<void *>(data), ele_num * sizeof(T),
+          paddle::platform::CPUPlace());
   out.ResetHolder(mem_allocation);
 
   if (paddle::platform::is_cpu_place(t_place)) {
diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc
index b1e0eb5ef16abacdd5e572c89979503930738c06..0d5cd29a0c57921df24f4fa0c966412c1e6fa207 100644
--- a/paddle/fluid/inference/lite/tensor_utils.cc
+++ b/paddle/fluid/inference/lite/tensor_utils.cc
@@ -257,9 +257,8 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
   size_t memory_size =
       GetLiteTensorNumel(*src) *
       framework::SizeOfType(GetNativePrecisionType(src->precision()));
-  std::shared_ptr<memory::allocation::Allocation> holder(
-      new memory::allocation::Allocation(src_raw_data, memory_size,
-                                         GetNativePlace(src->target())));
+  std::shared_ptr<pten::Allocation> holder(new pten::Allocation(
+      src_raw_data, memory_size, GetNativePlace(src->target())));
   dst->Resize(paddle::framework::make_ddim(src->shape()));
   SetLoD(dst->mutable_lod(), src->lod());
   dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision()));
diff --git a/paddle/fluid/memory/allocation/aligned_allocator.cc b/paddle/fluid/memory/allocation/aligned_allocator.cc
index 10380c0d6028d57422e17a7c1dff7845ad0390f1..258cff32b4fca5f0af5bfb837a185831f0140e7c 100644
--- a/paddle/fluid/memory/allocation/aligned_allocator.cc
+++ b/paddle/fluid/memory/allocation/aligned_allocator.cc
@@ -23,7 +23,7 @@ namespace allocation {
 // For memory address alignment
 class AlignedAllocation : public Allocation {
  public:
-  AlignedAllocation(AllocationPtr underlying_allocation, size_t offset)
+  AlignedAllocation(DecoratedAllocationPtr underlying_allocation, size_t offset)
       : Allocation(
             reinterpret_cast<uint8_t*>(underlying_allocation->ptr()) + offset,
             underlying_allocation->base_ptr(),
@@ -32,7 +32,7 @@ class AlignedAllocation : public Allocation {
         underlying_allocation_(std::move(underlying_allocation)) {}
 
  private:
-  AllocationPtr underlying_allocation_;
+  DecoratedAllocationPtr underlying_allocation_;
 };
 
 AlignedAllocator::AlignedAllocator(
@@ -52,13 +52,17 @@ bool AlignedAllocator::IsAllocThreadSafe() const {
   return underlying_allocator_->IsAllocThreadSafe();
 }
 
-Allocation* AlignedAllocator::AllocateImpl(size_t size) {
+pten::Allocation* AlignedAllocator::AllocateImpl(size_t size) {
   auto raw_allocation = underlying_allocator_->Allocate(size + alignment_);
   size_t offset = AlignedPtrOffset(raw_allocation->ptr(), alignment_);
-  return new AlignedAllocation(std::move(raw_allocation), offset);
+  // The wrapper owns raw_allocation and reports ptr() advanced by offset.
+  return new AlignedAllocation(
+      static_unique_ptr_cast<Allocation>(std::move(raw_allocation)), offset);
 }
 
-void AlignedAllocator::FreeImpl(Allocation* allocation) { delete allocation; }
+void AlignedAllocator::FreeImpl(pten::Allocation* allocation) {
+  delete allocation;  // presumably the AlignedAllocation from AllocateImpl
+}
 
 }  // namespace allocation
 }  // namespace memory
diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h
index 6fef5cae8d6af32a8b5b2eff0c74ab94410bd070..ffd5ad0fae1b0c4b83f87df877b9dfd71a286fb6 100644
--- a/paddle/fluid/memory/allocation/aligned_allocator.h
+++ b/paddle/fluid/memory/allocation/aligned_allocator.h
@@ -30,9 +30,9 @@ class AlignedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  Allocation* AllocateImpl(size_t size) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 
-  void FreeImpl(Allocation* allocation) override;
+  void FreeImpl(pten::Allocation* allocation) override;
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc
index 4998f3dbb9613abbf5ca67a3d43863d01483b79f..0ef6f5cbab5cce3439378a1da38bf1adebe2e39d 100644
--- a/paddle/fluid/memory/allocation/allocator.cc
+++ b/paddle/fluid/memory/allocation/allocator.cc
@@ -18,11 +18,10 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-bool Allocator::IsAllocThreadSafe() const { return false; }
-
-void Allocator::FreeImpl(Allocation* allocation) {
-  Allocator* allocator = allocation->TopDecoratedAllocator();
-  allocator->Free(allocation);
+void Allocator::FreeImpl(pten::Allocation* allocation) {
+  // Default: pass the allocation to the allocator on top of its chain.
+  auto* decorated = static_cast<Allocation*>(allocation);
+  decorated->TopDecoratedAllocator()->Free(allocation);
 }
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index ee802462ddc943244fc9cbdbcd7cb8cdd52f8e47..3f04d47516377251011174b1382679ba41fdca02 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -22,6 +22,7 @@
 #include "paddle/fluid/framework/inlined_vector.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/core/allocator.h"
 
 DECLARE_string(allocator_strategy);
 
@@ -80,30 +81,19 @@ class Allocator;
  * e.g., something what is done in AlignedAllocator, etc.
  * In this case, we should declare a derived class of Allocation, which
  * contains an underlying Allocation allocated by the underlying allocator.
- * Therefore, `decorated_allocators_` of the new Allocation object would
+ * Therefore, `decorated_allocators_` of the new Allocation object
+ * would
  * be a new chain, differing from the underlying Allocation object.
  */
-class Allocation {
+class Allocation : public pten::Allocation {
  public:
-  inline Allocation(void* ptr, size_t size, platform::Place place)
-      : ptr_(ptr), base_ptr_(ptr), size_(size), place_(place) {}
-  inline Allocation(void* ptr, void* base_ptr, size_t size,
-                    platform::Place place)
-      : ptr_(ptr), base_ptr_(base_ptr), size_(size), place_(place) {}
-
-  Allocation(const Allocation& o) = delete;
-  Allocation& operator=(const Allocation& o) = delete;
-  Allocation(Allocation&& o) = delete;
-  Allocation& operator=(Allocation&& o) = delete;
-
-  // Returns the holding pointer.
-  // NOTE: For performance consideration, it is better not to make this method
-  // as a virtual method. If we want to implement a `defragmentation` later,
-  // we might need to make `ptr_` field as a protected field, and add a virtual
-  // method like `defragmentation` to change `ptr_`.
-  inline void* ptr() const { return ptr_; }
-
-  inline void* base_ptr() const {
+  Allocation(void* ptr, size_t size, platform::Place place)
+      : pten::Allocation(ptr, size, place), base_ptr_(ptr) {}
+  Allocation(void* ptr, void* base_ptr, size_t size,
+             const platform::Place& place)
+      : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
+
+  void* base_ptr() const {
     PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
                       paddle::platform::errors::Unimplemented(
                           "base_ptr() is only implemented for auto_growth "
@@ -112,21 +102,6 @@ class Allocation {
     return base_ptr_;
   }
 
-  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
-  // last valid element.
-  //
-  // NOTE: Some allocator might alloc more memory than request. The size
-  // could larger than its request. For example,
-  //    the AlignedAllocator will always allocate memory as size + kAlignment.
-  //    The raw pointer might not aligned, so an offset might be added to raw
-  //    the pointer. The size of this allocation will be
-  //    `size + kAlignemnt - offset`.
-  inline size_t size() const { return size_; }
-
-  inline const platform::Place& place() const { return place_; }
-
-  virtual ~Allocation() {}
-
  private:
   inline void RegisterDecoratedAllocator(Allocator* allocator) {
     decorated_allocators_.emplace_back(allocator);
@@ -139,10 +114,7 @@ class Allocation {
   }
 
  private:
-  void* ptr_;
   void* base_ptr_;  // the point that directly requested from system
-  size_t size_;
-  platform::Place place_;
 
   /**
    * NOTE(zjl): Since decorated_allocators_ is usually a small vector.
@@ -162,53 +134,42 @@ class Allocation {
   friend class Allocator;
 };
 
+using AllocationPtr = pten::Allocator::AllocationPtr;
+using DecoratedAllocationPtr =
+    std::unique_ptr<Allocation, pten::Allocator::DeleterType>;
+
 // Base interface class of memory Allocator.
-class Allocator {
+class Allocator : public pten::Allocator {
  public:
-  virtual ~Allocator() {}
-
-  class AllocationDeleter {
-   public:
-    inline void operator()(Allocation* allocation) const {
-      Allocator* allocator = allocation->TopDecoratedAllocator();
-      allocator->Free(allocation);
-    }
-  };
-
-  using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
+  // Deleter bound to the AllocationPtr returned by Allocate().
+  static void AllocationDeleter(pten::Allocation* allocation) {
+    auto* decorated = static_cast<Allocation*>(allocation);
+    decorated->TopDecoratedAllocator()->Free(allocation);
+  }
 
   // Allocate an allocation.
   // size may be 0, but it would be too complex if we handle size == 0
   // in each Allocator. So we handle size == 0 inside AllocatorFacade
   // in our design.
-  inline AllocationPtr Allocate(size_t size) {
+  AllocationPtr Allocate(size_t size) override {
     auto ptr = AllocateImpl(size);
-    ptr->RegisterDecoratedAllocator(this);
-    return AllocationPtr(ptr);
+    static_cast<Allocation*>(ptr)->RegisterDecoratedAllocator(this);
+    return AllocationPtr(ptr, AllocationDeleter);
   }
 
-  // This function should not be called outside Allocator class
-  inline void Free(Allocation* allocation) {
-    allocation->PopDecoratedAllocator();
+  void Free(pten::Allocation* allocation) {
+    static_cast<Allocation*>(allocation)->PopDecoratedAllocator();
     FreeImpl(allocation);
   }
 
-  inline uint64_t Release(const platform::Place& place) {
-    return ReleaseImpl(place);
-  }
-
-  // True if the `Allocate` is thread safe.
-  virtual bool IsAllocThreadSafe() const;
+  uint64_t Release(const platform::Place& place) { return ReleaseImpl(place); }
 
  protected:
-  virtual Allocation* AllocateImpl(size_t size) = 0;
-  virtual void FreeImpl(Allocation* allocation);
+  virtual pten::Allocation* AllocateImpl(size_t size) = 0;
+  virtual void FreeImpl(pten::Allocation* allocation);
   virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; }
 };
 
-using AllocationDeleter = Allocator::AllocationDeleter;
-using AllocationPtr = Allocator::AllocationPtr;
-
 inline size_t AlignedSize(size_t size, size_t alignment) {
   auto remaining = size % alignment;
   return remaining == 0 ? size : size + alignment - remaining;
@@ -220,6 +181,14 @@ inline size_t AlignedPtrOffset(const void* ptr, size_t alignment) {
   return diff == 0 ? 0 : alignment - diff;
 }
 
+template <typename Derived, typename Base, typename BaseDel>
+decltype(auto) static_unique_ptr_cast(std::unique_ptr<Base, BaseDel>&& p) {
+  static_assert(std::is_base_of<Base, Derived>::value,
+                "Derived type must derive from Base.");
+  auto* d = static_cast<Derived*>(p.release());  // unchecked static downcast
+  return std::unique_ptr<Derived, BaseDel>(
+      d, static_cast<BaseDel&&>(p.get_deleter()));  // forwarded, not copied
+}
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 9bc2f5461f383fbeba509e6de7e5a81f7f7e2780..474b4fe3d4522d5ba413f3d499406d7a33ab8e55 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -94,7 +94,7 @@ class CUDAGraphAllocator
   class PrivateAllocation : public Allocation {
    public:
     PrivateAllocation(CUDAGraphAllocator* allocator,
-                      AllocationPtr underlying_allocation)
+                      DecoratedAllocationPtr underlying_allocation)
         : Allocation(
               underlying_allocation->ptr(), underlying_allocation->base_ptr(),
               underlying_allocation->size(), underlying_allocation->place()),
@@ -103,7 +103,7 @@ class CUDAGraphAllocator
 
    private:
     std::shared_ptr<Allocator> allocator_;
-    AllocationPtr underlying_allocation_;
+    DecoratedAllocationPtr underlying_allocation_;
   };
 
   explicit CUDAGraphAllocator(const std::shared_ptr<Allocator>& allocator)
@@ -116,12 +116,14 @@ class CUDAGraphAllocator
   }
 
  protected:
-  Allocation* AllocateImpl(size_t size) {
+  pten::Allocation* AllocateImpl(size_t size) {
     VLOG(10) << "Allocate " << size << " for CUDA Graph";
-    return new PrivateAllocation(this, underlying_allocator_->Allocate(size));
+    return new PrivateAllocation(this,
+                                 static_unique_ptr_cast<Allocation>(
+                                     underlying_allocator_->Allocate(size)));
   }
 
-  void FreeImpl(Allocation* allocation) {
+  void FreeImpl(pten::Allocation* allocation) {
     VLOG(10) << "delete for CUDA Graph";
     delete allocation;
   }
@@ -322,7 +324,7 @@ class AllocatorFacadePrivate {
     return static_cast<platform::CUDADeviceContext*>(pool.Get(place))->stream();
   }
 
-  void RecordStream(std::shared_ptr<Allocation> allocation,
+  void RecordStream(std::shared_ptr<pten::Allocation> allocation,
                     const gpuStream_t& stream) {
     if (allocation->size() == 0) {
       return;
@@ -339,7 +341,7 @@ class AllocatorFacadePrivate {
   }
 
   const gpuStream_t& GetStream(
-      const std::shared_ptr<Allocation>& allocation) const {
+      const std::shared_ptr<pten::Allocation>& allocation) const {
     const StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
         dynamic_cast<const StreamSafeCUDAAllocation*>(allocation.get());
     PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
@@ -391,10 +393,10 @@ class AllocatorFacadePrivate {
     bool IsAllocThreadSafe() const override { return true; }
 
    protected:
-    Allocation* AllocateImpl(size_t size) override {
+    pten::Allocation* AllocateImpl(size_t size) override {
       return new Allocation(nullptr, 0, place_);
     }
-    void FreeImpl(Allocation* allocation) override { delete allocation; }
+    void FreeImpl(pten::Allocation* allocation) override { delete allocation; }
 
    private:
     platform::Place place_;
@@ -820,9 +822,9 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
   return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
 }
 
-std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
+std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size) {
-  return std::shared_ptr<Allocation>(Alloc(place, size));
+  return std::shared_ptr<pten::Allocation>(Alloc(place, size));
 }
 
 AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
@@ -866,7 +868,7 @@ uint64_t AllocatorFacade::Release(const platform::Place& place) {
       ->Release(place);
 }
 
-std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
+std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size, const platform::Stream& stream) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   PADDLE_ENFORCE_EQ(
@@ -884,14 +886,14 @@ std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
   }
 #endif
   gpuStream_t s = reinterpret_cast<gpuStream_t>(stream.id());
-  return std::shared_ptr<Allocation>(Alloc(place, size, s));
+  return std::shared_ptr<pten::Allocation>(Alloc(place, size, s));
 #else
   PADDLE_THROW(platform::errors::PreconditionNotMet("Not compiled with GPU."));
 #endif
 }
 
 bool AllocatorFacade::InSameStream(
-    const std::shared_ptr<Allocation>& allocation,
+    const std::shared_ptr<pten::Allocation>& allocation,
     const platform::Stream& stream) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   PADDLE_ENFORCE_EQ(
@@ -962,7 +964,7 @@ uint64_t AllocatorFacade::Release(const platform::CUDAPlace& place,
   return m_->GetAllocator(place, stream)->Release(place);
 }
 
-void AllocatorFacade::RecordStream(std::shared_ptr<Allocation> allocation,
+void AllocatorFacade::RecordStream(std::shared_ptr<pten::Allocation> allocation,
                                    const gpuStream_t& stream) {
   PADDLE_ENFORCE_EQ(
       FLAGS_use_stream_safe_cuda_allocator, true,
@@ -983,7 +985,7 @@ void AllocatorFacade::RecordStream(std::shared_ptr<Allocation> allocation,
 }
 
 const gpuStream_t& AllocatorFacade::GetStream(
-    const std::shared_ptr<Allocation>& allocation) const {
+    const std::shared_ptr<pten::Allocation>& allocation) const {
   PADDLE_ENFORCE_EQ(
       FLAGS_use_stream_safe_cuda_allocator, true,
       platform::errors::Unimplemented(
diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h
index d59ecaece5a70f461b8443d06488ba6f2fe5446b..76e2f0b5a94f6ddae8e8fb6281bdfcf70f10b76c 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -42,6 +42,7 @@ using NPUPinnedAllocator = paddle::memory::allocation::NPUPinnedAllocator;
 class AllocatorFacadePrivate;
 class AllocatorFacade {
  public:
+  using Allocation = pten::Allocation;
   AllocatorFacade(const AllocatorFacade& o) = delete;
   const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;
   ~AllocatorFacade();
diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
index dd2a65d889d8d98ddde9910d28d9ea68811862aa..ad62af8480f58381a1e77368d2a2613d82df2b64 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
@@ -45,7 +45,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
       chunk_size_(std::max(AlignedSize(chunk_size, alignment), alignment)),
       allow_free_idle_chunk_(allow_free_idle_chunk) {}
 
-Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
+pten::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
+    size_t unaligned_size) {
   size_t size = AlignedSize(unaligned_size, alignment_);
   VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size;
 
@@ -78,11 +79,13 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
     size_t realloc_size = std::max(size, chunk_size_);
 
     try {
-      chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
+      chunks_.emplace_back(static_unique_ptr_cast<Allocation>(
+          underlying_allocator_->Allocate(realloc_size)));
     } catch (BadAlloc &ex) {
       if (FLAGS_free_when_no_cache_hit) throw ex;
       FreeIdleChunks();
-      chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
+      chunks_.emplace_back(static_unique_ptr_cast<Allocation>(
+          underlying_allocator_->Allocate(realloc_size)));
     }
 
     auto *chunk = &(*chunks_.rbegin());
@@ -104,7 +107,7 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
   return new BlockAllocation(block_it);
 }
 
-void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
+void AutoGrowthBestFitAllocator::FreeImpl(pten::Allocation *allocation) {
   VLOG(10) << "Free " << allocation->size()
            << " bytes, ptr = " << allocation->ptr();
   std::lock_guard<SpinLock> guard(spinlock_);
diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
index 2334a1b6d4d55285f49a08938d8625b818dddcc8..94aff93ec50f85b1725359d53fa64eb58c28219c 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
@@ -36,9 +36,9 @@ class AutoGrowthBestFitAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
 
-  void FreeImpl(Allocation *allocation) override;
+  void FreeImpl(pten::Allocation *allocation) override;
 
   // Release the memory block which is not used in pool.
   uint64_t ReleaseImpl(const platform::Place &place) override {
@@ -64,10 +64,10 @@ class AutoGrowthBestFitAllocator : public Allocator {
   };
 
   struct Chunk {
-    explicit Chunk(AllocationPtr allocation)
+    explicit Chunk(DecoratedAllocationPtr allocation)
         : allocation_(std::move(allocation)) {}
 
-    AllocationPtr allocation_;
+    DecoratedAllocationPtr allocation_;
     List<Block> blocks_;
   };
 
diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
index 926af8292d2e8664af0b80273a6853d8a62f5cf7..5942fbe730e5705f9ed891aaa088d83fa06fcb67 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
@@ -28,12 +28,12 @@ namespace allocation {
 
 class RecordedAllocator : public Allocator {
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     allocated_size_ += size;
     return new Allocation(malloc(size), size, platform::CPUPlace());
   }
 
-  void FreeImpl(Allocation *allocation) {
+  void FreeImpl(pten::Allocation *allocation) {
     allocated_size_ -= allocation->size();
     free(allocation->ptr());
     delete allocation;
@@ -79,7 +79,7 @@ class LimitedResourceAllocator : public Allocator {
   size_t AllocatedSize() const { return allocated_size_; }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     if (allocated_size_ + size > capacity_) {
       throw BadAlloc("", __FILE__, __LINE__);
     }
@@ -88,7 +88,7 @@ class LimitedResourceAllocator : public Allocator {
     return new Allocation(malloc(size), size, platform::CPUPlace());
   }
 
-  void FreeImpl(Allocation *allocation) {
+  void FreeImpl(pten::Allocation *allocation) {
     allocated_size_ -= allocation->size();
     free(allocation->ptr());
     delete allocation;
diff --git a/paddle/fluid/memory/allocation/base_ptr_test.cu b/paddle/fluid/memory/allocation/base_ptr_test.cu
index a34750a5e34ba161c73ad736ab2d282470d0db24..5edabfcb9f5e7efab1242da5f5c091bebcf74c11 100644
--- a/paddle/fluid/memory/allocation/base_ptr_test.cu
+++ b/paddle/fluid/memory/allocation/base_ptr_test.cu
@@ -37,7 +37,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
       size_t size = dis_(random_engine_);
       AllocationPtr allocation = Alloc(place_, size);
 
-      void* base_ptr = allocation->base_ptr();
+      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -56,7 +56,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
         size_t size = dis_(random_engine_);
         AllocationPtr allocation = Alloc(place_, size);
 
-        void* base_ptr = allocation->base_ptr();
+        void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
         void* system_ptr =
             platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
         EXPECT_EQ(base_ptr, system_ptr);
@@ -77,7 +77,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
       size_t size = dis_(random_engine_);
       AllocationPtr allocation = Alloc(place_, size);
 
-      void* base_ptr = allocation->base_ptr();
+      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -91,7 +91,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
 
   void ZeroSizeAllocTest() {
     AllocationPtr allocation = Alloc(place_, 0);
-    void* base_ptr = allocation->base_ptr();
+    void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
     void* system_ptr =
         platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
     EXPECT_EQ(base_ptr, system_ptr);
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc
index 0955b5212622f775e3e17568b4e46d0e2d9210c3..3cba70bd5b502d7de425cd673e26fa4993861b00 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc
@@ -33,7 +33,7 @@ static int HighestBitPos(size_t N) {
   }
 }
 
-BestFitAllocator::BestFitAllocator(Allocation* allocation)
+BestFitAllocator::BestFitAllocator(pten::Allocation* allocation)
     : allocation_(allocation) {
   details::Chunk chunk;
   chunk.size_ = allocation_->size();
@@ -115,7 +115,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
   }
   return num;
 }
-void BestFitAllocator::FreeImpl(Allocation* allocation) {
+void BestFitAllocator::FreeImpl(pten::Allocation* allocation) {
   auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(
       bf_allocation,
@@ -150,7 +150,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) {
   InsertFreeNode(chunk_it);
   delete allocation;
 }
-Allocation* BestFitAllocator::AllocateImpl(size_t size) {
+pten::Allocation* BestFitAllocator::AllocateImpl(size_t size) {
   auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
   MapIt map_it;
   for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h
index 42f69e6d704af6139aafa51ae22b16d56e9ff38c..297d876178f3d14c69c3a75a970c80b733c8e03f 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.h
@@ -108,7 +108,7 @@ class BestFitAllocation : public Allocation {
 // the prev-chunk and the next-chunk when possible.
 class BestFitAllocator : public Allocator {
  public:
-  explicit BestFitAllocator(Allocation* allocation);
+  explicit BestFitAllocator(pten::Allocation* allocation);
 
   void* BasePtr() const { return allocation_->ptr(); }
 
@@ -127,11 +127,11 @@ class BestFitAllocator : public Allocator {
   void InsertFreeNode(const ListIt& it);
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 
  private:
-  Allocation* allocation_;  // not owned
+  pten::Allocation* allocation_;  // not owned
   details::ChunkList chunks_;
   details::FreeChunkBin free_chunks_;
 };
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc
index 325cb010bf46629ca2dcbef2a6c7d184d39bc5d9..11739ebba955f69fb07e4b7084b71d60bdcade3e 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator.cc
@@ -46,12 +46,13 @@ void BufferedAllocator::FreeCache(size_t size) {
 
 bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
 
-void BufferedAllocator::FreeImpl(Allocation *allocation) {
+void BufferedAllocator::FreeImpl(pten::Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
-  allocations_.emplace(allocation->size(), AllocationPtr(allocation));
+  allocations_.emplace(allocation->size(),
+                       AllocationPtr(allocation, Allocator::AllocationDeleter));
 }
 
-Allocation *BufferedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *BufferedAllocator::AllocateImpl(size_t size) {
   {
     platform::LockGuardPtr<std::mutex> guard(mtx_);
     auto it = allocations_.lower_bound(size);
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h
index 5e1733bd839de2c0e219a40c569e492b1aca1d56..0ccccef573963e772baffefb5a7cded1c15b0b2c 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.h
+++ b/paddle/fluid/memory/allocation/buffered_allocator.h
@@ -45,8 +45,8 @@ class BufferedAllocator : public Allocator {
   void FreeCache(size_t size);
 
  protected:
-  void FreeImpl(Allocation *allocation) override;
-  Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
index 0bfa10a1616b67ebe258d30819184dc7aac76968..21c30efccd8ada2853f4375d2266c75ddfd8002f 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
@@ -27,7 +27,7 @@ namespace memory {
 namespace allocation {
 
 inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator(
-    Allocation *allocation, bool thread_safe) {
+    pten::Allocation *allocation, bool thread_safe) {
   std::unique_ptr<Allocator> allocator(new BestFitAllocator(allocation));
   if (thread_safe) {
     allocator.reset(new LockedAllocator(std::move(allocator)));
@@ -68,7 +68,7 @@ class StubAllocator : public Allocator {
   size_t GetFreeCount() const { return destruct_count_; }
 
  protected:
-  void FreeImpl(Allocation *allocation) override {
+  void FreeImpl(pten::Allocation *allocation) override {
     auto *alloc = dynamic_cast<StubAllocation *>(allocation);
     PADDLE_ENFORCE_NOT_NULL(
         alloc, platform::errors::InvalidArgument(
@@ -77,7 +77,7 @@ class StubAllocator : public Allocator {
     ++destruct_count_;
     delete allocation;
   }
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     ++construct_count_;
     if (size == 0) {
       return new StubAllocation(nullptr, 0, platform::CPUPlace());
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc
index 128591f5a8d3e6e5ef4ca29dd97f265116635d04..bf0bd891be26ff654b3945b77ce715d260819183 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.cc
+++ b/paddle/fluid/memory/allocation/cpu_allocator.cc
@@ -24,7 +24,7 @@ namespace allocation {
 
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }
 
-void CPUAllocator::FreeImpl(Allocation *allocation) {
+void CPUAllocator::FreeImpl(pten::Allocation *allocation) {
   void *p = allocation->ptr();
 #ifdef _WIN32
   _aligned_free(p);
@@ -34,7 +34,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
   delete allocation;
 }
 
-Allocation *CPUAllocator::AllocateImpl(size_t size) {
+pten::Allocation *CPUAllocator::AllocateImpl(size_t size) {
   void *p;
 #ifdef _WIN32
   p = _aligned_malloc(size, kAlignment);
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h
index 058ff63381658da698841c839425dec000a748da..a64089dd2de42a1ad43b18103f16e83e94af289c 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.h
+++ b/paddle/fluid/memory/allocation/cpu_allocator.h
@@ -37,8 +37,8 @@ class CPUAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 };
 }  // namespace allocation
 }  // namespace memory
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index 4242083f2e617af5e7dc8456746b3ca9738dc3f8..ff9bbf4ab3df8194d1efbf5a2df3222c2fb2ac70 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -32,7 +32,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
-void CUDAAllocator::FreeImpl(Allocation* allocation) {
+void CUDAAllocator::FreeImpl(pten::Allocation* allocation) {
   PADDLE_ENFORCE_EQ(
       BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_,
       platform::errors::PermissionDenied(
@@ -42,7 +42,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
   delete allocation;
 }
 
-Allocation* CUDAAllocator::AllocateImpl(size_t size) {
+pten::Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); });
 
   void* ptr;
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h
index 5969d4d20ddee498c8257412cc9d8fa7177d475b..57e85a3dc21d1cef83e0ff38c2bcc7fab5c3c001 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_allocator.h
@@ -28,8 +28,8 @@ class CUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 
  private:
   platform::CUDAPlace place_;
diff --git a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
index 33cf2fe05424778b88eae135f582d3d39405e55a..a6696634c12d4abe48d05d7572cf89a74742e92b 100644
--- a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
@@ -41,7 +41,7 @@ namespace allocation {
  */
 class CUDADeviceContextAllocation : public Allocation {
  public:
-  explicit CUDADeviceContextAllocation(AllocationPtr allocation)
+  explicit CUDADeviceContextAllocation(DecoratedAllocationPtr allocation)
       : Allocation(allocation->ptr(), allocation->base_ptr(),
                    allocation->size(), allocation->place()),
         underlying_allocation_(std::move(allocation)) {}
@@ -56,7 +56,7 @@ class CUDADeviceContextAllocation : public Allocation {
             << p_allocation;
     dev_ctx_->AddStreamCallback([p_allocation] {
       VLOG(4) << "Delete CUDADeviceContextAllocation at " << p_allocation;
-      AllocationDeleter()(p_allocation);
+      Allocator::AllocationDeleter(p_allocation);
     });
   }
 
@@ -65,7 +65,7 @@ class CUDADeviceContextAllocation : public Allocation {
   }
 
  private:
-  AllocationPtr underlying_allocation_;
+  DecoratedAllocationPtr underlying_allocation_;
   const platform::CUDADeviceContext *dev_ctx_{nullptr};
 };
 
@@ -102,14 +102,14 @@ class CUDADeviceContextAllocator : public Allocator {
   }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     PADDLE_ENFORCE_NOT_NULL(
         default_stream_,
         platform::errors::PreconditionNotMet(
             "Default stream is not set for CUDADeviceContextAllocator"));
     platform::CUDADeviceGuard guard(place_.device);
-    auto allocation =
-        new CUDADeviceContextAllocation(memory::Alloc(place_, size));
+    auto allocation = new CUDADeviceContextAllocation(
+        static_unique_ptr_cast<Allocation>(memory::Alloc(place_, size)));
 // Wait for the event on stream
 #ifdef PADDLE_WITH_HIP
     PADDLE_ENFORCE_GPU_SUCCESS(hipEventRecord(event_, default_stream_));
@@ -121,7 +121,7 @@ class CUDADeviceContextAllocator : public Allocator {
     return allocation;
   }
 
-  void FreeImpl(Allocation *allocation) override { delete allocation; }
+  void FreeImpl(pten::Allocation *allocation) override { delete allocation; }
 
  private:
   platform::CUDAPlace place_;
diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc
index f4baca8288f03ca9073dd628ce772f383b104331..2ae2cf20ee6d4b6e90139b83436d00b3136ea11a 100644
--- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc
@@ -101,7 +101,7 @@ CUDAVirtualMemAllocator::CUDAVirtualMemAllocator(
 
 bool CUDAVirtualMemAllocator::IsAllocThreadSafe() const { return false; }
 
-void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) {
+void CUDAVirtualMemAllocator::FreeImpl(pten::Allocation* allocation) {
   PADDLE_ENFORCE_EQ(
       BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_,
       platform::errors::PermissionDenied(
@@ -140,7 +140,7 @@ void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) {
   delete allocation;
 }
 
-Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) {
+pten::Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) {
   size = AlignedSize(size, granularity_);
 
   CUdeviceptr ptr = virtual_mem_base_ + virtual_mem_alloced_offset_;
diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h
index c51b56566bb0254d3c6694a9f6411e71ab5c4dbb..0e1e59d200d91358b6be8b0c5dbd3e4ef1e68cb5 100644
--- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h
@@ -37,8 +37,8 @@ class CUDAVirtualMemAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 
  private:
   platform::CUDAPlace place_;
diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc
index 6e8f870b235ff0f5ab364f7813ef7d00985a3307..a0c8efddbd80d32223697ff4790cde31c6b64730 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.cc
+++ b/paddle/fluid/memory/allocation/locked_allocator.cc
@@ -37,12 +37,12 @@ LockedAllocator::LockedAllocator(
   }
 }
 
-void LockedAllocator::FreeImpl(Allocation *allocation) {
+void LockedAllocator::FreeImpl(pten::Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   underlying_allocator_->Free(allocation);
 }
 
-Allocation *LockedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *LockedAllocator::AllocateImpl(size_t size) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   return underlying_allocator_->Allocate(size).release();
 }
diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h
index 1b8418bc8494a270645935f13dd3c8ea62849ed2..d17c8b24e27bdb155314264abc070401127d68ca 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.h
+++ b/paddle/fluid/memory/allocation/locked_allocator.h
@@ -29,8 +29,8 @@ class LockedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation *allocation) override;
-  Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
index 8710bbe6ce98bfb0d6e2f141359ecafc6ee22689..ffe7ccf9190bebeeaf014310031ed026b9af4ba8 100644
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -790,7 +790,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
 
 namespace allocation {
 
-Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
+pten::Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
   void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_);
   auto *tmp_alloc = new Allocation(ptr, size, place_);
   platform::MemEvenRecorder::Instance().PushMemRecord(
@@ -798,7 +798,7 @@ Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
   return tmp_alloc;
 }
 
-void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
+void NaiveBestFitAllocator::FreeImpl(pten::Allocation *allocation) {
   boost::apply_visitor(
       legacy::FreeVisitor(allocation->ptr(), allocation->size()),
       allocation->place());
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
index 474a308a064fdbc6d8e181fec0bf4aababed13e0..b7b3647ff98c157c743a34b8e7fb5bdfd3e00432 100644
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
@@ -34,8 +34,8 @@ class NaiveBestFitAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   uint64_t ReleaseImpl(const platform::Place &place) override;
 
  private:
diff --git a/paddle/fluid/memory/allocation/npu_allocator.cc b/paddle/fluid/memory/allocation/npu_allocator.cc
index 074a900cf54636908d057fb2a66d9e36ee0694dd..d9fa7ec27fdde6ae1c579ba808425a6bd901fe78 100644
--- a/paddle/fluid/memory/allocation/npu_allocator.cc
+++ b/paddle/fluid/memory/allocation/npu_allocator.cc
@@ -22,7 +22,7 @@ namespace memory {
 namespace allocation {
 
 bool NPUAllocator::IsAllocThreadSafe() const { return true; }
-void NPUAllocator::FreeImpl(Allocation* allocation) {
+void NPUAllocator::FreeImpl(pten::Allocation* allocation) {
   PADDLE_ENFORCE_EQ(
       BOOST_GET_CONST(platform::NPUPlace, allocation->place()), place_,
       platform::errors::PermissionDenied(
@@ -32,7 +32,7 @@ void NPUAllocator::FreeImpl(Allocation* allocation) {
   delete allocation;
 }
 
-Allocation* NPUAllocator::AllocateImpl(size_t size) {
+pten::Allocation* NPUAllocator::AllocateImpl(size_t size) {
   std::call_once(once_flag_,
                  [this] { platform::SetNPUDeviceId(place_.device); });
 
diff --git a/paddle/fluid/memory/allocation/npu_allocator.h b/paddle/fluid/memory/allocation/npu_allocator.h
index bf668973505bab0b00b2da6111709e27236ffea6..88b0c9a24bb3d1b836a6ff356ff6284acf1aa8f9 100644
--- a/paddle/fluid/memory/allocation/npu_allocator.h
+++ b/paddle/fluid/memory/allocation/npu_allocator.h
@@ -28,8 +28,8 @@ class NPUAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 
  private:
   platform::NPUPlace place_;
diff --git a/paddle/fluid/memory/allocation/npu_pinned_allocator.cc b/paddle/fluid/memory/allocation/npu_pinned_allocator.cc
index 292fe15c5d952fd68e38cb8d9beb3f98d80dd45e..2389973fa9b886eedc9ea992493289abca0a480a 100644
--- a/paddle/fluid/memory/allocation/npu_pinned_allocator.cc
+++ b/paddle/fluid/memory/allocation/npu_pinned_allocator.cc
@@ -26,7 +26,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
     platform::NPUEventQuery(event, &status);
 
     if (status == ACL_EVENT_STATUS_COMPLETE) {
-      Allocation *allocation = it->first;
+      auto *allocation = it->first;
       void *ptr = allocation->ptr();
       free(ptr);
       npu_events_.erase(it++);
@@ -38,7 +38,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
   }
 }
 
-Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
   std::lock_guard<std::mutex> lock(mtx_);
   ProcessEventsAndFree();
   void *ptr;
@@ -50,7 +50,7 @@ Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
   return new Allocation(ptr, size, platform::NPUPinnedPlace());
 }
 
-void NPUPinnedAllocator::FreeImpl(Allocation *allocation) {
+void NPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) {
   std::lock_guard<std::mutex> lock(mtx_);
   void *ptr = allocation->ptr();
   auto iter = npu_events_.find(allocation);
@@ -83,7 +83,7 @@ uint64_t NPUPinnedAllocator::ReleaseImpl(const platform::Place &place) {
   return static_cast<uint64_t>(0);
 }
 
-void NPUPinnedAllocator::RecordEvent(Allocation *allocation,
+void NPUPinnedAllocator::RecordEvent(pten::Allocation *allocation,
                                      aclrtStream stream) {
   std::lock_guard<std::mutex> lock(mtx_);
   aclrtEvent event = nullptr;
diff --git a/paddle/fluid/memory/allocation/npu_pinned_allocator.h b/paddle/fluid/memory/allocation/npu_pinned_allocator.h
index 1d3f8bf1e449d7246301b0d99814aedca52a8fc7..716b12eea15f84836a43cf3365116a7f26a88b3f 100644
--- a/paddle/fluid/memory/allocation/npu_pinned_allocator.h
+++ b/paddle/fluid/memory/allocation/npu_pinned_allocator.h
@@ -32,16 +32,16 @@ class NPUPinnedAllocator : public Allocator {
  public:
   bool IsAllocThreadSafe() const override { return true; }
   void ProcessEventsAndFree();
-  void RecordEvent(Allocation *allocation, aclrtStream stream);
+  void RecordEvent(pten::Allocation *allocation, aclrtStream stream);
   constexpr static size_t kAlignment = 4096UL;
 
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   uint64_t ReleaseImpl(const platform::Place &place) override;
 
  private:
-  std::unordered_map<Allocation *, aclrtEvent> npu_events_;
+  std::unordered_map<pten::Allocation *, aclrtEvent> npu_events_;
   mutable std::mutex mtx_;
 };
 
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc
index c56a7235c109ca0ab3210a0fa1e9e21fce7355c3..f1175fc4374e77000b2c24714d6552684ea6fb8b 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.cc
+++ b/paddle/fluid/memory/allocation/pinned_allocator.cc
@@ -18,7 +18,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
-void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
+void CPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) {
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(hipHostFree(allocation->ptr()));
 #else
@@ -26,7 +26,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
 #endif
   delete allocation;
 }
-Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
   void *ptr;
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(hipHostMalloc(&ptr, size, hipHostMallocPortable));
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h
index 4f535ef33734a3c6f7048ae6538e4332e0c9e8e4..800e3ff3bb2e39af623d444c6dfa7f12f55d5521 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.h
+++ b/paddle/fluid/memory/allocation/pinned_allocator.h
@@ -25,8 +25,8 @@ class CPUPinnedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation *allocation) override;
-  Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
 };
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc
index 1607af3808b434629fad47657a6b50c2a1b10028..856b6c2e9a2b084c47204837a5c859b9edcdad63 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator.cc
@@ -39,7 +39,7 @@ class WaitedAllocateSizeGuard {
   size_t requested_size_;
 };
 
-void RetryAllocator::FreeImpl(Allocation* allocation) {
+void RetryAllocator::FreeImpl(pten::Allocation* allocation) {
   // Delete underlying allocation first.
   size_t size = allocation->size();
   underlying_allocator_->Free(allocation);
@@ -51,7 +51,7 @@ void RetryAllocator::FreeImpl(Allocation* allocation) {
   }
 }
 
-Allocation* RetryAllocator::AllocateImpl(size_t size) {
+pten::Allocation* RetryAllocator::AllocateImpl(size_t size) {
   auto alloc_func = [&, this]() {
     return underlying_allocator_->Allocate(size).release();
   };
diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h
index 031a5e2b97f178eb002b578532bb1c615f29caf8..b427a37907a671f9dd86888ac6ed50655da9af25 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.h
+++ b/paddle/fluid/memory/allocation/retry_allocator.h
@@ -45,8 +45,8 @@ class RetryAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
   uint64_t ReleaseImpl(const platform::Place& place) override {
     return underlying_allocator_->Release(place);
   }
diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc
index 787f3d9dca377d54d057241d4e51ff4164c294cc..d636c73e07a18d08ef10ebd2f77601c399fd1464 100644
--- a/paddle/fluid/memory/allocation/retry_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc
@@ -98,12 +98,12 @@ class DummyAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
         "Here is a test exception, always BadAlloc."));
   }
 
-  void FreeImpl(Allocation *) override {}
+  void FreeImpl(pten::Allocation *) override {}
 };
 
 TEST(RetryAllocator, RetryAllocatorLastAllocFailure) {
diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc
index a4f766f1d1abc66ccd39d493e8c4abc591258e8d..05c6a7adaff8b3904e407354ece6e907d4b4ac5e 100644
--- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc
@@ -19,7 +19,7 @@ namespace memory {
 namespace allocation {
 
 StreamSafeCUDAAllocation::StreamSafeCUDAAllocation(
-    AllocationPtr underlying_allocation, gpuStream_t owning_stream)
+    DecoratedAllocationPtr underlying_allocation, gpuStream_t owning_stream)
     : Allocation(underlying_allocation->ptr(),
                  underlying_allocation->base_ptr(),
                  underlying_allocation->size(), underlying_allocation->place()),
@@ -116,7 +116,7 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() {
 
 bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; }
 
-Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
+pten::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
   ProcessUnfreedAllocations();
   VLOG(8) << "Try allocate " << size << " bytes";
   AllocationPtr underlying_allocation;
@@ -136,13 +136,14 @@ Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
     throw;
   }
   StreamSafeCUDAAllocation* allocation = new StreamSafeCUDAAllocation(
-      std::move(underlying_allocation), default_stream_);
+      static_unique_ptr_cast<Allocation>(std::move(underlying_allocation)),
+      default_stream_);
   VLOG(8) << "Allocate " << allocation->size() << " bytes at address "
           << allocation->ptr();
   return allocation;
 }
 
-void StreamSafeCUDAAllocator::FreeImpl(Allocation* allocation) {
+void StreamSafeCUDAAllocator::FreeImpl(pten::Allocation* allocation) {
   StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
       dynamic_cast<StreamSafeCUDAAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h
index d84994f58a9c40e7bc2f4adc64a01ca667104382..f54cdc749611a6e951beb74e9e0cc044ee212e92 100644
--- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h
+++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h
@@ -34,7 +34,7 @@ namespace allocation {
 
 class StreamSafeCUDAAllocation : public Allocation {
  public:
-  StreamSafeCUDAAllocation(AllocationPtr underlying_allocation,
+  StreamSafeCUDAAllocation(DecoratedAllocationPtr underlying_allocation,
                            gpuStream_t owning_stream);
   void RecordStream(const gpuStream_t &stream);
   bool CanBeFreed();
@@ -42,7 +42,7 @@ class StreamSafeCUDAAllocation : public Allocation {
   const gpuStream_t &GetOwningStream() const;
 
  private:
-  AllocationPtr underlying_allocation_;
+  DecoratedAllocationPtr underlying_allocation_;
   std::map<gpuStream_t, gpuEvent_t> outstanding_event_map_;
   gpuStream_t owning_stream_;
   SpinLock outstanding_event_map_lock_;
@@ -57,8 +57,8 @@ class StreamSafeCUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   uint64_t ReleaseImpl(const platform::Place &place) override;
 
  private:
diff --git a/paddle/fluid/memory/allocation/test_aligned_allocator.cc b/paddle/fluid/memory/allocation/test_aligned_allocator.cc
index 3eb1f140edd8471e32fbb68c24519d504e13e08c..987c7ea772d23af763960f4092bbb4fc45aad310 100644
--- a/paddle/fluid/memory/allocation/test_aligned_allocator.cc
+++ b/paddle/fluid/memory/allocation/test_aligned_allocator.cc
@@ -32,12 +32,12 @@ struct StubAllocator : public Allocator {
   size_t AllocNum() const { return alloc_num_; }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     ++alloc_num_;
     return new Allocation(new uint8_t[size], size, platform::CPUPlace());
   }
 
-  void FreeImpl(Allocation *allocation) override {
+  void FreeImpl(pten::Allocation *allocation) override {
     delete[] static_cast<uint8_t *>(allocation->ptr());
     delete allocation;
     --alloc_num_;
diff --git a/paddle/fluid/memory/allocation/thread_local_allocator.h b/paddle/fluid/memory/allocation/thread_local_allocator.h
index c55f579981b00501e830257d7097a06f7c623fb7..9c9306517021a0a9a55547b2be8c42dfc50e841e 100644
--- a/paddle/fluid/memory/allocation/thread_local_allocator.h
+++ b/paddle/fluid/memory/allocation/thread_local_allocator.h
@@ -83,11 +83,11 @@ class ThreadLocalCUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
-  Allocation* AllocateImpl(size_t size) override {
+  pten::Allocation* AllocateImpl(size_t size) override {
     return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->AllocateImpl(
         size);
   }
-  void FreeImpl(Allocation* allocation) override {
+  void FreeImpl(pten::Allocation* allocation) override {
     auto* tl_allocation = static_cast<ThreadLocalAllocation*>(allocation);
     auto allocator_impl = tl_allocation->GetAllocator();
     allocator_impl->FreeImpl(tl_allocation);
diff --git a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
index 5c7e8e2d933f30c6ab247801459678a2b2a42e9b..face27debe9ff10b1967533a7d3afa4286183cf7 100644
--- a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
@@ -35,7 +35,8 @@ VirtualMemoryAutoGrowthBestFitAllocator::
       alignment_(alignment),
       place_(place) {}
 
-Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
+pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(
+    size_t size) {
   std::lock_guard<SpinLock> guard(spinlock_);
   size = AlignedSize(size, alignment_);
   auto result = AllocFromFreeBlocks(size);
@@ -48,7 +49,8 @@ Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
   return result;
 }
 
-void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
+void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(
+    pten::Allocation *allocation) {
   std::lock_guard<SpinLock> guard(spinlock_);
   auto block_it = static_cast<BlockAllocation *>(allocation)->block_it_;
   TryMergeBlock2Blocks(block_it);
@@ -225,7 +227,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) {
   }
 }
 
-Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
+pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
     size_t size) {
   auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
   if (iter != free_blocks_.end()) {
diff --git a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
index 5171e5b3cd1bf3111c003f453582fc3d02d06f45..10bf0bbf49d5a075cfb0fbef22ef9109604d7d18 100644
--- a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
@@ -60,12 +60,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
 
-  void FreeImpl(Allocation *allocation) override;
+  void FreeImpl(pten::Allocation *allocation) override;
 
  private:
-  Allocation *AllocFromFreeBlocks(size_t size);
+  pten::Allocation *AllocFromFreeBlocks(size_t size);
   void ExtendAndMerge(size_t size);
   void TryMergeBlock2Blocks(std::list<Block>::iterator iter);
 
diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h
index 7069fb46203d6d0d96be51e556806e467d019ba0..8830c46a1779846503c039732f9fb1c9f316db52 100644
--- a/paddle/fluid/memory/malloc.h
+++ b/paddle/fluid/memory/malloc.h
@@ -28,7 +28,7 @@ class DeviceContext;
 
 namespace memory {
 
-using allocation::Allocation;
+using pten::Allocation;
 using allocation::Allocator;
 using allocation::AllocationPtr;
 
diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu
index bc2d496a3e76a8fa620dcf17a0cb4818516ab302..6892f7ce4e50348978dae30857a485f380030878 100644
--- a/paddle/fluid/operators/math/concat_and_split.cu
+++ b/paddle/fluid/operators/math/concat_and_split.cu
@@ -336,9 +336,8 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
     auto* data_alloc_released = data_alloc.release();
     auto* col_alloc_released = col_alloc.release();
     context.AddStreamCallback([data_alloc_released, col_alloc_released] {
-      memory::allocation::AllocationDeleter deleter;
-      deleter(data_alloc_released);
-      deleter(col_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(data_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(col_alloc_released);
     });
 #endif
   }
@@ -466,9 +465,8 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
     auto* data_alloc_released = data_alloc.release();
     auto* cols_alloc_released = cols_alloc.release();
     context.AddStreamCallback([data_alloc_released, cols_alloc_released] {
-      memory::allocation::AllocationDeleter deleter;
-      deleter(data_alloc_released);
-      deleter(cols_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(data_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(cols_alloc_released);
     });
 #endif
   }
diff --git a/paddle/fluid/platform/device/mlu/device_context_allocator.h b/paddle/fluid/platform/device/mlu/device_context_allocator.h
index 9deab92af5cd6d31121637202215a3008d0c594c..408016c0f0d99c15ea5e9ea1516bc69c9c2a9bdc 100644
--- a/paddle/fluid/platform/device/mlu/device_context_allocator.h
+++ b/paddle/fluid/platform/device/mlu/device_context_allocator.h
@@ -55,7 +55,7 @@ class MLUDeviceContextAllocation : public Allocation {
             << p_allocation;
     dev_ctx_->AddStreamCallback([p_allocation] {
       VLOG(4) << "Delete MLUDeviceContextAllocation at " << p_allocation;
-      AllocationDeleter()(p_allocation);
+      Allocator::AllocationDeleter(p_allocation);
     });
   }
 
@@ -91,7 +91,7 @@ class MLUDeviceContextAllocator : public Allocator {
   }
 
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     PADDLE_ENFORCE_NOT_NULL(
         default_stream_,
         platform::errors::PreconditionNotMet(
@@ -105,7 +105,7 @@ class MLUDeviceContextAllocator : public Allocator {
     return allocation;
   }
 
-  void FreeImpl(Allocation *allocation) override { delete allocation; }
+  void FreeImpl(pten::Allocation *allocation) override { delete allocation; }
 
  private:
   platform::MLUPlace place_;
diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.h b/paddle/fluid/platform/device/npu/npu_op_runner.h
index e83057e682fef2fff739484e77037bef168c5657..c049da3b335668ef12940ba7c61cd7cd0ea17645 100644
--- a/paddle/fluid/platform/device/npu/npu_op_runner.h
+++ b/paddle/fluid/platform/device/npu/npu_op_runner.h
@@ -158,8 +158,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) {
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation *allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation *allocation = npu_pinned_tensor.Holder().get();
 
     npu_pinned_allocator->RecordEvent(allocation, GetCurrentNPUStream());
   } else {
diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
index 3f8923440be50220cd9087459e758522c0bc4b9a..659df6b9b44def9b0c5d4737e96b3de3aca98f73 100644
--- a/paddle/fluid/pybind/eager_functions.cc
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -53,7 +53,7 @@ size_t PyArray_Size_(PyObject* numpy_data) {
   return res;
 }
 
-class EagerNumpyAllocation : public paddle::memory::allocation::Allocation {
+class EagerNumpyAllocation : public pten::Allocation {
  public:
   explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype)
       : Allocation(
diff --git a/paddle/pten/api/lib/utils/CMakeLists.txt b/paddle/pten/api/lib/utils/CMakeLists.txt
index 4a44ad7758b56e3f0131600ea981bba23bf7b004..a4db8c4b193b64f111a019fe21bec6895610e65e 100644
--- a/paddle/pten/api/lib/utils/CMakeLists.txt
+++ b/paddle/pten/api/lib/utils/CMakeLists.txt
@@ -1,2 +1,2 @@
-cc_library(pten_api_utils SRCS allocator.cc storage.cc tensor_utils.cc DEPS
+cc_library(pten_api_utils SRCS storage.cc tensor_utils.cc DEPS
 tensor_base convert_utils dense_tensor lod_tensor selected_rows place var_type_traits)
diff --git a/paddle/pten/api/lib/utils/allocator.cc b/paddle/pten/api/lib/utils/allocator.cc
deleted file mode 100644
index e80152431e71292fcd34c96108c191f8c799a220..0000000000000000000000000000000000000000
--- a/paddle/pten/api/lib/utils/allocator.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/pten/api/lib/utils/allocator.h"
-
-namespace paddle {
-namespace experimental {
-
-memory::Allocator::AllocationDeleter DefaultAllocator::deleter_;
-
-}  // namespace experimental
-}  // namespace paddle
diff --git a/paddle/pten/api/lib/utils/allocator.h b/paddle/pten/api/lib/utils/allocator.h
index 4f5a810e400ce471ae05b4fef8a6b25a3465ab23..a8c05b76516892786a118f55763bc43e3d7f8742 100644
--- a/paddle/pten/api/lib/utils/allocator.h
+++ b/paddle/pten/api/lib/utils/allocator.h
@@ -22,14 +22,15 @@ limitations under the License. */
 namespace paddle {
 namespace experimental {
 
-class DefaultAllocator : public pten::Allocator {
+class DefaultAllocator : public pten::deprecated::Allocator {
  public:
-  using Allocation = pten::Allocation;
+  using Allocation = pten::deprecated::Allocation;
   explicit DefaultAllocator(const paddle::platform::Place& place)
       : place_(place) {}
 
   static void Delete(Allocation* allocation) {
-    deleter_(allocation->CastContextWithoutCheck<paddle::memory::Allocation>());
+    paddle::memory::allocation::Allocator::AllocationDeleter(
+        allocation->CastContextWithoutCheck<paddle::memory::Allocation>());
   }
 
   Allocation Allocate(size_t bytes_size) override {
@@ -42,7 +43,6 @@ class DefaultAllocator : public pten::Allocator {
 
  private:
   paddle::platform::Place place_;
-  static paddle::memory::Allocator::AllocationDeleter deleter_;
 };
 
 }  // namespace experimental
diff --git a/paddle/pten/api/lib/utils/storage.cc b/paddle/pten/api/lib/utils/storage.cc
index 9ee1b9e5b7f92ee4a709ffcb2eafafb6c6bca4ac..6116a709d506560285d33fde16a9b774972f9c69 100644
--- a/paddle/pten/api/lib/utils/storage.cc
+++ b/paddle/pten/api/lib/utils/storage.cc
@@ -20,14 +20,13 @@ namespace experimental {
 ExternalStorage::ExternalStorage(void* ptr,
                                  size_t size,
                                  const paddle::platform::Place& place)
-    : pten::Storage(
-          std::make_shared<paddle::memory::Allocation>(ptr, size, place)),
+    : pten::Storage(std::make_shared<pten::Allocation>(ptr, size, place)),
       size_(size) {}
 
 ExternalStorage::ExternalStorage(const pten::intrusive_ptr<pten::Storage>& root,
                                  size_t delta,
                                  size_t size)
-    : Storage(std::make_shared<paddle::memory::Allocation>(
+    : Storage(std::make_shared<pten::Allocation>(
           static_cast<uint8_t*>(root->data()) + delta, size, root->place())),
       size_(size) {
   PADDLE_ENFORCE_LE(static_cast<size_t>(delta + size),
diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc
index 69a1fc274a28db254db90b9e87bb5fe0d42b8bf6..0b6cb8d95cc1a5721caef9de05b97573e195d6bd 100644
--- a/paddle/pten/api/lib/utils/tensor_utils.cc
+++ b/paddle/pten/api/lib/utils/tensor_utils.cc
@@ -307,7 +307,7 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
   dst->Resize(src->dims());
   dst->set_type(pten::TransToProtoVarType(src->dtype()));
   auto storage = src->release();
-  std::shared_ptr<paddle::memory::allocation::Allocation> holder(
+  std::shared_ptr<pten::Allocation> holder(
       new TensorStorage(std::move(storage)));
   dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype()));
   dst->set_offset(src->meta().offset);
diff --git a/paddle/pten/core/allocator.h b/paddle/pten/core/allocator.h
index 74455be1368343306e3d0a4bc7d1581ab2b210c0..2647490c9f58ba9f4e905654c70e5a30a73cdebc 100644
--- a/paddle/pten/core/allocator.h
+++ b/paddle/pten/core/allocator.h
@@ -16,8 +16,10 @@ limitations under the License. */
 
 #include <cstdint>
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/core/candidate/allocator.h"
 
 namespace pten {
+namespace deprecated {
 
 /// \brief Encapsulates strategies for access/addressing, allocation/
 /// deallocation and construction/destruction of objects.
@@ -147,4 +149,5 @@ inline Allocation Allocate(const std::shared_ptr<Allocator>& a, size_t n) {
   return a->Allocate(n);
 }
 
+}  // namespace deprecated
 }  // namespace pten
diff --git a/paddle/pten/core/candidate/allocator.h b/paddle/pten/core/candidate/allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..75d42c4fd15cb13e10c86dd1f1b42700a53b83bd
--- /dev/null
+++ b/paddle/pten/core/candidate/allocator.h
@@ -0,0 +1,107 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include "paddle/fluid/platform/place.h"
+
+namespace pten {
+
+/// \brief Fancy pointer with deleter. The use of this data type
+/// is to be compatible with allocators from different frameworks
+/// without significant performance loss. The destructor is virtual and
+/// the data members are protected, so derived classes can extend it.
+class Allocation {
+ public:
+  using Place = paddle::platform::Place;
+  using DeleterFnPtr = void (*)(Allocation*);
+
+  Allocation() = default;
+
+  // Non-owning: no deleter is set, so the destructor releases nothing.
+  Allocation(void* data, size_t size, const Place& place)
+      : ptr_(data), size_(size), place_(place) {}
+
+  // Owning: a non-null `deleter` is called with `this` by the destructor.
+  Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
+      : ptr_(data), size_(size), deleter_(deleter), place_(place) {}
+
+  Allocation(Allocation&& other) noexcept { swap(*this, other); }
+  Allocation& operator=(Allocation&& other) noexcept {
+    // Swap instead of assigning field by field: `other` takes over this
+    // object's previous state, including its deleter.
+    swap(*this, other);
+    return *this;
+  }
+
+  virtual ~Allocation() {
+    if (deleter_) {  // skipped when no deleter was supplied
+      deleter_(this);
+    }
+  }
+
+  // Returns the held pointer.
+  // NOTE: For performance reasons it is better not to make this method
+  // virtual. If `defragmentation` is implemented later, a virtual method
+  // such as `defragmentation` could update the (already protected) `ptr_`
+  // field instead.
+  void* ptr() const noexcept { return ptr_; }
+
+  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
+  // last valid element.
+  //
+  // NOTE: Some allocators might allocate more memory than requested, so the
+  // size could be larger than the request. For example,
+  //    the AlignedAllocator will always allocate memory as size + kAlignment.
+  //    The raw pointer might not be aligned, so an offset might be added to
+  //    the raw pointer. The size of this allocation will be
+  //    `size + kAlignment - offset`.
+  size_t size() const noexcept { return size_; }
+
+  void* operator->() const noexcept { return ptr_; }
+  operator bool() const noexcept { return ptr_; }  // true iff ptr_ is non-null
+  const Place& place() const noexcept { return place_; }
+  DeleterFnPtr deleter() const noexcept { return deleter_; }
+
+ protected:
+  friend void swap(Allocation& a, Allocation& b) noexcept;
+  void* ptr_{nullptr};
+  size_t size_{};
+  DeleterFnPtr deleter_{nullptr};
+  // TODO(Shixiaowei02): Enum needs to be used instead to reduce
+  // the construction overhead by more than 50%.
+  Place place_;
+};
+
+inline void swap(Allocation& a, Allocation& b) noexcept {
+  ::std::swap(a.ptr_, b.ptr_);          // Member-wise exchange of the four
+  ::std::swap(a.deleter_, b.deleter_);  // data members; declared a friend of
+  ::std::swap(a.place_, b.place_);      // Allocation so it can reach them.
+  ::std::swap(a.size_, b.size_);
+}
+
+class Allocator {  // Abstract interface; subclasses implement Allocate().
+ public:
+  using DeleterType = std::function<void(Allocation*)>;
+  using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
+  // NOTE(review): std::unique_ptr needs <memory>; it is not included directly.
+  virtual ~Allocator() = default;
+  virtual AllocationPtr Allocate(size_t bytes_size) = 0;
+
+  virtual bool IsAllocThreadSafe() const { return false; }  // default answer
+};
+
+}  // namespace pten
diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h
index 1502accd197be6fddd1d0849e7373bebea7adf8b..1802a2461158fc9952857cce7d48ba42229405ed 100644
--- a/paddle/pten/core/dense_tensor.h
+++ b/paddle/pten/core/dense_tensor.h
@@ -60,6 +60,8 @@ class TensorInplaceVersion {
 class DenseTensor : public TensorBase,
                     public TypeInfoTraits<TensorBase, DenseTensor> {
  public:
+  using Allocator = deprecated::Allocator;
+
   /// \brief Construct a dense tensor and allocate space.
   /// \param a The allocator used to allocate space.
   /// \param meta The meta data of dense tensor.
diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h
index fc56935eeaf193398cb07df1b64524bc84291524..cf18dd913093a1f9e46ac67d78e03ed9c0a1b78b 100644
--- a/paddle/pten/core/storage.h
+++ b/paddle/pten/core/storage.h
@@ -91,6 +91,7 @@ class Storage : public intrusive_ref_counter<Storage> {
 class TensorStorage : public Storage {
  public:
   using Place = paddle::platform::Place;
+  using Allocator = deprecated::Allocator;
 
   explicit TensorStorage(const std::shared_ptr<Allocator>& a) : alloc_(a) {}
 
diff --git a/paddle/pten/tests/core/allocator.h b/paddle/pten/tests/core/allocator.h
index 094c0e8437d9811709972b3482703bbefdc4e54e..c2c74e1aacf1f2cb253e434385f8c64aed52d89d 100644
--- a/paddle/pten/tests/core/allocator.h
+++ b/paddle/pten/tests/core/allocator.h
@@ -21,7 +21,7 @@ limitations under the License. */
 namespace pten {
 namespace tests {
 
-class HostAllocatorSample : public pten::RawAllocator {
+class HostAllocatorSample : public pten::deprecated::RawAllocator {
  public:
   using Place = paddle::platform::Place;
   void* Allocate(size_t bytes_size) override {
@@ -36,8 +36,9 @@ class HostAllocatorSample : public pten::RawAllocator {
   Place place_{paddle::platform::CPUPlace()};
 };
 
-class FancyAllocator : public pten::Allocator {
+class FancyAllocator : public pten::deprecated::Allocator {
  public:
+  using Allocation = pten::deprecated::Allocation;
   static void Delete(Allocation* allocation) {
     ::operator delete(allocation->ptr());
   }
@@ -55,7 +56,7 @@ class FancyAllocator : public pten::Allocator {
 template <typename T>
 struct CustomAllocator {
   using value_type = T;
-  using Allocator = pten::RawAllocator;
+  using Allocator = pten::deprecated::RawAllocator;
 
   explicit CustomAllocator(const std::shared_ptr<Allocator>& a) noexcept
       : alloc_(a) {}
diff --git a/paddle/pten/tests/core/test_allocator.cc b/paddle/pten/tests/core/test_allocator.cc
index c509d8bd20a01243eda2bb7be13166ca8d540d5b..94ba9a1e1b9a23711caf3d6e40db0cdce00dec93 100644
--- a/paddle/pten/tests/core/test_allocator.cc
+++ b/paddle/pten/tests/core/test_allocator.cc
@@ -24,6 +24,10 @@ limitations under the License. */
 namespace pten {
 namespace tests {
 
+using RawAllocator = pten::deprecated::RawAllocator;
+using Allocator = pten::deprecated::Allocator;
+using Allocation = pten::deprecated::Allocation;
+
 template <typename T>
 bool host_allocator_test(size_t vector_size) {
   std::vector<T> src(vector_size);
diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh
index e0ae6008198736aeae3cb184f6007921e2e924d9..caacecf446a82344192a4f7cd2c0e127ea1a94a1 100644
--- a/tools/check_file_diff_approvals.sh
+++ b/tools/check_file_diff_approvals.sh
@@ -226,7 +226,7 @@ if [ "${HAS_MODIFIED_DEMO_CMAKE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
 HAS_MODIFIED_ALLOCATION=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/memory/allocation" || true`
 if [ "${HAS_MODIFIED_ALLOCATION}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
-    echo_line="You must be approved by zhiqiu and Shixiaowei02 for paddle/fluid/memory/allocation.\nIt is being modularized and refactored. Thanks!\n"
-    check_approval 2 6888866 39303645
+    echo_line="You must be approved by zhiqiu or Shixiaowei02 for paddle/fluid/memory/allocation.\nIt is being modularized and refactored. Thanks!\n"
+    check_approval 1 6888866 39303645  # NOTE(review): assumes the first argument is the required approval count; keep the message in sync
   fi
 
 HAS_MODIFIED_TENSOR=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/framework/tensor" || true`
@@ -241,23 +241,6 @@ if [ "${HAS_MODIFIED_TENSOR}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
     check_approval 1 22561442 22334008
   fi
 
-ALLOCSHARED_FILE_CHANGED=`git diff --name-only --diff-filter=AM upstream/$BRANCH |grep -E "*\.(h|cc)" || true`
-if [ "${ALLOCSHARED_FILE_CHANGED}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
-    ERROR_LINES=""
-    for TEST_FILE in ${ALLOCSHARED_FILE_CHANGED};
-    do
-        HAS_SKIP_CHECK_ALLOC_CI=`git diff -U0 upstream/$BRANCH ${PADDLE_ROOT}/${TEST_FILE} |grep "AllocShared" || true`
-        if [ "${HAS_SKIP_CHECK_ALLOC_CI}" != "" ]; then
-            ERROR_LINES="${ERROR_LINES}\n${TEST_FILE}\n${HAS_SKIP_CHECK_ALLOC_CI}\n"
-        fi
-    done
-    if [ "${ERROR_LINES}" != "" ]; then
-        ERROR_LINES=${ERROR_LINES//+/'\n+\t'}
-        echo_line="memory::AllocShared is not recommended, because it is being modularized and refactored. Please use memory::Alloc here. Otherwise, please request zhiqiu and Shixiaowei02 review and approve.\n"
-        check_approval 2 6888866 39303645
-    fi
-fi
-
 ALL_PADDLE_ENFORCE=`git diff -U0 upstream/$BRANCH |grep "^+" |grep -zoE "PADDLE_ENFORCE\(.[^,\);]+.[^;]*\);\s" || true`
 if [ "${ALL_PADDLE_ENFORCE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
     echo_line="PADDLE_ENFORCE is not recommended. Please use PADDLE_ENFORCE_EQ/NE/GT/GE/LT/LE or PADDLE_ENFORCE_NOT_NULL or PADDLE_ENFORCE_GPU_SUCCESS instead, see [ https://github.com/PaddlePaddle/Paddle/wiki/PADDLE_ENFORCE-Rewriting-Specification ] for details.\nYou must have one RD (chenwhql (Recommend) , luotao1 (Recommend) or lanxianghit) approval for the usage (either add or delete) of PADDLE_ENFORCE.\n${ALL_PADDLE_ENFORCE}\n"