Update memory release interface. (#28456)

ced5c40c · Wilber · GitHub · 7821759d · ced5c40c · ced5c40c
14 changed files
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -178,7 +178,9 @@ class Allocator {
    FreeImpl(allocation);
  }
-  inline void Release(const platform::Place& place) { ReleaseImpl(place); }
+  inline uint64_t Release(const platform::Place& place) {
+    return ReleaseImpl(place);
+  }
  // True if the `Allocate` is thread safe.
  virtual bool IsAllocThreadSafe() const;
@@ -186,7 +188,7 @@ class Allocator {
 protected:
  virtual Allocation* AllocateImpl(size_t size) = 0;
  virtual void FreeImpl(Allocation* allocation);
-  virtual void ReleaseImpl(const platform::Place& place) {}
+  virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; }
 };
 using AllocationDeleter = Allocator::AllocationDeleter;

--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -287,8 +287,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
  return m_->GetAllocator(place, size)->Allocate(size);
 }
-void AllocatorFacade::Release(const platform::Place& place) {
+uint64_t AllocatorFacade::Release(const platform::Place& place) {
-  m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
+  return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
      ->Release(place);
 }

--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -45,7 +45,7 @@ class AllocatorFacade {
  AllocationPtr Alloc(const platform::Place& place, size_t size);
  // Release unused memory pool.
-  void Release(const platform::Place& place);
+  uint64_t Release(const platform::Place& place);
  // TODO(yy): Allocate a Copy-On-Write allocation?
 private:

--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
@@ -138,18 +138,21 @@ void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
  }
 }
-void AutoGrowthBestFitAllocator::FreeIdleChunks() {
+uint64_t AutoGrowthBestFitAllocator::FreeIdleChunks() {
+  uint64_t bytes = 0;
  for (auto chunk_it = chunks_.begin(); chunk_it != chunks_.end();) {
    auto &blocks = chunk_it->blocks_;
    if (blocks.size() == 1 && blocks.begin()->is_free_) {
      auto &block = *blocks.begin();
      VLOG(2) << "Free chunk with size " << block.size_;
+      bytes += block.size_;
      free_blocks_.erase(std::make_pair(block.size_, block.ptr_));
      chunk_it = chunks_.erase(chunk_it);
    } else {
      ++chunk_it;
    }
  }
+  return bytes;
 }
 }  // namespace allocation

--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
@@ -40,10 +40,12 @@ class AutoGrowthBestFitAllocator : public Allocator {
  void FreeImpl(Allocation *allocation) override;
  // Release the memory block which is not used in pool.
-  void ReleaseImpl(const platform::Place &place) override { FreeIdleChunks(); }
+  uint64_t ReleaseImpl(const platform::Place &place) override {
+    return FreeIdleChunks();
+  }
 private:
-  void FreeIdleChunks();
+  uint64_t FreeIdleChunks();
  template <typename T>
  using List = std::list<T>;

--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -54,7 +54,7 @@ template <typename Place>
 void Free(const Place &place, void *p, size_t size);
 template <typename Place>
-void Release(const Place &place);
+uint64_t Release(const Place &place);
 template <typename Place>
 size_t Used(const Place &place);
@@ -103,8 +103,8 @@ void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p,
 }
 template <>
-void Release<platform::CPUPlace>(const platform::CPUPlace &place) {
+uint64_t Release<platform::CPUPlace>(const platform::CPUPlace &place) {
-  GetCPUBuddyAllocator()->Release();
+  return GetCPUBuddyAllocator()->Release();
 }
 template <>
@@ -195,7 +195,7 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
 }
 template <>
-void Release<platform::XPUPlace>(const platform::XPUPlace &place) {
+uint64_t Release<platform::XPUPlace>(const platform::XPUPlace &place) {
 #ifdef PADDLE_WITH_XPU
  PADDLE_THROW(
      platform::errors::PermissionDenied("Release XPU pool is not supported."));
@@ -333,9 +333,9 @@ void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p,
 }
 template <>
-void Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
+uint64_t Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
 #ifdef PADDLE_WITH_CUDA
-  GetGPUBuddyAllocator(place.device)->Release();
+  return GetGPUBuddyAllocator(place.device)->Release();
 #else
  PADDLE_THROW(platform::errors::PermissionDenied(
      "'CUDAPlace' is not supported in CPU only device."));
@@ -401,10 +401,10 @@ void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
 }
 template <>
-void Release<platform::CUDAPinnedPlace>(
+uint64_t Release<platform::CUDAPinnedPlace>(
    const platform::CUDAPinnedPlace &place) {
 #ifdef PADDLE_WITH_CUDA
-  GetCUDAPinnedBuddyAllocator()->Release();
+  return GetCUDAPinnedBuddyAllocator()->Release();
 #else
  PADDLE_THROW(platform::errors::PermissionDenied(
      "'CUDAPinnedPlace' is not supported in CPU only device."));
@@ -437,10 +437,10 @@ struct FreeVisitor : public boost::static_visitor<void> {
  size_t size_;
 };
-struct ReleaseVisitor : public boost::static_visitor<void> {
+struct ReleaseVisitor : public boost::static_visitor<uint64_t> {
  template <typename Place>
-  inline void operator()(const Place &place) const {
+  inline uint64_t operator()(const Place &place) const {
-    Release<Place>(place);
+    return Release<Place>(place);
  }
 };
@@ -486,8 +486,8 @@ void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
  delete allocation;
 }
-void NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) {
+uint64_t NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) {
-  boost::apply_visitor(legacy::ReleaseVisitor(), place);
+  return boost::apply_visitor(legacy::ReleaseVisitor(), place);
 }
 }  // namespace allocation

--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
@@ -35,7 +35,7 @@ class NaiveBestFitAllocator : public Allocator {
 protected:
  Allocation *AllocateImpl(size_t size) override;
  void FreeImpl(Allocation *allocation) override;
-  void ReleaseImpl(const platform::Place &place) override;
+  uint64_t ReleaseImpl(const platform::Place &place) override;
 private:
  platform::Place place_;

--- a/paddle/fluid/memory/allocation/retry_allocator.h
+++ b/paddle/fluid/memory/allocation/retry_allocator.h
@@ -47,8 +47,8 @@ class RetryAllocator : public Allocator {
 protected:
  void FreeImpl(Allocation* allocation) override;
  Allocation* AllocateImpl(size_t size) override;
-  void ReleaseImpl(const platform::Place& place) override {
+  uint64_t ReleaseImpl(const platform::Place& place) override {
-    underlying_allocator_->Release(place);
+    return underlying_allocator_->Release(place);
  }
 private:

--- a/paddle/fluid/memory/allocation/thread_local_allocator.cc
+++ b/paddle/fluid/memory/allocation/thread_local_allocator.cc
@@ -72,7 +72,9 @@ void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
  delete allocation;
 }
-void ThreadLocalAllocatorImpl::ReleaseImpl() { buddy_allocator_->Release(); }
+uint64_t ThreadLocalAllocatorImpl::ReleaseImpl() {
+  return buddy_allocator_->Release();
+}
 }  // namespace allocation
 }  // namespace memory

--- a/paddle/fluid/memory/allocation/thread_local_allocator.h
+++ b/paddle/fluid/memory/allocation/thread_local_allocator.h
@@ -52,7 +52,7 @@ class ThreadLocalAllocatorImpl
  explicit ThreadLocalAllocatorImpl(const platform::Place& p);
  ThreadLocalAllocation* AllocateImpl(size_t size);
  void FreeImpl(ThreadLocalAllocation* allocation);
-  void ReleaseImpl();
+  uint64_t ReleaseImpl();
 private:
  std::unique_ptr<memory::detail::BuddyAllocator> buddy_allocator_;
@@ -92,7 +92,7 @@ class ThreadLocalCUDAAllocator : public Allocator {
    auto allocator_impl = tl_allocation->GetAllocator();
    allocator_impl->FreeImpl(tl_allocation);
  }
-  void ReleaseImpl(const platform::Place& p) override {
+  uint64_t ReleaseImpl(const platform::Place& p) override {
    return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->ReleaseImpl();
  }

--- a/paddle/fluid/memory/detail/buddy_allocator.cc
+++ b/paddle/fluid/memory/detail/buddy_allocator.cc
@@ -162,7 +162,7 @@ void BuddyAllocator::Free(void* p) {
      IndexSizeAddress(desc->get_index(), desc->get_total_size(), block));
 }
-void BuddyAllocator::Release() {
+uint64_t BuddyAllocator::Release() {
  std::lock_guard<std::mutex> lock(mutex_);
  int num = 0;
  uint64_t bytes = 0;
@@ -193,6 +193,7 @@ void BuddyAllocator::Release() {
    }
  }
  VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes.";
+  return bytes;
 }
 size_t BuddyAllocator::Used() { return total_used_; }

--- a/paddle/fluid/memory/detail/buddy_allocator.h
+++ b/paddle/fluid/memory/detail/buddy_allocator.h
@@ -41,7 +41,7 @@ class BuddyAllocator {
  void* Alloc(size_t unaligned_size);
  void Free(void* ptr);
  // Release the unused memory pool, a real free operation for the OS.
-  void Release();
+  uint64_t Release();
  size_t Used();
  size_t GetMinChunkSize();
  size_t GetMaxChunkSize();

--- a/paddle/fluid/memory/malloc.cc
+++ b/paddle/fluid/memory/malloc.cc
@@ -31,7 +31,7 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) {
  return allocation::AllocatorFacade::Instance().Alloc(place, size);
 }
-void Release(const platform::Place &place) {
+uint64_t Release(const platform::Place &place) {
  return allocation::AllocatorFacade::Instance().Release(place);
 }

--- a/paddle/fluid/memory/malloc.h
+++ b/paddle/fluid/memory/malloc.h
@@ -38,7 +38,7 @@ extern AllocationPtr Alloc(const platform::Place& place, size_t size);
 extern AllocationPtr Alloc(const platform::DeviceContext& dev_ctx, size_t size);
-extern void Release(const platform::Place& place);
+extern uint64_t Release(const platform::Place& place);
 }  // namespace memory
 }  // namespace paddle