未验证 提交 ced5c40c 编写于 作者: W Wilber 提交者: GitHub

Update memory release interface. (#28456)

上级 7821759d
...@@ -178,7 +178,9 @@ class Allocator { ...@@ -178,7 +178,9 @@ class Allocator {
FreeImpl(allocation); FreeImpl(allocation);
} }
// Public, non-virtual entry point for releasing memory held for `place`.
// It only forwards to the virtual ReleaseImpl() hook. In this diff the old
// (left) version returned nothing; the new (right) version returns the
// uint64_t produced by ReleaseImpl().
// NOTE(review): the returned value is presumably the number of bytes
// released (the implementations visible in this change sum block sizes) —
// confirm against every override.
inline void Release(const platform::Place& place) { ReleaseImpl(place); } inline uint64_t Release(const platform::Place& place) {
return ReleaseImpl(place);
}
// True if the `Allocate` is thread safe. // True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const; virtual bool IsAllocThreadSafe() const;
...@@ -186,7 +188,7 @@ class Allocator { ...@@ -186,7 +188,7 @@ class Allocator {
protected: protected:
virtual Allocation* AllocateImpl(size_t size) = 0; virtual Allocation* AllocateImpl(size_t size) = 0;
virtual void FreeImpl(Allocation* allocation); virtual void FreeImpl(Allocation* allocation);
// Default release hook: does nothing; the new (right) version reports 0.
// Allocators that can actually give memory back override this.
virtual void ReleaseImpl(const platform::Place& place) {} virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; }
}; };
using AllocationDeleter = Allocator::AllocationDeleter; using AllocationDeleter = Allocator::AllocationDeleter;
......
...@@ -287,8 +287,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, ...@@ -287,8 +287,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
return m_->GetAllocator(place, size)->Allocate(size); return m_->GetAllocator(place, size)->Allocate(size);
} }
// Releases memory for `place` through the facade: looks up an allocator via
// m_->GetAllocator(place, 1) and calls Release(place) on it. The size
// argument 1 is only a selector (see the inline comment); nothing is
// allocated here. The new (right) version propagates the allocator's
// uint64_t result to the caller.
void AllocatorFacade::Release(const platform::Place& place) { uint64_t AllocatorFacade::Release(const platform::Place& place) {
m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1) return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
->Release(place); ->Release(place);
} }
......
...@@ -45,7 +45,7 @@ class AllocatorFacade { ...@@ -45,7 +45,7 @@ class AllocatorFacade {
AllocationPtr Alloc(const platform::Place& place, size_t size); AllocationPtr Alloc(const platform::Place& place, size_t size);
// Release unused memory pool. // Release unused memory pool.
void Release(const platform::Place& place); uint64_t Release(const platform::Place& place);
// TODO(yy): Allocate a Copy-On-Write allocation? // TODO(yy): Allocate a Copy-On-Write allocation?
private: private:
......
...@@ -138,18 +138,21 @@ void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) { ...@@ -138,18 +138,21 @@ void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
} }
} }
// Drops every chunk in chunks_ that is completely idle and, in the new
// (right) version, returns the summed size_ of the blocks dropped.
// A chunk counts as idle when it holds exactly one block and that block is
// marked is_free_.
// NOTE(review): the underlying memory is presumably returned when the chunk
// element is destroyed by chunks_.erase(); that destructor is not visible
// here — confirm.
void AutoGrowthBestFitAllocator::FreeIdleChunks() { uint64_t AutoGrowthBestFitAllocator::FreeIdleChunks() {
uint64_t bytes = 0;
// No increment in the loop header: the iterator advances either through
// the value returned by erase() or through the explicit ++ below.
for (auto chunk_it = chunks_.begin(); chunk_it != chunks_.end();) { for (auto chunk_it = chunks_.begin(); chunk_it != chunks_.end();) {
auto &blocks = chunk_it->blocks_; auto &blocks = chunk_it->blocks_;
if (blocks.size() == 1 && blocks.begin()->is_free_) { if (blocks.size() == 1 && blocks.begin()->is_free_) {
auto &block = *blocks.begin(); auto &block = *blocks.begin();
VLOG(2) << "Free chunk with size " << block.size_; VLOG(2) << "Free chunk with size " << block.size_;
// `block` refers into the chunk, so its size must be read before the
// chunk is erased.
bytes += block.size_;
// Remove the block's (size, ptr) entry from the free-block index before
// the chunk that owns it goes away.
free_blocks_.erase(std::make_pair(block.size_, block.ptr_)); free_blocks_.erase(std::make_pair(block.size_, block.ptr_));
chunk_it = chunks_.erase(chunk_it); chunk_it = chunks_.erase(chunk_it);
} else { } else {
++chunk_it; ++chunk_it;
} }
} }
return bytes;
} }
} // namespace allocation } // namespace allocation
......
...@@ -40,10 +40,12 @@ class AutoGrowthBestFitAllocator : public Allocator { ...@@ -40,10 +40,12 @@ class AutoGrowthBestFitAllocator : public Allocator {
void FreeImpl(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
// Release the memory block which is not used in pool. // Release the memory block which is not used in pool.
// Release hook for this allocator: delegates to FreeIdleChunks(). `place`
// is not used. The new (right) version returns FreeIdleChunks()'s result.
void ReleaseImpl(const platform::Place &place) override { FreeIdleChunks(); } uint64_t ReleaseImpl(const platform::Place &place) override {
return FreeIdleChunks();
}
private: private:
void FreeIdleChunks(); uint64_t FreeIdleChunks();
template <typename T> template <typename T>
using List = std::list<T>; using List = std::list<T>;
......
...@@ -54,7 +54,7 @@ template <typename Place> ...@@ -54,7 +54,7 @@ template <typename Place>
void Free(const Place &place, void *p, size_t size); void Free(const Place &place, void *p, size_t size);
template <typename Place> template <typename Place>
void Release(const Place &place); uint64_t Release(const Place &place);
template <typename Place> template <typename Place>
size_t Used(const Place &place); size_t Used(const Place &place);
...@@ -103,8 +103,8 @@ void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p, ...@@ -103,8 +103,8 @@ void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p,
} }
// CPUPlace specialization of Release: calls Release() on the allocator
// returned by GetCPUBuddyAllocator(). `place` is not otherwise used. The
// new (right) version returns that call's result.
template <> template <>
void Release<platform::CPUPlace>(const platform::CPUPlace &place) { uint64_t Release<platform::CPUPlace>(const platform::CPUPlace &place) {
GetCPUBuddyAllocator()->Release(); return GetCPUBuddyAllocator()->Release();
} }
template <> template <>
...@@ -195,7 +195,7 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p, ...@@ -195,7 +195,7 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
} }
template <> template <>
void Release<platform::XPUPlace>(const platform::XPUPlace &place) { uint64_t Release<platform::XPUPlace>(const platform::XPUPlace &place) {
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
PADDLE_THROW( PADDLE_THROW(
platform::errors::PermissionDenied("Release XPU pool is not supported.")); platform::errors::PermissionDenied("Release XPU pool is not supported."));
...@@ -333,9 +333,9 @@ void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p, ...@@ -333,9 +333,9 @@ void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p,
} }
template <> template <>
void Release<platform::CUDAPlace>(const platform::CUDAPlace &place) { uint64_t Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
GetGPUBuddyAllocator(place.device)->Release(); return GetGPUBuddyAllocator(place.device)->Release();
#else #else
PADDLE_THROW(platform::errors::PermissionDenied( PADDLE_THROW(platform::errors::PermissionDenied(
"'CUDAPlace' is not supported in CPU only device.")); "'CUDAPlace' is not supported in CPU only device."));
...@@ -401,10 +401,10 @@ void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place, ...@@ -401,10 +401,10 @@ void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
} }
template <> template <>
void Release<platform::CUDAPinnedPlace>( uint64_t Release<platform::CUDAPinnedPlace>(
const platform::CUDAPinnedPlace &place) { const platform::CUDAPinnedPlace &place) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
GetCUDAPinnedBuddyAllocator()->Release(); return GetCUDAPinnedBuddyAllocator()->Release();
#else #else
PADDLE_THROW(platform::errors::PermissionDenied( PADDLE_THROW(platform::errors::PermissionDenied(
"'CUDAPinnedPlace' is not supported in CPU only device.")); "'CUDAPinnedPlace' is not supported in CPU only device."));
...@@ -437,10 +437,10 @@ struct FreeVisitor : public boost::static_visitor<void> { ...@@ -437,10 +437,10 @@ struct FreeVisitor : public boost::static_visitor<void> {
size_t size_; size_t size_;
}; };
// Visitor that dispatches to the Release<Place> specialization matching the
// concrete place type it is applied to. The new (right) version changes the
// visitor's result type from void to uint64_t so the value from
// Release<Place>() is passed back to the caller.
struct ReleaseVisitor : public boost::static_visitor<void> { struct ReleaseVisitor : public boost::static_visitor<uint64_t> {
template <typename Place> template <typename Place>
inline void operator()(const Place &place) const { inline uint64_t operator()(const Place &place) const {
Release<Place>(place); return Release<Place>(place);
} }
}; };
...@@ -486,8 +486,8 @@ void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) { ...@@ -486,8 +486,8 @@ void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
delete allocation; delete allocation;
} }
// Release hook for this allocator: applies legacy::ReleaseVisitor to
// `place`. The new (right) version returns the visitor's uint64_t result.
void NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) { uint64_t NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) {
boost::apply_visitor(legacy::ReleaseVisitor(), place); return boost::apply_visitor(legacy::ReleaseVisitor(), place);
} }
} // namespace allocation } // namespace allocation
......
...@@ -35,7 +35,7 @@ class NaiveBestFitAllocator : public Allocator { ...@@ -35,7 +35,7 @@ class NaiveBestFitAllocator : public Allocator {
protected: protected:
Allocation *AllocateImpl(size_t size) override; Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
void ReleaseImpl(const platform::Place &place) override; uint64_t ReleaseImpl(const platform::Place &place) override;
private: private:
platform::Place place_; platform::Place place_;
......
...@@ -47,8 +47,8 @@ class RetryAllocator : public Allocator { ...@@ -47,8 +47,8 @@ class RetryAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override; Allocation* AllocateImpl(size_t size) override;
// Release hook: forwards to underlying_allocator_->Release(place). The new
// (right) version returns the wrapped allocator's result unchanged.
void ReleaseImpl(const platform::Place& place) override { uint64_t ReleaseImpl(const platform::Place& place) override {
underlying_allocator_->Release(place); return underlying_allocator_->Release(place);
} }
private: private:
......
...@@ -72,7 +72,9 @@ void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) { ...@@ -72,7 +72,9 @@ void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
delete allocation; delete allocation;
} }
// Calls Release() on this object's buddy_allocator_. The new (right)
// version returns that call's uint64_t result.
void ThreadLocalAllocatorImpl::ReleaseImpl() { buddy_allocator_->Release(); } uint64_t ThreadLocalAllocatorImpl::ReleaseImpl() {
return buddy_allocator_->Release();
}
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -52,7 +52,7 @@ class ThreadLocalAllocatorImpl ...@@ -52,7 +52,7 @@ class ThreadLocalAllocatorImpl
explicit ThreadLocalAllocatorImpl(const platform::Place& p); explicit ThreadLocalAllocatorImpl(const platform::Place& p);
ThreadLocalAllocation* AllocateImpl(size_t size); ThreadLocalAllocation* AllocateImpl(size_t size);
void FreeImpl(ThreadLocalAllocation* allocation); void FreeImpl(ThreadLocalAllocation* allocation);
void ReleaseImpl(); uint64_t ReleaseImpl();
private: private:
std::unique_ptr<memory::detail::BuddyAllocator> buddy_allocator_; std::unique_ptr<memory::detail::BuddyAllocator> buddy_allocator_;
...@@ -92,7 +92,7 @@ class ThreadLocalCUDAAllocator : public Allocator { ...@@ -92,7 +92,7 @@ class ThreadLocalCUDAAllocator : public Allocator {
auto allocator_impl = tl_allocation->GetAllocator(); auto allocator_impl = tl_allocation->GetAllocator();
allocator_impl->FreeImpl(tl_allocation); allocator_impl->FreeImpl(tl_allocation);
} }
// Release hook: fetches the allocator impl for gpu_id_ from
// ThreadLocalCUDAAllocatorPool::Instance() and calls its ReleaseImpl().
// The argument `p` is ignored; the device is chosen by gpu_id_.
// NOTE(review): the pool is presumably per-thread, so this would affect only
// the calling thread's allocator — not visible here, confirm.
void ReleaseImpl(const platform::Place& p) override { uint64_t ReleaseImpl(const platform::Place& p) override {
return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->ReleaseImpl(); return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->ReleaseImpl();
} }
......
...@@ -162,7 +162,7 @@ void BuddyAllocator::Free(void* p) { ...@@ -162,7 +162,7 @@ void BuddyAllocator::Free(void* p) {
IndexSizeAddress(desc->get_index(), desc->get_total_size(), block)); IndexSizeAddress(desc->get_index(), desc->get_total_size(), block));
} }
void BuddyAllocator::Release() { uint64_t BuddyAllocator::Release() {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
int num = 0; int num = 0;
uint64_t bytes = 0; uint64_t bytes = 0;
...@@ -193,6 +193,7 @@ void BuddyAllocator::Release() { ...@@ -193,6 +193,7 @@ void BuddyAllocator::Release() {
} }
} }
VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes."; VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes.";
return bytes;
} }
size_t BuddyAllocator::Used() { return total_used_; } size_t BuddyAllocator::Used() { return total_used_; }
......
...@@ -41,7 +41,7 @@ class BuddyAllocator { ...@@ -41,7 +41,7 @@ class BuddyAllocator {
void* Alloc(size_t unaligned_size); void* Alloc(size_t unaligned_size);
void Free(void* ptr); void Free(void* ptr);
// Release the unused memory pool, a real free operation for the OS. // Release the unused memory pool, a real free operation for the OS.
void Release(); uint64_t Release();
size_t Used(); size_t Used();
size_t GetMinChunkSize(); size_t GetMinChunkSize();
size_t GetMaxChunkSize(); size_t GetMaxChunkSize();
......
...@@ -31,7 +31,7 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) { ...@@ -31,7 +31,7 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) {
return allocation::AllocatorFacade::Instance().Alloc(place, size); return allocation::AllocatorFacade::Instance().Alloc(place, size);
} }
// Free-function entry point: forwards to
// allocation::AllocatorFacade::Instance().Release(place). The new (right)
// version returns the facade's uint64_t result.
void Release(const platform::Place &place) { uint64_t Release(const platform::Place &place) {
return allocation::AllocatorFacade::Instance().Release(place); return allocation::AllocatorFacade::Instance().Release(place);
} }
......
...@@ -38,7 +38,7 @@ extern AllocationPtr Alloc(const platform::Place& place, size_t size); ...@@ -38,7 +38,7 @@ extern AllocationPtr Alloc(const platform::Place& place, size_t size);
extern AllocationPtr Alloc(const platform::DeviceContext& dev_ctx, size_t size); extern AllocationPtr Alloc(const platform::DeviceContext& dev_ctx, size_t size);
extern void Release(const platform::Place& place); extern uint64_t Release(const platform::Place& place);
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册