未验证 提交 ced5c40c 编写于 作者: W Wilber 提交者: GitHub

Update memory release interface. (#28456)

上级 7821759d
......@@ -178,7 +178,9 @@ class Allocator {
FreeImpl(allocation);
}
inline void Release(const platform::Place& place) { ReleaseImpl(place); }
// Public entry point for releasing memory associated with `place`.
// Simply dispatches to the virtual ReleaseImpl hook and hands its result
// back to the caller unchanged.
inline uint64_t Release(const platform::Place& place) {
  const uint64_t released = ReleaseImpl(place);
  return released;
}
// True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const;
......@@ -186,7 +188,7 @@ class Allocator {
protected:
virtual Allocation* AllocateImpl(size_t size) = 0;
virtual void FreeImpl(Allocation* allocation);
virtual void ReleaseImpl(const platform::Place& place) {}
// Default release hook used when a subclass does not override it:
// performs no work and reports 0.
virtual uint64_t ReleaseImpl(const platform::Place& place) {
  constexpr uint64_t kNothingReleased = 0;
  return kNothingReleased;
}
};
using AllocationDeleter = Allocator::AllocationDeleter;
......
......@@ -287,8 +287,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
return m_->GetAllocator(place, size)->Allocate(size);
}
void AllocatorFacade::Release(const platform::Place& place) {
m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
// Looks up the allocator for `place` (passing a non-zero size so that
// GetAllocator picks allocator_, per the inline note below), calls Release on
// it, and returns that call's result to the caller.
uint64_t AllocatorFacade::Release(const platform::Place& place) {
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
->Release(place);
}
......
......@@ -45,7 +45,7 @@ class AllocatorFacade {
AllocationPtr Alloc(const platform::Place& place, size_t size);
// Release the unused memory pool; returns the value reported by the
// underlying allocator's Release.
void Release(const platform::Place& place);
uint64_t Release(const platform::Place& place);
// TODO(yy): Allocate a Copy-On-Write allocation?
private:
......
......@@ -138,18 +138,21 @@ void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
}
}
void AutoGrowthBestFitAllocator::FreeIdleChunks() {
// Drops every chunk that consists of exactly one block which is marked free.
// For each such chunk the block is removed from free_blocks_ and the chunk is
// erased from chunks_. Returns the sum of the sizes of the blocks dropped.
uint64_t AutoGrowthBestFitAllocator::FreeIdleChunks() {
  uint64_t released_bytes = 0;
  auto cursor = chunks_.begin();
  while (cursor != chunks_.end()) {
    auto &chunk_blocks = cursor->blocks_;
    const bool is_idle =
        chunk_blocks.size() == 1 && chunk_blocks.begin()->is_free_;
    if (!is_idle) {
      // Chunk is still (partly) in use: leave it alone and move on.
      ++cursor;
      continue;
    }

    auto &only_block = *chunk_blocks.begin();
    VLOG(2) << "Free chunk with size " << only_block.size_;
    released_bytes += only_block.size_;
    free_blocks_.erase(std::make_pair(only_block.size_, only_block.ptr_));
    // erase() yields the iterator following the removed chunk, so the walk
    // continues without a separate increment.
    cursor = chunks_.erase(cursor);
  }
  return released_bytes;
}
} // namespace allocation
......
......@@ -40,10 +40,12 @@ class AutoGrowthBestFitAllocator : public Allocator {
void FreeImpl(Allocation *allocation) override;
// Release the memory blocks in the pool that are not in use.
void ReleaseImpl(const platform::Place &place) override { FreeIdleChunks(); }
// Release hook for this allocator: runs FreeIdleChunks() and returns its
// result. The `place` argument is not consulted.
uint64_t ReleaseImpl(const platform::Place &place) override {
  const uint64_t freed = FreeIdleChunks();
  return freed;
}
private:
void FreeIdleChunks();
uint64_t FreeIdleChunks();
template <typename T>
using List = std::list<T>;
......
......@@ -54,7 +54,7 @@ template <typename Place>
void Free(const Place &place, void *p, size_t size);
template <typename Place>
void Release(const Place &place);
uint64_t Release(const Place &place);
template <typename Place>
size_t Used(const Place &place);
......@@ -103,8 +103,8 @@ void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p,
}
template <>
void Release<platform::CPUPlace>(const platform::CPUPlace &place) {
GetCPUBuddyAllocator()->Release();
// CPUPlace specialization: calls Release() on the object returned by
// GetCPUBuddyAllocator() and returns that call's result. `place` itself is
// not read.
uint64_t Release<platform::CPUPlace>(const platform::CPUPlace &place) {
return GetCPUBuddyAllocator()->Release();
}
template <>
......@@ -195,7 +195,7 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
}
template <>
void Release<platform::XPUPlace>(const platform::XPUPlace &place) {
uint64_t Release<platform::XPUPlace>(const platform::XPUPlace &place) {
#ifdef PADDLE_WITH_XPU
PADDLE_THROW(
platform::errors::PermissionDenied("Release XPU pool is not supported."));
......@@ -333,9 +333,9 @@ void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p,
}
template <>
void Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
uint64_t Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
#ifdef PADDLE_WITH_CUDA
GetGPUBuddyAllocator(place.device)->Release();
return GetGPUBuddyAllocator(place.device)->Release();
#else
PADDLE_THROW(platform::errors::PermissionDenied(
"'CUDAPlace' is not supported in CPU only device."));
......@@ -401,10 +401,10 @@ void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
}
template <>
void Release<platform::CUDAPinnedPlace>(
uint64_t Release<platform::CUDAPinnedPlace>(
const platform::CUDAPinnedPlace &place) {
#ifdef PADDLE_WITH_CUDA
GetCUDAPinnedBuddyAllocator()->Release();
return GetCUDAPinnedBuddyAllocator()->Release();
#else
PADDLE_THROW(platform::errors::PermissionDenied(
"'CUDAPinnedPlace' is not supported in CPU only device."));
......@@ -437,10 +437,10 @@ struct FreeVisitor : public boost::static_visitor<void> {
size_t size_;
};
struct ReleaseVisitor : public boost::static_visitor<void> {
struct ReleaseVisitor : public boost::static_visitor<uint64_t> {
template <typename Place>
inline void operator()(const Place &place) const {
Release<Place>(place);
// Visitor call operator: dispatches to the Release<Place> specialization that
// matches the visited place type and returns its result.
inline uint64_t operator()(const Place &place) const {
return Release<Place>(place);
}
};
......@@ -486,8 +486,8 @@ void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
delete allocation;
}
void NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) {
boost::apply_visitor(legacy::ReleaseVisitor(), place);
// Applies legacy::ReleaseVisitor to `place` so that the Release
// specialization for the concrete place type runs; returns the visitor's
// result.
uint64_t NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) {
return boost::apply_visitor(legacy::ReleaseVisitor(), place);
}
} // namespace allocation
......
......@@ -35,7 +35,7 @@ class NaiveBestFitAllocator : public Allocator {
protected:
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override;
void ReleaseImpl(const platform::Place &place) override;
uint64_t ReleaseImpl(const platform::Place &place) override;
private:
platform::Place place_;
......
......@@ -47,8 +47,8 @@ class RetryAllocator : public Allocator {
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void ReleaseImpl(const platform::Place& place) override {
underlying_allocator_->Release(place);
// Forwards the release request to underlying_allocator_ and returns the
// value it reports.
uint64_t ReleaseImpl(const platform::Place& place) override {
return underlying_allocator_->Release(place);
}
private:
......
......@@ -72,7 +72,9 @@ void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
delete allocation;
}
void ThreadLocalAllocatorImpl::ReleaseImpl() { buddy_allocator_->Release(); }
// Delegates the release to the owned buddy allocator and passes its return
// value through to the caller.
uint64_t ThreadLocalAllocatorImpl::ReleaseImpl() {
  const uint64_t released = buddy_allocator_->Release();
  return released;
}
} // namespace allocation
} // namespace memory
......
......@@ -52,7 +52,7 @@ class ThreadLocalAllocatorImpl
explicit ThreadLocalAllocatorImpl(const platform::Place& p);
ThreadLocalAllocation* AllocateImpl(size_t size);
void FreeImpl(ThreadLocalAllocation* allocation);
void ReleaseImpl();
uint64_t ReleaseImpl();
private:
std::unique_ptr<memory::detail::BuddyAllocator> buddy_allocator_;
......@@ -92,7 +92,7 @@ class ThreadLocalCUDAAllocator : public Allocator {
auto allocator_impl = tl_allocation->GetAllocator();
allocator_impl->FreeImpl(tl_allocation);
}
void ReleaseImpl(const platform::Place& p) override {
// Fetches the allocator impl registered in ThreadLocalCUDAAllocatorPool for
// gpu_id_ and returns the result of its ReleaseImpl(). The `p` argument is
// not used.
uint64_t ReleaseImpl(const platform::Place& p) override {
return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->ReleaseImpl();
}
......
......@@ -162,7 +162,7 @@ void BuddyAllocator::Free(void* p) {
IndexSizeAddress(desc->get_index(), desc->get_total_size(), block));
}
void BuddyAllocator::Release() {
uint64_t BuddyAllocator::Release() {
std::lock_guard<std::mutex> lock(mutex_);
int num = 0;
uint64_t bytes = 0;
......@@ -193,6 +193,7 @@ void BuddyAllocator::Release() {
}
}
VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes.";
return bytes;
}
size_t BuddyAllocator::Used() { return total_used_; }
......
......@@ -41,7 +41,7 @@ class BuddyAllocator {
void* Alloc(size_t unaligned_size);
void Free(void* ptr);
// Release the unused memory pool back to the OS (a real free operation) and
// return the number of bytes freed.
void Release();
uint64_t Release();
size_t Used();
size_t GetMinChunkSize();
size_t GetMaxChunkSize();
......
......@@ -31,7 +31,7 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) {
return allocation::AllocatorFacade::Instance().Alloc(place, size);
}
void Release(const platform::Place &place) {
// Free-function wrapper: forwards to the AllocatorFacade singleton's Release
// for `place` and returns its result.
uint64_t Release(const platform::Place &place) {
return allocation::AllocatorFacade::Instance().Release(place);
}
......
......@@ -38,7 +38,7 @@ extern AllocationPtr Alloc(const platform::Place& place, size_t size);
extern AllocationPtr Alloc(const platform::DeviceContext& dev_ctx, size_t size);
extern void Release(const platform::Place& place);
extern uint64_t Release(const platform::Place& place);
} // namespace memory
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册