diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 8a86813e9362d7b82c2023428a35a1982adb0508..24ab33a1442e261bf98de13c2dd4b1f9af630ec8 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -404,9 +404,6 @@ class ExecutionContext {
     auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
         allocation_ptr, deleter);
 
-    PADDLE_ENFORCE(
-        dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
-        "The AllocationPtr must be TemporaryAllocation.");
     PADDLE_ENFORCE_GE(allocation_ptr->size(),
                       framework::product(dim) * sizeof(T));
 
diff --git a/paddle/fluid/framework/small_stack.h b/paddle/fluid/framework/small_stack.h
new file mode 100644
index 0000000000000000000000000000000000000000..6919ff7a28aac909511d0a8fd983df3eeaf3ca13
--- /dev/null
+++ b/paddle/fluid/framework/small_stack.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <deque>
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+
+template <typename T, size_t N>
+class SmallStack {
+  static_assert(N > 0, "N must be larger than 0");
+
+ public:
+  inline void push(const T& item) {
+    if (size_ < N) {
+      head_[size_] = item;
+    } else {
+      tail_.emplace_back(item);
+    }
+    ++size_;
+  }
+
+  inline void pop() {
+    PADDLE_ENFORCE(!empty(), "Try to pop element from empty stack.");
+    if (size_ > N) {
+      tail_.pop_back();
+    }
+    --size_;
+  }
+
+  inline const T& top() const {
+    PADDLE_ENFORCE(!empty(), "Try to get top element of empty stack.");
+    return size_ <= N ? head_[size_ - 1] : tail_.back();
+  }
+
+  inline T& top() {
+    PADDLE_ENFORCE(!empty(), "Try to get top element of empty stack.");
+    return size_ <= N ? head_[size_ - 1] : tail_.back();
+  }
+
+  inline bool empty() const { return size_ == 0; }
+
+  inline size_t size() const { return size_; }
+
+  // This API can only be used in unit tests
+  T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }
+
+  const T& operator[](size_t i) const {
+    return i < N ? head_[i] : tail_[i - N];
+  }
+
+ private:
+  T head_[N];
+  std::deque<T> tail_;
+  size_t size_{0};
+};
+
+}  // namespace framework
+}  // namespace paddle
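A minimal usage sketch of SmallStack (editorial illustration assuming only the header above; not part of the patch). The first N pushes live in the inline head_ array; later pushes spill into the deque tail_:

#include "paddle/fluid/framework/small_stack.h"

#include <cassert>

int main() {
  paddle::framework::SmallStack<int, 2> stack;
  stack.push(1);             // stored inline in head_[0]
  stack.push(2);             // stored inline in head_[1]
  stack.push(3);             // inline capacity exceeded, spills into tail_
  assert(stack.size() == 3);
  assert(stack.top() == 3);  // top comes from tail_
  stack.pop();               // pops from tail_
  assert(stack.top() == 2);  // back to the inline head_ array
  return 0;
}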
diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 4b7b9064dcde9b5209264257d51bbd976ba8eb85..5ff91dfbc28be8de6bc4289544ebfad1269857e6 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -3,8 +3,11 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
 cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
 cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
 cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
+cc_library(multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator)
 cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator)
+
 cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
+cc_test(multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator)
 
 if (WITH_GPU)
   nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard)
@@ -53,6 +56,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
     conditional_allocator
     retry_allocator
     buffered_allocator
+    multi_bin_buffered_allocator
     allocator_strategy
     legacy_allocator
     )
diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h
index fc1a8e9247b16374037bfde44449fd552b44c6b4..602d85bf9e8811bb7b1effc2292a84be1301b14c 100644
--- a/paddle/fluid/memory/allocation/aligned_allocator.h
+++ b/paddle/fluid/memory/allocation/aligned_allocator.h
@@ -93,6 +93,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
         underlying_allocator_->Allocate(size + kAlignment, attr);
     return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
   }
+
+  void FreeImpl(Allocation* allocation) override { delete allocation; }
 };
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc
index 8fb8a5fb897a736d7515951ba08c633da9a7706c..664b3b8420f7590dc605cb8c67059f334fc76139 100644
--- a/paddle/fluid/memory/allocation/allocator.cc
+++ b/paddle/fluid/memory/allocation/allocator.cc
@@ -26,17 +26,28 @@ Allocator::~Allocator() {}
 bool Allocator::IsAllocThreadSafe() const { return false; }
 
 AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
+  VLOG(2) << "Alloc allocation on " << typeid(*this).name();
   auto ptr = AllocateImpl(size, attr);
-  ptr->set_allocator(this);
+  ptr->RegisterAllocatorChain(this);
+  VLOG(2) << "Alloc success";
   return AllocationPtr(ptr);
 }
 
-void Allocator::Free(Allocation* allocation) { delete allocation; }
+void Allocator::FreeImpl(Allocation* allocation) {
+  auto* allocator = allocation->TopAllocator();
+  allocator->Free(allocation);
+}
+
+void Allocator::Free(Allocation* allocation) {
+  VLOG(2) << "Free allocation on " << typeid(*this).name();
+  allocation->PopAllocator();
+  FreeImpl(allocation);
+}
 
 const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
 
 void AllocationDeleter::operator()(Allocation* allocation) const {
-  auto* allocator = allocation->allocator();
+  auto* allocator = allocation->TopAllocator();
   allocator->Free(allocation);
 }
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index f2b6f438c382275cab4ecf9aceea1c55e5885dee..f74fab3c7516db1ce9f49fad091125265d78e5f2 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -15,6 +15,8 @@
 #pragma once
 #include <memory>
 #include <string>
+#include <vector>
+#include "paddle/fluid/framework/small_stack.h"
 #include "paddle/fluid/platform/place.h"
 
 namespace paddle {
@@ -47,10 +49,12 @@ class Allocator;
 class Allocation {
  public:
   Allocation(void* ptr, size_t size, platform::Place place)
-      : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
+      : ptr_(ptr), size_(size), place_(place) {}
 
   Allocation(const Allocation& o) = delete;
   Allocation& operator=(const Allocation& o) = delete;
+  Allocation(Allocation&& o) = delete;
+  Allocation& operator=(Allocation&& o) = delete;
 
   // Returns the holding pointer.
   // NOTE: For performance consideration, it is better not to make this method
@@ -72,17 +76,34 @@ class Allocation {
 
   const platform::Place& place() const { return place_; }
 
-  Allocator* allocator() { return allocator_; }
+  virtual ~Allocation();
 
-  void set_allocator(Allocator* allocator) { allocator_ = allocator; }
+  // This function should only be used in unit tests
+  std::vector<Allocator*> GetAllocatorChain() const {
+    std::vector<Allocator*> allocators;
+    allocators.reserve(allocator_chain_.size());
+    for (size_t i = 0; i < allocator_chain_.size(); ++i) {
+      allocators.push_back(allocator_chain_[i]);
+    }
+    return allocators;
+  }
 
-  virtual ~Allocation();
+ private:
+  inline void RegisterAllocatorChain(Allocator* allocator) {
+    allocator_chain_.push(allocator);
+  }
+
+  inline void PopAllocator() { allocator_chain_.pop(); }
+
+  inline Allocator* TopAllocator() { return allocator_chain_.top(); }
 
  private:
-  Allocator* allocator_;
   void* ptr_;
   size_t size_;
   platform::Place place_;
+  framework::SmallStack<Allocator*, 8> allocator_chain_;
+
+  friend class Allocator;
+  friend class AllocationDeleter;
 };
 
 using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
@@ -132,9 +153,12 @@ class Allocator {
   // True if the `Allocate` is thread safe.
   virtual bool IsAllocThreadSafe() const;
 
+  // This function should not be called outside the allocator framework
+  void Free(Allocation* allocation);
+
  protected:
-  virtual void Free(Allocation* allocation);
   virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
+  virtual void FreeImpl(Allocation* allocation);
 
  private:
   friend class AllocationDeleter;
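To see why the allocator chain removes the need for AllocationWithUnderlying wrappers, here is a self-contained toy model of the Allocate/Free protocol above (simplified stand-in types, not the real Paddle classes): each Allocate pushes its allocator onto the allocation's chain, and each Free pops it and forwards to the next allocator down.

#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <stack>

struct Allocation;

struct Allocator {
  virtual ~Allocator() = default;
  Allocation* Allocate(std::size_t size);
  void Free(Allocation* a);

 protected:
  virtual Allocation* AllocateImpl(std::size_t size) = 0;
  virtual void FreeImpl(Allocation* a);
};

struct Allocation {
  void* ptr{nullptr};
  std::stack<Allocator*> chain;  // plays the role of SmallStack
};

Allocation* Allocator::Allocate(std::size_t size) {
  Allocation* a = AllocateImpl(size);
  a->chain.push(this);  // record this allocator on the way out
  return a;
}
void Allocator::Free(Allocation* a) {
  a->chain.pop();  // this allocator is done with the allocation
  FreeImpl(a);
}
void Allocator::FreeImpl(Allocation* a) {
  a->chain.top()->Free(a);  // default: hand back to the next allocator down
}

struct Base : Allocator {  // the allocator that really owns the memory
  Allocation* AllocateImpl(std::size_t size) override {
    auto* a = new Allocation;
    a->ptr = std::malloc(size);
    return a;
  }
  void FreeImpl(Allocation* a) override {
    std::free(a->ptr);
    delete a;
  }
};

struct PassThrough : Allocator {  // stands in for Locked/Buffered/etc.
  explicit PassThrough(Allocator* u) : underlying(u) {}
  Allocator* underlying;
  Allocation* AllocateImpl(std::size_t size) override {
    return underlying->Allocate(size);  // pushes `underlying` onto the chain
  }
  // Inherits the default FreeImpl, which unwinds to `underlying`.
};

int main() {
  Base base;
  PassThrough outer(&base);
  Allocation* a = outer.Allocate(64);  // chain is now [base, outer]
  assert(a->chain.size() == 2);
  outer.Free(a);  // unwinds outer -> base, which frees the memory
  return 0;
}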
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index ea0b729dc6f62f517877e060cb0ecbe5c1d22e61..1a9f5e8f7f018029128b7dc9ddd0cb07549cc0b8 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -27,6 +27,7 @@
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/legacy_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
+#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cpu_info.h"
@@ -43,6 +44,8 @@ DEFINE_int64(
     "The retry time (milliseconds) when allocator fails "
     "to allocate memory. No retry if this value is not greater than 0");
 
+DEFINE_bool(enable_buffered_allocator, false, "Enable buffered_allocator");
+
 namespace paddle {
 namespace memory {
 namespace allocation {
@@ -110,8 +113,8 @@ class ChunkedAllocator : public Allocator {
   std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
-    std::unique_ptr<Allocator> allocator(new LockedAllocator(
-        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
+    std::shared_ptr<Allocator> allocator(new LockedAllocator(
+        std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
 
     if (retry_time_ > 0) {
       auto* retry_allocator =
@@ -119,6 +122,10 @@ class ChunkedAllocator : public Allocator {
       allocator.reset(retry_allocator);
     }
 
+    if (FLAGS_enable_buffered_allocator) {
+      allocator.reset(new MultiBinBufferedAllocator(allocator));
+    }
+
     return std::make_shared<AlignedAllocator<64>>(std::move(allocator));
   }
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc
index e3d6c2f511ef083ef9ecc1fe8df96051b2b85cc2..d87dd9a4b6df288065389a335a9ddb4047dd096a 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
   }
   return num;
 }
-void BestFitAllocator::Free(Allocation* allocation) {
+void BestFitAllocator::FreeImpl(Allocation* allocation) {
   auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(bf_allocation,
                           "The input allocation is not BestFitAllocation.");
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h
index 4f10f2b53e8543d4197097f1cae8de765bceeb0f..c137438c0c35a575d366a1dfdf950262f711defa 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.h
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
   void InsertFreeNode(const ListIt& it);
 
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc
index fc75abc9dfee6c9df5bc87faa493002cc1fe6298..e04c0aa34b1cd6200806cc2a012161e3478eca0b 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator.cc
@@ -22,11 +22,11 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
+BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
     : underlying_allocator_(std::move(allocator)) {
   PADDLE_ENFORCE_NOT_NULL(
       underlying_allocator_,
-      "Underlying allocator of BufferedAllocator must be unmanaged");
+      "Underlying allocator of BufferedAllocator must not be null");
   if (underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
   while (!allocations_.empty()) {  // free the largest
     auto it = --allocations_.end();
     cur += it->second->size();
-    delete it->second.release();
+    underlying_allocator_->Free(it->second.release());
    allocations_.erase(it);
     if (cur >= size) return;
   }
 }
 
-bool BufferedAllocator::IsAllocThreadSafe() const {
-  return this->underlying_allocator_->IsAllocThreadSafe();
-}
-void BufferedAllocator::Free(Allocation *allocation) {
+bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
+
+void BufferedAllocator::FreeImpl(Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   allocations_.emplace(allocation->size(), AllocationPtr(allocation));
 }
+
 Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   {
     platform::LockGuardPtr<std::mutex> guard(mtx_);
@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
     if (it != allocations_.end() && it->first < size * 2) {
       AllocationPtr result(std::move(it->second));
       allocations_.erase(it);
-      return new AllocationWithUnderlying(std::move(result));
+      return result.release();
     }
   }
 
   try {
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   } catch (BadAlloc &) {
     FreeCache(size);
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   }
 }
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h
index d44a3f85beba712b1e735ba14008689bce7d0d64..c728395705842d29a7b2a8441a7048a7e4bf5e6b 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.h
+++ b/paddle/fluid/memory/allocation/buffered_allocator.h
@@ -31,7 +31,7 @@ namespace allocation {
 // underlying_allocator_
 class BufferedAllocator : public Allocator {
  public:
-  explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
+  explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
 
   ~BufferedAllocator();
 
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
   void FreeCache(size_t size);
 
  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::multimap<size_t, AllocationPtr> allocations_;
   std::unique_ptr<std::mutex> mtx_;
 };
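The reuse rule in BufferedAllocator::AllocateImpl (`it->first < size * 2`) means a cached block serves a request when its size lies in [size, 2*size). A standalone illustration (editorial sketch, not patch code):

#include <cstdio>
#include <map>

int main() {
  // The cache maps block size -> cached allocation; here a single 1024-byte
  // block stands in for BufferedAllocator::allocations_.
  std::multimap<size_t, const char*> cache{{1024, "cached block"}};
  size_t request = 900;
  auto it = cache.lower_bound(request);  // smallest block >= 900 -> 1024
  if (it != cache.end() && it->first < request * 2) {
    // 1024 < 1800, so the cached block is reused instead of allocating anew.
    std::printf("reuse %zu-byte %s for a %zu-byte request\n", it->first,
                it->second, request);
  }
  return 0;
}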
diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
index 41ebb9dbeaf36eafe3dff4ae294b84427f660cbf..7b2138cf34ce1dc5358d4494a519b07b09608f6c 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
@@ -64,7 +64,7 @@ class StubAllocator : public Allocator {
   size_t GetFreeCount() const { return destruct_count_; }
 
  protected:
-  void Free(Allocation *allocation) override {
+  void FreeImpl(Allocation *allocation) override {
     auto *alloc = dynamic_cast<StubAllocation *>(allocation);
     PADDLE_ENFORCE_NOT_NULL(alloc);
     if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc
index cc81a6f7b8b1950b07b6fb1571b53d9b5ddb1b9f..0fb2e6e1496fcfed74121f0fae5ab87744f75aa2 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.cc
+++ b/paddle/fluid/memory/allocation/cpu_allocator.cc
@@ -25,7 +25,7 @@ CPUAllocation::CPUAllocation(void *ptr, size_t size)
 
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }
 
-void CPUAllocator::Free(Allocation *allocation) {
+void CPUAllocator::FreeImpl(Allocation *allocation) {
   PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
   free(allocation->ptr());
   delete allocation;
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h
index 26d3643f4edff1f2d71b1c761e915a6dacb485ad..9e0c2551860954f4681be6e1a223aa61ae3a4df0 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.h
+++ b/paddle/fluid/memory/allocation/cpu_allocator.h
@@ -43,7 +43,7 @@ class CPUAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index 430bf0be98e08787ac4412a8b6e0fcc310ffe2b4..2e7c4ee78f4dcc51391233cca8ef896784550da1 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -23,13 +23,14 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
-void CUDAAllocator::Free(Allocation* allocation) {
+void CUDAAllocator::FreeImpl(Allocation* allocation) {
   platform::CUDADeviceGuard guard(place_.device);
   auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
   PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
                     place_);
   PADDLE_ENFORCE(cudaFree(allocation->ptr()));
+  VLOG(2) << "cudaFree is called";
   delete allocation;
 }
 Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h
index 63726f5820b1c81565117c7a9bf798c17c9681f6..962f9a7c028a77ccb451fa06d295110c46a8cdc4 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_allocator.h
@@ -35,7 +35,7 @@ class CUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc
index 1936f9d4cd83c53cf7b322ab29a3e0d92e042abc..9c71c0bbcef3762591516691c18f231c89938453 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.cc
+++ b/paddle/fluid/memory/allocation/legacy_allocator.cc
@@ -336,7 +336,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   return new Allocation(ptr, size, place_);
 }
 
-void LegacyAllocator::Free(Allocation *allocation) {
+void LegacyAllocator::FreeImpl(Allocation *allocation) {
   boost::apply_visitor(
       legacy::FreeVisitor(allocation->ptr(), allocation->size()),
       allocation->place());
diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h
index d9bdae153da6439598f76f5cac226897e6e0c596..27cd42ea35012f07ae7db79c46d767138ddaafff 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.h
+++ b/paddle/fluid/memory/allocation/legacy_allocator.h
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
 
  protected:
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
 
  private:
   platform::Place place_;
diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc
index 835f6527c8a1d83340167bd9079f7cee25ad24cf..03a17814e1a6f5285cec7f12e6840bc4f08d9a27 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.cc
+++ b/paddle/fluid/memory/allocation/locked_allocator.cc
@@ -23,26 +23,24 @@ namespace allocation {
 bool LockedAllocator::IsAllocThreadSafe() const { return true; }
 
 LockedAllocator::LockedAllocator(
-    std::unique_ptr<Allocator> &&underlying_allocator)
+    std::shared_ptr<Allocator> underlying_allocator)
     : underlying_allocator_(std::move(underlying_allocator)) {
   PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
   if (!underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
 }
-void LockedAllocator::Free(Allocation *allocation) {
-  {
-    platform::LockGuardPtr<std::mutex> guard(mtx_);
-    reinterpret_cast<AllocationWithUnderlying *>(allocation)
-        ->allocation_.reset();  // Destroy inner allocation
-  }
-  delete allocation;
+
+void LockedAllocator::FreeImpl(Allocation *allocation) {
+  platform::LockGuardPtr<std::mutex> guard(mtx_);
+  underlying_allocator_->Free(allocation);
 }
+
 Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
-  return new AllocationWithUnderlying(
-      underlying_allocator_->Allocate(size, attr));
+  return underlying_allocator_->Allocate(size, attr).release();
 }
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h
index 4967b9bb8d3ad101cff4657b0a45b49b76e2deb2..b735ccef101417b3f880eb6dcdd9964cffbe875c 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.h
+++ b/paddle/fluid/memory/allocation/locked_allocator.h
@@ -24,15 +24,15 @@ namespace allocation {
 // An allocator that makes the underlying allocator thread safe.
 class LockedAllocator : public Allocator {
  public:
-  explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
+  explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
 
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::unique_ptr<std::mutex> mtx_;
 };
 
diff --git a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44240121f055dbbcb9246b00ee5d5ae38d7c1a55
--- /dev/null
+++ b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
@@ -0,0 +1,145 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
+#include <algorithm>
+#include <limits>
+#include "paddle/fluid/platform/lock_guard_ptr.h"
+
+DEFINE_double(tolerant_times, 2,
+              "Tolerant size multiple used when matching cached allocations "
+              "in buffered_allocator");
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+static void CheckAndModifyMemoryDivisionPlan(
+    std::vector<size_t> *division_plan) {
+  // Check whether the division plan is strictly sorted
+  bool is_strictly_sorted = true;
+  for (size_t i = 1; i < division_plan->size(); ++i) {
+    if ((*division_plan)[i - 1] >= (*division_plan)[i]) {
+      is_strictly_sorted = false;
+      break;
+    }
+  }
+  PADDLE_ENFORCE(is_strictly_sorted, "Division plan must be strictly sorted");
+
+  // Insert 0 into and remove MAX from the division plan to simplify the
+  // binary search code below
+  if (division_plan->empty() || division_plan->front() != 0) {
+    division_plan->insert(division_plan->begin(), 0);
+  }
+
+  constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
+  if (division_plan->back() == kSizeTypeMax) {
+    division_plan->pop_back();
+  }
+
+  PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
+}
+
+static std::vector<size_t> GetDefaultDivisionPlan() {
+  std::vector<size_t> plan;
+  for (size_t i = 0; i < sizeof(size_t) * 8; ++i) {
+    plan.push_back(static_cast<size_t>(1) << i);
+  }
+  return plan;
+}
+
+inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
+                                              size_t size) {
+  return static_cast<size_t>(std::upper_bound(bins.begin(), bins.end(), size) -
+                             bins.begin() - 1);
+}
+
+inline static size_t TolerantUpperSize(size_t size) {
+  return static_cast<size_t>(size * FLAGS_tolerant_times);
+}
+
+MultiBinBufferedAllocator::MultiBinBufferedAllocator(
+    std::shared_ptr<Allocator> underlying_allocator)
+    : MultiBinBufferedAllocator(std::move(underlying_allocator),
+                                GetDefaultDivisionPlan()) {}
+
+MultiBinBufferedAllocator::MultiBinBufferedAllocator(
+    std::shared_ptr<Allocator> underlying_allocator,
+    const std::vector<size_t> &division_plan)
+    : underlying_allocator_(std::move(underlying_allocator)),
+      division_plan_(division_plan) {
+  CheckAndModifyMemoryDivisionPlan(&division_plan_);
+  allocations_.resize(division_plan_.size());
+  mtx_.resize(division_plan_.size());
+  if (underlying_allocator_->IsAllocThreadSafe()) {
+    for (auto &mtx : mtx_) {
+      mtx.reset(new std::mutex());
+    }
+  }
+
+  VLOG(1) << "FLAGS_tolerant_times = " << FLAGS_tolerant_times;
+}
+
+void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
+  auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size());
+  {
+    platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
+    allocations_[bin_index].emplace(allocation->size(),
+                                    AllocationPtr(allocation));
+  }
+}
+
+void MultiBinBufferedAllocator::FreeCache(size_t size, size_t bin_index) {
+  size_t accumulated_size = 0;
+  // FIXME(zjl): free the largest first when there is no extra
+  for (size_t i = allocations_.size() - 1; i != static_cast<size_t>(-1); --i) {
+    platform::LockGuardPtr<std::mutex> lock(mtx_[i]);
+    while (!allocations_[i].empty()) {
+      auto it = --allocations_[i].end();
+      accumulated_size += it->second->size();
+      underlying_allocator_->Free(it->second.release());
+      allocations_[i].erase(it);
+      if (accumulated_size >= size) {
+        return;
+      }
+    }
+  }
+}
+
+Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
+  auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
+  auto upper_size = TolerantUpperSize(size);
+
+  for (; bin_index < division_plan_.size() &&
+         upper_size >= division_plan_[bin_index];
+       ++bin_index) {
+    auto &allocation = allocations_[bin_index];
+    platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
+    auto it = allocation.lower_bound(size);
+    if (it != allocation.end() && it->second->size() < upper_size) {
+      auto ret = std::move(it->second);
+      allocation.erase(it);
+      return ret.release();
+    }
+  }
+
+  try {
+    return underlying_allocator_->Allocate(size, attr).release();
+  } catch (BadAlloc &) {
+    VLOG(2) << "BadAlloc raised, try to free " << size << " bytes of cache";
+    FreeCache(size, bin_index);
+    return underlying_allocator_->Allocate(size, attr).release();
+  }
+}
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..e2437ff7e35832e80eaf15b5a4a6cd2c83cfe537
--- /dev/null
+++ b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <mutex>
+#include <vector>
+
+#include "paddle/fluid/memory/allocation/allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+class MultiBinBufferedAllocator : public Allocator {
+ public:
+  explicit MultiBinBufferedAllocator(
+      std::shared_ptr<Allocator> underlying_allocator);
+
+  MultiBinBufferedAllocator(std::shared_ptr<Allocator> underlying_allocator,
+                            const std::vector<size_t>& division_plan);
+
+  bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; }
+
+  void ClearCache() { FreeCache(static_cast<size_t>(-1), 0); }
+
+ protected:
+  Allocation* AllocateImpl(size_t size, Attr attr) override;
+  void FreeImpl(Allocation* allocation) override;
+
+ private:
+  void FreeCache(size_t size, size_t bin_index);
+
+  std::shared_ptr<Allocator> underlying_allocator_;
+  std::vector<std::multimap<size_t, AllocationPtr>> allocations_;
+  std::vector<size_t> division_plan_;
+  std::vector<std::unique_ptr<std::mutex>> mtx_;
+};
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
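A worked example (editorial sketch) of the bin lookup and tolerant search range used by MultiBinBufferedAllocator, assuming the default power-of-two division plan and FLAGS_tolerant_times = 2:

#include <algorithm>
#include <cstdio>
#include <vector>

// Same lookup as in the patch: index of the last bin boundary <= size.
static size_t FindDivisionPlanBinIndex(const std::vector<size_t>& bins,
                                       size_t size) {
  return static_cast<size_t>(std::upper_bound(bins.begin(), bins.end(), size) -
                             bins.begin() - 1);
}

int main() {
  // Default-style plan, truncated for the demo: {0, 1, 2, 4, ..., 2048}.
  std::vector<size_t> plan{0};
  for (size_t i = 0; i < 12; ++i) plan.push_back(static_cast<size_t>(1) << i);

  // A 900-byte request falls into the bin starting at 512 ...
  size_t bin = FindDivisionPlanBinIndex(plan, 900);
  std::printf("bin start = %zu\n", plan[bin]);  // 512

  // ... and tolerates cached blocks of size [900, 1800), so AllocateImpl's
  // loop visits the 512 bin and then the 1024 bin (1024 <= 1800) before
  // stopping at the 2048 bin (2048 > 1800) and falling through to the
  // underlying allocator.
  std::printf("tolerant upper size = %zu\n", static_cast<size_t>(900 * 2));
  return 0;
}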
diff --git a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..22787a8512338c9e2045f3e482171656811dd0eb
--- /dev/null
+++ b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
@@ -0,0 +1,148 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
+#include <gtest/gtest.h>
+#include <vector>
+#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
+#include "paddle/fluid/memory/allocation/cpu_allocator.h"
+#include "paddle/fluid/memory/allocation/locked_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
+    Allocation *allocation, bool thread_safe) {
+  std::shared_ptr<Allocator> allocator(new BestFitAllocator(allocation));
+  if (thread_safe) {
+    allocator.reset(new LockedAllocator(std::move(allocator)));
+  }
+
+  return std::make_shared<MultiBinBufferedAllocator>(allocator);
+}
+
+TEST(buffered_allocator, thread_safety) {
+  std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
+  auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
+  {
+    auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
+    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
+  }
+
+  {
+    auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
+    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
+  }
+}
+
+class StubAllocation : public Allocation {
+ public:
+  using Allocation::Allocation;
+};
+
+class StubAllocator : public Allocator {
+ public:
+  void ResetCounter() {
+    construct_count_ = 0;
+    destruct_count_ = 0;
+  }
+
+  size_t GetAllocCount() const { return construct_count_; }
+
+  size_t GetFreeCount() const { return destruct_count_; }
+
+ protected:
+  void FreeImpl(Allocation *allocation) override {
+    auto *alloc = dynamic_cast<StubAllocation *>(allocation);
+    PADDLE_ENFORCE_NOT_NULL(alloc);
+    if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
+    ++destruct_count_;
+    delete allocation;
+  }
+
+  Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
+    ++construct_count_;
+    if (size == 0) {
+      return new StubAllocation(nullptr, 0, platform::CPUPlace());
+    } else {
+      return new StubAllocation(new uint8_t[size], size, platform::CPUPlace());
+    }
+  }
+
+ private:
+  size_t construct_count_ = 0;
+  size_t destruct_count_ = 0;
+};
+
+constexpr size_t kZero = 0;
+constexpr size_t kOne = 1;
+constexpr size_t kTwo = 2;
+
+TEST(buffered_allocator, lazy_free) {
+  std::vector<size_t> original_alloc_size({1022, 1023, 1024, 1025, 1026});
+  for (auto alloc_size : original_alloc_size) {
+    auto stub_allocator = std::make_shared<StubAllocator>();
+    auto *underlying_allocator = stub_allocator.get();
+    auto allocator =
+        std::make_shared<MultiBinBufferedAllocator>(stub_allocator);
+
+    {
+      underlying_allocator->ResetCounter();
+      auto x = allocator->Allocate(alloc_size, allocator->kDefault);
+      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
+      x = nullptr;
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
+    }
+
+    {
+      underlying_allocator->ResetCounter();
+      auto x = allocator->Allocate(900, allocator->kDefault);
+      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
+      auto y = allocator->Allocate(2048, allocator->kDefault);
+      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
+      x = nullptr;
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
+      y = nullptr;
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
+    }
+
+    {
+      underlying_allocator->ResetCounter();
+      allocator->ClearCache();
+      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
+      ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
+    }
+  }
+}
+
+TEST(buffered_allocator, garbage_collection) {
+  std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
+  auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
+  auto allocator = GetBufferedAllocator(chunk.get(), false);
+  auto x1 = allocator->Allocate(1600, allocator->kDefault);
+  auto x2 = allocator->Allocate(400, allocator->kDefault);
+  x1 = nullptr;
+  x2 = nullptr;
+  auto x3 = allocator->Allocate(1600, allocator->kDefault);
+  ASSERT_NE(x3, nullptr);
+  ASSERT_NE(x3->ptr(), nullptr);
+}
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc
index de81d12cca6ca280289371abdec225c9e2b8f4d0..dfc52edf9c8b3539c565d00e291c121b62c2e22a 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.cc
+++ b/paddle/fluid/memory/allocation/pinned_allocator.cc
@@ -20,7 +20,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
-void CPUPinnedAllocator::Free(Allocation *allocation) {
+void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
   PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
   PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
   delete allocation;
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h
index 42d0938f2afbb1efca8bfdd7035bc0eada30f06b..3acb1f0c5ae35261667296280356efdc4f4b7649 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.h
+++ b/paddle/fluid/memory/allocation/pinned_allocator.h
@@ -31,7 +31,7 @@ class CPUPinnedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
 
diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc
index 981705051b449e6a35c2dcce9138dc2efae52920..7e888988f9602e362d73f64c1b45552e84e3349c 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator.cc
@@ -18,25 +18,15 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-bool RetryAllocator::IsAllocThreadSafe() const {
-  return underlying_allocator_->IsAllocThreadSafe();
-}
-
-void RetryAllocator::Free(Allocation* allocation) {
+void RetryAllocator::FreeImpl(Allocation* allocation) {
   // Delete underlying allocation first.
-  reinterpret_cast<RetryAllocation*>(allocation)->allocation_.reset();
-  {
-    // notify all waited allocators, they can try to allocate memory after free.
-    std::lock_guard<std::mutex> lock(mutex_);
-    cv_.notify_all();
-  }
-  delete allocation;
+  underlying_allocator_->Free(allocation);
+  cv_.notify_all();
 }
 
 Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   auto alloc_func = [&, this]() {
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   };
   // In fact, we can unify the code of allocation success and failure
   // But it would add lock even when allocation success at the first time
diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h
index 5efcac8b108002a2a2da920173d237096de4fffa..70b9c2ba1d6f91525fa87d8f1d442da58b641f2f 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.h
+++ b/paddle/fluid/memory/allocation/retry_allocator.h
@@ -24,32 +24,25 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-class RetryAllocator;
-
 class RetryAllocator : public Allocator {
  public:
-  RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
+  RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
       : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
-    EnforceCheck();
-  }
-
-  bool IsAllocThreadSafe() const override;
-
- private:
-  void EnforceCheck() {
     PADDLE_ENFORCE_NOT_NULL(
-        underlying_allocator_.get(),
-        "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
+        underlying_allocator_,
+        "UnderlyingAllocator of RetryAllocator must not be null");
     PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
                    "UnderlyingAllocator of RetryAllocator must be thread-safe");
   }
 
+  bool IsAllocThreadSafe() const override { return true; }
+
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::chrono::milliseconds retry_time_;
   std::mutex mutex_;
   std::condition_variable cv_;
@@ -57,8 +50,6 @@ class RetryAllocator : public Allocator {
   // For debug, We can add an atomic integer to record how many memory sizes are
   // waited to allocate
   // std::atomic<size_t> waited_allocate_size_{0};
-
-  friend class RetryAllocation;
 };
 
 }  // namespace allocation
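RetryAllocator::FreeImpl now frees through the underlying allocator first and then signals cv_, so an Allocate blocked in a retry wait can succeed. The diff does not show AllocateImpl's wait loop; the following self-contained model (all names are simplified assumptions, not Paddle's actual implementation) sketches the free-then-notify protocol:

#include <chrono>
#include <condition_variable>
#include <mutex>

class RetryModel {
 public:
  bool AllocateWithRetry(std::chrono::milliseconds retry_time) {
    if (TryAllocate()) return true;
    std::unique_lock<std::mutex> lock(mutex_);
    // Block until some Free() calls cv_.notify_all() (or the retry time
    // elapses), then attempt the allocation once more.
    cv_.wait_for(lock, retry_time);
    return TryAllocate();
  }

  void Free() {
    // Release the memory first, so a retried allocation can actually succeed,
    // then wake every waiter.
    cv_.notify_all();
  }

 private:
  bool TryAllocate() { return false; /* placeholder for the real attempt */ }
  std::mutex mutex_;
  std::condition_variable cv_;
};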
diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.cc b/paddle/fluid/memory/allocation/zero_size_allocator.cc
index cb2df1a029815478bbc9d3b09425f3ef145c5fb3..a0211b6d83281316f8f669a6039cb3ed3ed1e464 100644
--- a/paddle/fluid/memory/allocation/zero_size_allocator.cc
+++ b/paddle/fluid/memory/allocation/zero_size_allocator.cc
@@ -22,6 +22,14 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
   return underlying_allocator_->IsAllocThreadSafe();
 }
 
+void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
+  if (dynamic_cast<ZeroSizeAllocation *>(allocation)) {
+    delete allocation;
+  } else {
+    underlying_allocator_->Free(allocation);
+  }
+}
+
 Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   if (size == 0) {
     return new ZeroSizeAllocation(place_);
diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.h b/paddle/fluid/memory/allocation/zero_size_allocator.h
index 6b80245a34e7a6834aa75a90218845cc92036881..e6081798364272e41b13a9914bc92160dbda70bf 100644
--- a/paddle/fluid/memory/allocation/zero_size_allocator.h
+++ b/paddle/fluid/memory/allocation/zero_size_allocator.h
@@ -39,6 +39,7 @@ class ZeroSizeAllocator : public Allocator {
 
  protected:
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
+  void FreeImpl(Allocation* allocation) override;
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
diff --git a/paddle/fluid/platform/temporary_allocator.cc b/paddle/fluid/platform/temporary_allocator.cc
index 9cbdfe46e78dc84e58eae6929c887221d9562c69..6cb4ec1da5e324aece08234ea56704bb5df001ff 100644
--- a/paddle/fluid/platform/temporary_allocator.cc
+++ b/paddle/fluid/platform/temporary_allocator.cc
@@ -29,38 +29,31 @@ namespace paddle {
 namespace platform {
 namespace alloc = memory::allocation;
 
-TemporaryAllocation::TemporaryAllocation(
-    alloc::AllocationPtr &&underlying_allocation)
-    : Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
-                 underlying_allocation->place()),
-      underlying_allocation_(std::move(underlying_allocation)) {}
-
 TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
-  temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
+  temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
 }
 
 bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
 
 void TemporaryAllocator::Release(const std::function<void()> &callback) {
-  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations;
+  std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
   {
     std::unique_lock<std::mutex> lock(mtx_);
     callback();
     t_allocations.swap(temp_mem_map_);
-    temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
+    temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
     wait_delete_mem_ = 0;
   }
+
+  alloc::AllocationDeleter deleter;
   for (auto tmp : *t_allocations) {
     VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
              << " size: " << tmp.second->size();
-    delete tmp.second;
+    deleter(tmp.second);
   }
 }
 
-void TemporaryAllocator::Free(alloc::Allocation *allocation) {
-  auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
-  PADDLE_ENFORCE_NOT_NULL(temp_allocation);
+void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
   if (platform::is_gpu_place(temp_allocation->place())) {
     PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
                    "The place should be the same.");
@@ -84,7 +77,6 @@ void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
   }
   VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
            << " size: " << temp_allocation->size();
-  delete temp_allocation;
 }
 
 size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
@@ -119,11 +111,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
   }
   // If no available allocation is found, get one from
   // AllocatorFacadeInstance.
-  auto raw_allocation =
-      alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
-  auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
+  auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
   VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
-  return temp_mem;
+  return temp_mem.release();
 }
 
 }  // namespace platform
diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h
index d657a14223326aa1e2cb5b154a10a56ae742f95c..cead316ed94135f66eeb81d3bf911986660d3f43 100644
--- a/paddle/fluid/platform/temporary_allocator.h
+++ b/paddle/fluid/platform/temporary_allocator.h
@@ -22,14 +22,6 @@
 namespace paddle {
 namespace platform {
 
-class TemporaryAllocation : public memory::allocation::Allocation {
- public:
-  explicit TemporaryAllocation(
-      memory::allocation::AllocationPtr &&underlying_allocation);
-
-  memory::allocation::AllocationPtr underlying_allocation_;
-};
-
 /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
  * which is used by CUDA's async operation.
  *
@@ -56,7 +48,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   void SetCallback(const std::function<void()> &callback);
 
  protected:
-  void Free(memory::allocation::Allocation *allocation) override;
+  void FreeImpl(memory::allocation::Allocation *allocation) override;
   memory::allocation::Allocation *AllocateImpl(
       size_t size, memory::allocation::Allocator::Attr attr) override;
 
@@ -65,8 +57,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   platform::Place place_;
   // When the allocation is not held by any variable, it should be placed
   // to temp_mem_map immediately.
-  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
-      nullptr};
+  std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
+      temp_mem_map_{nullptr};
   std::mutex mtx_;
   size_t wait_delete_mem_{0};
   std::function<void()> callback_;
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index cf59ff6d3b97a4be5d87f1185acc6173b5d501b2..5820d87ce2f3a4a6dbc3a49bf499fb3735920bed 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -308,6 +308,7 @@ PYBIND11_MODULE(core, m) {
            [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
              self.mutable_data<float>(place);
            })
+      .def("_clear", &Tensor::clear)
       .def("set", PyCPUTensorSetFromArray<float>)
       .def("set", PyCPUTensorSetFromArray<int>)
      .def("set", PyCPUTensorSetFromArray<double>)
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 8102732c55be2e9922875a7f7b29d68aba1f4900..2fa6b79caf129862e501842b0f7b86926423d3a1 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -129,6 +129,7 @@ def __bootstrap__():
         'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
         'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
         'fast_eager_deletion_mode', 'allocator_strategy',
+        'enable_buffered_allocator', 'tolerant_times',
         'reader_queue_speed_test_mode', 'print_sub_graph_dir',
         'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
         'enable_parallel_graph', 'multiple_of_cupti_buffer_size',
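Both new flags are plain gflags, so they can be toggled on the command line or from C++ before the first allocation happens. A hedged sketch (assumes only standard gflags; the flag names come from this patch, the helper function is illustrative):

#include <gflags/gflags.h>

DECLARE_bool(enable_buffered_allocator);
DECLARE_double(tolerant_times);

// Equivalent to passing --enable_buffered_allocator=true --tolerant_times=2.
void EnableMultiBinBufferedAllocator() {
  FLAGS_enable_buffered_allocator = true;  // wrap each chunk allocator
  FLAGS_tolerant_times = 2;  // reuse cached blocks of size [size, 2 * size)
}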