diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index a02e53dcf764368601646a900833ac650c5bb31a..6d8ba430bd0b9c9e48b4a80a07feb24b2da7d7b8 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -365,9 +365,6 @@ class ExecutionContext {
     auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
         allocation_ptr, deleter);
 
-    PADDLE_ENFORCE(
-        dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
-        "The AllocationPtr must be TemporaryAllocation.");
     PADDLE_ENFORCE_GE(allocation_ptr->size(),
                       framework::product(dim) * sizeof(T));
 
diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index ac77c3d2a500816a4eb41ed13f23ee628290f287..0f6014ae8aa28f090cb51401ee2cb0772bca7a45 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
 cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
 cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
 cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
+cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
 cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
 
 if (WITH_GPU)
@@ -37,30 +38,20 @@ else ()
   set(AllocatorFacadeDeps)
 endif()
 
+list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
+
 cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
 cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
-cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
 cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
-cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags)
-cc_library(allocator_facade SRCS allocator_facade.cc DEPS
-  ${AllocatorFacadeDeps}
-  cpu_allocator
-  locked_allocator
-  best_fit_allocator
-  aligned_allocator
-  auto_increment_allocator
-  zero_size_allocator
-  conditional_allocator
-  retry_allocator
-  buffered_allocator
-  allocator_strategy
-  legacy_allocator
-  )
+cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps})
+cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
 
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
 
 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
 
+cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
+
 cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
 
 cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
 
diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h
index fc1a8e9247b16374037bfde44449fd552b44c6b4..b536d4276e3b6236d0748eee588d345dd15c6954 100644
--- a/paddle/fluid/memory/allocation/aligned_allocator.h
+++ b/paddle/fluid/memory/allocation/aligned_allocator.h
@@ -14,6 +14,7 @@
 
 #pragma once
 #include <memory>
+#include <utility>
 #include "paddle/fluid/memory/allocation/allocator.h"
 
 namespace paddle {
@@ -93,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
         underlying_allocator_->Allocate(size + kAlignment, attr);
     return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
   }
+
+  void FreeImpl(Allocation* allocation) override { delete allocation; }
 };
 
 }  // namespace allocation
 
diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc
index 8fb8a5fb897a736d7515951ba08c633da9a7706c..5a5253d911abc722c026730e7e88eb326bb82afd 100644
--- a/paddle/fluid/memory/allocation/allocator.cc
+++ b/paddle/fluid/memory/allocation/allocator.cc
@@ -27,16 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
 
 AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
   auto ptr = AllocateImpl(size, attr);
-  ptr->set_allocator(this);
+  ptr->RegisterDecoratedAllocator(this);
   return AllocationPtr(ptr);
 }
 
-void Allocator::Free(Allocation* allocation) { delete allocation; }
+void Allocator::FreeImpl(Allocation* allocation) {
+  Allocator* allocator = allocation->TopDecoratedAllocator();
+  allocator->Free(allocation);
+}
+
+void Allocator::Free(Allocation* allocation) {
+  allocation->PopDecoratedAllocator();
+  FreeImpl(allocation);
+}
 
 const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
 
 void AllocationDeleter::operator()(Allocation* allocation) const {
-  auto* allocator = allocation->allocator();
+  Allocator* allocator = allocation->TopDecoratedAllocator();
   allocator->Free(allocation);
 }
 
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index f2b6f438c382275cab4ecf9aceea1c55e5885dee..33b816b90812d7fedc450a67743b5d7d20579302 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -15,6 +15,8 @@
 #pragma once
 #include <memory>
 #include <string>
+#include <utility>
+#include <vector>
 #include "paddle/fluid/platform/place.h"
 
 namespace paddle {
@@ -44,13 +46,56 @@ class Allocator;
 // NOTE: this is the base class of Allocation. Each allocator can use its own
 //       allocation object.
 // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
+
+/**
+ * Allocation is returned by Allocator::Allocate() method.
+ *
+ * An allocator may be decorated by another allocator. For example, we can
+ * wrap any allocator with a RetryAllocator so that an allocation request is
+ * retried when the first attempt fails.
+ *
+ * Explanations of the Allocator design are as follows:
+ *
+ * Suppose we have an allocator which is decorated by several allocators:
+ *
+ *   A(1) <- A(2) <- A(3) <- ... <- A(n)
+ *
+ * , and the public allocator is A(1).
+ *
+ * The allocation process would be:
+ *
+ *   A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
+ *
+ * , and the free process would be:
+ *
+ *   A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
+ *
+ * Therefore, we should record the allocator chain when allocating, so
+ * that the allocation can be freed in the reverse order of the chain.
+ * The field `decorated_allocators_` is used to record this chain.
+ *
+ * Another example is that we want to add additional fields to an
+ * Allocation, e.g., what is done in AlignedAllocator. In this case,
+ * we should declare a derived class of Allocation, which contains the
+ * underlying Allocation allocated by the underlying allocator.
+ * Therefore, the `decorated_allocators_` of the new Allocation object
+ * would be a new chain, differing from the underlying Allocation object's.
+ */
 class Allocation {
  public:
   Allocation(void* ptr, size_t size, platform::Place place)
-      : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
+      : ptr_(ptr), size_(size), place_(place) {
+    // NOTE(zjl): Since decorated_allocators_ is usually a small vector,
+    // we reserve a small buffer for it to prevent frequent heap allocations.
+    // Not quite sure whether we need something like a gtl small vector here.
+    decorated_allocators_.reserve(8);
+  }
 
   Allocation(const Allocation& o) = delete;
   Allocation& operator=(const Allocation& o) = delete;
+  Allocation(Allocation&& o) = delete;
+  Allocation& operator=(Allocation&& o) = delete;
 
   // Returns the holding pointer.
   // NOTE: For performance consideration, it is better not to make this method
@@ -72,17 +117,31 @@ class Allocation {
 
   const platform::Place& place() const { return place_; }
 
-  Allocator* allocator() { return allocator_; }
+  virtual ~Allocation();
 
-  void set_allocator(Allocator* allocator) { allocator_ = allocator; }
+ private:
+  const std::vector<Allocator*>& DecoratedAllocators() const {
+    return decorated_allocators_;
+  }
 
-  virtual ~Allocation();
+  inline void RegisterDecoratedAllocator(Allocator* allocator) {
+    decorated_allocators_.push_back(allocator);
+  }
+
+  inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); }
+
+  inline Allocator* TopDecoratedAllocator() {
+    return decorated_allocators_.back();
+  }
 
  private:
-  Allocator* allocator_;
   void* ptr_;
   size_t size_;
   platform::Place place_;
+  std::vector<Allocator*> decorated_allocators_;
+
+  friend class Allocator;
+  friend class AllocationDeleter;
 };
 
 using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
@@ -132,9 +191,12 @@ class Allocator {
   // True if the `Allocate` is thread safe.
   virtual bool IsAllocThreadSafe() const;
 
+  // This function should not be called outside the allocator framework.
+  void Free(Allocation* allocation);
+
  protected:
-  virtual void Free(Allocation* allocation);
   virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
+  virtual void FreeImpl(Allocation* allocation);
 
  private:
   friend class AllocationDeleter;
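The decorator chain recorded by `RegisterDecoratedAllocator` above is easiest to see with a minimal leaf allocator. The sketch below is illustrative only and is not part of the patch; `DummyAllocator` is a hypothetical name.

    #include <cstdlib>
    #include "paddle/fluid/memory/allocation/allocator.h"

    namespace paddle {
    namespace memory {
    namespace allocation {

    // Hypothetical leaf allocator, for illustration only.
    class DummyAllocator : public Allocator {
     protected:
      Allocation* AllocateImpl(size_t size, Allocator::Attr) override {
        return new Allocation(std::malloc(size), size, platform::CPUPlace());
      }
      void FreeImpl(Allocation* allocation) override {
        std::free(allocation->ptr());
        delete allocation;
      }
    };

    // Allocator::Allocate() calls AllocateImpl() and then
    // RegisterDecoratedAllocator(this), so a decorator that forwards to an
    // underlying allocator records the chain inner-first, outer-last:
    //
    //   auto dummy = std::make_shared<DummyAllocator>();
    //   LockedAllocator locked(dummy);
    //   AllocationPtr p = locked.Allocate(256);
    //   // decorated_allocators_ == {DummyAllocator, LockedAllocator}
    //
    // Destroying p invokes AllocationDeleter, which calls
    // TopDecoratedAllocator()->Free(); each Free() pops one level before
    // running FreeImpl(), so the chain unwinds outer-to-inner.

    }  // namespace allocation
    }  // namespace memory
    }  // namespace paddle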
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index ea0b729dc6f62f517877e060cb0ecbe5c1d22e61..09328aded58cb0cccd9de0aba399f5c49313042f 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -17,6 +17,7 @@
 #include <map>
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
@@ -30,6 +31,7 @@
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cpu_info.h"
+#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/memory/allocation/cuda_allocator.h"
@@ -47,6 +49,17 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+static inline std::shared_ptr<Allocator> WrapRetryAllocator(
+    std::shared_ptr<Allocator> allocator, int64_t retry_time) {
+  if (retry_time > 0) {
+    auto* retry_allocator =
+        new RetryAllocator(std::move(allocator), retry_time);
+    allocator.reset(retry_allocator);
+  }
+
+  return allocator;
+}
+
 // TODO(yy): Dirty code here. This class should be configurable in runtime.
 class CPUManagedAllocator : public Allocator {
  public:
@@ -110,14 +123,10 @@ class ChunkedAllocator : public Allocator {
   std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
-    std::unique_ptr<Allocator> allocator(new LockedAllocator(
-        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
+    std::shared_ptr<Allocator> allocator(new LockedAllocator(
+        std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
 
-    if (retry_time_ > 0) {
-      auto* retry_allocator =
-          new RetryAllocator(std::move(allocator), retry_time_);
-      allocator.reset(retry_allocator);
-    }
+    allocator = WrapRetryAllocator(allocator, retry_time_);
 
     return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
   }
@@ -188,13 +197,23 @@ class AllocatorFacadePrivate {
   ~AllocatorFacadePrivate() = default;
 
   AllocatorFacadePrivate() {
-    if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) {
-      InitLegacyAllocator();
-    } else {
-      InitCPUAllocator();
-      InitCUDAAllocator();
-      InitCUDAPinnedAllocator();
-      WrapZeroSizeAllocator();
+    auto strategy = GetAllocatorStrategy();
+    switch (strategy) {
+      case AllocatorStrategy::kLegacy: {
+        InitLegacyAllocator();
+        break;
+      }
+      case AllocatorStrategy::kNaiveBestFit: {
+        InitCPUAllocator();
+        InitCUDAAllocator();
+        InitCUDAPinnedAllocator();
+        WrapZeroSizeAllocator();
+        break;
+      }
+      default: {
+        PADDLE_THROW("Unsupported allocator strategy: %d",
+                     static_cast<int>(strategy));
+      }
     }
   }
 
@@ -252,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
 
 std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size, Allocator::Attr attr) {
-  return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(),
-                                     AllocationDeleter());
+  return std::shared_ptr<Allocation>(Alloc(place, size, attr));
 }
 
 AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,
 
diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc
index b46b1e9ae206b82f5810b4ba7345ebc60fb84285..fff94c01e709613603eea7150a08df3c2611dec2 100644
--- a/paddle/fluid/memory/allocation/allocator_strategy.cc
+++ b/paddle/fluid/memory/allocation/allocator_strategy.cc
@@ -14,20 +14,27 @@
 
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
 #include "gflags/gflags.h"
+#include "paddle/fluid/platform/enforce.h"
 
 DEFINE_string(
     allocator_strategy, "legacy",
     "The allocation strategy. Legacy means the original allocator of Fluid."
-    "New means the experimental allocators of Fluid. in [legacy, new]");
+    "naive_best_fit means the experimental best fit allocator. "
+    "Enum in [legacy, naive_best_fit].");
 
 namespace paddle {
 namespace memory {
 namespace allocation {
 
 static AllocatorStrategy GetStrategyFromFlag() {
-  return FLAGS_allocator_strategy == "legacy"
-             ? AllocatorStrategy::kLegacy
-             : AllocatorStrategy::kNaiveBestFit;
+  if (FLAGS_allocator_strategy == "legacy") {
+    return AllocatorStrategy::kLegacy;
+  } else if (FLAGS_allocator_strategy == "naive_best_fit") {
+    return AllocatorStrategy::kNaiveBestFit;
+  } else {
+    PADDLE_THROW("Unsupported allocator strategy: %s",
+                 FLAGS_allocator_strategy);
+  }
 }
 
 AllocatorStrategy GetAllocatorStrategy() {
 
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc
index e3d6c2f511ef083ef9ecc1fe8df96051b2b85cc2..d87dd9a4b6df288065389a335a9ddb4047dd096a 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
   }
   return num;
 }
-void BestFitAllocator::Free(Allocation* allocation) {
+void BestFitAllocator::FreeImpl(Allocation* allocation) {
   auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(bf_allocation,
                           "The input allocation is not BestFitAllocation.");
 
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h
index 4f10f2b53e8543d4197097f1cae8de765bceeb0f..c137438c0c35a575d366a1dfdf950262f711defa 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.h
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
   void InsertFreeNode(const ListIt& it);
 
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
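The allocator_strategy flag above now fails fast on unknown values instead of silently falling through to the experimental allocator. Selecting the new strategy from C++ mirrors what the new facade test later in this patch does (illustrative only):

    #include "gflags/gflags.h"

    DECLARE_string(allocator_strategy);

    void SelectNaiveBestFit() {
      // "legacy" and "naive_best_fit" are now the only accepted values;
      // anything else makes GetStrategyFromFlag() PADDLE_THROW.
      FLAGS_allocator_strategy = "naive_best_fit";
    }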
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc
index fc75abc9dfee6c9df5bc87faa493002cc1fe6298..e04c0aa34b1cd6200806cc2a012161e3478eca0b 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator.cc
@@ -22,11 +22,11 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
+BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
     : underlying_allocator_(std::move(allocator)) {
   PADDLE_ENFORCE_NOT_NULL(
       underlying_allocator_,
-      "Underlying allocator of BufferedAllocator must be unmanaged");
+      "Underlying allocator of BufferedAllocator must not be null");
   if (underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
   while (!allocations_.empty()) {  // free the largest
     auto it = --allocations_.end();
     cur += it->second->size();
-    delete it->second.release();
+    underlying_allocator_->Free(it->second.release());
     allocations_.erase(it);
     if (cur >= size) return;
   }
 }
 
-bool BufferedAllocator::IsAllocThreadSafe() const {
-  return this->underlying_allocator_->IsAllocThreadSafe();
-}
-void BufferedAllocator::Free(Allocation *allocation) {
+bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
+
+void BufferedAllocator::FreeImpl(Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   allocations_.emplace(allocation->size(), AllocationPtr(allocation));
 }
+
 Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   {
     platform::LockGuardPtr<std::mutex> guard(mtx_);
@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
     if (it != allocations_.end() && it->first < size * 2) {
       AllocationPtr result(std::move(it->second));
       allocations_.erase(it);
-      return new AllocationWithUnderlying(std::move(result));
+      return result.release();
     }
   }
 
   try {
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   } catch (BadAlloc &) {
     FreeCache(size);
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   }
 }
 
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h
index d44a3f85beba712b1e735ba14008689bce7d0d64..c728395705842d29a7b2a8441a7048a7e4bf5e6b 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.h
+++ b/paddle/fluid/memory/allocation/buffered_allocator.h
@@ -31,7 +31,7 @@ namespace allocation {
 // underlying_allocator_
 class BufferedAllocator : public Allocator {
  public:
-  explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
+  explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
 
   ~BufferedAllocator();
 
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
   void FreeCache(size_t size);
 
  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::multimap<size_t, AllocationPtr> allocations_;
   std::unique_ptr<std::mutex> mtx_;
 };
 
diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
index 41ebb9dbeaf36eafe3dff4ae294b84427f660cbf..854a117b0e7532962d5e0c95fd947527ac3b307a 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/memory/allocation/buffered_allocator.h"
 #include <gtest/gtest.h>
+#include <utility>
 #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
@@ -64,7 +65,7 @@ class StubAllocator : public Allocator {
   size_t GetFreeCount() const { return destruct_count_; }
 
  protected:
-  void Free(Allocation *allocation) override {
+  void FreeImpl(Allocation *allocation) override {
     auto *alloc = dynamic_cast<StubAllocation *>(allocation);
     PADDLE_ENFORCE_NOT_NULL(alloc);
     if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
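With `AllocationWithUnderlying` gone, BufferedAllocator hands back the underlying allocator's Allocation directly, and a cached block is reused when it is at least as large as the request but smaller than twice its size. A usage sketch (sizes are illustrative; the unit test wraps a BestFitAllocator, while this sketch reuses CPUAllocator for brevity):

    #include <memory>
    #include "paddle/fluid/memory/allocation/buffered_allocator.h"
    #include "paddle/fluid/memory/allocation/cpu_allocator.h"
    #include "paddle/fluid/memory/allocation/locked_allocator.h"

    void BufferedAllocatorSketch() {
      namespace alloc = paddle::memory::allocation;

      std::shared_ptr<alloc::Allocator> cpu(new alloc::CPUAllocator());
      // LockedAllocator reports IsAllocThreadSafe() == true, so the
      // BufferedAllocator creates its own mutex and is thread safe too.
      std::shared_ptr<alloc::Allocator> locked(new alloc::LockedAllocator(cpu));
      alloc::BufferedAllocator buffered(locked);

      auto a = buffered.Allocate(1024);  // cache miss: forwarded to `locked`
      a.reset();                         // FreeImpl() caches the 1024-byte block

      // 1024 >= 900 and 1024 < 2 * 900, so the cached block is reused
      // instead of allocating again.
      auto b = buffered.Allocate(900);
    }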
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc
index cc81a6f7b8b1950b07b6fb1571b53d9b5ddb1b9f..90c49c87a677aa38bce35774b3a7bb698e6f43e7 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.cc
+++ b/paddle/fluid/memory/allocation/cpu_allocator.cc
@@ -20,25 +20,27 @@
 namespace paddle {
 namespace memory {
 namespace allocation {
 
-CPUAllocation::CPUAllocation(void *ptr, size_t size)
-    : Allocation(ptr, size, platform::CPUPlace()) {}
-
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }
 
-void CPUAllocator::Free(Allocation *allocation) {
-  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
-  free(allocation->ptr());
+void CPUAllocator::FreeImpl(Allocation *allocation) {
+  void *p = allocation->ptr();
+#ifdef _WIN32
+  _aligned_free(p);
+#else
+  free(p);
+#endif
   delete allocation;
 }
 
 Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
-  void *ptr;
-  auto status = posix_memalign(&ptr, kAlignment, size);
-  if (UNLIKELY(status) != 0) {
-    throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d",
-                                   size, status));
-  }
-  return new CPUAllocation(ptr, size);
+  void *p;
+#ifdef _WIN32
+  p = _aligned_malloc(size, kAlignment);
+#else
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!",
+                    size);
+#endif
+  return new Allocation(p, size, platform::CPUPlace());
 }
 }  // namespace allocation
 }  // namespace memory
 
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h
index 26d3643f4edff1f2d71b1c761e915a6dacb485ad..3eb1416b0efa9327f2052e1f128359bc93f94986 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.h
+++ b/paddle/fluid/memory/allocation/cpu_allocator.h
@@ -31,19 +31,13 @@ namespace allocation {
 //
 // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
 // an open-sourced allocator into Paddle.
-class CPUAllocator;
-class CPUAllocation : public Allocation {
- public:
-  CPUAllocation(void* ptr, size_t size);
-};
-
 class CPUAllocator : public Allocator {
  public:
-  constexpr static size_t kAlignment = 64u;
+  constexpr static size_t kAlignment = 4096UL;
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
 }  // namespace allocation
 
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index 430bf0be98e08787ac4412a8b6e0fcc310ffe2b4..895a24a6a2a6b8e399ec2ace48136d1ef16c62f6 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -23,15 +23,14 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
-void CUDAAllocator::Free(Allocation* allocation) {
+void CUDAAllocator::FreeImpl(Allocation* allocation) {
   platform::CUDADeviceGuard guard(place_.device);
-  auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
-  PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
-  PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
+  PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(allocation->place()),
                     place_);
   PADDLE_ENFORCE(cudaFree(allocation->ptr()));
   delete allocation;
 }
+
 Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   platform::CUDADeviceGuard guard(place_.device);
   void* ptr;
@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
         "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
         status, cudaGetErrorString(status)));
   }
-  return new CUDAAllocation(ptr, size, platform::Place(place_));
+  return new Allocation(ptr, size, platform::Place(place_));
 }
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
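CUDAAllocator::FreeImpl above no longer dynamic_casts to a CUDAAllocation; the place() stored in every Allocation carries enough information to validate the free. An illustrative check (the helper name is an assumption, not from the patch):

    #include "paddle/fluid/platform/place.h"

    // boost::get<CUDAPlace> throws if the allocation's place is not a CUDA
    // place, which is the guarantee the removed dynamic_cast used to provide.
    inline bool OnDevice(const paddle::platform::Place& place, int device_id) {
      return boost::get<paddle::platform::CUDAPlace>(place).device == device_id;
    }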
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h
index 63726f5820b1c81565117c7a9bf798c17c9681f6..580a2d1df1d5997a27180740393741ec8973bf18 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_allocator.h
@@ -20,13 +20,6 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-// CUDA System allocator and allocation.
-// Just a flag type.
-class CUDAAllocation : public Allocation {
- public:
-  using Allocation::Allocation;
-};
-
 class CUDAAllocator : public Allocator {
  public:
   explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
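The CPUAllocator change a few hunks above exists because posix_memalign is unavailable on Windows; note also the alignment bump from 64 bytes to a page-sized 4096. The same pattern in isolation (helper names are illustrative, not part of the patch):

    #include <cstdlib>
    #ifdef _WIN32
    #include <malloc.h>  // _aligned_malloc / _aligned_free
    #endif

    // Mirrors CPUAllocator::AllocateImpl / FreeImpl: memory obtained from
    // _aligned_malloc must be released with _aligned_free, not free().
    inline void* AlignedAlloc(size_t size, size_t alignment) {
    #ifdef _WIN32
      return _aligned_malloc(size, alignment);
    #else
      void* p = nullptr;
      return posix_memalign(&p, alignment, size) == 0 ? p : nullptr;
    #endif
    }

    inline void AlignedFree(void* p) {
    #ifdef _WIN32
      _aligned_free(p);
    #else
      free(p);
    #endif
    }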
diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc
index 514ac7883ad2effdf3518be8afe3f448a5ac10b2..0dc2de37467b7e7d23c88b4a255c14795db4c275 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.cc
+++ b/paddle/fluid/memory/allocation/legacy_allocator.cc
@@ -134,26 +134,22 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
 }
 
 #ifdef PADDLE_WITH_CUDA
-BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
-  static std::once_flag init_flag;
-  static detail::BuddyAllocator **a_arr = nullptr;
-  static std::vector<int> devices;
-
-  std::call_once(init_flag, [gpu_id]() {
-    devices = platform::GetSelectedDevices();
-    int gpu_num = devices.size();
-
-    allocation::GPUMemMonitor.Initialize(devices.size());
+class GPUBuddyAllocatorList {
+ public:
+  GPUBuddyAllocatorList()
+      : allocators_(platform::GetCUDADeviceCount()),
+        flags_(platform::GetCUDADeviceCount()) {
+    allocation::GPUMemMonitor.Initialize(allocators_.size());
+  }
 
-    a_arr = new BuddyAllocator *[gpu_num];
-    for (size_t i = 0; i < devices.size(); ++i) {
-      int dev_id = devices[i];
-      a_arr[i] = nullptr;
+  BuddyAllocator *Get(size_t dev_id) {
+    PADDLE_ENFORCE(dev_id < flags_.size(), "Invalid device id %s", dev_id);
+    std::call_once(flags_[dev_id], [this, dev_id] {
       platform::SetDeviceId(dev_id);
-      a_arr[i] = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
-                                        new detail::GPUAllocator(dev_id)),
-                                    platform::GpuMinChunkSize(),
-                                    platform::GpuMaxChunkSize());
+      allocators_[dev_id] = new BuddyAllocator(
+          std::unique_ptr<detail::SystemAllocator>(
+              new detail::GPUAllocator(dev_id)),
+          platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
       VLOG(10) << "\n\nNOTE:\n"
                << "You can set GFlags environment variable "
@@ -167,13 +163,19 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
                << FLAGS_initial_gpu_memory_in_mb
                << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
                << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
-    }
-  });
+    });
+    return allocators_[dev_id];
+  }
+
+ private:
+  std::vector<BuddyAllocator *> allocators_;
+  std::vector<std::once_flag> flags_;
+};
 
+BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
+  static GPUBuddyAllocatorList allocators;
   platform::SetDeviceId(gpu_id);
-  auto pos = std::distance(devices.begin(),
-                           std::find(devices.begin(), devices.end(), gpu_id));
-  return a_arr[pos];
+  return allocators.Get(gpu_id);
 }
 #endif
@@ -192,7 +194,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
 #ifdef PADDLE_WITH_CUDA
   auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
   auto *ptr = buddy_allocator->Alloc(size);
-  if (ptr == nullptr) {
+  if (ptr == nullptr && size > 0) {
     int cur_dev = platform::GetCurrentDeviceId();
     platform::SetDeviceId(place.device);
     size_t avail, total;
@@ -347,7 +349,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   return tmp_alloc;
 }
 
-void LegacyAllocator::Free(Allocation *allocation) {
+void LegacyAllocator::FreeImpl(Allocation *allocation) {
   boost::apply_visitor(
       legacy::FreeVisitor(allocation->ptr(), allocation->size()),
       allocation->place());
 
diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h
index d9bdae153da6439598f76f5cac226897e6e0c596..27cd42ea35012f07ae7db79c46d767138ddaafff 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.h
+++ b/paddle/fluid/memory/allocation/legacy_allocator.h
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
 
  protected:
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
 
  private:
   platform::Place place_;
 
diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc
index 835f6527c8a1d83340167bd9079f7cee25ad24cf..c43099cc88f839ad92d36774d49aafd7192f916f 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.cc
+++ b/paddle/fluid/memory/allocation/locked_allocator.cc
@@ -14,8 +14,10 @@
 
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
 #include <mutex>  // NOLINT
+#include <utility>
 #include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
+
 namespace paddle {
 namespace memory {
 namespace allocation {
@@ -23,26 +25,24 @@ namespace allocation {
 bool LockedAllocator::IsAllocThreadSafe() const { return true; }
 
 LockedAllocator::LockedAllocator(
-    std::unique_ptr<Allocator> &&underlying_allocator)
+    std::shared_ptr<Allocator> underlying_allocator)
     : underlying_allocator_(std::move(underlying_allocator)) {
   PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
   if (!underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
 }
-void LockedAllocator::Free(Allocation *allocation) {
-  {
-    platform::LockGuardPtr<std::mutex> guard(mtx_);
-    reinterpret_cast<AllocationWithUnderlying *>(allocation)
-        ->allocation_.reset();  // Destroy inner allocation
-  }
-  delete allocation;
+
+void LockedAllocator::FreeImpl(Allocation *allocation) {
+  platform::LockGuardPtr<std::mutex> guard(mtx_);
+  underlying_allocator_->Free(allocation);
 }
+
 Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
-  return new AllocationWithUnderlying(
-      underlying_allocator_->Allocate(size, attr));
+  return underlying_allocator_->Allocate(size, attr).release();
 }
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
 
diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h
index 4967b9bb8d3ad101cff4657b0a45b49b76e2deb2..b735ccef101417b3f880eb6dcdd9964cffbe875c 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.h
+++ b/paddle/fluid/memory/allocation/locked_allocator.h
@@ -24,15 +24,15 @@ namespace allocation {
 // A allocator to make underlying allocator thread safe.
 class LockedAllocator : public Allocator {
  public:
-  explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
+  explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
 
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::unique_ptr<std::mutex> mtx_;
 };
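GPUBuddyAllocatorList above replaces one global std::call_once over all selected devices with one std::once_flag per device, so each buddy allocator is built lazily on first use. The pattern in generic form (names are illustrative):

    #include <memory>
    #include <mutex>
    #include <vector>

    // One object per slot, each constructed at most once even when Get() is
    // called concurrently. std::once_flag is neither copyable nor movable,
    // so the vectors must be sized up front, as GPUBuddyAllocatorList does.
    template <typename T>
    class LazySlots {
     public:
      explicit LazySlots(size_t n) : slots_(n), flags_(n) {}

      T* Get(size_t i) {
        std::call_once(flags_[i], [this, i] { slots_[i].reset(new T()); });
        return slots_[i].get();
      }

     private:
      std::vector<std::unique_ptr<T>> slots_;
      std::vector<std::once_flag> flags_;
    };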
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3334589a4beb407447cf89c173f6128654bb245a
--- /dev/null
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+#include "paddle/fluid/memory/allocation/allocator_facade.h"
+
+#ifdef PADDLE_WITH_CUDA
+DECLARE_double(fraction_of_gpu_memory_to_use);
+DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
+DECLARE_int64(gpu_allocator_retry_time);
+#endif
+
+DECLARE_string(allocator_strategy);
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+TEST(allocator, allocator) {
+#ifdef PADDLE_WITH_CUDA
+  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
+  FLAGS_gpu_allocator_retry_time = 500;
+  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
+#endif
+
+  FLAGS_allocator_strategy = "naive_best_fit";
+
+  auto &instance = AllocatorFacade::Instance();
+  platform::Place place;
+  size_t size = 1024;
+
+  {
+    place = platform::CPUPlace();
+    size = 1024;
+    auto cpu_allocation = instance.Alloc(place, size);
+    ASSERT_NE(cpu_allocation, nullptr);
+    ASSERT_NE(cpu_allocation->ptr(), nullptr);
+    ASSERT_EQ(cpu_allocation->place(), place);
+    ASSERT_EQ(cpu_allocation->size(), size);
+  }
+
+#ifdef PADDLE_WITH_CUDA
+  {
+    place = platform::CUDAPlace(0);
+    size = 1024;
+    auto gpu_allocation = instance.Alloc(place, size);
+    ASSERT_NE(gpu_allocation, nullptr);
+    ASSERT_NE(gpu_allocation->ptr(), nullptr);
+    ASSERT_EQ(gpu_allocation->place(), place);
+    ASSERT_GE(gpu_allocation->size(), size);
+  }
+
+  {
+    // Allocate 2GB gpu memory
+    place = platform::CUDAPlace(0);
+    size = 2 * static_cast<size_t>(1 << 30);
+    auto gpu_allocation = instance.Alloc(place, size);
+    ASSERT_NE(gpu_allocation, nullptr);
+    ASSERT_NE(gpu_allocation->ptr(), nullptr);
+    ASSERT_EQ(gpu_allocation->place(), place);
+    ASSERT_GE(gpu_allocation->size(), size);
+  }
+
+  {
+    place = platform::CUDAPinnedPlace();
+    size = (1 << 20);
+    auto cuda_pinned_allocation =
+        instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
+    ASSERT_NE(cuda_pinned_allocation, nullptr);
+    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
+    ASSERT_EQ(cuda_pinned_allocation->place(), place);
+    ASSERT_GE(cuda_pinned_allocation->size(), size);
+  }
+#endif
+}
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
 
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc
index de81d12cca6ca280289371abdec225c9e2b8f4d0..5a3d817211750d3e19e65344d1eab5a96800c674 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.cc
+++ b/paddle/fluid/memory/allocation/pinned_allocator.cc
@@ -20,20 +20,15 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
-void CPUPinnedAllocator::Free(Allocation *allocation) {
-  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
+void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
   PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
   delete allocation;
 }
 Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
                                              Allocator::Attr attr) {
-  // PADDLE_ENFORCE_EQ(
-  //     attr, kCrossDevice,
-  //     "CPUPinnedAllocator should be used for Cross-Device Communication");
-
   void *ptr;
   PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
-  return new CPUPinnedAllocation(ptr, size);
+  return new Allocation(ptr, size, platform::CUDAPinnedPlace());
 }
 }  // namespace allocation
 }  // namespace memory
 
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h
index 42d0938f2afbb1efca8bfdd7035bc0eada30f06b..deeb55a8fb0396a312286f5c2692114e9e4afc8d 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.h
+++ b/paddle/fluid/memory/allocation/pinned_allocator.h
@@ -20,18 +20,12 @@ namespace memory {
 namespace allocation {
 
 // Allocator uses `cudaHostAlloc`
-class CPUPinnedAllocation : public Allocation {
- public:
-  CPUPinnedAllocation(void *ptr, size_t size)
-      : Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
-};
-
 class CPUPinnedAllocator : public Allocator {
  public:
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
 
diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc
index 981705051b449e6a35c2dcce9138dc2efae52920..7e888988f9602e362d73f64c1b45552e84e3349c 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator.cc
@@ -18,25 +18,15 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-bool RetryAllocator::IsAllocThreadSafe() const {
-  return underlying_allocator_->IsAllocThreadSafe();
-}
-
-void RetryAllocator::Free(Allocation* allocation) {
+void RetryAllocator::FreeImpl(Allocation* allocation) {
   // Delete underlying allocation first.
-  reinterpret_cast<RetryAllocation*>(allocation)->allocation_.reset();
-  {
-    // notify all waited allocators, they can try to allocate memory after free.
-    std::lock_guard<std::mutex> lock(mutex_);
-    cv_.notify_all();
-  }
-  delete allocation;
+  underlying_allocator_->Free(allocation);
+  cv_.notify_all();
 }
 
 Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   auto alloc_func = [&, this]() {
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   };
   // In fact, we can unify the code of allocation success and failure
   // But it would add lock even when allocation success at the first time
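RetryAllocator::FreeImpl now frees through the underlying allocator and wakes waiters without taking the mutex; a waiter that misses the wake-up simply times out and retries. The wait-and-retry idea in isolation (illustrative only; the real AllocateImpl is not shown in this hunk):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include "paddle/fluid/memory/allocation/allocator.h"

    // First attempt; on BadAlloc, wait up to `retry_time` for a Free() to
    // signal `cv`, then try once more. A second BadAlloc propagates.
    template <typename AllocFunc>
    auto AllocateWithRetry(AllocFunc alloc_func, std::mutex* mtx,
                           std::condition_variable* cv,
                           std::chrono::milliseconds retry_time)
        -> decltype(alloc_func()) {
      try {
        return alloc_func();
      } catch (paddle::memory::allocation::BadAlloc&) {
        std::unique_lock<std::mutex> lock(*mtx);
        cv->wait_for(lock, retry_time);
        lock.unlock();
        return alloc_func();
      }
    }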
diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h
index 5efcac8b108002a2a2da920173d237096de4fffa..379f576d6e1ed8f256a0233b203423a487ee73e4 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.h
+++ b/paddle/fluid/memory/allocation/retry_allocator.h
@@ -18,38 +18,32 @@
 #include <condition_variable>  // NOLINT
 #include <memory>
 #include <mutex>  // NOLINT
+#include <utility>
 #include "paddle/fluid/memory/allocation/allocator.h"
 
 namespace paddle {
 namespace memory {
 namespace allocation {
 
-class RetryAllocator;
-
 class RetryAllocator : public Allocator {
  public:
-  RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
+  RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
       : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
-    EnforceCheck();
-  }
-
-  bool IsAllocThreadSafe() const override;
-
- private:
-  void EnforceCheck() {
     PADDLE_ENFORCE_NOT_NULL(
-        underlying_allocator_.get(),
-        "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
+        underlying_allocator_,
+        "UnderlyingAllocator of RetryAllocator must not be null");
     PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
                    "UnderlyingAllocator of RetryAllocator must be thread-safe");
   }
 
+  bool IsAllocThreadSafe() const override { return true; }
+
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::chrono::milliseconds retry_time_;
   std::mutex mutex_;
   std::condition_variable cv_;
@@ -57,8 +51,6 @@ class RetryAllocator : public Allocator {
   // For debug, We can add an atomic integer to record how many memory sizes are
   // waited to allocate
   // std::atomic<size_t> waited_allocate_size_{0};
-
-  friend class RetryAllocation;
 };
 
 }  // namespace allocation
 
diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.cc b/paddle/fluid/memory/allocation/zero_size_allocator.cc
index cb2df1a029815478bbc9d3b09425f3ef145c5fb3..39743bcb10c700c9a8446b9040c8a8707d57ec7d 100644
--- a/paddle/fluid/memory/allocation/zero_size_allocator.cc
+++ b/paddle/fluid/memory/allocation/zero_size_allocator.cc
@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
 
 Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   if (size == 0) {
-    return new ZeroSizeAllocation(place_);
+    return new Allocation(nullptr, 0, place_);
   } else {
     return underlying_allocator_->Allocate(size, attr).release();
   }
 }
+
+void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
+  if (allocation->size() == 0) {
+    delete allocation;
+  } else {
+    underlying_allocator_->Free(allocation);
+  }
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
 
diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.h b/paddle/fluid/memory/allocation/zero_size_allocator.h
index 6b80245a34e7a6834aa75a90218845cc92036881..08a7a06dbf290b55994a407fe478f792b0c0964a 100644
--- a/paddle/fluid/memory/allocation/zero_size_allocator.h
+++ b/paddle/fluid/memory/allocation/zero_size_allocator.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <memory>
 #include <utility>
 #include "paddle/fluid/memory/allocation/allocator.h"
 
@@ -23,12 +24,6 @@ namespace allocation {
 // The allocator handles the request's size is zero. Allocator will always
 // return an allocation even the request size is zero. However, the
 // allocation.ptr() is nullptr
-class ZeroSizeAllocation : public Allocation {
- public:
-  explicit ZeroSizeAllocation(const platform::Place& p)
-      : Allocation(nullptr, 0, p) {}
-};
-
 class ZeroSizeAllocator : public Allocator {
  public:
   ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator,
@@ -39,6 +34,7 @@ class ZeroSizeAllocator : public Allocator {
 
  protected:
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
+  void FreeImpl(Allocation* allocation) override;
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
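ZeroSizeAllocator now returns plain Allocation objects and dispatches FreeImpl on allocation->size() instead of on a dedicated ZeroSizeAllocation type. The observable behavior, sketched under the naive_best_fit strategy (assumed to be selected via FLAGS_allocator_strategy):

    #include "paddle/fluid/memory/allocation/allocator_facade.h"
    #include "paddle/fluid/platform/place.h"

    void ZeroSizeSketch() {
      namespace alloc = paddle::memory::allocation;
      // A zero-sized request still yields a live Allocation object...
      auto a = alloc::AllocatorFacade::Instance().Alloc(
          paddle::platform::CPUPlace(), 0);
      // ...but ptr() is nullptr and size() is 0. On release, FreeImpl sees
      // size() == 0 and deletes the object without touching the underlying
      // allocator.
    }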
#include "paddle/fluid/platform/temporary_allocator.h" +#include #include "paddle/fluid/memory/allocation/allocator_facade.h" DEFINE_int64(limit_of_tmp_allocation, -1, @@ -29,38 +30,31 @@ namespace paddle { namespace platform { namespace alloc = memory::allocation; -TemporaryAllocation::TemporaryAllocation( - alloc::AllocationPtr &&underlying_allocation) - : Allocation(underlying_allocation->ptr(), underlying_allocation->size(), - underlying_allocation->place()), - underlying_allocation_(std::move(underlying_allocation)) {} - TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) { - temp_mem_map_.reset(new std::multimap()); + temp_mem_map_.reset(new std::multimap()); } bool TemporaryAllocator::IsAllocThreadSafe() const { return true; } void TemporaryAllocator::Release(const std::function &callback) { - std::unique_ptr> t_allocations; + std::unique_ptr> t_allocations; { std::unique_lock lock(mtx_); callback(); t_allocations.swap(temp_mem_map_); - temp_mem_map_.reset(new std::multimap()); + temp_mem_map_.reset(new std::multimap()); wait_delete_mem_ = 0; } + alloc::AllocationDeleter deleter; for (auto tmp : *t_allocations) { VLOG(10) << "Delete temporary allocation " << tmp.second->ptr() << " size: " << tmp.second->size(); - delete tmp.second; + deleter(tmp.second); } } -void TemporaryAllocator::Free(alloc::Allocation *allocation) { - auto *temp_allocation = dynamic_cast(allocation); - PADDLE_ENFORCE_NOT_NULL(temp_allocation); +void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) { if (platform::is_gpu_place(temp_allocation->place())) { PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_), "The place should be the same."); @@ -84,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) { } VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr() << " size: " << temp_allocation->size(); - delete temp_allocation; + alloc::AllocationDeleter()(temp_allocation); } size_t TemporaryAllocator::TemporaryAllocationQueueSize() { @@ -119,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( } // If not find the the available allocation, get allocation from // AllocatorFacadeInstance. - auto raw_allocation = - alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); - auto temp_mem = new TemporaryAllocation(std::move(raw_allocation)); + auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; - return temp_mem; + return temp_mem.release(); } } // namespace platform diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h index d657a14223326aa1e2cb5b154a10a56ae742f95c..912d45eaf17fe8c05840995275dd3e2e688b38ef 100644 --- a/paddle/fluid/platform/temporary_allocator.h +++ b/paddle/fluid/platform/temporary_allocator.h @@ -16,20 +16,13 @@ #include // NOLINT #include #include +#include #include // NOLINT #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/lock_guard_ptr.h" namespace paddle { namespace platform { -class TemporaryAllocation : public memory::allocation::Allocation { - public: - explicit TemporaryAllocation( - memory::allocation::AllocationPtr &&underlying_allocation); - - memory::allocation::AllocationPtr underlying_allocation_; -}; - /*! \brief the TemporaryAllocator is used to alloc the temporary allocation * which used by CUDA's async operation. 
diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h
index d657a14223326aa1e2cb5b154a10a56ae742f95c..912d45eaf17fe8c05840995275dd3e2e688b38ef 100644
--- a/paddle/fluid/platform/temporary_allocator.h
+++ b/paddle/fluid/platform/temporary_allocator.h
@@ -16,20 +16,13 @@
 #include <condition_variable>  // NOLINT
 #include <deque>
 #include <map>
+#include <memory>
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
 
 namespace paddle {
 namespace platform {
 
-class TemporaryAllocation : public memory::allocation::Allocation {
- public:
-  explicit TemporaryAllocation(
-      memory::allocation::AllocationPtr &&underlying_allocation);
-
-  memory::allocation::AllocationPtr underlying_allocation_;
-};
-
 /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
  * which used by CUDA's async operation.
  *
@@ -56,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   void SetCallback(const std::function<void()> &callback);
 
  protected:
-  void Free(memory::allocation::Allocation *allocation) override;
+  void FreeImpl(memory::allocation::Allocation *allocation) override;
   memory::allocation::Allocation *AllocateImpl(
       size_t size, memory::allocation::Allocator::Attr attr) override;
 
@@ -65,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   platform::Place place_;
   // When the allocation is not held by any variable, it should be placed
   // to temp_mem_map immediately.
-  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
-      nullptr};
+  std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
+      temp_mem_map_{nullptr};
   std::mutex mtx_;
   size_t wait_delete_mem_{0};
   std::function<void()> callback_;
 
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 7bf089637862c969d27f957a630469d1644222bf..dca40edf0bbe334d7a9306b9b6b259b0bc9a6f5d 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -324,6 +324,7 @@ PYBIND11_MODULE(core, m) {
            [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
              self.mutable_data<float>(place);
            })
+      .def("_clear", &Tensor::clear)
       .def("set", PyCPUTensorSetFromArray<float>)
       .def("set", PyCPUTensorSetFromArray<int>)
       .def("set", PyCPUTensorSetFromArray<double>)
 
diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h
index 16bb3771f2e9bcc07028ef2039fed8691f9aab97..66b768665b6d0b97b4ca1470020132bfc9576bbb 100644
--- a/paddle/fluid/string/printf.h
+++ b/paddle/fluid/string/printf.h
@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
   Fprintf(std::cout, fmt, args...);
 }
 
-template <typename T>
-std::string HumanReadableSize(T size) {
+inline std::string HumanReadableSize(double f_size) {
   size_t i = 0;
-  double f_size = static_cast<double>(size);
   double orig = f_size;
   const std::vector<std::string> units(
       {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
-  while (f_size > 1024) {
+  while (f_size >= 1024) {
     f_size /= 1024;
     i++;
   }
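The printf.h hunk fixes a boundary bug: with `f_size > 1024`, an input of exactly 1024 never entered the loop and was reported in bytes; `>=` divides once and selects kB. A quick check (hypothetical driver; the exact output formatting is not shown in this patch):

    #include <iostream>
    #include "paddle/fluid/string/printf.h"

    int main() {
      // Before: 1024 > 1024 is false, so the value stayed in bytes.
      // After:  1024 >= 1024 divides once, yielding a value in kB.
      std::cout << paddle::string::HumanReadableSize(1024.0) << "\n";
      std::cout << paddle::string::HumanReadableSize(1536.0) << "\n";  // ~1.5 kB
      return 0;
    }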