diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 6d8ba430bd0b9c9e48b4a80a07feb24b2da7d7b8..a02e53dcf764368601646a900833ac650c5bb31a 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -365,6 +365,9 @@ class ExecutionContext {
     auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
         allocation_ptr, deleter);
 
+    PADDLE_ENFORCE(
+        dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
+        "The AllocationPtr must be TemporaryAllocation.");
     PADDLE_ENFORCE_GE(allocation_ptr->size(),
                       framework::product(dim) * sizeof(T));
 
diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 0f6014ae8aa28f090cb51401ee2cb0772bca7a45..ac77c3d2a500816a4eb41ed13f23ee628290f287 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -4,7 +4,6 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
 cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
 cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
 cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
-cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
 cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
 
 if (WITH_GPU)
@@ -38,20 +37,30 @@ else ()
   set(AllocatorFacadeDeps)
 endif()
 
-list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
-
 cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
 cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
+cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
 cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
-cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps})
-cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
+cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags)
+cc_library(allocator_facade SRCS allocator_facade.cc DEPS
+  ${AllocatorFacadeDeps}
+  cpu_allocator
+  locked_allocator
+  best_fit_allocator
+  aligned_allocator
+  auto_increment_allocator
+  zero_size_allocator
+  conditional_allocator
+  retry_allocator
+  buffered_allocator
+  allocator_strategy
+  legacy_allocator
+  )
 
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
 
 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
 
-cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
-
 cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
 
 cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
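The `PADDLE_ENFORCE` added in operator.h runs just before the raw pointer is handed to a `std::shared_ptr` with a custom deleter, so a mismatched dynamic type fails loudly up front instead of corrupting memory in the deleter later. A minimal standalone sketch of that guard pattern (not Paddle code; `Base`/`Temporary` are stand-in types):

```cpp
#include <cassert>
#include <memory>

struct Base { virtual ~Base() = default; };
struct Temporary : Base {};

void TakeOwnership(Base* raw) {
  // Verify the dynamic type before transferring ownership, mirroring the
  // dynamic_cast check the diff adds in ExecutionContext.
  assert(dynamic_cast<Temporary*>(raw) != nullptr &&
         "pointer must be a Temporary");
  std::shared_ptr<Base> shared(raw, [](Base* p) { delete p; });
}

int main() { TakeOwnership(new Temporary); }
```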
diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h
index b536d4276e3b6236d0748eee588d345dd15c6954..064acd06e71da98802126913e0af843cfbf717e7 100644
--- a/paddle/fluid/memory/allocation/aligned_allocator.h
+++ b/paddle/fluid/memory/allocation/aligned_allocator.h
@@ -94,8 +94,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
         underlying_allocator_->Allocate(size + kAlignment, attr);
     return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
   }
-
-  void FreeImpl(Allocation* allocation) override { delete allocation; }
 };
 
 }  // namespace allocation
diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc
index 5a5253d911abc722c026730e7e88eb326bb82afd..8fb8a5fb897a736d7515951ba08c633da9a7706c 100644
--- a/paddle/fluid/memory/allocation/allocator.cc
+++ b/paddle/fluid/memory/allocation/allocator.cc
@@ -27,24 +27,16 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
 
 AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
   auto ptr = AllocateImpl(size, attr);
-  ptr->RegisterDecoratedAllocator(this);
+  ptr->set_allocator(this);
   return AllocationPtr(ptr);
 }
 
-void Allocator::FreeImpl(Allocation* allocation) {
-  Allocator* allocator = allocation->TopDecoratedAllocator();
-  allocator->Free(allocation);
-}
-
-void Allocator::Free(Allocation* allocation) {
-  allocation->PopDecoratedAllocator();
-  FreeImpl(allocation);
-}
+void Allocator::Free(Allocation* allocation) { delete allocation; }
 
 const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
 
 void AllocationDeleter::operator()(Allocation* allocation) const {
-  Allocator* allocator = allocation->TopDecoratedAllocator();
+  auto* allocator = allocation->allocator();
   allocator->Free(allocation);
 }
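This hunk is the heart of the revert: `Free()` no longer pops a recorded decorator chain; the deleter simply asks the allocation for the single allocator that produced it. A standalone sketch of the restored back-pointer scheme (simplified; the real classes also carry place, size, and `BadAlloc`):

```cpp
#include <memory>

class Allocation;

class Allocator {
 public:
  virtual ~Allocator() = default;
  virtual void Free(Allocation* a);  // default behavior: plain delete
};

class Allocation {
 public:
  Allocator* allocator() const { return allocator_; }
  void set_allocator(Allocator* a) { allocator_ = a; }
  virtual ~Allocation() = default;

 private:
  Allocator* allocator_{nullptr};
};

void Allocator::Free(Allocation* a) { delete a; }

struct AllocationDeleter {
  void operator()(Allocation* a) const { a->allocator()->Free(a); }
};

using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;

int main() {
  Allocator allocator;
  AllocationPtr p(new Allocation);
  p->set_allocator(&allocator);  // Allocate() does this in the diff
}  // AllocationDeleter -> allocator.Free -> delete
```

The design trade-off: one pointer per allocation is cheap and simple, but every wrapping allocator must now wrap the allocation object itself (see `AllocationWithUnderlying` in later hunks) rather than relying on a recorded chain.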
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index 33b816b90812d7fedc450a67743b5d7d20579302..3465278935f7ce05456e94bb3a7d1ae9f114ff96 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -46,56 +46,13 @@ class Allocator;
 // NOTE: this is the base class of Allocation. Each allocator can use its own
 //       allocation object.
 // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
-
-/**
- * Allocation is returned by Allocator::Allocate() method.
- *
- * An allocator may be decorated by another allocator. For example, we can
- * decorate a RetryAllocator to any allocator to perform allocation retry
- * when the first allocation request fails.
- *
- * Explanations of the Allocator design are as follows:
- *
- * Suppose we have an allocator which is decorated by several allocators:
- *
- *   A(1) <- A(2) <- A(3) <- ... <- A(n)
- *
- * , and the public allocator is A(1).
- *
- * The allocation process would be:
- *
- *   A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
- *
- * , and the free process would be:
- *
- *   A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
- *
- * Therefore, we should record the allocator chain when allocating, so
- * that we can free the allocation in the reverse order of the allocator chain.
- * The field `decorated_allocators_` is used to record this chain.
- *
- * Another example is that we want to add additional fields to an Allocation,
- * e.g., something like what is done in AlignedAllocator. In this case, we
- * should declare a derived class of Allocation, which contains an underlying
- * Allocation allocated by the underlying allocator. Therefore, the
- * `decorated_allocators_` of the new Allocation object would be a new chain,
- * differing from the underlying Allocation object.
- */
 class Allocation {
  public:
   Allocation(void* ptr, size_t size, platform::Place place)
-      : ptr_(ptr), size_(size), place_(place) {
-    // NOTE(zjl): Since decorated_allocators_ is usually a small vector,
-    // we reserve a small buffer for it to prevent frequent heap allocation.
-    // Not quite sure whether we need something like a gtl vector.
-    decorated_allocators_.reserve(8);
-  }
+      : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
 
   Allocation(const Allocation& o) = delete;
   Allocation& operator=(const Allocation& o) = delete;
-  Allocation(Allocation&& o) = delete;
-  Allocation& operator=(Allocation&& o) = delete;
 
   // Returns the holding pointer.
   // NOTE: For performance consideration, it is better not to make this method
@@ -117,31 +74,17 @@ class Allocation {
 
   const platform::Place& place() const { return place_; }
 
-  virtual ~Allocation();
-
- private:
-  const std::vector<Allocator*>& DecoratedAllocators() const {
-    return decorated_allocators_;
-  }
-
-  inline void RegisterDecoratedAllocator(Allocator* allocator) {
-    decorated_allocators_.push_back(allocator);
-  }
+  Allocator* allocator() { return allocator_; }
 
-  inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); }
+  void set_allocator(Allocator* allocator) { allocator_ = allocator; }
 
-  inline Allocator* TopDecoratedAllocator() {
-    return decorated_allocators_.back();
-  }
+  virtual ~Allocation();
 
  private:
+  Allocator* allocator_;
   void* ptr_;
   size_t size_;
   platform::Place place_;
-  std::vector<Allocator*> decorated_allocators_;
-
-  friend class Allocator;
-  friend class AllocationDeleter;
 };
 
 using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
@@ -191,12 +134,9 @@ class Allocator {
   // True if the `Allocate` is thread safe.
   virtual bool IsAllocThreadSafe() const;
 
-  // This function should not be called outside
-  void Free(Allocation* allocation);
-
 protected:
+  virtual void Free(Allocation* allocation);
   virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
-  virtual void FreeImpl(Allocation* allocation);
 
 private:
  friend class AllocationDeleter;
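For contrast, the comment block deleted above described the decorator bookkeeping this revert removes. A standalone model of that removed scheme (names are stand-ins), showing why frees had to unwind in reverse registration order:

```cpp
#include <iostream>
#include <string>
#include <vector>

class Allocation;

class Allocator {
 public:
  explicit Allocator(std::string name, Allocator* underlying = nullptr)
      : name_(std::move(name)), underlying_(underlying) {}

  Allocation* Allocate();
  void Free(Allocation* a);

 private:
  std::string name_;
  Allocator* underlying_;
};

class Allocation {
 public:
  std::vector<Allocator*> decorators;  // innermost first, outermost last
};

Allocation* Allocator::Allocate() {
  // Each layer allocates through its underlying allocator, then registers
  // itself -- the removed RegisterDecoratedAllocator().
  Allocation* a = underlying_ ? underlying_->Allocate() : new Allocation;
  a->decorators.push_back(this);
  return a;
}

void Allocator::Free(Allocation* a) {
  std::cout << name_ << ".Free\n";
  a->decorators.pop_back();  // the removed PopDecoratedAllocator()
  if (underlying_) {
    underlying_->Free(a);
  } else {
    delete a;
  }
}

int main() {
  Allocator raw("raw");
  Allocator locked("locked", &raw);
  Allocator retry("retry", &locked);
  Allocation* a = retry.Allocate();
  a->decorators.back()->Free(a);  // retry.Free -> locked.Free -> raw.Free
}
```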
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 09328aded58cb0cccd9de0aba399f5c49313042f..a3b73e3ba31c89c2a94955b0fea64df4ab0ffc26 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -49,17 +49,6 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-static inline std::shared_ptr<Allocator> WrapRetryAllocator(
-    std::shared_ptr<Allocator> allocator, int64_t retry_time) {
-  if (retry_time > 0) {
-    auto* retry_allocator =
-        new RetryAllocator(std::move(allocator), retry_time);
-    allocator.reset(retry_allocator);
-  }
-
-  return allocator;
-}
-
 // TODO(yy): Dirty code here. This class should be configurable in runtime.
 class CPUManagedAllocator : public Allocator {
  public:
@@ -123,10 +112,14 @@ class ChunkedAllocator : public Allocator {
   std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
-    std::shared_ptr<Allocator> allocator(new LockedAllocator(
-        std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
+    std::unique_ptr<Allocator> allocator(new LockedAllocator(
+        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
 
-    allocator = WrapRetryAllocator(allocator, retry_time_);
+    if (retry_time_ > 0) {
+      auto* retry_allocator =
+          new RetryAllocator(std::move(allocator), retry_time_);
+      allocator.reset(retry_allocator);
+    }
 
     return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
   }
@@ -197,23 +190,13 @@ class AllocatorFacadePrivate {
   ~AllocatorFacadePrivate() = default;
 
   AllocatorFacadePrivate() {
-    auto strategy = GetAllocatorStrategy();
-    switch (strategy) {
-      case AllocatorStrategy::kLegacy: {
-        InitLegacyAllocator();
-        break;
-      }
-      case AllocatorStrategy::kNaiveBestFit: {
-        InitCPUAllocator();
-        InitCUDAAllocator();
-        InitCUDAPinnedAllocator();
-        WrapZeroSizeAllocator();
-        break;
-      }
-      default: {
-        PADDLE_THROW("Unsupported allocator strategy: %d",
-                     static_cast<int>(strategy));
-      }
+    if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) {
+      InitLegacyAllocator();
+    } else {
+      InitCPUAllocator();
+      InitCUDAAllocator();
+      InitCUDAPinnedAllocator();
+      WrapZeroSizeAllocator();
     }
   }
@@ -271,7 +254,8 @@ AllocatorFacade& AllocatorFacade::Instance() {
 
 std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size, Allocator::Attr attr) {
-  return std::shared_ptr<Allocation>(Alloc(place, size, attr));
+  return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(),
+                                     AllocationDeleter());
 }
 
 AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,
diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc
index fff94c01e709613603eea7150a08df3c2611dec2..8cebda9005b29b5b3259de0830c42eb10ef90e66 100644
--- a/paddle/fluid/memory/allocation/allocator_strategy.cc
+++ b/paddle/fluid/memory/allocation/allocator_strategy.cc
@@ -19,22 +19,16 @@
 DEFINE_string(
     allocator_strategy, "legacy",
     "The allocation strategy. Legacy means the original allocator of Fluid."
-    "naive_best_fit means the experimental best fit allocator. "
-    "allocator. Enum in [legacy, naive_best_fit].");
+    "New means the experimental allocators of Fluid. in [legacy, new]");
 
 namespace paddle {
 namespace memory {
 namespace allocation {
 
 static AllocatorStrategy GetStrategyFromFlag() {
-  if (FLAGS_allocator_strategy == "legacy") {
-    return AllocatorStrategy::kLegacy;
-  } else if (FLAGS_allocator_strategy == "naive_best_fit") {
-    return AllocatorStrategy::kNaiveBestFit;
-  } else {
-    PADDLE_THROW("Unsupported allocator strategy: %s",
-                 FLAGS_allocator_strategy);
-  }
+  return FLAGS_allocator_strategy == "legacy"
+             ? AllocatorStrategy::kLegacy
+             : AllocatorStrategy::kNaiveBestFit;
 }
 
 AllocatorStrategy GetAllocatorStrategy() {
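`CreateAllocatorWithChunk()` restores the explicit wrapping order: a `BestFitAllocator` carves the chunk, `LockedAllocator` adds a mutex, `RetryAllocator` is layered on only when `retry_time_ > 0`, and an `AlignedAllocator<64u>` caps the stack. A standalone sketch of that composition (stand-in types, not the Paddle classes):

```cpp
#include <memory>

struct Alloc {
  virtual ~Alloc() = default;
};
struct BestFit : Alloc {};
struct Locked : Alloc {
  explicit Locked(std::unique_ptr<Alloc>) {}
};
struct Retry : Alloc {
  Retry(std::unique_ptr<Alloc>, long /*retry_ms*/) {}
};
struct Aligned : Alloc {
  explicit Aligned(std::unique_ptr<Alloc>) {}
};

std::shared_ptr<Alloc> BuildChunkAllocator(long retry_time_ms) {
  // BestFit carves the chunk; Locked serializes access to it.
  std::unique_ptr<Alloc> a(new Locked(std::unique_ptr<Alloc>(new BestFit)));
  if (retry_time_ms > 0) {
    // Same move-then-reset dance as the diff: release ownership into the
    // wrapper, then let the unique_ptr own the wrapper.
    auto* retry = new Retry(std::move(a), retry_time_ms);
    a.reset(retry);
  }
  // Aligned caps the stack, rounding returned addresses up.
  return std::make_shared<Aligned>(std::move(a));
}

int main() { auto chunk_allocator = BuildChunkAllocator(500); }
```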
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc
index d87dd9a4b6df288065389a335a9ddb4047dd096a..e3d6c2f511ef083ef9ecc1fe8df96051b2b85cc2 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
   }
   return num;
 }
-void BestFitAllocator::FreeImpl(Allocation* allocation) {
+void BestFitAllocator::Free(Allocation* allocation) {
   auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(bf_allocation,
                           "The input allocation is not BestFitAllocation.");
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h
index c137438c0c35a575d366a1dfdf950262f711defa..4f10f2b53e8543d4197097f1cae8de765bceeb0f 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.h
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
   void InsertFreeNode(const ListIt& it);
 
  protected:
-  void FreeImpl(Allocation* allocation) override;
+  void Free(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc
index e04c0aa34b1cd6200806cc2a012161e3478eca0b..fc75abc9dfee6c9df5bc87faa493002cc1fe6298 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator.cc
@@ -22,11 +22,11 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
+BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
     : underlying_allocator_(std::move(allocator)) {
   PADDLE_ENFORCE_NOT_NULL(
       underlying_allocator_,
-      "Underlying allocator of BufferedAllocator must not be null");
+      "Underlying allocator of BufferedAllocator must be unmanaged");
   if (underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
   while (!allocations_.empty()) {  // free the largest
     auto it = --allocations_.end();
     cur += it->second->size();
-    underlying_allocator_->Free(it->second.release());
+    delete it->second.release();
     allocations_.erase(it);
     if (cur >= size) return;
   }
 }
 
-bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
-
-void BufferedAllocator::FreeImpl(Allocation *allocation) {
+bool BufferedAllocator::IsAllocThreadSafe() const {
+  return this->underlying_allocator_->IsAllocThreadSafe();
+}
+void BufferedAllocator::Free(Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   allocations_.emplace(allocation->size(), AllocationPtr(allocation));
 }
-
 Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   {
     platform::LockGuardPtr<std::mutex> guard(mtx_);
@@ -61,15 +61,17 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
     if (it != allocations_.end() && it->first < size * 2) {
       AllocationPtr result(std::move(it->second));
       allocations_.erase(it);
-      return result.release();
+      return new AllocationWithUnderlying(std::move(result));
     }
   }
 
   try {
-    return underlying_allocator_->Allocate(size, attr).release();
+    return new AllocationWithUnderlying(
+        underlying_allocator_->Allocate(size, attr));
   } catch (BadAlloc &) {
     FreeCache(size);
-    return underlying_allocator_->Allocate(size, attr).release();
+    return new AllocationWithUnderlying(
+        underlying_allocator_->Allocate(size, attr));
   }
 }
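`BufferedAllocator::AllocateImpl` keeps its cache policy across the revert: take the smallest cached block that fits, but only if it is less than twice the request, and flush the cache largest-first when the underlying allocator throws `BadAlloc`. A standalone sketch of just that policy:

```cpp
#include <cstddef>
#include <iostream>
#include <map>

std::multimap<size_t, int /*block id*/> cache;

bool TakeFromCache(size_t size) {
  auto it = cache.lower_bound(size);  // smallest cached block >= size
  if (it != cache.end() && it->first < size * 2) {
    std::cout << "reuse block " << it->second << " of " << it->first << "\n";
    cache.erase(it);
    return true;
  }
  return false;  // miss: too small, or would waste more than half
}

void FreeCache(size_t bytes_needed) {
  size_t cur = 0;
  while (!cache.empty() && cur < bytes_needed) {
    auto it = --cache.end();  // evict the largest block first
    cur += it->first;
    cache.erase(it);
  }
}

int main() {
  cache.insert({256, 1});
  cache.insert({4096, 2});
  TakeFromCache(200);   // hit: 256 < 2 * 200
  TakeFromCache(1024);  // miss: 4096 >= 2 * 1024, too wasteful
  FreeCache(1 << 12);   // evicts the 4096-byte block
}
```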
diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h
index c728395705842d29a7b2a8441a7048a7e4bf5e6b..d44a3f85beba712b1e735ba14008689bce7d0d64 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator.h
+++ b/paddle/fluid/memory/allocation/buffered_allocator.h
@@ -31,7 +31,7 @@ namespace allocation {
 // underlying_allocator_
 class BufferedAllocator : public Allocator {
  public:
-  explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
+  explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
 
   ~BufferedAllocator();
 
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
   void FreeCache(size_t size);
 
  protected:
-  void FreeImpl(Allocation *allocation) override;
+  void Free(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::shared_ptr<Allocator> underlying_allocator_;
+  std::unique_ptr<Allocator> underlying_allocator_;
   std::multimap<size_t, AllocationPtr> allocations_;
   std::unique_ptr<std::mutex> mtx_;
 };
diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
index 854a117b0e7532962d5e0c95fd947527ac3b307a..c8bd5292ca0f6c3e7ebdc7f5908523b0b7c8ba3a 100644
--- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/memory/allocation/buffered_allocator.h"
 #include <gtest/gtest.h>
+#include <memory>
 #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
@@ -65,7 +66,7 @@ class StubAllocator : public Allocator {
   size_t GetFreeCount() const { return destruct_count_; }
 
  protected:
-  void FreeImpl(Allocation *allocation) override {
+  void Free(Allocation *allocation) override {
     auto *alloc = dynamic_cast<StubAllocation *>(allocation);
     PADDLE_ENFORCE_NOT_NULL(alloc);
     if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc
index 90c49c87a677aa38bce35774b3a7bb698e6f43e7..cc81a6f7b8b1950b07b6fb1571b53d9b5ddb1b9f 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.cc
+++ b/paddle/fluid/memory/allocation/cpu_allocator.cc
@@ -20,27 +20,25 @@
 namespace paddle {
 namespace memory {
 namespace allocation {
 
+CPUAllocation::CPUAllocation(void *ptr, size_t size)
+    : Allocation(ptr, size, platform::CPUPlace()) {}
+
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }
 
-void CPUAllocator::FreeImpl(Allocation *allocation) {
-  void *p = allocation->ptr();
-#ifdef _WIN32
-  _aligned_free(p);
-#else
-  free(p);
-#endif
+void CPUAllocator::Free(Allocation *allocation) {
+  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
+  free(allocation->ptr());
   delete allocation;
 }
 
 Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
-  void *p;
-#ifdef _WIN32
-  p = _aligned_malloc(size, kAlignment);
-#else
-  PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!",
-                    size);
-#endif
-  return new Allocation(p, size, platform::CPUPlace());
+  void *ptr;
+  auto status = posix_memalign(&ptr, kAlignment, size);
+  if (UNLIKELY(status) != 0) {
+    throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d",
+                                   size, status));
+  }
+  return new CPUAllocation(ptr, size);
 }
 }  // namespace allocation
 }  // namespace memory
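The restored CPU path relies on `posix_memalign`, which reports failure through its return value rather than `errno`; the revert also drops the `_WIN32` branch and, in the header below, shrinks `kAlignment` from 4096 to 64 bytes. A standalone sketch of the call and its error handling:

```cpp
#include <cstdio>
#include <cstdlib>

int main() {
  // Must be a power of two and a multiple of sizeof(void*); 64 matches a
  // typical cache line, the value this revert restores.
  constexpr size_t kAlignment = 64;
  void* ptr = nullptr;
  int status = posix_memalign(&ptr, kAlignment, 1024);
  if (status != 0) {
    // posix_memalign returns the error code directly; errno is untouched.
    std::fprintf(stderr, "Cannot allocate cpu memory. Errno is %d\n", status);
    return 1;
  }
  std::printf("aligned block at %p\n", ptr);
  free(ptr);  // memory from posix_memalign is released with free()
}
```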
Errno is %d", + size, status)); + } + return new CPUAllocation(ptr, size); } } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index 3eb1416b0efa9327f2052e1f128359bc93f94986..26d3643f4edff1f2d71b1c761e915a6dacb485ad 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -31,13 +31,19 @@ namespace allocation { // // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import // an open-sourced allocator into Paddle. +class CPUAllocator; +class CPUAllocation : public Allocation { + public: + CPUAllocation(void* ptr, size_t size); +}; + class CPUAllocator : public Allocator { public: - constexpr static size_t kAlignment = 4096UL; + constexpr static size_t kAlignment = 64u; bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; + void Free(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 895a24a6a2a6b8e399ec2ace48136d1ef16c62f6..430bf0be98e08787ac4412a8b6e0fcc310ffe2b4 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -23,14 +23,15 @@ namespace paddle { namespace memory { namespace allocation { bool CUDAAllocator::IsAllocThreadSafe() const { return true; } -void CUDAAllocator::FreeImpl(Allocation* allocation) { +void CUDAAllocator::Free(Allocation* allocation) { platform::CUDADeviceGuard guard(place_.device); - PADDLE_ENFORCE_EQ(boost::get(allocation->place()), + auto* cuda_allocation = dynamic_cast(allocation); + PADDLE_ENFORCE_NOT_NULL(cuda_allocation); + PADDLE_ENFORCE_EQ(boost::get(cuda_allocation->place()), place_); PADDLE_ENFORCE(cudaFree(allocation->ptr())); delete allocation; } - Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { platform::CUDADeviceGuard guard(place_.device); void* ptr; @@ -40,9 +41,8 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, status, cudaGetErrorString(status))); } - return new Allocation(ptr, size, platform::Place(place_)); + return new CUDAAllocation(ptr, size, platform::Place(place_)); } - } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 580a2d1df1d5997a27180740393741ec8973bf18..63726f5820b1c81565117c7a9bf798c17c9681f6 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -20,6 +20,13 @@ namespace paddle { namespace memory { namespace allocation { +// CUDA System allocator and allocation. +// Just a flag type. 
diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc
index 0dc2de37467b7e7d23c88b4a255c14795db4c275..514ac7883ad2effdf3518be8afe3f448a5ac10b2 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.cc
+++ b/paddle/fluid/memory/allocation/legacy_allocator.cc
@@ -134,22 +134,26 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
 }
 
 #ifdef PADDLE_WITH_CUDA
-class GPUBuddyAllocatorList {
- public:
-  GPUBuddyAllocatorList()
-      : allocators_(platform::GetCUDADeviceCount()),
-        flags_(platform::GetCUDADeviceCount()) {
-    allocation::GPUMemMonitor.Initialize(allocators_.size());
-  }
+BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
+  static std::once_flag init_flag;
+  static detail::BuddyAllocator **a_arr = nullptr;
+  static std::vector<int> devices;
+
+  std::call_once(init_flag, [gpu_id]() {
+    devices = platform::GetSelectedDevices();
+    int gpu_num = devices.size();
 
-  BuddyAllocator *Get(size_t dev_id) {
-    PADDLE_ENFORCE(dev_id < flags_.size(), "Invalid device id %s", dev_id);
-    std::call_once(flags_[dev_id], [this, dev_id] {
+    allocation::GPUMemMonitor.Initialize(devices.size());
+
+    a_arr = new BuddyAllocator *[gpu_num];
+    for (size_t i = 0; i < devices.size(); ++i) {
+      int dev_id = devices[i];
+      a_arr[i] = nullptr;
       platform::SetDeviceId(dev_id);
-      allocators_[dev_id] = new BuddyAllocator(
-          std::unique_ptr<detail::SystemAllocator>(
-              new detail::GPUAllocator(dev_id)),
-          platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+      a_arr[i] = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
+                                        new detail::GPUAllocator(dev_id)),
+                                    platform::GpuMinChunkSize(),
+                                    platform::GpuMaxChunkSize());
 
       VLOG(10) << "\n\nNOTE:\n"
                << "You can set GFlags environment variable "
@@ -163,19 +167,13 @@ class GPUBuddyAllocatorList {
                << FLAGS_initial_gpu_memory_in_mb
                << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
                << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
-    });
-    return allocators_[dev_id];
-  }
-
- private:
-  std::vector<BuddyAllocator *> allocators_;
-  std::vector<std::once_flag> flags_;
-};
+    }
+  });
 
-BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
-  static GPUBuddyAllocatorList allocators;
   platform::SetDeviceId(gpu_id);
-  return allocators.Get(gpu_id);
+  auto pos = std::distance(devices.begin(),
+                           std::find(devices.begin(), devices.end(), gpu_id));
+  return a_arr[pos];
 }
 #endif
@@ -194,7 +192,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
 #ifdef PADDLE_WITH_CUDA
   auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
   auto *ptr = buddy_allocator->Alloc(size);
-  if (ptr == nullptr && size > 0) {
+  if (ptr == nullptr) {
     int cur_dev = platform::GetCurrentDeviceId();
     platform::SetDeviceId(place.device);
     size_t avail, total;
@@ -349,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   return tmp_alloc;
 }
 
-void LegacyAllocator::FreeImpl(Allocation *allocation) {
+void LegacyAllocator::Free(Allocation *allocation) {
   boost::apply_visitor(
       legacy::FreeVisitor(allocation->ptr(), allocation->size()),
       allocation->place());
diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h
index 27cd42ea35012f07ae7db79c46d767138ddaafff..d9bdae153da6439598f76f5cac226897e6e0c596 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.h
+++ b/paddle/fluid/memory/allocation/legacy_allocator.h
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
 
  protected:
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
-  void FreeImpl(Allocation *allocation) override;
+  void Free(Allocation *allocation) override;
 
  private:
   platform::Place place_;
diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc
index c43099cc88f839ad92d36774d49aafd7192f916f..62d768c580607f32db8c49eb3d62f0f32c9dbeeb 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.cc
+++ b/paddle/fluid/memory/allocation/locked_allocator.cc
@@ -17,7 +17,6 @@
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
-
 namespace paddle {
 namespace memory {
 namespace allocation {
@@ -25,24 +24,26 @@ namespace allocation {
 bool LockedAllocator::IsAllocThreadSafe() const { return true; }
 
 LockedAllocator::LockedAllocator(
-    std::shared_ptr<Allocator> underlying_allocator)
+    std::unique_ptr<Allocator> &&underlying_allocator)
     : underlying_allocator_(std::move(underlying_allocator)) {
   PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
   if (!underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
 }
-
-void LockedAllocator::FreeImpl(Allocation *allocation) {
-  platform::LockGuardPtr<std::mutex> guard(mtx_);
-  underlying_allocator_->Free(allocation);
+void LockedAllocator::Free(Allocation *allocation) {
+  {
+    platform::LockGuardPtr<std::mutex> guard(mtx_);
+    reinterpret_cast<AllocationWithUnderlying *>(allocation)
+        ->allocation_.reset();  // Destroy inner allocation
+  }
+  delete allocation;
 }
-
 Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
-  return underlying_allocator_->Allocate(size, attr).release();
+  return new AllocationWithUnderlying(
+      underlying_allocator_->Allocate(size, attr));
 }
-
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h
index b735ccef101417b3f880eb6dcdd9964cffbe875c..4967b9bb8d3ad101cff4657b0a45b49b76e2deb2 100644
--- a/paddle/fluid/memory/allocation/locked_allocator.h
+++ b/paddle/fluid/memory/allocation/locked_allocator.h
@@ -24,15 +24,15 @@ namespace allocation {
 // An allocator to make an underlying allocator thread safe.
 class LockedAllocator : public Allocator {
  public:
-  explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
+  explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
 
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation *allocation) override;
+  void Free(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::shared_ptr<Allocator> underlying_allocator_;
+  std::unique_ptr<Allocator> underlying_allocator_;
   std::unique_ptr<std::mutex> mtx_;
 };
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
deleted file mode 100644
index 3334589a4beb407447cf89c173f6128654bb245a..0000000000000000000000000000000000000000
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <gflags/gflags.h>
-#include <gtest/gtest.h>
-#include "paddle/fluid/memory/allocation/allocator_facade.h"
-
-#ifdef PADDLE_WITH_CUDA
-DECLARE_double(fraction_of_gpu_memory_to_use);
-DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
-DECLARE_int64(gpu_allocator_retry_time);
-#endif
-
-DECLARE_string(allocator_strategy);
-
-namespace paddle {
-namespace memory {
-namespace allocation {
-
-TEST(allocator, allocator) {
-#ifdef PADDLE_WITH_CUDA
-  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
-  FLAGS_gpu_allocator_retry_time = 500;
-  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
-#endif
-
-  FLAGS_allocator_strategy = "naive_best_fit";
-
-  auto &instance = AllocatorFacade::Instance();
-  platform::Place place;
-  size_t size = 1024;
-
-  {
-    place = platform::CPUPlace();
-    size = 1024;
-    auto cpu_allocation = instance.Alloc(place, size);
-    ASSERT_NE(cpu_allocation, nullptr);
-    ASSERT_NE(cpu_allocation->ptr(), nullptr);
-    ASSERT_EQ(cpu_allocation->place(), place);
-    ASSERT_EQ(cpu_allocation->size(), size);
-  }
-
-#ifdef PADDLE_WITH_CUDA
-  {
-    place = platform::CUDAPlace(0);
-    size = 1024;
-    auto gpu_allocation = instance.Alloc(place, size);
-    ASSERT_NE(gpu_allocation, nullptr);
-    ASSERT_NE(gpu_allocation->ptr(), nullptr);
-    ASSERT_EQ(gpu_allocation->place(), place);
-    ASSERT_GE(gpu_allocation->size(), size);
-  }
-
-  {
-    // Allocate 2GB gpu memory
-    place = platform::CUDAPlace(0);
-    size = 2 * static_cast<size_t>(1 << 30);
-    auto gpu_allocation = instance.Alloc(place, size);
-    ASSERT_NE(gpu_allocation, nullptr);
-    ASSERT_NE(gpu_allocation->ptr(), nullptr);
-    ASSERT_EQ(gpu_allocation->place(), place);
-    ASSERT_GE(gpu_allocation->size(), size);
-  }
-
-  {
-    place = platform::CUDAPinnedPlace();
-    size = (1 << 20);
-    auto cuda_pinned_allocation =
-        instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
-    ASSERT_NE(cuda_pinned_allocation, nullptr);
-    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
-    ASSERT_EQ(cuda_pinned_allocation->place(), place);
-    ASSERT_GE(cuda_pinned_allocation->size(), size);
-  }
-#endif
-}
-
-}  // namespace allocation
-}  // namespace memory
-}  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc
index 5a3d817211750d3e19e65344d1eab5a96800c674..de81d12cca6ca280289371abdec225c9e2b8f4d0 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.cc
+++ b/paddle/fluid/memory/allocation/pinned_allocator.cc
@@ -20,15 +20,20 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
-void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
+void CPUPinnedAllocator::Free(Allocation *allocation) {
+  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
   PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
   delete allocation;
 }
 
 Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
                                              Allocator::Attr attr) {
+  // PADDLE_ENFORCE_EQ(
+  //     attr, kCrossDevice,
+  //     "CPUPinnedAllocator should be used for Cross-Device Communication");
+
   void *ptr;
   PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
-  return new Allocation(ptr, size, platform::CUDAPinnedPlace());
+  return new CPUPinnedAllocation(ptr, size);
 }
 }  // namespace allocation
 }  // namespace memory
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h
index deeb55a8fb0396a312286f5c2692114e9e4afc8d..42d0938f2afbb1efca8bfdd7035bc0eada30f06b 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.h
+++ b/paddle/fluid/memory/allocation/pinned_allocator.h
@@ -20,12 +20,18 @@ namespace memory {
 namespace allocation {
 
 // Allocator uses `cudaHostAlloc`
+class CPUPinnedAllocation : public Allocation {
+ public:
+  CPUPinnedAllocation(void *ptr, size_t size)
+      : Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
+};
+
 class CPUPinnedAllocator : public Allocator {
  public:
   bool IsAllocThreadSafe() const override;
 
  protected:
-  void FreeImpl(Allocation *allocation) override;
+  void Free(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
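The pinned path still goes through `cudaHostAlloc` with `cudaHostAllocPortable`, which pins host pages so they stay resident for asynchronous DMA and makes the pinning visible to all CUDA contexts; such memory must be released with `cudaFreeHost`, as `Free()` above does. A minimal standalone example (requires the CUDA toolkit to build):

```cpp
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  void* ptr = nullptr;
  cudaError_t status = cudaHostAlloc(&ptr, 1 << 20, cudaHostAllocPortable);
  if (status != cudaSuccess) {
    std::fprintf(stderr, "cudaHostAlloc failed: %s\n",
                 cudaGetErrorString(status));
    return 1;
  }
  std::printf("pinned 1 MiB at %p\n", ptr);
  cudaFreeHost(ptr);  // pinned memory must go back through cudaFreeHost
  return 0;
}
```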
diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc
index 7e888988f9602e362d73f64c1b45552e84e3349c..981705051b449e6a35c2dcce9138dc2efae52920 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator.cc
@@ -18,15 +18,25 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-void RetryAllocator::FreeImpl(Allocation* allocation) {
+bool RetryAllocator::IsAllocThreadSafe() const {
+  return underlying_allocator_->IsAllocThreadSafe();
+}
+
+void RetryAllocator::Free(Allocation* allocation) {
   // Delete underlying allocation first.
-  underlying_allocator_->Free(allocation);
-  cv_.notify_all();
+  reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset();
+  {
+    // notify all waiting allocators; they can retry after this free.
+    std::lock_guard<std::mutex> lock(mutex_);
+    cv_.notify_all();
+  }
+  delete allocation;
 }
 
 Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   auto alloc_func = [&, this]() {
-    return underlying_allocator_->Allocate(size, attr).release();
+    return new AllocationWithUnderlying(
+        underlying_allocator_->Allocate(size, attr));
   };
   // In fact, we can unify the code of allocation success and failure
   // But it would add a lock even when allocation succeeds at the first try
diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h
index 379f576d6e1ed8f256a0233b203423a487ee73e4..6ab8ca8fbec0077b2c95cf727731ca0095716197 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.h
+++ b/paddle/fluid/memory/allocation/retry_allocator.h
@@ -25,25 +25,32 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+class RetryAllocator;
+
 class RetryAllocator : public Allocator {
  public:
-  RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
+  RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
       : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
+    EnforceCheck();
+  }
+
+  bool IsAllocThreadSafe() const override;
+
+ private:
+  void EnforceCheck() {
     PADDLE_ENFORCE_NOT_NULL(
-        underlying_allocator_,
-        "UnderlyingAllocator of RetryAllocator must not be null");
+        underlying_allocator_.get(),
+        "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
     PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
                    "UnderlyingAllocator of RetryAllocator must be thread-safe");
   }
 
-  bool IsAllocThreadSafe() const override { return true; }
-
  protected:
-  void FreeImpl(Allocation* allocation) override;
+  void Free(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 
  private:
-  std::shared_ptr<Allocator> underlying_allocator_;
+  std::unique_ptr<Allocator> underlying_allocator_;
   std::chrono::milliseconds retry_time_;
   std::mutex mutex_;
   std::condition_variable cv_;
@@ -51,6 +58,8 @@ class RetryAllocator : public Allocator {
   // For debug, we can add an atomic integer to record how many memory sizes
   // are waiting to be allocated
   // std::atomic<size_t> waited_allocate_size_{0};
+
+  friend class RetryAllocation;
 };
 
 }  // namespace allocation
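`RetryAllocator`'s protocol is unchanged in spirit: a failed allocation blocks on a condition variable until some `Free()` signals, then retries within a bounded window. A standalone sketch of that wait/notify handshake (a boolean stands in for actual memory pressure):

```cpp
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex mtx;
std::condition_variable cv;
bool memory_available = false;

bool AllocateWithRetry(std::chrono::milliseconds retry_time) {
  std::unique_lock<std::mutex> lock(mtx);
  if (memory_available) return true;  // first attempt succeeded
  // Wait until a Free() notifies us, or the retry window expires;
  // the real allocator would rethrow BadAlloc on timeout.
  return cv.wait_for(lock, retry_time, [] { return memory_available; });
}

void Free() {
  {
    std::lock_guard<std::mutex> lock(mtx);
    memory_available = true;  // release resources before notifying
  }
  cv.notify_all();
}

int main() {
  std::thread t([] {
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    Free();
  });
  std::cout << AllocateWithRetry(std::chrono::milliseconds(500)) << "\n";  // 1
  t.join();
}
```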
diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.cc b/paddle/fluid/memory/allocation/zero_size_allocator.cc
index 39743bcb10c700c9a8446b9040c8a8707d57ec7d..cb2df1a029815478bbc9d3b09425f3ef145c5fb3 100644
--- a/paddle/fluid/memory/allocation/zero_size_allocator.cc
+++ b/paddle/fluid/memory/allocation/zero_size_allocator.cc
@@ -24,20 +24,11 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
 
 Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   if (size == 0) {
-    return new Allocation(nullptr, 0, place_);
+    return new ZeroSizeAllocation(place_);
   } else {
     return underlying_allocator_->Allocate(size, attr).release();
   }
 }
-
-void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
-  if (allocation->size() == 0) {
-    delete allocation;
-  } else {
-    underlying_allocator_->Free(allocation);
-  }
-}
-
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.h b/paddle/fluid/memory/allocation/zero_size_allocator.h
index 08a7a06dbf290b55994a407fe478f792b0c0964a..0f01dfcdf5b1179c52d8c0204b655cab10770d95 100644
--- a/paddle/fluid/memory/allocation/zero_size_allocator.h
+++ b/paddle/fluid/memory/allocation/zero_size_allocator.h
@@ -24,6 +24,12 @@ namespace allocation {
 // The allocator handles requests whose size is zero. It will always
 // return an allocation even when the requested size is zero. However, the
 // allocation.ptr() is nullptr
+class ZeroSizeAllocation : public Allocation {
+ public:
+  explicit ZeroSizeAllocation(const platform::Place& p)
+      : Allocation(nullptr, 0, p) {}
+};
+
 class ZeroSizeAllocator : public Allocator {
  public:
   ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator,
@@ -34,7 +40,6 @@ class ZeroSizeAllocator : public Allocator {
 
  protected:
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
-  void FreeImpl(Allocation* allocation) override;
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
diff --git a/paddle/fluid/platform/temporary_allocator.cc b/paddle/fluid/platform/temporary_allocator.cc
index ddde7baf4cf3b44ac5d8a22fcc98acef50294575..250efe70fd631fc58be7b7c81c75ea53bd55b5cd 100644
--- a/paddle/fluid/platform/temporary_allocator.cc
+++ b/paddle/fluid/platform/temporary_allocator.cc
@@ -30,31 +30,38 @@ namespace paddle {
 namespace platform {
 namespace alloc = memory::allocation;
 
+TemporaryAllocation::TemporaryAllocation(
+    alloc::AllocationPtr &&underlying_allocation)
+    : Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
+                 underlying_allocation->place()),
+      underlying_allocation_(std::move(underlying_allocation)) {}
+
 TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
-  temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
+  temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
 }
 
 bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
 
 void TemporaryAllocator::Release(const std::function<void()> &callback) {
-  std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
+  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations;
   {
     std::unique_lock<std::mutex> lock(mtx_);
     callback();
     t_allocations.swap(temp_mem_map_);
-    temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
+    temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
     wait_delete_mem_ = 0;
   }
 
-  alloc::AllocationDeleter deleter;
   for (auto tmp : *t_allocations) {
     VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
              << " size: " << tmp.second->size();
-    deleter(tmp.second);
+    delete tmp.second;
   }
 }
 
-void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
+void TemporaryAllocator::Free(alloc::Allocation *allocation) {
+  auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
+  PADDLE_ENFORCE_NOT_NULL(temp_allocation);
   if (platform::is_gpu_place(temp_allocation->place())) {
     PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
                    "The place should be the same.");
@@ -78,7 +85,7 @@
   }
   VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
            << " size: " << temp_allocation->size();
-  alloc::AllocationDeleter()(temp_allocation);
+  delete temp_allocation;
 }
 
 size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
@@ -113,9 +120,11 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
   }
   // If no available allocation is found, get an allocation from
   // AllocatorFacadeInstance.
-  auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
+  auto raw_allocation =
+      alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
+  auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
   VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
-  return temp_mem.release();
+  return temp_mem;
 }
 
 }  // namespace platform
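`TemporaryAllocation` is reintroduced as an owning wrapper: it mirrors the underlying allocation's pointer, size, and place while holding the `AllocationPtr` alive, so destroying the wrapper releases the real memory. A standalone sketch of the idiom:

```cpp
#include <cstdio>
#include <cstdlib>
#include <memory>

struct Allocation {
  Allocation(void* p, size_t n) : ptr(p), size(n) {}
  virtual ~Allocation() = default;
  void* ptr;
  size_t size;
};

struct MallocAllocation : Allocation {
  explicit MallocAllocation(size_t n) : Allocation(std::malloc(n), n) {}
  ~MallocAllocation() override { std::free(ptr); }
};

// Mirrors ptr/size of the wrapped allocation and keeps it alive by owning it.
struct OwningWrapper : Allocation {
  explicit OwningWrapper(std::unique_ptr<Allocation>&& underlying)
      : Allocation(underlying->ptr, underlying->size),
        underlying_(std::move(underlying)) {}
  std::unique_ptr<Allocation> underlying_;
};

int main() {
  std::unique_ptr<Allocation> raw(new MallocAllocation(128));
  void* mem = raw->ptr;
  auto* tmp = new OwningWrapper(std::move(raw));
  std::printf("same memory: %d\n", tmp->ptr == mem);  // 1
  delete tmp;  // destroys underlying_, which frees the buffer
}
```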
diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h
index 912d45eaf17fe8c05840995275dd3e2e688b38ef..f8a43b889d58d5e027aac8e08324cf51b7d82913 100644
--- a/paddle/fluid/platform/temporary_allocator.h
+++ b/paddle/fluid/platform/temporary_allocator.h
@@ -23,6 +23,14 @@
 namespace paddle {
 namespace platform {
 
+class TemporaryAllocation : public memory::allocation::Allocation {
+ public:
+  explicit TemporaryAllocation(
+      memory::allocation::AllocationPtr &&underlying_allocation);
+
+  memory::allocation::AllocationPtr underlying_allocation_;
+};
+
 /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
  * which is used by CUDA's async operation.
  *
@@ -49,7 +57,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   void SetCallback(const std::function<void()> &callback);
 
  protected:
-  void FreeImpl(memory::allocation::Allocation *allocation) override;
+  void Free(memory::allocation::Allocation *allocation) override;
   memory::allocation::Allocation *AllocateImpl(
       size_t size, memory::allocation::Allocator::Attr attr) override;
 
@@ -58,8 +66,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   platform::Place place_;
   // When the allocation is not held by any variable, it should be placed
   // to temp_mem_map immediately.
-  std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
-      temp_mem_map_{nullptr};
+  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
+      nullptr};
   std::mutex mtx_;
   size_t wait_delete_mem_{0};
   std::function<void()> callback_;
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index dca40edf0bbe334d7a9306b9b6b259b0bc9a6f5d..7bf089637862c969d27f957a630469d1644222bf 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -324,7 +324,6 @@ PYBIND11_MODULE(core, m) {
            [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
              self.mutable_data<float>(place);
            })
-      .def("_clear", &Tensor::clear)
       .def("set", PyCPUTensorSetFromArray<float>)
       .def("set", PyCPUTensorSetFromArray<int>)
       .def("set", PyCPUTensorSetFromArray<double>)
diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h
index 66b768665b6d0b97b4ca1470020132bfc9576bbb..16bb3771f2e9bcc07028ef2039fed8691f9aab97 100644
--- a/paddle/fluid/string/printf.h
+++ b/paddle/fluid/string/printf.h
@@ -105,12 +105,14 @@ void Printf(const char* fmt, const Args&... args) {
   Fprintf(std::cout, fmt, args...);
 }
 
-inline std::string HumanReadableSize(double f_size) {
+template <typename T>
+std::string HumanReadableSize(T size) {
   size_t i = 0;
+  double f_size = static_cast<double>(size);
   double orig = f_size;
   const std::vector<std::string> units(
       {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
-  while (f_size >= 1024) {
+  while (f_size > 1024) {
     f_size /= 1024;
     i++;
   }
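The templated `HumanReadableSize` widens any numeric input to `double` before scaling, and the loop boundary changes from `>=` to `>`, so an input of exactly 1024 now stays in the smaller unit ("1024B" instead of "1kB"). A standalone sketch of the behavior; the `%f%s` formatting tail is an assumption, since the function's printing code falls outside this hunk:

```cpp
#include <cstdio>
#include <string>
#include <vector>

template <typename T>
std::string HumanReadableSize(T size) {
  size_t i = 0;
  double f_size = static_cast<double>(size);  // works for any numeric T
  const std::vector<std::string> units(
      {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
  // Assumes size < 1024^9 so the unit index stays in range.
  while (f_size > 1024) {
    f_size /= 1024;
    i++;
  }
  char buf[64];
  std::snprintf(buf, sizeof(buf), "%f%s", f_size, units[i].c_str());
  return std::string(buf);
}

int main() {
  std::printf("%s\n", HumanReadableSize(1536).c_str());   // ~1.5kB
  std::printf("%s\n", HumanReadableSize(1024).c_str());   // 1024B with `>`
  std::printf("%s\n", HumanReadableSize(3.5e9).c_str());  // ~3.26GB
}
```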