From 174d0d0b90a610807d6f82927aad4def227ee643 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Thu, 28 Mar 2019 08:52:08 +0800 Subject: [PATCH] Revert "Fix allocator bug" add include headers to fix travis-ci test=develop --- paddle/fluid/framework/operator.h | 3 + paddle/fluid/memory/allocation/CMakeLists.txt | 23 +++-- .../memory/allocation/aligned_allocator.h | 2 - paddle/fluid/memory/allocation/allocator.cc | 14 +-- paddle/fluid/memory/allocation/allocator.h | 72 ++------------- .../memory/allocation/allocator_facade.cc | 48 ++++------ .../memory/allocation/allocator_strategy.cc | 14 +-- .../memory/allocation/best_fit_allocator.cc | 2 +- .../memory/allocation/best_fit_allocator.h | 2 +- .../memory/allocation/buffered_allocator.cc | 22 +++-- .../memory/allocation/buffered_allocator.h | 6 +- .../allocation/buffered_allocator_test.cc | 3 +- .../fluid/memory/allocation/cpu_allocator.cc | 28 +++--- .../fluid/memory/allocation/cpu_allocator.h | 10 +- .../fluid/memory/allocation/cuda_allocator.cc | 10 +- .../fluid/memory/allocation/cuda_allocator.h | 9 +- .../memory/allocation/legacy_allocator.cc | 52 +++++------ .../memory/allocation/legacy_allocator.h | 2 +- .../memory/allocation/locked_allocator.cc | 19 ++-- .../memory/allocation/locked_allocator.h | 6 +- .../naive_best_fit_allocator_facade_test.cc | 91 ------------------- .../memory/allocation/pinned_allocator.cc | 9 +- .../memory/allocation/pinned_allocator.h | 8 +- .../memory/allocation/retry_allocator.cc | 18 +++- .../fluid/memory/allocation/retry_allocator.h | 23 +++-- .../memory/allocation/zero_size_allocator.cc | 11 +-- .../memory/allocation/zero_size_allocator.h | 7 +- paddle/fluid/platform/temporary_allocator.cc | 27 ++++-- paddle/fluid/platform/temporary_allocator.h | 14 ++- paddle/fluid/pybind/pybind.cc | 1 - paddle/fluid/string/printf.h | 6 +- 31 files changed, 224 insertions(+), 338 deletions(-) delete mode 100644 paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 6d8ba430b..a02e53dcf 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -365,6 +365,9 @@ class ExecutionContext { auto shared_allocation = std::shared_ptr( allocation_ptr, deleter); + PADDLE_ENFORCE( + dynamic_cast(allocation_ptr) != nullptr, + "The AllocationPtr must be TemporaryAllocation."); PADDLE_ENFORCE_GE(allocation_ptr->size(), framework::product(dim) * sizeof(T)); diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 0f6014ae8..ac77c3d2a 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -4,7 +4,6 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) -cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) if (WITH_GPU) @@ -38,20 +37,30 @@ else () set(AllocatorFacadeDeps) endif() -list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator) - cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) +cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator) cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator) -cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps}) -cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy) +cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags) +cc_library(allocator_facade SRCS allocator_facade.cc DEPS + ${AllocatorFacadeDeps} + cpu_allocator + locked_allocator + best_fit_allocator + aligned_allocator + auto_increment_allocator + zero_size_allocator + conditional_allocator + retry_allocator + buffered_allocator + allocator_strategy + legacy_allocator + ) nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) -cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade) - cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h index b536d4276..064acd06e 100644 --- a/paddle/fluid/memory/allocation/aligned_allocator.h +++ b/paddle/fluid/memory/allocation/aligned_allocator.h @@ -94,8 +94,6 @@ class AlignedAllocator : public ThinAlignedAllocator { underlying_allocator_->Allocate(size + kAlignment, attr); return new AlignedAllocation(std::move(raw_allocation), size); } - - void FreeImpl(Allocation* allocation) override { delete allocation; } }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc index 5a5253d91..8fb8a5fb8 100644 --- a/paddle/fluid/memory/allocation/allocator.cc +++ b/paddle/fluid/memory/allocation/allocator.cc @@ -27,24 +27,16 @@ bool Allocator::IsAllocThreadSafe() const { return false; } AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) { auto ptr = AllocateImpl(size, attr); - ptr->RegisterDecoratedAllocator(this); + ptr->set_allocator(this); return AllocationPtr(ptr); } -void Allocator::FreeImpl(Allocation* allocation) { - Allocator* allocator = allocation->TopDecoratedAllocator(); - allocator->Free(allocation); -} - -void Allocator::Free(Allocation* allocation) { - allocation->PopDecoratedAllocator(); - FreeImpl(allocation); -} +void Allocator::Free(Allocation* allocation) { delete allocation; } const char* BadAlloc::what() const noexcept { return msg_.c_str(); } void AllocationDeleter::operator()(Allocation* allocation) const { - Allocator* allocator = allocation->TopDecoratedAllocator(); + auto* allocator = allocation->allocator(); allocator->Free(allocation); } diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 33b816b90..346527893 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -46,56 +46,13 @@ class Allocator; // NOTE: this is the base class of Allocation. Each allocator can use its own // allocation object. // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0 - -/** - * Allocation is returned by Allocator::Allocate() method. - * - * An allocator may be decorated by another allocator. For example, we can - * decorate - * a RetryAllocator to any allocator to perform allocation retry when first - * allocation request fails. - * - * Explanations of Allocator design is as follows: - * - * Suppose we have an allocator which is decorated by several allocators: - * - * A(1) <- A(2) <- A(3) <- ... <- A(n) - * - * , and the public allocator is A(1). - * - * The allocation process would be: - * - * A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate() - * - * , and the free process would be: - * - * A(1).Free() -> A(2).Free() -> ... -> A(n).Free() - * - * Therefore, we should record the allocator chain when allocating, so - * that we can free the allocation in the reverse order of allocator chain. - * The field `decorated_allocators_` is used to record this chain. - * - * Another example is that we want to add additional fields in Allocation, - * e.g., something what is done in AlignedAllocator, etc. - * In this case, we should declare a derived class of Allocation, which - * contains an underlying Allocation allocated by the underlying allocator. - * Therefore, `decorated_allocators_` of the new Allocation object would - * be a new chain, differing from the underlying Allocation object. - */ class Allocation { public: Allocation(void* ptr, size_t size, platform::Place place) - : ptr_(ptr), size_(size), place_(place) { - // NOTE(zjl): Since decorated_allocators_ is usually a small vector - // We reserve a small buffer to it to prevent frequent heap allocation - // Not quite sure whether we need something like gtl vector. - decorated_allocators_.reserve(8); - } + : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {} Allocation(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete; - Allocation(Allocation&& o) = delete; - Allocation& operator=(Allocation&& o) = delete; // Returns the holding pointer. // NOTE: For performance consideration, it is better not to make this method @@ -117,31 +74,17 @@ class Allocation { const platform::Place& place() const { return place_; } - virtual ~Allocation(); - - private: - const std::vector& DecoratedAllocators() const { - return decorated_allocators_; - } - - inline void RegisterDecoratedAllocator(Allocator* allocator) { - decorated_allocators_.push_back(allocator); - } + Allocator* allocator() { return allocator_; } - inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); } + void set_allocator(Allocator* allocator) { allocator_ = allocator; } - inline Allocator* TopDecoratedAllocator() { - return decorated_allocators_.back(); - } + virtual ~Allocation(); private: + Allocator* allocator_; void* ptr_; size_t size_; platform::Place place_; - std::vector decorated_allocators_; - - friend class Allocator; - friend class AllocationDeleter; }; using AllocationPtr = std::unique_ptr; @@ -191,12 +134,9 @@ class Allocator { // True if the `Allocate` is thread safe. virtual bool IsAllocThreadSafe() const; - // This function should not be called outside - void Free(Allocation* allocation); - protected: + virtual void Free(Allocation* allocation); virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; - virtual void FreeImpl(Allocation* allocation); private: friend class AllocationDeleter; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 09328aded..a3b73e3ba 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -49,17 +49,6 @@ namespace paddle { namespace memory { namespace allocation { -static inline std::shared_ptr WrapRetryAllocator( - std::shared_ptr allocator, int64_t retry_time) { - if (retry_time > 0) { - auto* retry_allocator = - new RetryAllocator(std::move(allocator), retry_time); - allocator.reset(retry_allocator); - } - - return allocator; -} - // TODO(yy): Dirty code here. This class should be configurable in runtime. class CPUManagedAllocator : public Allocator { public: @@ -123,10 +112,14 @@ class ChunkedAllocator : public Allocator { std::shared_ptr CreateAllocatorWithChunk() { chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_)); auto* allocation = chunks_.back().get(); - std::shared_ptr allocator(new LockedAllocator( - std::shared_ptr(new BestFitAllocator(allocation)))); + std::unique_ptr allocator(new LockedAllocator( + std::unique_ptr(new BestFitAllocator(allocation)))); - allocator = WrapRetryAllocator(allocator, retry_time_); + if (retry_time_ > 0) { + auto* retry_allocator = + new RetryAllocator(std::move(allocator), retry_time_); + allocator.reset(retry_allocator); + } return std::make_shared>(std::move(allocator)); } @@ -197,23 +190,13 @@ class AllocatorFacadePrivate { ~AllocatorFacadePrivate() = default; AllocatorFacadePrivate() { - auto strategy = GetAllocatorStrategy(); - switch (strategy) { - case AllocatorStrategy::kLegacy: { - InitLegacyAllocator(); - break; - } - case AllocatorStrategy::kNaiveBestFit: { - InitCPUAllocator(); - InitCUDAAllocator(); - InitCUDAPinnedAllocator(); - WrapZeroSizeAllocator(); - break; - } - default: { - PADDLE_THROW("Unsupported allocator strategy: %d", - static_cast(strategy)); - } + if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) { + InitLegacyAllocator(); + } else { + InitCPUAllocator(); + InitCUDAAllocator(); + InitCUDAPinnedAllocator(); + WrapZeroSizeAllocator(); } } @@ -271,7 +254,8 @@ AllocatorFacade& AllocatorFacade::Instance() { std::shared_ptr AllocatorFacade::AllocShared( const platform::Place& place, size_t size, Allocator::Attr attr) { - return std::shared_ptr(Alloc(place, size, attr)); + return std::shared_ptr(Alloc(place, size, attr).release(), + AllocationDeleter()); } AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc index fff94c01e..8cebda900 100644 --- a/paddle/fluid/memory/allocation/allocator_strategy.cc +++ b/paddle/fluid/memory/allocation/allocator_strategy.cc @@ -19,22 +19,16 @@ DEFINE_string( allocator_strategy, "legacy", "The allocation strategy. Legacy means the original allocator of Fluid." - "naive_best_fit means the experimental best fit allocator. " - "allocator. Enum in [legacy, naive_best_fit]."); + "New means the experimental allocators of Fluid. in [legacy, new]"); namespace paddle { namespace memory { namespace allocation { static AllocatorStrategy GetStrategyFromFlag() { - if (FLAGS_allocator_strategy == "legacy") { - return AllocatorStrategy::kLegacy; - } else if (FLAGS_allocator_strategy == "naive_best_fit") { - return AllocatorStrategy::kNaiveBestFit; - } else { - PADDLE_THROW("Unsupported allocator strategy: %s", - FLAGS_allocator_strategy); - } + return FLAGS_allocator_strategy == "legacy" + ? AllocatorStrategy::kLegacy + : AllocatorStrategy::kNaiveBestFit; } AllocatorStrategy GetAllocatorStrategy() { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index d87dd9a4b..e3d6c2f51 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const { } return num; } -void BestFitAllocator::FreeImpl(Allocation* allocation) { +void BestFitAllocator::Free(Allocation* allocation) { auto* bf_allocation = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(bf_allocation, "The input allocation is not BestFitAllocation."); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index c137438c0..4f10f2b53 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator { void InsertFreeNode(const ListIt& it); protected: - void FreeImpl(Allocation* allocation) override; + void Free(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; private: diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index e04c0aa34..fc75abc9d 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -22,11 +22,11 @@ namespace paddle { namespace memory { namespace allocation { -BufferedAllocator::BufferedAllocator(std::shared_ptr allocator) +BufferedAllocator::BufferedAllocator(std::unique_ptr &&allocator) : underlying_allocator_(std::move(allocator)) { PADDLE_ENFORCE_NOT_NULL( underlying_allocator_, - "Underlying allocator of BufferedAllocator must not be null"); + "Underlying allocator of BufferedAllocator must be unmanaged"); if (underlying_allocator_->IsAllocThreadSafe()) { mtx_.reset(new std::mutex()); } @@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) { while (!allocations_.empty()) { // free the largest auto it = --allocations_.end(); cur += it->second->size(); - underlying_allocator_->Free(it->second.release()); + delete it->second.release(); allocations_.erase(it); if (cur >= size) return; } } -bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; } - -void BufferedAllocator::FreeImpl(Allocation *allocation) { +bool BufferedAllocator::IsAllocThreadSafe() const { + return this->underlying_allocator_->IsAllocThreadSafe(); +} +void BufferedAllocator::Free(Allocation *allocation) { platform::LockGuardPtr guard(mtx_); allocations_.emplace(allocation->size(), AllocationPtr(allocation)); } - Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { { platform::LockGuardPtr guard(mtx_); @@ -61,15 +61,17 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { if (it != allocations_.end() && it->first < size * 2) { AllocationPtr result(std::move(it->second)); allocations_.erase(it); - return result.release(); + return new AllocationWithUnderlying(std::move(result)); } } try { - return underlying_allocator_->Allocate(size, attr).release(); + return new AllocationWithUnderlying( + underlying_allocator_->Allocate(size, attr)); } catch (BadAlloc &) { FreeCache(size); - return underlying_allocator_->Allocate(size, attr).release(); + return new AllocationWithUnderlying( + underlying_allocator_->Allocate(size, attr)); } } diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index c72839570..d44a3f85b 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -31,7 +31,7 @@ namespace allocation { // underlying_allocator_ class BufferedAllocator : public Allocator { public: - explicit BufferedAllocator(std::shared_ptr allocator); + explicit BufferedAllocator(std::unique_ptr &&allocator); ~BufferedAllocator(); @@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator { void FreeCache(size_t size); protected: - void FreeImpl(Allocation *allocation) override; + void Free(Allocation *allocation) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; private: - std::shared_ptr underlying_allocator_; + std::unique_ptr underlying_allocator_; std::multimap allocations_; std::unique_ptr mtx_; }; diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index 854a117b0..c8bd5292c 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/memory/allocation/buffered_allocator.h" #include +#include #include #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" @@ -65,7 +66,7 @@ class StubAllocator : public Allocator { size_t GetFreeCount() const { return destruct_count_; } protected: - void FreeImpl(Allocation *allocation) override { + void Free(Allocation *allocation) override { auto *alloc = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(alloc); if (alloc->ptr()) delete[] static_cast(alloc->ptr()); diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index 90c49c87a..cc81a6f7b 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -20,27 +20,25 @@ namespace paddle { namespace memory { namespace allocation { +CPUAllocation::CPUAllocation(void *ptr, size_t size) + : Allocation(ptr, size, platform::CPUPlace()) {} + bool CPUAllocator::IsAllocThreadSafe() const { return true; } -void CPUAllocator::FreeImpl(Allocation *allocation) { - void *p = allocation->ptr(); -#ifdef _WIN32 - _aligned_free(p); -#else - free(p); -#endif +void CPUAllocator::Free(Allocation *allocation) { + PADDLE_ENFORCE_NOT_NULL(dynamic_cast(allocation)); + free(allocation->ptr()); delete allocation; } Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { - void *p; -#ifdef _WIN32 - p = _aligned_malloc(size, kAlignment); -#else - PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!", - size); -#endif - return new Allocation(p, size, platform::CPUPlace()); + void *ptr; + auto status = posix_memalign(&ptr, kAlignment, size); + if (UNLIKELY(status) != 0) { + throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d", + size, status)); + } + return new CPUAllocation(ptr, size); } } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index 3eb1416b0..26d3643f4 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -31,13 +31,19 @@ namespace allocation { // // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import // an open-sourced allocator into Paddle. +class CPUAllocator; +class CPUAllocation : public Allocation { + public: + CPUAllocation(void* ptr, size_t size); +}; + class CPUAllocator : public Allocator { public: - constexpr static size_t kAlignment = 4096UL; + constexpr static size_t kAlignment = 64u; bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; + void Free(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 895a24a6a..430bf0be9 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -23,14 +23,15 @@ namespace paddle { namespace memory { namespace allocation { bool CUDAAllocator::IsAllocThreadSafe() const { return true; } -void CUDAAllocator::FreeImpl(Allocation* allocation) { +void CUDAAllocator::Free(Allocation* allocation) { platform::CUDADeviceGuard guard(place_.device); - PADDLE_ENFORCE_EQ(boost::get(allocation->place()), + auto* cuda_allocation = dynamic_cast(allocation); + PADDLE_ENFORCE_NOT_NULL(cuda_allocation); + PADDLE_ENFORCE_EQ(boost::get(cuda_allocation->place()), place_); PADDLE_ENFORCE(cudaFree(allocation->ptr())); delete allocation; } - Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { platform::CUDADeviceGuard guard(place_.device); void* ptr; @@ -40,9 +41,8 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, status, cudaGetErrorString(status))); } - return new Allocation(ptr, size, platform::Place(place_)); + return new CUDAAllocation(ptr, size, platform::Place(place_)); } - } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 580a2d1df..63726f582 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -20,6 +20,13 @@ namespace paddle { namespace memory { namespace allocation { +// CUDA System allocator and allocation. +// Just a flag type. +class CUDAAllocation : public Allocation { + public: + using Allocation::Allocation; +}; + class CUDAAllocator : public Allocator { public: explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {} @@ -28,7 +35,7 @@ class CUDAAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; + void Free(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; private: diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc index 0dc2de374..514ac7883 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.cc +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -134,22 +134,26 @@ size_t Used(const platform::CPUPlace &place) { } #ifdef PADDLE_WITH_CUDA -class GPUBuddyAllocatorList { - public: - GPUBuddyAllocatorList() - : allocators_(platform::GetCUDADeviceCount()), - flags_(platform::GetCUDADeviceCount()) { - allocation::GPUMemMonitor.Initialize(allocators_.size()); - } +BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) { + static std::once_flag init_flag; + static detail::BuddyAllocator **a_arr = nullptr; + static std::vector devices; + + std::call_once(init_flag, [gpu_id]() { + devices = platform::GetSelectedDevices(); + int gpu_num = devices.size(); - BuddyAllocator *Get(size_t dev_id) { - PADDLE_ENFORCE(dev_id < flags_.size(), "Invalid device id %s", dev_id); - std::call_once(flags_[dev_id], [this, dev_id] { + allocation::GPUMemMonitor.Initialize(devices.size()); + + a_arr = new BuddyAllocator *[gpu_num]; + for (size_t i = 0; i < devices.size(); ++i) { + int dev_id = devices[i]; + a_arr[i] = nullptr; platform::SetDeviceId(dev_id); - allocators_[dev_id] = new BuddyAllocator( - std::unique_ptr( - new detail::GPUAllocator(dev_id)), - platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); + a_arr[i] = new BuddyAllocator(std::unique_ptr( + new detail::GPUAllocator(dev_id)), + platform::GpuMinChunkSize(), + platform::GpuMaxChunkSize()); VLOG(10) << "\n\nNOTE:\n" << "You can set GFlags environment variable " @@ -163,19 +167,13 @@ class GPUBuddyAllocatorList { << FLAGS_initial_gpu_memory_in_mb << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is " << FLAGS_reallocate_gpu_memory_in_mb << "\n\n"; - }); - return allocators_[dev_id]; - } - - private: - std::vector allocators_; - std::vector flags_; -}; + } + }); -BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) { - static GPUBuddyAllocatorList allocators; platform::SetDeviceId(gpu_id); - return allocators.Get(gpu_id); + auto pos = std::distance(devices.begin(), + std::find(devices.begin(), devices.end(), gpu_id)); + return a_arr[pos]; } #endif @@ -194,7 +192,7 @@ void *Alloc(const platform::CUDAPlace &place, #ifdef PADDLE_WITH_CUDA auto *buddy_allocator = GetGPUBuddyAllocator(place.device); auto *ptr = buddy_allocator->Alloc(size); - if (ptr == nullptr && size > 0) { + if (ptr == nullptr) { int cur_dev = platform::GetCurrentDeviceId(); platform::SetDeviceId(place.device); size_t avail, total; @@ -349,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { return tmp_alloc; } -void LegacyAllocator::FreeImpl(Allocation *allocation) { +void LegacyAllocator::Free(Allocation *allocation) { boost::apply_visitor( legacy::FreeVisitor(allocation->ptr(), allocation->size()), allocation->place()); diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h index 27cd42ea3..d9bdae153 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.h +++ b/paddle/fluid/memory/allocation/legacy_allocator.h @@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator { protected: Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; - void FreeImpl(Allocation *allocation) override; + void Free(Allocation *allocation) override; private: platform::Place place_; diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc index c43099cc8..62d768c58 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ b/paddle/fluid/memory/allocation/locked_allocator.cc @@ -17,7 +17,6 @@ #include #include "paddle/fluid/memory/allocation/allocation_with_underlying.h" #include "paddle/fluid/platform/lock_guard_ptr.h" - namespace paddle { namespace memory { namespace allocation { @@ -25,24 +24,26 @@ namespace allocation { bool LockedAllocator::IsAllocThreadSafe() const { return true; } LockedAllocator::LockedAllocator( - std::shared_ptr underlying_allocator) + std::unique_ptr &&underlying_allocator) : underlying_allocator_(std::move(underlying_allocator)) { PADDLE_ENFORCE_NOT_NULL(underlying_allocator_); if (!underlying_allocator_->IsAllocThreadSafe()) { mtx_.reset(new std::mutex()); } } - -void LockedAllocator::FreeImpl(Allocation *allocation) { - platform::LockGuardPtr guard(mtx_); - underlying_allocator_->Free(allocation); +void LockedAllocator::Free(Allocation *allocation) { + { + platform::LockGuardPtr guard(mtx_); + reinterpret_cast(allocation) + ->allocation_.reset(); // Destroy inner allocation + } + delete allocation; } - Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { platform::LockGuardPtr guard(mtx_); - return underlying_allocator_->Allocate(size, attr).release(); + return new AllocationWithUnderlying( + underlying_allocator_->Allocate(size, attr)); } - } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h index b735ccef1..4967b9bb8 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ b/paddle/fluid/memory/allocation/locked_allocator.h @@ -24,15 +24,15 @@ namespace allocation { // A allocator to make underlying allocator thread safe. class LockedAllocator : public Allocator { public: - explicit LockedAllocator(std::shared_ptr underlying_allocator); + explicit LockedAllocator(std::unique_ptr &&underlying_allocator); bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation *allocation) override; + void Free(Allocation *allocation) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; private: - std::shared_ptr underlying_allocator_; + std::unique_ptr underlying_allocator_; std::unique_ptr mtx_; }; diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc deleted file mode 100644 index 3334589a4..000000000 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "paddle/fluid/memory/allocation/allocator_facade.h" - -#ifdef PADDLE_WITH_CUDA -DECLARE_double(fraction_of_gpu_memory_to_use); -DECLARE_double(fraction_of_cuda_pinned_memory_to_use); -DECLARE_int64(gpu_allocator_retry_time); -#endif - -DECLARE_string(allocator_strategy); - -namespace paddle { -namespace memory { -namespace allocation { - -TEST(allocator, allocator) { -#ifdef PADDLE_WITH_CUDA - FLAGS_fraction_of_gpu_memory_to_use = 0.01; - FLAGS_gpu_allocator_retry_time = 500; - FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5; -#endif - - FLAGS_allocator_strategy = "naive_best_fit"; - - auto &instance = AllocatorFacade::Instance(); - platform::Place place; - size_t size = 1024; - - { - place = platform::CPUPlace(); - size = 1024; - auto cpu_allocation = instance.Alloc(place, size); - ASSERT_NE(cpu_allocation, nullptr); - ASSERT_NE(cpu_allocation->ptr(), nullptr); - ASSERT_EQ(cpu_allocation->place(), place); - ASSERT_EQ(cpu_allocation->size(), size); - } - -#ifdef PADDLE_WITH_CUDA - { - place = platform::CUDAPlace(0); - size = 1024; - auto gpu_allocation = instance.Alloc(place, size); - ASSERT_NE(gpu_allocation, nullptr); - ASSERT_NE(gpu_allocation->ptr(), nullptr); - ASSERT_EQ(gpu_allocation->place(), place); - ASSERT_GE(gpu_allocation->size(), size); - } - - { - // Allocate 2GB gpu memory - place = platform::CUDAPlace(0); - size = 2 * static_cast(1 << 30); - auto gpu_allocation = instance.Alloc(place, size); - ASSERT_NE(gpu_allocation, nullptr); - ASSERT_NE(gpu_allocation->ptr(), nullptr); - ASSERT_EQ(gpu_allocation->place(), place); - ASSERT_GE(gpu_allocation->size(), size); - } - - { - place = platform::CUDAPinnedPlace(); - size = (1 << 20); - auto cuda_pinned_allocation = - instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20); - ASSERT_NE(cuda_pinned_allocation, nullptr); - ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr); - ASSERT_EQ(cuda_pinned_allocation->place(), place); - ASSERT_GE(cuda_pinned_allocation->size(), size); - } -#endif -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index 5a3d81721..de81d12cc 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -20,15 +20,20 @@ namespace paddle { namespace memory { namespace allocation { bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } -void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { +void CPUPinnedAllocator::Free(Allocation *allocation) { + PADDLE_ENFORCE_NOT_NULL(dynamic_cast(allocation)); PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); delete allocation; } Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { + // PADDLE_ENFORCE_EQ( + // attr, kCrossDevice, + // "CPUPinnedAllocator should be used for Cross-Device Communication"); + void *ptr; PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); - return new Allocation(ptr, size, platform::CUDAPinnedPlace()); + return new CPUPinnedAllocation(ptr, size); } } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h index deeb55a8f..42d0938f2 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.h +++ b/paddle/fluid/memory/allocation/pinned_allocator.h @@ -20,12 +20,18 @@ namespace memory { namespace allocation { // Allocator uses `cudaHostAlloc` +class CPUPinnedAllocation : public Allocation { + public: + CPUPinnedAllocation(void *ptr, size_t size) + : Allocation(ptr, size, platform::CUDAPinnedPlace()) {} +}; + class CPUPinnedAllocator : public Allocator { public: bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation *allocation) override; + void Free(Allocation *allocation) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; }; diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc index 7e888988f..981705051 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.cc +++ b/paddle/fluid/memory/allocation/retry_allocator.cc @@ -18,15 +18,25 @@ namespace paddle { namespace memory { namespace allocation { -void RetryAllocator::FreeImpl(Allocation* allocation) { +bool RetryAllocator::IsAllocThreadSafe() const { + return underlying_allocator_->IsAllocThreadSafe(); +} + +void RetryAllocator::Free(Allocation* allocation) { // Delete underlying allocation first. - underlying_allocator_->Free(allocation); - cv_.notify_all(); + reinterpret_cast(allocation)->allocation_.reset(); + { + // notify all waited allocators, they can try to allocate memory after free. + std::lock_guard lock(mutex_); + cv_.notify_all(); + } + delete allocation; } Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { auto alloc_func = [&, this]() { - return underlying_allocator_->Allocate(size, attr).release(); + return new AllocationWithUnderlying( + underlying_allocator_->Allocate(size, attr)); }; // In fact, we can unify the code of allocation success and failure // But it would add lock even when allocation success at the first time diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h index 379f576d6..6ab8ca8fb 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.h +++ b/paddle/fluid/memory/allocation/retry_allocator.h @@ -25,25 +25,32 @@ namespace paddle { namespace memory { namespace allocation { +class RetryAllocator; + class RetryAllocator : public Allocator { public: - RetryAllocator(std::shared_ptr allocator, size_t retry_ms) + RetryAllocator(std::unique_ptr&& allocator, size_t retry_ms) : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) { + EnforceCheck(); + } + + bool IsAllocThreadSafe() const override; + + private: + void EnforceCheck() { PADDLE_ENFORCE_NOT_NULL( - underlying_allocator_, - "UnderlyingAllocator of RetryAllocator must not be null"); + underlying_allocator_.get(), + "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator"); PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(), "UnderlyingAllocator of RetryAllocator must be thread-safe"); } - bool IsAllocThreadSafe() const override { return true; } - protected: - void FreeImpl(Allocation* allocation) override; + void Free(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; private: - std::shared_ptr underlying_allocator_; + std::unique_ptr underlying_allocator_; std::chrono::milliseconds retry_time_; std::mutex mutex_; std::condition_variable cv_; @@ -51,6 +58,8 @@ class RetryAllocator : public Allocator { // For debug, We can add an atomic integer to record how many memory sizes are // waited to allocate // std::atomic waited_allocate_size_{0}; + + friend class RetryAllocation; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.cc b/paddle/fluid/memory/allocation/zero_size_allocator.cc index 39743bcb1..cb2df1a02 100644 --- a/paddle/fluid/memory/allocation/zero_size_allocator.cc +++ b/paddle/fluid/memory/allocation/zero_size_allocator.cc @@ -24,20 +24,11 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const { Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { if (size == 0) { - return new Allocation(nullptr, 0, place_); + return new ZeroSizeAllocation(place_); } else { return underlying_allocator_->Allocate(size, attr).release(); } } - -void ZeroSizeAllocator::FreeImpl(Allocation *allocation) { - if (allocation->size() == 0) { - delete allocation; - } else { - underlying_allocator_->Free(allocation); - } -} - } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.h b/paddle/fluid/memory/allocation/zero_size_allocator.h index 08a7a06db..0f01dfcdf 100644 --- a/paddle/fluid/memory/allocation/zero_size_allocator.h +++ b/paddle/fluid/memory/allocation/zero_size_allocator.h @@ -24,6 +24,12 @@ namespace allocation { // The allocator handles the request's size is zero. Allocator will always // return an allocation even the request size is zero. However, the // allocation.ptr() is nullptr +class ZeroSizeAllocation : public Allocation { + public: + explicit ZeroSizeAllocation(const platform::Place& p) + : Allocation(nullptr, 0, p) {} +}; + class ZeroSizeAllocator : public Allocator { public: ZeroSizeAllocator(std::shared_ptr underlying_allocator, @@ -34,7 +40,6 @@ class ZeroSizeAllocator : public Allocator { protected: Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; - void FreeImpl(Allocation* allocation) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/platform/temporary_allocator.cc b/paddle/fluid/platform/temporary_allocator.cc index ddde7baf4..250efe70f 100644 --- a/paddle/fluid/platform/temporary_allocator.cc +++ b/paddle/fluid/platform/temporary_allocator.cc @@ -30,31 +30,38 @@ namespace paddle { namespace platform { namespace alloc = memory::allocation; +TemporaryAllocation::TemporaryAllocation( + alloc::AllocationPtr &&underlying_allocation) + : Allocation(underlying_allocation->ptr(), underlying_allocation->size(), + underlying_allocation->place()), + underlying_allocation_(std::move(underlying_allocation)) {} + TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) { - temp_mem_map_.reset(new std::multimap()); + temp_mem_map_.reset(new std::multimap()); } bool TemporaryAllocator::IsAllocThreadSafe() const { return true; } void TemporaryAllocator::Release(const std::function &callback) { - std::unique_ptr> t_allocations; + std::unique_ptr> t_allocations; { std::unique_lock lock(mtx_); callback(); t_allocations.swap(temp_mem_map_); - temp_mem_map_.reset(new std::multimap()); + temp_mem_map_.reset(new std::multimap()); wait_delete_mem_ = 0; } - alloc::AllocationDeleter deleter; for (auto tmp : *t_allocations) { VLOG(10) << "Delete temporary allocation " << tmp.second->ptr() << " size: " << tmp.second->size(); - deleter(tmp.second); + delete tmp.second; } } -void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) { +void TemporaryAllocator::Free(alloc::Allocation *allocation) { + auto *temp_allocation = dynamic_cast(allocation); + PADDLE_ENFORCE_NOT_NULL(temp_allocation); if (platform::is_gpu_place(temp_allocation->place())) { PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_), "The place should be the same."); @@ -78,7 +85,7 @@ void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) { } VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr() << " size: " << temp_allocation->size(); - alloc::AllocationDeleter()(temp_allocation); + delete temp_allocation; } size_t TemporaryAllocator::TemporaryAllocationQueueSize() { @@ -113,9 +120,11 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( } // If not find the the available allocation, get allocation from // AllocatorFacadeInstance. - auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); + auto raw_allocation = + alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); + auto temp_mem = new TemporaryAllocation(std::move(raw_allocation)); VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; - return temp_mem.release(); + return temp_mem; } } // namespace platform diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h index 912d45eaf..f8a43b889 100644 --- a/paddle/fluid/platform/temporary_allocator.h +++ b/paddle/fluid/platform/temporary_allocator.h @@ -23,6 +23,14 @@ namespace paddle { namespace platform { +class TemporaryAllocation : public memory::allocation::Allocation { + public: + explicit TemporaryAllocation( + memory::allocation::AllocationPtr &&underlying_allocation); + + memory::allocation::AllocationPtr underlying_allocation_; +}; + /*! \brief the TemporaryAllocator is used to alloc the temporary allocation * which used by CUDA's async operation. * @@ -49,7 +57,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { void SetCallback(const std::function &callback); protected: - void FreeImpl(memory::allocation::Allocation *allocation) override; + void Free(memory::allocation::Allocation *allocation) override; memory::allocation::Allocation *AllocateImpl( size_t size, memory::allocation::Allocator::Attr attr) override; @@ -58,8 +66,8 @@ class TemporaryAllocator : public memory::allocation::Allocator { platform::Place place_; // When the allocation is not held by any variable, it should be placed // to temp_mem_map immediately. - std::unique_ptr> - temp_mem_map_{nullptr}; + std::unique_ptr> temp_mem_map_{ + nullptr}; std::mutex mtx_; size_t wait_delete_mem_{0}; std::function callback_; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index dca40edf0..7bf089637 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -324,7 +324,6 @@ PYBIND11_MODULE(core, m) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { self.mutable_data(place); }) - .def("_clear", &Tensor::clear) .def("set", PyCPUTensorSetFromArray) .def("set", PyCPUTensorSetFromArray) .def("set", PyCPUTensorSetFromArray) diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h index 66b768665..16bb3771f 100644 --- a/paddle/fluid/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -105,12 +105,14 @@ void Printf(const char* fmt, const Args&... args) { Fprintf(std::cout, fmt, args...); } -inline std::string HumanReadableSize(double f_size) { +template +std::string HumanReadableSize(T size) { size_t i = 0; + double f_size = static_cast(size); double orig = f_size; const std::vector units( {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}); - while (f_size >= 1024) { + while (f_size > 1024) { f_size /= 1024; i++; } -- GitLab