From c6189637cd7f680501edc0593bb3875700f3da05 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Thu, 23 May 2019 18:44:18 +0800 Subject: [PATCH] Fix allocator bug (#16712) * Revert "Revert "Fix allocator bug"" This reverts commit 174d0d0b90a610807d6f82927aad4def227ee643. * Revert "fix travis ci" This reverts commit 5656fa9f7ca278aff7319485c0d289a4ffc2f9d0. test=develop * add inlined_vector.h, test=develop * add inlined_vector_test,test=develop --- paddle/fluid/framework/CMakeLists.txt | 2 + paddle/fluid/framework/inlined_vector.h | 69 ++++++++++ paddle/fluid/framework/inlined_vector_test.cc | 82 ++++++++++++ paddle/fluid/framework/operator.h | 3 - paddle/fluid/memory/allocation/CMakeLists.txt | 24 ++-- .../memory/allocation/aligned_allocator.h | 2 + paddle/fluid/memory/allocation/allocator.cc | 17 +-- paddle/fluid/memory/allocation/allocator.h | 119 ++++++++++++++---- .../memory/allocation/allocator_facade.cc | 48 ++++--- .../memory/allocation/allocator_strategy.cc | 14 ++- .../memory/allocation/best_fit_allocator.cc | 2 +- .../memory/allocation/best_fit_allocator.h | 2 +- .../memory/allocation/buffered_allocator.cc | 22 ++-- .../memory/allocation/buffered_allocator.h | 6 +- .../allocation/buffered_allocator_test.cc | 3 +- .../fluid/memory/allocation/cpu_allocator.cc | 28 +++-- .../fluid/memory/allocation/cpu_allocator.h | 10 +- .../fluid/memory/allocation/cuda_allocator.cc | 10 +- .../fluid/memory/allocation/cuda_allocator.h | 9 +- .../memory/allocation/legacy_allocator.cc | 2 +- .../memory/allocation/legacy_allocator.h | 2 +- .../memory/allocation/locked_allocator.cc | 19 ++- .../memory/allocation/locked_allocator.h | 6 +- .../naive_best_fit_allocator_facade_test.cc | 91 ++++++++++++++ .../memory/allocation/pinned_allocator.cc | 9 +- .../memory/allocation/pinned_allocator.h | 8 +- .../memory/allocation/retry_allocator.cc | 18 +-- .../fluid/memory/allocation/retry_allocator.h | 23 ++-- .../memory/allocation/zero_size_allocator.cc | 11 +- .../memory/allocation/zero_size_allocator.h | 7 +- paddle/fluid/platform/temporary_allocator.cc | 28 ++--- paddle/fluid/platform/temporary_allocator.h | 14 +-- paddle/fluid/pybind/pybind.cc | 1 + paddle/fluid/string/printf.h | 6 +- 34 files changed, 484 insertions(+), 233 deletions(-) create mode 100644 paddle/fluid/framework/inlined_vector.h create mode 100644 paddle/fluid/framework/inlined_vector_test.cc create mode 100644 paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 4e00630bb..c41efc5e0 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -225,6 +225,8 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) +cc_test(inlined_vector_test SRCS inlined_vector_test.cc) + if (NOT WIN32) cc_test(rw_lock_test SRCS rw_lock_test.cc) endif (NOT WIN32) diff --git a/paddle/fluid/framework/inlined_vector.h b/paddle/fluid/framework/inlined_vector.h new file mode 100644 index 000000000..2a7f26b9f --- /dev/null +++ b/paddle/fluid/framework/inlined_vector.h @@ -0,0 +1,69 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { + +template +class InlinedVector { + static_assert(N > 0, "N must be larger than 0"); + + public: + inline InlinedVector() { len_ = 0; } + + inline size_t size() const { return len_; } + + inline T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; } + + inline const T& operator[](size_t i) const { + return i < N ? head_[i] : tail_[i - N]; + } + + inline void emplace_back(const T& item) { + if (LIKELY(len_ < N)) { + head_[len_++] = item; + } else { + tail_.emplace_back(item); + ++len_; + } + } + + inline void pop_back() { + if (UNLIKELY(len_ > N)) { + tail_.pop_back(); + } + --len_; + } + + inline T& back() { + if (LIKELY(len_ <= N)) { + return head_[len_ - 1]; + } else { + return tail_.back(); + } + } + + private: + T head_[N]; + size_t len_; + std::vector tail_; +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/inlined_vector_test.cc b/paddle/fluid/framework/inlined_vector_test.cc new file mode 100644 index 000000000..003c0d7bb --- /dev/null +++ b/paddle/fluid/framework/inlined_vector_test.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/inlined_vector.h" +#include +#include +#include +#include +#include "gtest/gtest.h" + +namespace paddle { +namespace framework { + +template +static std::vector ToStdVector(const framework::InlinedVector &vec) { + std::vector std_vec; + std_vec.reserve(vec.size()); + for (size_t i = 0; i < vec.size(); ++i) { + std_vec.emplace_back(vec[i]); + } + return std_vec; +} + +template +void InlinedVectorCheck(size_t n) { + std::srand(std::time(nullptr)); + + std::vector std_vec; + framework::InlinedVector vec; + + for (size_t i = 0; i < n; ++i) { + int value = rand(); // NOLINT + + std_vec.emplace_back(value); + vec.emplace_back(value); + + CHECK_EQ(std_vec.size(), vec.size()); + CHECK_EQ(std_vec.back(), vec.back()); + + CHECK_EQ(vec.back(), value); + } + + bool is_equal = (std_vec == ToStdVector(vec)); + + CHECK_EQ(is_equal, true); + + for (size_t i = 0; i < n; ++i) { + CHECK_EQ(std_vec.size(), vec.size()); + CHECK_EQ(std_vec.back(), vec.back()); + std_vec.pop_back(); + vec.pop_back(); + CHECK_EQ(std_vec.size(), vec.size()); + } + + CHECK_EQ(std_vec.size(), static_cast(0)); + CHECK_EQ(vec.size(), static_cast(0)); +} + +TEST(inlined_vector, inlined_vector) { + for (size_t i = 0; i < 20; ++i) { + InlinedVectorCheck<1>(i); + InlinedVectorCheck<10>(i); + InlinedVectorCheck<15>(i); + InlinedVectorCheck<20>(i); + InlinedVectorCheck<21>(i); + InlinedVectorCheck<25>(i); + } +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 0c8ab533f..f216f3949 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -366,9 +366,6 @@ class ExecutionContext { auto shared_allocation = std::shared_ptr( allocation_ptr, deleter); - PADDLE_ENFORCE( - dynamic_cast(allocation_ptr) != nullptr, - "The AllocationPtr must be TemporaryAllocation."); PADDLE_ENFORCE_GE(allocation_ptr->size(), framework::product(dim) * sizeof(T)); diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 3dbbea3dd..4c4ae72ef 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) +cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) if (WITH_GPU) @@ -37,30 +38,19 @@ else () set(AllocatorFacadeDeps) endif() +list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator) + cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) -cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator) cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator) -cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags) -cc_library(allocator_facade SRCS allocator_facade.cc DEPS - ${AllocatorFacadeDeps} - cpu_allocator - locked_allocator - best_fit_allocator - aligned_allocator - auto_increment_allocator - zero_size_allocator - conditional_allocator - retry_allocator - buffered_allocator - allocator_strategy - legacy_allocator - ) +cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps}) +cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy) nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) -cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) +cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade) +cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) if (WITH_TESTING) set_tests_properties(retry_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") endif() diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h index 064acd06e..b536d4276 100644 --- a/paddle/fluid/memory/allocation/aligned_allocator.h +++ b/paddle/fluid/memory/allocation/aligned_allocator.h @@ -94,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator { underlying_allocator_->Allocate(size + kAlignment, attr); return new AlignedAllocation(std::move(raw_allocation), size); } + + void FreeImpl(Allocation* allocation) override { delete allocation; } }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc index 8fb8a5fb8..dc0f34fec 100644 --- a/paddle/fluid/memory/allocation/allocator.cc +++ b/paddle/fluid/memory/allocation/allocator.cc @@ -19,24 +19,11 @@ namespace paddle { namespace memory { namespace allocation { -Allocation::~Allocation() {} - -Allocator::~Allocator() {} bool Allocator::IsAllocThreadSafe() const { return false; } -AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) { - auto ptr = AllocateImpl(size, attr); - ptr->set_allocator(this); - return AllocationPtr(ptr); -} - -void Allocator::Free(Allocation* allocation) { delete allocation; } - -const char* BadAlloc::what() const noexcept { return msg_.c_str(); } - -void AllocationDeleter::operator()(Allocation* allocation) const { - auto* allocator = allocation->allocator(); +void Allocator::FreeImpl(Allocation* allocation) { + Allocator* allocator = allocation->TopDecoratedAllocator(); allocator->Free(allocation); } diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 346527893..5acdd9d0f 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -15,8 +15,10 @@ #pragma once #include #include +#include #include #include +#include "paddle/fluid/framework/inlined_vector.h" #include "paddle/fluid/platform/place.h" namespace paddle { @@ -26,40 +28,73 @@ namespace allocation { // Exception when `Alloc`/`AllocShared` failed class BadAlloc : public std::exception { public: - explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {} - const char* what() const noexcept override; + inline explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {} + + inline const char* what() const noexcept override { return msg_.c_str(); } private: std::string msg_; }; -class Allocation; -class AllocationDeleter { - public: - void operator()(Allocation* allocation) const; -}; - class Allocator; + // Allocation is the object holding the actually pointer. Use // `Allocation::ptr()` will returns the pointer that allocated. // // NOTE: this is the base class of Allocation. Each allocator can use its own // allocation object. // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0 + +/** + * Allocation is returned by Allocator::Allocate() method. + * + * An allocator may be decorated by another allocator. For example, we can + * decorate a RetryAllocator to any allocator to perform allocation retry when + * first allocation request fails. + * + * Explanations of Allocator design is as follows: + * + * Suppose we have an allocator which is decorated by several allocators: + * + * A(1) <- A(2) <- A(3) <- ... <- A(n) + * + * , and the public allocator is A(1). + * + * The allocation process would be: + * + * A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate() + * + * , and the free process would be: + * + * A(1).Free() -> A(2).Free() -> ... -> A(n).Free() + * + * Therefore, we should record the allocator chain when allocating, so + * that we can free the allocation in the reverse order of allocator chain. + * The field `decorated_allocators_` is used to record this chain. + * + * Another example is that we want to add additional fields in Allocation, + * e.g., something what is done in AlignedAllocator, etc. + * In this case, we should declare a derived class of Allocation, which + * contains an underlying Allocation allocated by the underlying allocator. + * Therefore, `decorated_allocators_` of the new Allocation object would + * be a new chain, differing from the underlying Allocation object. + */ class Allocation { public: - Allocation(void* ptr, size_t size, platform::Place place) - : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {} + inline Allocation(void* ptr, size_t size, platform::Place place) + : ptr_(ptr), size_(size), place_(place) {} Allocation(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete; + Allocation(Allocation&& o) = delete; + Allocation& operator=(Allocation&& o) = delete; // Returns the holding pointer. // NOTE: For performance consideration, it is better not to make this method // as a virtual method. If we want to implement a `defragmentation` later, // we might need to make `ptr_` field as a protected field, and add a virtual // method like `defragmentation` to change `ptr_`. - void* ptr() const { return ptr_; } + inline void* ptr() const { return ptr_; } // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the // last valid element. @@ -70,24 +105,38 @@ class Allocation { // The raw pointer might not aligned, so an offset might be added to raw // the pointer. The size of this allocation will be // `size + kAlignemnt - offset`. - size_t size() const { return size_; } + inline size_t size() const { return size_; } + + inline const platform::Place& place() const { return place_; } - const platform::Place& place() const { return place_; } + virtual ~Allocation() {} - Allocator* allocator() { return allocator_; } + private: + inline void RegisterDecoratedAllocator(Allocator* allocator) { + decorated_allocators_.emplace_back(allocator); + } - void set_allocator(Allocator* allocator) { allocator_ = allocator; } + inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); } - virtual ~Allocation(); + inline Allocator* TopDecoratedAllocator() { + return decorated_allocators_.back(); + } private: - Allocator* allocator_; void* ptr_; size_t size_; platform::Place place_; -}; -using AllocationPtr = std::unique_ptr; + // NOTE(zjl): Since decorated_allocators_ is usually a small vector + // We reserve a small buffer to it to prevent frequent heap allocation + static constexpr size_t kReserveAllocatorNum = 8; + using DecoratedAllocatorStack = + framework::InlinedVector; + + DecoratedAllocatorStack decorated_allocators_; + + friend class Allocator; +}; // Base interface class of memory Allocator. // To allocate a memory, allocator needs two parameters: @@ -126,22 +175,42 @@ class Allocator { NumOfAttrs = 5 // The number of all attributes. It is used internally. }; - virtual ~Allocator(); + virtual ~Allocator() {} + + class AllocationDeleter { + public: + inline void operator()(Allocation* allocation) const { + Allocator* allocator = allocation->TopDecoratedAllocator(); + allocator->Free(allocation); + } + }; + + using AllocationPtr = std::unique_ptr; // Allocate an allocation. - AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault); + inline AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault) { + auto ptr = AllocateImpl(size, attr); + ptr->RegisterDecoratedAllocator(this); + return AllocationPtr(ptr); + } + + // This function should not be called outside Allocator class + inline void Free(Allocation* allocation) { + allocation->PopDecoratedAllocator(); + FreeImpl(allocation); + } // True if the `Allocate` is thread safe. virtual bool IsAllocThreadSafe() const; protected: - virtual void Free(Allocation* allocation); virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; - - private: - friend class AllocationDeleter; + virtual void FreeImpl(Allocation* allocation); }; +using AllocationDeleter = Allocator::AllocationDeleter; +using AllocationPtr = Allocator::AllocationPtr; + } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index a3b73e3ba..09328aded 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -49,6 +49,17 @@ namespace paddle { namespace memory { namespace allocation { +static inline std::shared_ptr WrapRetryAllocator( + std::shared_ptr allocator, int64_t retry_time) { + if (retry_time > 0) { + auto* retry_allocator = + new RetryAllocator(std::move(allocator), retry_time); + allocator.reset(retry_allocator); + } + + return allocator; +} + // TODO(yy): Dirty code here. This class should be configurable in runtime. class CPUManagedAllocator : public Allocator { public: @@ -112,14 +123,10 @@ class ChunkedAllocator : public Allocator { std::shared_ptr CreateAllocatorWithChunk() { chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_)); auto* allocation = chunks_.back().get(); - std::unique_ptr allocator(new LockedAllocator( - std::unique_ptr(new BestFitAllocator(allocation)))); + std::shared_ptr allocator(new LockedAllocator( + std::shared_ptr(new BestFitAllocator(allocation)))); - if (retry_time_ > 0) { - auto* retry_allocator = - new RetryAllocator(std::move(allocator), retry_time_); - allocator.reset(retry_allocator); - } + allocator = WrapRetryAllocator(allocator, retry_time_); return std::make_shared>(std::move(allocator)); } @@ -190,13 +197,23 @@ class AllocatorFacadePrivate { ~AllocatorFacadePrivate() = default; AllocatorFacadePrivate() { - if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) { - InitLegacyAllocator(); - } else { - InitCPUAllocator(); - InitCUDAAllocator(); - InitCUDAPinnedAllocator(); - WrapZeroSizeAllocator(); + auto strategy = GetAllocatorStrategy(); + switch (strategy) { + case AllocatorStrategy::kLegacy: { + InitLegacyAllocator(); + break; + } + case AllocatorStrategy::kNaiveBestFit: { + InitCPUAllocator(); + InitCUDAAllocator(); + InitCUDAPinnedAllocator(); + WrapZeroSizeAllocator(); + break; + } + default: { + PADDLE_THROW("Unsupported allocator strategy: %d", + static_cast(strategy)); + } } } @@ -254,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() { std::shared_ptr AllocatorFacade::AllocShared( const platform::Place& place, size_t size, Allocator::Attr attr) { - return std::shared_ptr(Alloc(place, size, attr).release(), - AllocationDeleter()); + return std::shared_ptr(Alloc(place, size, attr)); } AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc index 8cebda900..fff94c01e 100644 --- a/paddle/fluid/memory/allocation/allocator_strategy.cc +++ b/paddle/fluid/memory/allocation/allocator_strategy.cc @@ -19,16 +19,22 @@ DEFINE_string( allocator_strategy, "legacy", "The allocation strategy. Legacy means the original allocator of Fluid." - "New means the experimental allocators of Fluid. in [legacy, new]"); + "naive_best_fit means the experimental best fit allocator. " + "allocator. Enum in [legacy, naive_best_fit]."); namespace paddle { namespace memory { namespace allocation { static AllocatorStrategy GetStrategyFromFlag() { - return FLAGS_allocator_strategy == "legacy" - ? AllocatorStrategy::kLegacy - : AllocatorStrategy::kNaiveBestFit; + if (FLAGS_allocator_strategy == "legacy") { + return AllocatorStrategy::kLegacy; + } else if (FLAGS_allocator_strategy == "naive_best_fit") { + return AllocatorStrategy::kNaiveBestFit; + } else { + PADDLE_THROW("Unsupported allocator strategy: %s", + FLAGS_allocator_strategy); + } } AllocatorStrategy GetAllocatorStrategy() { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index e3d6c2f51..d87dd9a4b 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const { } return num; } -void BestFitAllocator::Free(Allocation* allocation) { +void BestFitAllocator::FreeImpl(Allocation* allocation) { auto* bf_allocation = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(bf_allocation, "The input allocation is not BestFitAllocation."); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 4f10f2b53..c137438c0 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator { void InsertFreeNode(const ListIt& it); protected: - void Free(Allocation* allocation) override; + void FreeImpl(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; private: diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index fc75abc9d..e04c0aa34 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -22,11 +22,11 @@ namespace paddle { namespace memory { namespace allocation { -BufferedAllocator::BufferedAllocator(std::unique_ptr &&allocator) +BufferedAllocator::BufferedAllocator(std::shared_ptr allocator) : underlying_allocator_(std::move(allocator)) { PADDLE_ENFORCE_NOT_NULL( underlying_allocator_, - "Underlying allocator of BufferedAllocator must be unmanaged"); + "Underlying allocator of BufferedAllocator must not be null"); if (underlying_allocator_->IsAllocThreadSafe()) { mtx_.reset(new std::mutex()); } @@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) { while (!allocations_.empty()) { // free the largest auto it = --allocations_.end(); cur += it->second->size(); - delete it->second.release(); + underlying_allocator_->Free(it->second.release()); allocations_.erase(it); if (cur >= size) return; } } -bool BufferedAllocator::IsAllocThreadSafe() const { - return this->underlying_allocator_->IsAllocThreadSafe(); -} -void BufferedAllocator::Free(Allocation *allocation) { +bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; } + +void BufferedAllocator::FreeImpl(Allocation *allocation) { platform::LockGuardPtr guard(mtx_); allocations_.emplace(allocation->size(), AllocationPtr(allocation)); } + Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { { platform::LockGuardPtr guard(mtx_); @@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { if (it != allocations_.end() && it->first < size * 2) { AllocationPtr result(std::move(it->second)); allocations_.erase(it); - return new AllocationWithUnderlying(std::move(result)); + return result.release(); } } try { - return new AllocationWithUnderlying( - underlying_allocator_->Allocate(size, attr)); + return underlying_allocator_->Allocate(size, attr).release(); } catch (BadAlloc &) { FreeCache(size); - return new AllocationWithUnderlying( - underlying_allocator_->Allocate(size, attr)); + return underlying_allocator_->Allocate(size, attr).release(); } } diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index d44a3f85b..c72839570 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -31,7 +31,7 @@ namespace allocation { // underlying_allocator_ class BufferedAllocator : public Allocator { public: - explicit BufferedAllocator(std::unique_ptr &&allocator); + explicit BufferedAllocator(std::shared_ptr allocator); ~BufferedAllocator(); @@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator { void FreeCache(size_t size); protected: - void Free(Allocation *allocation) override; + void FreeImpl(Allocation *allocation) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; private: - std::unique_ptr underlying_allocator_; + std::shared_ptr underlying_allocator_; std::multimap allocations_; std::unique_ptr mtx_; }; diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index c8bd5292c..854a117b0 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -14,7 +14,6 @@ #include "paddle/fluid/memory/allocation/buffered_allocator.h" #include -#include #include #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" @@ -66,7 +65,7 @@ class StubAllocator : public Allocator { size_t GetFreeCount() const { return destruct_count_; } protected: - void Free(Allocation *allocation) override { + void FreeImpl(Allocation *allocation) override { auto *alloc = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(alloc); if (alloc->ptr()) delete[] static_cast(alloc->ptr()); diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index cc81a6f7b..90c49c87a 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -20,25 +20,27 @@ namespace paddle { namespace memory { namespace allocation { -CPUAllocation::CPUAllocation(void *ptr, size_t size) - : Allocation(ptr, size, platform::CPUPlace()) {} - bool CPUAllocator::IsAllocThreadSafe() const { return true; } -void CPUAllocator::Free(Allocation *allocation) { - PADDLE_ENFORCE_NOT_NULL(dynamic_cast(allocation)); - free(allocation->ptr()); +void CPUAllocator::FreeImpl(Allocation *allocation) { + void *p = allocation->ptr(); +#ifdef _WIN32 + _aligned_free(p); +#else + free(p); +#endif delete allocation; } Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { - void *ptr; - auto status = posix_memalign(&ptr, kAlignment, size); - if (UNLIKELY(status) != 0) { - throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d", - size, status)); - } - return new CPUAllocation(ptr, size); + void *p; +#ifdef _WIN32 + p = _aligned_malloc(size, kAlignment); +#else + PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!", + size); +#endif + return new Allocation(p, size, platform::CPUPlace()); } } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index 26d3643f4..3eb1416b0 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -31,19 +31,13 @@ namespace allocation { // // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import // an open-sourced allocator into Paddle. -class CPUAllocator; -class CPUAllocation : public Allocation { - public: - CPUAllocation(void* ptr, size_t size); -}; - class CPUAllocator : public Allocator { public: - constexpr static size_t kAlignment = 64u; + constexpr static size_t kAlignment = 4096UL; bool IsAllocThreadSafe() const override; protected: - void Free(Allocation* allocation) override; + void FreeImpl(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 430bf0be9..895a24a6a 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -23,15 +23,14 @@ namespace paddle { namespace memory { namespace allocation { bool CUDAAllocator::IsAllocThreadSafe() const { return true; } -void CUDAAllocator::Free(Allocation* allocation) { +void CUDAAllocator::FreeImpl(Allocation* allocation) { platform::CUDADeviceGuard guard(place_.device); - auto* cuda_allocation = dynamic_cast(allocation); - PADDLE_ENFORCE_NOT_NULL(cuda_allocation); - PADDLE_ENFORCE_EQ(boost::get(cuda_allocation->place()), + PADDLE_ENFORCE_EQ(boost::get(allocation->place()), place_); PADDLE_ENFORCE(cudaFree(allocation->ptr())); delete allocation; } + Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { platform::CUDADeviceGuard guard(place_.device); void* ptr; @@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, status, cudaGetErrorString(status))); } - return new CUDAAllocation(ptr, size, platform::Place(place_)); + return new Allocation(ptr, size, platform::Place(place_)); } + } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 63726f582..580a2d1df 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -20,13 +20,6 @@ namespace paddle { namespace memory { namespace allocation { -// CUDA System allocator and allocation. -// Just a flag type. -class CUDAAllocation : public Allocation { - public: - using Allocation::Allocation; -}; - class CUDAAllocator : public Allocator { public: explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {} @@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void Free(Allocation* allocation) override; + void FreeImpl(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; private: diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc index 2ecb44ff1..0f4a55ced 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.cc +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -347,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { return tmp_alloc; } -void LegacyAllocator::Free(Allocation *allocation) { +void LegacyAllocator::FreeImpl(Allocation *allocation) { boost::apply_visitor( legacy::FreeVisitor(allocation->ptr(), allocation->size()), allocation->place()); diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h index d9bdae153..27cd42ea3 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.h +++ b/paddle/fluid/memory/allocation/legacy_allocator.h @@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator { protected: Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; - void Free(Allocation *allocation) override; + void FreeImpl(Allocation *allocation) override; private: platform::Place place_; diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc index 62d768c58..c43099cc8 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ b/paddle/fluid/memory/allocation/locked_allocator.cc @@ -17,6 +17,7 @@ #include #include "paddle/fluid/memory/allocation/allocation_with_underlying.h" #include "paddle/fluid/platform/lock_guard_ptr.h" + namespace paddle { namespace memory { namespace allocation { @@ -24,26 +25,24 @@ namespace allocation { bool LockedAllocator::IsAllocThreadSafe() const { return true; } LockedAllocator::LockedAllocator( - std::unique_ptr &&underlying_allocator) + std::shared_ptr underlying_allocator) : underlying_allocator_(std::move(underlying_allocator)) { PADDLE_ENFORCE_NOT_NULL(underlying_allocator_); if (!underlying_allocator_->IsAllocThreadSafe()) { mtx_.reset(new std::mutex()); } } -void LockedAllocator::Free(Allocation *allocation) { - { - platform::LockGuardPtr guard(mtx_); - reinterpret_cast(allocation) - ->allocation_.reset(); // Destroy inner allocation - } - delete allocation; + +void LockedAllocator::FreeImpl(Allocation *allocation) { + platform::LockGuardPtr guard(mtx_); + underlying_allocator_->Free(allocation); } + Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { platform::LockGuardPtr guard(mtx_); - return new AllocationWithUnderlying( - underlying_allocator_->Allocate(size, attr)); + return underlying_allocator_->Allocate(size, attr).release(); } + } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h index 4967b9bb8..b735ccef1 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ b/paddle/fluid/memory/allocation/locked_allocator.h @@ -24,15 +24,15 @@ namespace allocation { // A allocator to make underlying allocator thread safe. class LockedAllocator : public Allocator { public: - explicit LockedAllocator(std::unique_ptr &&underlying_allocator); + explicit LockedAllocator(std::shared_ptr underlying_allocator); bool IsAllocThreadSafe() const override; protected: - void Free(Allocation *allocation) override; + void FreeImpl(Allocation *allocation) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; private: - std::unique_ptr underlying_allocator_; + std::shared_ptr underlying_allocator_; std::unique_ptr mtx_; }; diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc new file mode 100644 index 000000000..3334589a4 --- /dev/null +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "paddle/fluid/memory/allocation/allocator_facade.h" + +#ifdef PADDLE_WITH_CUDA +DECLARE_double(fraction_of_gpu_memory_to_use); +DECLARE_double(fraction_of_cuda_pinned_memory_to_use); +DECLARE_int64(gpu_allocator_retry_time); +#endif + +DECLARE_string(allocator_strategy); + +namespace paddle { +namespace memory { +namespace allocation { + +TEST(allocator, allocator) { +#ifdef PADDLE_WITH_CUDA + FLAGS_fraction_of_gpu_memory_to_use = 0.01; + FLAGS_gpu_allocator_retry_time = 500; + FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5; +#endif + + FLAGS_allocator_strategy = "naive_best_fit"; + + auto &instance = AllocatorFacade::Instance(); + platform::Place place; + size_t size = 1024; + + { + place = platform::CPUPlace(); + size = 1024; + auto cpu_allocation = instance.Alloc(place, size); + ASSERT_NE(cpu_allocation, nullptr); + ASSERT_NE(cpu_allocation->ptr(), nullptr); + ASSERT_EQ(cpu_allocation->place(), place); + ASSERT_EQ(cpu_allocation->size(), size); + } + +#ifdef PADDLE_WITH_CUDA + { + place = platform::CUDAPlace(0); + size = 1024; + auto gpu_allocation = instance.Alloc(place, size); + ASSERT_NE(gpu_allocation, nullptr); + ASSERT_NE(gpu_allocation->ptr(), nullptr); + ASSERT_EQ(gpu_allocation->place(), place); + ASSERT_GE(gpu_allocation->size(), size); + } + + { + // Allocate 2GB gpu memory + place = platform::CUDAPlace(0); + size = 2 * static_cast(1 << 30); + auto gpu_allocation = instance.Alloc(place, size); + ASSERT_NE(gpu_allocation, nullptr); + ASSERT_NE(gpu_allocation->ptr(), nullptr); + ASSERT_EQ(gpu_allocation->place(), place); + ASSERT_GE(gpu_allocation->size(), size); + } + + { + place = platform::CUDAPinnedPlace(); + size = (1 << 20); + auto cuda_pinned_allocation = + instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20); + ASSERT_NE(cuda_pinned_allocation, nullptr); + ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr); + ASSERT_EQ(cuda_pinned_allocation->place(), place); + ASSERT_GE(cuda_pinned_allocation->size(), size); + } +#endif +} + +} // namespace allocation +} // namespace memory +} // namespace paddle diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index de81d12cc..5a3d81721 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -20,20 +20,15 @@ namespace paddle { namespace memory { namespace allocation { bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } -void CPUPinnedAllocator::Free(Allocation *allocation) { - PADDLE_ENFORCE_NOT_NULL(dynamic_cast(allocation)); +void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); delete allocation; } Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { - // PADDLE_ENFORCE_EQ( - // attr, kCrossDevice, - // "CPUPinnedAllocator should be used for Cross-Device Communication"); - void *ptr; PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); - return new CPUPinnedAllocation(ptr, size); + return new Allocation(ptr, size, platform::CUDAPinnedPlace()); } } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h index 42d0938f2..deeb55a8f 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.h +++ b/paddle/fluid/memory/allocation/pinned_allocator.h @@ -20,18 +20,12 @@ namespace memory { namespace allocation { // Allocator uses `cudaHostAlloc` -class CPUPinnedAllocation : public Allocation { - public: - CPUPinnedAllocation(void *ptr, size_t size) - : Allocation(ptr, size, platform::CUDAPinnedPlace()) {} -}; - class CPUPinnedAllocator : public Allocator { public: bool IsAllocThreadSafe() const override; protected: - void Free(Allocation *allocation) override; + void FreeImpl(Allocation *allocation) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; }; diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc index 981705051..7e888988f 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.cc +++ b/paddle/fluid/memory/allocation/retry_allocator.cc @@ -18,25 +18,15 @@ namespace paddle { namespace memory { namespace allocation { -bool RetryAllocator::IsAllocThreadSafe() const { - return underlying_allocator_->IsAllocThreadSafe(); -} - -void RetryAllocator::Free(Allocation* allocation) { +void RetryAllocator::FreeImpl(Allocation* allocation) { // Delete underlying allocation first. - reinterpret_cast(allocation)->allocation_.reset(); - { - // notify all waited allocators, they can try to allocate memory after free. - std::lock_guard lock(mutex_); - cv_.notify_all(); - } - delete allocation; + underlying_allocator_->Free(allocation); + cv_.notify_all(); } Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { auto alloc_func = [&, this]() { - return new AllocationWithUnderlying( - underlying_allocator_->Allocate(size, attr)); + return underlying_allocator_->Allocate(size, attr).release(); }; // In fact, we can unify the code of allocation success and failure // But it would add lock even when allocation success at the first time diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h index 6ab8ca8fb..379f576d6 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.h +++ b/paddle/fluid/memory/allocation/retry_allocator.h @@ -25,32 +25,25 @@ namespace paddle { namespace memory { namespace allocation { -class RetryAllocator; - class RetryAllocator : public Allocator { public: - RetryAllocator(std::unique_ptr&& allocator, size_t retry_ms) + RetryAllocator(std::shared_ptr allocator, size_t retry_ms) : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) { - EnforceCheck(); - } - - bool IsAllocThreadSafe() const override; - - private: - void EnforceCheck() { PADDLE_ENFORCE_NOT_NULL( - underlying_allocator_.get(), - "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator"); + underlying_allocator_, + "UnderlyingAllocator of RetryAllocator must not be null"); PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(), "UnderlyingAllocator of RetryAllocator must be thread-safe"); } + bool IsAllocThreadSafe() const override { return true; } + protected: - void Free(Allocation* allocation) override; + void FreeImpl(Allocation* allocation) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; private: - std::unique_ptr underlying_allocator_; + std::shared_ptr underlying_allocator_; std::chrono::milliseconds retry_time_; std::mutex mutex_; std::condition_variable cv_; @@ -58,8 +51,6 @@ class RetryAllocator : public Allocator { // For debug, We can add an atomic integer to record how many memory sizes are // waited to allocate // std::atomic waited_allocate_size_{0}; - - friend class RetryAllocation; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.cc b/paddle/fluid/memory/allocation/zero_size_allocator.cc index cb2df1a02..39743bcb1 100644 --- a/paddle/fluid/memory/allocation/zero_size_allocator.cc +++ b/paddle/fluid/memory/allocation/zero_size_allocator.cc @@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const { Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { if (size == 0) { - return new ZeroSizeAllocation(place_); + return new Allocation(nullptr, 0, place_); } else { return underlying_allocator_->Allocate(size, attr).release(); } } + +void ZeroSizeAllocator::FreeImpl(Allocation *allocation) { + if (allocation->size() == 0) { + delete allocation; + } else { + underlying_allocator_->Free(allocation); + } +} + } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.h b/paddle/fluid/memory/allocation/zero_size_allocator.h index 0f01dfcdf..08a7a06db 100644 --- a/paddle/fluid/memory/allocation/zero_size_allocator.h +++ b/paddle/fluid/memory/allocation/zero_size_allocator.h @@ -24,12 +24,6 @@ namespace allocation { // The allocator handles the request's size is zero. Allocator will always // return an allocation even the request size is zero. However, the // allocation.ptr() is nullptr -class ZeroSizeAllocation : public Allocation { - public: - explicit ZeroSizeAllocation(const platform::Place& p) - : Allocation(nullptr, 0, p) {} -}; - class ZeroSizeAllocator : public Allocator { public: ZeroSizeAllocator(std::shared_ptr underlying_allocator, @@ -40,6 +34,7 @@ class ZeroSizeAllocator : public Allocator { protected: Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; + void FreeImpl(Allocation* allocation) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/platform/temporary_allocator.cc b/paddle/fluid/platform/temporary_allocator.cc index 03b581484..fe2f528be 100644 --- a/paddle/fluid/platform/temporary_allocator.cc +++ b/paddle/fluid/platform/temporary_allocator.cc @@ -14,7 +14,6 @@ #include "paddle/fluid/platform/temporary_allocator.h" #include -#include #include "paddle/fluid/memory/allocation/allocator_facade.h" DEFINE_int64(limit_of_tmp_allocation, -1, @@ -31,38 +30,31 @@ namespace paddle { namespace platform { namespace alloc = memory::allocation; -TemporaryAllocation::TemporaryAllocation( - alloc::AllocationPtr &&underlying_allocation) - : Allocation(underlying_allocation->ptr(), underlying_allocation->size(), - underlying_allocation->place()), - underlying_allocation_(std::move(underlying_allocation)) {} - TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) { - temp_mem_map_.reset(new std::multimap()); + temp_mem_map_.reset(new std::multimap()); } bool TemporaryAllocator::IsAllocThreadSafe() const { return true; } void TemporaryAllocator::Release(const std::function &callback) { - std::unique_ptr> t_allocations; + std::unique_ptr> t_allocations; { std::unique_lock lock(mtx_); callback(); t_allocations.swap(temp_mem_map_); - temp_mem_map_.reset(new std::multimap()); + temp_mem_map_.reset(new std::multimap()); wait_delete_mem_ = 0; } + alloc::AllocationDeleter deleter; for (auto tmp : *t_allocations) { VLOG(10) << "Delete temporary allocation " << tmp.second->ptr() << " size: " << tmp.second->size(); - delete tmp.second; + deleter(tmp.second); } } -void TemporaryAllocator::Free(alloc::Allocation *allocation) { - auto *temp_allocation = dynamic_cast(allocation); - PADDLE_ENFORCE_NOT_NULL(temp_allocation); +void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) { if (platform::is_gpu_place(temp_allocation->place())) { PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_), "The place should be the same."); @@ -86,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) { } VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr() << " size: " << temp_allocation->size(); - delete temp_allocation; + alloc::AllocationDeleter()(temp_allocation); } size_t TemporaryAllocator::TemporaryAllocationQueueSize() { @@ -121,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( } // If not find the the available allocation, get allocation from // AllocatorFacadeInstance. - auto raw_allocation = - alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); - auto temp_mem = new TemporaryAllocation(std::move(raw_allocation)); + auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; - return temp_mem; + return temp_mem.release(); } } // namespace platform diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h index f8a43b889..912d45eaf 100644 --- a/paddle/fluid/platform/temporary_allocator.h +++ b/paddle/fluid/platform/temporary_allocator.h @@ -23,14 +23,6 @@ namespace paddle { namespace platform { -class TemporaryAllocation : public memory::allocation::Allocation { - public: - explicit TemporaryAllocation( - memory::allocation::AllocationPtr &&underlying_allocation); - - memory::allocation::AllocationPtr underlying_allocation_; -}; - /*! \brief the TemporaryAllocator is used to alloc the temporary allocation * which used by CUDA's async operation. * @@ -57,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { void SetCallback(const std::function &callback); protected: - void Free(memory::allocation::Allocation *allocation) override; + void FreeImpl(memory::allocation::Allocation *allocation) override; memory::allocation::Allocation *AllocateImpl( size_t size, memory::allocation::Allocator::Attr attr) override; @@ -66,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator { platform::Place place_; // When the allocation is not held by any variable, it should be placed // to temp_mem_map immediately. - std::unique_ptr> temp_mem_map_{ - nullptr}; + std::unique_ptr> + temp_mem_map_{nullptr}; std::mutex mtx_; size_t wait_delete_mem_{0}; std::function callback_; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index f3f5f7cc7..e0eaefad6 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -357,6 +357,7 @@ PYBIND11_MODULE(core, m) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { self.mutable_data(place); }) + .def("_clear", &Tensor::clear) .def("set", PyCPUTensorSetFromArray) .def("set", PyCPUTensorSetFromArray) .def("set", PyCPUTensorSetFromArray) diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h index 16bb3771f..66b768665 100644 --- a/paddle/fluid/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) { Fprintf(std::cout, fmt, args...); } -template -std::string HumanReadableSize(T size) { +inline std::string HumanReadableSize(double f_size) { size_t i = 0; - double f_size = static_cast(size); double orig = f_size; const std::vector units( {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}); - while (f_size > 1024) { + while (f_size >= 1024) { f_size /= 1024; i++; } -- GitLab