diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index 89ce628c5d51bda7e819a3a8e9ebdb3822a26f53..ca67765044c8819911756a11b37751e84e910e3c 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -22,41 +22,6 @@ namespace memory { namespace allocation { BufferedAllocator::BufferedAllocator(std::unique_ptr&& allocator) { - std::vector division_plan(8 * sizeof(size_t)); - for (size_t i = 0; i < 8 * sizeof(size_t); ++i) { - division_plan[i] = (static_cast(1) << i); - } - InitAndEnforceCheck(std::move(allocator), division_plan); -} - -BufferedAllocator::BufferedAllocator(std::unique_ptr&& allocator, - const std::vector& division_plan) { - InitAndEnforceCheck(std::move(allocator), division_plan); -} - -BufferedAllocator::~BufferedAllocator() { FlushImpl(); } - -void BufferedAllocator::FlushImpl() { - for (auto& v : allocations_) { - for (auto& pair : v) { - underlying_allocator_->FreeUniquePtr(std::move(pair.second)); - } - v.clear(); - } -} - -void BufferedAllocator::Flush() { - if (mtx_) { - std::lock_guard lock(*mtx_); - FlushImpl(); - } else { - FlushImpl(); - } -} - -void BufferedAllocator::InitAndEnforceCheck( - std::unique_ptr&& allocator, - const std::vector& division_plan) { underlying_allocator_.reset( dynamic_cast(allocator.release())); PADDLE_ENFORCE_NOT_NULL( @@ -65,141 +30,54 @@ void BufferedAllocator::InitAndEnforceCheck( if (underlying_allocator_->IsAllocThreadSafe()) { mtx_.reset(new std::mutex()); } - constexpr size_t kMax = std::numeric_limits::max(); - if (division_plan.empty()) { - division_plan_.assign({0, kMax}); - } else { - auto from = division_plan.front() == 0 ? division_plan.begin() + 1 - : division_plan.begin(); - auto to = division_plan.back() == kMax ? 
division_plan.end() - 1 - : division_plan.end(); - division_plan_.reserve(to - from + 2); - division_plan_.push_back(0); - division_plan_.insert(division_plan_.end(), from, to); - division_plan_.push_back(kMax); - for (size_t i = 1; i < division_plan_.size(); ++i) { - PADDLE_ENFORCE_LT(division_plan_[i - 1], division_plan_[i], - "Division plan must be strictly sorted"); - } - } - allocations_.resize(division_plan_.size() - 1); -} - -void BufferedAllocator::InsertAllocationImpl( - std::unique_ptr&& allocation) { - auto size = allocation->size(); - auto idx = GetListIndex(size); - allocations_[idx].emplace(size, std::move(allocation)); -} - -void BufferedAllocator::InsertAllocation( - std::unique_ptr&& allocation) { - if (mtx_) { - std::lock_guard lock(*mtx_); - InsertAllocationImpl(std::move(allocation)); - } else { - InsertAllocationImpl(std::move(allocation)); - } } -bool BufferedAllocator::Match(size_t actual_size, size_t requested_size) { - return (actual_size >> 1) < requested_size; -} - -size_t BufferedAllocator::GetListIndex(size_t size) { - auto it = - std::upper_bound(division_plan_.begin(), division_plan_.end(), size); - return static_cast(it - division_plan_.begin()) - 1; -} +BufferedAllocator::~BufferedAllocator() { FreeCache(-1UL); } -std::unique_ptr BufferedAllocator::RemoveAllocationImpl( - size_t size) { - auto idx = GetListIndex(size); - auto& allocation_map = allocations_[idx]; - auto it = allocation_map.lower_bound(size); - // Only remove allocation whose size is not more than twice of requested size - if (it != allocation_map.end()) { - if (Match(it->second->size(), size)) { - auto ret = std::move(it->second); - allocation_map.erase(it); - return ret; - } else { - return nullptr; - } - } else { - while (++idx < allocations_.size() && Match(division_plan_[idx], size)) { - auto& allocation_map = allocations_[idx]; - if (!allocation_map.empty()) { - auto it = allocation_map.begin(); - if (Match(it->second->size(), size)) { - auto ret = 
std::move(it->second); - allocation_map.erase(it); - return ret; - } else { - return nullptr; - } - } +std::unique_ptr BufferedAllocator::Allocate(size_t size, + Allocator::Attr attr) { + std::unique_ptr result; + { + platform::LockGuardPtr guard(mtx_); + auto it = allocations_.lower_bound(size); + if (it != allocations_.end() && it->first < size * 2) { + result = std::move(it->second); + allocations_.erase(it); } - return nullptr; } -} -std::unique_ptr BufferedAllocator::RemoveAllocation(size_t size) { - if (mtx_) { - std::lock_guard lock(*mtx_); - return RemoveAllocationImpl(size); - } else { - return RemoveAllocationImpl(size); + if (result) { + return result; } -} -std::unique_ptr BufferedAllocator::Allocate(size_t size, - Allocator::Attr attr) { - auto ret = RemoveAllocation(size); - if (!ret) { - try { - return underlying_allocator_->Allocate(size, attr); - } catch (BadAlloc&) { - // if allocation failed, try to free some memorys from buffers - FreeAllocations(size); - return underlying_allocator_->Allocate(size, attr); - } + try { + return underlying_allocator_->Allocate(size, attr); + } catch (BadAlloc&) { + FreeCache(size); + return underlying_allocator_->Allocate(size, attr); } - return ret; } -void BufferedAllocator::FreeAllocationsImpl(size_t size) { +void BufferedAllocator::FreeCache(size_t size) { + platform::LockGuardPtr guard(mtx_); if (UNLIKELY(size == 0)) return; size_t cur = 0; - for (auto& alloc_map : allocations_) { - // use reverse iterator to free large allocations first - while (!alloc_map.empty()) { - auto it = --(alloc_map.end()); - cur += it->second->size(); - underlying_allocator_->FreeUniquePtr(std::move(it->second)); - alloc_map.erase(it); - if (cur >= size) return; - } - } -} - -void BufferedAllocator::FreeAllocations(size_t size) { - if (mtx_) { - std::lock_guard lock(*mtx_); - FreeAllocationsImpl(size); - } else { - FreeAllocationsImpl(size); + while (!allocations_.empty()) { // free the largest + auto it = --allocations_.end(); + 
cur += it->second->size(); + underlying_allocator_->FreeUniquePtr(std::move(it->second)); + allocations_.erase(it); + if (cur >= size) return; } } void BufferedAllocator::FreeUniquePtr(std::unique_ptr allocation) { - InsertAllocation(std::move(allocation)); + platform::LockGuardPtr guard(mtx_); + allocations_.emplace(allocation->size(), std::move(allocation)); } -bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; } - -const std::vector& BufferedAllocator::GetDivisionPlan() const { - return division_plan_; +bool BufferedAllocator::IsAllocThreadSafe() const { + return this->underlying_allocator_->IsAllocThreadSafe(); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index 0fe6e5a19a84995a9d143f4c3803ff54b77a1f92..1284661df1aba9ba756799372a6cb8cca1e2bc8a 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -19,6 +19,7 @@ #include #include #include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/platform/lock_guard_ptr.h" namespace paddle { namespace memory { @@ -32,9 +33,6 @@ class BufferedAllocator : public UnmanagedAllocator { public: explicit BufferedAllocator(std::unique_ptr&& allocator); - BufferedAllocator(std::unique_ptr&& allocator, - const std::vector& division_plan); - ~BufferedAllocator(); std::unique_ptr Allocate( @@ -44,31 +42,14 @@ class BufferedAllocator : public UnmanagedAllocator { bool IsAllocThreadSafe() const override; - const std::vector& GetDivisionPlan() const; - - void Flush(); + // only used in unittest + inline void ClearCache() { FreeCache(-1UL); } private: - void InitAndEnforceCheck(std::unique_ptr&& allocator, - const std::vector& division_plan); - - void InsertAllocation(std::unique_ptr&& allocation); - void InsertAllocationImpl(std::unique_ptr&& allocation); - - static bool Match(size_t actual_size, size_t requested_size); - 
std::unique_ptr RemoveAllocation(size_t size); - std::unique_ptr RemoveAllocationImpl(size_t size); - - void FreeAllocations(size_t size); - void FreeAllocationsImpl(size_t size); - - void FlushImpl(); - - size_t GetListIndex(size_t size); + void FreeCache(size_t size); std::unique_ptr underlying_allocator_; - std::vector>> allocations_; - std::vector division_plan_; + std::multimap> allocations_; std::unique_ptr mtx_; }; diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index a9fb4f3926c7b9fab50abb9c6b9f25ddd44b093f..9445d305ce108ca4b5dc0239c0c1681917f51e72 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -124,7 +124,7 @@ TEST(buffered_allocator, lazy_free) { { underlying_allocator->ResetCounter(); - allocator->Flush(); + allocator->ClearCache(); ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero); ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo); } diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index 75686df4341aafc238b619c4d859ee001a6218d9..20f3bfbd3e8449d759360b1cc6c64ae3a141df2d 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -30,9 +30,10 @@ DEFINE_bool(init_allocated_mem, false, "during unit testing."); DECLARE_double(fraction_of_gpu_memory_to_use); -DEFINE_bool(use_legacy_allocator, true, - "Whether to use the legacy allocator. If the new allocators have" - "been well tested, we should remove these flag."); +DEFINE_string( + allocator_strategy, "legacy", + "The allocation strategy. Legacy means the original allocator of Fluid." + "New means the experimental allocators of Fluid. 
in [legacy, new]"); namespace paddle { namespace memory { @@ -274,15 +275,11 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const { #endif } -size_t memory_usage(const platform::Place& p) { - return boost::apply_visitor(Usage(), p); -} - class LegacyAllocation : public Allocation { public: using Allocation::Allocation; - ~LegacyAllocation() { + ~LegacyAllocation() final { boost::apply_visitor(FreeVisitor(this->ptr()), this->place()); } }; @@ -291,7 +288,7 @@ class LegacyAllocation : public Allocation { std::shared_ptr AllocShared(const platform::Place& place, size_t size, Allocator::Attr attr) { - if (FLAGS_use_legacy_allocator) { + if (FLAGS_allocator_strategy == "legacy") { void* p = boost::apply_visitor(legacy::AllocVisitor(size), place); return std::shared_ptr( new legacy::LegacyAllocation(p, size, place)); @@ -303,7 +300,7 @@ std::shared_ptr AllocShared(const platform::Place& place, std::unique_ptr Alloc(const platform::Place& place, size_t size, Allocator::Attr attr) { - if (FLAGS_use_legacy_allocator) { + if (FLAGS_allocator_strategy == "legacy") { void* p = boost::apply_visitor(legacy::AllocVisitor(size), place); return std::unique_ptr( new legacy::LegacyAllocation(p, size, place)); diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc index a08a9dbd0da46e73082cdd24c019e8d210d8bcc4..d7a048257f92c1c58c34decf1a93ff95f5f736c7 100644 --- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc +++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/reader/reader_op_registry.h" +#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/recordio/scanner.h" namespace paddle { namespace operators { @@ -33,11 +34,7 @@ class RecordIOFileReader : public framework::FileReader { protected: void ReadNextImpl(std::vector* out) override { - std::unique_ptr> guard; - if (ThreadSafe) { - guard.reset(new std::lock_guard(*mutex_)); - } - + platform::LockGuardPtr guard(mutex_); bool ok = framework::ReadFromRecordIO(&scanner_, dev_ctx_, out); if (!ok) { out->clear(); diff --git a/paddle/fluid/platform/lock_guard_ptr.h b/paddle/fluid/platform/lock_guard_ptr.h new file mode 100644 index 0000000000000000000000000000000000000000..220c538bc78101d397f08ce5617886b381169542 --- /dev/null +++ b/paddle/fluid/platform/lock_guard_ptr.h @@ -0,0 +1,55 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include // NOLINT +namespace paddle { +namespace platform { + +/** + * LockGuard for std::unique_ptr<LockType>. It will do nothing when guarded ptr + * is nullptr. + * + * The advantage of using `LockGuardPtr` instead of + * std::unique_ptr<std::lock_guard<LockType>> is that this type is totally a stack + * variable. There is no heap allocation at all. 
+ */ +template +class LockGuardPtr { + using LockGuardType = std::lock_guard; + + public: + class LockGuardDeleter { + public: + void operator()(LockGuardType* guard) { guard->~LockGuardType(); } + }; + + explicit LockGuardPtr(std::unique_ptr& lock_ptr) // NOLINT + : guard_ptr_(lock_ptr ? new (guard_buffer_) LockGuardType(*lock_ptr) + : nullptr) {} + + LockGuardPtr(const LockGuardPtr&) = delete; + LockGuardPtr& operator=(const LockGuardPtr&) = delete; + LockGuardPtr(LockGuardPtr&&) = delete; + LockGuardPtr& operator=(LockGuardPtr&&) = delete; + + private: + uint8_t guard_buffer_[sizeof(LockGuardType)]; + std::unique_ptr guard_ptr_; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index b18bd70005c07596c928377c6606d02cc962a0d7..32d433b698566b7cfafe83937afc8118a3c0c527 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -27,10 +27,12 @@ int main(int argc, char** argv) { new_argv.push_back(argv[i]); } #ifdef PADDLE_WITH_CUDA - new_argv.push_back(strdup("--tryfromenv=fraction_of_gpu_memory_to_use")); + new_argv.push_back( + strdup("--tryfromenv=fraction_of_gpu_memory_to_use,allocator_strategy")); #else - new_argv.push_back(strdup( - "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb")); + new_argv.push_back( + strdup("--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_" + "mb,allocator_strategy")); new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb")); #endif int new_argc = static_cast(new_argv.size()); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index ce792664920d44c3d9510e625d57f80b6559fc7f..a57c3287afa53a8fa8e671919ea54ff2447093fc 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -114,7 +114,7 @@ def __bootstrap__(): 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'init_allocated_mem', 
'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb', - 'use_legacy_allocator', 'reader_queue_speed_test_mode' + 'allocator_strategy', 'reader_queue_speed_test_mode' ] if core.is_compiled_with_dist(): read_env_flags.append('rpc_deadline')