From c20db6357bde3bf40c7164fe29d52b1b24e79f90 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Mon, 25 Mar 2019 04:45:15 +0000 Subject: [PATCH] split PR test=develop --- paddle/fluid/framework/CMakeLists.txt | 2 - paddle/fluid/framework/inlined_vector.h | 82 ----- paddle/fluid/framework/inlined_vector_test.cc | 53 ---- paddle/fluid/memory/allocation/CMakeLists.txt | 17 +- paddle/fluid/memory/allocation/allocator.h | 11 +- .../memory/allocation/allocator_facade.cc | 55 +--- .../memory/allocation/allocator_strategy.cc | 5 +- .../memory/allocation/allocator_strategy.h | 2 +- .../auto_growth_best_fit_allocator.cc | 134 -------- .../auto_growth_best_fit_allocator.h | 87 ----- ...o_growth_best_fit_allocator_facade_test.cc | 96 ------ .../auto_growth_best_fit_allocator_test.cc | 70 ---- .../fluid/memory/allocation/cpu_allocator.cc | 4 +- .../fluid/memory/allocation/cpu_allocator.h | 2 +- .../memory/allocation/legacy_allocator.cc | 18 +- .../multi_bin_buffered_allocator.cc | 300 ------------------ .../allocation/multi_bin_buffered_allocator.h | 62 ---- .../multi_bin_buffered_allocator_test.cc | 170 ---------- .../naive_best_fit_allocator_facade_test.cc | 3 - ...ti_bin_buffered_allocator_division_plan.cc | 56 ---- paddle/fluid/pybind/pybind.cc | 3 - python/paddle/fluid/__init__.py | 4 +- 22 files changed, 31 insertions(+), 1205 deletions(-) delete mode 100644 paddle/fluid/framework/inlined_vector.h delete mode 100644 paddle/fluid/framework/inlined_vector_test.cc delete mode 100644 paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc delete mode 100644 paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h delete mode 100644 paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc delete mode 100644 paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc delete mode 100644 paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc delete mode 100644 paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h delete mode 100644 paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc delete mode 100644 paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 265a5c6fe2..ad19d729eb 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -202,8 +202,6 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) -cc_test(inlined_vector_test SRCS inlined_vector_test.cc) - if (NOT WIN32) cc_test(rw_lock_test SRCS rw_lock_test.cc) endif (NOT WIN32) diff --git a/paddle/fluid/framework/inlined_vector.h b/paddle/fluid/framework/inlined_vector.h deleted file mode 100644 index 0adff9d212..0000000000 --- a/paddle/fluid/framework/inlined_vector.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace framework { - -template -class InlinedVector { - static_assert(N > 0, "N must be larger than 0"); - - public: - inline void push_back(const T& item) { - if (size_ < N) { - head_[size_] = item; - } else { - tail_.emplace_back(item); - } - ++size_; - } - - inline void pop_back() { - PADDLE_ENFORCE(!empty(), "Try to pop back element from empty vector."); - if (size_ > N) { - tail_.pop_back(); - } - --size_; - } - - inline const T& back() const { - PADDLE_ENFORCE(!empty(), "Try to get back element of empty vector."); - return size_ <= N ? head_[size_ - 1] : tail_.back(); - } - - inline T& back() { - PADDLE_ENFORCE(!empty(), "Try to get back element of empty vector."); - return size_ <= N ? head_[size_ - 1] : tail_.back(); - } - - inline bool empty() const { return size_ == 0; } - - inline size_t size() const { return size_; } - - // This API can only be used in unittest - T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; } - - const T& operator[](size_t i) const { - return i < N ? head_[i] : tail_[i - N]; - } - - operator std::vector() const { - std::vector ret; - ret.reserve(size_); - for (size_t i = 0; i < size_; ++i) { - ret.emplace_back((*this)[i]); - } - return ret; - } - - private: - T head_[N]; - size_t size_{0}; - std::vector tail_; -}; - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/inlined_vector_test.cc b/paddle/fluid/framework/inlined_vector_test.cc deleted file mode 100644 index b2b7a95b5e..0000000000 --- a/paddle/fluid/framework/inlined_vector_test.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/inlined_vector.h" -#include -#include "gtest/gtest.h" - -namespace paddle { -namespace framework { - -TEST(inlined_stack, inlined_stack) { - size_t max_num = 10; - - InlinedVector stack; - - for (size_t i = 0; i < max_num; ++i) { - ASSERT_EQ(stack.size(), i); - stack.push_back(i); - ASSERT_EQ(stack.size(), i + 1); - } - - std::vector vec = stack; - - ASSERT_EQ(stack.size(), vec.size()); - - for (size_t i = 0; i < vec.size(); ++i) { - ASSERT_EQ(stack[i], vec[i]); - } - - for (size_t i = 0; i < max_num; ++i) { - ASSERT_EQ(stack[i], i); - } - - for (size_t i = 0; i < max_num; ++i) { - ASSERT_EQ(stack.back(), max_num - 1 - i); - stack.pop_back(); - ASSERT_EQ(stack.size(), max_num - 1 - i); - } -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 7552eee77e..0f6014ae8a 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -3,18 +3,9 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator) cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) -cc_library(multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator gflags) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) -cc_test(multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator) - -cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator) -cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator) - -if (NOT WIN32) - cc_test(test_multi_bin_buffered_allocator_division_plan SRCS test_multi_bin_buffered_allocator_division_plan.cc DEPS multi_bin_buffered_allocator) -endif() if (WITH_GPU) nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard) @@ -47,7 +38,7 @@ else () set(AllocatorFacadeDeps) endif() -list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator multi_bin_buffered_allocator auto_growth_best_fit_allocator legacy_allocator zero_size_allocator) +list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator) cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) @@ -59,8 +50,8 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) -cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade) - cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade) -cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator_facade) +cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade) + +cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 3497e46516..6c42dd7691 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -17,7 +17,6 @@ #include #include #include -#include "paddle/fluid/framework/inlined_vector.h" #include "paddle/fluid/platform/place.h" namespace paddle { @@ -50,7 +49,9 @@ class Allocator; class Allocation { public: Allocation(void* ptr, size_t size, platform::Place place) - : ptr_(ptr), size_(size), place_(place) {} + : ptr_(ptr), size_(size), place_(place) { + decorated_allocators_.reserve(8); + } Allocation(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete; @@ -80,8 +81,8 @@ class Allocation { virtual ~Allocation(); private: - std::vector DecoratedAllocators() const { - return static_cast>(decorated_allocators_); + const std::vector& DecoratedAllocators() const { + return decorated_allocators_; } inline void RegisterDecoratedAllocator(Allocator* allocator) { @@ -98,7 +99,7 @@ class Allocation { void* ptr_; size_t size_; platform::Place place_; - framework::InlinedVector decorated_allocators_; + std::vector decorated_allocators_; friend class Allocator; friend class AllocationDeleter; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 0f7d5926f1..96cda01f7b 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -22,14 +22,12 @@ #include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" -#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h" #include "paddle/fluid/memory/allocation/auto_increment_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/conditional_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" #include "paddle/fluid/memory/allocation/legacy_allocator.h" #include "paddle/fluid/memory/allocation/locked_allocator.h" -#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h" #include "paddle/fluid/memory/allocation/retry_allocator.h" #include "paddle/fluid/memory/allocation/zero_size_allocator.h" #include "paddle/fluid/platform/cpu_info.h" @@ -47,24 +45,18 @@ DEFINE_int64( "The retry time (milliseconds) when allocator fails " "to allocate memory. No retry if this value is not greater than 0"); -DEFINE_bool(enable_buffered_allocator, false, "Enable buffered_allocator"); - namespace paddle { namespace memory { namespace allocation { -static inline std::shared_ptr WrapRetryAndBufferedAllocator( - std::shared_ptr allocator, int64_t retry_time, - bool enable_buffered) { +static inline std::shared_ptr WrapRetryAllocator( + std::shared_ptr allocator, int64_t retry_time) { if (retry_time > 0) { auto* retry_allocator = new RetryAllocator(std::move(allocator), retry_time); allocator.reset(retry_allocator); } - if (enable_buffered) { - allocator.reset(new MultiBinBufferedAllocator(allocator)); - } return allocator; } @@ -134,8 +126,7 @@ class ChunkedAllocator : public Allocator { std::shared_ptr allocator(new LockedAllocator( std::shared_ptr(new BestFitAllocator(allocation)))); - allocator = WrapRetryAndBufferedAllocator(allocator, retry_time_, - FLAGS_enable_buffered_allocator); + allocator = WrapRetryAllocator(allocator, retry_time_); return std::make_shared>(std::move(allocator)); } @@ -219,13 +210,6 @@ class AllocatorFacadePrivate { WrapZeroSizeAllocator(); break; } - case AllocatorStrategy::kAutoGrowthBestFit: { - InitAutoGrowthCPUAllocator(); - InitAutoGrowthCUDAAllocator(); - InitAutoGrowthCUDAPinnedAllocator(); - WrapZeroSizeAllocator(); - break; - } default: { PADDLE_THROW("Unsupported allocator strategy: %d", static_cast(strategy)); @@ -234,39 +218,6 @@ class AllocatorFacadePrivate { } private: - void InitAutoGrowthCPUAllocator() { - auto cpu_allocator = std::make_shared>( - std::make_shared()); - allocators_[platform::CPUPlace()] = - std::make_shared( - cpu_allocator, platform::CpuMaxChunkSize(), 4096); - } - - void InitAutoGrowthCUDAAllocator() { -#ifdef PADDLE_WITH_CUDA - int dev_cnt = platform::GetCUDADeviceCount(); - for (int dev_id = 0; dev_id < dev_cnt; ++dev_id) { - auto cuda_allocator = std::make_shared>( - std::make_shared(platform::CUDAPlace(dev_id))); - auto allocator = std::make_shared( - cuda_allocator, platform::GpuMaxChunkSize(), 4096); - - allocators_[platform::CUDAPlace(dev_id)] = WrapRetryAndBufferedAllocator( - allocator, FLAGS_gpu_allocator_retry_time, false); - } -#endif - } - - void InitAutoGrowthCUDAPinnedAllocator() { -#ifdef PADDLE_WITH_CUDA - auto cuda_pinned_allocator = std::make_shared>( - std::make_shared()); - allocators_[platform::CUDAPinnedPlace()] = - std::make_shared( - cuda_pinned_allocator, platform::CUDAPinnedMaxChunkSize(), 4096); -#endif - } - void InitLegacyAllocator() { std::vector places{platform::CPUPlace()}; #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc index e2a9c8414a..fff94c01e7 100644 --- a/paddle/fluid/memory/allocation/allocator_strategy.cc +++ b/paddle/fluid/memory/allocation/allocator_strategy.cc @@ -20,8 +20,7 @@ DEFINE_string( allocator_strategy, "legacy", "The allocation strategy. Legacy means the original allocator of Fluid." "naive_best_fit means the experimental best fit allocator. " - "auto_growth_best_fit means the experimental auto growth best fit " - "allocator. Enum in [legacy, naive_best_fit, auto_growth_best_fit]."); + "allocator. Enum in [legacy, naive_best_fit]."); namespace paddle { namespace memory { @@ -32,8 +31,6 @@ static AllocatorStrategy GetStrategyFromFlag() { return AllocatorStrategy::kLegacy; } else if (FLAGS_allocator_strategy == "naive_best_fit") { return AllocatorStrategy::kNaiveBestFit; - } else if (FLAGS_allocator_strategy == "auto_growth_best_fit") { - return AllocatorStrategy::kAutoGrowthBestFit; } else { PADDLE_THROW("Unsupported allocator strategy: %s", FLAGS_allocator_strategy); diff --git a/paddle/fluid/memory/allocation/allocator_strategy.h b/paddle/fluid/memory/allocation/allocator_strategy.h index 9dad9c0190..9adbd87993 100644 --- a/paddle/fluid/memory/allocation/allocator_strategy.h +++ b/paddle/fluid/memory/allocation/allocator_strategy.h @@ -18,7 +18,7 @@ namespace paddle { namespace memory { namespace allocation { -enum class AllocatorStrategy { kLegacy, kNaiveBestFit, kAutoGrowthBestFit }; +enum class AllocatorStrategy { kLegacy, kNaiveBestFit }; extern AllocatorStrategy GetAllocatorStrategy(); diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc deleted file mode 100644 index 3d901e04d0..0000000000 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h" -#include -#include -#include -#include -#include // NOLINT -#include - -namespace paddle { -namespace memory { -namespace allocation { - -static size_t align(size_t size, size_t alignment) { - auto remaining = size % alignment; - return remaining == 0 ? size : size + alignment - remaining; -} - -AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator( - const std::shared_ptr &underlying_allocator, size_t chunk_size, - size_t alignment) - : underlying_allocator_(underlying_allocator), - chunk_size_(align(chunk_size, alignment)), - alignment_(alignment) {} - -Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size, Attr attr) { - size = align(size, alignment_); - std::lock_guard guard(mtx_); - auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr)); - BlockIt block_it; - if (iter != free_blocks_.end()) { - VLOG(2) << "Found " << iter->second->size_ << " for " << size; - block_it = iter->second; - free_blocks_.erase(iter); - auto *chunk = block_it->chunk_; - size_t remaining_size = block_it->size_ - size; - if (remaining_size == 0) { - block_it->is_free_ = false; - VLOG(2) << "Found and no remaining"; - } else { - auto remaining_free_block = chunk->blocks_.insert( - block_it, Chunk::Block(block_it->ptr_, remaining_size, true, chunk)); - free_blocks_.emplace(std::make_pair(remaining_size, block_it->ptr_), - remaining_free_block); - block_it->ptr_ = - reinterpret_cast(block_it->ptr_) + remaining_size; - block_it->size_ = size; - block_it->is_free_ = false; - VLOG(2) << "Found and remaining " << remaining_size; - } - } else { - size_t alloc_size = size; - if (!underlying_allocator_exhaustive_ && chunk_size_ > size) { - alloc_size = chunk_size_; - } - - try { - chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr)); - } catch (BadAlloc &ex) { - if (size == alloc_size) throw ex; - underlying_allocator_exhaustive_ = true; - alloc_size = size; - chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr)); - } - auto *chunk = &(*chunks_.rbegin()); - uint8_t *p = reinterpret_cast(chunk->allocation_->ptr()); - auto &blocks = chunk->blocks_; - - size_t remaining_size = alloc_size - size; - if (remaining_size > 0) { - blocks.emplace_back(p, remaining_size, true, chunk); - free_blocks_.emplace(std::make_pair(remaining_size, p), --(blocks.end())); - } - blocks.emplace_back(p + remaining_size, size, false, chunk); - block_it = --(blocks.end()); - VLOG(2) << "Not found and allocate " << alloc_size << ", and remaining " - << remaining_size; - } - VLOG(2) << "After allocate, free blocks " << free_blocks_.size(); - return new Chunk::BlockAllocation(block_it); -} - -void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) { - auto &block_it = static_cast(allocation)->block_it_; - auto &blocks = block_it->chunk_->blocks_; - - std::lock_guard guard(mtx_); - block_it->is_free_ = true; - - if (block_it != blocks.begin()) { - auto prev_it = block_it; - --prev_it; - - if (prev_it->is_free_) { - free_blocks_.erase(std::make_pair(prev_it->size_, prev_it->ptr_)); - prev_it->size_ += block_it->size_; - blocks.erase(block_it); - block_it = prev_it; - } - } - - auto next_it = block_it; - ++next_it; - - if (next_it != blocks.end() && next_it->is_free_) { - free_blocks_.erase(std::make_pair(next_it->size_, next_it->ptr_)); - block_it->size_ += next_it->size_; - blocks.erase(next_it); - } - - free_blocks_.emplace(std::make_pair(block_it->size_, block_it->ptr_), - block_it); - - VLOG(2) << "Combine " << block_it->size_ << ", " << blocks.size() << ", " - << free_blocks_.size(); - delete allocation; -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h deleted file mode 100644 index f60dad8112..0000000000 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include // NOLINT -#include -#include "paddle/fluid/memory/allocation/allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -class AutoGrowthBestFitAllocator : public Allocator { - public: - explicit AutoGrowthBestFitAllocator( - const std::shared_ptr &underlying_allocator, size_t chunk_size, - size_t alignment); - - bool IsAllocThreadSafe() const override { return true; } - - using AllocationList = std::list; - using AllocationListIt = AllocationList::iterator; - - struct Chunk { - struct Block { - Block(void *ptr, size_t size, bool is_free, Chunk *chunk) - : ptr_(ptr), size_(size), is_free_(is_free), chunk_(chunk) {} - - void *ptr_; - size_t size_; - bool is_free_; - Chunk *chunk_; // which chunk it is from - }; - - explicit Chunk(AllocationPtr allocation) - : allocation_(std::move(allocation)) {} - - AllocationPtr allocation_; - std::list blocks_; - // std::mutex mtx_; - - struct BlockAllocation : public Allocation { - explicit BlockAllocation(const std::list::iterator &it) - : Allocation(it->ptr_, it->size_, it->chunk_->allocation_->place()), - block_it_(it) {} - - std::list::iterator block_it_; - }; - }; - - protected: - Allocation *AllocateImpl(size_t size, Attr attr) override; - - void FreeImpl(Allocation *allocation) override; - - private: - using BlockIt = std::list::iterator; - - std::shared_ptr underlying_allocator_; - std::list chunks_; - std::map, BlockIt> free_blocks_; - size_t chunk_size_; - size_t alignment_; - - bool underlying_allocator_exhaustive_{false}; - - mutable std::mutex mtx_; -}; - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc deleted file mode 100644 index 518f5e0131..0000000000 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "paddle/fluid/memory/allocation/allocator_facade.h" - -#ifdef PADDLE_WITH_CUDA -DECLARE_double(fraction_of_gpu_memory_to_use); -DECLARE_double(fraction_of_cuda_pinned_memory_to_use); -DECLARE_int64(gpu_allocator_retry_time); -#endif - -DECLARE_string(allocator_strategy); - -namespace paddle { -namespace memory { -namespace allocation { - -static inline size_t AlignTo(size_t size, size_t alignment = 4096) { - auto remaining = size % alignment; - return remaining == 0 ? size : size + alignment - remaining; -} - -TEST(allocator, allocator) { -#ifdef PADDLE_WITH_CUDA - FLAGS_fraction_of_gpu_memory_to_use = 0.01; - FLAGS_gpu_allocator_retry_time = 500; - FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5; -#endif - - FLAGS_allocator_strategy = "auto_growth_best_fit"; - - auto &instance = AllocatorFacade::Instance(); - size_t size = 1024; - platform::Place place; - - { - place = platform::CPUPlace(); - size = 1024; - auto cpu_allocation = instance.Alloc(place, size); - ASSERT_NE(cpu_allocation, nullptr); - ASSERT_NE(cpu_allocation->ptr(), nullptr); - ASSERT_EQ(cpu_allocation->place(), place); - ASSERT_EQ(cpu_allocation->size(), AlignTo(size)); - } - -#ifdef PADDLE_WITH_CUDA - { - place = platform::CUDAPlace(0); - size = 1024; - auto gpu_allocation = instance.Alloc(place, size); - ASSERT_NE(gpu_allocation, nullptr); - ASSERT_NE(gpu_allocation->ptr(), nullptr); - ASSERT_EQ(gpu_allocation->place(), place); - ASSERT_GE(gpu_allocation->size(), AlignTo(size)); - } - - { - // Allocate 2GB gpu memory - place = platform::CUDAPlace(0); - size = 2 * static_cast(1 << 30); - auto gpu_allocation = instance.Alloc(place, size); - ASSERT_NE(gpu_allocation, nullptr); - ASSERT_NE(gpu_allocation->ptr(), nullptr); - ASSERT_EQ(gpu_allocation->place(), place); - ASSERT_GE(gpu_allocation->size(), AlignTo(size)); - } - - { - place = platform::CUDAPinnedPlace(); - size = (1 << 20); - auto cuda_pinned_allocation = - instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20); - ASSERT_NE(cuda_pinned_allocation, nullptr); - ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr); - ASSERT_EQ(cuda_pinned_allocation->place(), place); - ASSERT_GE(cuda_pinned_allocation->size(), AlignTo(size)); - } -#endif -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc deleted file mode 100644 index 087eb8c9cc..0000000000 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include // NOLINT -#include // NOLINT -#include // NOLINT -#include - -#include - -#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h" -#include "paddle/fluid/memory/allocation/cpu_allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -TEST(allocator, auto_growth_best_fit_allocator) { - auto cpu_allocator = std::make_shared(); - - auto allocator = - std::make_shared(cpu_allocator, 0, 4096); - - std::mutex mtx; - std::condition_variable cv; - bool flag = false; - - auto thread_main = [&] { - { - std::unique_lock lock(mtx); - cv.wait(lock, [&] { return flag; }); - } - for (size_t i = 10; i > 0; --i) { - allocator->Allocate((i + 1) * 1000); - } - }; - - std::vector ths; - for (size_t i = 10; i < 10; ++i) { - ths.emplace_back(thread_main); - } - - { - std::lock_guard lock(mtx); - flag = true; - } - cv.notify_all(); - - for (auto &th : ths) { - th.join(); - } -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index 8dd4de49b6..90c49c87a6 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -35,9 +35,9 @@ void CPUAllocator::FreeImpl(Allocation *allocation) { Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { void *p; #ifdef _WIN32 - p = _aligned_malloc(size, 4096); + p = _aligned_malloc(size, kAlignment); #else - PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096, size), 0, "Alloc %ld error!", + PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!", size); #endif return new Allocation(p, size, platform::CPUPlace()); diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index b4d215a434..3eb1416b0e 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -33,7 +33,7 @@ namespace allocation { // an open-sourced allocator into Paddle. class CPUAllocator : public Allocator { public: - constexpr static size_t kAlignment = 64u; + constexpr static size_t kAlignment = 4096UL; bool IsAllocThreadSafe() const override; protected: diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc index 0fd68b2a22..eac9fce58f 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.cc +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -148,12 +148,18 @@ class GPUBuddyAllocatorList { std::unique_ptr( new detail::GPUAllocator(dev_id)), platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); - VLOG(10) << "\n\nNOTE: each GPU device use " - << FLAGS_fraction_of_gpu_memory_to_use * 100 - << "% of GPU memory.\n" - << "You can set GFlags environment variable '" - << "FLAGS_fraction_of_gpu_memory_to_use" - << "' to change the fraction of GPU usage.\n\n"; + VLOG(10) << "\n\nNOTE:\n" + << "You can set GFlags environment variable " + << "'FLAGS_fraction_of_gpu_memory_to_use' " + << "or 'FLAGS_initial_gpu_memory_in_mb' " + << "or 'FLAGS_reallocate_gpu_memory_in_mb' " + << "to change the memory size for GPU usage.\n" + << "Current 'FLAGS_fraction_of_gpu_memory_to_use' value is " + << FLAGS_fraction_of_gpu_memory_to_use + << ". Current 'FLAGS_initial_gpu_memory_in_mb' value is " + << FLAGS_initial_gpu_memory_in_mb + << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is " + << FLAGS_reallocate_gpu_memory_in_mb << "\n\n"; }); return allocators_[dev_id]; } diff --git a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc deleted file mode 100644 index c649a7161e..0000000000 --- a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc +++ /dev/null @@ -1,300 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h" -#include -#include -#include -#include -#include // NOLINT -#include -#include -#include -#include "paddle/fluid/platform/lock_guard_ptr.h" - -DEFINE_double( - buffered_allocator_excess_times, 2, - "Excess memory size times of buffered_allocator. BufferedAllocator" - " would try to reuse memory freed previously, but the size of freed" - " allocation may not be exactly the same as the requested. Here, we" - " use a flag to control the excess times of reused memory size. " - "Not quite sure what is the best excess times value."); - -DEFINE_string( - buffered_allocator_division_plan_path, "", - "The file path which " - "determines the memory size division plans of BufferedAllocator." - "If it is empty, use the default division plan. The file must be a " - "text file which each lines indicates the bound of division plan. " - "For example, if the text file has 3 lines, which are '500M', '1G', " - " '2G', the division plan would be [0, 500M), [500M, 1G), [1G, 2G) " - "and [2G, +inf). Allocation request whose requested memory size is " - "inside the last interval of division plan would be dispatched to " - " underlying_allocator directly without caching when freed."); - -namespace paddle { -namespace memory { -namespace allocation { - -static std::string TrimStringAndToUpperCase(const std::string &str) { - auto not_space = [](char ch) { return std::isspace(ch) == 0; }; - auto first_idx = static_cast( - std::find_if(str.begin(), str.end(), not_space) - str.begin()); - auto last_idx = static_cast( - std::find_if(str.rbegin(), str.rend(), not_space) - str.rbegin()); - if (first_idx == str.size() || last_idx == str.size()) return ""; - - last_idx = str.size() - last_idx; - auto ret = str.substr(first_idx, last_idx - first_idx); - std::for_each(ret.begin(), ret.end(), - [](char &ch) { ch = std::toupper(ch); }); - return ret; -} - -namespace { - -enum DivisionPlanFileStatus { kEOF, kException, kNormal }; - -} // NOLINT - -static size_t ParseStringToBytes(const std::string &original_str, - DivisionPlanFileStatus *ret_code) { - std::string str = TrimStringAndToUpperCase(original_str); - - if (str.empty()) { - *ret_code = kEOF; - return 0; - } - - if (str.back() == 'B') { - str.pop_back(); - if (str.empty()) { - *ret_code = kException; - return 0; - } - } - - size_t multiples = 1; - switch (str.back()) { - case 'G': - multiples *= (static_cast(1) << 30); - break; - case 'M': - multiples *= (static_cast(1) << 20); - break; - case 'K': - multiples *= (static_cast(1) << 10); - break; - default: - break; - } - - if (multiples != 1) { - str.pop_back(); - if (str.empty()) { - *ret_code = kException; - return 0; - } - } - - str = TrimStringAndToUpperCase(str); - double mem_val = -1.0; - std::stringstream ss(str); - if (!(ss >> mem_val) || mem_val < 0) { - *ret_code = kException; - return 0; - } - - *ret_code = kNormal; - return static_cast(mem_val * multiples); -} - -static std::string GetDebugStringOfPlan(const std::vector &plan) { - std::string ret("["); - for (auto sz : plan) { - ret += string::HumanReadableSize(sz); - ret += ", "; - } - return ret + "]"; -} - -std::vector ReadBufferedAllocatorDivisionPlanFromFile( - const std::string &filepath) { - std::ifstream is(filepath.c_str()); - PADDLE_ENFORCE(is.good(), "File %s not exist", filepath); - std::string str; - std::vector plan; - size_t line_num = 1; - while (std::getline(is, str).good()) { - DivisionPlanFileStatus status; - size_t ret = ParseStringToBytes(str, &status); - if (status == kEOF) { - break; - } - if (status == kException) { - PADDLE_THROW( - "Invalid format in line %d of file %s: '%s'. Only support B, KB, MB, " - "GB.", - line_num, filepath, str); - } - plan.push_back(ret); - ++line_num; - } - return plan; -} - -static void CheckAndModifyMemoryDivisionPlan( - std::vector *division_plan) { - // Check whether the division plan is strictly sorted - bool is_strictly_sorted = true; - for (size_t i = 1; i < division_plan->size(); ++i) { - if ((*division_plan)[i - 1] >= (*division_plan)[i]) { - is_strictly_sorted = false; - break; - } - } - PADDLE_ENFORCE(is_strictly_sorted, "Divison plan must be stricted sorted"); - - // Insert 0 to disivion plan for clean binary searching code - if (division_plan->empty() || division_plan->front() != 0) { - division_plan->insert(division_plan->begin(), 0); - } - - // Remove MAX from disivion plan for clean binary searching code - constexpr auto kSizeTypeMax = std::numeric_limits::max(); - if (division_plan->back() == kSizeTypeMax) { - division_plan->pop_back(); - } - - PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty"); -} - -static std::vector GetDefaultDivisionPlan() { - if (!FLAGS_buffered_allocator_division_plan_path.empty()) { - return ReadBufferedAllocatorDivisionPlanFromFile( - FLAGS_buffered_allocator_division_plan_path); - } - - // Default division plan is 4K, 8K, 16K, ..., 500M, 1G - constexpr size_t kMaxLogSize = 30; - std::vector plan; - for (size_t i = 12; i <= kMaxLogSize; ++i) { - plan.push_back(static_cast(1) << i); - } - return plan; -} - -inline static size_t FindDivisionPlanBinIndex(const std::vector &bins, - size_t size) { - return static_cast(std::upper_bound(bins.begin(), bins.end(), size) - - bins.begin() - 1); -} - -inline static size_t TolerantUpperSize(size_t size) { - return static_cast(size * FLAGS_buffered_allocator_excess_times); -} - -MultiBinBufferedAllocator::MultiBinBufferedAllocator( - std::shared_ptr underlying_allocator) - : MultiBinBufferedAllocator(std::move(underlying_allocator), - GetDefaultDivisionPlan()) {} - -MultiBinBufferedAllocator::MultiBinBufferedAllocator( - std::shared_ptr underlying_allocator, - const std::vector &division_plan) - : underlying_allocator_(std::move(underlying_allocator)), - division_plan_(division_plan) { - CheckAndModifyMemoryDivisionPlan(&division_plan_); - allocations_.resize(division_plan_.size() - 1); - accumulated_cache_size_.assign(division_plan_.size() - 1, 0UL); - mtx_.resize(division_plan_.size() - 1); - if (underlying_allocator_->IsAllocThreadSafe()) { - for (auto &mtx : mtx_) { - mtx.reset(new std::mutex()); - } - } - - VLOG(1) << "Division plan is: " << GetDebugStringOfPlan(division_plan_); - VLOG(1) << "FLAGS_buffered_allocator_excess_times = " - << FLAGS_buffered_allocator_excess_times; -} - -void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) { - auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size()); - if (bin_index < allocations_.size()) { - platform::LockGuardPtr guard(mtx_[bin_index]); - allocations_[bin_index].emplace(allocation->size(), - AllocationPtr(allocation)); - accumulated_cache_size_[bin_index] += allocation->size(); - } else { - underlying_allocator_->Free(allocation); - } -} - -// Maybe we can design more flexible FreeCache strategy based on bin_index -// and require size. -size_t MultiBinBufferedAllocator::ClearCache() { - size_t accumulated_size = 0; - // FIXME(zjl): free the largest first when there is no extra - for (size_t i = allocations_.size() - 1; i != static_cast(-1); --i) { - platform::LockGuardPtr lock(mtx_[i]); - allocations_[i].clear(); - accumulated_size += accumulated_cache_size_[i]; - accumulated_cache_size_[i] = 0; - } - return accumulated_size; -} - -Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) { - auto bin_index = FindDivisionPlanBinIndex(division_plan_, size); - auto upper_size = TolerantUpperSize(size); - - for (; bin_index < allocations_.size() && - upper_size >= division_plan_[bin_index]; - ++bin_index) { - auto &allocation = allocations_[bin_index]; - platform::LockGuardPtr lock(mtx_[bin_index]); - auto it = allocation.lower_bound(size); - if (it != allocation.end() && it->second->size() <= upper_size) { - size_t sz = it->second->size(); - auto ret = std::move(it->second); - allocation.erase(it); - accumulated_cache_size_[bin_index] -= sz; - VLOG(3) << "Allocate " << sz << "(required " << size - << ") from cache directly"; - return ret.release(); - } - } - - size_t retry_time = 1; - while (true) { - try { - auto ret = underlying_allocator_->Allocate(size, attr).release(); - VLOG(2) << "Allocate " << size << " from underlying directly"; - return ret; - } catch (BadAlloc &) { - size_t actual_free_size = ClearCache(); - VLOG(1) << retry_time << "-th free " << actual_free_size - << " bytes caches"; - if (actual_free_size == 0) throw; - } - ++retry_time; - } -} - -void UseMultiBinBufferedAllocatorGFlags() {} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h deleted file mode 100644 index b93f4c062b..0000000000 --- a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include // NOLINT -#include -#include - -#include "paddle/fluid/memory/allocation/allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -std::vector ReadBufferedAllocatorDivisionPlanFromFile( - const std::string& filepath); - -class MultiBinBufferedAllocator : public Allocator { - public: - explicit MultiBinBufferedAllocator( - std::shared_ptr underlying_allocator); - - MultiBinBufferedAllocator(std::shared_ptr underlying_allocator, - const std::vector& division_plan); - - bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; } - - size_t ClearCache(); - - const std::vector& DivisionPlan() const { return division_plan_; } - - protected: - Allocation* AllocateImpl(size_t size, Attr attr) override; - void FreeImpl(Allocation* allocation) override; - - private: - std::shared_ptr underlying_allocator_; - std::vector> allocations_; - std::vector accumulated_cache_size_; - std::vector division_plan_; - std::vector> mtx_; -}; - -extern void UseMultiBinBufferedAllocatorGFlags(); - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc b/paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc deleted file mode 100644 index be5dfba644..0000000000 --- a/paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h" -#include -#include -#include -#include "paddle/fluid/memory/allocation/best_fit_allocator.h" -#include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -inline std::shared_ptr GetBufferedAllocator( - Allocation *allocation, bool thread_safe) { - std::shared_ptr allocator(new BestFitAllocator(allocation)); - if (thread_safe) { - allocator.reset(new LockedAllocator(std::move(allocator))); - } - - return std::make_shared(allocator); -} - -TEST(buffered_allocator, thread_safety) { - std::unique_ptr allocator(new CPUAllocator()); - auto chunk = allocator->Allocate(1 << 20, allocator->kDefault); - { - auto buf_allocator = GetBufferedAllocator(chunk.get(), true); - ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true); - } - - { - auto buf_allocator = GetBufferedAllocator(chunk.get(), false); - ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false); - } -} - -class StubAllocation : public Allocation { - public: - using Allocation::Allocation; -}; - -class StubAllocator : public Allocator { - public: - void ResetCounter() { - construct_count_ = 0; - destruct_count_ = 0; - } - - size_t GetAllocCount() const { return construct_count_; } - - size_t GetFreeCount() const { return destruct_count_; } - - protected: - void FreeImpl(Allocation *allocation) override { - auto *alloc = dynamic_cast(allocation); - PADDLE_ENFORCE_NOT_NULL(alloc); - if (alloc->ptr()) delete[] static_cast(alloc->ptr()); - ++destruct_count_; - delete allocation; - } - - Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override { - ++construct_count_; - if (size == 0) { - return new StubAllocation(nullptr, 0, platform::CPUPlace()); - } else { - return new StubAllocation(new uint8_t[size], size, platform::CPUPlace()); - } - } - - private: - size_t construct_count_ = 0; - size_t destruct_count_ = 0; -}; - -constexpr size_t kZero = 0; -constexpr size_t kOne = 1; -constexpr size_t kTwo = 2; - -TEST(buffered_allocator, lazy_free) { - std::vector original_alloc_size({1022, 1023, 1024, 1025, 1026}); - for (auto alloc_size : original_alloc_size) { - auto stub_allocator = std::make_shared(); - auto *underlying_allocator = stub_allocator.get(); - auto allocator = - std::make_shared(stub_allocator); - - { - underlying_allocator->ResetCounter(); - auto x = allocator->Allocate(alloc_size, allocator->kDefault); - ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - x = nullptr; - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - } - - { - underlying_allocator->ResetCounter(); - auto x = allocator->Allocate(900, allocator->kDefault); - ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero); - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - auto y = allocator->Allocate(2048, allocator->kDefault); - ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - x = nullptr; - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - y = nullptr; - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - } - - { - underlying_allocator->ResetCounter(); - size_t cache_size = allocator->ClearCache(); - ASSERT_EQ(cache_size, static_cast(alloc_size + 2048)); - ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero); - ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo); - } - - { - underlying_allocator->ResetCounter(); - auto p = allocator->Allocate(allocator->DivisionPlan().back(), - allocator->kDefault); - ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - } - - ASSERT_EQ(underlying_allocator->GetFreeCount(), kOne); - - { - underlying_allocator->ResetCounter(); - auto p = allocator->Allocate(allocator->DivisionPlan().back() - 1, - allocator->kDefault); - ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - } - - ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); - } -} - -TEST(buffered_allocator, garbage_collection) { - std::unique_ptr cpu_allocator(new CPUAllocator()); - auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault); - auto allocator = GetBufferedAllocator(chunk.get(), false); - auto x1 = allocator->Allocate(1600, allocator->kDefault); - auto x2 = allocator->Allocate(400, allocator->kDefault); - x1 = nullptr; - x2 = nullptr; - auto x3 = allocator->Allocate(1600, allocator->kDefault); - ASSERT_NE(x3, nullptr); - ASSERT_NE(x3->ptr(), nullptr); -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc index 6952c19092..3334589a4b 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc @@ -22,8 +22,6 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use); DECLARE_int64(gpu_allocator_retry_time); #endif -DECLARE_bool(enable_buffered_allocator); - DECLARE_string(allocator_strategy); namespace paddle { @@ -38,7 +36,6 @@ TEST(allocator, allocator) { #endif FLAGS_allocator_strategy = "naive_best_fit"; - FLAGS_enable_buffered_allocator = true; auto &instance = AllocatorFacade::Instance(); platform::Place place; diff --git a/paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc b/paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc deleted file mode 100644 index 15daa8413f..0000000000 --- a/paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include "gtest/gtest.h" -#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h" - -DECLARE_string(buffered_allocator_division_plan_path); - -namespace paddle { -namespace memory { -namespace allocation { - -TEST(buffered_allocator, division_plan) { - std::string path = "/tmp/buffered_allocator_divison_plan"; - FLAGS_buffered_allocator_division_plan_path = path; - - { - std::vector plan( - {"100b", "300.7K", "500.3m", "1.02gB", "2g", "4G"}); - - std::ofstream os(path); - for (auto &p : plan) { - os << p << std::endl; - } - os.close(); - } - - auto plan = ReadBufferedAllocatorDivisionPlanFromFile( - FLAGS_buffered_allocator_division_plan_path); - ASSERT_EQ(plan.size(), 6UL); - ASSERT_EQ(plan[0], 100UL); - ASSERT_EQ(plan[1], static_cast(300.7 * 1024)); - ASSERT_EQ(plan[2], static_cast(500.3 * 1024 * 1024)); - ASSERT_EQ(plan[3], static_cast(1.02 * 1024 * 1024 * 1024)); - ASSERT_EQ(plan[4], static_cast(2.0 * 1024 * 1024 * 1024)); - ASSERT_EQ(plan[5], static_cast(4.0 * 1024 * 1024 * 1024)); -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 6f2e41c159..609f9c76bf 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -39,7 +39,6 @@ limitations under the License. */ #include "paddle/fluid/imperative/profiler.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/legacy_allocator.h" -#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/py_func_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" @@ -135,8 +134,6 @@ PYBIND11_MODULE(core, m) { paddle::memory::allocation::UseAllocatorStrategyGFlag(); - paddle::memory::allocation::UseMultiBinBufferedAllocatorGFlags(); - m.doc() = "C++ core of PaddlePaddle"; // using framework in this function. Since it is inside a function, it will diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index ad2ce30ab5..cb9c75a14f 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -129,9 +129,7 @@ def __bootstrap__(): 'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb', 'fast_eager_deletion_mode', 'memory_fraction_of_eager_deletion', - 'allocator_strategy', 'enable_buffered_allocator', - 'buffered_allocator_excess_times', - 'buffered_allocator_division_plan_path', 'reader_queue_speed_test_mode', + 'allocator_strategy', 'reader_queue_speed_test_mode', 'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism', 'enable_parallel_graph', 'multiple_of_cupti_buffer_size', 'enable_subgraph_optimize', -- GitLab