From aa4d6a5d6caf4320fdb6f6eb1a7303713e39bc83 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Wed, 18 Dec 2019 21:30:48 -0600 Subject: [PATCH] Add some debug flags to auto growth allocator (#21766) * add some debug flags to auto growth allocator, test=develop * add comments about auto growth, test=develop --- paddle/fluid/memory/allocation/CMakeLists.txt | 1 + .../auto_growth_best_fit_allocator.cc | 24 ++- .../auto_growth_best_fit_allocator_test.cc | 168 ++++++++++++++++++ .../pybind/global_value_getter_setter.cc | 4 + python/paddle/fluid/__init__.py | 2 +- 5 files changed, 195 insertions(+), 4 deletions(-) create mode 100644 paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index ffae6e64808..dc3d9a1f56e 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -65,3 +65,4 @@ cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.c cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator) cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator) +cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator) diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc index 9ce4fd07829..f71a4b8e1a8 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc @@ -21,6 +21,18 @@ #include #include "paddle/fluid/memory/allocation/aligned_allocator.h" +DEFINE_bool(free_idle_chunk, false, + "Whether to free idle chunk when each allocation is 
freed. " + "If false, all freed allocation would be cached to speed up next " + "allocation request. If true, no allocation would be cached. This " + "flag only works when FLAGS_allocator_strategy=auto_growth."); + +DEFINE_bool(free_when_no_cache_hit, true, + "Whether to free idle chunks when no cache hit. If true, idle " + "chunk would be freed when no cache hit; if false, idle " + "chunk would be freed when out of memory occurs. This flag " + "only works when FLAGS_allocator_strategy=auto_growth."); + namespace paddle { namespace memory { namespace allocation { @@ -57,14 +69,16 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size) { block_it->is_free_ = false; } } else { - FreeIdleChunks(); + if (FLAGS_free_when_no_cache_hit) { + FreeIdleChunks(); + } size_t realloc_size = std::max(size, chunk_size_); try { chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size)); } catch (BadAlloc &ex) { - if (size == realloc_size) throw ex; - realloc_size = size; + if (FLAGS_free_when_no_cache_hit) throw ex; + FreeIdleChunks(); chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size)); } @@ -118,6 +132,10 @@ void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) { block_it); delete allocation; + + if (FLAGS_free_idle_chunk) { + FreeIdleChunks(); + } } void AutoGrowthBestFitAllocator::FreeIdleChunks() { diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc new file mode 100644 index 00000000000..8865bf0b0db --- /dev/null +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc @@ -0,0 +1,168 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h" +#include +#include +#include "gtest/gtest.h" + +DECLARE_bool(free_idle_chunk); +DECLARE_bool(free_when_no_cache_hit); + +namespace paddle { +namespace memory { +namespace allocation { + +class RecordedAllocator : public Allocator { + protected: + Allocation *AllocateImpl(size_t size) override { + allocated_size_ += size; + return new Allocation(malloc(size), size, platform::CPUPlace()); + } + + void FreeImpl(Allocation *allocation) { + allocated_size_ -= allocation->size(); + free(allocation->ptr()); + delete allocation; + } + + public: + size_t AllocatedSize() const { return allocated_size_; } + + private: + size_t allocated_size_{0}; +}; + +static void TestFreeIdleChunk(bool free_idle_chunk, + bool free_when_no_cache_hit) { + FLAGS_free_idle_chunk = free_idle_chunk; + FLAGS_free_when_no_cache_hit = free_when_no_cache_hit; + auto recorded_allocator = std::make_shared(); + size_t alignment = 4096; + size_t memory_size = 8192; + auto ag_allocator = std::make_shared( + recorded_allocator, alignment); + + for (size_t i = 0; i < 10; ++i) { + auto allocation = ag_allocator->Allocate(memory_size); + ASSERT_EQ(recorded_allocator->AllocatedSize(), memory_size + alignment); + allocation.reset(); + if (free_idle_chunk) { + ASSERT_EQ(recorded_allocator->AllocatedSize(), 0UL); + } else { + ASSERT_EQ(recorded_allocator->AllocatedSize(), memory_size + alignment); + } + } +} + +class LimitedResourceAllocator : public Allocator { + public: + explicit LimitedResourceAllocator(size_t 
capacity) : capacity_(capacity) {} + + size_t AllocatedSize() const { return allocated_size_; } + + protected: + Allocation *AllocateImpl(size_t size) override { + if (allocated_size_ + size > capacity_) { + throw BadAlloc("", __FILE__, __LINE__); + } + + allocated_size_ += size; + return new Allocation(malloc(size), size, platform::CPUPlace()); + } + + void FreeImpl(Allocation *allocation) { + allocated_size_ -= allocation->size(); + free(allocation->ptr()); + delete allocation; + } + + private: + size_t allocated_size_{0}; + const size_t capacity_; +}; + +static void TestFreeWhenNoCacheHit(bool free_when_no_cache_hit) { + FLAGS_free_idle_chunk = false; + FLAGS_free_when_no_cache_hit = free_when_no_cache_hit; + size_t alignment = 256; + size_t base_memory_size = 4096; + + /* + * Suppose that we have 3 memory allocation requests, that is: + * - allocate x1, and then free x1 + * - allocate x2, and then free x2 + * - allocate x3, and then free x3 + * + * where: + * - x1 + alignment < x2 + * - x2 + alignment < x3 + * - x1 + x2 <= memory_capacity < x1 + x2 + x3 + * + * In this unittest, we obtain memory_capacity by + * ((x1 + x2) + (x1 + x2 + x3)) / 2 = x1 + x2 + x3 / 2. + * + * In this case, when FLAGS_free_when_no_cache_hit is true, + * the cached memory size when each allocation request ends + * would be: x1 + alignment, x2 + alignment, x3 + alignment. + * + * When FLAGS_free_when_no_cache_hit is false, the cached + * memory size when each allocation request ends would be: + * x1 + alignment, x1 + x2 + 2 * alignment, x3 + alignment. 
+ */ + std::vector allocate_size = {base_memory_size, + base_memory_size + alignment * 2, + base_memory_size + alignment * 4}; + size_t memory_capacity = + allocate_size[0] + allocate_size[1] + allocate_size[2] / 2; + + auto underlying_allocator = + std::make_shared(memory_capacity); + auto ag_allocator = std::make_shared( + underlying_allocator, alignment); + + ag_allocator->Allocate(allocate_size[0]); + ASSERT_EQ(underlying_allocator->AllocatedSize(), + allocate_size[0] + alignment); + + ag_allocator->Allocate(allocate_size[1]); + if (free_when_no_cache_hit) { + ASSERT_EQ(underlying_allocator->AllocatedSize(), + allocate_size[1] + alignment); + } else { + ASSERT_EQ(underlying_allocator->AllocatedSize(), + allocate_size[0] + allocate_size[1] + 2 * alignment); + } + + ag_allocator->Allocate(allocate_size[2]); + ASSERT_EQ(underlying_allocator->AllocatedSize(), + allocate_size[2] + alignment); +} + +TEST(test_auto_growth_allocator, test_free_idle_chunk) { + for (auto free_idle_chunk : {false, true}) { + for (auto free_when_no_cache_hit : {false, true}) { + TestFreeIdleChunk(free_idle_chunk, free_when_no_cache_hit); + } + } +} + +TEST(test_auto_growth_allocator, test_free_when_no_cache_hit) { + TestFreeWhenNoCacheHit(false); + TestFreeWhenNoCacheHit(true); +} + +} // namespace allocation +} // namespace memory +} // namespace paddle diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index d84e0d94a68..4a0e09bb2ae 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -30,6 +30,8 @@ DECLARE_double(eager_delete_tensor_gb); DECLARE_bool(use_mkldnn); DECLARE_bool(use_ngraph); DECLARE_bool(use_system_allocator); +DECLARE_bool(free_idle_chunk); +DECLARE_bool(free_when_no_cache_hit); namespace paddle { namespace pybind { @@ -167,6 +169,8 @@ static void RegisterGlobalVarGetterSetter() { REGISTER_GLOBAL_VAR_GETTER_ONLY(FLAGS_use_ngraph); 
REGISTER_GLOBAL_VAR_GETTER_SETTER(FLAGS_eager_delete_tensor_gb); REGISTER_GLOBAL_VAR_GETTER_SETTER(FLAGS_use_system_allocator); + REGISTER_GLOBAL_VAR_GETTER_ONLY(FLAGS_free_idle_chunk); + REGISTER_GLOBAL_VAR_GETTER_ONLY(FLAGS_free_when_no_cache_hit); } } // namespace pybind diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 71c47eb4e08..e6eb7f0c999 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -171,7 +171,7 @@ def __bootstrap__(): 'enable_parallel_graph', 'fuse_parameter_groups_size', 'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size', 'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator', - 'enable_unused_var_check' + 'enable_unused_var_check', 'free_idle_chunk', 'free_when_no_cache_hit' ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') -- GitLab