未验证 提交 aa4d6a5d 编写于 作者: Z Zeng Jinle 提交者: GitHub

Add some debug flags to auto growth allocator (#21766)

* add some debug flags to auto growth allocator, test=develop

* add comments about auto growth, test=develop
上级 c50ebeac
......@@ -65,3 +65,4 @@ cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.c
cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator)
cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator)
......@@ -21,6 +21,18 @@
#include <unordered_map>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
// Controls whether a chunk is returned to the underlying allocator as soon as
// the allocation it backs is freed.  Default (false) keeps freed chunks cached
// so that the next fitting request avoids a fresh underlying allocation.
// Only honored when FLAGS_allocator_strategy=auto_growth.
DEFINE_bool(free_idle_chunk, false,
            "Whether to free idle chunk when each allocation is freed. "
            "If false, all freed allocation would be cached to speed up next "
            "allocation request. If true, no allocation would be cached. This "
            "flag only works when FLAGS_allocator_strategy=auto_growth.");

// Controls when idle (cached) chunks are released on a cache miss.  Default
// (true) frees idle chunks eagerly whenever no cached block satisfies the
// request; when false, idle chunks are only freed after an out-of-memory
// error from the underlying allocator.
// Only honored when FLAGS_allocator_strategy=auto_growth.
DEFINE_bool(free_when_no_cache_hit, true,
            "Whether to free idle chunks when no cache hit. If true, idle "
            "chunk would be freed when no cache hit; if false, idle "
            "chunk would be freed when out of memory occurs. This flag "
            "only works when FLAGS_allocator_strategy=auto_growth.");
namespace paddle {
namespace memory {
namespace allocation {
......@@ -57,14 +69,16 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
block_it->is_free_ = false;
}
} else {
FreeIdleChunks();
if (FLAGS_free_when_no_cache_hit) {
FreeIdleChunks();
}
size_t realloc_size = std::max(size, chunk_size_);
try {
chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
} catch (BadAlloc &ex) {
if (size == realloc_size) throw ex;
realloc_size = size;
if (FLAGS_free_when_no_cache_hit) throw ex;
FreeIdleChunks();
chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
}
......@@ -118,6 +132,10 @@ void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
block_it);
delete allocation;
if (FLAGS_free_idle_chunk) {
FreeIdleChunks();
}
}
void AutoGrowthBestFitAllocator::FreeIdleChunks() {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include <cstdlib>
#include <vector>
#include "gtest/gtest.h"
DECLARE_bool(free_idle_chunk);
DECLARE_bool(free_when_no_cache_hit);
namespace paddle {
namespace memory {
namespace allocation {
class RecordedAllocator : public Allocator {
protected:
Allocation *AllocateImpl(size_t size) override {
allocated_size_ += size;
return new Allocation(malloc(size), size, platform::CPUPlace());
}
void FreeImpl(Allocation *allocation) {
allocated_size_ -= allocation->size();
free(allocation->ptr());
delete allocation;
}
public:
size_t AllocatedSize() const { return allocated_size_; }
private:
size_t allocated_size_{0};
};
static void TestFreeIdleChunk(bool free_idle_chunk,
bool free_when_no_cache_hit) {
FLAGS_free_idle_chunk = free_idle_chunk;
FLAGS_free_when_no_cache_hit = free_when_no_cache_hit;
auto recorded_allocator = std::make_shared<RecordedAllocator>();
size_t alignment = 4096;
size_t memory_size = 8192;
auto ag_allocator = std::make_shared<AutoGrowthBestFitAllocator>(
recorded_allocator, alignment);
for (size_t i = 0; i < 10; ++i) {
auto allocation = ag_allocator->Allocate(memory_size);
ASSERT_EQ(recorded_allocator->AllocatedSize(), memory_size + alignment);
allocation.reset();
if (free_idle_chunk) {
ASSERT_EQ(recorded_allocator->AllocatedSize(), 0UL);
} else {
ASSERT_EQ(recorded_allocator->AllocatedSize(), memory_size + alignment);
}
}
}
class LimitedResourceAllocator : public Allocator {
public:
explicit LimitedResourceAllocator(size_t capacity) : capacity_(capacity) {}
size_t AllocatedSize() const { return allocated_size_; }
protected:
Allocation *AllocateImpl(size_t size) override {
if (allocated_size_ + size > capacity_) {
throw BadAlloc("", __FILE__, __LINE__);
}
allocated_size_ += size;
return new Allocation(malloc(size), size, platform::CPUPlace());
}
void FreeImpl(Allocation *allocation) {
allocated_size_ -= allocation->size();
free(allocation->ptr());
delete allocation;
}
private:
size_t allocated_size_{0};
const size_t capacity_;
};
// Verifies when cached idle chunks are released, as controlled by
// FLAGS_free_when_no_cache_hit, by driving three growing requests through a
// capacity-limited underlying allocator and checking its recorded size after
// each one.
static void TestFreeWhenNoCacheHit(bool free_when_no_cache_hit) {
  FLAGS_free_idle_chunk = false;
  FLAGS_free_when_no_cache_hit = free_when_no_cache_hit;

  size_t alignment = 256;
  size_t base_memory_size = 4096;

  /*
   * Suppose that we have 3 memory allocation requests, that is:
   *  - allocate x1, and then free x1
   *  - allocate x2, and then free x2
   *  - allocate x3, and then free x3
   *
   * where:
   *  - x1 + alignment < x2
   *  - x2 + alignment < x3
   *  - x1 + x2 <= memory_capacity < x1 + x2 + x3
   *
   * In this unittest, we obtain memory_capacity by
   * ((x1 + x2) + (x1 + x2 + x3)) / 2 = x1 + x2 + x3 / 2.
   *
   * In this case, when FLAGS_free_when_no_cache_hit is true,
   * the cached memory size when each allocation request ends
   * would be: x1 + alignment, x2 + alignment, x3 + alignment.
   *
   * When FLAGS_free_when_no_cache_hit is false, the cached
   * memory size when each allocation request ends would be:
   * x1 + alignment, x1 + x2 + 2 * alignment, x3 + alignment.
   */
  std::vector<size_t> allocate_size = {base_memory_size,
                                       base_memory_size + alignment * 2,
                                       base_memory_size + alignment * 4};
  size_t memory_capacity =
      allocate_size[0] + allocate_size[1] + allocate_size[2] / 2;

  auto underlying_allocator =
      std::make_shared<LimitedResourceAllocator>(memory_capacity);
  auto ag_allocator = std::make_shared<AutoGrowthBestFitAllocator>(
      underlying_allocator, alignment);

  // NOTE(review): the return value of Allocate() is discarded, so the handle
  // is destroyed at the end of the full expression — each statement below is
  // effectively "allocate then immediately free" (matching the comment above).
  ag_allocator->Allocate(allocate_size[0]);
  ASSERT_EQ(underlying_allocator->AllocatedSize(),
            allocate_size[0] + alignment);

  ag_allocator->Allocate(allocate_size[1]);
  if (free_when_no_cache_hit) {
    // The cached x1 chunk was freed before allocating x2.
    ASSERT_EQ(underlying_allocator->AllocatedSize(),
              allocate_size[1] + alignment);
  } else {
    // Both the x1 and x2 chunks remain held by the auto-growth allocator.
    ASSERT_EQ(underlying_allocator->AllocatedSize(),
              allocate_size[0] + allocate_size[1] + 2 * alignment);
  }

  // x3 does not fit in the remaining capacity; idle chunks must be freed
  // (either eagerly or after the BadAlloc retry) so only x3's chunk remains.
  ag_allocator->Allocate(allocate_size[2]);
  ASSERT_EQ(underlying_allocator->AllocatedSize(),
            allocate_size[2] + alignment);
}
// Exercises TestFreeIdleChunk over all four combinations of the two flags.
TEST(test_auto_growth_allocator, test_free_idle_chunk) {
  const bool flag_values[] = {false, true};
  for (bool free_idle_chunk : flag_values) {
    for (bool free_when_no_cache_hit : flag_values) {
      TestFreeIdleChunk(free_idle_chunk, free_when_no_cache_hit);
    }
  }
}
// Runs TestFreeWhenNoCacheHit with the flag disabled, then enabled.
TEST(test_auto_growth_allocator, test_free_when_no_cache_hit) {
  for (bool free_when_no_cache_hit : {false, true}) {
    TestFreeWhenNoCacheHit(free_when_no_cache_hit);
  }
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -30,6 +30,8 @@ DECLARE_double(eager_delete_tensor_gb);
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_ngraph);
DECLARE_bool(use_system_allocator);
DECLARE_bool(free_idle_chunk);
DECLARE_bool(free_when_no_cache_hit);
namespace paddle {
namespace pybind {
......@@ -167,6 +169,8 @@ static void RegisterGlobalVarGetterSetter() {
REGISTER_GLOBAL_VAR_GETTER_ONLY(FLAGS_use_ngraph);
REGISTER_GLOBAL_VAR_GETTER_SETTER(FLAGS_eager_delete_tensor_gb);
REGISTER_GLOBAL_VAR_GETTER_SETTER(FLAGS_use_system_allocator);
REGISTER_GLOBAL_VAR_GETTER_ONLY(FLAGS_free_idle_chunk);
REGISTER_GLOBAL_VAR_GETTER_ONLY(FLAGS_free_when_no_cache_hit);
}
} // namespace pybind
......
......@@ -171,7 +171,7 @@ def __bootstrap__():
'enable_parallel_graph', 'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size',
'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator',
'enable_unused_var_check'
'enable_unused_var_check', 'free_idle_chunk', 'free_when_no_cache_hit'
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册