Commit c20db635 authored by S sneaxiy

split PR

test=develop
Parent c75a8803
......@@ -202,8 +202,6 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test(tuple_test SRCS tuple_test.cc )
cc_test(inlined_vector_test SRCS inlined_vector_test.cc)
if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
// A vector-like container that keeps its first N elements in a fixed-size
// inline buffer (head_) and spills any further elements into a heap-backed
// std::vector (tail_).
template <typename T, size_t N>
class InlinedVector {
static_assert(N > 0, "N must be larger than 0");
public:
inline void push_back(const T& item) {
if (size_ < N) {
head_[size_] = item;
} else {
tail_.emplace_back(item);
}
++size_;
}
inline void pop_back() {
PADDLE_ENFORCE(!empty(), "Try to pop back element from empty vector.");
if (size_ > N) {
tail_.pop_back();
}
--size_;
}
inline const T& back() const {
PADDLE_ENFORCE(!empty(), "Try to get back element of empty vector.");
return size_ <= N ? head_[size_ - 1] : tail_.back();
}
inline T& back() {
PADDLE_ENFORCE(!empty(), "Try to get back element of empty vector.");
return size_ <= N ? head_[size_ - 1] : tail_.back();
}
inline bool empty() const { return size_ == 0; }
inline size_t size() const { return size_; }
// This API can only be used in unit tests.
T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }
const T& operator[](size_t i) const {
return i < N ? head_[i] : tail_[i - N];
}
operator std::vector<T>() const {
std::vector<T> ret;
ret.reserve(size_);
for (size_t i = 0; i < size_; ++i) {
ret.emplace_back((*this)[i]);
}
return ret;
}
private:
T head_[N];
size_t size_{0};
std::vector<T> tail_;
};
} // namespace framework
} // namespace paddle
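A minimal usage sketch (not part of this patch, assuming only the header above) of how the inline buffer and the heap-backed spill interact:

#include "paddle/fluid/framework/inlined_vector.h"

void InlinedVectorSketch() {
  paddle::framework::InlinedVector<int, 4> vec;
  for (int i = 0; i < 6; ++i) {
    vec.push_back(i);  // i = 0..3 are stored in head_, i = 4..5 spill to tail_
  }
  int first = vec[0];     // served from head_
  int last = vec.back();  // served from tail_ (== 5), since size_ > N
  (void)first;
  (void)last;
}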
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <vector>
#include "gtest/gtest.h"
namespace paddle {
namespace framework {
TEST(inlined_stack, inlined_stack) {
size_t max_num = 10;
InlinedVector<size_t, 5> stack;
for (size_t i = 0; i < max_num; ++i) {
ASSERT_EQ(stack.size(), i);
stack.push_back(i);
ASSERT_EQ(stack.size(), i + 1);
}
std::vector<size_t> vec = stack;
ASSERT_EQ(stack.size(), vec.size());
for (size_t i = 0; i < vec.size(); ++i) {
ASSERT_EQ(stack[i], vec[i]);
}
for (size_t i = 0; i < max_num; ++i) {
ASSERT_EQ(stack[i], i);
}
for (size_t i = 0; i < max_num; ++i) {
ASSERT_EQ(stack.back(), max_num - 1 - i);
stack.pop_back();
ASSERT_EQ(stack.size(), max_num - 1 - i);
}
}
} // namespace framework
} // namespace paddle
......@@ -3,18 +3,9 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator gflags)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
cc_test(multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator)
cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator)
cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
if (NOT WIN32)
cc_test(test_multi_bin_buffered_allocator_division_plan SRCS test_multi_bin_buffered_allocator_division_plan.cc DEPS multi_bin_buffered_allocator)
endif()
if (WITH_GPU)
nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard)
......@@ -47,7 +38,7 @@ else ()
set(AllocatorFacadeDeps)
endif()
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator multi_bin_buffered_allocator auto_growth_best_fit_allocator legacy_allocator zero_size_allocator)
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
......@@ -59,8 +50,8 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator_facade)
cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
......@@ -17,7 +17,6 @@
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
......@@ -50,7 +49,9 @@ class Allocator;
class Allocation {
public:
Allocation(void* ptr, size_t size, platform::Place place)
: ptr_(ptr), size_(size), place_(place) {}
: ptr_(ptr), size_(size), place_(place) {
decorated_allocators_.reserve(8);
}
Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete;
......@@ -80,8 +81,8 @@ class Allocation {
virtual ~Allocation();
private:
std::vector<Allocator*> DecoratedAllocators() const {
return static_cast<std::vector<Allocator*>>(decorated_allocators_);
const std::vector<Allocator*>& DecoratedAllocators() const {
return decorated_allocators_;
}
inline void RegisterDecoratedAllocator(Allocator* allocator) {
......@@ -98,7 +99,7 @@ class Allocation {
void* ptr_;
size_t size_;
platform::Place place_;
framework::InlinedVector<Allocator*, 8> decorated_allocators_;
std::vector<Allocator*> decorated_allocators_;
friend class Allocator;
friend class AllocationDeleter;
......
......@@ -22,14 +22,12 @@
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
......@@ -47,24 +45,18 @@ DEFINE_int64(
"The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0");
DEFINE_bool(enable_buffered_allocator, false, "Enable buffered_allocator");
namespace paddle {
namespace memory {
namespace allocation {
static inline std::shared_ptr<Allocator> WrapRetryAndBufferedAllocator(
std::shared_ptr<Allocator> allocator, int64_t retry_time,
bool enable_buffered) {
static inline std::shared_ptr<Allocator> WrapRetryAllocator(
std::shared_ptr<Allocator> allocator, int64_t retry_time) {
if (retry_time > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time);
allocator.reset(retry_allocator);
}
if (enable_buffered) {
allocator.reset(new MultiBinBufferedAllocator(allocator));
}
return allocator;
}
......@@ -134,8 +126,7 @@ class ChunkedAllocator : public Allocator {
std::shared_ptr<Allocator> allocator(new LockedAllocator(
std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
allocator = WrapRetryAndBufferedAllocator(allocator, retry_time_,
FLAGS_enable_buffered_allocator);
allocator = WrapRetryAllocator(allocator, retry_time_);
return std::make_shared<AlignedAllocator<4096>>(std::move(allocator));
}
......@@ -219,13 +210,6 @@ class AllocatorFacadePrivate {
WrapZeroSizeAllocator();
break;
}
case AllocatorStrategy::kAutoGrowthBestFit: {
InitAutoGrowthCPUAllocator();
InitAutoGrowthCUDAAllocator();
InitAutoGrowthCUDAPinnedAllocator();
WrapZeroSizeAllocator();
break;
}
default: {
PADDLE_THROW("Unsupported allocator strategy: %d",
static_cast<int>(strategy));
......@@ -234,39 +218,6 @@ class AllocatorFacadePrivate {
}
private:
void InitAutoGrowthCPUAllocator() {
auto cpu_allocator = std::make_shared<AlignedAllocator<4096>>(
std::make_shared<CPUAllocator>());
allocators_[platform::CPUPlace()] =
std::make_shared<AutoGrowthBestFitAllocator>(
cpu_allocator, platform::CpuMaxChunkSize(), 4096);
}
void InitAutoGrowthCUDAAllocator() {
#ifdef PADDLE_WITH_CUDA
int dev_cnt = platform::GetCUDADeviceCount();
for (int dev_id = 0; dev_id < dev_cnt; ++dev_id) {
auto cuda_allocator = std::make_shared<AlignedAllocator<4096>>(
std::make_shared<CUDAAllocator>(platform::CUDAPlace(dev_id)));
auto allocator = std::make_shared<AutoGrowthBestFitAllocator>(
cuda_allocator, platform::GpuMaxChunkSize(), 4096);
allocators_[platform::CUDAPlace(dev_id)] = WrapRetryAndBufferedAllocator(
allocator, FLAGS_gpu_allocator_retry_time, false);
}
#endif
}
void InitAutoGrowthCUDAPinnedAllocator() {
#ifdef PADDLE_WITH_CUDA
auto cuda_pinned_allocator = std::make_shared<AlignedAllocator<4096>>(
std::make_shared<CPUPinnedAllocator>());
allocators_[platform::CUDAPinnedPlace()] =
std::make_shared<AutoGrowthBestFitAllocator>(
cuda_pinned_allocator, platform::CUDAPinnedMaxChunkSize(), 4096);
#endif
}
void InitLegacyAllocator() {
std::vector<platform::Place> places{platform::CPUPlace()};
#ifdef PADDLE_WITH_CUDA
......
......@@ -20,8 +20,7 @@ DEFINE_string(
allocator_strategy, "legacy",
"The allocation strategy. Legacy means the original allocator of Fluid."
"naive_best_fit means the experimental best fit allocator. "
"auto_growth_best_fit means the experimental auto growth best fit "
"allocator. Enum in [legacy, naive_best_fit, auto_growth_best_fit].");
"allocator. Enum in [legacy, naive_best_fit].");
namespace paddle {
namespace memory {
......@@ -32,8 +31,6 @@ static AllocatorStrategy GetStrategyFromFlag() {
return AllocatorStrategy::kLegacy;
} else if (FLAGS_allocator_strategy == "naive_best_fit") {
return AllocatorStrategy::kNaiveBestFit;
} else if (FLAGS_allocator_strategy == "auto_growth_best_fit") {
return AllocatorStrategy::kAutoGrowthBestFit;
} else {
PADDLE_THROW("Unsupported allocator strategy: %s",
FLAGS_allocator_strategy);
......
......@@ -18,7 +18,7 @@ namespace paddle {
namespace memory {
namespace allocation {
enum class AllocatorStrategy { kLegacy, kNaiveBestFit, kAutoGrowthBestFit };
enum class AllocatorStrategy { kLegacy, kNaiveBestFit };
extern AllocatorStrategy GetAllocatorStrategy();
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include <algorithm>
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <unordered_map>
namespace paddle {
namespace memory {
namespace allocation {
static size_t align(size_t size, size_t alignment) {
auto remaining = size % alignment;
return remaining == 0 ? size : size + alignment - remaining;
}
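// Illustrative examples (not part of the original source): align() rounds a
// size up to the next multiple of the alignment, e.g.
//   align(1000, 4096) == 4096
//   align(4096, 4096) == 4096
//   align(4097, 4096) == 8192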
AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
const std::shared_ptr<Allocator> &underlying_allocator, size_t chunk_size,
size_t alignment)
: underlying_allocator_(underlying_allocator),
chunk_size_(align(chunk_size, alignment)),
alignment_(alignment) {}
Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size, Attr attr) {
size = align(size, alignment_);
std::lock_guard<std::mutex> guard(mtx_);
auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
BlockIt block_it;
if (iter != free_blocks_.end()) {
VLOG(2) << "Found " << iter->second->size_ << " for " << size;
block_it = iter->second;
free_blocks_.erase(iter);
auto *chunk = block_it->chunk_;
size_t remaining_size = block_it->size_ - size;
if (remaining_size == 0) {
block_it->is_free_ = false;
VLOG(2) << "Found and no remaining";
} else {
auto remaining_free_block = chunk->blocks_.insert(
block_it, Chunk::Block(block_it->ptr_, remaining_size, true, chunk));
free_blocks_.emplace(std::make_pair(remaining_size, block_it->ptr_),
remaining_free_block);
block_it->ptr_ =
reinterpret_cast<uint8_t *>(block_it->ptr_) + remaining_size;
block_it->size_ = size;
block_it->is_free_ = false;
VLOG(2) << "Found and remaining " << remaining_size;
}
} else {
size_t alloc_size = size;
if (!underlying_allocator_exhaustive_ && chunk_size_ > size) {
alloc_size = chunk_size_;
}
try {
chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr));
} catch (BadAlloc &ex) {
if (size == alloc_size) throw ex;
underlying_allocator_exhaustive_ = true;
alloc_size = size;
chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr));
}
auto *chunk = &(*chunks_.rbegin());
uint8_t *p = reinterpret_cast<uint8_t *>(chunk->allocation_->ptr());
auto &blocks = chunk->blocks_;
size_t remaining_size = alloc_size - size;
if (remaining_size > 0) {
blocks.emplace_back(p, remaining_size, true, chunk);
free_blocks_.emplace(std::make_pair(remaining_size, p), --(blocks.end()));
}
blocks.emplace_back(p + remaining_size, size, false, chunk);
block_it = --(blocks.end());
VLOG(2) << "Not found and allocate " << alloc_size << ", and remaining "
<< remaining_size;
}
VLOG(2) << "After allocate, free blocks " << free_blocks_.size();
return new Chunk::BlockAllocation(block_it);
}
void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
auto &block_it = static_cast<Chunk::BlockAllocation *>(allocation)->block_it_;
auto &blocks = block_it->chunk_->blocks_;
std::lock_guard<std::mutex> guard(mtx_);
block_it->is_free_ = true;
if (block_it != blocks.begin()) {
auto prev_it = block_it;
--prev_it;
if (prev_it->is_free_) {
free_blocks_.erase(std::make_pair(prev_it->size_, prev_it->ptr_));
prev_it->size_ += block_it->size_;
blocks.erase(block_it);
block_it = prev_it;
}
}
auto next_it = block_it;
++next_it;
if (next_it != blocks.end() && next_it->is_free_) {
free_blocks_.erase(std::make_pair(next_it->size_, next_it->ptr_));
block_it->size_ += next_it->size_;
blocks.erase(next_it);
}
free_blocks_.emplace(std::make_pair(block_it->size_, block_it->ptr_),
block_it);
VLOG(2) << "Combine " << block_it->size_ << ", " << blocks.size() << ", "
<< free_blocks_.size();
delete allocation;
}
} // namespace allocation
} // namespace memory
} // namespace paddle
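For reference, a minimal standalone sketch (standard library only, not part of this patch) of the best-fit lookup used in AllocateImpl above: free blocks are keyed by (size, ptr), so a lower_bound search on (size, nullptr) returns the smallest free block whose size is at least the requested size.

#include <cstddef>
#include <cstdio>
#include <map>
#include <utility>

int main() {
  // Keys mirror free_blocks_ above: (block size, block pointer).
  std::map<std::pair<std::size_t, void *>, const char *> free_blocks;
  static char a[256], b[1024], c[4096];
  free_blocks.emplace(std::make_pair(sizeof(a), static_cast<void *>(a)), "a");
  free_blocks.emplace(std::make_pair(sizeof(b), static_cast<void *>(b)), "b");
  free_blocks.emplace(std::make_pair(sizeof(c), static_cast<void *>(c)), "c");
  // Request 512 bytes: the smallest block with size >= 512 is "b" (1024).
  std::size_t request = 512;
  auto it = free_blocks.lower_bound({request, nullptr});
  std::printf("best fit: %s (%zu bytes)\n", it->second, it->first.first);
  return 0;
}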
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class AutoGrowthBestFitAllocator : public Allocator {
public:
explicit AutoGrowthBestFitAllocator(
const std::shared_ptr<Allocator> &underlying_allocator, size_t chunk_size,
size_t alignment);
bool IsAllocThreadSafe() const override { return true; }
using AllocationList = std::list<AllocationPtr>;
using AllocationListIt = AllocationList::iterator;
struct Chunk {
struct Block {
Block(void *ptr, size_t size, bool is_free, Chunk *chunk)
: ptr_(ptr), size_(size), is_free_(is_free), chunk_(chunk) {}
void *ptr_;
size_t size_;
bool is_free_;
Chunk *chunk_; // which chunk it is from
};
explicit Chunk(AllocationPtr allocation)
: allocation_(std::move(allocation)) {}
AllocationPtr allocation_;
std::list<Block> blocks_;
// std::mutex mtx_;
struct BlockAllocation : public Allocation {
explicit BlockAllocation(const std::list<Block>::iterator &it)
: Allocation(it->ptr_, it->size_, it->chunk_->allocation_->place()),
block_it_(it) {}
std::list<Block>::iterator block_it_;
};
};
protected:
Allocation *AllocateImpl(size_t size, Attr attr) override;
void FreeImpl(Allocation *allocation) override;
private:
using BlockIt = std::list<Chunk::Block>::iterator;
std::shared_ptr<Allocator> underlying_allocator_;
std::list<Chunk> chunks_;
std::map<std::pair<size_t, void *>, BlockIt> free_blocks_;
size_t chunk_size_;
size_t alignment_;
bool underlying_allocator_exhaustive_{false};
mutable std::mutex mtx_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_string(allocator_strategy);
namespace paddle {
namespace memory {
namespace allocation {
static inline size_t AlignTo(size_t size, size_t alignment = 4096) {
auto remaining = size % alignment;
return remaining == 0 ? size : size + alignment - remaining;
}
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
FLAGS_allocator_strategy = "auto_growth_best_fit";
auto &instance = AllocatorFacade::Instance();
size_t size = 1024;
platform::Place place;
{
place = platform::CPUPlace();
size = 1024;
auto cpu_allocation = instance.Alloc(place, size);
ASSERT_NE(cpu_allocation, nullptr);
ASSERT_NE(cpu_allocation->ptr(), nullptr);
ASSERT_EQ(cpu_allocation->place(), place);
ASSERT_EQ(cpu_allocation->size(), AlignTo(size));
}
#ifdef PADDLE_WITH_CUDA
{
place = platform::CUDAPlace(0);
size = 1024;
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), AlignTo(size));
}
{
// Allocate 2GB gpu memory
place = platform::CUDAPlace(0);
size = 2 * static_cast<size_t>(1 << 30);
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), AlignTo(size));
}
{
place = platform::CUDAPinnedPlace();
size = (1 << 20);
auto cuda_pinned_allocation =
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
ASSERT_NE(cuda_pinned_allocation, nullptr);
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
ASSERT_EQ(cuda_pinned_allocation->place(), place);
ASSERT_GE(cuda_pinned_allocation->size(), AlignTo(size));
}
#endif
}
} // namespace allocation
} // namespace memory
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <condition_variable> // NOLINT
#include <mutex> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <iostream>
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, auto_growth_best_fit_allocator) {
auto cpu_allocator = std::make_shared<CPUAllocator>();
auto allocator =
std::make_shared<AutoGrowthBestFitAllocator>(cpu_allocator, 0, 4096);
std::mutex mtx;
std::condition_variable cv;
bool flag = false;
auto thread_main = [&] {
{
std::unique_lock<std::mutex> lock(mtx);
cv.wait(lock, [&] { return flag; });
}
for (size_t i = 10; i > 0; --i) {
allocator->Allocate((i + 1) * 1000);
}
};
std::vector<std::thread> ths;
for (size_t i = 0; i < 10; ++i) {
ths.emplace_back(thread_main);
}
{
std::lock_guard<std::mutex> lock(mtx);
flag = true;
}
cv.notify_all();
for (auto &th : ths) {
th.join();
}
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -35,9 +35,9 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
void *p;
#ifdef _WIN32
p = _aligned_malloc(size, 4096);
p = _aligned_malloc(size, kAlignment);
#else
PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096, size), 0, "Alloc %ld error!",
PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!",
size);
#endif
return new Allocation(p, size, platform::CPUPlace());
......
......@@ -33,7 +33,7 @@ namespace allocation {
// an open-sourced allocator into Paddle.
class CPUAllocator : public Allocator {
public:
constexpr static size_t kAlignment = 64u;
constexpr static size_t kAlignment = 4096UL;
bool IsAllocThreadSafe() const override;
protected:
......
......@@ -148,12 +148,18 @@ class GPUBuddyAllocatorList {
std::unique_ptr<detail::SystemAllocator>(
new detail::GPUAllocator(dev_id)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
<< "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
VLOG(10) << "\n\nNOTE:\n"
<< "You can set GFlags environment variable "
<< "'FLAGS_fraction_of_gpu_memory_to_use' "
<< "or 'FLAGS_initial_gpu_memory_in_mb' "
<< "or 'FLAGS_reallocate_gpu_memory_in_mb' "
<< "to change the memory size for GPU usage.\n"
<< "Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
<< FLAGS_fraction_of_gpu_memory_to_use
<< ". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
<< FLAGS_initial_gpu_memory_in_mb
<< ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<< FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
});
return allocators_[dev_id];
}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <limits>
#include <mutex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include "paddle/fluid/platform/lock_guard_ptr.h"
DEFINE_double(
buffered_allocator_excess_times, 2,
"Excess memory size times of buffered_allocator. BufferedAllocator"
" would try to reuse memory freed previously, but the size of freed"
" allocation may not be exactly the same as the requested. Here, we"
" use a flag to control the excess times of reused memory size. "
"Not quite sure what is the best excess times value.");
DEFINE_string(
buffered_allocator_division_plan_path, "",
"The file path which "
"determines the memory size division plans of BufferedAllocator."
"If it is empty, use the default division plan. The file must be a "
"text file which each lines indicates the bound of division plan. "
"For example, if the text file has 3 lines, which are '500M', '1G', "
" '2G', the division plan would be [0, 500M), [500M, 1G), [1G, 2G) "
"and [2G, +inf). Allocation request whose requested memory size is "
"inside the last interval of division plan would be dispatched to "
" underlying_allocator directly without caching when freed.");
namespace paddle {
namespace memory {
namespace allocation {
static std::string TrimStringAndToUpperCase(const std::string &str) {
auto not_space = [](char ch) { return std::isspace(ch) == 0; };
auto first_idx = static_cast<size_t>(
std::find_if(str.begin(), str.end(), not_space) - str.begin());
auto last_idx = static_cast<size_t>(
std::find_if(str.rbegin(), str.rend(), not_space) - str.rbegin());
if (first_idx == str.size() || last_idx == str.size()) return "";
last_idx = str.size() - last_idx;
auto ret = str.substr(first_idx, last_idx - first_idx);
std::for_each(ret.begin(), ret.end(),
[](char &ch) { ch = std::toupper(ch); });
return ret;
}
namespace {
enum DivisionPlanFileStatus { kEOF, kException, kNormal };
} // NOLINT
static size_t ParseStringToBytes(const std::string &original_str,
DivisionPlanFileStatus *ret_code) {
std::string str = TrimStringAndToUpperCase(original_str);
if (str.empty()) {
*ret_code = kEOF;
return 0;
}
if (str.back() == 'B') {
str.pop_back();
if (str.empty()) {
*ret_code = kException;
return 0;
}
}
size_t multiples = 1;
switch (str.back()) {
case 'G':
multiples *= (static_cast<size_t>(1) << 30);
break;
case 'M':
multiples *= (static_cast<size_t>(1) << 20);
break;
case 'K':
multiples *= (static_cast<size_t>(1) << 10);
break;
default:
break;
}
if (multiples != 1) {
str.pop_back();
if (str.empty()) {
*ret_code = kException;
return 0;
}
}
str = TrimStringAndToUpperCase(str);
double mem_val = -1.0;
std::stringstream ss(str);
if (!(ss >> mem_val) || mem_val < 0) {
*ret_code = kException;
return 0;
}
*ret_code = kNormal;
return static_cast<size_t>(mem_val * multiples);
}
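// Illustrative examples (not part of the original source) of the parsing
// rules above:
//   "500M"   -> 500 * (1 << 20) bytes
//   "1.02gB" -> static_cast<size_t>(1.02 * (1 << 30)) bytes
//   "2048"   -> 2048 bytes (no suffix means bytes)
//   ""       -> 0, with *ret_code set to kEOF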
static std::string GetDebugStringOfPlan(const std::vector<size_t> &plan) {
std::string ret("[");
for (auto sz : plan) {
ret += string::HumanReadableSize(sz);
ret += ", ";
}
return ret + "]";
}
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
const std::string &filepath) {
std::ifstream is(filepath.c_str());
PADDLE_ENFORCE(is.good(), "File %s does not exist", filepath);
std::string str;
std::vector<size_t> plan;
size_t line_num = 1;
while (std::getline(is, str).good()) {
DivisionPlanFileStatus status;
size_t ret = ParseStringToBytes(str, &status);
if (status == kEOF) {
break;
}
if (status == kException) {
PADDLE_THROW(
"Invalid format in line %d of file %s: '%s'. Only support B, KB, MB, "
"GB.",
line_num, filepath, str);
}
plan.push_back(ret);
++line_num;
}
return plan;
}
static void CheckAndModifyMemoryDivisionPlan(
std::vector<size_t> *division_plan) {
// Check whether the division plan is strictly sorted
bool is_strictly_sorted = true;
for (size_t i = 1; i < division_plan->size(); ++i) {
if ((*division_plan)[i - 1] >= (*division_plan)[i]) {
is_strictly_sorted = false;
break;
}
}
PADDLE_ENFORCE(is_strictly_sorted, "Division plan must be strictly sorted");
// Insert 0 into the division plan to simplify the binary search code
if (division_plan->empty() || division_plan->front() != 0) {
division_plan->insert(division_plan->begin(), 0);
}
// Remove MAX from the division plan to simplify the binary search code
constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
if (division_plan->back() == kSizeTypeMax) {
division_plan->pop_back();
}
PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
}
static std::vector<size_t> GetDefaultDivisionPlan() {
if (!FLAGS_buffered_allocator_division_plan_path.empty()) {
return ReadBufferedAllocatorDivisionPlanFromFile(
FLAGS_buffered_allocator_division_plan_path);
}
// Default division plan is 4K, 8K, 16K, ..., 512M, 1G
constexpr size_t kMaxLogSize = 30;
std::vector<size_t> plan;
for (size_t i = 12; i <= kMaxLogSize; ++i) {
plan.push_back(static_cast<size_t>(1) << i);
}
return plan;
}
inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
size_t size) {
return static_cast<size_t>(std::upper_bound(bins.begin(), bins.end(), size) -
bins.begin() - 1);
}
inline static size_t TolerantUpperSize(size_t size) {
return static_cast<size_t>(size * FLAGS_buffered_allocator_excess_times);
}
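// Illustrative examples (not part of the original source), assuming the
// default FLAGS_buffered_allocator_excess_times of 2 and a division plan of
// {0, 4K, 8K, 16K, ...}:
//   FindDivisionPlanBinIndex(plan, 4096) == 1   (bin [4K, 8K))
//   FindDivisionPlanBinIndex(plan, 5000) == 1   (bin [4K, 8K))
//   TolerantUpperSize(5000) == 10000, so cached blocks of up to 10000 bytes
//   (possibly from later bins) may also satisfy a 5000-byte request.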
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator)
: MultiBinBufferedAllocator(std::move(underlying_allocator),
GetDefaultDivisionPlan()) {}
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator,
const std::vector<size_t> &division_plan)
: underlying_allocator_(std::move(underlying_allocator)),
division_plan_(division_plan) {
CheckAndModifyMemoryDivisionPlan(&division_plan_);
allocations_.resize(division_plan_.size() - 1);
accumulated_cache_size_.assign(division_plan_.size() - 1, 0UL);
mtx_.resize(division_plan_.size() - 1);
if (underlying_allocator_->IsAllocThreadSafe()) {
for (auto &mtx : mtx_) {
mtx.reset(new std::mutex());
}
}
VLOG(1) << "Division plan is: " << GetDebugStringOfPlan(division_plan_);
VLOG(1) << "FLAGS_buffered_allocator_excess_times = "
<< FLAGS_buffered_allocator_excess_times;
}
void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size());
if (bin_index < allocations_.size()) {
platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
allocations_[bin_index].emplace(allocation->size(),
AllocationPtr(allocation));
accumulated_cache_size_[bin_index] += allocation->size();
} else {
underlying_allocator_->Free(allocation);
}
}
// Maybe we can design a more flexible FreeCache strategy based on bin_index
// and requested size.
size_t MultiBinBufferedAllocator::ClearCache() {
size_t accumulated_size = 0;
// FIXME(zjl): free the largest first when there is no extra
for (size_t i = allocations_.size() - 1; i != static_cast<size_t>(-1); --i) {
platform::LockGuardPtr<std::mutex> lock(mtx_[i]);
allocations_[i].clear();
accumulated_size += accumulated_cache_size_[i];
accumulated_cache_size_[i] = 0;
}
return accumulated_size;
}
Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
auto upper_size = TolerantUpperSize(size);
for (; bin_index < allocations_.size() &&
upper_size >= division_plan_[bin_index];
++bin_index) {
auto &allocation = allocations_[bin_index];
platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
auto it = allocation.lower_bound(size);
if (it != allocation.end() && it->second->size() <= upper_size) {
size_t sz = it->second->size();
auto ret = std::move(it->second);
allocation.erase(it);
accumulated_cache_size_[bin_index] -= sz;
VLOG(3) << "Allocate " << sz << "(required " << size
<< ") from cache directly";
return ret.release();
}
}
size_t retry_time = 1;
while (true) {
try {
auto ret = underlying_allocator_->Allocate(size, attr).release();
VLOG(2) << "Allocate " << size << " from underlying directly";
return ret;
} catch (BadAlloc &) {
size_t actual_free_size = ClearCache();
VLOG(1) << retry_time << "-th free " << actual_free_size
<< " bytes caches";
if (actual_free_size == 0) throw;
}
++retry_time;
}
}
void UseMultiBinBufferedAllocatorGFlags() {}
} // namespace allocation
} // namespace memory
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
const std::string& filepath);
class MultiBinBufferedAllocator : public Allocator {
public:
explicit MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator);
MultiBinBufferedAllocator(std::shared_ptr<Allocator> underlying_allocator,
const std::vector<size_t>& division_plan);
bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; }
size_t ClearCache();
const std::vector<size_t>& DivisionPlan() const { return division_plan_; }
protected:
Allocation* AllocateImpl(size_t size, Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private:
std::shared_ptr<Allocator> underlying_allocator_;
std::vector<std::multimap<size_t, AllocationPtr>> allocations_;
std::vector<size_t> accumulated_cache_size_;
std::vector<size_t> division_plan_;
std::vector<std::unique_ptr<std::mutex>> mtx_;
};
extern void UseMultiBinBufferedAllocatorGFlags();
} // namespace allocation
} // namespace memory
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
Allocation *allocation, bool thread_safe) {
std::shared_ptr<Allocator> allocator(new BestFitAllocator(allocation));
if (thread_safe) {
allocator.reset(new LockedAllocator(std::move(allocator)));
}
return std::make_shared<MultiBinBufferedAllocator>(allocator);
}
TEST(buffered_allocator, thread_safety) {
std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
{
auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
}
{
auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
}
}
class StubAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class StubAllocator : public Allocator {
public:
void ResetCounter() {
construct_count_ = 0;
destruct_count_ = 0;
}
size_t GetAllocCount() const { return construct_count_; }
size_t GetFreeCount() const { return destruct_count_; }
protected:
void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
++destruct_count_;
delete allocation;
}
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
++construct_count_;
if (size == 0) {
return new StubAllocation(nullptr, 0, platform::CPUPlace());
} else {
return new StubAllocation(new uint8_t[size], size, platform::CPUPlace());
}
}
private:
size_t construct_count_ = 0;
size_t destruct_count_ = 0;
};
constexpr size_t kZero = 0;
constexpr size_t kOne = 1;
constexpr size_t kTwo = 2;
TEST(buffered_allocator, lazy_free) {
std::vector<int> original_alloc_size({1022, 1023, 1024, 1025, 1026});
for (auto alloc_size : original_alloc_size) {
auto stub_allocator = std::make_shared<StubAllocator>();
auto *underlying_allocator = stub_allocator.get();
auto allocator =
std::make_shared<MultiBinBufferedAllocator>(stub_allocator);
{
underlying_allocator->ResetCounter();
auto x = allocator->Allocate(alloc_size, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
{
underlying_allocator->ResetCounter();
auto x = allocator->Allocate(900, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
auto y = allocator->Allocate(2048, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
y = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
{
underlying_allocator->ResetCounter();
size_t cache_size = allocator->ClearCache();
ASSERT_EQ(cache_size, static_cast<size_t>(alloc_size + 2048));
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
}
{
underlying_allocator->ResetCounter();
auto p = allocator->Allocate(allocator->DivisionPlan().back(),
allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
ASSERT_EQ(underlying_allocator->GetFreeCount(), kOne);
{
underlying_allocator->ResetCounter();
auto p = allocator->Allocate(allocator->DivisionPlan().back() - 1,
allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
}
TEST(buffered_allocator, garbage_collection) {
std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
auto allocator = GetBufferedAllocator(chunk.get(), false);
auto x1 = allocator->Allocate(1600, allocator->kDefault);
auto x2 = allocator->Allocate(400, allocator->kDefault);
x1 = nullptr;
x2 = nullptr;
auto x3 = allocator->Allocate(1600, allocator->kDefault);
ASSERT_NE(x3, nullptr);
ASSERT_NE(x3->ptr(), nullptr);
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -22,8 +22,6 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_bool(enable_buffered_allocator);
DECLARE_string(allocator_strategy);
namespace paddle {
......@@ -38,7 +36,6 @@ TEST(allocator, allocator) {
#endif
FLAGS_allocator_strategy = "naive_best_fit";
FLAGS_enable_buffered_allocator = true;
auto &instance = AllocatorFacade::Instance();
platform::Place place;
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
DECLARE_string(buffered_allocator_division_plan_path);
namespace paddle {
namespace memory {
namespace allocation {
TEST(buffered_allocator, division_plan) {
std::string path = "/tmp/buffered_allocator_division_plan";
FLAGS_buffered_allocator_division_plan_path = path;
{
std::vector<std::string> plan(
{"100b", "300.7K", "500.3m", "1.02gB", "2g", "4G"});
std::ofstream os(path);
for (auto &p : plan) {
os << p << std::endl;
}
os.close();
}
auto plan = ReadBufferedAllocatorDivisionPlanFromFile(
FLAGS_buffered_allocator_division_plan_path);
ASSERT_EQ(plan.size(), 6UL);
ASSERT_EQ(plan[0], 100UL);
ASSERT_EQ(plan[1], static_cast<size_t>(300.7 * 1024));
ASSERT_EQ(plan[2], static_cast<size_t>(500.3 * 1024 * 1024));
ASSERT_EQ(plan[3], static_cast<size_t>(1.02 * 1024 * 1024 * 1024));
ASSERT_EQ(plan[4], static_cast<size_t>(2.0 * 1024 * 1024 * 1024));
ASSERT_EQ(plan[5], static_cast<size_t>(4.0 * 1024 * 1024 * 1024));
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -39,7 +39,6 @@ limitations under the License. */
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
......@@ -135,8 +134,6 @@ PYBIND11_MODULE(core, m) {
paddle::memory::allocation::UseAllocatorStrategyGFlag();
paddle::memory::allocation::UseMultiBinBufferedAllocatorGFlags();
m.doc() = "C++ core of PaddlePaddle";
// using framework in this function. Since it is inside a function, it will
......
......@@ -129,9 +129,7 @@ def __bootstrap__():
'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
'fast_eager_deletion_mode', 'memory_fraction_of_eager_deletion',
'allocator_strategy', 'enable_buffered_allocator',
'buffered_allocator_excess_times',
'buffered_allocator_division_plan_path', 'reader_queue_speed_test_mode',
'allocator_strategy', 'reader_queue_speed_test_mode',
'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir',
'inner_op_parallelism', 'enable_parallel_graph',
'multiple_of_cupti_buffer_size', 'enable_subgraph_optimize',
......