Commit 2a639d5c authored by: S sneaxiy

add allocator chain to fix bug

test=develop
Parent: 6d8771b5
......@@ -404,9 +404,6 @@ class ExecutionContext {
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
allocation_ptr, deleter);
PADDLE_ENFORCE(
dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
"The AllocationPtr must be TemporaryAllocation.");
PADDLE_ENFORCE_GE(allocation_ptr->size(),
framework::product(dim) * sizeof(T));
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <array>
#include <deque>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
class SmallStack {
static_assert(N > 0, "N must be larger than 0");
public:
inline void push(const T& item) {
if (size_ < N) {
head_[size_] = item;
} else {
tail_.emplace_back(item);
}
++size_;
}
inline void pop() {
PADDLE_ENFORCE(!empty(), "Try to pop element from empty stack.");
if (size_ > N) {
tail_.pop_back();
}
--size_;
}
inline const T& top() const {
PADDLE_ENFORCE(!empty(), "Try to get top element of empty stack.");
return size_ <= N ? head_[size_ - 1] : tail_.back();
}
inline T& top() {
PADDLE_ENFORCE(!empty(), "Try to get top element of empty stack.");
return size_ <= N ? head_[size_ - 1] : tail_.back();
}
inline bool empty() const { return size_ == 0; }
inline size_t size() const { return size_; }
// This API should only be used in unit tests
T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }
const T& operator[](size_t i) const {
return i < N ? head_[i] : tail_[i - N];
}
private:
T head_[N];
std::deque<T> tail_;
size_t size_{0};
};
} // namespace framework
} // namespace paddle
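Editor's note: a minimal usage sketch of the SmallStack container added above (standalone illustration, not part of this commit). The first N elements live in the inline array head_; anything beyond that spills into the deque tail_.

// Hypothetical usage sketch of paddle::framework::SmallStack (illustration only).
#include <cassert>
#include "paddle/fluid/framework/small_stack.h"

void SmallStackDemo() {
  paddle::framework::SmallStack<int, 2> stack;  // first 2 elements use inline storage
  stack.push(1);
  stack.push(2);
  stack.push(3);             // the third element spills into the internal deque
  assert(stack.size() == 3);
  assert(stack.top() == 3);  // top() reads from the deque here
  stack.pop();
  assert(stack.top() == 2);  // back to the inline array
}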
......@@ -3,8 +3,11 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
cc_test(multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator)
if (WITH_GPU)
nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard)
......@@ -53,6 +56,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
conditional_allocator
retry_allocator
buffered_allocator
multi_bin_buffered_allocator
allocator_strategy
legacy_allocator
)
......
......@@ -93,6 +93,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_->Allocate(size + kAlignment, attr);
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
}
void FreeImpl(Allocation* allocation) override { delete allocation; }
};
} // namespace allocation
......
......@@ -26,17 +26,28 @@ Allocator::~Allocator() {}
bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
VLOG(2) << "Alloc allocation on " << typeid(*this).name();
auto ptr = AllocateImpl(size, attr);
ptr->set_allocator(this);
ptr->RegisterAllocatorChain(this);
VLOG(2) << "Alloc success";
return AllocationPtr(ptr);
}
void Allocator::Free(Allocation* allocation) { delete allocation; }
void Allocator::FreeImpl(Allocation* allocation) {
auto* allocator = allocation->TopAllocator();
allocator->Free(allocation);
}
void Allocator::Free(Allocation* allocation) {
VLOG(2) << "Free allocation on " << typeid(*this).name();
allocation->PopAllocator();
FreeImpl(allocation);
}
const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
void AllocationDeleter::operator()(Allocation* allocation) const {
auto* allocator = allocation->allocator();
auto* allocator = allocation->TopAllocator();
allocator->Free(allocation);
}
......
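Editor's note: the change above replaces the single allocator_ back-pointer (and the AllocationWithUnderlying wrappers removed later in this commit) with a per-allocation allocator chain. Allocate() pushes the allocating allocator, Free() pops the top allocator before dispatching to FreeImpl(), and the default FreeImpl() forwards to whichever allocator is then on top, so wrapping allocators unwind in reverse order of allocation. The following self-contained toy model (simplified stand-in names, not the actual Paddle classes) illustrates that flow.

// Toy model of the allocator-chain mechanism (illustrative sketch only).
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <stack>

struct ToyAllocator;

struct ToyAllocation {
  void* ptr = nullptr;
  std::stack<ToyAllocator*> chain;  // plays the role of allocator_chain_
};

struct ToyAllocator {
  virtual ~ToyAllocator() = default;

  ToyAllocation* Allocate(std::size_t size) {
    ToyAllocation* a = AllocateImpl(size);
    a->chain.push(this);  // RegisterAllocatorChain(this)
    return a;
  }

  void Free(ToyAllocation* a) {
    a->chain.pop();       // PopAllocator()
    FreeImpl(a);
  }

 protected:
  virtual ToyAllocation* AllocateImpl(std::size_t size) = 0;

  // Default FreeImpl: forward to whichever allocator is now on top of the
  // chain, i.e. the allocator one level below the current wrapper.
  virtual void FreeImpl(ToyAllocation* a) { a->chain.top()->Free(a); }
};

// Leaf allocator that actually owns the memory.
struct ToyCPUAllocator : ToyAllocator {
 protected:
  ToyAllocation* AllocateImpl(std::size_t size) override {
    auto* a = new ToyAllocation;
    a->ptr = std::malloc(size);
    return a;
  }
  void FreeImpl(ToyAllocation* a) override {
    std::free(a->ptr);
    delete a;
  }
};

// Stateless wrapper that forwards to an underlying allocator.
struct ToyWrapperAllocator : ToyAllocator {
  explicit ToyWrapperAllocator(ToyAllocator* underlying) : underlying_(underlying) {}
 protected:
  ToyAllocation* AllocateImpl(std::size_t size) override {
    return underlying_->Allocate(size);  // chain becomes [leaf], then the wrapper is pushed
  }
  // Inherits the default FreeImpl, which pops back down to the leaf allocator.
 private:
  ToyAllocator* underlying_;
};

int main() {
  ToyCPUAllocator leaf;
  ToyWrapperAllocator wrapper(&leaf);
  ToyAllocation* a = wrapper.Allocate(64);
  assert(a->chain.size() == 2);  // [leaf, wrapper]
  wrapper.Free(a);               // pops wrapper, then the default FreeImpl frees via leaf
  return 0;
}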
......@@ -15,6 +15,8 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/small_stack.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
......@@ -47,10 +49,12 @@ class Allocator;
class Allocation {
public:
Allocation(void* ptr, size_t size, platform::Place place)
: allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
: ptr_(ptr), size_(size), place_(place) {}
Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete;
Allocation(Allocation&& o) = delete;
Allocation& operator=(Allocation&& o) = delete;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
......@@ -72,17 +76,34 @@ class Allocation {
const platform::Place& place() const { return place_; }
Allocator* allocator() { return allocator_; }
virtual ~Allocation();
void set_allocator(Allocator* allocator) { allocator_ = allocator; }
// This function should only be used in unit tests
std::vector<Allocator*> GetAllocatorChain() const {
std::vector<Allocator*> allocators;
allocators.reserve(allocator_chain_.size());
for (size_t i = 0; i < allocator_chain_.size(); ++i) {
allocators.push_back(allocator_chain_[i]);
}
return allocators;
}
virtual ~Allocation();
private:
inline void RegisterAllocatorChain(Allocator* allocator) {
allocator_chain_.push(allocator);
}
inline void PopAllocator() { allocator_chain_.pop(); }
inline Allocator* TopAllocator() { return allocator_chain_.top(); }
private:
Allocator* allocator_;
void* ptr_;
size_t size_;
platform::Place place_;
framework::SmallStack<Allocator*, 8> allocator_chain_;
friend class Allocator;
friend class AllocationDeleter;
};
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
......@@ -132,9 +153,12 @@ class Allocator {
// True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const;
// This function should not be called directly by users; it is invoked by AllocationDeleter and by wrapping allocators
void Free(Allocation* allocation);
protected:
virtual void Free(Allocation* allocation);
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
virtual void FreeImpl(Allocation* allocation);
private:
friend class AllocationDeleter;
......
......@@ -27,6 +27,7 @@
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
......@@ -43,6 +44,8 @@ DEFINE_int64(
"The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0");
DEFINE_bool(enable_buffered_allocator, false, "Enable buffered_allocator");
namespace paddle {
namespace memory {
namespace allocation {
......@@ -110,8 +113,8 @@ class ChunkedAllocator : public Allocator {
std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get();
std::unique_ptr<Allocator> allocator(new LockedAllocator(
std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
std::shared_ptr<Allocator> allocator(new LockedAllocator(
std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
if (retry_time_ > 0) {
auto* retry_allocator =
......@@ -119,6 +122,10 @@ class ChunkedAllocator : public Allocator {
allocator.reset(retry_allocator);
}
if (FLAGS_enable_buffered_allocator) {
allocator.reset(new MultiBinBufferedAllocator(allocator));
}
return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
}
......
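Editor's note: a hedged sketch of how CreateAllocatorWithChunk() composes the per-chunk allocator after this change. The function name and parameters below (BuildChunkAllocatorSketch, retry_time_ms, enable_buffered) are hypothetical stand-ins, and the RetryAllocator construction is assumed from its declaration later in this commit since that hunk is truncated above.

// Hedged composition sketch (assumes the headers already included in allocator_facade.cc).
std::shared_ptr<Allocator> BuildChunkAllocatorSketch(Allocation* chunk,
                                                     int64_t retry_time_ms,
                                                     bool enable_buffered) {
  std::shared_ptr<Allocator> allocator(new LockedAllocator(
      std::shared_ptr<Allocator>(new BestFitAllocator(chunk))));
  if (retry_time_ms > 0) {
    // Assumed construction; the retry hunk is elided in the diff above.
    allocator.reset(new RetryAllocator(allocator, static_cast<size_t>(retry_time_ms)));
  }
  if (enable_buffered) {
    allocator.reset(new MultiBinBufferedAllocator(allocator));  // size-binned free cache
  }
  // Every pointer handed out is finally 64-byte aligned.
  return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
}

Since LockedAllocator, RetryAllocator and MultiBinBufferedAllocator now forward the same Allocation object instead of wrapping it, each is pushed onto that allocation's allocator chain and unwound in reverse order on Free(); AlignedAllocator still returns its own AlignedAllocation.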
......@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
}
return num;
}
void BestFitAllocator::Free(Allocation* allocation) {
void BestFitAllocator::FreeImpl(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(bf_allocation,
"The input allocation is not BestFitAllocation.");
......
......@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void InsertFreeNode(const ListIt& it);
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
......
......@@ -22,11 +22,11 @@ namespace paddle {
namespace memory {
namespace allocation {
BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
: underlying_allocator_(std::move(allocator)) {
PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_,
"Underlying allocator of BufferedAllocator must be unmanaged");
"Underlying allocator of BufferedAllocator must not be null");
if (underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex());
}
......@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while (!allocations_.empty()) { // free the largest
auto it = --allocations_.end();
cur += it->second->size();
delete it->second.release();
underlying_allocator_->Free(it->second.release());
allocations_.erase(it);
if (cur >= size) return;
}
}
bool BufferedAllocator::IsAllocThreadSafe() const {
return this->underlying_allocator_->IsAllocThreadSafe();
}
void BufferedAllocator::Free(Allocation *allocation) {
bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
void BufferedAllocator::FreeImpl(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
allocations_.emplace(allocation->size(), AllocationPtr(allocation));
}
Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
{
platform::LockGuardPtr<std::mutex> guard(mtx_);
......@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (it != allocations_.end() && it->first < size * 2) {
AllocationPtr result(std::move(it->second));
allocations_.erase(it);
return new AllocationWithUnderlying(std::move(result));
return result.release();
}
}
try {
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
} catch (BadAlloc &) {
FreeCache(size);
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
}
}
......
......@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_
class BufferedAllocator : public Allocator {
public:
explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
~BufferedAllocator();
......@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void FreeCache(size_t size);
protected:
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
std::unique_ptr<Allocator> underlying_allocator_;
std::shared_ptr<Allocator> underlying_allocator_;
std::multimap<size_t, AllocationPtr> allocations_;
std::unique_ptr<std::mutex> mtx_;
};
......
......@@ -64,7 +64,7 @@ class StubAllocator : public Allocator {
size_t GetFreeCount() const { return destruct_count_; }
protected:
void Free(Allocation *allocation) override {
void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
......
......@@ -25,7 +25,7 @@ CPUAllocation::CPUAllocation(void *ptr, size_t size)
bool CPUAllocator::IsAllocThreadSafe() const { return true; }
void CPUAllocator::Free(Allocation *allocation) {
void CPUAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
free(allocation->ptr());
delete allocation;
......
......@@ -43,7 +43,7 @@ class CPUAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
};
} // namespace allocation
......
......@@ -23,13 +23,14 @@ namespace paddle {
namespace memory {
namespace allocation {
bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
void CUDAAllocator::Free(Allocation* allocation) {
void CUDAAllocator::FreeImpl(Allocation* allocation) {
platform::CUDADeviceGuard guard(place_.device);
auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
place_);
PADDLE_ENFORCE(cudaFree(allocation->ptr()));
VLOG(2) << "cudaFree is called";
delete allocation;
}
Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
......
......@@ -35,7 +35,7 @@ class CUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
......
......@@ -336,7 +336,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return new Allocation(ptr, size, place_);
}
void LegacyAllocator::Free(Allocation *allocation) {
void LegacyAllocator::FreeImpl(Allocation *allocation) {
boost::apply_visitor(
legacy::FreeVisitor(allocation->ptr(), allocation->size()),
allocation->place());
......
......@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected:
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
private:
platform::Place place_;
......
......@@ -23,26 +23,24 @@ namespace allocation {
bool LockedAllocator::IsAllocThreadSafe() const { return true; }
LockedAllocator::LockedAllocator(
std::unique_ptr<Allocator> &&underlying_allocator)
std::shared_ptr<Allocator> underlying_allocator)
: underlying_allocator_(std::move(underlying_allocator)) {
PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
if (!underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex());
}
}
void LockedAllocator::Free(Allocation *allocation) {
{
platform::LockGuardPtr<std::mutex> guard(mtx_);
reinterpret_cast<AllocationWithUnderlying *>(allocation)
->allocation_.reset(); // Destroy inner allocation
}
delete allocation;
void LockedAllocator::FreeImpl(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
underlying_allocator_->Free(allocation);
}
Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -24,15 +24,15 @@ namespace allocation {
// An allocator that makes the underlying allocator thread safe.
class LockedAllocator : public Allocator {
public:
explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
std::unique_ptr<Allocator> underlying_allocator_;
std::shared_ptr<Allocator> underlying_allocator_;
std::unique_ptr<std::mutex> mtx_;
};
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <algorithm>
#include <limits>
#include "paddle/fluid/platform/lock_guard_ptr.h"
DEFINE_double(tolerant_times, 2,
"The tolerance multiple of the requested size when reusing cached allocations of buffered_allocator");
namespace paddle {
namespace memory {
namespace allocation {
static void CheckAndModifyMemoryDivisionPlan(
std::vector<size_t> *division_plan) {
// Check whether the division plan is strictly sorted
bool is_strictly_sorted = true;
for (size_t i = 1; i < division_plan->size(); ++i) {
if ((*division_plan)[i - 1] >= (*division_plan)[i]) {
is_strictly_sorted = false;
break;
}
}
PADDLE_ENFORCE(is_strictly_sorted, "Division plan must be strictly sorted");
// Prepend 0 to and drop a trailing SIZE_MAX from the division plan to simplify the binary search code
if (division_plan->empty() || division_plan->front() != 0) {
division_plan->insert(division_plan->begin(), 0);
}
constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
if (division_plan->back() == kSizeTypeMax) {
division_plan->pop_back();
}
PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
}
static std::vector<size_t> GetDefaultDivisionPlan() {
std::vector<size_t> plan;
for (size_t i = 0; i < sizeof(size_t) * 8; ++i) {
plan.push_back(static_cast<size_t>(1) << i);
}
return plan;
}
inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
size_t size) {
return static_cast<size_t>(std::upper_bound(bins.begin(), bins.end(), size) -
bins.begin() - 1);
}
inline static size_t TolerantUpperSize(size_t size) {
return static_cast<size_t>(size * FLAGS_tolerant_times);
}
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator)
: MultiBinBufferedAllocator(std::move(underlying_allocator),
GetDefaultDivisionPlan()) {}
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator,
const std::vector<size_t> &division_plan)
: underlying_allocator_(std::move(underlying_allocator)),
division_plan_(division_plan) {
CheckAndModifyMemoryDivisionPlan(&division_plan_);
allocations_.resize(division_plan_.size());
mtx_.resize(division_plan_.size());
if (underlying_allocator_->IsAllocThreadSafe()) {
for (auto &mtx : mtx_) {
mtx.reset(new std::mutex());
}
}
VLOG(1) << "FLAGS_tolerant_times = " << FLAGS_tolerant_times;
}
void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size());
{
platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
allocations_[bin_index].emplace(allocation->size(),
AllocationPtr(allocation));
}
}
void MultiBinBufferedAllocator::FreeCache(size_t size, size_t bin_index) {
size_t accumulated_size = 0;
// FIXME(zjl): free the largest first when there is no extra
for (size_t i = allocations_.size() - 1; i != static_cast<size_t>(-1); --i) {
platform::LockGuardPtr<std::mutex> lock(mtx_[i]);
if (allocations_[i].empty()) continue;
auto it = --allocations_[i].end();
do {
accumulated_size += it->second->size();
underlying_allocator_->Free(it->second.release());
allocations_[i].erase(it--);
if (accumulated_size >= size) {
return;
}
} while (!allocations_[i].empty());
}
}
Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
auto upper_size = TolerantUpperSize(size);
for (; upper_size >= division_plan_[bin_index]; ++bin_index) {
auto &allocation = allocations_[bin_index];
platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
auto it = allocation.lower_bound(size);
if (it != allocation.end() && it->second->size() < upper_size) {
auto ret = std::move(it->second);
allocation.erase(it);
return ret.release();
}
}
try {
return underlying_allocator_->Allocate(size, attr).release();
} catch (BadAlloc &) {
VLOG(2) << "BadAlloc raises, try to free " << size << " caches";
FreeCache(size, bin_index);
return underlying_allocator_->Allocate(size, attr).release();
}
}
} // namespace allocation
} // namespace memory
} // namespace paddle
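Editor's note: a small worked example of the bin lookup in AllocateImpl above, assuming the default division plan (powers of two, with 0 prepended by CheckAndModifyMemoryDivisionPlan) and the default FLAGS_tolerant_times of 2. Standalone illustration, not part of the commit.

// Worked example of FindDivisionPlanBinIndex / TolerantUpperSize (illustration only).
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<std::size_t> plan;
  plan.push_back(0);  // prepended by CheckAndModifyMemoryDivisionPlan
  for (std::size_t i = 0; i < sizeof(std::size_t) * 8; ++i) {
    plan.push_back(static_cast<std::size_t>(1) << i);
  }

  const std::size_t size = 1500;
  const std::size_t upper_size = size * 2;  // TolerantUpperSize with tolerant_times == 2

  // FindDivisionPlanBinIndex: index of the largest bin lower bound not exceeding `size`.
  std::size_t bin_index = static_cast<std::size_t>(
      std::upper_bound(plan.begin(), plan.end(), size) - plan.begin() - 1);
  assert(plan[bin_index] == 1024);

  // AllocateImpl scans bins while their lower bound does not exceed upper_size,
  // i.e. the 1024-byte and the 2048-byte bin for this request.
  std::vector<std::size_t> scanned;
  for (std::size_t i = bin_index; upper_size >= plan[i]; ++i) {
    scanned.push_back(plan[i]);
  }
  assert(scanned.size() == 2 && scanned[0] == 1024 && scanned[1] == 2048);
  return 0;
}

So a 1500-byte request may reuse any cached allocation between 1500 and 2999 bytes from the 1024- or 2048-byte bins before falling back to the underlying allocator, which in turn falls back to FreeCache() on BadAlloc.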
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class MultiBinBufferedAllocator : public Allocator {
public:
explicit MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator);
MultiBinBufferedAllocator(std::shared_ptr<Allocator> underlying_allocator,
const std::vector<size_t>& division_plan);
bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; }
void ClearCache() { FreeCache(static_cast<size_t>(-1), 0); }
protected:
Allocation* AllocateImpl(size_t size, Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private:
void FreeCache(size_t size, size_t bin_index);
std::shared_ptr<Allocator> underlying_allocator_;
std::vector<std::multimap<size_t, AllocationPtr>> allocations_;
std::vector<size_t> division_plan_;
std::vector<std::unique_ptr<std::mutex>> mtx_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
Allocation *allocation, bool thread_safe) {
std::shared_ptr<Allocator> allocator(new BestFitAllocator(allocation));
if (thread_safe) {
allocator.reset(new LockedAllocator(std::move(allocator)));
}
return std::make_shared<MultiBinBufferedAllocator>(allocator);
}
TEST(buffered_allocator, thread_safety) {
std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
{
auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
}
{
auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
}
}
class StubAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class StubAllocator : public Allocator {
public:
void ResetCounter() {
construct_count_ = 0;
destruct_count_ = 0;
}
size_t GetAllocCount() const { return construct_count_; }
size_t GetFreeCount() const { return destruct_count_; }
protected:
void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
++destruct_count_;
delete allocation;
}
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
++construct_count_;
if (size == 0) {
return new StubAllocation(nullptr, 0, platform::CPUPlace());
} else {
return new StubAllocation(new uint8_t[size], size, platform::CPUPlace());
}
}
private:
size_t construct_count_ = 0;
size_t destruct_count_ = 0;
};
constexpr size_t kZero = 0;
constexpr size_t kOne = 1;
constexpr size_t kTwo = 2;
TEST(buffered_allocator, lazy_free) {
std::vector<int> original_alloc_size({1022, 1023, 1024, 1025, 1026});
for (auto alloc_size : original_alloc_size) {
auto stub_allocator = std::make_shared<StubAllocator>();
auto *underlying_allocator = stub_allocator.get();
auto allocator =
std::make_shared<MultiBinBufferedAllocator>(stub_allocator);
{
underlying_allocator->ResetCounter();
auto x = allocator->Allocate(alloc_size, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
{
underlying_allocator->ResetCounter();
auto x = allocator->Allocate(900, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
auto y = allocator->Allocate(2048, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
y = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
{
underlying_allocator->ResetCounter();
allocator->ClearCache();
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
}
}
}
TEST(buffered_allocator, garbage_collection) {
std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
auto allocator = GetBufferedAllocator(chunk.get(), false);
auto x1 = allocator->Allocate(1600, allocator->kDefault);
auto x2 = allocator->Allocate(400, allocator->kDefault);
x1 = nullptr;
x2 = nullptr;
auto x3 = allocator->Allocate(1600, allocator->kDefault);
ASSERT_NE(x3, nullptr);
ASSERT_NE(x3->ptr(), nullptr);
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -20,7 +20,7 @@ namespace paddle {
namespace memory {
namespace allocation {
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
void CPUPinnedAllocator::Free(Allocation *allocation) {
void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
delete allocation;
......
......@@ -31,7 +31,7 @@ class CPUPinnedAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
};
......
......@@ -18,25 +18,15 @@ namespace paddle {
namespace memory {
namespace allocation {
bool RetryAllocator::IsAllocThreadSafe() const {
return underlying_allocator_->IsAllocThreadSafe();
}
void RetryAllocator::Free(Allocation* allocation) {
void RetryAllocator::FreeImpl(Allocation* allocation) {
// Delete underlying allocation first.
reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset();
{
// notify all waited allocators, they can try to allocate memory after free.
std::lock_guard<std::mutex> lock(mutex_);
cv_.notify_all();
}
delete allocation;
underlying_allocator_->Free(allocation);
cv_.notify_all();
}
Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
auto alloc_func = [&, this]() {
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
};
// In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time
......
......@@ -24,32 +24,25 @@ namespace paddle {
namespace memory {
namespace allocation {
class RetryAllocator;
class RetryAllocator : public Allocator {
public:
RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
: underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
EnforceCheck();
}
bool IsAllocThreadSafe() const override;
private:
void EnforceCheck() {
PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_.get(),
"UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
underlying_allocator_,
"UnderlyingAllocator of RetryAllocator must not be null");
PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
"UnderlyingAllocator of RetryAllocator must be thread-safe");
}
bool IsAllocThreadSafe() const override { return true; }
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
std::unique_ptr<Allocator> underlying_allocator_;
std::shared_ptr<Allocator> underlying_allocator_;
std::chrono::milliseconds retry_time_;
std::mutex mutex_;
std::condition_variable cv_;
......@@ -57,8 +50,6 @@ class RetryAllocator : public Allocator {
// For debugging, we could add an atomic integer to record how much memory
// is waiting to be allocated
// std::atomic<size_t> waited_allocate_size_{0};
friend class RetryAllocation;
};
} // namespace allocation
......
......@@ -22,6 +22,14 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
return underlying_allocator_->IsAllocThreadSafe();
}
void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
if (dynamic_cast<ZeroSizeAllocation *>(allocation)) {
delete allocation;
} else {
underlying_allocator_->Free(allocation);
}
}
Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (size == 0) {
return new ZeroSizeAllocation(place_);
......
......@@ -39,6 +39,7 @@ class ZeroSizeAllocator : public Allocator {
protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private:
std::shared_ptr<Allocator> underlying_allocator_;
......
......@@ -29,38 +29,31 @@ namespace paddle {
namespace platform {
namespace alloc = memory::allocation;
TemporaryAllocation::TemporaryAllocation(
alloc::AllocationPtr &&underlying_allocation)
: Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
underlying_allocation->place()),
underlying_allocation_(std::move(underlying_allocation)) {}
TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
}
bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
void TemporaryAllocator::Release(const std::function<void()> &callback) {
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations;
std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
{
std::unique_lock<std::mutex> lock(mtx_);
callback();
t_allocations.swap(temp_mem_map_);
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
wait_delete_mem_ = 0;
}
alloc::AllocationDeleter deleter;
for (auto tmp : *t_allocations) {
VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
<< " size: " << tmp.second->size();
delete tmp.second;
deleter(tmp.second);
}
}
void TemporaryAllocator::Free(alloc::Allocation *allocation) {
auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(temp_allocation);
void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
if (platform::is_gpu_place(temp_allocation->place())) {
PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
"The place should be the same.");
......@@ -84,7 +77,6 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) {
}
VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
<< " size: " << temp_allocation->size();
delete temp_allocation;
}
size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
......@@ -119,11 +111,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
}
// If no available allocation is found, get one from
// AllocatorFacadeInstance.
auto raw_allocation =
alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
return temp_mem;
return temp_mem.release();
}
} // namespace platform
......
......@@ -22,14 +22,6 @@
namespace paddle {
namespace platform {
class TemporaryAllocation : public memory::allocation::Allocation {
public:
explicit TemporaryAllocation(
memory::allocation::AllocationPtr &&underlying_allocation);
memory::allocation::AllocationPtr underlying_allocation_;
};
/*! \brief the TemporaryAllocator is used to allocate temporary allocations
* which are used by CUDA's async operations.
*
......@@ -56,7 +48,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void SetCallback(const std::function<void()> &callback);
protected:
void Free(memory::allocation::Allocation *allocation) override;
void FreeImpl(memory::allocation::Allocation *allocation) override;
memory::allocation::Allocation *AllocateImpl(
size_t size, memory::allocation::Allocator::Attr attr) override;
......@@ -65,8 +57,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform::Place place_;
// When the allocation is not held by any variable, it should be placed
// into temp_mem_map_ immediately.
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
nullptr};
std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
temp_mem_map_{nullptr};
std::mutex mtx_;
size_t wait_delete_mem_{0};
std::function<void()> callback_;
......
......@@ -308,6 +308,7 @@ PYBIND11_MODULE(core, m) {
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<float>(place);
})
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>)
......
......@@ -129,6 +129,7 @@ def __bootstrap__():
'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
'fast_eager_deletion_mode', 'allocator_strategy',
'enable_buffered_allocator', 'tolerant_times',
'reader_queue_speed_test_mode', 'print_sub_graph_dir',
'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
'enable_parallel_graph', 'multiple_of_cupti_buffer_size',
......