未验证 提交 c6189637 编写于 作者: Z Zeng Jinle 提交者: GitHub

Fix allocator bug (#16712)

* Revert "Revert "Fix allocator bug""

This reverts commit 174d0d0b.

* Revert "fix travis ci"

This reverts commit 5656fa9f.

test=develop

* add inlined_vector.h, test=develop

* add inlined_vector_test,test=develop
上级 03577151
......@@ -225,6 +225,8 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test(tuple_test SRCS tuple_test.cc )
cc_test(inlined_vector_test SRCS inlined_vector_test.cc)
if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
class InlinedVector {
static_assert(N > 0, "N must be larger than 0");
public:
inline InlinedVector() { len_ = 0; }
inline size_t size() const { return len_; }
inline T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }
inline const T& operator[](size_t i) const {
return i < N ? head_[i] : tail_[i - N];
}
inline void emplace_back(const T& item) {
if (LIKELY(len_ < N)) {
head_[len_++] = item;
} else {
tail_.emplace_back(item);
++len_;
}
}
inline void pop_back() {
if (UNLIKELY(len_ > N)) {
tail_.pop_back();
}
--len_;
}
inline T& back() {
if (LIKELY(len_ <= N)) {
return head_[len_ - 1];
} else {
return tail_.back();
}
}
private:
T head_[N];
size_t len_;
std::vector<T> tail_;
};
} // namespace framework
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
#include "gtest/gtest.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
static std::vector<T> ToStdVector(const framework::InlinedVector<T, N> &vec) {
std::vector<T> std_vec;
std_vec.reserve(vec.size());
for (size_t i = 0; i < vec.size(); ++i) {
std_vec.emplace_back(vec[i]);
}
return std_vec;
}
template <size_t N>
void InlinedVectorCheck(size_t n) {
std::srand(std::time(nullptr));
std::vector<int> std_vec;
framework::InlinedVector<int, N> vec;
for (size_t i = 0; i < n; ++i) {
int value = rand(); // NOLINT
std_vec.emplace_back(value);
vec.emplace_back(value);
CHECK_EQ(std_vec.size(), vec.size());
CHECK_EQ(std_vec.back(), vec.back());
CHECK_EQ(vec.back(), value);
}
bool is_equal = (std_vec == ToStdVector(vec));
CHECK_EQ(is_equal, true);
for (size_t i = 0; i < n; ++i) {
CHECK_EQ(std_vec.size(), vec.size());
CHECK_EQ(std_vec.back(), vec.back());
std_vec.pop_back();
vec.pop_back();
CHECK_EQ(std_vec.size(), vec.size());
}
CHECK_EQ(std_vec.size(), static_cast<size_t>(0));
CHECK_EQ(vec.size(), static_cast<size_t>(0));
}
TEST(inlined_vector, inlined_vector) {
for (size_t i = 0; i < 20; ++i) {
InlinedVectorCheck<1>(i);
InlinedVectorCheck<10>(i);
InlinedVectorCheck<15>(i);
InlinedVectorCheck<20>(i);
InlinedVectorCheck<21>(i);
InlinedVectorCheck<25>(i);
}
}
} // namespace framework
} // namespace paddle
......@@ -366,9 +366,6 @@ class ExecutionContext {
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
allocation_ptr, deleter);
PADDLE_ENFORCE(
dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
"The AllocationPtr must be TemporaryAllocation.");
PADDLE_ENFORCE_GE(allocation_ptr->size(),
framework::product(dim) * sizeof(T));
......
......@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
if (WITH_GPU)
......@@ -37,30 +38,19 @@ else ()
set(AllocatorFacadeDeps)
endif()
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags)
cc_library(allocator_facade SRCS allocator_facade.cc DEPS
${AllocatorFacadeDeps}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps})
cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
if (WITH_TESTING)
set_tests_properties(retry_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
endif()
......
......@@ -94,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_->Allocate(size + kAlignment, attr);
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
}
void FreeImpl(Allocation* allocation) override { delete allocation; }
};
} // namespace allocation
......
......@@ -19,24 +19,11 @@
namespace paddle {
namespace memory {
namespace allocation {
Allocation::~Allocation() {}
Allocator::~Allocator() {}
bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
auto ptr = AllocateImpl(size, attr);
ptr->set_allocator(this);
return AllocationPtr(ptr);
}
void Allocator::Free(Allocation* allocation) { delete allocation; }
const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
void AllocationDeleter::operator()(Allocation* allocation) const {
auto* allocator = allocation->allocator();
void Allocator::FreeImpl(Allocation* allocation) {
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
}
......
......@@ -15,8 +15,10 @@
#pragma once
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
......@@ -26,40 +28,73 @@ namespace allocation {
// Exception when `Alloc`/`AllocShared` failed
class BadAlloc : public std::exception {
public:
explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {}
const char* what() const noexcept override;
inline explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {}
inline const char* what() const noexcept override { return msg_.c_str(); }
private:
std::string msg_;
};
class Allocation;
class AllocationDeleter {
public:
void operator()(Allocation* allocation) const;
};
class Allocator;
// Allocation is the object holding the actually pointer. Use
// `Allocation::ptr()` will returns the pointer that allocated.
//
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate a RetryAllocator to any allocator to perform allocation retry when
* first allocation request fails.
*
* Explanations of Allocator design is as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., something what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class Allocation {
public:
Allocation(void* ptr, size_t size, platform::Place place)
: allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
inline Allocation(void* ptr, size_t size, platform::Place place)
: ptr_(ptr), size_(size), place_(place) {}
Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete;
Allocation(Allocation&& o) = delete;
Allocation& operator=(Allocation&& o) = delete;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void* ptr() const { return ptr_; }
inline void* ptr() const { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
......@@ -70,24 +105,38 @@ class Allocation {
// The raw pointer might not aligned, so an offset might be added to raw
// the pointer. The size of this allocation will be
// `size + kAlignemnt - offset`.
size_t size() const { return size_; }
inline size_t size() const { return size_; }
inline const platform::Place& place() const { return place_; }
const platform::Place& place() const { return place_; }
virtual ~Allocation() {}
Allocator* allocator() { return allocator_; }
private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
decorated_allocators_.emplace_back(allocator);
}
void set_allocator(Allocator* allocator) { allocator_ = allocator; }
inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); }
virtual ~Allocation();
inline Allocator* TopDecoratedAllocator() {
return decorated_allocators_.back();
}
private:
Allocator* allocator_;
void* ptr_;
size_t size_;
platform::Place place_;
};
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
// NOTE(zjl): Since decorated_allocators_ is usually a small vector
// We reserve a small buffer to it to prevent frequent heap allocation
static constexpr size_t kReserveAllocatorNum = 8;
using DecoratedAllocatorStack =
framework::InlinedVector<Allocator*, kReserveAllocatorNum>;
DecoratedAllocatorStack decorated_allocators_;
friend class Allocator;
};
// Base interface class of memory Allocator.
// To allocate a memory, allocator needs two parameters:
......@@ -126,22 +175,42 @@ class Allocator {
NumOfAttrs = 5 // The number of all attributes. It is used internally.
};
virtual ~Allocator();
virtual ~Allocator() {}
class AllocationDeleter {
public:
inline void operator()(Allocation* allocation) const {
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
}
};
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
// Allocate an allocation.
AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault);
inline AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault) {
auto ptr = AllocateImpl(size, attr);
ptr->RegisterDecoratedAllocator(this);
return AllocationPtr(ptr);
}
// This function should not be called outside Allocator class
inline void Free(Allocation* allocation) {
allocation->PopDecoratedAllocator();
FreeImpl(allocation);
}
// True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const;
protected:
virtual void Free(Allocation* allocation);
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
private:
friend class AllocationDeleter;
virtual void FreeImpl(Allocation* allocation);
};
using AllocationDeleter = Allocator::AllocationDeleter;
using AllocationPtr = Allocator::AllocationPtr;
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -49,6 +49,17 @@ namespace paddle {
namespace memory {
namespace allocation {
static inline std::shared_ptr<Allocator> WrapRetryAllocator(
std::shared_ptr<Allocator> allocator, int64_t retry_time) {
if (retry_time > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time);
allocator.reset(retry_allocator);
}
return allocator;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime.
class CPUManagedAllocator : public Allocator {
public:
......@@ -112,14 +123,10 @@ class ChunkedAllocator : public Allocator {
std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get();
std::unique_ptr<Allocator> allocator(new LockedAllocator(
std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
std::shared_ptr<Allocator> allocator(new LockedAllocator(
std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
if (retry_time_ > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time_);
allocator.reset(retry_allocator);
}
allocator = WrapRetryAllocator(allocator, retry_time_);
return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
}
......@@ -190,13 +197,23 @@ class AllocatorFacadePrivate {
~AllocatorFacadePrivate() = default;
AllocatorFacadePrivate() {
if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) {
InitLegacyAllocator();
} else {
InitCPUAllocator();
InitCUDAAllocator();
InitCUDAPinnedAllocator();
WrapZeroSizeAllocator();
auto strategy = GetAllocatorStrategy();
switch (strategy) {
case AllocatorStrategy::kLegacy: {
InitLegacyAllocator();
break;
}
case AllocatorStrategy::kNaiveBestFit: {
InitCPUAllocator();
InitCUDAAllocator();
InitCUDAPinnedAllocator();
WrapZeroSizeAllocator();
break;
}
default: {
PADDLE_THROW("Unsupported allocator strategy: %d",
static_cast<int>(strategy));
}
}
}
......@@ -254,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) {
return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(),
AllocationDeleter());
return std::shared_ptr<Allocation>(Alloc(place, size, attr));
}
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,
......
......@@ -19,16 +19,22 @@
DEFINE_string(
allocator_strategy, "legacy",
"The allocation strategy. Legacy means the original allocator of Fluid."
"New means the experimental allocators of Fluid. in [legacy, new]");
"naive_best_fit means the experimental best fit allocator. "
"allocator. Enum in [legacy, naive_best_fit].");
namespace paddle {
namespace memory {
namespace allocation {
static AllocatorStrategy GetStrategyFromFlag() {
return FLAGS_allocator_strategy == "legacy"
? AllocatorStrategy::kLegacy
: AllocatorStrategy::kNaiveBestFit;
if (FLAGS_allocator_strategy == "legacy") {
return AllocatorStrategy::kLegacy;
} else if (FLAGS_allocator_strategy == "naive_best_fit") {
return AllocatorStrategy::kNaiveBestFit;
} else {
PADDLE_THROW("Unsupported allocator strategy: %s",
FLAGS_allocator_strategy);
}
}
AllocatorStrategy GetAllocatorStrategy() {
......
......@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
}
return num;
}
void BestFitAllocator::Free(Allocation* allocation) {
void BestFitAllocator::FreeImpl(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(bf_allocation,
"The input allocation is not BestFitAllocation.");
......
......@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void InsertFreeNode(const ListIt& it);
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
......
......@@ -22,11 +22,11 @@ namespace paddle {
namespace memory {
namespace allocation {
BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
: underlying_allocator_(std::move(allocator)) {
PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_,
"Underlying allocator of BufferedAllocator must be unmanaged");
"Underlying allocator of BufferedAllocator must not be null");
if (underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex());
}
......@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while (!allocations_.empty()) { // free the largest
auto it = --allocations_.end();
cur += it->second->size();
delete it->second.release();
underlying_allocator_->Free(it->second.release());
allocations_.erase(it);
if (cur >= size) return;
}
}
bool BufferedAllocator::IsAllocThreadSafe() const {
return this->underlying_allocator_->IsAllocThreadSafe();
}
void BufferedAllocator::Free(Allocation *allocation) {
bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
void BufferedAllocator::FreeImpl(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
allocations_.emplace(allocation->size(), AllocationPtr(allocation));
}
Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
{
platform::LockGuardPtr<std::mutex> guard(mtx_);
......@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (it != allocations_.end() && it->first < size * 2) {
AllocationPtr result(std::move(it->second));
allocations_.erase(it);
return new AllocationWithUnderlying(std::move(result));
return result.release();
}
}
try {
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
} catch (BadAlloc &) {
FreeCache(size);
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
}
}
......
......@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_
class BufferedAllocator : public Allocator {
public:
explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
~BufferedAllocator();
......@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void FreeCache(size_t size);
protected:
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
std::unique_ptr<Allocator> underlying_allocator_;
std::shared_ptr<Allocator> underlying_allocator_;
std::multimap<size_t, AllocationPtr> allocations_;
std::unique_ptr<std::mutex> mtx_;
};
......
......@@ -14,7 +14,6 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h>
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
......@@ -66,7 +65,7 @@ class StubAllocator : public Allocator {
size_t GetFreeCount() const { return destruct_count_; }
protected:
void Free(Allocation *allocation) override {
void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
......
......@@ -20,25 +20,27 @@ namespace paddle {
namespace memory {
namespace allocation {
CPUAllocation::CPUAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CPUPlace()) {}
bool CPUAllocator::IsAllocThreadSafe() const { return true; }
void CPUAllocator::Free(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
free(allocation->ptr());
void CPUAllocator::FreeImpl(Allocation *allocation) {
void *p = allocation->ptr();
#ifdef _WIN32
_aligned_free(p);
#else
free(p);
#endif
delete allocation;
}
Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
void *ptr;
auto status = posix_memalign(&ptr, kAlignment, size);
if (UNLIKELY(status) != 0) {
throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d",
size, status));
}
return new CPUAllocation(ptr, size);
void *p;
#ifdef _WIN32
p = _aligned_malloc(size, kAlignment);
#else
PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!",
size);
#endif
return new Allocation(p, size, platform::CPUPlace());
}
} // namespace allocation
} // namespace memory
......
......@@ -31,19 +31,13 @@ namespace allocation {
//
// NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
// an open-sourced allocator into Paddle.
class CPUAllocator;
class CPUAllocation : public Allocation {
public:
CPUAllocation(void* ptr, size_t size);
};
class CPUAllocator : public Allocator {
public:
constexpr static size_t kAlignment = 64u;
constexpr static size_t kAlignment = 4096UL;
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
};
} // namespace allocation
......
......@@ -23,15 +23,14 @@ namespace paddle {
namespace memory {
namespace allocation {
bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
void CUDAAllocator::Free(Allocation* allocation) {
void CUDAAllocator::FreeImpl(Allocation* allocation) {
platform::CUDADeviceGuard guard(place_.device);
auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(allocation->place()),
place_);
PADDLE_ENFORCE(cudaFree(allocation->ptr()));
delete allocation;
}
Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::CUDADeviceGuard guard(place_.device);
void* ptr;
......@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status)));
}
return new CUDAAllocation(ptr, size, platform::Place(place_));
return new Allocation(ptr, size, platform::Place(place_));
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -20,13 +20,6 @@ namespace paddle {
namespace memory {
namespace allocation {
// CUDA System allocator and allocation.
// Just a flag type.
class CUDAAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class CUDAAllocator : public Allocator {
public:
explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
......@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
......
......@@ -347,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return tmp_alloc;
}
void LegacyAllocator::Free(Allocation *allocation) {
void LegacyAllocator::FreeImpl(Allocation *allocation) {
boost::apply_visitor(
legacy::FreeVisitor(allocation->ptr(), allocation->size()),
allocation->place());
......
......@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected:
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
private:
platform::Place place_;
......
......@@ -17,6 +17,7 @@
#include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace paddle {
namespace memory {
namespace allocation {
......@@ -24,26 +25,24 @@ namespace allocation {
bool LockedAllocator::IsAllocThreadSafe() const { return true; }
LockedAllocator::LockedAllocator(
std::unique_ptr<Allocator> &&underlying_allocator)
std::shared_ptr<Allocator> underlying_allocator)
: underlying_allocator_(std::move(underlying_allocator)) {
PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
if (!underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex());
}
}
void LockedAllocator::Free(Allocation *allocation) {
{
platform::LockGuardPtr<std::mutex> guard(mtx_);
reinterpret_cast<AllocationWithUnderlying *>(allocation)
->allocation_.reset(); // Destroy inner allocation
}
delete allocation;
void LockedAllocator::FreeImpl(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
underlying_allocator_->Free(allocation);
}
Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -24,15 +24,15 @@ namespace allocation {
// A allocator to make underlying allocator thread safe.
class LockedAllocator : public Allocator {
public:
explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
std::unique_ptr<Allocator> underlying_allocator_;
std::shared_ptr<Allocator> underlying_allocator_;
std::unique_ptr<std::mutex> mtx_;
};
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_string(allocator_strategy);
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
FLAGS_allocator_strategy = "naive_best_fit";
auto &instance = AllocatorFacade::Instance();
platform::Place place;
size_t size = 1024;
{
place = platform::CPUPlace();
size = 1024;
auto cpu_allocation = instance.Alloc(place, size);
ASSERT_NE(cpu_allocation, nullptr);
ASSERT_NE(cpu_allocation->ptr(), nullptr);
ASSERT_EQ(cpu_allocation->place(), place);
ASSERT_EQ(cpu_allocation->size(), size);
}
#ifdef PADDLE_WITH_CUDA
{
place = platform::CUDAPlace(0);
size = 1024;
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
// Allocate 2GB gpu memory
place = platform::CUDAPlace(0);
size = 2 * static_cast<size_t>(1 << 30);
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
place = platform::CUDAPinnedPlace();
size = (1 << 20);
auto cuda_pinned_allocation =
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
ASSERT_NE(cuda_pinned_allocation, nullptr);
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
ASSERT_EQ(cuda_pinned_allocation->place(), place);
ASSERT_GE(cuda_pinned_allocation->size(), size);
}
#endif
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -20,20 +20,15 @@ namespace paddle {
namespace memory {
namespace allocation {
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
void CPUPinnedAllocator::Free(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
delete allocation;
}
Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
Allocator::Attr attr) {
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void *ptr;
PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
return new CPUPinnedAllocation(ptr, size);
return new Allocation(ptr, size, platform::CUDAPinnedPlace());
}
} // namespace allocation
} // namespace memory
......
......@@ -20,18 +20,12 @@ namespace memory {
namespace allocation {
// Allocator uses `cudaHostAlloc`
class CPUPinnedAllocation : public Allocation {
public:
CPUPinnedAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
};
class CPUPinnedAllocator : public Allocator {
public:
bool IsAllocThreadSafe() const override;
protected:
void Free(Allocation *allocation) override;
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
};
......
......@@ -18,25 +18,15 @@ namespace paddle {
namespace memory {
namespace allocation {
bool RetryAllocator::IsAllocThreadSafe() const {
return underlying_allocator_->IsAllocThreadSafe();
}
void RetryAllocator::Free(Allocation* allocation) {
void RetryAllocator::FreeImpl(Allocation* allocation) {
// Delete underlying allocation first.
reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset();
{
// notify all waited allocators, they can try to allocate memory after free.
std::lock_guard<std::mutex> lock(mutex_);
cv_.notify_all();
}
delete allocation;
underlying_allocator_->Free(allocation);
cv_.notify_all();
}
Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
auto alloc_func = [&, this]() {
return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
return underlying_allocator_->Allocate(size, attr).release();
};
// In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time
......
......@@ -25,32 +25,25 @@ namespace paddle {
namespace memory {
namespace allocation {
class RetryAllocator;
class RetryAllocator : public Allocator {
public:
RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
: underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
EnforceCheck();
}
bool IsAllocThreadSafe() const override;
private:
void EnforceCheck() {
PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_.get(),
"UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
underlying_allocator_,
"UnderlyingAllocator of RetryAllocator must not be null");
PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
"UnderlyingAllocator of RetryAllocator must be thread-safe");
}
bool IsAllocThreadSafe() const override { return true; }
protected:
void Free(Allocation* allocation) override;
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private:
std::unique_ptr<Allocator> underlying_allocator_;
std::shared_ptr<Allocator> underlying_allocator_;
std::chrono::milliseconds retry_time_;
std::mutex mutex_;
std::condition_variable cv_;
......@@ -58,8 +51,6 @@ class RetryAllocator : public Allocator {
// For debug, We can add an atomic integer to record how many memory sizes are
// waited to allocate
// std::atomic<size_t> waited_allocate_size_{0};
friend class RetryAllocation;
};
} // namespace allocation
......
......@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (size == 0) {
return new ZeroSizeAllocation(place_);
return new Allocation(nullptr, 0, place_);
} else {
return underlying_allocator_->Allocate(size, attr).release();
}
}
void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
if (allocation->size() == 0) {
delete allocation;
} else {
underlying_allocator_->Free(allocation);
}
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -24,12 +24,6 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr
class ZeroSizeAllocation : public Allocation {
public:
explicit ZeroSizeAllocation(const platform::Place& p)
: Allocation(nullptr, 0, p) {}
};
class ZeroSizeAllocator : public Allocator {
public:
ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator,
......@@ -40,6 +34,7 @@ class ZeroSizeAllocator : public Allocator {
protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private:
std::shared_ptr<Allocator> underlying_allocator_;
......
......@@ -14,7 +14,6 @@
#include "paddle/fluid/platform/temporary_allocator.h"
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64(limit_of_tmp_allocation, -1,
......@@ -31,38 +30,31 @@ namespace paddle {
namespace platform {
namespace alloc = memory::allocation;
TemporaryAllocation::TemporaryAllocation(
alloc::AllocationPtr &&underlying_allocation)
: Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
underlying_allocation->place()),
underlying_allocation_(std::move(underlying_allocation)) {}
TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
}
bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
void TemporaryAllocator::Release(const std::function<void()> &callback) {
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations;
std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
{
std::unique_lock<std::mutex> lock(mtx_);
callback();
t_allocations.swap(temp_mem_map_);
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
wait_delete_mem_ = 0;
}
alloc::AllocationDeleter deleter;
for (auto tmp : *t_allocations) {
VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
<< " size: " << tmp.second->size();
delete tmp.second;
deleter(tmp.second);
}
}
void TemporaryAllocator::Free(alloc::Allocation *allocation) {
auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(temp_allocation);
void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
if (platform::is_gpu_place(temp_allocation->place())) {
PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
"The place should be the same.");
......@@ -86,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) {
}
VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
<< " size: " << temp_allocation->size();
delete temp_allocation;
alloc::AllocationDeleter()(temp_allocation);
}
size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
......@@ -121,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
}
// If not find the the available allocation, get allocation from
// AllocatorFacadeInstance.
auto raw_allocation =
alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
return temp_mem;
return temp_mem.release();
}
} // namespace platform
......
......@@ -23,14 +23,6 @@
namespace paddle {
namespace platform {
class TemporaryAllocation : public memory::allocation::Allocation {
public:
explicit TemporaryAllocation(
memory::allocation::AllocationPtr &&underlying_allocation);
memory::allocation::AllocationPtr underlying_allocation_;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation.
*
......@@ -57,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void SetCallback(const std::function<void()> &callback);
protected:
void Free(memory::allocation::Allocation *allocation) override;
void FreeImpl(memory::allocation::Allocation *allocation) override;
memory::allocation::Allocation *AllocateImpl(
size_t size, memory::allocation::Allocator::Attr attr) override;
......@@ -66,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform::Place place_;
// When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately.
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
nullptr};
std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
temp_mem_map_{nullptr};
std::mutex mtx_;
size_t wait_delete_mem_{0};
std::function<void()> callback_;
......
......@@ -357,6 +357,7 @@ PYBIND11_MODULE(core, m) {
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<float>(place);
})
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>)
......
......@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf(std::cout, fmt, args...);
}
template <typename T>
std::string HumanReadableSize(T size) {
inline std::string HumanReadableSize(double f_size) {
size_t i = 0;
double f_size = static_cast<double>(size);
double orig = f_size;
const std::vector<std::string> units(
{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
while (f_size > 1024) {
while (f_size >= 1024) {
f_size /= 1024;
i++;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册