提交 beb4a86d 编写于 作者: Z zhoukunsheng

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into rank

...@@ -365,6 +365,9 @@ class ExecutionContext { ...@@ -365,6 +365,9 @@ class ExecutionContext {
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>( auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
allocation_ptr, deleter); allocation_ptr, deleter);
PADDLE_ENFORCE(
dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
"The AllocationPtr must be TemporaryAllocation.");
PADDLE_ENFORCE_GE(allocation_ptr->size(), PADDLE_ENFORCE_GE(allocation_ptr->size(),
framework::product(dim) * sizeof(T)); framework::product(dim) * sizeof(T));
......
...@@ -4,7 +4,6 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) ...@@ -4,7 +4,6 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
if (WITH_GPU) if (WITH_GPU)
...@@ -38,20 +37,30 @@ else () ...@@ -38,20 +37,30 @@ else ()
set(AllocatorFacadeDeps) set(AllocatorFacadeDeps)
endif() endif()
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator) cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps}) cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags)
cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy) cc_library(allocator_facade SRCS allocator_facade.cc DEPS
${AllocatorFacadeDeps}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
...@@ -94,8 +94,6 @@ class AlignedAllocator : public ThinAlignedAllocator { ...@@ -94,8 +94,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_->Allocate(size + kAlignment, attr); underlying_allocator_->Allocate(size + kAlignment, attr);
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size); return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
} }
void FreeImpl(Allocation* allocation) override { delete allocation; }
}; };
} // namespace allocation } // namespace allocation
......
...@@ -27,24 +27,16 @@ bool Allocator::IsAllocThreadSafe() const { return false; } ...@@ -27,24 +27,16 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) { AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
auto ptr = AllocateImpl(size, attr); auto ptr = AllocateImpl(size, attr);
ptr->RegisterDecoratedAllocator(this); ptr->set_allocator(this);
return AllocationPtr(ptr); return AllocationPtr(ptr);
} }
void Allocator::FreeImpl(Allocation* allocation) { void Allocator::Free(Allocation* allocation) { delete allocation; }
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
}
void Allocator::Free(Allocation* allocation) {
allocation->PopDecoratedAllocator();
FreeImpl(allocation);
}
const char* BadAlloc::what() const noexcept { return msg_.c_str(); } const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
void AllocationDeleter::operator()(Allocation* allocation) const { void AllocationDeleter::operator()(Allocation* allocation) const {
Allocator* allocator = allocation->TopDecoratedAllocator(); auto* allocator = allocation->allocator();
allocator->Free(allocation); allocator->Free(allocation);
} }
......
...@@ -46,56 +46,13 @@ class Allocator; ...@@ -46,56 +46,13 @@ class Allocator;
// NOTE: this is the base class of Allocation. Each allocator can use its own // NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object. // allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0 // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate
* a RetryAllocator to any allocator to perform allocation retry when first
* allocation request fails.
*
* Explanations of Allocator design is as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., something what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class Allocation { class Allocation {
public: public:
Allocation(void* ptr, size_t size, platform::Place place) Allocation(void* ptr, size_t size, platform::Place place)
: ptr_(ptr), size_(size), place_(place) { : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
// NOTE(zjl): Since decorated_allocators_ is usually a small vector
// We reserve a small buffer to it to prevent frequent heap allocation
// Not quite sure whether we need something like gtl vector.
decorated_allocators_.reserve(8);
}
Allocation(const Allocation& o) = delete; Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete;
Allocation(Allocation&& o) = delete;
Allocation& operator=(Allocation&& o) = delete;
// Returns the holding pointer. // Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method // NOTE: For performance consideration, it is better not to make this method
...@@ -117,31 +74,17 @@ class Allocation { ...@@ -117,31 +74,17 @@ class Allocation {
const platform::Place& place() const { return place_; } const platform::Place& place() const { return place_; }
virtual ~Allocation(); Allocator* allocator() { return allocator_; }
private:
const std::vector<Allocator*>& DecoratedAllocators() const {
return decorated_allocators_;
}
inline void RegisterDecoratedAllocator(Allocator* allocator) {
decorated_allocators_.push_back(allocator);
}
inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); } void set_allocator(Allocator* allocator) { allocator_ = allocator; }
inline Allocator* TopDecoratedAllocator() { virtual ~Allocation();
return decorated_allocators_.back();
}
private: private:
Allocator* allocator_;
void* ptr_; void* ptr_;
size_t size_; size_t size_;
platform::Place place_; platform::Place place_;
std::vector<Allocator*> decorated_allocators_;
friend class Allocator;
friend class AllocationDeleter;
}; };
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>; using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
...@@ -191,12 +134,9 @@ class Allocator { ...@@ -191,12 +134,9 @@ class Allocator {
// True if the `Allocate` is thread safe. // True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const; virtual bool IsAllocThreadSafe() const;
// This function should not be called outside
void Free(Allocation* allocation);
protected: protected:
virtual void Free(Allocation* allocation);
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
virtual void FreeImpl(Allocation* allocation);
private: private:
friend class AllocationDeleter; friend class AllocationDeleter;
......
...@@ -49,17 +49,6 @@ namespace paddle { ...@@ -49,17 +49,6 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
static inline std::shared_ptr<Allocator> WrapRetryAllocator(
std::shared_ptr<Allocator> allocator, int64_t retry_time) {
if (retry_time > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time);
allocator.reset(retry_allocator);
}
return allocator;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime. // TODO(yy): Dirty code here. This class should be configurable in runtime.
class CPUManagedAllocator : public Allocator { class CPUManagedAllocator : public Allocator {
public: public:
...@@ -123,10 +112,14 @@ class ChunkedAllocator : public Allocator { ...@@ -123,10 +112,14 @@ class ChunkedAllocator : public Allocator {
std::shared_ptr<Allocator> CreateAllocatorWithChunk() { std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_)); chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get(); auto* allocation = chunks_.back().get();
std::shared_ptr<Allocator> allocator(new LockedAllocator( std::unique_ptr<Allocator> allocator(new LockedAllocator(
std::shared_ptr<Allocator>(new BestFitAllocator(allocation)))); std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
allocator = WrapRetryAllocator(allocator, retry_time_); if (retry_time_ > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time_);
allocator.reset(retry_allocator);
}
return std::make_shared<AlignedAllocator<64u>>(std::move(allocator)); return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
} }
...@@ -197,23 +190,13 @@ class AllocatorFacadePrivate { ...@@ -197,23 +190,13 @@ class AllocatorFacadePrivate {
~AllocatorFacadePrivate() = default; ~AllocatorFacadePrivate() = default;
AllocatorFacadePrivate() { AllocatorFacadePrivate() {
auto strategy = GetAllocatorStrategy(); if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) {
switch (strategy) { InitLegacyAllocator();
case AllocatorStrategy::kLegacy: { } else {
InitLegacyAllocator(); InitCPUAllocator();
break; InitCUDAAllocator();
} InitCUDAPinnedAllocator();
case AllocatorStrategy::kNaiveBestFit: { WrapZeroSizeAllocator();
InitCPUAllocator();
InitCUDAAllocator();
InitCUDAPinnedAllocator();
WrapZeroSizeAllocator();
break;
}
default: {
PADDLE_THROW("Unsupported allocator strategy: %d",
static_cast<int>(strategy));
}
} }
} }
...@@ -271,7 +254,8 @@ AllocatorFacade& AllocatorFacade::Instance() { ...@@ -271,7 +254,8 @@ AllocatorFacade& AllocatorFacade::Instance() {
std::shared_ptr<Allocation> AllocatorFacade::AllocShared( std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) { const platform::Place& place, size_t size, Allocator::Attr attr) {
return std::shared_ptr<Allocation>(Alloc(place, size, attr)); return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(),
AllocationDeleter());
} }
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,
......
...@@ -19,22 +19,16 @@ ...@@ -19,22 +19,16 @@
DEFINE_string( DEFINE_string(
allocator_strategy, "legacy", allocator_strategy, "legacy",
"The allocation strategy. Legacy means the original allocator of Fluid." "The allocation strategy. Legacy means the original allocator of Fluid."
"naive_best_fit means the experimental best fit allocator. " "New means the experimental allocators of Fluid. in [legacy, new]");
"allocator. Enum in [legacy, naive_best_fit].");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
static AllocatorStrategy GetStrategyFromFlag() { static AllocatorStrategy GetStrategyFromFlag() {
if (FLAGS_allocator_strategy == "legacy") { return FLAGS_allocator_strategy == "legacy"
return AllocatorStrategy::kLegacy; ? AllocatorStrategy::kLegacy
} else if (FLAGS_allocator_strategy == "naive_best_fit") { : AllocatorStrategy::kNaiveBestFit;
return AllocatorStrategy::kNaiveBestFit;
} else {
PADDLE_THROW("Unsupported allocator strategy: %s",
FLAGS_allocator_strategy);
}
} }
AllocatorStrategy GetAllocatorStrategy() { AllocatorStrategy GetAllocatorStrategy() {
......
...@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const { ...@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
} }
return num; return num;
} }
void BestFitAllocator::FreeImpl(Allocation* allocation) { void BestFitAllocator::Free(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation); auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(bf_allocation, PADDLE_ENFORCE_NOT_NULL(bf_allocation,
"The input allocation is not BestFitAllocation."); "The input allocation is not BestFitAllocation.");
......
...@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator { ...@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void InsertFreeNode(const ListIt& it); void InsertFreeNode(const ListIt& it);
protected: protected:
void FreeImpl(Allocation* allocation) override; void Free(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
......
...@@ -22,11 +22,11 @@ namespace paddle { ...@@ -22,11 +22,11 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator) BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
: underlying_allocator_(std::move(allocator)) { : underlying_allocator_(std::move(allocator)) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_, underlying_allocator_,
"Underlying allocator of BufferedAllocator must not be null"); "Underlying allocator of BufferedAllocator must be unmanaged");
if (underlying_allocator_->IsAllocThreadSafe()) { if (underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
...@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) { ...@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while (!allocations_.empty()) { // free the largest while (!allocations_.empty()) { // free the largest
auto it = --allocations_.end(); auto it = --allocations_.end();
cur += it->second->size(); cur += it->second->size();
underlying_allocator_->Free(it->second.release()); delete it->second.release();
allocations_.erase(it); allocations_.erase(it);
if (cur >= size) return; if (cur >= size) return;
} }
} }
bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; } bool BufferedAllocator::IsAllocThreadSafe() const {
return this->underlying_allocator_->IsAllocThreadSafe();
void BufferedAllocator::FreeImpl(Allocation *allocation) { }
void BufferedAllocator::Free(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
allocations_.emplace(allocation->size(), AllocationPtr(allocation)); allocations_.emplace(allocation->size(), AllocationPtr(allocation));
} }
Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
{ {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
...@@ -61,15 +61,17 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -61,15 +61,17 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (it != allocations_.end() && it->first < size * 2) { if (it != allocations_.end() && it->first < size * 2) {
AllocationPtr result(std::move(it->second)); AllocationPtr result(std::move(it->second));
allocations_.erase(it); allocations_.erase(it);
return result.release(); return new AllocationWithUnderlying(std::move(result));
} }
} }
try { try {
return underlying_allocator_->Allocate(size, attr).release(); return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
} catch (BadAlloc &) { } catch (BadAlloc &) {
FreeCache(size); FreeCache(size);
return underlying_allocator_->Allocate(size, attr).release(); return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
} }
} }
......
...@@ -31,7 +31,7 @@ namespace allocation { ...@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_ // underlying_allocator_
class BufferedAllocator : public Allocator { class BufferedAllocator : public Allocator {
public: public:
explicit BufferedAllocator(std::shared_ptr<Allocator> allocator); explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
~BufferedAllocator(); ~BufferedAllocator();
...@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator { ...@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void FreeCache(size_t size); void FreeCache(size_t size);
protected: protected:
void FreeImpl(Allocation *allocation) override; void Free(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::unique_ptr<Allocator> underlying_allocator_;
std::multimap<size_t, AllocationPtr> allocations_; std::multimap<size_t, AllocationPtr> allocations_;
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h" #include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <memory>
#include <utility> #include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h"
...@@ -65,7 +66,7 @@ class StubAllocator : public Allocator { ...@@ -65,7 +66,7 @@ class StubAllocator : public Allocator {
size_t GetFreeCount() const { return destruct_count_; } size_t GetFreeCount() const { return destruct_count_; }
protected: protected:
void FreeImpl(Allocation *allocation) override { void Free(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation); auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc); PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr()); if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
......
...@@ -20,27 +20,25 @@ namespace paddle { ...@@ -20,27 +20,25 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
CPUAllocation::CPUAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CPUPlace()) {}
bool CPUAllocator::IsAllocThreadSafe() const { return true; } bool CPUAllocator::IsAllocThreadSafe() const { return true; }
void CPUAllocator::FreeImpl(Allocation *allocation) { void CPUAllocator::Free(Allocation *allocation) {
void *p = allocation->ptr(); PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
#ifdef _WIN32 free(allocation->ptr());
_aligned_free(p);
#else
free(p);
#endif
delete allocation; delete allocation;
} }
Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
void *p; void *ptr;
#ifdef _WIN32 auto status = posix_memalign(&ptr, kAlignment, size);
p = _aligned_malloc(size, kAlignment); if (UNLIKELY(status) != 0) {
#else throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d",
PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!", size, status));
size); }
#endif return new CPUAllocation(ptr, size);
return new Allocation(p, size, platform::CPUPlace());
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -31,13 +31,19 @@ namespace allocation { ...@@ -31,13 +31,19 @@ namespace allocation {
// //
// NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
// an open-sourced allocator into Paddle. // an open-sourced allocator into Paddle.
class CPUAllocator;
class CPUAllocation : public Allocation {
public:
CPUAllocation(void* ptr, size_t size);
};
class CPUAllocator : public Allocator { class CPUAllocator : public Allocator {
public: public:
constexpr static size_t kAlignment = 4096UL; constexpr static size_t kAlignment = 64u;
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void FreeImpl(Allocation* allocation) override; void Free(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -23,14 +23,15 @@ namespace paddle { ...@@ -23,14 +23,15 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool CUDAAllocator::IsAllocThreadSafe() const { return true; } bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
void CUDAAllocator::FreeImpl(Allocation* allocation) { void CUDAAllocator::Free(Allocation* allocation) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(allocation->place()), auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
place_); place_);
PADDLE_ENFORCE(cudaFree(allocation->ptr())); PADDLE_ENFORCE(cudaFree(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
void* ptr; void* ptr;
...@@ -40,9 +41,8 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -40,9 +41,8 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status))); status, cudaGetErrorString(status)));
} }
return new Allocation(ptr, size, platform::Place(place_)); return new CUDAAllocation(ptr, size, platform::Place(place_));
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -20,6 +20,13 @@ namespace paddle { ...@@ -20,6 +20,13 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
// CUDA System allocator and allocation.
// Just a flag type.
class CUDAAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class CUDAAllocator : public Allocator { class CUDAAllocator : public Allocator {
public: public:
explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {} explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
...@@ -28,7 +35,7 @@ class CUDAAllocator : public Allocator { ...@@ -28,7 +35,7 @@ class CUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void FreeImpl(Allocation* allocation) override; void Free(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
......
...@@ -134,22 +134,26 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) { ...@@ -134,22 +134,26 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
class GPUBuddyAllocatorList { BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
public: static std::once_flag init_flag;
GPUBuddyAllocatorList() static detail::BuddyAllocator **a_arr = nullptr;
: allocators_(platform::GetCUDADeviceCount()), static std::vector<int> devices;
flags_(platform::GetCUDADeviceCount()) {
allocation::GPUMemMonitor.Initialize(allocators_.size()); std::call_once(init_flag, [gpu_id]() {
} devices = platform::GetSelectedDevices();
int gpu_num = devices.size();
BuddyAllocator *Get(size_t dev_id) { allocation::GPUMemMonitor.Initialize(devices.size());
PADDLE_ENFORCE(dev_id < flags_.size(), "Invalid device id %s", dev_id);
std::call_once(flags_[dev_id], [this, dev_id] { a_arr = new BuddyAllocator *[gpu_num];
for (size_t i = 0; i < devices.size(); ++i) {
int dev_id = devices[i];
a_arr[i] = nullptr;
platform::SetDeviceId(dev_id); platform::SetDeviceId(dev_id);
allocators_[dev_id] = new BuddyAllocator( a_arr[i] = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
std::unique_ptr<detail::SystemAllocator>( new detail::GPUAllocator(dev_id)),
new detail::GPUAllocator(dev_id)), platform::GpuMinChunkSize(),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE:\n" VLOG(10) << "\n\nNOTE:\n"
<< "You can set GFlags environment variable " << "You can set GFlags environment variable "
...@@ -163,19 +167,13 @@ class GPUBuddyAllocatorList { ...@@ -163,19 +167,13 @@ class GPUBuddyAllocatorList {
<< FLAGS_initial_gpu_memory_in_mb << FLAGS_initial_gpu_memory_in_mb
<< ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is " << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<< FLAGS_reallocate_gpu_memory_in_mb << "\n\n"; << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
}); }
return allocators_[dev_id]; });
}
private:
std::vector<BuddyAllocator *> allocators_;
std::vector<std::once_flag> flags_;
};
BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
static GPUBuddyAllocatorList allocators;
platform::SetDeviceId(gpu_id); platform::SetDeviceId(gpu_id);
return allocators.Get(gpu_id); auto pos = std::distance(devices.begin(),
std::find(devices.begin(), devices.end(), gpu_id));
return a_arr[pos];
} }
#endif #endif
...@@ -194,7 +192,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place, ...@@ -194,7 +192,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
auto *buddy_allocator = GetGPUBuddyAllocator(place.device); auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
auto *ptr = buddy_allocator->Alloc(size); auto *ptr = buddy_allocator->Alloc(size);
if (ptr == nullptr && size > 0) { if (ptr == nullptr) {
int cur_dev = platform::GetCurrentDeviceId(); int cur_dev = platform::GetCurrentDeviceId();
platform::SetDeviceId(place.device); platform::SetDeviceId(place.device);
size_t avail, total; size_t avail, total;
...@@ -349,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -349,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return tmp_alloc; return tmp_alloc;
} }
void LegacyAllocator::FreeImpl(Allocation *allocation) { void LegacyAllocator::Free(Allocation *allocation) {
boost::apply_visitor( boost::apply_visitor(
legacy::FreeVisitor(allocation->ptr(), allocation->size()), legacy::FreeVisitor(allocation->ptr(), allocation->size()),
allocation->place()); allocation->place());
......
...@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator { ...@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected: protected:
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
void FreeImpl(Allocation *allocation) override; void Free(Allocation *allocation) override;
private: private:
platform::Place place_; platform::Place place_;
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <utility> #include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h" #include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/lock_guard_ptr.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
...@@ -25,24 +24,26 @@ namespace allocation { ...@@ -25,24 +24,26 @@ namespace allocation {
bool LockedAllocator::IsAllocThreadSafe() const { return true; } bool LockedAllocator::IsAllocThreadSafe() const { return true; }
LockedAllocator::LockedAllocator( LockedAllocator::LockedAllocator(
std::shared_ptr<Allocator> underlying_allocator) std::unique_ptr<Allocator> &&underlying_allocator)
: underlying_allocator_(std::move(underlying_allocator)) { : underlying_allocator_(std::move(underlying_allocator)) {
PADDLE_ENFORCE_NOT_NULL(underlying_allocator_); PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
if (!underlying_allocator_->IsAllocThreadSafe()) { if (!underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
} }
void LockedAllocator::Free(Allocation *allocation) {
void LockedAllocator::FreeImpl(Allocation *allocation) { {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
underlying_allocator_->Free(allocation); reinterpret_cast<AllocationWithUnderlying *>(allocation)
->allocation_.reset(); // Destroy inner allocation
}
delete allocation;
} }
Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
return underlying_allocator_->Allocate(size, attr).release(); return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -24,15 +24,15 @@ namespace allocation { ...@@ -24,15 +24,15 @@ namespace allocation {
// A allocator to make underlying allocator thread safe. // A allocator to make underlying allocator thread safe.
class LockedAllocator : public Allocator { class LockedAllocator : public Allocator {
public: public:
explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator); explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void FreeImpl(Allocation *allocation) override; void Free(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::unique_ptr<Allocator> underlying_allocator_;
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_string(allocator_strategy);
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
FLAGS_allocator_strategy = "naive_best_fit";
auto &instance = AllocatorFacade::Instance();
platform::Place place;
size_t size = 1024;
{
place = platform::CPUPlace();
size = 1024;
auto cpu_allocation = instance.Alloc(place, size);
ASSERT_NE(cpu_allocation, nullptr);
ASSERT_NE(cpu_allocation->ptr(), nullptr);
ASSERT_EQ(cpu_allocation->place(), place);
ASSERT_EQ(cpu_allocation->size(), size);
}
#ifdef PADDLE_WITH_CUDA
{
place = platform::CUDAPlace(0);
size = 1024;
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
// Allocate 2GB gpu memory
place = platform::CUDAPlace(0);
size = 2 * static_cast<size_t>(1 << 30);
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
place = platform::CUDAPinnedPlace();
size = (1 << 20);
auto cuda_pinned_allocation =
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
ASSERT_NE(cuda_pinned_allocation, nullptr);
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
ASSERT_EQ(cuda_pinned_allocation->place(), place);
ASSERT_GE(cuda_pinned_allocation->size(), size);
}
#endif
}
} // namespace allocation
} // namespace memory
} // namespace paddle
...@@ -20,15 +20,20 @@ namespace paddle { ...@@ -20,15 +20,20 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { void CPUPinnedAllocator::Free(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
Allocator::Attr attr) { Allocator::Attr attr) {
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void *ptr; void *ptr;
PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
return new Allocation(ptr, size, platform::CUDAPinnedPlace()); return new CPUPinnedAllocation(ptr, size);
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -20,12 +20,18 @@ namespace memory { ...@@ -20,12 +20,18 @@ namespace memory {
namespace allocation { namespace allocation {
// Allocator uses `cudaHostAlloc` // Allocator uses `cudaHostAlloc`
class CPUPinnedAllocation : public Allocation {
public:
CPUPinnedAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
};
class CPUPinnedAllocator : public Allocator { class CPUPinnedAllocator : public Allocator {
public: public:
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void FreeImpl(Allocation *allocation) override; void Free(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
}; };
......
...@@ -18,15 +18,25 @@ namespace paddle { ...@@ -18,15 +18,25 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
void RetryAllocator::FreeImpl(Allocation* allocation) { bool RetryAllocator::IsAllocThreadSafe() const {
return underlying_allocator_->IsAllocThreadSafe();
}
void RetryAllocator::Free(Allocation* allocation) {
// Delete underlying allocation first. // Delete underlying allocation first.
underlying_allocator_->Free(allocation); reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset();
cv_.notify_all(); {
// notify all waited allocators, they can try to allocate memory after free.
std::lock_guard<std::mutex> lock(mutex_);
cv_.notify_all();
}
delete allocation;
} }
Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
auto alloc_func = [&, this]() { auto alloc_func = [&, this]() {
return underlying_allocator_->Allocate(size, attr).release(); return new AllocationWithUnderlying(
underlying_allocator_->Allocate(size, attr));
}; };
// In fact, we can unify the code of allocation success and failure // In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time // But it would add lock even when allocation success at the first time
......
...@@ -25,25 +25,32 @@ namespace paddle { ...@@ -25,25 +25,32 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
class RetryAllocator;
class RetryAllocator : public Allocator { class RetryAllocator : public Allocator {
public: public:
RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms) RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
: underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) { : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
EnforceCheck();
}
bool IsAllocThreadSafe() const override;
private:
void EnforceCheck() {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_, underlying_allocator_.get(),
"UnderlyingAllocator of RetryAllocator must not be null"); "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(), PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
"UnderlyingAllocator of RetryAllocator must be thread-safe"); "UnderlyingAllocator of RetryAllocator must be thread-safe");
} }
bool IsAllocThreadSafe() const override { return true; }
protected: protected:
void FreeImpl(Allocation* allocation) override; void Free(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::unique_ptr<Allocator> underlying_allocator_;
std::chrono::milliseconds retry_time_; std::chrono::milliseconds retry_time_;
std::mutex mutex_; std::mutex mutex_;
std::condition_variable cv_; std::condition_variable cv_;
...@@ -51,6 +58,8 @@ class RetryAllocator : public Allocator { ...@@ -51,6 +58,8 @@ class RetryAllocator : public Allocator {
// For debug, We can add an atomic integer to record how many memory sizes are // For debug, We can add an atomic integer to record how many memory sizes are
// waited to allocate // waited to allocate
// std::atomic<size_t> waited_allocate_size_{0}; // std::atomic<size_t> waited_allocate_size_{0};
friend class RetryAllocation;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -24,20 +24,11 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const { ...@@ -24,20 +24,11 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (size == 0) { if (size == 0) {
return new Allocation(nullptr, 0, place_); return new ZeroSizeAllocation(place_);
} else { } else {
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size, attr).release();
} }
} }
void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
if (allocation->size() == 0) {
delete allocation;
} else {
underlying_allocator_->Free(allocation);
}
}
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -24,6 +24,12 @@ namespace allocation { ...@@ -24,6 +24,12 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always // The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the // return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr // allocation.ptr() is nullptr
class ZeroSizeAllocation : public Allocation {
public:
explicit ZeroSizeAllocation(const platform::Place& p)
: Allocation(nullptr, 0, p) {}
};
class ZeroSizeAllocator : public Allocator { class ZeroSizeAllocator : public Allocator {
public: public:
ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator, ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator,
...@@ -34,7 +40,6 @@ class ZeroSizeAllocator : public Allocator { ...@@ -34,7 +40,6 @@ class ZeroSizeAllocator : public Allocator {
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/platform/temporary_allocator.h" #include "paddle/fluid/platform/temporary_allocator.h"
#include <memory> #include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64(limit_of_tmp_allocation, -1, DEFINE_int64(limit_of_tmp_allocation, -1,
...@@ -30,31 +31,38 @@ namespace paddle { ...@@ -30,31 +31,38 @@ namespace paddle {
namespace platform { namespace platform {
namespace alloc = memory::allocation; namespace alloc = memory::allocation;
TemporaryAllocation::TemporaryAllocation(
alloc::AllocationPtr &&underlying_allocation)
: Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
underlying_allocation->place()),
underlying_allocation_(std::move(underlying_allocation)) {}
TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) { TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>()); temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
} }
bool TemporaryAllocator::IsAllocThreadSafe() const { return true; } bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
void TemporaryAllocator::Release(const std::function<void()> &callback) { void TemporaryAllocator::Release(const std::function<void()> &callback) {
std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations; std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations;
{ {
std::unique_lock<std::mutex> lock(mtx_); std::unique_lock<std::mutex> lock(mtx_);
callback(); callback();
t_allocations.swap(temp_mem_map_); t_allocations.swap(temp_mem_map_);
temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>()); temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
wait_delete_mem_ = 0; wait_delete_mem_ = 0;
} }
alloc::AllocationDeleter deleter;
for (auto tmp : *t_allocations) { for (auto tmp : *t_allocations) {
VLOG(10) << "Delete temporary allocation " << tmp.second->ptr() VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
<< " size: " << tmp.second->size(); << " size: " << tmp.second->size();
deleter(tmp.second); delete tmp.second;
} }
} }
void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) { void TemporaryAllocator::Free(alloc::Allocation *allocation) {
auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(temp_allocation);
if (platform::is_gpu_place(temp_allocation->place())) { if (platform::is_gpu_place(temp_allocation->place())) {
PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_), PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
"The place should be the same."); "The place should be the same.");
...@@ -78,7 +86,7 @@ void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) { ...@@ -78,7 +86,7 @@ void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
} }
VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr() VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
<< " size: " << temp_allocation->size(); << " size: " << temp_allocation->size();
alloc::AllocationDeleter()(temp_allocation); delete temp_allocation;
} }
size_t TemporaryAllocator::TemporaryAllocationQueueSize() { size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
...@@ -113,9 +121,11 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( ...@@ -113,9 +121,11 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
} }
// If not find the the available allocation, get allocation from // If not find the the available allocation, get allocation from
// AllocatorFacadeInstance. // AllocatorFacadeInstance.
auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); auto raw_allocation =
alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
return temp_mem.release(); return temp_mem;
} }
} // namespace platform } // namespace platform
......
...@@ -23,6 +23,14 @@ ...@@ -23,6 +23,14 @@
namespace paddle { namespace paddle {
namespace platform { namespace platform {
class TemporaryAllocation : public memory::allocation::Allocation {
public:
explicit TemporaryAllocation(
memory::allocation::AllocationPtr &&underlying_allocation);
memory::allocation::AllocationPtr underlying_allocation_;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation. * which used by CUDA's async operation.
* *
...@@ -49,7 +57,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -49,7 +57,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void SetCallback(const std::function<void()> &callback); void SetCallback(const std::function<void()> &callback);
protected: protected:
void FreeImpl(memory::allocation::Allocation *allocation) override; void Free(memory::allocation::Allocation *allocation) override;
memory::allocation::Allocation *AllocateImpl( memory::allocation::Allocation *AllocateImpl(
size_t size, memory::allocation::Allocator::Attr attr) override; size_t size, memory::allocation::Allocator::Attr attr) override;
...@@ -58,8 +66,8 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -58,8 +66,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform::Place place_; platform::Place place_;
// When the allocation is not held by any variable, it should be placed // When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately. // to temp_mem_map immediately.
std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>> std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
temp_mem_map_{nullptr}; nullptr};
std::mutex mtx_; std::mutex mtx_;
size_t wait_delete_mem_{0}; size_t wait_delete_mem_{0};
std::function<void()> callback_; std::function<void()> callback_;
......
...@@ -324,7 +324,6 @@ PYBIND11_MODULE(core, m) { ...@@ -324,7 +324,6 @@ PYBIND11_MODULE(core, m) {
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<float>(place); self.mutable_data<float>(place);
}) })
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>) .def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>) .def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>) .def("set", PyCPUTensorSetFromArray<double>)
......
...@@ -105,12 +105,14 @@ void Printf(const char* fmt, const Args&... args) { ...@@ -105,12 +105,14 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf(std::cout, fmt, args...); Fprintf(std::cout, fmt, args...);
} }
inline std::string HumanReadableSize(double f_size) { template <typename T>
std::string HumanReadableSize(T size) {
size_t i = 0; size_t i = 0;
double f_size = static_cast<double>(size);
double orig = f_size; double orig = f_size;
const std::vector<std::string> units( const std::vector<std::string> units(
{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}); {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
while (f_size >= 1024) { while (f_size > 1024) {
f_size /= 1024; f_size /= 1024;
i++; i++;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册