未验证 提交 644e8af4 编写于 作者: Z Zeng Jinle 提交者: GitHub

Merge pull request #16424 from sneaxiy/fix_allocator_bug

Fix allocator bug
...@@ -365,9 +365,6 @@ class ExecutionContext { ...@@ -365,9 +365,6 @@ class ExecutionContext {
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>( auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
allocation_ptr, deleter); allocation_ptr, deleter);
PADDLE_ENFORCE(
dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
"The AllocationPtr must be TemporaryAllocation.");
PADDLE_ENFORCE_GE(allocation_ptr->size(), PADDLE_ENFORCE_GE(allocation_ptr->size(),
framework::product(dim) * sizeof(T)); framework::product(dim) * sizeof(T));
......
...@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) ...@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
if (WITH_GPU) if (WITH_GPU)
...@@ -37,30 +38,20 @@ else () ...@@ -37,30 +38,20 @@ else ()
set(AllocatorFacadeDeps) set(AllocatorFacadeDeps)
endif() endif()
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator) cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags) cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps})
cc_library(allocator_facade SRCS allocator_facade.cc DEPS cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
${AllocatorFacadeDeps}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle { namespace paddle {
...@@ -93,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator { ...@@ -93,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_->Allocate(size + kAlignment, attr); underlying_allocator_->Allocate(size + kAlignment, attr);
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size); return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
} }
void FreeImpl(Allocation* allocation) override { delete allocation; }
}; };
} // namespace allocation } // namespace allocation
......
...@@ -27,16 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; } ...@@ -27,16 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) { AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
auto ptr = AllocateImpl(size, attr); auto ptr = AllocateImpl(size, attr);
ptr->set_allocator(this); ptr->RegisterDecoratedAllocator(this);
return AllocationPtr(ptr); return AllocationPtr(ptr);
} }
void Allocator::Free(Allocation* allocation) { delete allocation; } void Allocator::FreeImpl(Allocation* allocation) {
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
}
void Allocator::Free(Allocation* allocation) {
allocation->PopDecoratedAllocator();
FreeImpl(allocation);
}
const char* BadAlloc::what() const noexcept { return msg_.c_str(); } const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
void AllocationDeleter::operator()(Allocation* allocation) const { void AllocationDeleter::operator()(Allocation* allocation) const {
auto* allocator = allocation->allocator(); Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation); allocator->Free(allocation);
} }
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
namespace paddle { namespace paddle {
...@@ -44,13 +46,56 @@ class Allocator; ...@@ -44,13 +46,56 @@ class Allocator;
// NOTE: this is the base class of Allocation. Each allocator can use its own // NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object. // allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0 // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate
* a RetryAllocator to any allocator to perform allocation retry when first
* allocation request fails.
*
* Explanations of Allocator design is as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., something what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class Allocation { class Allocation {
public: public:
Allocation(void* ptr, size_t size, platform::Place place) Allocation(void* ptr, size_t size, platform::Place place)
: allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {} : ptr_(ptr), size_(size), place_(place) {
// NOTE(zjl): Since decorated_allocators_ is usually a small vector
// We reserve a small buffer to it to prevent frequent heap allocation
// Not quite sure whether we need something like gtl vector.
decorated_allocators_.reserve(8);
}
Allocation(const Allocation& o) = delete; Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete;
Allocation(Allocation&& o) = delete;
Allocation& operator=(Allocation&& o) = delete;
// Returns the holding pointer. // Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method // NOTE: For performance consideration, it is better not to make this method
...@@ -72,17 +117,31 @@ class Allocation { ...@@ -72,17 +117,31 @@ class Allocation {
const platform::Place& place() const { return place_; } const platform::Place& place() const { return place_; }
Allocator* allocator() { return allocator_; } virtual ~Allocation();
void set_allocator(Allocator* allocator) { allocator_ = allocator; } private:
const std::vector<Allocator*>& DecoratedAllocators() const {
return decorated_allocators_;
}
virtual ~Allocation(); inline void RegisterDecoratedAllocator(Allocator* allocator) {
decorated_allocators_.push_back(allocator);
}
inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); }
inline Allocator* TopDecoratedAllocator() {
return decorated_allocators_.back();
}
private: private:
Allocator* allocator_;
void* ptr_; void* ptr_;
size_t size_; size_t size_;
platform::Place place_; platform::Place place_;
std::vector<Allocator*> decorated_allocators_;
friend class Allocator;
friend class AllocationDeleter;
}; };
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>; using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
...@@ -132,9 +191,12 @@ class Allocator { ...@@ -132,9 +191,12 @@ class Allocator {
// True if the `Allocate` is thread safe. // True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const; virtual bool IsAllocThreadSafe() const;
// This function should not be called outside
void Free(Allocation* allocation);
protected: protected:
virtual void Free(Allocation* allocation);
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
virtual void FreeImpl(Allocation* allocation);
private: private:
friend class AllocationDeleter; friend class AllocationDeleter;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <map> #include <map>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_facade.h"
...@@ -30,6 +31,7 @@ ...@@ -30,6 +31,7 @@
#include "paddle/fluid/memory/allocation/retry_allocator.h" #include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h" #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h" #include "paddle/fluid/memory/allocation/cuda_allocator.h"
...@@ -47,6 +49,17 @@ namespace paddle { ...@@ -47,6 +49,17 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
static inline std::shared_ptr<Allocator> WrapRetryAllocator(
std::shared_ptr<Allocator> allocator, int64_t retry_time) {
if (retry_time > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time);
allocator.reset(retry_allocator);
}
return allocator;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime. // TODO(yy): Dirty code here. This class should be configurable in runtime.
class CPUManagedAllocator : public Allocator { class CPUManagedAllocator : public Allocator {
public: public:
...@@ -110,14 +123,10 @@ class ChunkedAllocator : public Allocator { ...@@ -110,14 +123,10 @@ class ChunkedAllocator : public Allocator {
std::shared_ptr<Allocator> CreateAllocatorWithChunk() { std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_)); chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get(); auto* allocation = chunks_.back().get();
std::unique_ptr<Allocator> allocator(new LockedAllocator( std::shared_ptr<Allocator> allocator(new LockedAllocator(
std::unique_ptr<Allocator>(new BestFitAllocator(allocation)))); std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
if (retry_time_ > 0) { allocator = WrapRetryAllocator(allocator, retry_time_);
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time_);
allocator.reset(retry_allocator);
}
return std::make_shared<AlignedAllocator<64u>>(std::move(allocator)); return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
} }
...@@ -188,13 +197,23 @@ class AllocatorFacadePrivate { ...@@ -188,13 +197,23 @@ class AllocatorFacadePrivate {
~AllocatorFacadePrivate() = default; ~AllocatorFacadePrivate() = default;
AllocatorFacadePrivate() { AllocatorFacadePrivate() {
if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) { auto strategy = GetAllocatorStrategy();
InitLegacyAllocator(); switch (strategy) {
} else { case AllocatorStrategy::kLegacy: {
InitCPUAllocator(); InitLegacyAllocator();
InitCUDAAllocator(); break;
InitCUDAPinnedAllocator(); }
WrapZeroSizeAllocator(); case AllocatorStrategy::kNaiveBestFit: {
InitCPUAllocator();
InitCUDAAllocator();
InitCUDAPinnedAllocator();
WrapZeroSizeAllocator();
break;
}
default: {
PADDLE_THROW("Unsupported allocator strategy: %d",
static_cast<int>(strategy));
}
} }
} }
...@@ -252,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() { ...@@ -252,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
std::shared_ptr<Allocation> AllocatorFacade::AllocShared( std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) { const platform::Place& place, size_t size, Allocator::Attr attr) {
return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(), return std::shared_ptr<Allocation>(Alloc(place, size, attr));
AllocationDeleter());
} }
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,
......
...@@ -14,20 +14,27 @@ ...@@ -14,20 +14,27 @@
#include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string( DEFINE_string(
allocator_strategy, "legacy", allocator_strategy, "legacy",
"The allocation strategy. Legacy means the original allocator of Fluid." "The allocation strategy. Legacy means the original allocator of Fluid."
"New means the experimental allocators of Fluid. in [legacy, new]"); "naive_best_fit means the experimental best fit allocator. "
"allocator. Enum in [legacy, naive_best_fit].");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
static AllocatorStrategy GetStrategyFromFlag() { static AllocatorStrategy GetStrategyFromFlag() {
return FLAGS_allocator_strategy == "legacy" if (FLAGS_allocator_strategy == "legacy") {
? AllocatorStrategy::kLegacy return AllocatorStrategy::kLegacy;
: AllocatorStrategy::kNaiveBestFit; } else if (FLAGS_allocator_strategy == "naive_best_fit") {
return AllocatorStrategy::kNaiveBestFit;
} else {
PADDLE_THROW("Unsupported allocator strategy: %s",
FLAGS_allocator_strategy);
}
} }
AllocatorStrategy GetAllocatorStrategy() { AllocatorStrategy GetAllocatorStrategy() {
......
...@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const { ...@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
} }
return num; return num;
} }
void BestFitAllocator::Free(Allocation* allocation) { void BestFitAllocator::FreeImpl(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation); auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(bf_allocation, PADDLE_ENFORCE_NOT_NULL(bf_allocation,
"The input allocation is not BestFitAllocation."); "The input allocation is not BestFitAllocation.");
......
...@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator { ...@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void InsertFreeNode(const ListIt& it); void InsertFreeNode(const ListIt& it);
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
......
...@@ -22,11 +22,11 @@ namespace paddle { ...@@ -22,11 +22,11 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator) BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
: underlying_allocator_(std::move(allocator)) { : underlying_allocator_(std::move(allocator)) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_, underlying_allocator_,
"Underlying allocator of BufferedAllocator must be unmanaged"); "Underlying allocator of BufferedAllocator must not be null");
if (underlying_allocator_->IsAllocThreadSafe()) { if (underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
...@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) { ...@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while (!allocations_.empty()) { // free the largest while (!allocations_.empty()) { // free the largest
auto it = --allocations_.end(); auto it = --allocations_.end();
cur += it->second->size(); cur += it->second->size();
delete it->second.release(); underlying_allocator_->Free(it->second.release());
allocations_.erase(it); allocations_.erase(it);
if (cur >= size) return; if (cur >= size) return;
} }
} }
bool BufferedAllocator::IsAllocThreadSafe() const { bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
return this->underlying_allocator_->IsAllocThreadSafe();
} void BufferedAllocator::FreeImpl(Allocation *allocation) {
void BufferedAllocator::Free(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
allocations_.emplace(allocation->size(), AllocationPtr(allocation)); allocations_.emplace(allocation->size(), AllocationPtr(allocation));
} }
Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
{ {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
...@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (it != allocations_.end() && it->first < size * 2) { if (it != allocations_.end() && it->first < size * 2) {
AllocationPtr result(std::move(it->second)); AllocationPtr result(std::move(it->second));
allocations_.erase(it); allocations_.erase(it);
return new AllocationWithUnderlying(std::move(result)); return result.release();
} }
} }
try { try {
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
} catch (BadAlloc &) { } catch (BadAlloc &) {
FreeCache(size); FreeCache(size);
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
} }
} }
......
...@@ -31,7 +31,7 @@ namespace allocation { ...@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_ // underlying_allocator_
class BufferedAllocator : public Allocator { class BufferedAllocator : public Allocator {
public: public:
explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator); explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
~BufferedAllocator(); ~BufferedAllocator();
...@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator { ...@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void FreeCache(size_t size); void FreeCache(size_t size);
protected: protected:
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::unique_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
std::multimap<size_t, AllocationPtr> allocations_; std::multimap<size_t, AllocationPtr> allocations_;
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h" #include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/allocation/locked_allocator.h"
...@@ -64,7 +65,7 @@ class StubAllocator : public Allocator { ...@@ -64,7 +65,7 @@ class StubAllocator : public Allocator {
size_t GetFreeCount() const { return destruct_count_; } size_t GetFreeCount() const { return destruct_count_; }
protected: protected:
void Free(Allocation *allocation) override { void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation); auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc); PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr()); if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
......
...@@ -20,25 +20,27 @@ namespace paddle { ...@@ -20,25 +20,27 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
CPUAllocation::CPUAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CPUPlace()) {}
bool CPUAllocator::IsAllocThreadSafe() const { return true; } bool CPUAllocator::IsAllocThreadSafe() const { return true; }
void CPUAllocator::Free(Allocation *allocation) { void CPUAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation)); void *p = allocation->ptr();
free(allocation->ptr()); #ifdef _WIN32
_aligned_free(p);
#else
free(p);
#endif
delete allocation; delete allocation;
} }
Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
void *ptr; void *p;
auto status = posix_memalign(&ptr, kAlignment, size); #ifdef _WIN32
if (UNLIKELY(status) != 0) { p = _aligned_malloc(size, kAlignment);
throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d", #else
size, status)); PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!",
} size);
return new CPUAllocation(ptr, size); #endif
return new Allocation(p, size, platform::CPUPlace());
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -31,19 +31,13 @@ namespace allocation { ...@@ -31,19 +31,13 @@ namespace allocation {
// //
// NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
// an open-sourced allocator into Paddle. // an open-sourced allocator into Paddle.
class CPUAllocator;
class CPUAllocation : public Allocation {
public:
CPUAllocation(void* ptr, size_t size);
};
class CPUAllocator : public Allocator { class CPUAllocator : public Allocator {
public: public:
constexpr static size_t kAlignment = 64u; constexpr static size_t kAlignment = 4096UL;
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -23,15 +23,14 @@ namespace paddle { ...@@ -23,15 +23,14 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool CUDAAllocator::IsAllocThreadSafe() const { return true; } bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
void CUDAAllocator::Free(Allocation* allocation) { void CUDAAllocator::FreeImpl(Allocation* allocation) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation); PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(allocation->place()),
PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
place_); place_);
PADDLE_ENFORCE(cudaFree(allocation->ptr())); PADDLE_ENFORCE(cudaFree(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
void* ptr; void* ptr;
...@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status))); status, cudaGetErrorString(status)));
} }
return new CUDAAllocation(ptr, size, platform::Place(place_)); return new Allocation(ptr, size, platform::Place(place_));
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -20,13 +20,6 @@ namespace paddle { ...@@ -20,13 +20,6 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
// CUDA System allocator and allocation.
// Just a flag type.
class CUDAAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class CUDAAllocator : public Allocator { class CUDAAllocator : public Allocator {
public: public:
explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {} explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
...@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator { ...@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
......
...@@ -134,26 +134,22 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) { ...@@ -134,26 +134,22 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) { class GPUBuddyAllocatorList {
static std::once_flag init_flag; public:
static detail::BuddyAllocator **a_arr = nullptr; GPUBuddyAllocatorList()
static std::vector<int> devices; : allocators_(platform::GetCUDADeviceCount()),
flags_(platform::GetCUDADeviceCount()) {
std::call_once(init_flag, [gpu_id]() { allocation::GPUMemMonitor.Initialize(allocators_.size());
devices = platform::GetSelectedDevices(); }
int gpu_num = devices.size();
allocation::GPUMemMonitor.Initialize(devices.size());
a_arr = new BuddyAllocator *[gpu_num]; BuddyAllocator *Get(size_t dev_id) {
for (size_t i = 0; i < devices.size(); ++i) { PADDLE_ENFORCE(dev_id < flags_.size(), "Invalid device id %s", dev_id);
int dev_id = devices[i]; std::call_once(flags_[dev_id], [this, dev_id] {
a_arr[i] = nullptr;
platform::SetDeviceId(dev_id); platform::SetDeviceId(dev_id);
a_arr[i] = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>( allocators_[dev_id] = new BuddyAllocator(
new detail::GPUAllocator(dev_id)), std::unique_ptr<detail::SystemAllocator>(
platform::GpuMinChunkSize(), new detail::GPUAllocator(dev_id)),
platform::GpuMaxChunkSize()); platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE:\n" VLOG(10) << "\n\nNOTE:\n"
<< "You can set GFlags environment variable " << "You can set GFlags environment variable "
...@@ -167,13 +163,19 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) { ...@@ -167,13 +163,19 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
<< FLAGS_initial_gpu_memory_in_mb << FLAGS_initial_gpu_memory_in_mb
<< ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is " << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<< FLAGS_reallocate_gpu_memory_in_mb << "\n\n"; << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
} });
}); return allocators_[dev_id];
}
private:
std::vector<BuddyAllocator *> allocators_;
std::vector<std::once_flag> flags_;
};
BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
static GPUBuddyAllocatorList allocators;
platform::SetDeviceId(gpu_id); platform::SetDeviceId(gpu_id);
auto pos = std::distance(devices.begin(), return allocators.Get(gpu_id);
std::find(devices.begin(), devices.end(), gpu_id));
return a_arr[pos];
} }
#endif #endif
...@@ -192,7 +194,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place, ...@@ -192,7 +194,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
auto *buddy_allocator = GetGPUBuddyAllocator(place.device); auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
auto *ptr = buddy_allocator->Alloc(size); auto *ptr = buddy_allocator->Alloc(size);
if (ptr == nullptr) { if (ptr == nullptr && size > 0) {
int cur_dev = platform::GetCurrentDeviceId(); int cur_dev = platform::GetCurrentDeviceId();
platform::SetDeviceId(place.device); platform::SetDeviceId(place.device);
size_t avail, total; size_t avail, total;
...@@ -347,7 +349,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -347,7 +349,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return tmp_alloc; return tmp_alloc;
} }
void LegacyAllocator::Free(Allocation *allocation) { void LegacyAllocator::FreeImpl(Allocation *allocation) {
boost::apply_visitor( boost::apply_visitor(
legacy::FreeVisitor(allocation->ptr(), allocation->size()), legacy::FreeVisitor(allocation->ptr(), allocation->size()),
allocation->place()); allocation->place());
......
...@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator { ...@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected: protected:
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
private: private:
platform::Place place_; platform::Place place_;
......
...@@ -14,8 +14,10 @@ ...@@ -14,8 +14,10 @@
#include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/allocation/locked_allocator.h"
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h" #include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/lock_guard_ptr.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
...@@ -23,26 +25,24 @@ namespace allocation { ...@@ -23,26 +25,24 @@ namespace allocation {
bool LockedAllocator::IsAllocThreadSafe() const { return true; } bool LockedAllocator::IsAllocThreadSafe() const { return true; }
LockedAllocator::LockedAllocator( LockedAllocator::LockedAllocator(
std::unique_ptr<Allocator> &&underlying_allocator) std::shared_ptr<Allocator> underlying_allocator)
: underlying_allocator_(std::move(underlying_allocator)) { : underlying_allocator_(std::move(underlying_allocator)) {
PADDLE_ENFORCE_NOT_NULL(underlying_allocator_); PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
if (!underlying_allocator_->IsAllocThreadSafe()) { if (!underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
} }
void LockedAllocator::Free(Allocation *allocation) {
{ void LockedAllocator::FreeImpl(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
reinterpret_cast<AllocationWithUnderlying *>(allocation) underlying_allocator_->Free(allocation);
->allocation_.reset(); // Destroy inner allocation
}
delete allocation;
} }
Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -24,15 +24,15 @@ namespace allocation { ...@@ -24,15 +24,15 @@ namespace allocation {
// A allocator to make underlying allocator thread safe. // A allocator to make underlying allocator thread safe.
class LockedAllocator : public Allocator { class LockedAllocator : public Allocator {
public: public:
explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator); explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::unique_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_string(allocator_strategy);
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
FLAGS_allocator_strategy = "naive_best_fit";
auto &instance = AllocatorFacade::Instance();
platform::Place place;
size_t size = 1024;
{
place = platform::CPUPlace();
size = 1024;
auto cpu_allocation = instance.Alloc(place, size);
ASSERT_NE(cpu_allocation, nullptr);
ASSERT_NE(cpu_allocation->ptr(), nullptr);
ASSERT_EQ(cpu_allocation->place(), place);
ASSERT_EQ(cpu_allocation->size(), size);
}
#ifdef PADDLE_WITH_CUDA
{
place = platform::CUDAPlace(0);
size = 1024;
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
// Allocate 2GB gpu memory
place = platform::CUDAPlace(0);
size = 2 * static_cast<size_t>(1 << 30);
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
place = platform::CUDAPinnedPlace();
size = (1 << 20);
auto cuda_pinned_allocation =
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
ASSERT_NE(cuda_pinned_allocation, nullptr);
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
ASSERT_EQ(cuda_pinned_allocation->place(), place);
ASSERT_GE(cuda_pinned_allocation->size(), size);
}
#endif
}
} // namespace allocation
} // namespace memory
} // namespace paddle
...@@ -20,20 +20,15 @@ namespace paddle { ...@@ -20,20 +20,15 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
void CPUPinnedAllocator::Free(Allocation *allocation) { void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
Allocator::Attr attr) { Allocator::Attr attr) {
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void *ptr; void *ptr;
PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
return new CPUPinnedAllocation(ptr, size); return new Allocation(ptr, size, platform::CUDAPinnedPlace());
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -20,18 +20,12 @@ namespace memory { ...@@ -20,18 +20,12 @@ namespace memory {
namespace allocation { namespace allocation {
// Allocator uses `cudaHostAlloc` // Allocator uses `cudaHostAlloc`
class CPUPinnedAllocation : public Allocation {
public:
CPUPinnedAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
};
class CPUPinnedAllocator : public Allocator { class CPUPinnedAllocator : public Allocator {
public: public:
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
}; };
......
...@@ -18,25 +18,15 @@ namespace paddle { ...@@ -18,25 +18,15 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool RetryAllocator::IsAllocThreadSafe() const { void RetryAllocator::FreeImpl(Allocation* allocation) {
return underlying_allocator_->IsAllocThreadSafe();
}
void RetryAllocator::Free(Allocation* allocation) {
// Delete underlying allocation first. // Delete underlying allocation first.
reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset(); underlying_allocator_->Free(allocation);
{ cv_.notify_all();
// notify all waited allocators, they can try to allocate memory after free.
std::lock_guard<std::mutex> lock(mutex_);
cv_.notify_all();
}
delete allocation;
} }
Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
auto alloc_func = [&, this]() { auto alloc_func = [&, this]() {
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
}; };
// In fact, we can unify the code of allocation success and failure // In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time // But it would add lock even when allocation success at the first time
......
...@@ -18,38 +18,32 @@ ...@@ -18,38 +18,32 @@
#include <condition_variable> // NOLINT #include <condition_variable> // NOLINT
#include <memory> #include <memory>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
class RetryAllocator;
class RetryAllocator : public Allocator { class RetryAllocator : public Allocator {
public: public:
RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms) RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
: underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) { : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
EnforceCheck();
}
bool IsAllocThreadSafe() const override;
private:
void EnforceCheck() {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_.get(), underlying_allocator_,
"UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator"); "UnderlyingAllocator of RetryAllocator must not be null");
PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(), PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
"UnderlyingAllocator of RetryAllocator must be thread-safe"); "UnderlyingAllocator of RetryAllocator must be thread-safe");
} }
bool IsAllocThreadSafe() const override { return true; }
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::unique_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
std::chrono::milliseconds retry_time_; std::chrono::milliseconds retry_time_;
std::mutex mutex_; std::mutex mutex_;
std::condition_variable cv_; std::condition_variable cv_;
...@@ -57,8 +51,6 @@ class RetryAllocator : public Allocator { ...@@ -57,8 +51,6 @@ class RetryAllocator : public Allocator {
// For debug, We can add an atomic integer to record how many memory sizes are // For debug, We can add an atomic integer to record how many memory sizes are
// waited to allocate // waited to allocate
// std::atomic<size_t> waited_allocate_size_{0}; // std::atomic<size_t> waited_allocate_size_{0};
friend class RetryAllocation;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const { ...@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (size == 0) { if (size == 0) {
return new ZeroSizeAllocation(place_); return new Allocation(nullptr, 0, place_);
} else { } else {
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size, attr).release();
} }
} }
void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
if (allocation->size() == 0) {
delete allocation;
} else {
underlying_allocator_->Free(allocation);
}
}
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <memory>
#include <utility> #include <utility>
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
...@@ -23,12 +24,6 @@ namespace allocation { ...@@ -23,12 +24,6 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always // The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the // return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr // allocation.ptr() is nullptr
class ZeroSizeAllocation : public Allocation {
public:
explicit ZeroSizeAllocation(const platform::Place& p)
: Allocation(nullptr, 0, p) {}
};
class ZeroSizeAllocator : public Allocator { class ZeroSizeAllocator : public Allocator {
public: public:
ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator, ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator,
...@@ -39,6 +34,7 @@ class ZeroSizeAllocator : public Allocator { ...@@ -39,6 +34,7 @@ class ZeroSizeAllocator : public Allocator {
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/platform/temporary_allocator.h" #include "paddle/fluid/platform/temporary_allocator.h"
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64(limit_of_tmp_allocation, -1, DEFINE_int64(limit_of_tmp_allocation, -1,
...@@ -29,38 +30,31 @@ namespace paddle { ...@@ -29,38 +30,31 @@ namespace paddle {
namespace platform { namespace platform {
namespace alloc = memory::allocation; namespace alloc = memory::allocation;
TemporaryAllocation::TemporaryAllocation(
alloc::AllocationPtr &&underlying_allocation)
: Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
underlying_allocation->place()),
underlying_allocation_(std::move(underlying_allocation)) {}
TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) { TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>()); temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
} }
bool TemporaryAllocator::IsAllocThreadSafe() const { return true; } bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
void TemporaryAllocator::Release(const std::function<void()> &callback) { void TemporaryAllocator::Release(const std::function<void()> &callback) {
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations; std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
{ {
std::unique_lock<std::mutex> lock(mtx_); std::unique_lock<std::mutex> lock(mtx_);
callback(); callback();
t_allocations.swap(temp_mem_map_); t_allocations.swap(temp_mem_map_);
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>()); temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
wait_delete_mem_ = 0; wait_delete_mem_ = 0;
} }
alloc::AllocationDeleter deleter;
for (auto tmp : *t_allocations) { for (auto tmp : *t_allocations) {
VLOG(10) << "Delete temporary allocation " << tmp.second->ptr() VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
<< " size: " << tmp.second->size(); << " size: " << tmp.second->size();
delete tmp.second; deleter(tmp.second);
} }
} }
void TemporaryAllocator::Free(alloc::Allocation *allocation) { void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(temp_allocation);
if (platform::is_gpu_place(temp_allocation->place())) { if (platform::is_gpu_place(temp_allocation->place())) {
PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_), PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
"The place should be the same."); "The place should be the same.");
...@@ -84,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) { ...@@ -84,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) {
} }
VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr() VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
<< " size: " << temp_allocation->size(); << " size: " << temp_allocation->size();
delete temp_allocation; alloc::AllocationDeleter()(temp_allocation);
} }
size_t TemporaryAllocator::TemporaryAllocationQueueSize() { size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
...@@ -119,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( ...@@ -119,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
} }
// If not find the the available allocation, get allocation from // If not find the the available allocation, get allocation from
// AllocatorFacadeInstance. // AllocatorFacadeInstance.
auto raw_allocation = auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
return temp_mem; return temp_mem.release();
} }
} // namespace platform } // namespace platform
......
...@@ -16,20 +16,13 @@ ...@@ -16,20 +16,13 @@
#include <condition_variable> // NOLINT #include <condition_variable> // NOLINT
#include <deque> #include <deque>
#include <map> #include <map>
#include <memory>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/lock_guard_ptr.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
class TemporaryAllocation : public memory::allocation::Allocation {
public:
explicit TemporaryAllocation(
memory::allocation::AllocationPtr &&underlying_allocation);
memory::allocation::AllocationPtr underlying_allocation_;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation. * which used by CUDA's async operation.
* *
...@@ -56,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -56,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void SetCallback(const std::function<void()> &callback); void SetCallback(const std::function<void()> &callback);
protected: protected:
void Free(memory::allocation::Allocation *allocation) override; void FreeImpl(memory::allocation::Allocation *allocation) override;
memory::allocation::Allocation *AllocateImpl( memory::allocation::Allocation *AllocateImpl(
size_t size, memory::allocation::Allocator::Attr attr) override; size_t size, memory::allocation::Allocator::Attr attr) override;
...@@ -65,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -65,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform::Place place_; platform::Place place_;
// When the allocation is not held by any variable, it should be placed // When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately. // to temp_mem_map immediately.
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{ std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
nullptr}; temp_mem_map_{nullptr};
std::mutex mtx_; std::mutex mtx_;
size_t wait_delete_mem_{0}; size_t wait_delete_mem_{0};
std::function<void()> callback_; std::function<void()> callback_;
......
...@@ -324,6 +324,7 @@ PYBIND11_MODULE(core, m) { ...@@ -324,6 +324,7 @@ PYBIND11_MODULE(core, m) {
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<float>(place); self.mutable_data<float>(place);
}) })
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>) .def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>) .def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>) .def("set", PyCPUTensorSetFromArray<double>)
......
...@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) { ...@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf(std::cout, fmt, args...); Fprintf(std::cout, fmt, args...);
} }
template <typename T> inline std::string HumanReadableSize(double f_size) {
std::string HumanReadableSize(T size) {
size_t i = 0; size_t i = 0;
double f_size = static_cast<double>(size);
double orig = f_size; double orig = f_size;
const std::vector<std::string> units( const std::vector<std::string> units(
{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}); {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
while (f_size > 1024) { while (f_size >= 1024) {
f_size /= 1024; f_size /= 1024;
i++; i++;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册