未验证 提交 c6189637 编写于 作者: Z Zeng Jinle 提交者: GitHub

Fix allocator bug (#16712)

* Revert "Revert "Fix allocator bug""

This reverts commit 174d0d0b.

* Revert "fix travis ci"

This reverts commit 5656fa9f.

test=develop

* add inlined_vector.h, test=develop

* add inlined_vector_test,test=develop
上级 03577151
...@@ -225,6 +225,8 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) ...@@ -225,6 +225,8 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test(tuple_test SRCS tuple_test.cc ) cc_test(tuple_test SRCS tuple_test.cc )
cc_test(inlined_vector_test SRCS inlined_vector_test.cc)
if (NOT WIN32) if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc) cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32) endif (NOT WIN32)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
class InlinedVector {
static_assert(N > 0, "N must be larger than 0");
public:
inline InlinedVector() { len_ = 0; }
inline size_t size() const { return len_; }
inline T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }
inline const T& operator[](size_t i) const {
return i < N ? head_[i] : tail_[i - N];
}
inline void emplace_back(const T& item) {
if (LIKELY(len_ < N)) {
head_[len_++] = item;
} else {
tail_.emplace_back(item);
++len_;
}
}
inline void pop_back() {
if (UNLIKELY(len_ > N)) {
tail_.pop_back();
}
--len_;
}
inline T& back() {
if (LIKELY(len_ <= N)) {
return head_[len_ - 1];
} else {
return tail_.back();
}
}
private:
T head_[N];
size_t len_;
std::vector<T> tail_;
};
} // namespace framework
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
#include "gtest/gtest.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
static std::vector<T> ToStdVector(const framework::InlinedVector<T, N> &vec) {
std::vector<T> std_vec;
std_vec.reserve(vec.size());
for (size_t i = 0; i < vec.size(); ++i) {
std_vec.emplace_back(vec[i]);
}
return std_vec;
}
template <size_t N>
void InlinedVectorCheck(size_t n) {
std::srand(std::time(nullptr));
std::vector<int> std_vec;
framework::InlinedVector<int, N> vec;
for (size_t i = 0; i < n; ++i) {
int value = rand(); // NOLINT
std_vec.emplace_back(value);
vec.emplace_back(value);
CHECK_EQ(std_vec.size(), vec.size());
CHECK_EQ(std_vec.back(), vec.back());
CHECK_EQ(vec.back(), value);
}
bool is_equal = (std_vec == ToStdVector(vec));
CHECK_EQ(is_equal, true);
for (size_t i = 0; i < n; ++i) {
CHECK_EQ(std_vec.size(), vec.size());
CHECK_EQ(std_vec.back(), vec.back());
std_vec.pop_back();
vec.pop_back();
CHECK_EQ(std_vec.size(), vec.size());
}
CHECK_EQ(std_vec.size(), static_cast<size_t>(0));
CHECK_EQ(vec.size(), static_cast<size_t>(0));
}
TEST(inlined_vector, inlined_vector) {
for (size_t i = 0; i < 20; ++i) {
InlinedVectorCheck<1>(i);
InlinedVectorCheck<10>(i);
InlinedVectorCheck<15>(i);
InlinedVectorCheck<20>(i);
InlinedVectorCheck<21>(i);
InlinedVectorCheck<25>(i);
}
}
} // namespace framework
} // namespace paddle
...@@ -366,9 +366,6 @@ class ExecutionContext { ...@@ -366,9 +366,6 @@ class ExecutionContext {
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>( auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
allocation_ptr, deleter); allocation_ptr, deleter);
PADDLE_ENFORCE(
dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
"The AllocationPtr must be TemporaryAllocation.");
PADDLE_ENFORCE_GE(allocation_ptr->size(), PADDLE_ENFORCE_GE(allocation_ptr->size(),
framework::product(dim) * sizeof(T)); framework::product(dim) * sizeof(T));
......
...@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) ...@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
if (WITH_GPU) if (WITH_GPU)
...@@ -37,30 +38,19 @@ else () ...@@ -37,30 +38,19 @@ else ()
set(AllocatorFacadeDeps) set(AllocatorFacadeDeps)
endif() endif()
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator) cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags) cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps})
cc_library(allocator_facade SRCS allocator_facade.cc DEPS cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
${AllocatorFacadeDeps}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator) cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
if (WITH_TESTING) if (WITH_TESTING)
set_tests_properties(retry_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(retry_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
endif() endif()
......
...@@ -94,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator { ...@@ -94,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_->Allocate(size + kAlignment, attr); underlying_allocator_->Allocate(size + kAlignment, attr);
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size); return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
} }
void FreeImpl(Allocation* allocation) override { delete allocation; }
}; };
} // namespace allocation } // namespace allocation
......
...@@ -19,24 +19,11 @@ ...@@ -19,24 +19,11 @@
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
Allocation::~Allocation() {}
Allocator::~Allocator() {}
bool Allocator::IsAllocThreadSafe() const { return false; } bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) { void Allocator::FreeImpl(Allocation* allocation) {
auto ptr = AllocateImpl(size, attr); Allocator* allocator = allocation->TopDecoratedAllocator();
ptr->set_allocator(this);
return AllocationPtr(ptr);
}
void Allocator::Free(Allocation* allocation) { delete allocation; }
const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
void AllocationDeleter::operator()(Allocation* allocation) const {
auto* allocator = allocation->allocator();
allocator->Free(allocation); allocator->Free(allocation);
} }
......
...@@ -15,8 +15,10 @@ ...@@ -15,8 +15,10 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <string> #include <string>
#include <type_traits>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
namespace paddle { namespace paddle {
...@@ -26,40 +28,73 @@ namespace allocation { ...@@ -26,40 +28,73 @@ namespace allocation {
// Exception when `Alloc`/`AllocShared` failed // Exception when `Alloc`/`AllocShared` failed
class BadAlloc : public std::exception { class BadAlloc : public std::exception {
public: public:
explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {} inline explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {}
const char* what() const noexcept override;
inline const char* what() const noexcept override { return msg_.c_str(); }
private: private:
std::string msg_; std::string msg_;
}; };
class Allocation;
class AllocationDeleter {
public:
void operator()(Allocation* allocation) const;
};
class Allocator; class Allocator;
// Allocation is the object holding the actually pointer. Use // Allocation is the object holding the actually pointer. Use
// `Allocation::ptr()` will returns the pointer that allocated. // `Allocation::ptr()` will returns the pointer that allocated.
// //
// NOTE: this is the base class of Allocation. Each allocator can use its own // NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object. // allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0 // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate a RetryAllocator to any allocator to perform allocation retry when
* first allocation request fails.
*
* Explanations of Allocator design is as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., something what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class Allocation { class Allocation {
public: public:
Allocation(void* ptr, size_t size, platform::Place place) inline Allocation(void* ptr, size_t size, platform::Place place)
: allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {} : ptr_(ptr), size_(size), place_(place) {}
Allocation(const Allocation& o) = delete; Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete;
Allocation(Allocation&& o) = delete;
Allocation& operator=(Allocation&& o) = delete;
// Returns the holding pointer. // Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method // NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later, // as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual // we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`. // method like `defragmentation` to change `ptr_`.
void* ptr() const { return ptr_; } inline void* ptr() const { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element. // last valid element.
...@@ -70,24 +105,38 @@ class Allocation { ...@@ -70,24 +105,38 @@ class Allocation {
// The raw pointer might not aligned, so an offset might be added to raw // The raw pointer might not aligned, so an offset might be added to raw
// the pointer. The size of this allocation will be // the pointer. The size of this allocation will be
// `size + kAlignemnt - offset`. // `size + kAlignemnt - offset`.
size_t size() const { return size_; } inline size_t size() const { return size_; }
inline const platform::Place& place() const { return place_; }
const platform::Place& place() const { return place_; } virtual ~Allocation() {}
Allocator* allocator() { return allocator_; } private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
decorated_allocators_.emplace_back(allocator);
}
void set_allocator(Allocator* allocator) { allocator_ = allocator; } inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); }
virtual ~Allocation(); inline Allocator* TopDecoratedAllocator() {
return decorated_allocators_.back();
}
private: private:
Allocator* allocator_;
void* ptr_; void* ptr_;
size_t size_; size_t size_;
platform::Place place_; platform::Place place_;
};
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>; // NOTE(zjl): Since decorated_allocators_ is usually a small vector
// We reserve a small buffer to it to prevent frequent heap allocation
static constexpr size_t kReserveAllocatorNum = 8;
using DecoratedAllocatorStack =
framework::InlinedVector<Allocator*, kReserveAllocatorNum>;
DecoratedAllocatorStack decorated_allocators_;
friend class Allocator;
};
// Base interface class of memory Allocator. // Base interface class of memory Allocator.
// To allocate a memory, allocator needs two parameters: // To allocate a memory, allocator needs two parameters:
...@@ -126,22 +175,42 @@ class Allocator { ...@@ -126,22 +175,42 @@ class Allocator {
NumOfAttrs = 5 // The number of all attributes. It is used internally. NumOfAttrs = 5 // The number of all attributes. It is used internally.
}; };
virtual ~Allocator(); virtual ~Allocator() {}
class AllocationDeleter {
public:
inline void operator()(Allocation* allocation) const {
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
}
};
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
// Allocate an allocation. // Allocate an allocation.
AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault); inline AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault) {
auto ptr = AllocateImpl(size, attr);
ptr->RegisterDecoratedAllocator(this);
return AllocationPtr(ptr);
}
// This function should not be called outside Allocator class
inline void Free(Allocation* allocation) {
allocation->PopDecoratedAllocator();
FreeImpl(allocation);
}
// True if the `Allocate` is thread safe. // True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const; virtual bool IsAllocThreadSafe() const;
protected: protected:
virtual void Free(Allocation* allocation);
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
virtual void FreeImpl(Allocation* allocation);
private:
friend class AllocationDeleter;
}; };
using AllocationDeleter = Allocator::AllocationDeleter;
using AllocationPtr = Allocator::AllocationPtr;
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -49,6 +49,17 @@ namespace paddle { ...@@ -49,6 +49,17 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
static inline std::shared_ptr<Allocator> WrapRetryAllocator(
std::shared_ptr<Allocator> allocator, int64_t retry_time) {
if (retry_time > 0) {
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time);
allocator.reset(retry_allocator);
}
return allocator;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime. // TODO(yy): Dirty code here. This class should be configurable in runtime.
class CPUManagedAllocator : public Allocator { class CPUManagedAllocator : public Allocator {
public: public:
...@@ -112,14 +123,10 @@ class ChunkedAllocator : public Allocator { ...@@ -112,14 +123,10 @@ class ChunkedAllocator : public Allocator {
std::shared_ptr<Allocator> CreateAllocatorWithChunk() { std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_)); chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get(); auto* allocation = chunks_.back().get();
std::unique_ptr<Allocator> allocator(new LockedAllocator( std::shared_ptr<Allocator> allocator(new LockedAllocator(
std::unique_ptr<Allocator>(new BestFitAllocator(allocation)))); std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
if (retry_time_ > 0) { allocator = WrapRetryAllocator(allocator, retry_time_);
auto* retry_allocator =
new RetryAllocator(std::move(allocator), retry_time_);
allocator.reset(retry_allocator);
}
return std::make_shared<AlignedAllocator<64u>>(std::move(allocator)); return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
} }
...@@ -190,13 +197,23 @@ class AllocatorFacadePrivate { ...@@ -190,13 +197,23 @@ class AllocatorFacadePrivate {
~AllocatorFacadePrivate() = default; ~AllocatorFacadePrivate() = default;
AllocatorFacadePrivate() { AllocatorFacadePrivate() {
if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) { auto strategy = GetAllocatorStrategy();
InitLegacyAllocator(); switch (strategy) {
} else { case AllocatorStrategy::kLegacy: {
InitCPUAllocator(); InitLegacyAllocator();
InitCUDAAllocator(); break;
InitCUDAPinnedAllocator(); }
WrapZeroSizeAllocator(); case AllocatorStrategy::kNaiveBestFit: {
InitCPUAllocator();
InitCUDAAllocator();
InitCUDAPinnedAllocator();
WrapZeroSizeAllocator();
break;
}
default: {
PADDLE_THROW("Unsupported allocator strategy: %d",
static_cast<int>(strategy));
}
} }
} }
...@@ -254,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() { ...@@ -254,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
std::shared_ptr<Allocation> AllocatorFacade::AllocShared( std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) { const platform::Place& place, size_t size, Allocator::Attr attr) {
return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(), return std::shared_ptr<Allocation>(Alloc(place, size, attr));
AllocationDeleter());
} }
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,
......
...@@ -19,16 +19,22 @@ ...@@ -19,16 +19,22 @@
DEFINE_string( DEFINE_string(
allocator_strategy, "legacy", allocator_strategy, "legacy",
"The allocation strategy. Legacy means the original allocator of Fluid." "The allocation strategy. Legacy means the original allocator of Fluid."
"New means the experimental allocators of Fluid. in [legacy, new]"); "naive_best_fit means the experimental best fit allocator. "
"allocator. Enum in [legacy, naive_best_fit].");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
static AllocatorStrategy GetStrategyFromFlag() { static AllocatorStrategy GetStrategyFromFlag() {
return FLAGS_allocator_strategy == "legacy" if (FLAGS_allocator_strategy == "legacy") {
? AllocatorStrategy::kLegacy return AllocatorStrategy::kLegacy;
: AllocatorStrategy::kNaiveBestFit; } else if (FLAGS_allocator_strategy == "naive_best_fit") {
return AllocatorStrategy::kNaiveBestFit;
} else {
PADDLE_THROW("Unsupported allocator strategy: %s",
FLAGS_allocator_strategy);
}
} }
AllocatorStrategy GetAllocatorStrategy() { AllocatorStrategy GetAllocatorStrategy() {
......
...@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const { ...@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
} }
return num; return num;
} }
void BestFitAllocator::Free(Allocation* allocation) { void BestFitAllocator::FreeImpl(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation); auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(bf_allocation, PADDLE_ENFORCE_NOT_NULL(bf_allocation,
"The input allocation is not BestFitAllocation."); "The input allocation is not BestFitAllocation.");
......
...@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator { ...@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void InsertFreeNode(const ListIt& it); void InsertFreeNode(const ListIt& it);
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
......
...@@ -22,11 +22,11 @@ namespace paddle { ...@@ -22,11 +22,11 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator) BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
: underlying_allocator_(std::move(allocator)) { : underlying_allocator_(std::move(allocator)) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_, underlying_allocator_,
"Underlying allocator of BufferedAllocator must be unmanaged"); "Underlying allocator of BufferedAllocator must not be null");
if (underlying_allocator_->IsAllocThreadSafe()) { if (underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
...@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) { ...@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while (!allocations_.empty()) { // free the largest while (!allocations_.empty()) { // free the largest
auto it = --allocations_.end(); auto it = --allocations_.end();
cur += it->second->size(); cur += it->second->size();
delete it->second.release(); underlying_allocator_->Free(it->second.release());
allocations_.erase(it); allocations_.erase(it);
if (cur >= size) return; if (cur >= size) return;
} }
} }
bool BufferedAllocator::IsAllocThreadSafe() const { bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
return this->underlying_allocator_->IsAllocThreadSafe();
} void BufferedAllocator::FreeImpl(Allocation *allocation) {
void BufferedAllocator::Free(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
allocations_.emplace(allocation->size(), AllocationPtr(allocation)); allocations_.emplace(allocation->size(), AllocationPtr(allocation));
} }
Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
{ {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
...@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (it != allocations_.end() && it->first < size * 2) { if (it != allocations_.end() && it->first < size * 2) {
AllocationPtr result(std::move(it->second)); AllocationPtr result(std::move(it->second));
allocations_.erase(it); allocations_.erase(it);
return new AllocationWithUnderlying(std::move(result)); return result.release();
} }
} }
try { try {
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
} catch (BadAlloc &) { } catch (BadAlloc &) {
FreeCache(size); FreeCache(size);
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
} }
} }
......
...@@ -31,7 +31,7 @@ namespace allocation { ...@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_ // underlying_allocator_
class BufferedAllocator : public Allocator { class BufferedAllocator : public Allocator {
public: public:
explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator); explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);
~BufferedAllocator(); ~BufferedAllocator();
...@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator { ...@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void FreeCache(size_t size); void FreeCache(size_t size);
protected: protected:
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::unique_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
std::multimap<size_t, AllocationPtr> allocations_; std::multimap<size_t, AllocationPtr> allocations_;
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h" #include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <memory>
#include <utility> #include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h"
...@@ -66,7 +65,7 @@ class StubAllocator : public Allocator { ...@@ -66,7 +65,7 @@ class StubAllocator : public Allocator {
size_t GetFreeCount() const { return destruct_count_; } size_t GetFreeCount() const { return destruct_count_; }
protected: protected:
void Free(Allocation *allocation) override { void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation); auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc); PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr()); if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
......
...@@ -20,25 +20,27 @@ namespace paddle { ...@@ -20,25 +20,27 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
CPUAllocation::CPUAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CPUPlace()) {}
bool CPUAllocator::IsAllocThreadSafe() const { return true; } bool CPUAllocator::IsAllocThreadSafe() const { return true; }
void CPUAllocator::Free(Allocation *allocation) { void CPUAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation)); void *p = allocation->ptr();
free(allocation->ptr()); #ifdef _WIN32
_aligned_free(p);
#else
free(p);
#endif
delete allocation; delete allocation;
} }
Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
void *ptr; void *p;
auto status = posix_memalign(&ptr, kAlignment, size); #ifdef _WIN32
if (UNLIKELY(status) != 0) { p = _aligned_malloc(size, kAlignment);
throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d", #else
size, status)); PADDLE_ENFORCE_EQ(posix_memalign(&p, kAlignment, size), 0, "Alloc %ld error!",
} size);
return new CPUAllocation(ptr, size); #endif
return new Allocation(p, size, platform::CPUPlace());
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -31,19 +31,13 @@ namespace allocation { ...@@ -31,19 +31,13 @@ namespace allocation {
// //
// NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
// an open-sourced allocator into Paddle. // an open-sourced allocator into Paddle.
class CPUAllocator;
class CPUAllocation : public Allocation {
public:
CPUAllocation(void* ptr, size_t size);
};
class CPUAllocator : public Allocator { class CPUAllocator : public Allocator {
public: public:
constexpr static size_t kAlignment = 64u; constexpr static size_t kAlignment = 4096UL;
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -23,15 +23,14 @@ namespace paddle { ...@@ -23,15 +23,14 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool CUDAAllocator::IsAllocThreadSafe() const { return true; } bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
void CUDAAllocator::Free(Allocation* allocation) { void CUDAAllocator::FreeImpl(Allocation* allocation) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation); PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(allocation->place()),
PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
place_); place_);
PADDLE_ENFORCE(cudaFree(allocation->ptr())); PADDLE_ENFORCE(cudaFree(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
void* ptr; void* ptr;
...@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status))); status, cudaGetErrorString(status)));
} }
return new CUDAAllocation(ptr, size, platform::Place(place_)); return new Allocation(ptr, size, platform::Place(place_));
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -20,13 +20,6 @@ namespace paddle { ...@@ -20,13 +20,6 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
// CUDA System allocator and allocation.
// Just a flag type.
class CUDAAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class CUDAAllocator : public Allocator { class CUDAAllocator : public Allocator {
public: public:
explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {} explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
...@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator { ...@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
......
...@@ -347,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -347,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return tmp_alloc; return tmp_alloc;
} }
void LegacyAllocator::Free(Allocation *allocation) { void LegacyAllocator::FreeImpl(Allocation *allocation) {
boost::apply_visitor( boost::apply_visitor(
legacy::FreeVisitor(allocation->ptr(), allocation->size()), legacy::FreeVisitor(allocation->ptr(), allocation->size()),
allocation->place()); allocation->place());
......
...@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator { ...@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected: protected:
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
private: private:
platform::Place place_; platform::Place place_;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <utility> #include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h" #include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/lock_guard_ptr.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
...@@ -24,26 +25,24 @@ namespace allocation { ...@@ -24,26 +25,24 @@ namespace allocation {
bool LockedAllocator::IsAllocThreadSafe() const { return true; } bool LockedAllocator::IsAllocThreadSafe() const { return true; }
LockedAllocator::LockedAllocator( LockedAllocator::LockedAllocator(
std::unique_ptr<Allocator> &&underlying_allocator) std::shared_ptr<Allocator> underlying_allocator)
: underlying_allocator_(std::move(underlying_allocator)) { : underlying_allocator_(std::move(underlying_allocator)) {
PADDLE_ENFORCE_NOT_NULL(underlying_allocator_); PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
if (!underlying_allocator_->IsAllocThreadSafe()) { if (!underlying_allocator_->IsAllocThreadSafe()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
} }
void LockedAllocator::Free(Allocation *allocation) {
{ void LockedAllocator::FreeImpl(Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
reinterpret_cast<AllocationWithUnderlying *>(allocation) underlying_allocator_->Free(allocation);
->allocation_.reset(); // Destroy inner allocation
}
delete allocation;
} }
Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -24,15 +24,15 @@ namespace allocation { ...@@ -24,15 +24,15 @@ namespace allocation {
// A allocator to make underlying allocator thread safe. // A allocator to make underlying allocator thread safe.
class LockedAllocator : public Allocator { class LockedAllocator : public Allocator {
public: public:
explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator); explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::unique_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_string(allocator_strategy);
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
FLAGS_allocator_strategy = "naive_best_fit";
auto &instance = AllocatorFacade::Instance();
platform::Place place;
size_t size = 1024;
{
place = platform::CPUPlace();
size = 1024;
auto cpu_allocation = instance.Alloc(place, size);
ASSERT_NE(cpu_allocation, nullptr);
ASSERT_NE(cpu_allocation->ptr(), nullptr);
ASSERT_EQ(cpu_allocation->place(), place);
ASSERT_EQ(cpu_allocation->size(), size);
}
#ifdef PADDLE_WITH_CUDA
{
place = platform::CUDAPlace(0);
size = 1024;
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
// Allocate 2GB gpu memory
place = platform::CUDAPlace(0);
size = 2 * static_cast<size_t>(1 << 30);
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
place = platform::CUDAPinnedPlace();
size = (1 << 20);
auto cuda_pinned_allocation =
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
ASSERT_NE(cuda_pinned_allocation, nullptr);
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
ASSERT_EQ(cuda_pinned_allocation->place(), place);
ASSERT_GE(cuda_pinned_allocation->size(), size);
}
#endif
}
} // namespace allocation
} // namespace memory
} // namespace paddle
...@@ -20,20 +20,15 @@ namespace paddle { ...@@ -20,20 +20,15 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
void CPUPinnedAllocator::Free(Allocation *allocation) { void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
Allocator::Attr attr) { Allocator::Attr attr) {
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void *ptr; void *ptr;
PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
return new CPUPinnedAllocation(ptr, size); return new Allocation(ptr, size, platform::CUDAPinnedPlace());
} }
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -20,18 +20,12 @@ namespace memory { ...@@ -20,18 +20,12 @@ namespace memory {
namespace allocation { namespace allocation {
// Allocator uses `cudaHostAlloc` // Allocator uses `cudaHostAlloc`
class CPUPinnedAllocation : public Allocation {
public:
CPUPinnedAllocation(void *ptr, size_t size)
: Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
};
class CPUPinnedAllocator : public Allocator { class CPUPinnedAllocator : public Allocator {
public: public:
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
void Free(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
}; };
......
...@@ -18,25 +18,15 @@ namespace paddle { ...@@ -18,25 +18,15 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
bool RetryAllocator::IsAllocThreadSafe() const { void RetryAllocator::FreeImpl(Allocation* allocation) {
return underlying_allocator_->IsAllocThreadSafe();
}
void RetryAllocator::Free(Allocation* allocation) {
// Delete underlying allocation first. // Delete underlying allocation first.
reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset(); underlying_allocator_->Free(allocation);
{ cv_.notify_all();
// notify all waited allocators, they can try to allocate memory after free.
std::lock_guard<std::mutex> lock(mutex_);
cv_.notify_all();
}
delete allocation;
} }
Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
auto alloc_func = [&, this]() { auto alloc_func = [&, this]() {
return new AllocationWithUnderlying( return underlying_allocator_->Allocate(size, attr).release();
underlying_allocator_->Allocate(size, attr));
}; };
// In fact, we can unify the code of allocation success and failure // In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time // But it would add lock even when allocation success at the first time
......
...@@ -25,32 +25,25 @@ namespace paddle { ...@@ -25,32 +25,25 @@ namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
class RetryAllocator;
class RetryAllocator : public Allocator { class RetryAllocator : public Allocator {
public: public:
RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms) RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
: underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) { : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
EnforceCheck();
}
bool IsAllocThreadSafe() const override;
private:
void EnforceCheck() {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
underlying_allocator_.get(), underlying_allocator_,
"UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator"); "UnderlyingAllocator of RetryAllocator must not be null");
PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(), PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
"UnderlyingAllocator of RetryAllocator must be thread-safe"); "UnderlyingAllocator of RetryAllocator must be thread-safe");
} }
bool IsAllocThreadSafe() const override { return true; }
protected: protected:
void Free(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
private: private:
std::unique_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
std::chrono::milliseconds retry_time_; std::chrono::milliseconds retry_time_;
std::mutex mutex_; std::mutex mutex_;
std::condition_variable cv_; std::condition_variable cv_;
...@@ -58,8 +51,6 @@ class RetryAllocator : public Allocator { ...@@ -58,8 +51,6 @@ class RetryAllocator : public Allocator {
// For debug, We can add an atomic integer to record how many memory sizes are // For debug, We can add an atomic integer to record how many memory sizes are
// waited to allocate // waited to allocate
// std::atomic<size_t> waited_allocate_size_{0}; // std::atomic<size_t> waited_allocate_size_{0};
friend class RetryAllocation;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const { ...@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if (size == 0) { if (size == 0) {
return new ZeroSizeAllocation(place_); return new Allocation(nullptr, 0, place_);
} else { } else {
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size, attr).release();
} }
} }
void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
if (allocation->size() == 0) {
delete allocation;
} else {
underlying_allocator_->Free(allocation);
}
}
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -24,12 +24,6 @@ namespace allocation { ...@@ -24,12 +24,6 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always // The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the // return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr // allocation.ptr() is nullptr
class ZeroSizeAllocation : public Allocation {
public:
explicit ZeroSizeAllocation(const platform::Place& p)
: Allocation(nullptr, 0, p) {}
};
class ZeroSizeAllocator : public Allocator { class ZeroSizeAllocator : public Allocator {
public: public:
ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator, ZeroSizeAllocator(std::shared_ptr<Allocator> underlying_allocator,
...@@ -40,6 +34,7 @@ class ZeroSizeAllocator : public Allocator { ...@@ -40,6 +34,7 @@ class ZeroSizeAllocator : public Allocator {
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include "paddle/fluid/platform/temporary_allocator.h" #include "paddle/fluid/platform/temporary_allocator.h"
#include <memory> #include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64(limit_of_tmp_allocation, -1, DEFINE_int64(limit_of_tmp_allocation, -1,
...@@ -31,38 +30,31 @@ namespace paddle { ...@@ -31,38 +30,31 @@ namespace paddle {
namespace platform { namespace platform {
namespace alloc = memory::allocation; namespace alloc = memory::allocation;
TemporaryAllocation::TemporaryAllocation(
alloc::AllocationPtr &&underlying_allocation)
: Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
underlying_allocation->place()),
underlying_allocation_(std::move(underlying_allocation)) {}
TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) { TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>()); temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
} }
bool TemporaryAllocator::IsAllocThreadSafe() const { return true; } bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }
void TemporaryAllocator::Release(const std::function<void()> &callback) { void TemporaryAllocator::Release(const std::function<void()> &callback) {
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations; std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
{ {
std::unique_lock<std::mutex> lock(mtx_); std::unique_lock<std::mutex> lock(mtx_);
callback(); callback();
t_allocations.swap(temp_mem_map_); t_allocations.swap(temp_mem_map_);
temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>()); temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
wait_delete_mem_ = 0; wait_delete_mem_ = 0;
} }
alloc::AllocationDeleter deleter;
for (auto tmp : *t_allocations) { for (auto tmp : *t_allocations) {
VLOG(10) << "Delete temporary allocation " << tmp.second->ptr() VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
<< " size: " << tmp.second->size(); << " size: " << tmp.second->size();
delete tmp.second; deleter(tmp.second);
} }
} }
void TemporaryAllocator::Free(alloc::Allocation *allocation) { void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(temp_allocation);
if (platform::is_gpu_place(temp_allocation->place())) { if (platform::is_gpu_place(temp_allocation->place())) {
PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_), PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
"The place should be the same."); "The place should be the same.");
...@@ -86,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) { ...@@ -86,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) {
} }
VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr() VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
<< " size: " << temp_allocation->size(); << " size: " << temp_allocation->size();
delete temp_allocation; alloc::AllocationDeleter()(temp_allocation);
} }
size_t TemporaryAllocator::TemporaryAllocationQueueSize() { size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
...@@ -121,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( ...@@ -121,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
} }
// If not find the the available allocation, get allocation from // If not find the the available allocation, get allocation from
// AllocatorFacadeInstance. // AllocatorFacadeInstance.
auto raw_allocation = auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
return temp_mem; return temp_mem.release();
} }
} // namespace platform } // namespace platform
......
...@@ -23,14 +23,6 @@ ...@@ -23,14 +23,6 @@
namespace paddle { namespace paddle {
namespace platform { namespace platform {
class TemporaryAllocation : public memory::allocation::Allocation {
public:
explicit TemporaryAllocation(
memory::allocation::AllocationPtr &&underlying_allocation);
memory::allocation::AllocationPtr underlying_allocation_;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation. * which used by CUDA's async operation.
* *
...@@ -57,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -57,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void SetCallback(const std::function<void()> &callback); void SetCallback(const std::function<void()> &callback);
protected: protected:
void Free(memory::allocation::Allocation *allocation) override; void FreeImpl(memory::allocation::Allocation *allocation) override;
memory::allocation::Allocation *AllocateImpl( memory::allocation::Allocation *AllocateImpl(
size_t size, memory::allocation::Allocator::Attr attr) override; size_t size, memory::allocation::Allocator::Attr attr) override;
...@@ -66,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -66,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform::Place place_; platform::Place place_;
// When the allocation is not held by any variable, it should be placed // When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately. // to temp_mem_map immediately.
std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{ std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
nullptr}; temp_mem_map_{nullptr};
std::mutex mtx_; std::mutex mtx_;
size_t wait_delete_mem_{0}; size_t wait_delete_mem_{0};
std::function<void()> callback_; std::function<void()> callback_;
......
...@@ -357,6 +357,7 @@ PYBIND11_MODULE(core, m) { ...@@ -357,6 +357,7 @@ PYBIND11_MODULE(core, m) {
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<float>(place); self.mutable_data<float>(place);
}) })
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>) .def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>) .def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>) .def("set", PyCPUTensorSetFromArray<double>)
......
...@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) { ...@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf(std::cout, fmt, args...); Fprintf(std::cout, fmt, args...);
} }
template <typename T> inline std::string HumanReadableSize(double f_size) {
std::string HumanReadableSize(T size) {
size_t i = 0; size_t i = 0;
double f_size = static_cast<double>(size);
double orig = f_size; double orig = f_size;
const std::vector<std::string> units( const std::vector<std::string> units(
{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}); {"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"});
while (f_size > 1024) { while (f_size >= 1024) {
f_size /= 1024; f_size /= 1024;
i++; i++;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册