Commit 64d94596 authored by sneaxiy

fix allocator_facade bug

Parent e2780623
@@ -74,10 +74,24 @@ class CUDAManagedAllocator : public ManagedAllocator {
   explicit CUDAManagedAllocator(int dev_id) {
     platform::CUDADeviceGuard guard(dev_id);
     max_chunk_size_ = platform::GpuMaxChunkSize();
     raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
         new CUDAAllocator(platform::CUDAPlace(dev_id))));
-    default_allocator_ = std::make_shared<AutoIncrementAllocator>(
-        [this] { return std::move(BestFitAllocatorCreator()); });
+
+    if (max_chunk_size_ == 0) {
+      default_allocator_ = raw_allocator_;
+    } else {
+      size_t available, total;
+      platform::GpuMemoryUsage(&available, &total);
+      size_t capacity = available / max_chunk_size_;
+
+      if (capacity == 1) {
+        default_allocator_ = BestFitAllocatorCreator();
+      } else {
+        default_allocator_ = std::make_shared<AutoIncrementAllocator>(
+            [this] { return std::move(BestFitAllocatorCreator()); }, capacity);
+      }
+    }
+
     auto* cond_allocator = new ConditionalAllocator();
     cond_allocator
@@ -110,9 +124,11 @@ class CUDAManagedAllocator : public ManagedAllocator {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
     return std::make_shared<AlignedAllocator<64u>>(
-        NaiveManagedAllocator::Create(
-            std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
+        NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
+            new LockedAllocator(std::unique_ptr<Allocator>(
+                new BestFitAllocator(allocation))))));
   }
+
   bool IsAllocThreadSafe() const override { return true; }

  private:
......
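To make the new constructor branch above concrete, here is a minimal standalone sketch of the chunk-capacity calculation. The values of available and max_chunk_size are assumed for illustration only; in Paddle they come from platform::GpuMemoryUsage and platform::GpuMaxChunkSize, and the three branches stand in for picking the raw allocator, a single BestFitAllocator, or a pre-sized AutoIncrementAllocator.

#include <cstddef>
#include <cstdio>

int main() {
  // Assumed example values; in Paddle these come from the CUDA runtime.
  size_t available = 6ULL << 30;       // 6 GB of free device memory
  size_t max_chunk_size = 1ULL << 30;  // 1 GB per best-fit chunk

  if (max_chunk_size == 0) {
    // No chunking possible: fall back to the raw CUDA allocator.
    std::printf("use raw allocator\n");
  } else {
    // Upper bound on how many chunks can ever be carved out of free memory.
    size_t capacity = available / max_chunk_size;
    if (capacity == 1) {
      std::printf("single chunk: one BestFitAllocator is enough\n");
    } else {
      std::printf("pre-size AutoIncrementAllocator for %zu allocators\n",
                  capacity);
    }
  }
  return 0;
}

With 6 GB free and 1 GB chunks, the registry is sized for at most 6 underlying allocators, so it never has to grow at allocation time.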
@@ -40,13 +40,18 @@ namespace allocation {
 // allocator. The allocation requests from many threads may be dispatched
 // to the same underlying allocator. So the underlying allocator must be
 // thread safe.
+//
+// NOTE(zjl): Add capacity parameters to constructor. A high-performance
+// thread-safe std::vector with varying size is hard to implement.
+// Fortunately, we can get the total GPU memory and each chunk size.
+// Therefore, we can get the suitable capacity of AutoIncrementAllocator.
 class AutoIncrementAllocator : public ManagedAllocator {
  public:
   // Creator is the method to create ManagedAllocator
   using AllocatorCreator = std::function<std::shared_ptr<ManagedAllocator>()>;

-  explicit AutoIncrementAllocator(AllocatorCreator&& creator)
-      : creator_(std::move(creator)), prev_success_allocator_{0} {}
+  explicit AutoIncrementAllocator(AllocatorCreator&& creator, size_t capacity)
+      : creator_(std::move(creator)), underlying_allocators_(capacity) {}
   std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;
   std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
   bool IsAllocThreadSafe() const override;
@@ -56,15 +61,13 @@ class AutoIncrementAllocator : public ManagedAllocator {
   template <typename Callback>
   inline typename std::result_of<Callback(ManagedAllocator&)>::type
   InvokeOrCreateUnderlyingAllocator(Callback callback) {
-    std::shared_ptr<std::vector<AllocatorCreator::result_type>>
-        underlying_allocators = underlying_allocators_;
-    size_t retry_count = underlying_allocators->size();
-    size_t allocator_num = retry_count;
     auto cur = prev_success_allocator_.load();
+    size_t retry_count = allocator_num_.load();
+    size_t allocator_num = retry_count;
     while (retry_count-- > 0) {  // until there retry count is zero
       try {
-        auto res = callback(*((*underlying_allocators)[cur]));
-        prev_success_allocator_.store(cur);
+        auto res = callback(*underlying_allocators_[cur]);
+        prev_success_allocator_ = cur;
         return std::move(res);
       } catch (BadAlloc&) {
         if (++cur >= allocator_num) {
@@ -77,20 +80,34 @@ class AutoIncrementAllocator : public ManagedAllocator {
       }
     }
     // No suitable allocator
+    // This happens when the first allocator is exhausted and
+    // there are more than 1 allocation requests
+    // In this situation, the first allocation request would success
+    // and the second allocation request would fail if we do not use
+    // the newly created allocator by the first allocation request.
+    for (size_t new_allocator_num = allocator_num_.load();
+         allocator_num < new_allocator_num; ++allocator_num) {
+      try {
+        auto ret = callback(*underlying_allocators_[allocator_num]);
+        prev_success_allocator_ = allocator_num;
+        return std::move(ret);
+      } catch (BadAlloc&) {
+      } catch (...) {
+        std::rethrow_exception(std::current_exception());
+      }
+    }

     ManagedAllocator* new_allocator;
     {
       std::lock_guard<std::mutex> guard(mtx_);
-      auto old_size = underlying_allocators_->size();
-      decltype(underlying_allocators_) new_allocators(
-          new std::vector<AllocatorCreator::result_type>(old_size + 1));
-      for (size_t i = 0; i < old_size; ++i) {
-        (*new_allocators)[i] = (*underlying_allocators_)[i];
-      }
-      (*new_allocators)[old_size] = creator_();
-      new_allocator = (*new_allocators)[old_size].get();
-      underlying_allocators_ = new_allocators;
-      prev_success_allocator_.store(old_size);
+      auto old_size = allocator_num_.load();
+      PADDLE_ENFORCE_LT(old_size, underlying_allocators_.size(),
+                        "Allocator number exceeds capacity %d",
+                        underlying_allocators_.size());
+      underlying_allocators_[old_size] = creator_();
+      new_allocator = underlying_allocators_[old_size].get();
+      prev_success_allocator_ = old_size;
+      allocator_num_.fetch_add(1);
     }

     PADDLE_ENFORCE(
@@ -102,9 +119,8 @@ class AutoIncrementAllocator : public ManagedAllocator {
   AllocatorCreator creator_;

-  // Use std::shared_ptr to ensure thread-safety
-  std::shared_ptr<std::vector<AllocatorCreator::result_type>>
-      underlying_allocators_;
+  std::vector<AllocatorCreator::result_type> underlying_allocators_;
+  std::atomic<size_t> allocator_num_{0};

   // Use std::atomic rather than std::mutex, since std::atomic is usually
   // lock-free
......
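The header change above swaps a copy-on-grow std::shared_ptr<std::vector<...>> for a fixed-capacity vector plus an atomic element count. The sketch below isolates that pattern under assumed names (an AppendOnlyRegistry holding ints instead of allocators): readers index already-published slots without locking, and only the append path, the analogue of creating a new underlying allocator, takes the mutex.

#include <atomic>
#include <cassert>
#include <cstdio>
#include <memory>
#include <mutex>
#include <vector>

class AppendOnlyRegistry {
 public:
  explicit AppendOnlyRegistry(size_t capacity) : slots_(capacity) {}

  // Fast path: readers never lock. Slots below count_ are fully published.
  std::shared_ptr<int> Get(size_t i) const {
    return i < count_.load() ? slots_[i] : nullptr;
  }

  // Slow path: append under the mutex. The vector never reallocates, so a
  // reader holding an index below count_ always sees a valid element.
  size_t Append(std::shared_ptr<int> value) {
    std::lock_guard<std::mutex> guard(mtx_);
    size_t idx = count_.load();
    assert(idx < slots_.size());     // analogue of PADDLE_ENFORCE_LT
    slots_[idx] = std::move(value);  // write the slot first...
    count_.fetch_add(1);             // ...then make it visible to readers
    return idx;
  }

 private:
  std::vector<std::shared_ptr<int>> slots_;  // fixed capacity, never resized
  std::atomic<size_t> count_{0};
  std::mutex mtx_;
};

int main() {
  AppendOnlyRegistry registry(4);
  size_t idx = registry.Append(std::make_shared<int>(42));
  std::printf("slot %zu holds %d\n", idx, *registry.Get(idx));
  return 0;
}

Correctness relies on the slot being written before the count is bumped and on the vector never reallocating, which is exactly why the capacity has to be fixed in the constructor.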
@@ -26,10 +26,11 @@ static int HighestBitPos(size_t N) {
   if (UNLIKELY(N == 0)) {
     return 0;
   } else {
-    // NOTE: here we can use __builtin_clz in GCC.
-    // However, let's use std::log2 for better readability
-    // and trust std::log2's performance.
+#ifdef __GNUC__
+    return sizeof(unsigned int) * 8 - __builtin_clz(N);
+#else
     return static_cast<int>(std::log2(N) + 1);
+#endif
   }
 }
......
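As a quick sanity check outside Paddle, the following compares the __builtin_clz form introduced above with the previous std::log2 form of the highest-bit-position computation over a modest range. It assumes a GCC/Clang-style compiler and 32-bit unsigned int inputs; n == 0 is excluded because the allocator handles it separately and __builtin_clz(0) is undefined.

#include <cassert>
#include <cmath>

// Position of the highest set bit, counting from 1 (so for n == 1 it is 1).
static int HighestBitPosClz(unsigned int n) {
  return static_cast<int>(sizeof(unsigned int)) * 8 - __builtin_clz(n);
}

static int HighestBitPosLog2(unsigned int n) {
  return static_cast<int>(std::log2(n) + 1);
}

int main() {
  // Both forms should agree for every value in the tested range.
  for (unsigned int n = 1; n < (1u << 20); ++n) {
    assert(HighestBitPosClz(n) == HighestBitPosLog2(n));
  }
  return 0;
}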