Commit 2a639d5c authored by sneaxiy

add allocator chain to fix bug

test=develop
Parent 6d8771b5
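Note on the design: previously each `Allocation` held a single `allocator_` pointer, and wrapper allocators (locked, buffered, retry, ...) had to box results in `AllocationWithUnderlying`. This commit instead records every allocator an allocation passes through in a small inline stack, and `Free` unwinds that chain in reverse. Below is a minimal compilable sketch of the flow with simplified, hypothetical names (not the actual Paddle classes):

```cpp
// Sketch of the allocator-chain flow this commit introduces (simplified,
// hypothetical names; the real classes live in paddle/fluid/memory/allocation).
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <vector>

struct Allocator;

struct Allocation {
  void* ptr = nullptr;
  size_t size = 0;
  std::vector<Allocator*> chain;  // the real code uses SmallStack<Allocator*, 8>
};

struct Allocator {
  Allocation* Allocate(size_t size) {
    Allocation* a = AllocateImpl(size);
    a->chain.push_back(this);  // RegisterAllocatorChain(this)
    return a;
  }
  void Free(Allocation* a) {
    a->chain.pop_back();  // PopAllocator(): remove ourselves first
    FreeImpl(a);
  }
  virtual ~Allocator() = default;

 protected:
  virtual Allocation* AllocateImpl(size_t size) = 0;
  // Default FreeImpl: delegate to whichever allocator is now on top of the
  // chain, i.e. the one this allocator wrapped.
  virtual void FreeImpl(Allocation* a) { a->chain.back()->Free(a); }
};

// Leaf allocator: actually owns the memory.
struct CpuAllocator : Allocator {
 protected:
  Allocation* AllocateImpl(size_t size) override {
    auto* a = new Allocation;
    a->ptr = std::malloc(size);
    a->size = size;
    return a;
  }
  void FreeImpl(Allocation* a) override {
    std::free(a->ptr);
    delete a;
  }
};

// Wrapper allocator: adds behavior and relies on the default FreeImpl to
// hand the allocation back down the chain.
struct LoggingAllocator : Allocator {
  explicit LoggingAllocator(Allocator* underlying) : underlying_(underlying) {}

 protected:
  Allocation* AllocateImpl(size_t size) override {
    std::printf("alloc %zu bytes\n", size);
    return underlying_->Allocate(size);  // pushes underlying_ onto the chain
  }

 private:
  Allocator* underlying_;
};

int main() {
  CpuAllocator cpu;
  LoggingAllocator logged(&cpu);
  Allocation* a = logged.Allocate(64);
  assert(a->chain.size() == 2);  // chain is {&cpu, &logged}
  logged.Free(a);  // pops `logged`, default FreeImpl forwards to `cpu`
  return 0;
}
```

In the real code the chain is a `framework::SmallStack<Allocator*, 8>` inside `Allocation`, and the unwinding is triggered by `AllocationDeleter` when the owning `AllocationPtr` is destroyed.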
...
@@ -404,9 +404,6 @@ class ExecutionContext {
     auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
         allocation_ptr, deleter);

-    PADDLE_ENFORCE(
-        dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
-        "The AllocationPtr must be TemporaryAllocation.");
     PADDLE_ENFORCE_GE(allocation_ptr->size(),
                       framework::product(dim) * sizeof(T));
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <array>
#include <cstddef>
#include <deque>

#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
class SmallStack {
static_assert(N > 0, "N must be larger than 0");
public:
inline void push(const T& item) {
if (size_ < N) {
head_[size_] = item;
} else {
tail_.emplace_back(item);
}
++size_;
}
inline void pop() {
PADDLE_ENFORCE(!empty(), "Try to pop element from empty stack.");
if (size_ > N) {
tail_.pop_back();
}
--size_;
}
inline const T& top() const {
PADDLE_ENFORCE(!empty(), "Try to get top element of empty stack.");
return size_ <= N ? head_[size_ - 1] : tail_.back();
}
inline T& top() {
PADDLE_ENFORCE(!empty(), "Try to get top element of empty stack.");
return size_ <= N ? head_[size_ - 1] : tail_.back();
}
inline bool empty() const { return size_ == 0; }
inline size_t size() const { return size_; }
// NOTE: operator[] should only be used in unittests.
T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }
const T& operator[](size_t i) const {
return i < N ? head_[i] : tail_[i - N];
}
private:
T head_[N];
std::deque<T> tail_;
size_t size_{0};
};
} // namespace framework
} // namespace paddle
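A hypothetical standalone exercise of `SmallStack` (not part of the commit): the first `N` pushes stay in the inline `head_` array, later pushes spill into the `tail_` deque, and `pop()` drains the spill first.

```cpp
// Hypothetical standalone exercise of SmallStack (not part of the commit).
#include <cassert>
#include "paddle/fluid/framework/small_stack.h"

int main() {
  paddle::framework::SmallStack<int, 2> stack;
  stack.push(1);
  stack.push(2);  // head_ is now full
  stack.push(3);  // spills into tail_
  assert(stack.size() == 3);
  assert(stack.top() == 3);  // served from tail_.back()
  stack.pop();               // removes the spilled element first
  assert(stack.top() == 2);  // back to the inline array
  assert(!stack.empty());
  return 0;
}
```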
...
@@ -3,8 +3,11 @@
 cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
 cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
 cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
 cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
+cc_library(multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator)
 cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator)
 cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
+cc_test(multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator)

 if (WITH_GPU)
   nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard)
...
@@ -53,6 +56,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
     conditional_allocator
     retry_allocator
     buffered_allocator
+    multi_bin_buffered_allocator
     allocator_strategy
     legacy_allocator
   )
...
...
@@ -93,6 +93,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
         underlying_allocator_->Allocate(size + kAlignment, attr);
     return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
   }
+
+  void FreeImpl(Allocation* allocation) override { delete allocation; }
 };

 }  // namespace allocation
...
...
@@ -26,17 +26,28 @@ Allocator::~Allocator() {}
 bool Allocator::IsAllocThreadSafe() const { return false; }

 AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
+  VLOG(2) << "Alloc allocation on " << typeid(*this).name();
   auto ptr = AllocateImpl(size, attr);
-  ptr->set_allocator(this);
+  ptr->RegisterAllocatorChain(this);
+  VLOG(2) << "Alloc success";
   return AllocationPtr(ptr);
 }

-void Allocator::Free(Allocation* allocation) { delete allocation; }
+void Allocator::FreeImpl(Allocation* allocation) {
+  auto* allocator = allocation->TopAllocator();
+  allocator->Free(allocation);
+}
+
+void Allocator::Free(Allocation* allocation) {
+  VLOG(2) << "Free allocation on " << typeid(*this).name();
+  allocation->PopAllocator();
+  FreeImpl(allocation);
+}

 const char* BadAlloc::what() const noexcept { return msg_.c_str(); }

 void AllocationDeleter::operator()(Allocation* allocation) const {
-  auto* allocator = allocation->allocator();
+  auto* allocator = allocation->TopAllocator();
   allocator->Free(allocation);
 }
...
...
@@ -15,6 +15,8 @@
 #pragma once
 #include <memory>
 #include <string>
+#include <vector>
+#include "paddle/fluid/framework/small_stack.h"
 #include "paddle/fluid/platform/place.h"

 namespace paddle {
...
@@ -47,10 +49,12 @@ class Allocator;
 class Allocation {
  public:
   Allocation(void* ptr, size_t size, platform::Place place)
-      : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
+      : ptr_(ptr), size_(size), place_(place) {}

   Allocation(const Allocation& o) = delete;
   Allocation& operator=(const Allocation& o) = delete;
+  Allocation(Allocation&& o) = delete;
+  Allocation& operator=(Allocation&& o) = delete;

   // Returns the holding pointer.
   // NOTE: For performance consideration, it is better not to make this method
...
@@ -72,17 +76,34 @@
   const platform::Place& place() const { return place_; }

-  Allocator* allocator() { return allocator_; }
+  virtual ~Allocation();

-  void set_allocator(Allocator* allocator) { allocator_ = allocator; }
+  // This function should only be used in unittests.
+  std::vector<Allocator*> GetAllocatorChain() const {
+    std::vector<Allocator*> allocators;
+    allocators.reserve(allocator_chain_.size());
+    for (size_t i = 0; i < allocator_chain_.size(); ++i) {
+      allocators.push_back(allocator_chain_[i]);
+    }
+    return allocators;
+  }

-  virtual ~Allocation();
+ private:
+  inline void RegisterAllocatorChain(Allocator* allocator) {
+    allocator_chain_.push(allocator);
+  }
+
+  inline void PopAllocator() { allocator_chain_.pop(); }
+
+  inline Allocator* TopAllocator() { return allocator_chain_.top(); }

  private:
-  Allocator* allocator_;
   void* ptr_;
   size_t size_;
   platform::Place place_;
+  framework::SmallStack<Allocator*, 8> allocator_chain_;
+
+  friend class Allocator;
+  friend class AllocationDeleter;
 };

 using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
...
@@ -132,9 +153,12 @@ class Allocator {
   // True if the `Allocate` is thread safe.
   virtual bool IsAllocThreadSafe() const;

+  // This function should not be called outside Allocator and AllocationDeleter.
+  void Free(Allocation* allocation);
+
  protected:
-  virtual void Free(Allocation* allocation);
   virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
+  virtual void FreeImpl(Allocation* allocation);

  private:
   friend class AllocationDeleter;
...
...
@@ -27,6 +27,7 @@
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/legacy_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
+#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cpu_info.h"
...
@@ -43,6 +44,8 @@ DEFINE_int64(
     "The retry time (milliseconds) when allocator fails "
     "to allocate memory. No retry if this value is not greater than 0");

+DEFINE_bool(enable_buffered_allocator, false, "Enable buffered_allocator");
+
 namespace paddle {
 namespace memory {
 namespace allocation {
...
@@ -110,8 +113,8 @@ class ChunkedAllocator : public Allocator {
   std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
-    std::unique_ptr<Allocator> allocator(new LockedAllocator(
-        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
+    std::shared_ptr<Allocator> allocator(new LockedAllocator(
+        std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));

     if (retry_time_ > 0) {
       auto* retry_allocator =
...
@@ -119,6 +122,10 @@ class ChunkedAllocator : public Allocator {
       allocator.reset(retry_allocator);
     }

+    if (FLAGS_enable_buffered_allocator) {
+      allocator.reset(new MultiBinBufferedAllocator(allocator));
+    }
+
     return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
   }
...
...
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
   }
   return num;
 }
-void BestFitAllocator::Free(Allocation* allocation) {
+void BestFitAllocator::FreeImpl(Allocation* allocation) {
   auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(bf_allocation,
                           "The input allocation is not BestFitAllocation.");
...
...
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
   void InsertFreeNode(const ListIt& it);

  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;

  private:
...
...
@@ -22,11 +22,11 @@ namespace paddle {
 namespace memory {
 namespace allocation {

-BufferedAllocator::BufferedAllocator(std::unique_ptr<Allocator> &&allocator)
+BufferedAllocator::BufferedAllocator(std::shared_ptr<Allocator> allocator)
     : underlying_allocator_(std::move(allocator)) {
   PADDLE_ENFORCE_NOT_NULL(
       underlying_allocator_,
-      "Underlying allocator of BufferedAllocator must be unmanaged");
+      "Underlying allocator of BufferedAllocator must not be null");
   if (underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
...
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
   while (!allocations_.empty()) {  // free the largest
     auto it = --allocations_.end();
     cur += it->second->size();
-    delete it->second.release();
+    underlying_allocator_->Free(it->second.release());
     allocations_.erase(it);
     if (cur >= size) return;
   }
 }

-bool BufferedAllocator::IsAllocThreadSafe() const {
-  return this->underlying_allocator_->IsAllocThreadSafe();
-}
-void BufferedAllocator::Free(Allocation *allocation) {
+bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
+
+void BufferedAllocator::FreeImpl(Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   allocations_.emplace(allocation->size(), AllocationPtr(allocation));
 }

 Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   {
     platform::LockGuardPtr<std::mutex> guard(mtx_);
...
@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
     if (it != allocations_.end() && it->first < size * 2) {
       AllocationPtr result(std::move(it->second));
       allocations_.erase(it);
-      return new AllocationWithUnderlying(std::move(result));
+      return result.release();
     }
   }

   try {
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   } catch (BadAlloc &) {
     FreeCache(size);
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   }
 }
...
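The reuse rule in `AllocateImpl` above (take the smallest cached block that is large enough, but only if it is smaller than twice the request) can be exercised in isolation. A hypothetical sketch, not part of the commit:

```cpp
// Hypothetical illustration (not part of the commit) of BufferedAllocator's
// reuse rule: reuse the smallest cached block that fits, but only if its
// size is below twice the requested size; otherwise allocate afresh.
#include <cassert>
#include <cstddef>
#include <map>

static bool CanReuse(const std::multimap<size_t, int>& cache, size_t size) {
  auto it = cache.lower_bound(size);  // smallest cached block >= size
  return it != cache.end() && it->first < size * 2;
}

int main() {
  std::multimap<size_t, int> cache{{1024, 0}, {4096, 1}};
  assert(CanReuse(cache, 1000));   // 1024 fits and 1024 < 2000
  assert(!CanReuse(cache, 1500));  // smallest fit is 4096, but 4096 >= 3000
  assert(!CanReuse(cache, 8192));  // nothing in the cache is large enough
  return 0;
}
```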
...
@@ -31,7 +31,7 @@ namespace allocation {
 // underlying_allocator_
 class BufferedAllocator : public Allocator {
  public:
-  explicit BufferedAllocator(std::unique_ptr<Allocator> &&allocator);
+  explicit BufferedAllocator(std::shared_ptr<Allocator> allocator);

   ~BufferedAllocator();
...
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
   void FreeCache(size_t size);

  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;

  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::multimap<size_t, AllocationPtr> allocations_;
   std::unique_ptr<std::mutex> mtx_;
 };
...
...
@@ -64,7 +64,7 @@ class StubAllocator : public Allocator {
   size_t GetFreeCount() const { return destruct_count_; }

  protected:
-  void Free(Allocation *allocation) override {
+  void FreeImpl(Allocation *allocation) override {
     auto *alloc = dynamic_cast<StubAllocation *>(allocation);
     PADDLE_ENFORCE_NOT_NULL(alloc);
     if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
...
...
@@ -25,7 +25,7 @@ CPUAllocation::CPUAllocation(void *ptr, size_t size)
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }

-void CPUAllocator::Free(Allocation *allocation) {
+void CPUAllocator::FreeImpl(Allocation *allocation) {
   PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
   free(allocation->ptr());
   delete allocation;
...
...
@@ -43,7 +43,7 @@ class CPUAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;

  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
 }  // namespace allocation
...
...
@@ -23,13 +23,14 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
-void CUDAAllocator::Free(Allocation* allocation) {
+void CUDAAllocator::FreeImpl(Allocation* allocation) {
   platform::CUDADeviceGuard guard(place_.device);
   auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
   PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
                     place_);
   PADDLE_ENFORCE(cudaFree(allocation->ptr()));
+  VLOG(2) << "cudaFree is called";
   delete allocation;
 }
 Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
...
...
@@ -35,7 +35,7 @@ class CUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;

  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;

  private:
...
...
@@ -336,7 +336,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   return new Allocation(ptr, size, place_);
 }

-void LegacyAllocator::Free(Allocation *allocation) {
+void LegacyAllocator::FreeImpl(Allocation *allocation) {
   boost::apply_visitor(
       legacy::FreeVisitor(allocation->ptr(), allocation->size()),
       allocation->place());
...
...
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
  protected:
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;

  private:
   platform::Place place_;
...
...
@@ -23,26 +23,24 @@ namespace allocation {
 bool LockedAllocator::IsAllocThreadSafe() const { return true; }

 LockedAllocator::LockedAllocator(
-    std::unique_ptr<Allocator> &&underlying_allocator)
+    std::shared_ptr<Allocator> underlying_allocator)
     : underlying_allocator_(std::move(underlying_allocator)) {
   PADDLE_ENFORCE_NOT_NULL(underlying_allocator_);
   if (!underlying_allocator_->IsAllocThreadSafe()) {
     mtx_.reset(new std::mutex());
   }
 }

-void LockedAllocator::Free(Allocation *allocation) {
-  {
-    platform::LockGuardPtr<std::mutex> guard(mtx_);
-    reinterpret_cast<AllocationWithUnderlying *>(allocation)
-        ->allocation_.reset();  // Destroy inner allocation
-  }
-  delete allocation;
+void LockedAllocator::FreeImpl(Allocation *allocation) {
+  platform::LockGuardPtr<std::mutex> guard(mtx_);
+  underlying_allocator_->Free(allocation);
 }

 Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
-  return new AllocationWithUnderlying(
-      underlying_allocator_->Allocate(size, attr));
+  return underlying_allocator_->Allocate(size, attr).release();
 }
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
...
@@ -24,15 +24,15 @@ namespace allocation {
 // An allocator to make the underlying allocator thread safe.
 class LockedAllocator : public Allocator {
  public:
-  explicit LockedAllocator(std::unique_ptr<Allocator> &&underlying_allocator);
+  explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator);
   bool IsAllocThreadSafe() const override;

  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;

  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::unique_ptr<std::mutex> mtx_;
 };
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <algorithm>
#include <limits>
#include "paddle/fluid/platform/lock_guard_ptr.h"
DEFINE_double(tolerant_times, 2,
              "The tolerance multiple applied to a requested size when "
              "matching cached allocations in buffered_allocator");
namespace paddle {
namespace memory {
namespace allocation {
static void CheckAndModifyMemoryDivisionPlan(
std::vector<size_t> *division_plan) {
// Check whether the division plan is strictly sorted
bool is_strictly_sorted = true;
for (size_t i = 1; i < division_plan->size(); ++i) {
if ((*division_plan)[i - 1] >= (*division_plan)[i]) {
is_strictly_sorted = false;
break;
}
}
PADDLE_ENFORCE(is_strictly_sorted, "Division plan must be strictly sorted");
// Insert 0 into and remove SIZE_MAX from the division plan to keep the
// binary search code clean
if (division_plan->empty() || division_plan->front() != 0) {
division_plan->insert(division_plan->begin(), 0);
}
constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
if (division_plan->back() == kSizeTypeMax) {
division_plan->pop_back();
}
PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
}
static std::vector<size_t> GetDefaultDivisionPlan() {
std::vector<size_t> plan;
for (size_t i = 0; i < sizeof(size_t) * 8; ++i) {
plan.push_back(static_cast<size_t>(1) << i);
}
return plan;
}
inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
size_t size) {
return static_cast<size_t>(std::upper_bound(bins.begin(), bins.end(), size) -
bins.begin() - 1);
}
inline static size_t TolerantUpperSize(size_t size) {
return static_cast<size_t>(size * FLAGS_tolerant_times);
}
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator)
: MultiBinBufferedAllocator(std::move(underlying_allocator),
GetDefaultDivisionPlan()) {}
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator,
const std::vector<size_t> &division_plan)
: underlying_allocator_(std::move(underlying_allocator)),
division_plan_(division_plan) {
CheckAndModifyMemoryDivisionPlan(&division_plan_);
allocations_.resize(division_plan_.size());
mtx_.resize(division_plan_.size());
if (underlying_allocator_->IsAllocThreadSafe()) {
for (auto &mtx : mtx_) {
mtx.reset(new std::mutex());
}
}
VLOG(1) << "FLAGS_tolerant_times = " << FLAGS_tolerant_times;
}
void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size());
{
platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
allocations_[bin_index].emplace(allocation->size(),
AllocationPtr(allocation));
}
}
void MultiBinBufferedAllocator::FreeCache(size_t size, size_t bin_index) {
  size_t accumulated_size = 0;
  // FIXME(zjl): free the largest first when there is no extra
  for (size_t i = allocations_.size() - 1; i != static_cast<size_t>(-1); --i) {
    platform::LockGuardPtr<std::mutex> lock(mtx_[i]);
    // Free the largest cached allocation in this bin first, re-fetching
    // --end() each round to avoid decrementing an iterator past begin().
    while (!allocations_[i].empty()) {
      auto it = --allocations_[i].end();
      accumulated_size += it->second->size();
      underlying_allocator_->Free(it->second.release());
      allocations_[i].erase(it);
      if (accumulated_size >= size) {
        return;
      }
    }
  }
}
Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
auto upper_size = TolerantUpperSize(size);
for (; bin_index < division_plan_.size() &&
       upper_size >= division_plan_[bin_index];
     ++bin_index) {
auto &allocation = allocations_[bin_index];
platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
auto it = allocation.lower_bound(size);
if (it != allocation.end() && it->second->size() < upper_size) {
auto ret = std::move(it->second);
allocation.erase(it);
return ret.release();
}
}
try {
return underlying_allocator_->Allocate(size, attr).release();
} catch (BadAlloc &) {
VLOG(2) << "BadAlloc raises, try to free " << size << " caches";
FreeCache(size, bin_index);
return underlying_allocator_->Allocate(size, attr).release();
}
}
} // namespace allocation
} // namespace memory
} // namespace paddle
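To see how the division plan maps a requested size to a bin, here is a hypothetical standalone check of the `FindDivisionPlanBinIndex` logic (not part of the commit), using a shortened power-of-two plan with the leading 0 boundary already inserted:

```cpp
// Hypothetical standalone check (not part of the commit) of the bin lookup:
// FindDivisionPlanBinIndex returns the index of the last boundary <= size.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

static size_t FindBin(const std::vector<size_t>& bins, size_t size) {
  return static_cast<size_t>(
      std::upper_bound(bins.begin(), bins.end(), size) - bins.begin() - 1);
}

int main() {
  // A shortened power-of-two plan, after the leading 0 has been inserted.
  std::vector<size_t> plan = {0, 1, 2, 4, 8, 16};
  assert(FindBin(plan, 0) == 0);
  assert(FindBin(plan, 3) == 2);    // falls into the bin starting at 2
  assert(FindBin(plan, 4) == 3);    // an exact boundary starts a new bin
  assert(FindBin(plan, 100) == 5);  // larger than all boundaries: last bin
  return 0;
}
```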
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class MultiBinBufferedAllocator : public Allocator {
public:
explicit MultiBinBufferedAllocator(
std::shared_ptr<Allocator> underlying_allocator);
MultiBinBufferedAllocator(std::shared_ptr<Allocator> underlying_allocator,
const std::vector<size_t>& division_plan);
bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; }
void ClearCache() { FreeCache(static_cast<size_t>(-1), 0); }
protected:
Allocation* AllocateImpl(size_t size, Attr attr) override;
void FreeImpl(Allocation* allocation) override;
private:
void FreeCache(size_t size, size_t bin_index);
std::shared_ptr<Allocator> underlying_allocator_;
std::vector<std::multimap<size_t, AllocationPtr>> allocations_;
std::vector<size_t> division_plan_;
std::vector<std::unique_ptr<std::mutex>> mtx_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
Allocation *allocation, bool thread_safe) {
std::shared_ptr<Allocator> allocator(new BestFitAllocator(allocation));
if (thread_safe) {
allocator.reset(new LockedAllocator(std::move(allocator)));
}
return std::make_shared<MultiBinBufferedAllocator>(allocator);
}
TEST(buffered_allocator, thread_safety) {
std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
{
auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
}
{
auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
}
}
class StubAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class StubAllocator : public Allocator {
public:
void ResetCounter() {
construct_count_ = 0;
destruct_count_ = 0;
}
size_t GetAllocCount() const { return construct_count_; }
size_t GetFreeCount() const { return destruct_count_; }
protected:
void FreeImpl(Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(alloc);
if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
++destruct_count_;
delete allocation;
}
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
++construct_count_;
if (size == 0) {
return new StubAllocation(nullptr, 0, platform::CPUPlace());
} else {
return new StubAllocation(new uint8_t[size], size, platform::CPUPlace());
}
}
private:
size_t construct_count_ = 0;
size_t destruct_count_ = 0;
};
constexpr size_t kZero = 0;
constexpr size_t kOne = 1;
constexpr size_t kTwo = 2;
TEST(buffered_allocator, lazy_free) {
std::vector<int> original_alloc_size({1022, 1023, 1024, 1025, 1026});
for (auto alloc_size : original_alloc_size) {
auto stub_allocator = std::make_shared<StubAllocator>();
auto *underlying_allocator = stub_allocator.get();
auto allocator =
std::make_shared<MultiBinBufferedAllocator>(stub_allocator);
{
underlying_allocator->ResetCounter();
auto x = allocator->Allocate(alloc_size, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
{
underlying_allocator->ResetCounter();
auto x = allocator->Allocate(900, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
auto y = allocator->Allocate(2048, allocator->kDefault);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
y = nullptr;
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
}
{
underlying_allocator->ResetCounter();
allocator->ClearCache();
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
}
}
}
TEST(buffered_allocator, garbage_collection) {
std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
auto allocator = GetBufferedAllocator(chunk.get(), false);
auto x1 = allocator->Allocate(1600, allocator->kDefault);
auto x2 = allocator->Allocate(400, allocator->kDefault);
x1 = nullptr;
x2 = nullptr;
auto x3 = allocator->Allocate(1600, allocator->kDefault);
ASSERT_NE(x3, nullptr);
ASSERT_NE(x3->ptr(), nullptr);
}
} // namespace allocation
} // namespace memory
} // namespace paddle
...
@@ -20,7 +20,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
-void CPUPinnedAllocator::Free(Allocation *allocation) {
+void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
   PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
   PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
   delete allocation;
...
...
@@ -31,7 +31,7 @@ class CPUPinnedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;

  protected:
-  void Free(Allocation *allocation) override;
+  void FreeImpl(Allocation *allocation) override;
   Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
 };
...
...
@@ -18,25 +18,15 @@ namespace paddle {
 namespace memory {
 namespace allocation {

-bool RetryAllocator::IsAllocThreadSafe() const {
-  return underlying_allocator_->IsAllocThreadSafe();
-}
-
-void RetryAllocator::Free(Allocation* allocation) {
+void RetryAllocator::FreeImpl(Allocation* allocation) {
   // Delete underlying allocation first.
-  reinterpret_cast<AllocationWithUnderlying*>(allocation)->allocation_.reset();
-  {
-    // notify all waited allocators, they can try to allocate memory after free.
-    std::lock_guard<std::mutex> lock(mutex_);
-    cv_.notify_all();
-  }
-  delete allocation;
+  underlying_allocator_->Free(allocation);
+  cv_.notify_all();
 }

 Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   auto alloc_func = [&, this]() {
-    return new AllocationWithUnderlying(
-        underlying_allocator_->Allocate(size, attr));
+    return underlying_allocator_->Allocate(size, attr).release();
   };
   // In fact, we can unify the code of allocation success and failure
   // But it would add lock even when allocation success at the first time
...
...
@@ -24,32 +24,25 @@ namespace paddle {
 namespace memory {
 namespace allocation {

-class RetryAllocator;
-
 class RetryAllocator : public Allocator {
  public:
-  RetryAllocator(std::unique_ptr<Allocator>&& allocator, size_t retry_ms)
+  RetryAllocator(std::shared_ptr<Allocator> allocator, size_t retry_ms)
       : underlying_allocator_(std::move(allocator)), retry_time_(retry_ms) {
-    EnforceCheck();
-  }
-
-  bool IsAllocThreadSafe() const override;
-
- private:
-  void EnforceCheck() {
     PADDLE_ENFORCE_NOT_NULL(
-        underlying_allocator_.get(),
-        "UnderlyingAllocator of RetryAllocator must be UnmanagedAllocator");
+        underlying_allocator_,
+        "UnderlyingAllocator of RetryAllocator must not be null");
     PADDLE_ENFORCE(underlying_allocator_->IsAllocThreadSafe(),
                    "UnderlyingAllocator of RetryAllocator must be thread-safe");
   }

+  bool IsAllocThreadSafe() const override { return true; }
+
  protected:
-  void Free(Allocation* allocation) override;
+  void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;

  private:
-  std::unique_ptr<Allocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
   std::chrono::milliseconds retry_time_;
   std::mutex mutex_;
   std::condition_variable cv_;
...
@@ -57,8 +50,6 @@ class RetryAllocator : public Allocator {
   // For debug, We can add an atomic integer to record how many memory sizes are
   // waited to allocate
   // std::atomic<size_t> waited_allocate_size_{0};
-
-  friend class RetryAllocation;
 };

 }  // namespace allocation
...
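The retry loop in `RetryAllocator::AllocateImpl` is truncated above; based on the members shown (`mutex_`, `cv_`, `retry_time_`) and the `cv_.notify_all()` in `FreeImpl`, its shape is roughly the following. This is a hypothetical reconstruction, not the actual Paddle code:

```cpp
// Hypothetical reconstruction (not the actual Paddle code) of the retry
// strategy suggested by mutex_, cv_, and retry_time_: try the allocation,
// and on BadAlloc wait up to retry_time for a Free() to notify cv_ before
// trying again.
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <stdexcept>

struct BadAlloc : std::runtime_error {
  using std::runtime_error::runtime_error;
};

template <typename AllocFunc>
auto AllocateWithRetry(AllocFunc alloc_func, std::mutex& mutex,
                       std::condition_variable& cv,
                       std::chrono::milliseconds retry_time)
    -> decltype(alloc_func()) {
  try {
    return alloc_func();  // fast path: no lock taken when allocation succeeds
  } catch (BadAlloc&) {
    std::unique_lock<std::mutex> lock(mutex);
    // Wait until some Free() calls cv.notify_all() or the window expires,
    // then retry once and let a second BadAlloc propagate to the caller.
    cv.wait_for(lock, retry_time);
    return alloc_func();
  }
}
```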
...
@@ -22,6 +22,14 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
   return underlying_allocator_->IsAllocThreadSafe();
 }

+void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
+  if (dynamic_cast<ZeroSizeAllocation *>(allocation)) {
+    delete allocation;
+  } else {
+    underlying_allocator_->Free(allocation);
+  }
+}
+
 Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   if (size == 0) {
     return new ZeroSizeAllocation(place_);
...
...
@@ -39,6 +39,7 @@ class ZeroSizeAllocator : public Allocator {
  protected:
   Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
+  void FreeImpl(Allocation* allocation) override;

  private:
   std::shared_ptr<Allocator> underlying_allocator_;
...
...
@@ -29,38 +29,31 @@ namespace paddle {
 namespace platform {
 namespace alloc = memory::allocation;

-TemporaryAllocation::TemporaryAllocation(
-    alloc::AllocationPtr &&underlying_allocation)
-    : Allocation(underlying_allocation->ptr(), underlying_allocation->size(),
-                 underlying_allocation->place()),
-      underlying_allocation_(std::move(underlying_allocation)) {}
-
 TemporaryAllocator::TemporaryAllocator(platform::Place place) : place_(place) {
-  temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
+  temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
 }

 bool TemporaryAllocator::IsAllocThreadSafe() const { return true; }

 void TemporaryAllocator::Release(const std::function<void()> &callback) {
-  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> t_allocations;
+  std::unique_ptr<std::multimap<size_t, alloc::Allocation *>> t_allocations;
   {
     std::unique_lock<std::mutex> lock(mtx_);
     callback();
     t_allocations.swap(temp_mem_map_);
-    temp_mem_map_.reset(new std::multimap<size_t, TemporaryAllocation *>());
+    temp_mem_map_.reset(new std::multimap<size_t, alloc::Allocation *>());
     wait_delete_mem_ = 0;
   }
+
+  alloc::AllocationDeleter deleter;
   for (auto tmp : *t_allocations) {
     VLOG(10) << "Delete temporary allocation " << tmp.second->ptr()
              << " size: " << tmp.second->size();
-    delete tmp.second;
+    deleter(tmp.second);
   }
 }

-void TemporaryAllocator::Free(alloc::Allocation *allocation) {
-  auto *temp_allocation = dynamic_cast<TemporaryAllocation *>(allocation);
-  PADDLE_ENFORCE_NOT_NULL(temp_allocation);
+void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
   if (platform::is_gpu_place(temp_allocation->place())) {
     PADDLE_ENFORCE(platform::is_same_place(temp_allocation->place(), place_),
                    "The place should be the same.");
...
@@ -84,7 +77,6 @@ void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
   }
   VLOG(10) << "Delete temporary allocation " << temp_allocation->ptr()
            << " size: " << temp_allocation->size();
-  delete temp_allocation;
 }

 size_t TemporaryAllocator::TemporaryAllocationQueueSize() {
...
@@ -119,11 +111,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
   }
   // If no available allocation is found, get a new allocation from
   // AllocatorFacade::Instance().
-  auto raw_allocation =
-      alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
-  auto temp_mem = new TemporaryAllocation(std::move(raw_allocation));
+  auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr);
   VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;

-  return temp_mem;
+  return temp_mem.release();
 }

 }  // namespace platform
...
...
@@ -22,14 +22,6 @@
 namespace paddle {
 namespace platform {

-class TemporaryAllocation : public memory::allocation::Allocation {
- public:
-  explicit TemporaryAllocation(
-      memory::allocation::AllocationPtr &&underlying_allocation);
-
-  memory::allocation::AllocationPtr underlying_allocation_;
-};
-
 /*! \brief the TemporaryAllocator is used to alloc the temporary allocation
  * which is used by CUDA's async operations.
  *
...
@@ -56,7 +48,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   void SetCallback(const std::function<void()> &callback);

  protected:
-  void Free(memory::allocation::Allocation *allocation) override;
+  void FreeImpl(memory::allocation::Allocation *allocation) override;
   memory::allocation::Allocation *AllocateImpl(
       size_t size, memory::allocation::Allocator::Attr attr) override;
...
@@ -65,8 +57,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
   platform::Place place_;

   // When the allocation is not held by any variable, it should be placed
   // in temp_mem_map_ immediately.
-  std::unique_ptr<std::multimap<size_t, TemporaryAllocation *>> temp_mem_map_{
-      nullptr};
+  std::unique_ptr<std::multimap<size_t, memory::allocation::Allocation *>>
+      temp_mem_map_{nullptr};
   std::mutex mtx_;
   size_t wait_delete_mem_{0};
   std::function<void()> callback_;
...
...
@@ -308,6 +308,7 @@ PYBIND11_MODULE(core, m) {
            [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
              self.mutable_data<float>(place);
            })
+      .def("_clear", &Tensor::clear)
       .def("set", PyCPUTensorSetFromArray<float>)
       .def("set", PyCPUTensorSetFromArray<int>)
       .def("set", PyCPUTensorSetFromArray<double>)
...
...
@@ -129,6 +129,7 @@ def __bootstrap__():
         'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
         'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
         'fast_eager_deletion_mode', 'allocator_strategy',
+        'enable_buffered_allocator', 'tolerant_times',
         'reader_queue_speed_test_mode', 'print_sub_graph_dir',
         'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
         'enable_parallel_graph', 'multiple_of_cupti_buffer_size',
...