Unverified · Commit 277cf900 authored by 石晓伟, committed by GitHub

splits allocation for pten, test=develop (#38853)

Parent 0efcae86
......@@ -410,8 +410,8 @@ class ExecutionContext {
auto tmp_allocation_ptr = memory::Alloc(dev_ctx, product(dim) * sizeof(T));
auto& deleter = tmp_allocation_ptr.get_deleter();
auto* allocation_ptr = tmp_allocation_ptr.release();
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
allocation_ptr, deleter);
auto shared_allocation =
std::shared_ptr<pten::Allocation>(allocation_ptr, deleter);
PADDLE_ENFORCE_GE(
allocation_ptr->size(), framework::product(dim) * sizeof(T),
......
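The hunk above transfers ownership of a temporary allocation out of a unique_ptr and into a shared_ptr<pten::Allocation>, keeping the original deleter alive. A minimal standalone sketch of that release-plus-deleter pattern, with a stand-in Buffer type instead of the real allocation classes:

#include <cstddef>
#include <iostream>
#include <memory>

struct Buffer { size_t size; };  // stand-in for pten::Allocation

int main() {
  auto deleter = [](Buffer* b) {
    std::cout << "freeing " << b->size << " bytes\n";
    delete b;
  };
  std::unique_ptr<Buffer, decltype(deleter)> tmp(new Buffer{64}, deleter);
  // Grab the deleter, release the raw pointer, and hand both to shared_ptr,
  // mirroring what ExecutionContext does with tmp_allocation_ptr.
  auto d = tmp.get_deleter();
  auto shared = std::shared_ptr<Buffer>(tmp.release(), d);
  std::cout << shared->size << " bytes shared\n";
}  // deleter runs when the last shared_ptr copy dies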
......@@ -17,14 +17,6 @@ limitations under the License. */
DECLARE_bool(use_stream_safe_cuda_allocator);
namespace paddle {
namespace memory {
namespace allocation {
class Allocation;
} // namespace allocation
} // namespace memory
} // namespace paddle
namespace paddle {
namespace framework {
......
......@@ -32,14 +32,6 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
namespace paddle {
namespace memory {
namespace allocation {
class Allocation;
} // namespace allocation
} // namespace memory
} // namespace paddle
namespace paddle {
namespace framework {
......
......@@ -151,8 +151,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(npu_pinned_place)
.get());
paddle::memory::allocation::Allocation* allocation =
npu_pinned_tensor.Holder().get();
pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
npu_pinned_allocator->RecordEvent(
allocation,
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
......
......@@ -183,8 +183,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(npu_pinned_place)
.get());
paddle::memory::allocation::Allocation* allocation =
npu_pinned_tensor.Holder().get();
pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
npu_pinned_allocator->RecordEvent(
allocation,
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
......@@ -241,8 +240,7 @@ void TensorFromVector(const std::vector<T>& src,
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(npu_pinned_place)
.get());
paddle::memory::allocation::Allocation* allocation =
npu_pinned_tensor.Holder().get();
pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
npu_pinned_allocator->RecordEvent(
allocation,
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
......@@ -312,8 +310,7 @@ inline void TensorFromVector(const std::vector<bool>& src,
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(npu_pinned_place)
.get());
paddle::memory::allocation::Allocation* allocation =
npu_pinned_tensor.Holder().get();
pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
npu_pinned_allocator->RecordEvent(
allocation,
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
......
......@@ -223,9 +223,10 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
auto t_place = tensor->place();
paddle::framework::Tensor out;
auto mem_allocation = std::make_shared<paddle::memory::Allocation>(
static_cast<void *>(data), ele_num * sizeof(T),
paddle::platform::CPUPlace());
auto mem_allocation =
std::make_shared<paddle::memory::allocation::Allocation>(
static_cast<void *>(data), ele_num * sizeof(T),
paddle::platform::CPUPlace());
out.ResetHolder(mem_allocation);
if (paddle::platform::is_cpu_place(t_place)) {
......
......@@ -257,9 +257,8 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
size_t memory_size =
GetLiteTensorNumel(*src) *
framework::SizeOfType(GetNativePrecisionType(src->precision()));
std::shared_ptr<memory::allocation::Allocation> holder(
new memory::allocation::Allocation(src_raw_data, memory_size,
GetNativePlace(src->target())));
std::shared_ptr<pten::Allocation> holder(new pten::Allocation(
src_raw_data, memory_size, GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision()));
......
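For reference, the TensorDataShare hunk performs a zero-copy share: externally owned memory is wrapped in a holder that never frees it. A hedged sketch, with Handle standing in for a deleter-less pten::Allocation:

#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

struct Handle {  // stand-in for pten::Allocation without a deleter
  void* data;
  size_t size;
};

int main() {
  std::vector<float> lite_buffer(256, 1.0f);  // owned by the source framework
  auto holder = std::make_shared<Handle>(
      Handle{lite_buffer.data(), lite_buffer.size() * sizeof(float)});
  std::cout << "sharing " << holder->size << " bytes at " << holder->data << "\n";
}  // the holder dies without touching lite_buffer; the source still owns it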
......@@ -23,7 +23,7 @@ namespace allocation {
// For memory address alignment
class AlignedAllocation : public Allocation {
public:
AlignedAllocation(AllocationPtr underlying_allocation, size_t offset)
AlignedAllocation(DecoratedAllocationPtr underlying_allocation, size_t offset)
: Allocation(
reinterpret_cast<uint8_t*>(underlying_allocation->ptr()) + offset,
underlying_allocation->base_ptr(),
......@@ -32,7 +32,7 @@ class AlignedAllocation : public Allocation {
underlying_allocation_(std::move(underlying_allocation)) {}
private:
AllocationPtr underlying_allocation_;
DecoratedAllocationPtr underlying_allocation_;
};
AlignedAllocator::AlignedAllocator(
......@@ -52,13 +52,17 @@ bool AlignedAllocator::IsAllocThreadSafe() const {
return underlying_allocator_->IsAllocThreadSafe();
}
Allocation* AlignedAllocator::AllocateImpl(size_t size) {
pten::Allocation* AlignedAllocator::AllocateImpl(size_t size) {
auto raw_allocation = underlying_allocator_->Allocate(size + alignment_);
size_t offset = AlignedPtrOffset(raw_allocation->ptr(), alignment_);
return new AlignedAllocation(std::move(raw_allocation), offset);
auto* p = new AlignedAllocation(
static_unique_ptr_cast<Allocation>(std::move(raw_allocation)), offset);
return p;
}
void AlignedAllocator::FreeImpl(Allocation* allocation) { delete allocation; }
void AlignedAllocator::FreeImpl(pten::Allocation* allocation) {
delete allocation;
}
} // namespace allocation
} // namespace memory
......
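AlignedAllocator's scheme is over-allocate-then-offset: request size + alignment from the underlying allocator, then shift the returned pointer forward until it is aligned. A self-contained sketch of the arithmetic (AlignedPtrOffset mirrors the helper in this diff; the malloc/free framing is illustrative):

#include <cstdint>
#include <cstdlib>
#include <iostream>

size_t AlignedPtrOffset(const void* ptr, size_t alignment) {
  auto p = reinterpret_cast<uintptr_t>(ptr);
  auto diff = p % alignment;
  return diff == 0 ? 0 : alignment - diff;
}

int main() {
  const size_t alignment = 64, size = 100;
  void* raw = std::malloc(size + alignment);  // the underlying allocation
  size_t offset = AlignedPtrOffset(raw, alignment);
  void* aligned = static_cast<uint8_t*>(raw) + offset;
  std::cout << "offset=" << offset << ", aligned % 64 = "
            << reinterpret_cast<uintptr_t>(aligned) % alignment << "\n";
  std::free(raw);  // AlignedAllocation keeps the raw pointer so the free is exact
}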
......@@ -30,9 +30,9 @@ class AlignedAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
Allocation* AllocateImpl(size_t size) override;
pten::Allocation* AllocateImpl(size_t size) override;
void FreeImpl(Allocation* allocation) override;
void FreeImpl(pten::Allocation* allocation) override;
private:
std::shared_ptr<Allocator> underlying_allocator_;
......
......@@ -18,11 +18,10 @@ namespace paddle {
namespace memory {
namespace allocation {
bool Allocator::IsAllocThreadSafe() const { return false; }
void Allocator::FreeImpl(Allocation* allocation) {
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
void Allocator::FreeImpl(pten::Allocation* allocation) {
static_cast<Allocation*>(allocation)
->TopDecoratedAllocator()
->Free(allocation);
}
} // namespace allocation
......
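This FreeImpl leans on the decorated-allocator chain: every Allocate() pushes the wrapping allocator onto the allocation, and Free() pops the top entry, so deallocation unwinds the decorators in reverse. A simplified sketch of that mechanism, with shortened names for what the real code keeps in decorated_allocators_:

#include <cstdlib>
#include <iostream>
#include <vector>

class Allocator;

class Allocation {
 public:
  explicit Allocation(void* ptr) : ptr_(ptr) {}
  void* ptr() const { return ptr_; }
  void Register(Allocator* a) { chain_.push_back(a); }
  Allocator* Top() const { return chain_.back(); }
  void Pop() { chain_.pop_back(); }

 private:
  void* ptr_;
  std::vector<Allocator*> chain_;  // innermost allocator first
};

class Allocator {
 public:
  virtual ~Allocator() = default;
  Allocation* Allocate(size_t n) {
    Allocation* a = AllocateImpl(n);
    a->Register(this);  // this allocator now tops the chain
    return a;
  }
  void Free(Allocation* a) {
    a->Pop();
    FreeImpl(a);
  }

 protected:
  virtual Allocation* AllocateImpl(size_t n) = 0;
  // Default FreeImpl re-dispatches to whichever decorator is now on top.
  virtual void FreeImpl(Allocation* a) { a->Top()->Free(a); }
};

class CPUAllocator : public Allocator {
 protected:
  Allocation* AllocateImpl(size_t n) override {
    return new Allocation(std::malloc(n));
  }
  void FreeImpl(Allocation* a) override {
    std::free(a->ptr());
    delete a;
  }
};

class LoggingAllocator : public Allocator {  // a decorator
 public:
  explicit LoggingAllocator(Allocator* u) : underlying_(u) {}

 protected:
  Allocation* AllocateImpl(size_t n) override {
    std::cout << "alloc " << n << "\n";
    return underlying_->Allocate(n);  // inner Allocate registers the CPUAllocator
  }
  // Inherits the default FreeImpl: after this decorator is popped,
  // the CPUAllocator is on top and actually frees.

 private:
  Allocator* underlying_;
};

int main() {
  CPUAllocator cpu;
  LoggingAllocator logging(&cpu);
  Allocation* a = logging.Allocate(16);  // chain: [cpu, logging]
  logging.Free(a);                       // pops logging, then cpu frees
}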
......@@ -22,6 +22,7 @@
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/core/allocator.h"
DECLARE_string(allocator_strategy);
......@@ -80,30 +81,19 @@ class Allocator;
* e.g., something like what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* Therefore, `decorated_allocators_` of the new Allocation object
* would
* be a new chain, differing from the underlying Allocation object.
*/
class Allocation {
class Allocation : public pten::Allocation {
public:
inline Allocation(void* ptr, size_t size, platform::Place place)
: ptr_(ptr), base_ptr_(ptr), size_(size), place_(place) {}
inline Allocation(void* ptr, void* base_ptr, size_t size,
platform::Place place)
: ptr_(ptr), base_ptr_(base_ptr), size_(size), place_(place) {}
Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete;
Allocation(Allocation&& o) = delete;
Allocation& operator=(Allocation&& o) = delete;
// Returns the holding pointer.
// NOTE: For performance considerations, it is better not to make this method
// a virtual method. If we want to implement `defragmentation` later,
// we might need to make the `ptr_` field a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
inline void* ptr() const { return ptr_; }
inline void* base_ptr() const {
Allocation(void* ptr, size_t size, platform::Place place)
: pten::Allocation(ptr, size, place), base_ptr_(ptr) {}
Allocation(void* ptr, void* base_ptr, size_t size,
const platform::Place& place)
: pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
void* base_ptr() const {
PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
paddle::platform::errors::Unimplemented(
"base_ptr() is only implemented for auto_growth "
......@@ -112,21 +102,6 @@ class Allocation {
return base_ptr_;
}
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocators might allocate more memory than requested. The size
// could be larger than the request. For example,
// the AlignedAllocator will always allocate memory as size + kAlignment.
// The raw pointer might not be aligned, so an offset might be added to the raw
// pointer. The size of this allocation will be
// `size + kAlignment - offset`.
inline size_t size() const { return size_; }
inline const platform::Place& place() const { return place_; }
virtual ~Allocation() {}
private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
decorated_allocators_.emplace_back(allocator);
......@@ -139,10 +114,7 @@ class Allocation {
}
private:
void* ptr_;
void* base_ptr_; // the point that directly requested from system
size_t size_;
platform::Place place_;
/**
* NOTE(zjl): Since decorated_allocators_ is usually a small vector.
......@@ -162,53 +134,42 @@ class Allocation {
friend class Allocator;
};
using AllocationPtr = pten::Allocator::AllocationPtr;
using DecoratedAllocationPtr =
std::unique_ptr<Allocation, pten::Allocator::DeleterType>;
// Base interface class of memory Allocator.
class Allocator {
class Allocator : public pten::Allocator {
public:
virtual ~Allocator() {}
class AllocationDeleter {
public:
inline void operator()(Allocation* allocation) const {
Allocator* allocator = allocation->TopDecoratedAllocator();
allocator->Free(allocation);
}
};
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
static void AllocationDeleter(pten::Allocation* allocation) {
Allocator* allocator =
static_cast<Allocation*>(allocation)->TopDecoratedAllocator();
allocator->Free(allocation);
}
// Allocate an allocation.
// size may be 0, but it would be too complex if we handle size == 0
// in each Allocator. So we handle size == 0 inside AllocatorFacade
// in our design.
inline AllocationPtr Allocate(size_t size) {
AllocationPtr Allocate(size_t size) override {
auto ptr = AllocateImpl(size);
ptr->RegisterDecoratedAllocator(this);
return AllocationPtr(ptr);
static_cast<Allocation*>(ptr)->RegisterDecoratedAllocator(this);
return AllocationPtr(ptr, AllocationDeleter);
}
// This function should not be called outside Allocator class
inline void Free(Allocation* allocation) {
allocation->PopDecoratedAllocator();
void Free(pten::Allocation* allocation) {
static_cast<Allocation*>(allocation)->PopDecoratedAllocator();
FreeImpl(allocation);
}
inline uint64_t Release(const platform::Place& place) {
return ReleaseImpl(place);
}
// True if the `Allocate` is thread safe.
virtual bool IsAllocThreadSafe() const;
uint64_t Release(const platform::Place& place) { return ReleaseImpl(place); }
protected:
virtual Allocation* AllocateImpl(size_t size) = 0;
virtual void FreeImpl(Allocation* allocation);
virtual pten::Allocation* AllocateImpl(size_t size) = 0;
virtual void FreeImpl(pten::Allocation* allocation);
virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; }
};
using AllocationDeleter = Allocator::AllocationDeleter;
using AllocationPtr = Allocator::AllocationPtr;
inline size_t AlignedSize(size_t size, size_t alignment) {
auto remaining = size % alignment;
return remaining == 0 ? size : size + alignment - remaining;
......@@ -220,6 +181,14 @@ inline size_t AlignedPtrOffset(const void* ptr, size_t alignment) {
return diff == 0 ? 0 : alignment - diff;
}
template <typename Derived, typename Base, typename BaseDel>
decltype(auto) static_unique_ptr_cast(std::unique_ptr<Base, BaseDel>&& p) {
static_assert(std::is_base_of<Base, Derived>::value,
"Derived type must derive from Base.");
auto d = static_cast<Derived*>(p.release());
return std::unique_ptr<Derived, BaseDel>(d, p.get_deleter());
}
} // namespace allocation
} // namespace memory
} // namespace paddle
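static_unique_ptr_cast is the new glue between pten::Allocator::AllocationPtr (a unique_ptr with a type-erased std::function deleter) and the fluid-side DecoratedAllocationPtr. A minimal usage sketch under that assumption:

#include <functional>
#include <iostream>
#include <memory>
#include <type_traits>

struct Base { virtual ~Base() = default; };
struct Derived : Base { int tag = 7; };

// Reproduced from the diff: downcast a unique_ptr while keeping its deleter.
template <typename D, typename B, typename BaseDel>
decltype(auto) static_unique_ptr_cast(std::unique_ptr<B, BaseDel>&& p) {
  static_assert(std::is_base_of<B, D>::value, "D must derive from B.");
  auto d = static_cast<D*>(p.release());
  return std::unique_ptr<D, BaseDel>(d, p.get_deleter());
}

int main() {
  using Del = std::function<void(Base*)>;
  std::unique_ptr<Base, Del> p(new Derived, [](Base* b) { delete b; });
  auto dp = static_unique_ptr_cast<Derived>(std::move(p));
  std::cout << dp->tag << "\n";  // 7; still freed through the preserved deleter
}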
......@@ -94,7 +94,7 @@ class CUDAGraphAllocator
class PrivateAllocation : public Allocation {
public:
PrivateAllocation(CUDAGraphAllocator* allocator,
AllocationPtr underlying_allocation)
DecoratedAllocationPtr underlying_allocation)
: Allocation(
underlying_allocation->ptr(), underlying_allocation->base_ptr(),
underlying_allocation->size(), underlying_allocation->place()),
......@@ -103,7 +103,7 @@ class CUDAGraphAllocator
private:
std::shared_ptr<Allocator> allocator_;
AllocationPtr underlying_allocation_;
DecoratedAllocationPtr underlying_allocation_;
};
explicit CUDAGraphAllocator(const std::shared_ptr<Allocator>& allocator)
......@@ -116,12 +116,14 @@ class CUDAGraphAllocator
}
protected:
Allocation* AllocateImpl(size_t size) {
pten::Allocation* AllocateImpl(size_t size) {
VLOG(10) << "Allocate " << size << " for CUDA Graph";
return new PrivateAllocation(this, underlying_allocator_->Allocate(size));
return new PrivateAllocation(this,
static_unique_ptr_cast<Allocation>(
underlying_allocator_->Allocate(size)));
}
void FreeImpl(Allocation* allocation) {
void FreeImpl(pten::Allocation* allocation) {
VLOG(10) << "delete for CUDA Graph";
delete allocation;
}
......@@ -322,7 +324,7 @@ class AllocatorFacadePrivate {
return static_cast<platform::CUDADeviceContext*>(pool.Get(place))->stream();
}
void RecordStream(std::shared_ptr<Allocation> allocation,
void RecordStream(std::shared_ptr<pten::Allocation> allocation,
const gpuStream_t& stream) {
if (allocation->size() == 0) {
return;
......@@ -339,7 +341,7 @@ class AllocatorFacadePrivate {
}
const gpuStream_t& GetStream(
const std::shared_ptr<Allocation>& allocation) const {
const std::shared_ptr<pten::Allocation>& allocation) const {
const StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
dynamic_cast<const StreamSafeCUDAAllocation*>(allocation.get());
PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
......@@ -391,10 +393,10 @@ class AllocatorFacadePrivate {
bool IsAllocThreadSafe() const override { return true; }
protected:
Allocation* AllocateImpl(size_t size) override {
pten::Allocation* AllocateImpl(size_t size) override {
return new Allocation(nullptr, 0, place_);
}
void FreeImpl(Allocation* allocation) override { delete allocation; }
void FreeImpl(pten::Allocation* allocation) override { delete allocation; }
private:
platform::Place place_;
......@@ -820,9 +822,9 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
}
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size) {
return std::shared_ptr<Allocation>(Alloc(place, size));
return std::shared_ptr<pten::Allocation>(Alloc(place, size));
}
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
......@@ -866,7 +868,7 @@ uint64_t AllocatorFacade::Release(const platform::Place& place) {
->Release(place);
}
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, const platform::Stream& stream) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_ENFORCE_EQ(
......@@ -884,14 +886,14 @@ std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
}
#endif
gpuStream_t s = reinterpret_cast<gpuStream_t>(stream.id());
return std::shared_ptr<Allocation>(Alloc(place, size, s));
return std::shared_ptr<pten::Allocation>(Alloc(place, size, s));
#else
PADDLE_THROW(platform::errors::PreconditionNotMet("Not compiled with GPU."));
#endif
}
bool AllocatorFacade::InSameStream(
const std::shared_ptr<Allocation>& allocation,
const std::shared_ptr<pten::Allocation>& allocation,
const platform::Stream& stream) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_ENFORCE_EQ(
......@@ -962,7 +964,7 @@ uint64_t AllocatorFacade::Release(const platform::CUDAPlace& place,
return m_->GetAllocator(place, stream)->Release(place);
}
void AllocatorFacade::RecordStream(std::shared_ptr<Allocation> allocation,
void AllocatorFacade::RecordStream(std::shared_ptr<pten::Allocation> allocation,
const gpuStream_t& stream) {
PADDLE_ENFORCE_EQ(
FLAGS_use_stream_safe_cuda_allocator, true,
......@@ -983,7 +985,7 @@ void AllocatorFacade::RecordStream(std::shared_ptr<Allocation> allocation,
}
const gpuStream_t& AllocatorFacade::GetStream(
const std::shared_ptr<Allocation>& allocation) const {
const std::shared_ptr<pten::Allocation>& allocation) const {
PADDLE_ENFORCE_EQ(
FLAGS_use_stream_safe_cuda_allocator, true,
platform::errors::Unimplemented(
......
......@@ -42,6 +42,7 @@ using NPUPinnedAllocator = paddle::memory::allocation::NPUPinnedAllocator;
class AllocatorFacadePrivate;
class AllocatorFacade {
public:
using Allocation = pten::Allocation;
AllocatorFacade(const AllocatorFacade& o) = delete;
const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;
~AllocatorFacade();
......
......@@ -45,7 +45,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
chunk_size_(std::max(AlignedSize(chunk_size, alignment), alignment)),
allow_free_idle_chunk_(allow_free_idle_chunk) {}
Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
pten::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
size_t unaligned_size) {
size_t size = AlignedSize(unaligned_size, alignment_);
VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size;
......@@ -78,11 +79,13 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
size_t realloc_size = std::max(size, chunk_size_);
try {
chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
chunks_.emplace_back(static_unique_ptr_cast<Allocation>(
underlying_allocator_->Allocate(realloc_size)));
} catch (BadAlloc &ex) {
if (FLAGS_free_when_no_cache_hit) throw ex;
FreeIdleChunks();
chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
chunks_.emplace_back(static_unique_ptr_cast<Allocation>(
underlying_allocator_->Allocate(realloc_size)));
}
auto *chunk = &(*chunks_.rbegin());
......@@ -104,7 +107,7 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
return new BlockAllocation(block_it);
}
void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
void AutoGrowthBestFitAllocator::FreeImpl(pten::Allocation *allocation) {
VLOG(10) << "Free " << allocation->size()
<< " bytes, ptr = " << allocation->ptr();
std::lock_guard<SpinLock> guard(spinlock_);
......
......@@ -36,9 +36,9 @@ class AutoGrowthBestFitAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; }
protected:
Allocation *AllocateImpl(size_t size) override;
pten::Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override;
void FreeImpl(pten::Allocation *allocation) override;
// Release the memory block which is not used in pool.
uint64_t ReleaseImpl(const platform::Place &place) override {
......@@ -64,10 +64,10 @@ class AutoGrowthBestFitAllocator : public Allocator {
};
struct Chunk {
explicit Chunk(AllocationPtr allocation)
explicit Chunk(DecoratedAllocationPtr allocation)
: allocation_(std::move(allocation)) {}
AllocationPtr allocation_;
DecoratedAllocationPtr allocation_;
List<Block> blocks_;
};
......
......@@ -28,12 +28,12 @@ namespace allocation {
class RecordedAllocator : public Allocator {
protected:
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
allocated_size_ += size;
return new Allocation(malloc(size), size, platform::CPUPlace());
}
void FreeImpl(Allocation *allocation) {
void FreeImpl(pten::Allocation *allocation) {
allocated_size_ -= allocation->size();
free(allocation->ptr());
delete allocation;
......@@ -79,7 +79,7 @@ class LimitedResourceAllocator : public Allocator {
size_t AllocatedSize() const { return allocated_size_; }
protected:
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
if (allocated_size_ + size > capacity_) {
throw BadAlloc("", __FILE__, __LINE__);
}
......@@ -88,7 +88,7 @@ class LimitedResourceAllocator : public Allocator {
return new Allocation(malloc(size), size, platform::CPUPlace());
}
void FreeImpl(Allocation *allocation) {
void FreeImpl(pten::Allocation *allocation) {
allocated_size_ -= allocation->size();
free(allocation->ptr());
delete allocation;
......
......@@ -37,7 +37,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
void* base_ptr = allocation->base_ptr();
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......@@ -56,7 +56,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
void* base_ptr = allocation->base_ptr();
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......@@ -77,7 +77,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
void* base_ptr = allocation->base_ptr();
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......@@ -91,7 +91,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
void ZeroSizeAllocTest() {
AllocationPtr allocation = Alloc(place_, 0);
void* base_ptr = allocation->base_ptr();
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......
......@@ -33,7 +33,7 @@ static int HighestBitPos(size_t N) {
}
}
BestFitAllocator::BestFitAllocator(Allocation* allocation)
BestFitAllocator::BestFitAllocator(pten::Allocation* allocation)
: allocation_(allocation) {
details::Chunk chunk;
chunk.size_ = allocation_->size();
......@@ -115,7 +115,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
}
return num;
}
void BestFitAllocator::FreeImpl(Allocation* allocation) {
void BestFitAllocator::FreeImpl(pten::Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(
bf_allocation,
......@@ -150,7 +150,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) {
InsertFreeNode(chunk_it);
delete allocation;
}
Allocation* BestFitAllocator::AllocateImpl(size_t size) {
pten::Allocation* BestFitAllocator::AllocateImpl(size_t size) {
auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
MapIt map_it;
for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
......
......@@ -108,7 +108,7 @@ class BestFitAllocation : public Allocation {
// the prev-chunk and the next-chunk when possible.
class BestFitAllocator : public Allocator {
public:
explicit BestFitAllocator(Allocation* allocation);
explicit BestFitAllocator(pten::Allocation* allocation);
void* BasePtr() const { return allocation_->ptr(); }
......@@ -127,11 +127,11 @@ class BestFitAllocator : public Allocator {
void InsertFreeNode(const ListIt& it);
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation* allocation) override;
pten::Allocation* AllocateImpl(size_t size) override;
private:
Allocation* allocation_; // not owned
pten::Allocation* allocation_; // not owned
details::ChunkList chunks_;
details::FreeChunkBin free_chunks_;
};
......
......@@ -46,12 +46,13 @@ void BufferedAllocator::FreeCache(size_t size) {
bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
void BufferedAllocator::FreeImpl(Allocation *allocation) {
void BufferedAllocator::FreeImpl(pten::Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
allocations_.emplace(allocation->size(), AllocationPtr(allocation));
allocations_.emplace(allocation->size(),
AllocationPtr(allocation, Allocator::AllocationDeleter));
}
Allocation *BufferedAllocator::AllocateImpl(size_t size) {
pten::Allocation *BufferedAllocator::AllocateImpl(size_t size) {
{
platform::LockGuardPtr<std::mutex> guard(mtx_);
auto it = allocations_.lower_bound(size);
......
......@@ -45,8 +45,8 @@ class BufferedAllocator : public Allocator {
void FreeCache(size_t size);
protected:
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation *allocation) override;
pten::Allocation *AllocateImpl(size_t size) override;
private:
std::shared_ptr<Allocator> underlying_allocator_;
......
......@@ -27,7 +27,7 @@ namespace memory {
namespace allocation {
inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator(
Allocation *allocation, bool thread_safe) {
pten::Allocation *allocation, bool thread_safe) {
std::unique_ptr<Allocator> allocator(new BestFitAllocator(allocation));
if (thread_safe) {
allocator.reset(new LockedAllocator(std::move(allocator)));
......@@ -68,7 +68,7 @@ class StubAllocator : public Allocator {
size_t GetFreeCount() const { return destruct_count_; }
protected:
void FreeImpl(Allocation *allocation) override {
void FreeImpl(pten::Allocation *allocation) override {
auto *alloc = dynamic_cast<StubAllocation *>(allocation);
PADDLE_ENFORCE_NOT_NULL(
alloc, platform::errors::InvalidArgument(
......@@ -77,7 +77,7 @@ class StubAllocator : public Allocator {
++destruct_count_;
delete allocation;
}
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
++construct_count_;
if (size == 0) {
return new StubAllocation(nullptr, 0, platform::CPUPlace());
......
......@@ -24,7 +24,7 @@ namespace allocation {
bool CPUAllocator::IsAllocThreadSafe() const { return true; }
void CPUAllocator::FreeImpl(Allocation *allocation) {
void CPUAllocator::FreeImpl(pten::Allocation *allocation) {
void *p = allocation->ptr();
#ifdef _WIN32
_aligned_free(p);
......@@ -34,7 +34,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
delete allocation;
}
Allocation *CPUAllocator::AllocateImpl(size_t size) {
pten::Allocation *CPUAllocator::AllocateImpl(size_t size) {
void *p;
#ifdef _WIN32
p = _aligned_malloc(size, kAlignment);
......
......@@ -37,8 +37,8 @@ class CPUAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation* allocation) override;
pten::Allocation* AllocateImpl(size_t size) override;
};
} // namespace allocation
} // namespace memory
......
......@@ -32,7 +32,7 @@ namespace paddle {
namespace memory {
namespace allocation {
bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
void CUDAAllocator::FreeImpl(Allocation* allocation) {
void CUDAAllocator::FreeImpl(pten::Allocation* allocation) {
PADDLE_ENFORCE_EQ(
BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_,
platform::errors::PermissionDenied(
......@@ -42,7 +42,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
delete allocation;
}
Allocation* CUDAAllocator::AllocateImpl(size_t size) {
pten::Allocation* CUDAAllocator::AllocateImpl(size_t size) {
std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); });
void* ptr;
......
......@@ -28,8 +28,8 @@ class CUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation* allocation) override;
pten::Allocation* AllocateImpl(size_t size) override;
private:
platform::CUDAPlace place_;
......
......@@ -41,7 +41,7 @@ namespace allocation {
*/
class CUDADeviceContextAllocation : public Allocation {
public:
explicit CUDADeviceContextAllocation(AllocationPtr allocation)
explicit CUDADeviceContextAllocation(DecoratedAllocationPtr allocation)
: Allocation(allocation->ptr(), allocation->base_ptr(),
allocation->size(), allocation->place()),
underlying_allocation_(std::move(allocation)) {}
......@@ -56,7 +56,7 @@ class CUDADeviceContextAllocation : public Allocation {
<< p_allocation;
dev_ctx_->AddStreamCallback([p_allocation] {
VLOG(4) << "Delete CUDADeviceContextAllocation at " << p_allocation;
AllocationDeleter()(p_allocation);
Allocator::AllocationDeleter(p_allocation);
});
}
......@@ -65,7 +65,7 @@ class CUDADeviceContextAllocation : public Allocation {
}
private:
AllocationPtr underlying_allocation_;
DecoratedAllocationPtr underlying_allocation_;
const platform::CUDADeviceContext *dev_ctx_{nullptr};
};
......@@ -102,14 +102,14 @@ class CUDADeviceContextAllocator : public Allocator {
}
protected:
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
PADDLE_ENFORCE_NOT_NULL(
default_stream_,
platform::errors::PreconditionNotMet(
"Default stream is not set for CUDADeviceContextAllocator"));
platform::CUDADeviceGuard guard(place_.device);
auto allocation =
new CUDADeviceContextAllocation(memory::Alloc(place_, size));
auto allocation = new CUDADeviceContextAllocation(
static_unique_ptr_cast<Allocation>(memory::Alloc(place_, size)));
// Wait for the event on stream
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipEventRecord(event_, default_stream_));
......@@ -121,7 +121,7 @@ class CUDADeviceContextAllocator : public Allocator {
return allocation;
}
void FreeImpl(Allocation *allocation) override { delete allocation; }
void FreeImpl(pten::Allocation *allocation) override { delete allocation; }
private:
platform::CUDAPlace place_;
......
......@@ -101,7 +101,7 @@ CUDAVirtualMemAllocator::CUDAVirtualMemAllocator(
bool CUDAVirtualMemAllocator::IsAllocThreadSafe() const { return false; }
void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) {
void CUDAVirtualMemAllocator::FreeImpl(pten::Allocation* allocation) {
PADDLE_ENFORCE_EQ(
BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_,
platform::errors::PermissionDenied(
......@@ -140,7 +140,7 @@ void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) {
delete allocation;
}
Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) {
pten::Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) {
size = AlignedSize(size, granularity_);
CUdeviceptr ptr = virtual_mem_base_ + virtual_mem_alloced_offset_;
......
......@@ -37,8 +37,8 @@ class CUDAVirtualMemAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation* allocation) override;
pten::Allocation* AllocateImpl(size_t size) override;
private:
platform::CUDAPlace place_;
......
......@@ -37,12 +37,12 @@ LockedAllocator::LockedAllocator(
}
}
void LockedAllocator::FreeImpl(Allocation *allocation) {
void LockedAllocator::FreeImpl(pten::Allocation *allocation) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
underlying_allocator_->Free(allocation);
}
Allocation *LockedAllocator::AllocateImpl(size_t size) {
pten::Allocation *LockedAllocator::AllocateImpl(size_t size) {
platform::LockGuardPtr<std::mutex> guard(mtx_);
return underlying_allocator_->Allocate(size).release();
}
......
......@@ -29,8 +29,8 @@ class LockedAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation *allocation) override;
pten::Allocation *AllocateImpl(size_t size) override;
private:
std::shared_ptr<Allocator> underlying_allocator_;
......
......@@ -790,7 +790,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
namespace allocation {
Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
pten::Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_);
auto *tmp_alloc = new Allocation(ptr, size, place_);
platform::MemEvenRecorder::Instance().PushMemRecord(
......@@ -798,7 +798,7 @@ Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
return tmp_alloc;
}
void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
void NaiveBestFitAllocator::FreeImpl(pten::Allocation *allocation) {
boost::apply_visitor(
legacy::FreeVisitor(allocation->ptr(), allocation->size()),
allocation->place());
......
......@@ -34,8 +34,8 @@ class NaiveBestFitAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; }
protected:
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override;
pten::Allocation *AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation *allocation) override;
uint64_t ReleaseImpl(const platform::Place &place) override;
private:
......
......@@ -22,7 +22,7 @@ namespace memory {
namespace allocation {
bool NPUAllocator::IsAllocThreadSafe() const { return true; }
void NPUAllocator::FreeImpl(Allocation* allocation) {
void NPUAllocator::FreeImpl(pten::Allocation* allocation) {
PADDLE_ENFORCE_EQ(
BOOST_GET_CONST(platform::NPUPlace, allocation->place()), place_,
platform::errors::PermissionDenied(
......@@ -32,7 +32,7 @@ void NPUAllocator::FreeImpl(Allocation* allocation) {
delete allocation;
}
Allocation* NPUAllocator::AllocateImpl(size_t size) {
pten::Allocation* NPUAllocator::AllocateImpl(size_t size) {
std::call_once(once_flag_,
[this] { platform::SetNPUDeviceId(place_.device); });
......
......@@ -28,8 +28,8 @@ class NPUAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation* allocation) override;
pten::Allocation* AllocateImpl(size_t size) override;
private:
platform::NPUPlace place_;
......
......@@ -26,7 +26,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
platform::NPUEventQuery(event, &status);
if (status == ACL_EVENT_STATUS_COMPLETE) {
Allocation *allocation = it->first;
auto *allocation = it->first;
void *ptr = allocation->ptr();
free(ptr);
npu_events_.erase(it++);
......@@ -38,7 +38,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
}
}
Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
pten::Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
std::lock_guard<std::mutex> lock(mtx_);
ProcessEventsAndFree();
void *ptr;
......@@ -50,7 +50,7 @@ Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
return new Allocation(ptr, size, platform::NPUPinnedPlace());
}
void NPUPinnedAllocator::FreeImpl(Allocation *allocation) {
void NPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) {
std::lock_guard<std::mutex> lock(mtx_);
void *ptr = allocation->ptr();
auto iter = npu_events_.find(allocation);
......@@ -83,7 +83,7 @@ uint64_t NPUPinnedAllocator::ReleaseImpl(const platform::Place &place) {
return static_cast<uint64_t>(0);
}
void NPUPinnedAllocator::RecordEvent(Allocation *allocation,
void NPUPinnedAllocator::RecordEvent(pten::Allocation *allocation,
aclrtStream stream) {
std::lock_guard<std::mutex> lock(mtx_);
aclrtEvent event = nullptr;
......
......@@ -32,16 +32,16 @@ class NPUPinnedAllocator : public Allocator {
public:
bool IsAllocThreadSafe() const override { return true; }
void ProcessEventsAndFree();
void RecordEvent(Allocation *allocation, aclrtStream stream);
void RecordEvent(pten::Allocation *allocation, aclrtStream stream);
constexpr static size_t kAlignment = 4096UL;
protected:
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override;
pten::Allocation *AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation *allocation) override;
uint64_t ReleaseImpl(const platform::Place &place) override;
private:
std::unordered_map<Allocation *, aclrtEvent> npu_events_;
std::unordered_map<pten::Allocation *, aclrtEvent> npu_events_;
mutable std::mutex mtx_;
};
......
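NPUPinnedAllocator cannot free pinned memory while an NPU stream may still read it, so FreeImpl only queues the allocation and ProcessEventsAndFree releases it once the recorded event completes. A host-only sketch of that bookkeeping, with completion faked by a countdown instead of an aclrtEvent query:

#include <cstdlib>
#include <iostream>
#include <unordered_map>

struct Allocation { void* ptr; };
struct Event { int remaining_queries; };  // "complete" once it reaches 0

std::unordered_map<Allocation*, Event> npu_events;

void RecordEvent(Allocation* a, int delay) { npu_events[a] = Event{delay}; }

void ProcessEventsAndFree() {
  for (auto it = npu_events.begin(); it != npu_events.end();) {
    if (--it->second.remaining_queries <= 0) {  // event completed
      std::free(it->first->ptr);
      delete it->first;
      it = npu_events.erase(it);
    } else {
      ++it;
    }
  }
}

int main() {
  auto* a = new Allocation{std::malloc(64)};
  RecordEvent(a, /*delay=*/2);  // async copy still references a
  ProcessEventsAndFree();       // not complete yet: memory kept alive
  ProcessEventsAndFree();       // complete now: actually freed
  std::cout << "pending events: " << npu_events.size() << "\n";  // 0
}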
......@@ -18,7 +18,7 @@ namespace paddle {
namespace memory {
namespace allocation {
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
void CPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) {
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipHostFree(allocation->ptr()));
#else
......@@ -26,7 +26,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
#endif
delete allocation;
}
Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
pten::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
void *ptr;
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipHostMalloc(&ptr, size, hipHostMallocPortable));
......
......@@ -25,8 +25,8 @@ class CPUPinnedAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation *allocation) override;
pten::Allocation *AllocateImpl(size_t size) override;
};
} // namespace allocation
......
......@@ -39,7 +39,7 @@ class WaitedAllocateSizeGuard {
size_t requested_size_;
};
void RetryAllocator::FreeImpl(Allocation* allocation) {
void RetryAllocator::FreeImpl(pten::Allocation* allocation) {
// Delete underlying allocation first.
size_t size = allocation->size();
underlying_allocator_->Free(allocation);
......@@ -51,7 +51,7 @@ void RetryAllocator::FreeImpl(Allocation* allocation) {
}
}
Allocation* RetryAllocator::AllocateImpl(size_t size) {
pten::Allocation* RetryAllocator::AllocateImpl(size_t size) {
auto alloc_func = [&, this]() {
return underlying_allocator_->Allocate(size).release();
};
......
......@@ -45,8 +45,8 @@ class RetryAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; }
protected:
void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation* allocation) override;
pten::Allocation* AllocateImpl(size_t size) override;
uint64_t ReleaseImpl(const platform::Place& place) override {
return underlying_allocator_->Release(place);
}
......
......@@ -98,12 +98,12 @@ class DummyAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; }
protected:
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
"Here is a test exception, always BadAlloc."));
}
void FreeImpl(Allocation *) override {}
void FreeImpl(pten::Allocation *) override {}
};
TEST(RetryAllocator, RetryAllocatorLastAllocFailure) {
......
......@@ -19,7 +19,7 @@ namespace memory {
namespace allocation {
StreamSafeCUDAAllocation::StreamSafeCUDAAllocation(
AllocationPtr underlying_allocation, gpuStream_t owning_stream)
DecoratedAllocationPtr underlying_allocation, gpuStream_t owning_stream)
: Allocation(underlying_allocation->ptr(),
underlying_allocation->base_ptr(),
underlying_allocation->size(), underlying_allocation->place()),
......@@ -116,7 +116,7 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() {
bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; }
Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
pten::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
ProcessUnfreedAllocations();
VLOG(8) << "Try allocate " << size << " bytes";
AllocationPtr underlying_allocation;
......@@ -136,13 +136,14 @@ Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
throw;
}
StreamSafeCUDAAllocation* allocation = new StreamSafeCUDAAllocation(
std::move(underlying_allocation), default_stream_);
static_unique_ptr_cast<Allocation>(std::move(underlying_allocation)),
default_stream_);
VLOG(8) << "Allocate " << allocation->size() << " bytes at address "
<< allocation->ptr();
return allocation;
}
void StreamSafeCUDAAllocator::FreeImpl(Allocation* allocation) {
void StreamSafeCUDAAllocator::FreeImpl(pten::Allocation* allocation) {
StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
dynamic_cast<StreamSafeCUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
......
......@@ -34,7 +34,7 @@ namespace allocation {
class StreamSafeCUDAAllocation : public Allocation {
public:
StreamSafeCUDAAllocation(AllocationPtr underlying_allocation,
StreamSafeCUDAAllocation(DecoratedAllocationPtr underlying_allocation,
gpuStream_t owning_stream);
void RecordStream(const gpuStream_t &stream);
bool CanBeFreed();
......@@ -42,7 +42,7 @@ class StreamSafeCUDAAllocation : public Allocation {
const gpuStream_t &GetOwningStream() const;
private:
AllocationPtr underlying_allocation_;
DecoratedAllocationPtr underlying_allocation_;
std::map<gpuStream_t, gpuEvent_t> outstanding_event_map_;
gpuStream_t owning_stream_;
SpinLock outstanding_event_map_lock_;
......@@ -57,8 +57,8 @@ class StreamSafeCUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override;
protected:
Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override;
pten::Allocation *AllocateImpl(size_t size) override;
void FreeImpl(pten::Allocation *allocation) override;
uint64_t ReleaseImpl(const platform::Place &place) override;
private:
......
......@@ -32,12 +32,12 @@ struct StubAllocator : public Allocator {
size_t AllocNum() const { return alloc_num_; }
protected:
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
++alloc_num_;
return new Allocation(new uint8_t[size], size, platform::CPUPlace());
}
void FreeImpl(Allocation *allocation) override {
void FreeImpl(pten::Allocation *allocation) override {
delete[] static_cast<uint8_t *>(allocation->ptr());
delete allocation;
--alloc_num_;
......
......@@ -83,11 +83,11 @@ class ThreadLocalCUDAAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; }
protected:
Allocation* AllocateImpl(size_t size) override {
pten::Allocation* AllocateImpl(size_t size) override {
return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->AllocateImpl(
size);
}
void FreeImpl(Allocation* allocation) override {
void FreeImpl(pten::Allocation* allocation) override {
auto* tl_allocation = static_cast<ThreadLocalAllocation*>(allocation);
auto allocator_impl = tl_allocation->GetAllocator();
allocator_impl->FreeImpl(tl_allocation);
......
......@@ -35,7 +35,8 @@ VirtualMemoryAutoGrowthBestFitAllocator::
alignment_(alignment),
place_(place) {}
Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(
size_t size) {
std::lock_guard<SpinLock> guard(spinlock_);
size = AlignedSize(size, alignment_);
auto result = AllocFromFreeBlocks(size);
......@@ -48,7 +49,8 @@ Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
return result;
}
void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(
pten::Allocation *allocation) {
std::lock_guard<SpinLock> guard(spinlock_);
auto block_it = static_cast<BlockAllocation *>(allocation)->block_it_;
TryMergeBlock2Blocks(block_it);
......@@ -225,7 +227,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) {
}
}
Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
size_t size) {
auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
if (iter != free_blocks_.end()) {
......
......@@ -60,12 +60,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; }
protected:
Allocation *AllocateImpl(size_t size) override;
pten::Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override;
void FreeImpl(pten::Allocation *allocation) override;
private:
Allocation *AllocFromFreeBlocks(size_t size);
pten::Allocation *AllocFromFreeBlocks(size_t size);
void ExtendAndMerge(size_t size);
void TryMergeBlock2Blocks(std::list<Block>::iterator iter);
......
......@@ -28,7 +28,7 @@ class DeviceContext;
namespace memory {
using allocation::Allocation;
using pten::Allocation;
using allocation::Allocator;
using allocation::AllocationPtr;
......
......@@ -336,9 +336,8 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
auto* data_alloc_released = data_alloc.release();
auto* col_alloc_released = col_alloc.release();
context.AddStreamCallback([data_alloc_released, col_alloc_released] {
memory::allocation::AllocationDeleter deleter;
deleter(data_alloc_released);
deleter(col_alloc_released);
memory::allocation::Allocator::AllocationDeleter(data_alloc_released);
memory::allocation::Allocator::AllocationDeleter(col_alloc_released);
});
#endif
}
......@@ -466,9 +465,8 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
auto* data_alloc_released = data_alloc.release();
auto* cols_alloc_released = cols_alloc.release();
context.AddStreamCallback([data_alloc_released, cols_alloc_released] {
memory::allocation::AllocationDeleter deleter;
deleter(data_alloc_released);
deleter(cols_alloc_released);
memory::allocation::Allocator::AllocationDeleter(data_alloc_released);
memory::allocation::Allocator::AllocationDeleter(cols_alloc_released);
});
#endif
}
......
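Both functors release their smart pointers before kicking off device work and reclaim the raw pointers inside the stream callback through the now-static AllocationDeleter. A simplified, synchronous sketch of that lifetime hand-off, where the stream is just a callback queue:

#include <functional>
#include <memory>
#include <vector>

struct Allocation { int* data; };  // stand-in for memory::allocation::Allocation

static void AllocationDeleter(Allocation* a) {
  delete[] a->data;
  delete a;
}

std::vector<std::function<void()>> pending;  // stands in for the CUDA stream
void AddStreamCallback(std::function<void()> cb) { pending.push_back(std::move(cb)); }
void DrainStream() {
  for (auto& cb : pending) cb();
  pending.clear();
}

int main() {
  auto alloc = std::unique_ptr<Allocation, void (*)(Allocation*)>(
      new Allocation{new int[8]}, AllocationDeleter);
  auto* released = alloc.release();  // keep the buffer alive past this scope
  AddStreamCallback([released] { AllocationDeleter(released); });
  DrainStream();  // the "stream" completes; memory reclaimed in the callback
}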
......@@ -55,7 +55,7 @@ class MLUDeviceContextAllocation : public Allocation {
<< p_allocation;
dev_ctx_->AddStreamCallback([p_allocation] {
VLOG(4) << "Delete MLUDeviceContextAllocation at " << p_allocation;
AllocationDeleter()(p_allocation);
Allocator::AllocationDeleter(p_allocation);
});
}
......@@ -91,7 +91,7 @@ class MLUDeviceContextAllocator : public Allocator {
}
protected:
Allocation *AllocateImpl(size_t size) override {
pten::Allocation *AllocateImpl(size_t size) override {
PADDLE_ENFORCE_NOT_NULL(
default_stream_,
platform::errors::PreconditionNotMet(
......@@ -105,7 +105,7 @@ class MLUDeviceContextAllocator : public Allocator {
return allocation;
}
void FreeImpl(Allocation *allocation) override { delete allocation; }
void FreeImpl(pten::Allocation *allocation) override { delete allocation; }
private:
platform::MLUPlace place_;
......
......@@ -158,8 +158,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) {
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(npu_pinned_place)
.get());
paddle::memory::allocation::Allocation *allocation =
npu_pinned_tensor.Holder().get();
pten::Allocation *allocation = npu_pinned_tensor.Holder().get();
npu_pinned_allocator->RecordEvent(allocation, GetCurrentNPUStream());
} else {
......
......@@ -53,7 +53,7 @@ size_t PyArray_Size_(PyObject* numpy_data) {
return res;
}
class EagerNumpyAllocation : public paddle::memory::allocation::Allocation {
class EagerNumpyAllocation : public pten::Allocation {
public:
explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype)
: Allocation(
......
cc_library(pten_api_utils SRCS allocator.cc storage.cc tensor_utils.cc DEPS
cc_library(pten_api_utils SRCS storage.cc tensor_utils.cc DEPS
tensor_base convert_utils dense_tensor lod_tensor selected_rows place var_type_traits)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/api/lib/utils/allocator.h"
namespace paddle {
namespace experimental {
memory::Allocator::AllocationDeleter DefaultAllocator::deleter_;
} // namespace experimental
} // namespace paddle
......@@ -22,14 +22,15 @@ limitations under the License. */
namespace paddle {
namespace experimental {
class DefaultAllocator : public pten::Allocator {
class DefaultAllocator : public pten::deprecated::Allocator {
public:
using Allocation = pten::Allocation;
using Allocation = pten::deprecated::Allocation;
explicit DefaultAllocator(const paddle::platform::Place& place)
: place_(place) {}
static void Delete(Allocation* allocation) {
deleter_(allocation->CastContextWithoutCheck<paddle::memory::Allocation>());
paddle::memory::allocation::Allocator::AllocationDeleter(
allocation->CastContextWithoutCheck<paddle::memory::Allocation>());
}
Allocation Allocate(size_t bytes_size) override {
......@@ -42,7 +43,6 @@ class DefaultAllocator : public pten::Allocator {
private:
paddle::platform::Place place_;
static paddle::memory::Allocator::AllocationDeleter deleter_;
};
} // namespace experimental
......
......@@ -20,14 +20,13 @@ namespace experimental {
ExternalStorage::ExternalStorage(void* ptr,
size_t size,
const paddle::platform::Place& place)
: pten::Storage(
std::make_shared<paddle::memory::Allocation>(ptr, size, place)),
: pten::Storage(std::make_shared<pten::Allocation>(ptr, size, place)),
size_(size) {}
ExternalStorage::ExternalStorage(const pten::intrusive_ptr<pten::Storage>& root,
size_t delta,
size_t size)
: Storage(std::make_shared<paddle::memory::Allocation>(
: Storage(std::make_shared<pten::Allocation>(
static_cast<uint8_t*>(root->data()) + delta, size, root->place())),
size_(size) {
PADDLE_ENFORCE_LE(static_cast<size_t>(delta + size),
......
......@@ -307,7 +307,7 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
dst->Resize(src->dims());
dst->set_type(pten::TransToProtoVarType(src->dtype()));
auto storage = src->release();
std::shared_ptr<paddle::memory::allocation::Allocation> holder(
std::shared_ptr<pten::Allocation> holder(
new TensorStorage(std::move(storage)));
dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
......
......@@ -16,8 +16,10 @@ limitations under the License. */
#include <cstdint>
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/core/candidate/allocator.h"
namespace pten {
namespace deprecated {
/// \brief Encapsulates strategies for access/addressing, allocation/
/// deallocation and construction/destruction of objects.
......@@ -147,4 +149,5 @@ inline Allocation Allocate(const std::shared_ptr<Allocator>& a, size_t n) {
return a->Allocate(n);
}
} // namespace deprecated
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <functional>
#include "paddle/fluid/platform/place.h"
namespace pten {
/// \brief Fancy pointer with deleter. The use of this data type
/// is to be compatible with allocators from different frameworks
/// without significant performance loss. This class does not
/// support being inherited.
class Allocation {
public:
using Place = paddle::platform::Place;
using DeleterFnPtr = void (*)(Allocation*);
Allocation() = default;
// Don't own resources, only provide access.
Allocation(void* data, size_t size, const Place& place)
: ptr_(data), size_(size), place_(place) {}
// Own resources.
Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
: ptr_(data), size_(size), deleter_(deleter), place_(place) {}
Allocation(Allocation&& other) noexcept { swap(*this, other); }
Allocation& operator=(Allocation&& other) noexcept {
// Exchange them explicitly so that a move is not
// equivalent to a copy.
swap(*this, other);
return *this;
}
virtual ~Allocation() {
if (deleter_) {
deleter_(this);
}
}
// Returns the holding pointer.
// NOTE: For performance considerations, it is better not to make this method
// a virtual method. If we want to implement `defragmentation` later,
// we might need to make the `ptr_` field a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void* ptr() const noexcept { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocators might allocate more memory than requested. The size
// could be larger than the request. For example,
// the AlignedAllocator will always allocate memory as size + kAlignment.
// The raw pointer might not be aligned, so an offset might be added to the raw
// pointer. The size of this allocation will be
// `size + kAlignment - offset`.
size_t size() const noexcept { return size_; }
void* operator->() const noexcept { return ptr_; }
operator bool() const noexcept { return ptr_; }
const Place& place() const noexcept { return place_; }
DeleterFnPtr deleter() const noexcept { return deleter_; }
protected:
friend void swap(Allocation& a, Allocation& b) noexcept;
void* ptr_{nullptr};
size_t size_{};
DeleterFnPtr deleter_{nullptr};
// TODO(Shixiaowei02): Enum needs to be used instead to reduce
// the construction overhead by more than 50%.
Place place_;
};
inline void swap(Allocation& a, Allocation& b) noexcept {
::std::swap(a.ptr_, b.ptr_);
::std::swap(a.deleter_, b.deleter_);
::std::swap(a.place_, b.place_);
::std::swap(a.size_, b.size_);
}
class Allocator {
public:
using DeleterType = std::function<void(Allocation*)>;
using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
virtual ~Allocator() = default;
virtual AllocationPtr Allocate(size_t bytes_size) = 0;
virtual bool IsAllocThreadSafe() const { return false; }
};
} // namespace pten
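The pten::Allocation above is a fancy pointer: whether it owns its buffer is expressed solely by whether a DeleterFnPtr was supplied. A trimmed, self-contained sketch of the two constructors, with Place standing in for paddle::platform::Place:

#include <cstdlib>
#include <iostream>

struct Place {};  // stand-in for paddle::platform::Place

class Allocation {
 public:
  using DeleterFnPtr = void (*)(Allocation*);
  // Don't own resources, only provide access.
  Allocation(void* data, size_t size, const Place& place)
      : ptr_(data), size_(size), place_(place) {}
  // Own resources: the deleter runs exactly once, on destruction.
  Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
      : ptr_(data), size_(size), deleter_(deleter), place_(place) {}
  virtual ~Allocation() {
    if (deleter_) deleter_(this);
  }
  void* ptr() const noexcept { return ptr_; }
  size_t size() const noexcept { return size_; }

 protected:
  void* ptr_{nullptr};
  size_t size_{};
  DeleterFnPtr deleter_{nullptr};
  Place place_;
};

int main() {
  void* buf = std::malloc(32);
  {
    Allocation view(buf, 32, Place{});  // non-owning: destructor is a no-op
    std::cout << "viewing " << view.size() << " bytes\n";
  }
  Allocation owner(
      buf, 32, [](Allocation* self) { std::free(self->ptr()); }, Place{});
  std::cout << "owning " << owner.size() << " bytes\n";
}  // buf freed here by the owner's deleter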
......@@ -60,6 +60,8 @@ class TensorInplaceVersion {
class DenseTensor : public TensorBase,
public TypeInfoTraits<TensorBase, DenseTensor> {
public:
using Allocator = deprecated::Allocator;
/// \brief Construct a dense tensor and allocate space.
/// \param a The allocator used to allocate space.
/// \param meta The meta data of dense tensor.
......
......@@ -91,6 +91,7 @@ class Storage : public intrusive_ref_counter<Storage> {
class TensorStorage : public Storage {
public:
using Place = paddle::platform::Place;
using Allocator = deprecated::Allocator;
explicit TensorStorage(const std::shared_ptr<Allocator>& a) : alloc_(a) {}
......
......@@ -21,7 +21,7 @@ limitations under the License. */
namespace pten {
namespace tests {
class HostAllocatorSample : public pten::RawAllocator {
class HostAllocatorSample : public pten::deprecated::RawAllocator {
public:
using Place = paddle::platform::Place;
void* Allocate(size_t bytes_size) override {
......@@ -36,8 +36,9 @@ class HostAllocatorSample : public pten::RawAllocator {
Place place_{paddle::platform::CPUPlace()};
};
class FancyAllocator : public pten::Allocator {
class FancyAllocator : public pten::deprecated::Allocator {
public:
using Allocation = pten::deprecated::Allocation;
static void Delete(Allocation* allocation) {
::operator delete(allocation->ptr());
}
......@@ -55,7 +56,7 @@ class FancyAllocator : public pten::Allocator {
template <typename T>
struct CustomAllocator {
using value_type = T;
using Allocator = pten::RawAllocator;
using Allocator = pten::deprecated::RawAllocator;
explicit CustomAllocator(const std::shared_ptr<Allocator>& a) noexcept
: alloc_(a) {}
......
......@@ -24,6 +24,10 @@ limitations under the License. */
namespace pten {
namespace tests {
using RawAllocator = pten::deprecated::RawAllocator;
using Allocator = pten::deprecated::Allocator;
using Allocation = pten::deprecated::Allocation;
template <typename T>
bool host_allocator_test(size_t vector_size) {
std::vector<T> src(vector_size);
......
......@@ -226,7 +226,7 @@ if [ "${HAS_MODIFIED_DEMO_CMAKE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
HAS_MODIFIED_ALLOCATION=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/memory/allocation" || true`
if [ "${HAS_MODIFIED_ALLOCATION}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
echo_line="You must be approved by zhiqiu and Shixiaowei02 for paddle/fluid/memory/allocation.\nIt is being modularized and refactored. Thanks!\n"
check_approval 2 6888866 39303645
check_approval 1 6888866 39303645
fi
HAS_MODIFIED_TENSOR=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/framework/tensor" || true`
......@@ -241,23 +241,6 @@ if [ "${HAS_MODIFIED_TENSOR}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
check_approval 1 22561442 22334008
fi
ALLOCSHARED_FILE_CHANGED=`git diff --name-only --diff-filter=AM upstream/$BRANCH |grep -E "*\.(h|cc)" || true`
if [ "${ALLOCSHARED_FILE_CHANGED}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
ERROR_LINES=""
for TEST_FILE in ${ALLOCSHARED_FILE_CHANGED};
do
HAS_SKIP_CHECK_ALLOC_CI=`git diff -U0 upstream/$BRANCH ${PADDLE_ROOT}/${TEST_FILE} |grep "AllocShared" || true`
if [ "${HAS_SKIP_CHECK_ALLOC_CI}" != "" ]; then
ERROR_LINES="${ERROR_LINES}\n${TEST_FILE}\n${HAS_SKIP_CHECK_ALLOC_CI}\n"
fi
done
if [ "${ERROR_LINES}" != "" ]; then
ERROR_LINES=${ERROR_LINES//+/'\n+\t'}
echo_line="memory::AllocShared is not recommended, because it is being modularized and refactored. Please use memory::Alloc here. Otherwise, please request zhiqiu and Shixiaowei02 review and approve.\n"
check_approval 2 6888866 39303645
fi
fi
ALL_PADDLE_ENFORCE=`git diff -U0 upstream/$BRANCH |grep "^+" |grep -zoE "PADDLE_ENFORCE\(.[^,\);]+.[^;]*\);\s" || true`
if [ "${ALL_PADDLE_ENFORCE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
echo_line="PADDLE_ENFORCE is not recommended. Please use PADDLE_ENFORCE_EQ/NE/GT/GE/LT/LE or PADDLE_ENFORCE_NOT_NULL or PADDLE_ENFORCE_GPU_SUCCESS instead, see [ https://github.com/PaddlePaddle/Paddle/wiki/PADDLE_ENFORCE-Rewriting-Specification ] for details.\nYou must have one RD (chenwhql (Recommend) , luotao1 (Recommend) or lanxianghit) approval for the usage (either add or delete) of PADDLE_ENFORCE.\n${ALL_PADDLE_ENFORCE}\n"
......