diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 0a46c83a2b3ad78d212fc05f0cedc6b50fe15265..09e4abc77f5734df79bfafe76b4eda705a724bd1 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -410,8 +410,8 @@ class ExecutionContext { auto tmp_allocation_ptr = memory::Alloc(dev_ctx, product(dim) * sizeof(T)); auto& deleter = tmp_allocation_ptr.get_deleter(); auto* allocation_ptr = tmp_allocation_ptr.release(); - auto shared_allocation = std::shared_ptr( - allocation_ptr, deleter); + auto shared_allocation = + std::shared_ptr(allocation_ptr, deleter); PADDLE_ENFORCE_GE( allocation_ptr->size(), framework::product(dim) * sizeof(T), diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc index f11b37825d4f03d0053b7636876676e2199eb4ae..6aa10a058081b888523f81cac926081f8d083e16 100644 --- a/paddle/fluid/framework/tensor.cc +++ b/paddle/fluid/framework/tensor.cc @@ -17,14 +17,6 @@ limitations under the License. */ DECLARE_bool(use_stream_safe_cuda_allocator); -namespace paddle { -namespace memory { -namespace allocation { -class Allocation; -} // namespace allocation -} // namespace memory -} // namespace paddle - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index e86009e9aafeac4d693b340191ef493a64ea4ee5..fcdb837bc80ce435b2a8fa6d4bddc9d3916109ce 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -32,14 +32,6 @@ limitations under the License. 
*/ #include "paddle/pten/core/dense_tensor.h" -namespace paddle { -namespace memory { -namespace allocation { -class Allocation; -} // namespace allocation -} // namespace memory -} // namespace paddle - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 7fd125834a0c31f759ded34d431581997a9fb151..5fd581220097b8a690546e7b6a6e7d01a9ba490b 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -151,8 +151,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(npu_pinned_place) .get()); - paddle::memory::allocation::Allocation* allocation = - npu_pinned_tensor.Holder().get(); + pten::Allocation* allocation = npu_pinned_tensor.Holder().get(); npu_pinned_allocator->RecordEvent( allocation, reinterpret_cast(ctx).stream()); diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index 46eba6a1e41bbe00114824d33a4433dbccba6154..11858e4166595dd756ec51384f6da2fe9277479e 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -183,8 +183,7 @@ void TensorFromArray(const T* src, const size_t& array_size, paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(npu_pinned_place) .get()); - paddle::memory::allocation::Allocation* allocation = - npu_pinned_tensor.Holder().get(); + pten::Allocation* allocation = npu_pinned_tensor.Holder().get(); npu_pinned_allocator->RecordEvent( allocation, reinterpret_cast(ctx).stream()); @@ -241,8 +240,7 @@ void TensorFromVector(const std::vector& src, paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(npu_pinned_place) .get()); - paddle::memory::allocation::Allocation* allocation = - npu_pinned_tensor.Holder().get(); + pten::Allocation* allocation = npu_pinned_tensor.Holder().get(); npu_pinned_allocator->RecordEvent( 
allocation, reinterpret_cast(ctx).stream()); @@ -312,8 +310,7 @@ inline void TensorFromVector(const std::vector& src, paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(npu_pinned_place) .get()); - paddle::memory::allocation::Allocation* allocation = - npu_pinned_tensor.Holder().get(); + pten::Allocation* allocation = npu_pinned_tensor.Holder().get(); npu_pinned_allocator->RecordEvent( allocation, reinterpret_cast(ctx).stream()); diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 01d4dbccd50eaf2c288110562784bdea5a66080b..2f2f4c0ead7604d185ab3ea1066bc9f88c66a2d0 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -223,9 +223,10 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, auto t_place = tensor->place(); paddle::framework::Tensor out; - auto mem_allocation = std::make_shared( - static_cast(data), ele_num * sizeof(T), - paddle::platform::CPUPlace()); + auto mem_allocation = + std::make_shared( + static_cast(data), ele_num * sizeof(T), + paddle::platform::CPUPlace()); out.ResetHolder(mem_allocation); if (paddle::platform::is_cpu_place(t_place)) { diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc index b1e0eb5ef16abacdd5e572c89979503930738c06..0d5cd29a0c57921df24f4fa0c966412c1e6fa207 100644 --- a/paddle/fluid/inference/lite/tensor_utils.cc +++ b/paddle/fluid/inference/lite/tensor_utils.cc @@ -257,9 +257,8 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) { size_t memory_size = GetLiteTensorNumel(*src) * framework::SizeOfType(GetNativePrecisionType(src->precision())); - std::shared_ptr holder( - new memory::allocation::Allocation(src_raw_data, memory_size, - GetNativePlace(src->target()))); + std::shared_ptr holder(new pten::Allocation( + src_raw_data, memory_size, 
GetNativePlace(src->target()))); dst->Resize(paddle::framework::make_ddim(src->shape())); SetLoD(dst->mutable_lod(), src->lod()); dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision())); diff --git a/paddle/fluid/memory/allocation/aligned_allocator.cc b/paddle/fluid/memory/allocation/aligned_allocator.cc index 10380c0d6028d57422e17a7c1dff7845ad0390f1..258cff32b4fca5f0af5bfb837a185831f0140e7c 100644 --- a/paddle/fluid/memory/allocation/aligned_allocator.cc +++ b/paddle/fluid/memory/allocation/aligned_allocator.cc @@ -23,7 +23,7 @@ namespace allocation { // For memory address alignment class AlignedAllocation : public Allocation { public: - AlignedAllocation(AllocationPtr underlying_allocation, size_t offset) + AlignedAllocation(DecoratedAllocationPtr underlying_allocation, size_t offset) : Allocation( reinterpret_cast(underlying_allocation->ptr()) + offset, underlying_allocation->base_ptr(), @@ -32,7 +32,7 @@ class AlignedAllocation : public Allocation { underlying_allocation_(std::move(underlying_allocation)) {} private: - AllocationPtr underlying_allocation_; + DecoratedAllocationPtr underlying_allocation_; }; AlignedAllocator::AlignedAllocator( @@ -52,13 +52,17 @@ bool AlignedAllocator::IsAllocThreadSafe() const { return underlying_allocator_->IsAllocThreadSafe(); } -Allocation* AlignedAllocator::AllocateImpl(size_t size) { +pten::Allocation* AlignedAllocator::AllocateImpl(size_t size) { auto raw_allocation = underlying_allocator_->Allocate(size + alignment_); size_t offset = AlignedPtrOffset(raw_allocation->ptr(), alignment_); - return new AlignedAllocation(std::move(raw_allocation), offset); + auto* p = new AlignedAllocation( + static_unique_ptr_cast(std::move(raw_allocation)), offset); + return p; } -void AlignedAllocator::FreeImpl(Allocation* allocation) { delete allocation; } +void AlignedAllocator::FreeImpl(pten::Allocation* allocation) { + delete allocation; +} } // namespace allocation } // namespace memory diff --git 
a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h index 6fef5cae8d6af32a8b5b2eff0c74ab94410bd070..ffd5ad0fae1b0c4b83f87df877b9dfd71a286fb6 100644 --- a/paddle/fluid/memory/allocation/aligned_allocator.h +++ b/paddle/fluid/memory/allocation/aligned_allocator.h @@ -30,9 +30,9 @@ class AlignedAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - Allocation* AllocateImpl(size_t size) override; + pten::Allocation* AllocateImpl(size_t size) override; - void FreeImpl(Allocation* allocation) override; + void FreeImpl(pten::Allocation* allocation) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc index 4998f3dbb9613abbf5ca67a3d43863d01483b79f..0ef6f5cbab5cce3439378a1da38bf1adebe2e39d 100644 --- a/paddle/fluid/memory/allocation/allocator.cc +++ b/paddle/fluid/memory/allocation/allocator.cc @@ -18,11 +18,10 @@ namespace paddle { namespace memory { namespace allocation { -bool Allocator::IsAllocThreadSafe() const { return false; } - -void Allocator::FreeImpl(Allocation* allocation) { - Allocator* allocator = allocation->TopDecoratedAllocator(); - allocator->Free(allocation); +void Allocator::FreeImpl(pten::Allocation* allocation) { + static_cast(allocation) + ->TopDecoratedAllocator() + ->Free(allocation); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index ee802462ddc943244fc9cbdbcd7cb8cdd52f8e47..3f04d47516377251011174b1382679ba41fdca02 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -22,6 +22,7 @@ #include "paddle/fluid/framework/inlined_vector.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/core/allocator.h" DECLARE_string(allocator_strategy); @@ -80,30 +81,19 @@ class 
Allocator; * e.g., something what is done in AlignedAllocator, etc. * In this case, we should declare a derived class of Allocation, which * contains an underlying Allocation allocated by the underlying allocator. - * Therefore, `decorated_allocators_` of the new Allocation object would + * Therefore, `decorated_allocators_` of the new Allocation object + * would * be a new chain, differing from the underlying Allocation object. */ -class Allocation { +class Allocation : public pten::Allocation { public: - inline Allocation(void* ptr, size_t size, platform::Place place) - : ptr_(ptr), base_ptr_(ptr), size_(size), place_(place) {} - inline Allocation(void* ptr, void* base_ptr, size_t size, - platform::Place place) - : ptr_(ptr), base_ptr_(base_ptr), size_(size), place_(place) {} - - Allocation(const Allocation& o) = delete; - Allocation& operator=(const Allocation& o) = delete; - Allocation(Allocation&& o) = delete; - Allocation& operator=(Allocation&& o) = delete; - - // Returns the holding pointer. - // NOTE: For performance consideration, it is better not to make this method - // as a virtual method. If we want to implement a `defragmentation` later, - // we might need to make `ptr_` field as a protected field, and add a virtual - // method like `defragmentation` to change `ptr_`. - inline void* ptr() const { return ptr_; } - - inline void* base_ptr() const { + Allocation(void* ptr, size_t size, platform::Place place) + : pten::Allocation(ptr, size, place), base_ptr_(ptr) {} + Allocation(void* ptr, void* base_ptr, size_t size, + const platform::Place& place) + : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {} + + void* base_ptr() const { PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth", paddle::platform::errors::Unimplemented( "base_ptr() is only implemented for auto_growth " @@ -112,21 +102,6 @@ class Allocation { return base_ptr_; } - // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the - // last valid element. 
- // - // NOTE: Some allocator might alloc more memory than request. The size - // could larger than its request. For example, - // the AlignedAllocator will always allocate memory as size + kAlignment. - // The raw pointer might not aligned, so an offset might be added to raw - // the pointer. The size of this allocation will be - // `size + kAlignemnt - offset`. - inline size_t size() const { return size_; } - - inline const platform::Place& place() const { return place_; } - - virtual ~Allocation() {} - private: inline void RegisterDecoratedAllocator(Allocator* allocator) { decorated_allocators_.emplace_back(allocator); @@ -139,10 +114,7 @@ class Allocation { } private: - void* ptr_; void* base_ptr_; // the point that directly requested from system - size_t size_; - platform::Place place_; /** * NOTE(zjl): Since decorated_allocators_ is usually a small vector. @@ -162,53 +134,42 @@ class Allocation { friend class Allocator; }; +using AllocationPtr = pten::Allocator::AllocationPtr; +using DecoratedAllocationPtr = + std::unique_ptr; + // Base interface class of memory Allocator. -class Allocator { +class Allocator : public pten::Allocator { public: - virtual ~Allocator() {} - - class AllocationDeleter { - public: - inline void operator()(Allocation* allocation) const { - Allocator* allocator = allocation->TopDecoratedAllocator(); - allocator->Free(allocation); - } - }; - - using AllocationPtr = std::unique_ptr; + static void AllocationDeleter(pten::Allocation* allocation) { + Allocator* allocator = + static_cast(allocation)->TopDecoratedAllocator(); + allocator->Free(allocation); + } // Allocate an allocation. // size may be 0, but it would be too complex if we handle size == 0 // in each Allocator. So we handle size == 0 inside AllocatorFacade // in our design. 
- inline AllocationPtr Allocate(size_t size) { + AllocationPtr Allocate(size_t size) override { auto ptr = AllocateImpl(size); - ptr->RegisterDecoratedAllocator(this); - return AllocationPtr(ptr); + static_cast(ptr)->RegisterDecoratedAllocator(this); + return AllocationPtr(ptr, AllocationDeleter); } - // This function should not be called outside Allocator class - inline void Free(Allocation* allocation) { - allocation->PopDecoratedAllocator(); + void Free(pten::Allocation* allocation) { + static_cast(allocation)->PopDecoratedAllocator(); FreeImpl(allocation); } - inline uint64_t Release(const platform::Place& place) { - return ReleaseImpl(place); - } - - // True if the `Allocate` is thread safe. - virtual bool IsAllocThreadSafe() const; + uint64_t Release(const platform::Place& place) { return ReleaseImpl(place); } protected: - virtual Allocation* AllocateImpl(size_t size) = 0; - virtual void FreeImpl(Allocation* allocation); + virtual pten::Allocation* AllocateImpl(size_t size) = 0; + virtual void FreeImpl(pten::Allocation* allocation); virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; } }; -using AllocationDeleter = Allocator::AllocationDeleter; -using AllocationPtr = Allocator::AllocationPtr; - inline size_t AlignedSize(size_t size, size_t alignment) { auto remaining = size % alignment; return remaining == 0 ? size : size + alignment - remaining; @@ -220,6 +181,14 @@ inline size_t AlignedPtrOffset(const void* ptr, size_t alignment) { return diff == 0 ? 
0 : alignment - diff; } +template +decltype(auto) static_unique_ptr_cast(std::unique_ptr&& p) { + static_assert(std::is_base_of::value, + "Derived type must derive from Base."); + auto d = static_cast(p.release()); + return std::unique_ptr(d, p.get_deleter()); +} + } // namespace allocation } // namespace memory } // namespace paddle diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 9bc2f5461f383fbeba509e6de7e5a81f7f7e2780..474b4fe3d4522d5ba413f3d499406d7a33ab8e55 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -94,7 +94,7 @@ class CUDAGraphAllocator class PrivateAllocation : public Allocation { public: PrivateAllocation(CUDAGraphAllocator* allocator, - AllocationPtr underlying_allocation) + DecoratedAllocationPtr underlying_allocation) : Allocation( underlying_allocation->ptr(), underlying_allocation->base_ptr(), underlying_allocation->size(), underlying_allocation->place()), @@ -103,7 +103,7 @@ class CUDAGraphAllocator private: std::shared_ptr allocator_; - AllocationPtr underlying_allocation_; + DecoratedAllocationPtr underlying_allocation_; }; explicit CUDAGraphAllocator(const std::shared_ptr& allocator) @@ -116,12 +116,14 @@ class CUDAGraphAllocator } protected: - Allocation* AllocateImpl(size_t size) { + pten::Allocation* AllocateImpl(size_t size) { VLOG(10) << "Allocate " << size << " for CUDA Graph"; - return new PrivateAllocation(this, underlying_allocator_->Allocate(size)); + return new PrivateAllocation(this, + static_unique_ptr_cast( + underlying_allocator_->Allocate(size))); } - void FreeImpl(Allocation* allocation) { + void FreeImpl(pten::Allocation* allocation) { VLOG(10) << "delete for CUDA Graph"; delete allocation; } @@ -322,7 +324,7 @@ class AllocatorFacadePrivate { return static_cast(pool.Get(place))->stream(); } - void RecordStream(std::shared_ptr allocation, + void RecordStream(std::shared_ptr 
allocation, const gpuStream_t& stream) { if (allocation->size() == 0) { return; @@ -339,7 +341,7 @@ class AllocatorFacadePrivate { } const gpuStream_t& GetStream( - const std::shared_ptr& allocation) const { + const std::shared_ptr& allocation) const { const StreamSafeCUDAAllocation* stream_safe_cuda_allocation = dynamic_cast(allocation.get()); PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation, @@ -391,10 +393,10 @@ class AllocatorFacadePrivate { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation* AllocateImpl(size_t size) override { + pten::Allocation* AllocateImpl(size_t size) override { return new Allocation(nullptr, 0, place_); } - void FreeImpl(Allocation* allocation) override { delete allocation; } + void FreeImpl(pten::Allocation* allocation) override { delete allocation; } private: platform::Place place_; @@ -820,9 +822,9 @@ const std::shared_ptr& AllocatorFacade::GetAllocator( return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1); } -std::shared_ptr AllocatorFacade::AllocShared( +std::shared_ptr AllocatorFacade::AllocShared( const platform::Place& place, size_t size) { - return std::shared_ptr(Alloc(place, size)); + return std::shared_ptr(Alloc(place, size)); } AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, @@ -866,7 +868,7 @@ uint64_t AllocatorFacade::Release(const platform::Place& place) { ->Release(place); } -std::shared_ptr AllocatorFacade::AllocShared( +std::shared_ptr AllocatorFacade::AllocShared( const platform::Place& place, size_t size, const platform::Stream& stream) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PADDLE_ENFORCE_EQ( @@ -884,14 +886,14 @@ std::shared_ptr AllocatorFacade::AllocShared( } #endif gpuStream_t s = reinterpret_cast(stream.id()); - return std::shared_ptr(Alloc(place, size, s)); + return std::shared_ptr(Alloc(place, size, s)); #else PADDLE_THROW(platform::errors::PreconditionNotMet("Not compiled with GPU.")); #endif } bool 
AllocatorFacade::InSameStream( - const std::shared_ptr& allocation, + const std::shared_ptr& allocation, const platform::Stream& stream) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PADDLE_ENFORCE_EQ( @@ -962,7 +964,7 @@ uint64_t AllocatorFacade::Release(const platform::CUDAPlace& place, return m_->GetAllocator(place, stream)->Release(place); } -void AllocatorFacade::RecordStream(std::shared_ptr allocation, +void AllocatorFacade::RecordStream(std::shared_ptr allocation, const gpuStream_t& stream) { PADDLE_ENFORCE_EQ( FLAGS_use_stream_safe_cuda_allocator, true, @@ -983,7 +985,7 @@ void AllocatorFacade::RecordStream(std::shared_ptr allocation, } const gpuStream_t& AllocatorFacade::GetStream( - const std::shared_ptr& allocation) const { + const std::shared_ptr& allocation) const { PADDLE_ENFORCE_EQ( FLAGS_use_stream_safe_cuda_allocator, true, platform::errors::Unimplemented( diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h index d59ecaece5a70f461b8443d06488ba6f2fe5446b..76e2f0b5a94f6ddae8e8fb6281bdfcf70f10b76c 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.h +++ b/paddle/fluid/memory/allocation/allocator_facade.h @@ -42,6 +42,7 @@ using NPUPinnedAllocator = paddle::memory::allocation::NPUPinnedAllocator; class AllocatorFacadePrivate; class AllocatorFacade { public: + using Allocation = pten::Allocation; AllocatorFacade(const AllocatorFacade& o) = delete; const AllocatorFacade& operator=(const AllocatorFacade& o) = delete; ~AllocatorFacade(); diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc index dd2a65d889d8d98ddde9910d28d9ea68811862aa..ad62af8480f58381a1e77368d2a2613d82df2b64 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc @@ -45,7 +45,8 @@ 
AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator( chunk_size_(std::max(AlignedSize(chunk_size, alignment), alignment)), allow_free_idle_chunk_(allow_free_idle_chunk) {} -Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) { +pten::Allocation *AutoGrowthBestFitAllocator::AllocateImpl( + size_t unaligned_size) { size_t size = AlignedSize(unaligned_size, alignment_); VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size; @@ -78,11 +79,13 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) { size_t realloc_size = std::max(size, chunk_size_); try { - chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size)); + chunks_.emplace_back(static_unique_ptr_cast( + underlying_allocator_->Allocate(realloc_size))); } catch (BadAlloc &ex) { if (FLAGS_free_when_no_cache_hit) throw ex; FreeIdleChunks(); - chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size)); + chunks_.emplace_back(static_unique_ptr_cast( + underlying_allocator_->Allocate(realloc_size))); } auto *chunk = &(*chunks_.rbegin()); @@ -104,7 +107,7 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) { return new BlockAllocation(block_it); } -void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) { +void AutoGrowthBestFitAllocator::FreeImpl(pten::Allocation *allocation) { VLOG(10) << "Free " << allocation->size() << " bytes, ptr = " << allocation->ptr(); std::lock_guard guard(spinlock_); diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h index 2334a1b6d4d55285f49a08938d8625b818dddcc8..94aff93ec50f85b1725359d53fa64eb58c28219c 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h @@ -36,9 +36,9 @@ class AutoGrowthBestFitAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } 
protected: - Allocation *AllocateImpl(size_t size) override; + pten::Allocation *AllocateImpl(size_t size) override; - void FreeImpl(Allocation *allocation) override; + void FreeImpl(pten::Allocation *allocation) override; // Release the memory block which is not used in pool. uint64_t ReleaseImpl(const platform::Place &place) override { @@ -64,10 +64,10 @@ class AutoGrowthBestFitAllocator : public Allocator { }; struct Chunk { - explicit Chunk(AllocationPtr allocation) + explicit Chunk(DecoratedAllocationPtr allocation) : allocation_(std::move(allocation)) {} - AllocationPtr allocation_; + DecoratedAllocationPtr allocation_; List blocks_; }; diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc index 926af8292d2e8664af0b80273a6853d8a62f5cf7..5942fbe730e5705f9ed891aaa088d83fa06fcb67 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc @@ -28,12 +28,12 @@ namespace allocation { class RecordedAllocator : public Allocator { protected: - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { allocated_size_ += size; return new Allocation(malloc(size), size, platform::CPUPlace()); } - void FreeImpl(Allocation *allocation) { + void FreeImpl(pten::Allocation *allocation) { allocated_size_ -= allocation->size(); free(allocation->ptr()); delete allocation; @@ -79,7 +79,7 @@ class LimitedResourceAllocator : public Allocator { size_t AllocatedSize() const { return allocated_size_; } protected: - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { if (allocated_size_ + size > capacity_) { throw BadAlloc("", __FILE__, __LINE__); } @@ -88,7 +88,7 @@ class LimitedResourceAllocator : public Allocator { return new Allocation(malloc(size), size, platform::CPUPlace()); } - void 
FreeImpl(Allocation *allocation) { + void FreeImpl(pten::Allocation *allocation) { allocated_size_ -= allocation->size(); free(allocation->ptr()); delete allocation; diff --git a/paddle/fluid/memory/allocation/base_ptr_test.cu b/paddle/fluid/memory/allocation/base_ptr_test.cu index a34750a5e34ba161c73ad736ab2d282470d0db24..5edabfcb9f5e7efab1242da5f5c091bebcf74c11 100644 --- a/paddle/fluid/memory/allocation/base_ptr_test.cu +++ b/paddle/fluid/memory/allocation/base_ptr_test.cu @@ -37,7 +37,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { size_t size = dis_(random_engine_); AllocationPtr allocation = Alloc(place_, size); - void* base_ptr = allocation->base_ptr(); + void* base_ptr = static_cast(allocation.get())->base_ptr(); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); @@ -56,7 +56,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { size_t size = dis_(random_engine_); AllocationPtr allocation = Alloc(place_, size); - void* base_ptr = allocation->base_ptr(); + void* base_ptr = static_cast(allocation.get())->base_ptr(); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); @@ -77,7 +77,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { size_t size = dis_(random_engine_); AllocationPtr allocation = Alloc(place_, size); - void* base_ptr = allocation->base_ptr(); + void* base_ptr = static_cast(allocation.get())->base_ptr(); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); @@ -91,7 +91,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { void ZeroSizeAllocTest() { AllocationPtr allocation = Alloc(place_, 0); - void* base_ptr = allocation->base_ptr(); + void* base_ptr = static_cast(allocation.get())->base_ptr(); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, 
system_ptr); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index 0955b5212622f775e3e17568b4e46d0e2d9210c3..3cba70bd5b502d7de425cd673e26fa4993861b00 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -33,7 +33,7 @@ static int HighestBitPos(size_t N) { } } -BestFitAllocator::BestFitAllocator(Allocation* allocation) +BestFitAllocator::BestFitAllocator(pten::Allocation* allocation) : allocation_(allocation) { details::Chunk chunk; chunk.size_ = allocation_->size(); @@ -115,7 +115,7 @@ size_t BestFitAllocator::NumFreeChunks() const { } return num; } -void BestFitAllocator::FreeImpl(Allocation* allocation) { +void BestFitAllocator::FreeImpl(pten::Allocation* allocation) { auto* bf_allocation = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL( bf_allocation, @@ -150,7 +150,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) { InsertFreeNode(chunk_it); delete allocation; } -Allocation* BestFitAllocator::AllocateImpl(size_t size) { +pten::Allocation* BestFitAllocator::AllocateImpl(size_t size) { auto highest_set_bit = static_cast(HighestBitPos(size)); MapIt map_it; for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 42f69e6d704af6139aafa51ae22b16d56e9ff38c..297d876178f3d14c69c3a75a970c80b733c8e03f 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -108,7 +108,7 @@ class BestFitAllocation : public Allocation { // the prev-chunk and the next-chunk when possible. 
class BestFitAllocator : public Allocator { public: - explicit BestFitAllocator(Allocation* allocation); + explicit BestFitAllocator(pten::Allocation* allocation); void* BasePtr() const { return allocation_->ptr(); } @@ -127,11 +127,11 @@ class BestFitAllocator : public Allocator { void InsertFreeNode(const ListIt& it); protected: - void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation* allocation) override; + pten::Allocation* AllocateImpl(size_t size) override; private: - Allocation* allocation_; // not owned + pten::Allocation* allocation_; // not owned details::ChunkList chunks_; details::FreeChunkBin free_chunks_; }; diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index 325cb010bf46629ca2dcbef2a6c7d184d39bc5d9..11739ebba955f69fb07e4b7084b71d60bdcade3e 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -46,12 +46,13 @@ void BufferedAllocator::FreeCache(size_t size) { bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; } -void BufferedAllocator::FreeImpl(Allocation *allocation) { +void BufferedAllocator::FreeImpl(pten::Allocation *allocation) { platform::LockGuardPtr guard(mtx_); - allocations_.emplace(allocation->size(), AllocationPtr(allocation)); + allocations_.emplace(allocation->size(), + AllocationPtr(allocation, Allocator::AllocationDeleter)); } -Allocation *BufferedAllocator::AllocateImpl(size_t size) { +pten::Allocation *BufferedAllocator::AllocateImpl(size_t size) { { platform::LockGuardPtr guard(mtx_); auto it = allocations_.lower_bound(size); diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index 5e1733bd839de2c0e219a40c569e492b1aca1d56..0ccccef573963e772baffefb5a7cded1c15b0b2c 100644 --- 
a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -45,8 +45,8 @@ class BufferedAllocator : public Allocator { void FreeCache(size_t size); protected: - void FreeImpl(Allocation *allocation) override; - Allocation *AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation *allocation) override; + pten::Allocation *AllocateImpl(size_t size) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index 0bfa10a1616b67ebe258d30819184dc7aac76968..21c30efccd8ada2853f4375d2266c75ddfd8002f 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -27,7 +27,7 @@ namespace memory { namespace allocation { inline std::unique_ptr GetBufferedAllocator( - Allocation *allocation, bool thread_safe) { + pten::Allocation *allocation, bool thread_safe) { std::unique_ptr allocator(new BestFitAllocator(allocation)); if (thread_safe) { allocator.reset(new LockedAllocator(std::move(allocator))); @@ -68,7 +68,7 @@ class StubAllocator : public Allocator { size_t GetFreeCount() const { return destruct_count_; } protected: - void FreeImpl(Allocation *allocation) override { + void FreeImpl(pten::Allocation *allocation) override { auto *alloc = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL( alloc, platform::errors::InvalidArgument( @@ -77,7 +77,7 @@ class StubAllocator : public Allocator { ++destruct_count_; delete allocation; } - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { ++construct_count_; if (size == 0) { return new StubAllocation(nullptr, 0, platform::CPUPlace()); diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index 
128591f5a8d3e6e5ef4ca29dd97f265116635d04..bf0bd891be26ff654b3945b77ce715d260819183 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -24,7 +24,7 @@ namespace allocation { bool CPUAllocator::IsAllocThreadSafe() const { return true; } -void CPUAllocator::FreeImpl(Allocation *allocation) { +void CPUAllocator::FreeImpl(pten::Allocation *allocation) { void *p = allocation->ptr(); #ifdef _WIN32 _aligned_free(p); @@ -34,7 +34,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) { delete allocation; } -Allocation *CPUAllocator::AllocateImpl(size_t size) { +pten::Allocation *CPUAllocator::AllocateImpl(size_t size) { void *p; #ifdef _WIN32 p = _aligned_malloc(size, kAlignment); diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index 058ff63381658da698841c839425dec000a748da..a64089dd2de42a1ad43b18103f16e83e94af289c 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -37,8 +37,8 @@ class CPUAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation* allocation) override; + pten::Allocation* AllocateImpl(size_t size) override; }; } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 4242083f2e617af5e7dc8456746b3ca9738dc3f8..ff9bbf4ab3df8194d1efbf5a2df3222c2fb2ac70 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -32,7 +32,7 @@ namespace paddle { namespace memory { namespace allocation { bool CUDAAllocator::IsAllocThreadSafe() const { return true; } -void CUDAAllocator::FreeImpl(Allocation* allocation) { +void CUDAAllocator::FreeImpl(pten::Allocation* 
allocation) { PADDLE_ENFORCE_EQ( BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_, platform::errors::PermissionDenied( @@ -42,7 +42,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) { delete allocation; } -Allocation* CUDAAllocator::AllocateImpl(size_t size) { +pten::Allocation* CUDAAllocator::AllocateImpl(size_t size) { std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); }); void* ptr; diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 5969d4d20ddee498c8257412cc9d8fa7177d475b..57e85a3dc21d1cef83e0ff38c2bcc7fab5c3c001 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -28,8 +28,8 @@ class CUDAAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation* allocation) override; + pten::Allocation* AllocateImpl(size_t size) override; private: platform::CUDAPlace place_; diff --git a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h index 33cf2fe05424778b88eae135f582d3d39405e55a..a6696634c12d4abe48d05d7572cf89a74742e92b 100644 --- a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h @@ -41,7 +41,7 @@ namespace allocation { */ class CUDADeviceContextAllocation : public Allocation { public: - explicit CUDADeviceContextAllocation(AllocationPtr allocation) + explicit CUDADeviceContextAllocation(DecoratedAllocationPtr allocation) : Allocation(allocation->ptr(), allocation->base_ptr(), allocation->size(), allocation->place()), underlying_allocation_(std::move(allocation)) {} @@ -56,7 +56,7 @@ class CUDADeviceContextAllocation : public Allocation { << p_allocation; 
dev_ctx_->AddStreamCallback([p_allocation] { VLOG(4) << "Delete CUDADeviceContextAllocation at " << p_allocation; - AllocationDeleter()(p_allocation); + Allocator::AllocationDeleter(p_allocation); }); } @@ -65,7 +65,7 @@ class CUDADeviceContextAllocation : public Allocation { } private: - AllocationPtr underlying_allocation_; + DecoratedAllocationPtr underlying_allocation_; const platform::CUDADeviceContext *dev_ctx_{nullptr}; }; @@ -102,14 +102,14 @@ class CUDADeviceContextAllocator : public Allocator { } protected: - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { PADDLE_ENFORCE_NOT_NULL( default_stream_, platform::errors::PreconditionNotMet( "Default stream is not set for CUDADeviceContextAllocator")); platform::CUDADeviceGuard guard(place_.device); - auto allocation = - new CUDADeviceContextAllocation(memory::Alloc(place_, size)); + auto allocation = new CUDADeviceContextAllocation( + static_unique_ptr_cast(memory::Alloc(place_, size))); // Wait for the event on stream #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS(hipEventRecord(event_, default_stream_)); @@ -121,7 +121,7 @@ class CUDADeviceContextAllocator : public Allocator { return allocation; } - void FreeImpl(Allocation *allocation) override { delete allocation; } + void FreeImpl(pten::Allocation *allocation) override { delete allocation; } private: platform::CUDAPlace place_; diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc index f4baca8288f03ca9073dd628ce772f383b104331..2ae2cf20ee6d4b6e90139b83436d00b3136ea11a 100644 --- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc @@ -101,7 +101,7 @@ CUDAVirtualMemAllocator::CUDAVirtualMemAllocator( bool CUDAVirtualMemAllocator::IsAllocThreadSafe() const { return false; } -void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) { 
+void CUDAVirtualMemAllocator::FreeImpl(pten::Allocation* allocation) { PADDLE_ENFORCE_EQ( BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_, platform::errors::PermissionDenied( @@ -140,7 +140,7 @@ void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) { delete allocation; } -Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) { +pten::Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) { size = AlignedSize(size, granularity_); CUdeviceptr ptr = virtual_mem_base_ + virtual_mem_alloced_offset_; diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h index c51b56566bb0254d3c6694a9f6411e71ab5c4dbb..0e1e59d200d91358b6be8b0c5dbd3e4ef1e68cb5 100644 --- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h @@ -37,8 +37,8 @@ class CUDAVirtualMemAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation* allocation) override; + pten::Allocation* AllocateImpl(size_t size) override; private: platform::CUDAPlace place_; diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc index 6e8f870b235ff0f5ab364f7813ef7d00985a3307..a0c8efddbd80d32223697ff4790cde31c6b64730 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ b/paddle/fluid/memory/allocation/locked_allocator.cc @@ -37,12 +37,12 @@ LockedAllocator::LockedAllocator( } } -void LockedAllocator::FreeImpl(Allocation *allocation) { +void LockedAllocator::FreeImpl(pten::Allocation *allocation) { platform::LockGuardPtr guard(mtx_); underlying_allocator_->Free(allocation); } -Allocation *LockedAllocator::AllocateImpl(size_t size) { +pten::Allocation *LockedAllocator::AllocateImpl(size_t size) { 
platform::LockGuardPtr guard(mtx_); return underlying_allocator_->Allocate(size).release(); } diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h index 1b8418bc8494a270645935f13dd3c8ea62849ed2..d17c8b24e27bdb155314264abc070401127d68ca 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ b/paddle/fluid/memory/allocation/locked_allocator.h @@ -29,8 +29,8 @@ class LockedAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation *allocation) override; - Allocation *AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation *allocation) override; + pten::Allocation *AllocateImpl(size_t size) override; private: std::shared_ptr underlying_allocator_; diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index 8710bbe6ce98bfb0d6e2f141359ecafc6ee22689..ffe7ccf9190bebeeaf014310031ed026b9af4ba8 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -790,7 +790,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const { namespace allocation { -Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) { +pten::Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) { void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_); auto *tmp_alloc = new Allocation(ptr, size, place_); platform::MemEvenRecorder::Instance().PushMemRecord( @@ -798,7 +798,7 @@ Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) { return tmp_alloc; } -void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) { +void NaiveBestFitAllocator::FreeImpl(pten::Allocation *allocation) { boost::apply_visitor( legacy::FreeVisitor(allocation->ptr(), allocation->size()), allocation->place()); diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h 
b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h index 474a308a064fdbc6d8e181fec0bf4aababed13e0..b7b3647ff98c157c743a34b8e7fb5bdfd3e00432 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h @@ -34,8 +34,8 @@ class NaiveBestFitAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation *AllocateImpl(size_t size) override; - void FreeImpl(Allocation *allocation) override; + pten::Allocation *AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation *allocation) override; uint64_t ReleaseImpl(const platform::Place &place) override; private: diff --git a/paddle/fluid/memory/allocation/npu_allocator.cc b/paddle/fluid/memory/allocation/npu_allocator.cc index 074a900cf54636908d057fb2a66d9e36ee0694dd..d9fa7ec27fdde6ae1c579ba808425a6bd901fe78 100644 --- a/paddle/fluid/memory/allocation/npu_allocator.cc +++ b/paddle/fluid/memory/allocation/npu_allocator.cc @@ -22,7 +22,7 @@ namespace memory { namespace allocation { bool NPUAllocator::IsAllocThreadSafe() const { return true; } -void NPUAllocator::FreeImpl(Allocation* allocation) { +void NPUAllocator::FreeImpl(pten::Allocation* allocation) { PADDLE_ENFORCE_EQ( BOOST_GET_CONST(platform::NPUPlace, allocation->place()), place_, platform::errors::PermissionDenied( @@ -32,7 +32,7 @@ void NPUAllocator::FreeImpl(Allocation* allocation) { delete allocation; } -Allocation* NPUAllocator::AllocateImpl(size_t size) { +pten::Allocation* NPUAllocator::AllocateImpl(size_t size) { std::call_once(once_flag_, [this] { platform::SetNPUDeviceId(place_.device); }); diff --git a/paddle/fluid/memory/allocation/npu_allocator.h b/paddle/fluid/memory/allocation/npu_allocator.h index bf668973505bab0b00b2da6111709e27236ffea6..88b0c9a24bb3d1b836a6ff356ff6284acf1aa8f9 100644 --- a/paddle/fluid/memory/allocation/npu_allocator.h +++ b/paddle/fluid/memory/allocation/npu_allocator.h @@ -28,8 +28,8 @@ class 
NPUAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation* allocation) override; + pten::Allocation* AllocateImpl(size_t size) override; private: platform::NPUPlace place_; diff --git a/paddle/fluid/memory/allocation/npu_pinned_allocator.cc b/paddle/fluid/memory/allocation/npu_pinned_allocator.cc index 292fe15c5d952fd68e38cb8d9beb3f98d80dd45e..2389973fa9b886eedc9ea992493289abca0a480a 100644 --- a/paddle/fluid/memory/allocation/npu_pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/npu_pinned_allocator.cc @@ -26,7 +26,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() { platform::NPUEventQuery(event, &status); if (status == ACL_EVENT_STATUS_COMPLETE) { - Allocation *allocation = it->first; + auto *allocation = it->first; void *ptr = allocation->ptr(); free(ptr); npu_events_.erase(it++); @@ -38,7 +38,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() { } } -Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) { +pten::Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) { std::lock_guard lock(mtx_); ProcessEventsAndFree(); void *ptr; @@ -50,7 +50,7 @@ Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) { return new Allocation(ptr, size, platform::NPUPinnedPlace()); } -void NPUPinnedAllocator::FreeImpl(Allocation *allocation) { +void NPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) { std::lock_guard lock(mtx_); void *ptr = allocation->ptr(); auto iter = npu_events_.find(allocation); @@ -83,7 +83,7 @@ uint64_t NPUPinnedAllocator::ReleaseImpl(const platform::Place &place) { return static_cast(0); } -void NPUPinnedAllocator::RecordEvent(Allocation *allocation, +void NPUPinnedAllocator::RecordEvent(pten::Allocation *allocation, aclrtStream stream) { std::lock_guard lock(mtx_); aclrtEvent event = nullptr; diff --git a/paddle/fluid/memory/allocation/npu_pinned_allocator.h 
b/paddle/fluid/memory/allocation/npu_pinned_allocator.h index 1d3f8bf1e449d7246301b0d99814aedca52a8fc7..716b12eea15f84836a43cf3365116a7f26a88b3f 100644 --- a/paddle/fluid/memory/allocation/npu_pinned_allocator.h +++ b/paddle/fluid/memory/allocation/npu_pinned_allocator.h @@ -32,16 +32,16 @@ class NPUPinnedAllocator : public Allocator { public: bool IsAllocThreadSafe() const override { return true; } void ProcessEventsAndFree(); - void RecordEvent(Allocation *allocation, aclrtStream stream); + void RecordEvent(pten::Allocation *allocation, aclrtStream stream); constexpr static size_t kAlignment = 4096UL; protected: - Allocation *AllocateImpl(size_t size) override; - void FreeImpl(Allocation *allocation) override; + pten::Allocation *AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation *allocation) override; uint64_t ReleaseImpl(const platform::Place &place) override; private: - std::unordered_map npu_events_; + std::unordered_map npu_events_; mutable std::mutex mtx_; }; diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index c56a7235c109ca0ab3210a0fa1e9e21fce7355c3..f1175fc4374e77000b2c24714d6552684ea6fb8b 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -18,7 +18,7 @@ namespace paddle { namespace memory { namespace allocation { bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } -void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { +void CPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) { #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS(hipHostFree(allocation->ptr())); #else @@ -26,7 +26,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { #endif delete allocation; } -Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) { +pten::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) { void *ptr; #ifdef PADDLE_WITH_HIP 
PADDLE_ENFORCE_GPU_SUCCESS(hipHostMalloc(&ptr, size, hipHostMallocPortable)); diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h index 4f535ef33734a3c6f7048ae6538e4332e0c9e8e4..800e3ff3bb2e39af623d444c6dfa7f12f55d5521 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.h +++ b/paddle/fluid/memory/allocation/pinned_allocator.h @@ -25,8 +25,8 @@ class CPUPinnedAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - void FreeImpl(Allocation *allocation) override; - Allocation *AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation *allocation) override; + pten::Allocation *AllocateImpl(size_t size) override; }; } // namespace allocation diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc index 1607af3808b434629fad47657a6b50c2a1b10028..856b6c2e9a2b084c47204837a5c859b9edcdad63 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.cc +++ b/paddle/fluid/memory/allocation/retry_allocator.cc @@ -39,7 +39,7 @@ class WaitedAllocateSizeGuard { size_t requested_size_; }; -void RetryAllocator::FreeImpl(Allocation* allocation) { +void RetryAllocator::FreeImpl(pten::Allocation* allocation) { // Delete underlying allocation first. 
size_t size = allocation->size(); underlying_allocator_->Free(allocation); @@ -51,7 +51,7 @@ void RetryAllocator::FreeImpl(Allocation* allocation) { } } -Allocation* RetryAllocator::AllocateImpl(size_t size) { +pten::Allocation* RetryAllocator::AllocateImpl(size_t size) { auto alloc_func = [&, this]() { return underlying_allocator_->Allocate(size).release(); }; diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h index 031a5e2b97f178eb002b578532bb1c615f29caf8..b427a37907a671f9dd86888ac6ed50655da9af25 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.h +++ b/paddle/fluid/memory/allocation/retry_allocator.h @@ -45,8 +45,8 @@ class RetryAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - void FreeImpl(Allocation* allocation) override; - Allocation* AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation* allocation) override; + pten::Allocation* AllocateImpl(size_t size) override; uint64_t ReleaseImpl(const platform::Place& place) override { return underlying_allocator_->Release(place); } diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc index 787f3d9dca377d54d057241d4e51ff4164c294cc..d636c73e07a18d08ef10ebd2f77601c399fd1464 100644 --- a/paddle/fluid/memory/allocation/retry_allocator_test.cc +++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc @@ -98,12 +98,12 @@ class DummyAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted( "Here is a test exception, always BadAlloc.")); } - void FreeImpl(Allocation *) override {} + void FreeImpl(pten::Allocation *) override {} }; TEST(RetryAllocator, RetryAllocatorLastAllocFailure) { diff --git 
a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc index a4f766f1d1abc66ccd39d493e8c4abc591258e8d..05c6a7adaff8b3904e407354ece6e907d4b4ac5e 100644 --- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc @@ -19,7 +19,7 @@ namespace memory { namespace allocation { StreamSafeCUDAAllocation::StreamSafeCUDAAllocation( - AllocationPtr underlying_allocation, gpuStream_t owning_stream) + DecoratedAllocationPtr underlying_allocation, gpuStream_t owning_stream) : Allocation(underlying_allocation->ptr(), underlying_allocation->base_ptr(), underlying_allocation->size(), underlying_allocation->place()), @@ -116,7 +116,7 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() { bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; } -Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { +pten::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { ProcessUnfreedAllocations(); VLOG(8) << "Try allocate " << size << " bytes"; AllocationPtr underlying_allocation; @@ -136,13 +136,14 @@ Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { throw; } StreamSafeCUDAAllocation* allocation = new StreamSafeCUDAAllocation( - std::move(underlying_allocation), default_stream_); + static_unique_ptr_cast(std::move(underlying_allocation)), + default_stream_); VLOG(8) << "Allocate " << allocation->size() << " bytes at address " << allocation->ptr(); return allocation; } -void StreamSafeCUDAAllocator::FreeImpl(Allocation* allocation) { +void StreamSafeCUDAAllocator::FreeImpl(pten::Allocation* allocation) { StreamSafeCUDAAllocation* stream_safe_cuda_allocation = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation, diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h index 
d84994f58a9c40e7bc2f4adc64a01ca667104382..f54cdc749611a6e951beb74e9e0cc044ee212e92 100644 --- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h +++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h @@ -34,7 +34,7 @@ namespace allocation { class StreamSafeCUDAAllocation : public Allocation { public: - StreamSafeCUDAAllocation(AllocationPtr underlying_allocation, + StreamSafeCUDAAllocation(DecoratedAllocationPtr underlying_allocation, gpuStream_t owning_stream); void RecordStream(const gpuStream_t &stream); bool CanBeFreed(); @@ -42,7 +42,7 @@ class StreamSafeCUDAAllocation : public Allocation { const gpuStream_t &GetOwningStream() const; private: - AllocationPtr underlying_allocation_; + DecoratedAllocationPtr underlying_allocation_; std::map outstanding_event_map_; gpuStream_t owning_stream_; SpinLock outstanding_event_map_lock_; @@ -57,8 +57,8 @@ class StreamSafeCUDAAllocator : public Allocator { bool IsAllocThreadSafe() const override; protected: - Allocation *AllocateImpl(size_t size) override; - void FreeImpl(Allocation *allocation) override; + pten::Allocation *AllocateImpl(size_t size) override; + void FreeImpl(pten::Allocation *allocation) override; uint64_t ReleaseImpl(const platform::Place &place) override; private: diff --git a/paddle/fluid/memory/allocation/test_aligned_allocator.cc b/paddle/fluid/memory/allocation/test_aligned_allocator.cc index 3eb1f140edd8471e32fbb68c24519d504e13e08c..987c7ea772d23af763960f4092bbb4fc45aad310 100644 --- a/paddle/fluid/memory/allocation/test_aligned_allocator.cc +++ b/paddle/fluid/memory/allocation/test_aligned_allocator.cc @@ -32,12 +32,12 @@ struct StubAllocator : public Allocator { size_t AllocNum() const { return alloc_num_; } protected: - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { ++alloc_num_; return new Allocation(new uint8_t[size], size, platform::CPUPlace()); } - void FreeImpl(Allocation *allocation) override { + void 
FreeImpl(pten::Allocation *allocation) override { delete[] static_cast(allocation->ptr()); delete allocation; --alloc_num_; diff --git a/paddle/fluid/memory/allocation/thread_local_allocator.h b/paddle/fluid/memory/allocation/thread_local_allocator.h index c55f579981b00501e830257d7097a06f7c623fb7..9c9306517021a0a9a55547b2be8c42dfc50e841e 100644 --- a/paddle/fluid/memory/allocation/thread_local_allocator.h +++ b/paddle/fluid/memory/allocation/thread_local_allocator.h @@ -83,11 +83,11 @@ class ThreadLocalCUDAAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation* AllocateImpl(size_t size) override { + pten::Allocation* AllocateImpl(size_t size) override { return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->AllocateImpl( size); } - void FreeImpl(Allocation* allocation) override { + void FreeImpl(pten::Allocation* allocation) override { auto* tl_allocation = static_cast(allocation); auto allocator_impl = tl_allocation->GetAllocator(); allocator_impl->FreeImpl(tl_allocation); diff --git a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc index 5c7e8e2d933f30c6ab247801459678a2b2a42e9b..face27debe9ff10b1967533a7d3afa4286183cf7 100644 --- a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc @@ -35,7 +35,8 @@ VirtualMemoryAutoGrowthBestFitAllocator:: alignment_(alignment), place_(place) {} -Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) { +pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl( + size_t size) { std::lock_guard guard(spinlock_); size = AlignedSize(size, alignment_); auto result = AllocFromFreeBlocks(size); @@ -48,7 +49,8 @@ Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) { return result; } 
-void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) { +void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl( + pten::Allocation *allocation) { std::lock_guard guard(spinlock_); auto block_it = static_cast(allocation)->block_it_; TryMergeBlock2Blocks(block_it); @@ -225,7 +227,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) { } } -Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks( +pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks( size_t size) { auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr)); if (iter != free_blocks_.end()) { diff --git a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h index 5171e5b3cd1bf3111c003f453582fc3d02d06f45..10bf0bbf49d5a075cfb0fbef22ef9109604d7d18 100644 --- a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h @@ -60,12 +60,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - Allocation *AllocateImpl(size_t size) override; + pten::Allocation *AllocateImpl(size_t size) override; - void FreeImpl(Allocation *allocation) override; + void FreeImpl(pten::Allocation *allocation) override; private: - Allocation *AllocFromFreeBlocks(size_t size); + pten::Allocation *AllocFromFreeBlocks(size_t size); void ExtendAndMerge(size_t size); void TryMergeBlock2Blocks(std::list::iterator iter); diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h index 7069fb46203d6d0d96be51e556806e467d019ba0..8830c46a1779846503c039732f9fb1c9f316db52 100644 --- a/paddle/fluid/memory/malloc.h +++ b/paddle/fluid/memory/malloc.h @@ -28,7 +28,7 @@ class DeviceContext; namespace memory { -using allocation::Allocation; 
+using pten::Allocation; using allocation::Allocator; using allocation::AllocationPtr; diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu index bc2d496a3e76a8fa620dcf17a0cb4818516ab302..6892f7ce4e50348978dae30857a485f380030878 100644 --- a/paddle/fluid/operators/math/concat_and_split.cu +++ b/paddle/fluid/operators/math/concat_and_split.cu @@ -336,9 +336,8 @@ class ConcatFunctor { auto* data_alloc_released = data_alloc.release(); auto* col_alloc_released = col_alloc.release(); context.AddStreamCallback([data_alloc_released, col_alloc_released] { - memory::allocation::AllocationDeleter deleter; - deleter(data_alloc_released); - deleter(col_alloc_released); + memory::allocation::Allocator::AllocationDeleter(data_alloc_released); + memory::allocation::Allocator::AllocationDeleter(col_alloc_released); }); #endif } @@ -466,9 +465,8 @@ class SplitFunctor { auto* data_alloc_released = data_alloc.release(); auto* cols_alloc_released = cols_alloc.release(); context.AddStreamCallback([data_alloc_released, cols_alloc_released] { - memory::allocation::AllocationDeleter deleter; - deleter(data_alloc_released); - deleter(cols_alloc_released); + memory::allocation::Allocator::AllocationDeleter(data_alloc_released); + memory::allocation::Allocator::AllocationDeleter(cols_alloc_released); }); #endif } diff --git a/paddle/fluid/platform/device/mlu/device_context_allocator.h b/paddle/fluid/platform/device/mlu/device_context_allocator.h index 9deab92af5cd6d31121637202215a3008d0c594c..408016c0f0d99c15ea5e9ea1516bc69c9c2a9bdc 100644 --- a/paddle/fluid/platform/device/mlu/device_context_allocator.h +++ b/paddle/fluid/platform/device/mlu/device_context_allocator.h @@ -55,7 +55,7 @@ class MLUDeviceContextAllocation : public Allocation { << p_allocation; dev_ctx_->AddStreamCallback([p_allocation] { VLOG(4) << "Delete MLUDeviceContextAllocation at " << p_allocation; - AllocationDeleter()(p_allocation); + 
Allocator::AllocationDeleter(p_allocation); }); } @@ -91,7 +91,7 @@ class MLUDeviceContextAllocator : public Allocator { } protected: - Allocation *AllocateImpl(size_t size) override { + pten::Allocation *AllocateImpl(size_t size) override { PADDLE_ENFORCE_NOT_NULL( default_stream_, platform::errors::PreconditionNotMet( @@ -105,7 +105,7 @@ class MLUDeviceContextAllocator : public Allocator { return allocation; } - void FreeImpl(Allocation *allocation) override { delete allocation; } + void FreeImpl(pten::Allocation *allocation) override { delete allocation; } private: platform::MLUPlace place_; diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.h b/paddle/fluid/platform/device/npu/npu_op_runner.h index e83057e682fef2fff739484e77037bef168c5657..c049da3b335668ef12940ba7c61cd7cd0ea17645 100644 --- a/paddle/fluid/platform/device/npu/npu_op_runner.h +++ b/paddle/fluid/platform/device/npu/npu_op_runner.h @@ -158,8 +158,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(npu_pinned_place) .get()); - paddle::memory::allocation::Allocation *allocation = - npu_pinned_tensor.Holder().get(); + pten::Allocation *allocation = npu_pinned_tensor.Holder().get(); npu_pinned_allocator->RecordEvent(allocation, GetCurrentNPUStream()); } else { diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 3f8923440be50220cd9087459e758522c0bc4b9a..659df6b9b44def9b0c5d4737e96b3de3aca98f73 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -53,7 +53,7 @@ size_t PyArray_Size_(PyObject* numpy_data) { return res; } -class EagerNumpyAllocation : public paddle::memory::allocation::Allocation { +class EagerNumpyAllocation : public pten::Allocation { public: explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype) : Allocation( diff --git a/paddle/pten/api/lib/utils/CMakeLists.txt 
b/paddle/pten/api/lib/utils/CMakeLists.txt index 4a44ad7758b56e3f0131600ea981bba23bf7b004..a4db8c4b193b64f111a019fe21bec6895610e65e 100644 --- a/paddle/pten/api/lib/utils/CMakeLists.txt +++ b/paddle/pten/api/lib/utils/CMakeLists.txt @@ -1,2 +1,2 @@ -cc_library(pten_api_utils SRCS allocator.cc storage.cc tensor_utils.cc DEPS +cc_library(pten_api_utils SRCS storage.cc tensor_utils.cc DEPS tensor_base convert_utils dense_tensor lod_tensor selected_rows place var_type_traits) diff --git a/paddle/pten/api/lib/utils/allocator.cc b/paddle/pten/api/lib/utils/allocator.cc deleted file mode 100644 index e80152431e71292fcd34c96108c191f8c799a220..0000000000000000000000000000000000000000 --- a/paddle/pten/api/lib/utils/allocator.cc +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/pten/api/lib/utils/allocator.h" - -namespace paddle { -namespace experimental { - -memory::Allocator::AllocationDeleter DefaultAllocator::deleter_; - -} // namespace experimental -} // namespace paddle diff --git a/paddle/pten/api/lib/utils/allocator.h b/paddle/pten/api/lib/utils/allocator.h index 4f5a810e400ce471ae05b4fef8a6b25a3465ab23..a8c05b76516892786a118f55763bc43e3d7f8742 100644 --- a/paddle/pten/api/lib/utils/allocator.h +++ b/paddle/pten/api/lib/utils/allocator.h @@ -22,14 +22,15 @@ limitations under the License. 
*/ namespace paddle { namespace experimental { -class DefaultAllocator : public pten::Allocator { +class DefaultAllocator : public pten::deprecated::Allocator { public: - using Allocation = pten::Allocation; + using Allocation = pten::deprecated::Allocation; explicit DefaultAllocator(const paddle::platform::Place& place) : place_(place) {} static void Delete(Allocation* allocation) { - deleter_(allocation->CastContextWithoutCheck()); + paddle::memory::allocation::Allocator::AllocationDeleter( + allocation->CastContextWithoutCheck()); } Allocation Allocate(size_t bytes_size) override { @@ -42,7 +43,6 @@ class DefaultAllocator : public pten::Allocator { private: paddle::platform::Place place_; - static paddle::memory::Allocator::AllocationDeleter deleter_; }; } // namespace experimental diff --git a/paddle/pten/api/lib/utils/storage.cc b/paddle/pten/api/lib/utils/storage.cc index 9ee1b9e5b7f92ee4a709ffcb2eafafb6c6bca4ac..6116a709d506560285d33fde16a9b774972f9c69 100644 --- a/paddle/pten/api/lib/utils/storage.cc +++ b/paddle/pten/api/lib/utils/storage.cc @@ -20,14 +20,13 @@ namespace experimental { ExternalStorage::ExternalStorage(void* ptr, size_t size, const paddle::platform::Place& place) - : pten::Storage( - std::make_shared(ptr, size, place)), + : pten::Storage(std::make_shared(ptr, size, place)), size_(size) {} ExternalStorage::ExternalStorage(const pten::intrusive_ptr& root, size_t delta, size_t size) - : Storage(std::make_shared( + : Storage(std::make_shared( static_cast(root->data()) + delta, size, root->place())), size_(size) { PADDLE_ENFORCE_LE(static_cast(delta + size), diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 69a1fc274a28db254db90b9e87bb5fe0d42b8bf6..0b6cb8d95cc1a5721caef9de05b97573e195d6bd 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -307,7 +307,7 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { 
dst->Resize(src->dims()); dst->set_type(pten::TransToProtoVarType(src->dtype())); auto storage = src->release(); - std::shared_ptr holder( + std::shared_ptr holder( new TensorStorage(std::move(storage))); dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype())); dst->set_offset(src->meta().offset); diff --git a/paddle/pten/core/allocator.h b/paddle/pten/core/allocator.h index 74455be1368343306e3d0a4bc7d1581ab2b210c0..2647490c9f58ba9f4e905654c70e5a30a73cdebc 100644 --- a/paddle/pten/core/allocator.h +++ b/paddle/pten/core/allocator.h @@ -16,8 +16,10 @@ limitations under the License. */ #include #include "paddle/fluid/platform/place.h" +#include "paddle/pten/core/candidate/allocator.h" namespace pten { +namespace deprecated { /// \brief Encapsulates strategies for access/addressing, allocation/ /// deallocation and construction/destruction of objects. @@ -147,4 +149,5 @@ inline Allocation Allocate(const std::shared_ptr& a, size_t n) { return a->Allocate(n); } +} // namespace deprecated } // namespace pten diff --git a/paddle/pten/core/candidate/allocator.h b/paddle/pten/core/candidate/allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..75d42c4fd15cb13e10c86dd1f1b42700a53b83bd --- /dev/null +++ b/paddle/pten/core/candidate/allocator.h @@ -0,0 +1,107 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include "paddle/fluid/platform/place.h" + +namespace pten { + +/// \brief Fancy pointer with deleter. The use of this data type +/// is to be compatible with allocators from different frameworks +/// without significant performance loss. This class does not +/// support being inherited. +class Allocation { + public: + using Place = paddle::platform::Place; + using DeleterFnPtr = void (*)(Allocation*); + + Allocation() = default; + + // Don't own resources, only provide access. + Allocation(void* data, size_t size, const Place& place) + : ptr_(data), size_(size), place_(place) {} + + // Own resources. + Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place) + : ptr_(data), size_(size), deleter_(deleter), place_(place) {} + + Allocation(Allocation&& other) noexcept { swap(*this, other); } + Allocation& operator=(Allocation&& other) noexcept { + // Exchange them explicitly so that moving does not degrade + // into copying. + swap(*this, other); + return *this; + } + + virtual ~Allocation() { + if (deleter_) { + deleter_(this); + } + } + + // Returns the holding pointer. + // NOTE: For performance reasons, it is better not to make this method + // a virtual method. If we want to implement a `defragmentation` later, + // we might need to make the `ptr_` field a protected field, and add a virtual + // method like `defragmentation` to change `ptr_`. + void* ptr() const noexcept { return ptr_; } + + // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the + // last valid element. + // + // NOTE: Some allocators might allocate more memory than requested. The size + // could be larger than the request. For example, + // the AlignedAllocator will always allocate memory as size + kAlignment. + // The raw pointer might not be aligned, so an offset might be added to + // the raw pointer. The size of this allocation will be + // `size + kAlignment - offset`.
+ size_t size() const noexcept { return size_; } + + void* operator->() const noexcept { return ptr_; } + operator bool() const noexcept { return ptr_; } + const Place& place() const noexcept { return place_; } + DeleterFnPtr deleter() const noexcept { return deleter_; } + + protected: + friend void swap(Allocation& a, Allocation& b) noexcept; + void* ptr_{nullptr}; + size_t size_{}; + DeleterFnPtr deleter_{nullptr}; + // TODO(Shixiaowei02): Enum needs to be used instead to reduce + // the construction overhead by more than 50%. + Place place_; +}; + +inline void swap(Allocation& a, Allocation& b) noexcept { + ::std::swap(a.ptr_, b.ptr_); + ::std::swap(a.deleter_, b.deleter_); + ::std::swap(a.place_, b.place_); + ::std::swap(a.size_, b.size_); +} + +class Allocator { + public: + using DeleterType = std::function; + using AllocationPtr = std::unique_ptr; + + virtual ~Allocator() = default; + virtual AllocationPtr Allocate(size_t bytes_size) = 0; + + virtual bool IsAllocThreadSafe() const { return false; } +}; + +} // namespace pten diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index 1502accd197be6fddd1d0849e7373bebea7adf8b..1802a2461158fc9952857cce7d48ba42229405ed 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -60,6 +60,8 @@ class TensorInplaceVersion { class DenseTensor : public TensorBase, public TypeInfoTraits { public: + using Allocator = deprecated::Allocator; + /// \brief Construct a dense tensor and allocate space. /// \param a The allocator used to allocate space. /// \param meta The meta data of dense tensor. 
diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h index fc56935eeaf193398cb07df1b64524bc84291524..cf18dd913093a1f9e46ac67d78e03ed9c0a1b78b 100644 --- a/paddle/pten/core/storage.h +++ b/paddle/pten/core/storage.h @@ -91,6 +91,7 @@ class Storage : public intrusive_ref_counter { class TensorStorage : public Storage { public: using Place = paddle::platform::Place; + using Allocator = deprecated::Allocator; explicit TensorStorage(const std::shared_ptr& a) : alloc_(a) {} diff --git a/paddle/pten/tests/core/allocator.h b/paddle/pten/tests/core/allocator.h index 094c0e8437d9811709972b3482703bbefdc4e54e..c2c74e1aacf1f2cb253e434385f8c64aed52d89d 100644 --- a/paddle/pten/tests/core/allocator.h +++ b/paddle/pten/tests/core/allocator.h @@ -21,7 +21,7 @@ limitations under the License. */ namespace pten { namespace tests { -class HostAllocatorSample : public pten::RawAllocator { +class HostAllocatorSample : public pten::deprecated::RawAllocator { public: using Place = paddle::platform::Place; void* Allocate(size_t bytes_size) override { @@ -36,8 +36,9 @@ class HostAllocatorSample : public pten::RawAllocator { Place place_{paddle::platform::CPUPlace()}; }; -class FancyAllocator : public pten::Allocator { +class FancyAllocator : public pten::deprecated::Allocator { public: + using Allocation = pten::deprecated::Allocation; static void Delete(Allocation* allocation) { ::operator delete(allocation->ptr()); } @@ -55,7 +56,7 @@ class FancyAllocator : public pten::Allocator { template struct CustomAllocator { using value_type = T; - using Allocator = pten::RawAllocator; + using Allocator = pten::deprecated::RawAllocator; explicit CustomAllocator(const std::shared_ptr& a) noexcept : alloc_(a) {} diff --git a/paddle/pten/tests/core/test_allocator.cc b/paddle/pten/tests/core/test_allocator.cc index c509d8bd20a01243eda2bb7be13166ca8d540d5b..94ba9a1e1b9a23711caf3d6e40db0cdce00dec93 100644 --- a/paddle/pten/tests/core/test_allocator.cc +++ 
b/paddle/pten/tests/core/test_allocator.cc @@ -24,6 +24,10 @@ limitations under the License. */ namespace pten { namespace tests { +using RawAllocator = pten::deprecated::RawAllocator; +using Allocator = pten::deprecated::Allocator; +using Allocation = pten::deprecated::Allocation; + template bool host_allocator_test(size_t vector_size) { std::vector src(vector_size); diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index e0ae6008198736aeae3cb184f6007921e2e924d9..caacecf446a82344192a4f7cd2c0e127ea1a94a1 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -226,7 +226,7 @@ if [ "${HAS_MODIFIED_DEMO_CMAKE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then HAS_MODIFIED_ALLOCATION=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/memory/allocation" || true` if [ "${HAS_MODIFIED_ALLOCATION}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then echo_line="You must be approved by zhiqiu and Shixiaowei02 for paddle/fluid/memory/allocation.\nIt is being modularized and refactored. 
Thanks!\n" - check_approval 2 6888866 39303645 + check_approval 1 6888866 39303645 fi HAS_MODIFIED_TENSOR=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/framework/tensor" || true` @@ -241,23 +241,6 @@ if [ "${HAS_MODIFIED_TENSOR}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then check_approval 1 22561442 22334008 fi -ALLOCSHARED_FILE_CHANGED=`git diff --name-only --diff-filter=AM upstream/$BRANCH |grep -E "*\.(h|cc)" || true` -if [ "${ALLOCSHARED_FILE_CHANGED}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then - ERROR_LINES="" - for TEST_FILE in ${ALLOCSHARED_FILE_CHANGED}; - do - HAS_SKIP_CHECK_ALLOC_CI=`git diff -U0 upstream/$BRANCH ${PADDLE_ROOT}/${TEST_FILE} |grep "AllocShared" || true` - if [ "${HAS_SKIP_CHECK_ALLOC_CI}" != "" ]; then - ERROR_LINES="${ERROR_LINES}\n${TEST_FILE}\n${HAS_SKIP_CHECK_ALLOC_CI}\n" - fi - done - if [ "${ERROR_LINES}" != "" ]; then - ERROR_LINES=${ERROR_LINES//+/'\n+\t'} - echo_line="memory::AllocShared is not recommended, because it is being modularized and refactored. Please use memory::Alloc here. Otherwise, please request zhiqiu and Shixiaowei02 review and approve.\n" - check_approval 2 6888866 39303645 - fi -fi - ALL_PADDLE_ENFORCE=`git diff -U0 upstream/$BRANCH |grep "^+" |grep -zoE "PADDLE_ENFORCE\(.[^,\);]+.[^;]*\);\s" || true` if [ "${ALL_PADDLE_ENFORCE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then echo_line="PADDLE_ENFORCE is not recommended. Please use PADDLE_ENFORCE_EQ/NE/GT/GE/LT/LE or PADDLE_ENFORCE_NOT_NULL or PADDLE_ENFORCE_GPU_SUCCESS instead, see [ https://github.com/PaddlePaddle/Paddle/wiki/PADDLE_ENFORCE-Rewriting-Specification ] for details.\nYou must have one RD (chenwhql (Recommend) , luotao1 (Recommend) or lanxianghit) approval for the usage (either add or delete) of PADDLE_ENFORCE.\n${ALL_PADDLE_ENFORCE}\n"