Unverified · Commit 277cf900 · authored by 石晓伟, committed by GitHub

splits allocation for pten, test=develop (#38853)

Parent 0efcae86
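The pattern repeated across the hunks below: the fluid `paddle::memory::allocation::Allocation` stops carrying its own `ptr_`/`size_`/`place_` and instead derives from the new `pten::Allocation` base, keeping only decoration-specific state (`base_ptr_` and the decorated-allocator chain), while every `AllocateImpl`/`FreeImpl` signature moves to the base type. A minimal compilable sketch of that layering, with `Place` and the member set reduced to stand-ins (not the real Paddle definitions):

#include <cstddef>
#include <string>
#include <vector>

struct Place { std::string name; };  // stand-in for paddle::platform::Place

namespace pten {
// Base: a plain "pointer + size + place" record, usable without fluid.
class Allocation {
 public:
  Allocation(void* ptr, std::size_t size, const Place& place)
      : ptr_(ptr), size_(size), place_(place) {}
  virtual ~Allocation() = default;
  void* ptr() const { return ptr_; }
  std::size_t size() const { return size_; }
  const Place& place() const { return place_; }

 private:
  void* ptr_;
  std::size_t size_;
  Place place_;
};
}  // namespace pten

namespace fluid_sketch {
class Allocator;
// Derived: only what the fluid allocator stack adds on top of the base.
class Allocation : public pten::Allocation {
 public:
  Allocation(void* ptr, std::size_t size, const Place& place)
      : pten::Allocation(ptr, size, place), base_ptr_(ptr) {}
  void* base_ptr() const { return base_ptr_; }

 private:
  void* base_ptr_;  // pointer as directly requested from the system
  std::vector<Allocator*> decorated_allocators_;  // decoration chain
  friend class Allocator;
};
}  // namespace fluid_sketch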
@@ -410,8 +410,8 @@ class ExecutionContext {
     auto tmp_allocation_ptr = memory::Alloc(dev_ctx, product(dim) * sizeof(T));
     auto& deleter = tmp_allocation_ptr.get_deleter();
     auto* allocation_ptr = tmp_allocation_ptr.release();
-    auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
-        allocation_ptr, deleter);
+    auto shared_allocation =
+        std::shared_ptr<pten::Allocation>(allocation_ptr, deleter);
     PADDLE_ENFORCE_GE(
         allocation_ptr->size(), framework::product(dim) * sizeof(T),
......
@@ -17,14 +17,6 @@ limitations under the License. */
 DECLARE_bool(use_stream_safe_cuda_allocator);
-namespace paddle {
-namespace memory {
-namespace allocation {
-class Allocation;
-}  // namespace allocation
-}  // namespace memory
-}  // namespace paddle
 namespace paddle {
 namespace framework {
......
@@ -32,14 +32,6 @@ limitations under the License. */
 #include "paddle/pten/core/dense_tensor.h"
-namespace paddle {
-namespace memory {
-namespace allocation {
-class Allocation;
-}  // namespace allocation
-}  // namespace memory
-}  // namespace paddle
 namespace paddle {
 namespace framework {
......
@@ -151,8 +151,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
......
@@ -183,8 +183,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
@@ -241,8 +240,7 @@ void TensorFromVector(const std::vector<T>& src,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
@@ -312,8 +310,7 @@ inline void TensorFromVector(const std::vector<bool>& src,
             paddle::memory::allocation::AllocatorFacade::Instance()
                 .GetAllocator(npu_pinned_place)
                 .get());
-    paddle::memory::allocation::Allocation* allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation* allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(
         allocation,
         reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
......
@@ -223,9 +223,10 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
   auto t_place = tensor->place();
   paddle::framework::Tensor out;
-  auto mem_allocation = std::make_shared<paddle::memory::Allocation>(
-      static_cast<void *>(data), ele_num * sizeof(T),
-      paddle::platform::CPUPlace());
+  auto mem_allocation =
+      std::make_shared<paddle::memory::allocation::Allocation>(
+          static_cast<void *>(data), ele_num * sizeof(T),
+          paddle::platform::CPUPlace());
   out.ResetHolder(mem_allocation);
   if (paddle::platform::is_cpu_place(t_place)) {
......
@@ -257,9 +257,8 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
   size_t memory_size =
       GetLiteTensorNumel(*src) *
       framework::SizeOfType(GetNativePrecisionType(src->precision()));
-  std::shared_ptr<memory::allocation::Allocation> holder(
-      new memory::allocation::Allocation(src_raw_data, memory_size,
-                                         GetNativePlace(src->target())));
+  std::shared_ptr<pten::Allocation> holder(new pten::Allocation(
+      src_raw_data, memory_size, GetNativePlace(src->target())));
   dst->Resize(paddle::framework::make_ddim(src->shape()));
   SetLoD(dst->mutable_lod(), src->lod());
   dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision()));
......
@@ -23,7 +23,7 @@ namespace allocation {
 // For memory address alignment
 class AlignedAllocation : public Allocation {
  public:
-  AlignedAllocation(AllocationPtr underlying_allocation, size_t offset)
+  AlignedAllocation(DecoratedAllocationPtr underlying_allocation, size_t offset)
       : Allocation(
             reinterpret_cast<uint8_t*>(underlying_allocation->ptr()) + offset,
             underlying_allocation->base_ptr(),
@@ -32,7 +32,7 @@ class AlignedAllocation : public Allocation {
         underlying_allocation_(std::move(underlying_allocation)) {}
  private:
-  AllocationPtr underlying_allocation_;
+  DecoratedAllocationPtr underlying_allocation_;
 };
 AlignedAllocator::AlignedAllocator(
@@ -52,13 +52,17 @@ bool AlignedAllocator::IsAllocThreadSafe() const {
   return underlying_allocator_->IsAllocThreadSafe();
 }
-Allocation* AlignedAllocator::AllocateImpl(size_t size) {
+pten::Allocation* AlignedAllocator::AllocateImpl(size_t size) {
   auto raw_allocation = underlying_allocator_->Allocate(size + alignment_);
   size_t offset = AlignedPtrOffset(raw_allocation->ptr(), alignment_);
-  return new AlignedAllocation(std::move(raw_allocation), offset);
+  auto* p = new AlignedAllocation(
+      static_unique_ptr_cast<Allocation>(std::move(raw_allocation)), offset);
+  return p;
 }
-void AlignedAllocator::FreeImpl(Allocation* allocation) { delete allocation; }
+void AlignedAllocator::FreeImpl(pten::Allocation* allocation) {
+  delete allocation;
+}
 }  // namespace allocation
 }  // namespace memory
......
@@ -30,9 +30,9 @@ class AlignedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  Allocation* AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
......
@@ -18,11 +18,10 @@ namespace paddle {
 namespace memory {
 namespace allocation {
-bool Allocator::IsAllocThreadSafe() const { return false; }
-void Allocator::FreeImpl(Allocation* allocation) {
-  Allocator* allocator = allocation->TopDecoratedAllocator();
-  allocator->Free(allocation);
+void Allocator::FreeImpl(pten::Allocation* allocation) {
+  static_cast<Allocation*>(allocation)
+      ->TopDecoratedAllocator()
+      ->Free(allocation);
 }
 }  // namespace allocation
......
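This default `FreeImpl` is the heart of the decorator scheme: freeing starts at the top decorator, each `Free` pops one level off the allocation's chain, and the default `FreeImpl` forwards to whichever allocator is next, until a concrete allocator overrides `FreeImpl` and actually releases memory. A self-contained sketch of that dispatch, with the pten/fluid split and size bookkeeping omitted for brevity:

#include <cstddef>
#include <vector>

namespace sketch {
class Allocator;

class Allocation {
 public:
  explicit Allocation(void* ptr) : ptr_(ptr) {}
  virtual ~Allocation() = default;
  void* ptr() const { return ptr_; }
  void RegisterDecoratedAllocator(Allocator* a) { chain_.push_back(a); }
  void PopDecoratedAllocator() { chain_.pop_back(); }
  Allocator* TopDecoratedAllocator() { return chain_.back(); }

 private:
  void* ptr_;
  std::vector<Allocator*> chain_;  // bottom allocator first, top decorator last
};

class Allocator {
 public:
  virtual ~Allocator() = default;

  Allocation* Allocate(std::size_t size) {
    Allocation* p = AllocateImpl(size);
    p->RegisterDecoratedAllocator(this);  // this allocator is now on top
    return p;
  }

  void Free(Allocation* allocation) {
    allocation->PopDecoratedAllocator();  // step down one decoration level
    FreeImpl(allocation);
  }

 protected:
  virtual Allocation* AllocateImpl(std::size_t size) = 0;
  // Default FreeImpl, as in the patched allocator.cc: forward to the next
  // allocator down the chain; only concrete allocators override this.
  virtual void FreeImpl(Allocation* allocation) {
    allocation->TopDecoratedAllocator()->Free(allocation);
  }
};
}  // namespace sketch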
@@ -22,6 +22,7 @@
 #include "paddle/fluid/framework/inlined_vector.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/core/allocator.h"
 DECLARE_string(allocator_strategy);
@@ -80,30 +81,19 @@ class Allocator;
  * e.g., something what is done in AlignedAllocator, etc.
  * In this case, we should declare a derived class of Allocation, which
  * contains an underlying Allocation allocated by the underlying allocator.
- * Therefore, `decorated_allocators_` of the new Allocation object would
+ * Therefore, `decorated_allocators_` of the new Allocation object
+ * would
  * be a new chain, differing from the underlying Allocation object.
  */
-class Allocation {
+class Allocation : public pten::Allocation {
  public:
-  inline Allocation(void* ptr, size_t size, platform::Place place)
-      : ptr_(ptr), base_ptr_(ptr), size_(size), place_(place) {}
-  inline Allocation(void* ptr, void* base_ptr, size_t size,
-                    platform::Place place)
-      : ptr_(ptr), base_ptr_(base_ptr), size_(size), place_(place) {}
+  Allocation(void* ptr, size_t size, platform::Place place)
+      : pten::Allocation(ptr, size, place), base_ptr_(ptr) {}
+  Allocation(void* ptr, void* base_ptr, size_t size,
+             const platform::Place& place)
+      : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
-  Allocation(const Allocation& o) = delete;
-  Allocation& operator=(const Allocation& o) = delete;
-  Allocation(Allocation&& o) = delete;
-  Allocation& operator=(Allocation&& o) = delete;
-  // Returns the holding pointer.
-  // NOTE: For performance consideration, it is better not to make this method
-  // as a virtual method. If we want to implement a `defragmentation` later,
-  // we might need to make `ptr_` field as a protected field, and add a virtual
-  // method like `defragmentation` to change `ptr_`.
-  inline void* ptr() const { return ptr_; }
-  inline void* base_ptr() const {
+  void* base_ptr() const {
     PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
                       paddle::platform::errors::Unimplemented(
                           "base_ptr() is only implemented for auto_growth "
@@ -112,21 +102,6 @@ class Allocation {
     return base_ptr_;
   }
-  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
-  // last valid element.
-  //
-  // NOTE: Some allocator might alloc more memory than request. The size
-  // could larger than its request. For example,
-  // the AlignedAllocator will always allocate memory as size + kAlignment.
-  // The raw pointer might not aligned, so an offset might be added to raw
-  // the pointer. The size of this allocation will be
-  // `size + kAlignemnt - offset`.
-  inline size_t size() const { return size_; }
-  inline const platform::Place& place() const { return place_; }
-  virtual ~Allocation() {}
  private:
   inline void RegisterDecoratedAllocator(Allocator* allocator) {
     decorated_allocators_.emplace_back(allocator);
@@ -139,10 +114,7 @@
   }
  private:
-  void* ptr_;
   void* base_ptr_;  // the point that directly requested from system
-  size_t size_;
-  platform::Place place_;
   /**
    * NOTE(zjl): Since decorated_allocators_ is usually a small vector.
@@ -162,53 +134,42 @@ class Allocation {
   friend class Allocator;
 };
+using AllocationPtr = pten::Allocator::AllocationPtr;
+using DecoratedAllocationPtr =
+    std::unique_ptr<Allocation, pten::Allocator::DeleterType>;
 // Base interface class of memory Allocator.
-class Allocator {
+class Allocator : public pten::Allocator {
  public:
-  virtual ~Allocator() {}
-  class AllocationDeleter {
-   public:
-    inline void operator()(Allocation* allocation) const {
-      Allocator* allocator = allocation->TopDecoratedAllocator();
-      allocator->Free(allocation);
-    }
-  };
-  using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
+  static void AllocationDeleter(pten::Allocation* allocation) {
+    Allocator* allocator =
+        static_cast<Allocation*>(allocation)->TopDecoratedAllocator();
+    allocator->Free(allocation);
+  }
   // Allocate an allocation.
   // size may be 0, but it would be too complex if we handle size == 0
   // in each Allocator. So we handle size == 0 inside AllocatorFacade
   // in our design.
-  inline AllocationPtr Allocate(size_t size) {
+  AllocationPtr Allocate(size_t size) override {
     auto ptr = AllocateImpl(size);
-    ptr->RegisterDecoratedAllocator(this);
-    return AllocationPtr(ptr);
+    static_cast<Allocation*>(ptr)->RegisterDecoratedAllocator(this);
+    return AllocationPtr(ptr, AllocationDeleter);
   }
-  // This function should not be called outside Allocator class
-  inline void Free(Allocation* allocation) {
-    allocation->PopDecoratedAllocator();
+  void Free(pten::Allocation* allocation) {
+    static_cast<Allocation*>(allocation)->PopDecoratedAllocator();
     FreeImpl(allocation);
   }
-  inline uint64_t Release(const platform::Place& place) {
-    return ReleaseImpl(place);
-  }
-  // True if the `Allocate` is thread safe.
-  virtual bool IsAllocThreadSafe() const;
+  uint64_t Release(const platform::Place& place) { return ReleaseImpl(place); }
  protected:
-  virtual Allocation* AllocateImpl(size_t size) = 0;
-  virtual void FreeImpl(Allocation* allocation);
+  virtual pten::Allocation* AllocateImpl(size_t size) = 0;
+  virtual void FreeImpl(pten::Allocation* allocation);
   virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; }
 };
-using AllocationDeleter = Allocator::AllocationDeleter;
-using AllocationPtr = Allocator::AllocationPtr;
 inline size_t AlignedSize(size_t size, size_t alignment) {
   auto remaining = size % alignment;
   return remaining == 0 ? size : size + alignment - remaining;
@@ -220,6 +181,14 @@ inline size_t AlignedPtrOffset(const void* ptr, size_t alignment) {
   return diff == 0 ? 0 : alignment - diff;
 }
+template <typename Derived, typename Base, typename BaseDel>
+decltype(auto) static_unique_ptr_cast(std::unique_ptr<Base, BaseDel>&& p) {
+  static_assert(std::is_base_of<Base, Derived>::value,
+                "Derived type must derive from Base.");
+  auto d = static_cast<Derived*>(p.release());
+  return std::unique_ptr<Derived, BaseDel>(d, p.get_deleter());
+}
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
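The `static_unique_ptr_cast` helper added at the bottom of this header is what the later hunks use to turn the `AllocationPtr` returned by `Allocate` (a `std::unique_ptr` to the `pten::Allocation` base with a runtime deleter) back into a `DecoratedAllocationPtr` without dropping that deleter. The same pattern in isolation, with illustrative `Base`/`Derived` types:

#include <memory>
#include <type_traits>
#include <utility>

// The helper from the hunk above, reproduced outside its namespaces.
template <typename Derived, typename Base, typename BaseDel>
decltype(auto) static_unique_ptr_cast(std::unique_ptr<Base, BaseDel>&& p) {
  static_assert(std::is_base_of<Base, Derived>::value,
                "Derived type must derive from Base.");
  auto d = static_cast<Derived*>(p.release());
  return std::unique_ptr<Derived, BaseDel>(d, p.get_deleter());
}

struct Base { virtual ~Base() = default; };
struct Derived : Base { int extra = 0; };

void DeleteBase(Base* b) { delete b; }

int main() {
  // Mirrors AllocationPtr: unique_ptr to the base with a function-pointer deleter.
  std::unique_ptr<Base, void (*)(Base*)> p(new Derived, DeleteBase);
  // Downcast while keeping the deleter, as the Allocate() call sites now do.
  auto d = static_unique_ptr_cast<Derived>(std::move(p));
  d->extra = 42;
  return 0;
}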
@@ -94,7 +94,7 @@ class CUDAGraphAllocator
   class PrivateAllocation : public Allocation {
    public:
     PrivateAllocation(CUDAGraphAllocator* allocator,
-                      AllocationPtr underlying_allocation)
+                      DecoratedAllocationPtr underlying_allocation)
         : Allocation(
               underlying_allocation->ptr(), underlying_allocation->base_ptr(),
               underlying_allocation->size(), underlying_allocation->place()),
@@ -103,7 +103,7 @@ class CUDAGraphAllocator
    private:
     std::shared_ptr<Allocator> allocator_;
-    AllocationPtr underlying_allocation_;
+    DecoratedAllocationPtr underlying_allocation_;
   };
   explicit CUDAGraphAllocator(const std::shared_ptr<Allocator>& allocator)
@@ -116,12 +116,14 @@ class CUDAGraphAllocator
   }
  protected:
-  Allocation* AllocateImpl(size_t size) {
+  pten::Allocation* AllocateImpl(size_t size) {
     VLOG(10) << "Allocate " << size << " for CUDA Graph";
-    return new PrivateAllocation(this, underlying_allocator_->Allocate(size));
+    return new PrivateAllocation(this,
+                                 static_unique_ptr_cast<Allocation>(
+                                     underlying_allocator_->Allocate(size)));
   }
-  void FreeImpl(Allocation* allocation) {
+  void FreeImpl(pten::Allocation* allocation) {
     VLOG(10) << "delete for CUDA Graph";
     delete allocation;
   }
@@ -322,7 +324,7 @@ class AllocatorFacadePrivate {
     return static_cast<platform::CUDADeviceContext*>(pool.Get(place))->stream();
   }
-  void RecordStream(std::shared_ptr<Allocation> allocation,
+  void RecordStream(std::shared_ptr<pten::Allocation> allocation,
                     const gpuStream_t& stream) {
     if (allocation->size() == 0) {
       return;
@@ -339,7 +341,7 @@
   }
   const gpuStream_t& GetStream(
-      const std::shared_ptr<Allocation>& allocation) const {
+      const std::shared_ptr<pten::Allocation>& allocation) const {
     const StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
         dynamic_cast<const StreamSafeCUDAAllocation*>(allocation.get());
     PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
@@ -391,10 +393,10 @@ class AllocatorFacadePrivate {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  Allocation* AllocateImpl(size_t size) override {
+  pten::Allocation* AllocateImpl(size_t size) override {
     return new Allocation(nullptr, 0, place_);
   }
-  void FreeImpl(Allocation* allocation) override { delete allocation; }
+  void FreeImpl(pten::Allocation* allocation) override { delete allocation; }
  private:
   platform::Place place_;
@@ -820,9 +822,9 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
   return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
 }
-std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
+std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size) {
-  return std::shared_ptr<Allocation>(Alloc(place, size));
+  return std::shared_ptr<pten::Allocation>(Alloc(place, size));
 }
 AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
@@ -866,7 +868,7 @@ uint64_t AllocatorFacade::Release(const platform::Place& place) {
       ->Release(place);
 }
-std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
+std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size, const platform::Stream& stream) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   PADDLE_ENFORCE_EQ(
@@ -884,14 +886,14 @@ std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
   }
 #endif
   gpuStream_t s = reinterpret_cast<gpuStream_t>(stream.id());
-  return std::shared_ptr<Allocation>(Alloc(place, size, s));
+  return std::shared_ptr<pten::Allocation>(Alloc(place, size, s));
 #else
   PADDLE_THROW(platform::errors::PreconditionNotMet("Not compiled with GPU."));
 #endif
 }
 bool AllocatorFacade::InSameStream(
-    const std::shared_ptr<Allocation>& allocation,
+    const std::shared_ptr<pten::Allocation>& allocation,
     const platform::Stream& stream) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   PADDLE_ENFORCE_EQ(
@@ -962,7 +964,7 @@ uint64_t AllocatorFacade::Release(const platform::CUDAPlace& place,
   return m_->GetAllocator(place, stream)->Release(place);
 }
-void AllocatorFacade::RecordStream(std::shared_ptr<Allocation> allocation,
+void AllocatorFacade::RecordStream(std::shared_ptr<pten::Allocation> allocation,
                                    const gpuStream_t& stream) {
   PADDLE_ENFORCE_EQ(
       FLAGS_use_stream_safe_cuda_allocator, true,
@@ -983,7 +985,7 @@ void AllocatorFacade::RecordStream(std::shared_ptr<Allocation> allocation,
 }
 const gpuStream_t& AllocatorFacade::GetStream(
-    const std::shared_ptr<Allocation>& allocation) const {
+    const std::shared_ptr<pten::Allocation>& allocation) const {
   PADDLE_ENFORCE_EQ(
       FLAGS_use_stream_safe_cuda_allocator, true,
       platform::errors::Unimplemented(
......
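With these signature changes, both `AllocShared` overloads hand out `std::shared_ptr<pten::Allocation>`, so callers no longer need the fluid `Allocation` type at all. A hypothetical call site (the header path and size are illustrative, not taken from this patch):

#include "paddle/fluid/memory/allocation/allocator_facade.h"

void ExampleAllocShared() {
  paddle::platform::CPUPlace place;
  // Post-patch return type: std::shared_ptr<pten::Allocation>.
  std::shared_ptr<pten::Allocation> buf =
      paddle::memory::allocation::AllocatorFacade::Instance().AllocShared(
          place, 1024);
  void* data = buf->ptr();  // valid while `buf` keeps the allocation alive
  (void)data;
}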
@@ -42,6 +42,7 @@ using NPUPinnedAllocator = paddle::memory::allocation::NPUPinnedAllocator;
 class AllocatorFacadePrivate;
 class AllocatorFacade {
  public:
+  using Allocation = pten::Allocation;
   AllocatorFacade(const AllocatorFacade& o) = delete;
   const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;
   ~AllocatorFacade();
......
@@ -45,7 +45,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
       chunk_size_(std::max(AlignedSize(chunk_size, alignment), alignment)),
       allow_free_idle_chunk_(allow_free_idle_chunk) {}
-Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
+pten::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
+    size_t unaligned_size) {
   size_t size = AlignedSize(unaligned_size, alignment_);
   VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size;
@@ -78,11 +79,13 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
     size_t realloc_size = std::max(size, chunk_size_);
     try {
-      chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
+      chunks_.emplace_back(static_unique_ptr_cast<Allocation>(
+          underlying_allocator_->Allocate(realloc_size)));
     } catch (BadAlloc &ex) {
       if (FLAGS_free_when_no_cache_hit) throw ex;
       FreeIdleChunks();
-      chunks_.emplace_back(underlying_allocator_->Allocate(realloc_size));
+      chunks_.emplace_back(static_unique_ptr_cast<Allocation>(
+          underlying_allocator_->Allocate(realloc_size)));
     }
     auto *chunk = &(*chunks_.rbegin());
@@ -104,7 +107,7 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t unaligned_size) {
   return new BlockAllocation(block_it);
 }
-void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
+void AutoGrowthBestFitAllocator::FreeImpl(pten::Allocation *allocation) {
   VLOG(10) << "Free " << allocation->size()
            << " bytes, ptr = " << allocation->ptr();
   std::lock_guard<SpinLock> guard(spinlock_);
......
@@ -36,9 +36,9 @@ class AutoGrowthBestFitAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   // Release the memory block which is not used in pool.
   uint64_t ReleaseImpl(const platform::Place &place) override {
@@ -64,10 +64,10 @@ class AutoGrowthBestFitAllocator : public Allocator {
   };
   struct Chunk {
-    explicit Chunk(AllocationPtr allocation)
+    explicit Chunk(DecoratedAllocationPtr allocation)
         : allocation_(std::move(allocation)) {}
-    AllocationPtr allocation_;
+    DecoratedAllocationPtr allocation_;
     List<Block> blocks_;
   };
......
@@ -28,12 +28,12 @@ namespace allocation {
 class RecordedAllocator : public Allocator {
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     allocated_size_ += size;
     return new Allocation(malloc(size), size, platform::CPUPlace());
   }
-  void FreeImpl(Allocation *allocation) {
+  void FreeImpl(pten::Allocation *allocation) {
     allocated_size_ -= allocation->size();
     free(allocation->ptr());
     delete allocation;
@@ -79,7 +79,7 @@ class LimitedResourceAllocator : public Allocator {
   size_t AllocatedSize() const { return allocated_size_; }
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     if (allocated_size_ + size > capacity_) {
       throw BadAlloc("", __FILE__, __LINE__);
     }
@@ -88,7 +88,7 @@ class LimitedResourceAllocator : public Allocator {
     return new Allocation(malloc(size), size, platform::CPUPlace());
   }
-  void FreeImpl(Allocation *allocation) {
+  void FreeImpl(pten::Allocation *allocation) {
     allocated_size_ -= allocation->size();
     free(allocation->ptr());
     delete allocation;
......
@@ -37,7 +37,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
       size_t size = dis_(random_engine_);
       AllocationPtr allocation = Alloc(place_, size);
-      void* base_ptr = allocation->base_ptr();
+      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -56,7 +56,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
       size_t size = dis_(random_engine_);
       AllocationPtr allocation = Alloc(place_, size);
-      void* base_ptr = allocation->base_ptr();
+      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -77,7 +77,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
       size_t size = dis_(random_engine_);
       AllocationPtr allocation = Alloc(place_, size);
-      void* base_ptr = allocation->base_ptr();
+      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -91,7 +91,7 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   void ZeroSizeAllocTest() {
     AllocationPtr allocation = Alloc(place_, 0);
-    void* base_ptr = allocation->base_ptr();
+    void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
     void* system_ptr =
         platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
     EXPECT_EQ(base_ptr, system_ptr);
......
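The repeated `static_cast` in this test exists because `base_ptr()` stays on the derived fluid `Allocation` (it is specific to the auto_growth strategy) while `Alloc` now hands back a smart pointer to the `pten::Allocation` base. The shape of the problem in miniature (stand-in types, not the real Paddle classes):

struct BaseAllocation {
  virtual ~BaseAllocation() = default;
};
struct FluidAllocation : BaseAllocation {
  void* base_ptr() const { return base_ptr_; }
  void* base_ptr_ = nullptr;
};

void* GetBasePtr(BaseAllocation* allocation) {
  // base_ptr() is not part of the base interface; recover the derived type.
  return static_cast<FluidAllocation*>(allocation)->base_ptr();
}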
@@ -33,7 +33,7 @@ static int HighestBitPos(size_t N) {
   }
 }
-BestFitAllocator::BestFitAllocator(Allocation* allocation)
+BestFitAllocator::BestFitAllocator(pten::Allocation* allocation)
     : allocation_(allocation) {
   details::Chunk chunk;
   chunk.size_ = allocation_->size();
@@ -115,7 +115,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
   }
   return num;
 }
-void BestFitAllocator::FreeImpl(Allocation* allocation) {
+void BestFitAllocator::FreeImpl(pten::Allocation* allocation) {
   auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(
       bf_allocation,
@@ -150,7 +150,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) {
   InsertFreeNode(chunk_it);
   delete allocation;
 }
-Allocation* BestFitAllocator::AllocateImpl(size_t size) {
+pten::Allocation* BestFitAllocator::AllocateImpl(size_t size) {
   auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
   MapIt map_it;
   for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
......
@@ -108,7 +108,7 @@ class BestFitAllocation : public Allocation {
 // the prev-chunk and the next-chunk when possible.
 class BestFitAllocator : public Allocator {
  public:
-  explicit BestFitAllocator(Allocation* allocation);
+  explicit BestFitAllocator(pten::Allocation* allocation);
   void* BasePtr() const { return allocation_->ptr(); }
@@ -127,11 +127,11 @@ class BestFitAllocator : public Allocator {
   void InsertFreeNode(const ListIt& it);
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
  private:
-  Allocation* allocation_;  // not owned
+  pten::Allocation* allocation_;  // not owned
   details::ChunkList chunks_;
   details::FreeChunkBin free_chunks_;
 };
......
@@ -46,12 +46,13 @@ void BufferedAllocator::FreeCache(size_t size) {
 bool BufferedAllocator::IsAllocThreadSafe() const { return mtx_ != nullptr; }
-void BufferedAllocator::FreeImpl(Allocation *allocation) {
+void BufferedAllocator::FreeImpl(pten::Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
-  allocations_.emplace(allocation->size(), AllocationPtr(allocation));
+  allocations_.emplace(allocation->size(),
+                       AllocationPtr(allocation, Allocator::AllocationDeleter));
 }
-Allocation *BufferedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *BufferedAllocator::AllocateImpl(size_t size) {
   {
     platform::LockGuardPtr<std::mutex> guard(mtx_);
     auto it = allocations_.lower_bound(size);
......
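The extra `Allocator::AllocationDeleter` argument is forced by the type change: a `std::unique_ptr` whose deleter is a function pointer cannot be constructed from a raw pointer alone, the deleter must be supplied explicitly. The generic C++ behavior, in miniature:

#include <memory>

struct Resource {};
void CustomDelete(Resource* r) { delete r; }

using ResourcePtr = std::unique_ptr<Resource, void (*)(Resource*)>;

int main() {
  Resource* raw = new Resource;
  // ResourcePtr owned(raw);            // ill-formed: no default deleter value
  ResourcePtr owned(raw, CustomDelete);  // deleter passed explicitly, as
                                         // BufferedAllocator now does
  return 0;
}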
@@ -45,8 +45,8 @@ class BufferedAllocator : public Allocator {
   void FreeCache(size_t size);
  protected:
-  void FreeImpl(Allocation *allocation) override;
-  Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
......
@@ -27,7 +27,7 @@ namespace memory {
 namespace allocation {
 inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator(
-    Allocation *allocation, bool thread_safe) {
+    pten::Allocation *allocation, bool thread_safe) {
   std::unique_ptr<Allocator> allocator(new BestFitAllocator(allocation));
   if (thread_safe) {
     allocator.reset(new LockedAllocator(std::move(allocator)));
@@ -68,7 +68,7 @@ class StubAllocator : public Allocator {
   size_t GetFreeCount() const { return destruct_count_; }
  protected:
-  void FreeImpl(Allocation *allocation) override {
+  void FreeImpl(pten::Allocation *allocation) override {
     auto *alloc = dynamic_cast<StubAllocation *>(allocation);
     PADDLE_ENFORCE_NOT_NULL(
         alloc, platform::errors::InvalidArgument(
@@ -77,7 +77,7 @@ class StubAllocator : public Allocator {
     ++destruct_count_;
     delete allocation;
   }
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     ++construct_count_;
     if (size == 0) {
       return new StubAllocation(nullptr, 0, platform::CPUPlace());
......
@@ -24,7 +24,7 @@ namespace allocation {
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }
-void CPUAllocator::FreeImpl(Allocation *allocation) {
+void CPUAllocator::FreeImpl(pten::Allocation *allocation) {
   void *p = allocation->ptr();
 #ifdef _WIN32
   _aligned_free(p);
@@ -34,7 +34,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
   delete allocation;
 }
-Allocation *CPUAllocator::AllocateImpl(size_t size) {
+pten::Allocation *CPUAllocator::AllocateImpl(size_t size) {
   void *p;
 #ifdef _WIN32
   p = _aligned_malloc(size, kAlignment);
......
@@ -37,8 +37,8 @@ class CPUAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
 };
 }  // namespace allocation
 }  // namespace memory
......
@@ -32,7 +32,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
-void CUDAAllocator::FreeImpl(Allocation* allocation) {
+void CUDAAllocator::FreeImpl(pten::Allocation* allocation) {
   PADDLE_ENFORCE_EQ(
       BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_,
       platform::errors::PermissionDenied(
@@ -42,7 +42,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
   delete allocation;
 }
-Allocation* CUDAAllocator::AllocateImpl(size_t size) {
+pten::Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); });
   void* ptr;
......
@@ -28,8 +28,8 @@ class CUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
  private:
   platform::CUDAPlace place_;
......
@@ -41,7 +41,7 @@ namespace allocation {
  */
 class CUDADeviceContextAllocation : public Allocation {
  public:
-  explicit CUDADeviceContextAllocation(AllocationPtr allocation)
+  explicit CUDADeviceContextAllocation(DecoratedAllocationPtr allocation)
       : Allocation(allocation->ptr(), allocation->base_ptr(),
                    allocation->size(), allocation->place()),
         underlying_allocation_(std::move(allocation)) {}
@@ -56,7 +56,7 @@ class CUDADeviceContextAllocation : public Allocation {
             << p_allocation;
     dev_ctx_->AddStreamCallback([p_allocation] {
       VLOG(4) << "Delete CUDADeviceContextAllocation at " << p_allocation;
-      AllocationDeleter()(p_allocation);
+      Allocator::AllocationDeleter(p_allocation);
     });
   }
@@ -65,7 +65,7 @@ class CUDADeviceContextAllocation : public Allocation {
   }
  private:
-  AllocationPtr underlying_allocation_;
+  DecoratedAllocationPtr underlying_allocation_;
   const platform::CUDADeviceContext *dev_ctx_{nullptr};
 };
@@ -102,14 +102,14 @@ class CUDADeviceContextAllocator : public Allocator {
   }
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     PADDLE_ENFORCE_NOT_NULL(
         default_stream_,
         platform::errors::PreconditionNotMet(
            "Default stream is not set for CUDADeviceContextAllocator"));
     platform::CUDADeviceGuard guard(place_.device);
-    auto allocation =
-        new CUDADeviceContextAllocation(memory::Alloc(place_, size));
+    auto allocation = new CUDADeviceContextAllocation(
+        static_unique_ptr_cast<Allocation>(memory::Alloc(place_, size)));
     // Wait for the event on stream
 #ifdef PADDLE_WITH_HIP
     PADDLE_ENFORCE_GPU_SUCCESS(hipEventRecord(event_, default_stream_));
@@ -121,7 +121,7 @@ class CUDADeviceContextAllocator : public Allocator {
     return allocation;
   }
-  void FreeImpl(Allocation *allocation) override { delete allocation; }
+  void FreeImpl(pten::Allocation *allocation) override { delete allocation; }
  private:
   platform::CUDAPlace place_;
......
@@ -101,7 +101,7 @@ CUDAVirtualMemAllocator::CUDAVirtualMemAllocator(
 bool CUDAVirtualMemAllocator::IsAllocThreadSafe() const { return false; }
-void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) {
+void CUDAVirtualMemAllocator::FreeImpl(pten::Allocation* allocation) {
   PADDLE_ENFORCE_EQ(
       BOOST_GET_CONST(platform::CUDAPlace, allocation->place()), place_,
       platform::errors::PermissionDenied(
@@ -140,7 +140,7 @@ void CUDAVirtualMemAllocator::FreeImpl(Allocation* allocation) {
   delete allocation;
 }
-Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) {
+pten::Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) {
   size = AlignedSize(size, granularity_);
   CUdeviceptr ptr = virtual_mem_base_ + virtual_mem_alloced_offset_;
......
@@ -37,8 +37,8 @@ class CUDAVirtualMemAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
  private:
   platform::CUDAPlace place_;
......
@@ -37,12 +37,12 @@ LockedAllocator::LockedAllocator(
   }
 }
-void LockedAllocator::FreeImpl(Allocation *allocation) {
+void LockedAllocator::FreeImpl(pten::Allocation *allocation) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   underlying_allocator_->Free(allocation);
 }
-Allocation *LockedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *LockedAllocator::AllocateImpl(size_t size) {
   platform::LockGuardPtr<std::mutex> guard(mtx_);
   return underlying_allocator_->Allocate(size).release();
 }
......
@@ -29,8 +29,8 @@ class LockedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  void FreeImpl(Allocation *allocation) override;
-  Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
......
@@ -790,7 +790,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
 namespace allocation {
-Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
+pten::Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
   void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_);
   auto *tmp_alloc = new Allocation(ptr, size, place_);
   platform::MemEvenRecorder::Instance().PushMemRecord(
@@ -798,7 +798,7 @@ Allocation *NaiveBestFitAllocator::AllocateImpl(size_t size) {
   return tmp_alloc;
 }
-void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
+void NaiveBestFitAllocator::FreeImpl(pten::Allocation *allocation) {
   boost::apply_visitor(
       legacy::FreeVisitor(allocation->ptr(), allocation->size()),
       allocation->place());
......
@@ -34,8 +34,8 @@ class NaiveBestFitAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   uint64_t ReleaseImpl(const platform::Place &place) override;
  private:
......
@@ -22,7 +22,7 @@ namespace memory {
 namespace allocation {
 bool NPUAllocator::IsAllocThreadSafe() const { return true; }
-void NPUAllocator::FreeImpl(Allocation* allocation) {
+void NPUAllocator::FreeImpl(pten::Allocation* allocation) {
   PADDLE_ENFORCE_EQ(
       BOOST_GET_CONST(platform::NPUPlace, allocation->place()), place_,
       platform::errors::PermissionDenied(
@@ -32,7 +32,7 @@ void NPUAllocator::FreeImpl(Allocation* allocation) {
   delete allocation;
 }
-Allocation* NPUAllocator::AllocateImpl(size_t size) {
+pten::Allocation* NPUAllocator::AllocateImpl(size_t size) {
   std::call_once(once_flag_,
                  [this] { platform::SetNPUDeviceId(place_.device); });
......
@@ -28,8 +28,8 @@ class NPUAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
  private:
   platform::NPUPlace place_;
......
@@ -26,7 +26,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
     platform::NPUEventQuery(event, &status);
     if (status == ACL_EVENT_STATUS_COMPLETE) {
-      Allocation *allocation = it->first;
+      auto *allocation = it->first;
       void *ptr = allocation->ptr();
       free(ptr);
       npu_events_.erase(it++);
@@ -38,7 +38,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
   }
 }
-Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
   std::lock_guard<std::mutex> lock(mtx_);
   ProcessEventsAndFree();
   void *ptr;
@@ -50,7 +50,7 @@ Allocation *NPUPinnedAllocator::AllocateImpl(size_t size) {
   return new Allocation(ptr, size, platform::NPUPinnedPlace());
 }
-void NPUPinnedAllocator::FreeImpl(Allocation *allocation) {
+void NPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) {
   std::lock_guard<std::mutex> lock(mtx_);
   void *ptr = allocation->ptr();
   auto iter = npu_events_.find(allocation);
@@ -83,7 +83,7 @@ uint64_t NPUPinnedAllocator::ReleaseImpl(const platform::Place &place) {
   return static_cast<uint64_t>(0);
 }
-void NPUPinnedAllocator::RecordEvent(Allocation *allocation,
+void NPUPinnedAllocator::RecordEvent(pten::Allocation *allocation,
                                      aclrtStream stream) {
   std::lock_guard<std::mutex> lock(mtx_);
   aclrtEvent event = nullptr;
......
@@ -32,16 +32,16 @@ class NPUPinnedAllocator : public Allocator {
  public:
   bool IsAllocThreadSafe() const override { return true; }
   void ProcessEventsAndFree();
-  void RecordEvent(Allocation *allocation, aclrtStream stream);
+  void RecordEvent(pten::Allocation *allocation, aclrtStream stream);
   constexpr static size_t kAlignment = 4096UL;
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   uint64_t ReleaseImpl(const platform::Place &place) override;
  private:
-  std::unordered_map<Allocation *, aclrtEvent> npu_events_;
+  std::unordered_map<pten::Allocation *, aclrtEvent> npu_events_;
   mutable std::mutex mtx_;
 };
......
@@ -18,7 +18,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
-void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
+void CPUPinnedAllocator::FreeImpl(pten::Allocation *allocation) {
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(hipHostFree(allocation->ptr()));
 #else
@@ -26,7 +26,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
 #endif
   delete allocation;
 }
-Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
+pten::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
   void *ptr;
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(hipHostMalloc(&ptr, size, hipHostMallocPortable));
......
@@ -25,8 +25,8 @@ class CPUPinnedAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  void FreeImpl(Allocation *allocation) override;
-  Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
 };
 }  // namespace allocation
......
@@ -39,7 +39,7 @@ class WaitedAllocateSizeGuard {
   size_t requested_size_;
 };
-void RetryAllocator::FreeImpl(Allocation* allocation) {
+void RetryAllocator::FreeImpl(pten::Allocation* allocation) {
   // Delete underlying allocation first.
   size_t size = allocation->size();
   underlying_allocator_->Free(allocation);
@@ -51,7 +51,7 @@ void RetryAllocator::FreeImpl(Allocation* allocation) {
   }
 }
-Allocation* RetryAllocator::AllocateImpl(size_t size) {
+pten::Allocation* RetryAllocator::AllocateImpl(size_t size) {
   auto alloc_func = [&, this]() {
     return underlying_allocator_->Allocate(size).release();
   };
......
@@ -45,8 +45,8 @@ class RetryAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  void FreeImpl(Allocation* allocation) override;
-  Allocation* AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation* allocation) override;
+  pten::Allocation* AllocateImpl(size_t size) override;
   uint64_t ReleaseImpl(const platform::Place& place) override {
     return underlying_allocator_->Release(place);
   }
......
@@ -98,12 +98,12 @@ class DummyAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
         "Here is a test exception, always BadAlloc."));
   }
-  void FreeImpl(Allocation *) override {}
+  void FreeImpl(pten::Allocation *) override {}
 };
 TEST(RetryAllocator, RetryAllocatorLastAllocFailure) {
......
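For context on the test above: RetryAllocator forwards to an underlying allocator and, on BadAlloc, waits for memory to be freed before retrying; only the final failure propagates, which is what RetryAllocatorLastAllocFailure exercises. A rough sketch of that control flow, assuming a fixed retry budget and a simple sleep-based backoff (both are simplifying assumptions, not the actual implementation, which waits on freed memory and tracks waited sizes):

#include <chrono>
#include <thread>

// Simplified retry loop: try the underlying allocator, back off on
// failure, and rethrow only when the retry budget is exhausted.
template <typename AllocFn>
pten::Allocation *AllocateWithRetry(AllocFn alloc_func, int max_retries,
                                    std::chrono::milliseconds backoff) {
  for (int attempt = 0;; ++attempt) {
    try {
      return alloc_func();
    } catch (...) {                         // BadAlloc in the real allocator
      if (attempt >= max_retries) throw;    // the last failure propagates
      std::this_thread::sleep_for(backoff); // real code waits for a Free()
    }
  }
}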
@@ -19,7 +19,7 @@ namespace memory {
 namespace allocation {
 StreamSafeCUDAAllocation::StreamSafeCUDAAllocation(
-    AllocationPtr underlying_allocation, gpuStream_t owning_stream)
+    DecoratedAllocationPtr underlying_allocation, gpuStream_t owning_stream)
     : Allocation(underlying_allocation->ptr(),
                  underlying_allocation->base_ptr(),
                  underlying_allocation->size(), underlying_allocation->place()),
@@ -116,7 +116,7 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() {
 bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; }
-Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
+pten::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
   ProcessUnfreedAllocations();
   VLOG(8) << "Try allocate " << size << " bytes";
   AllocationPtr underlying_allocation;
@@ -136,13 +136,14 @@ Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
     throw;
   }
   StreamSafeCUDAAllocation* allocation = new StreamSafeCUDAAllocation(
-      std::move(underlying_allocation), default_stream_);
+      static_unique_ptr_cast<Allocation>(std::move(underlying_allocation)),
+      default_stream_);
   VLOG(8) << "Allocate " << allocation->size() << " bytes at address "
           << allocation->ptr();
   return allocation;
 }
-void StreamSafeCUDAAllocator::FreeImpl(Allocation* allocation) {
+void StreamSafeCUDAAllocator::FreeImpl(pten::Allocation* allocation) {
   StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
       dynamic_cast<StreamSafeCUDAAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
......
@@ -34,7 +34,7 @@ namespace allocation {
 class StreamSafeCUDAAllocation : public Allocation {
  public:
-  StreamSafeCUDAAllocation(AllocationPtr underlying_allocation,
+  StreamSafeCUDAAllocation(DecoratedAllocationPtr underlying_allocation,
                            gpuStream_t owning_stream);
   void RecordStream(const gpuStream_t &stream);
   bool CanBeFreed();
@@ -42,7 +42,7 @@ class StreamSafeCUDAAllocation : public Allocation {
   const gpuStream_t &GetOwningStream() const;
  private:
-  AllocationPtr underlying_allocation_;
+  DecoratedAllocationPtr underlying_allocation_;
   std::map<gpuStream_t, gpuEvent_t> outstanding_event_map_;
   gpuStream_t owning_stream_;
   SpinLock outstanding_event_map_lock_;
@@ -57,8 +57,8 @@ class StreamSafeCUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override;
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
   uint64_t ReleaseImpl(const platform::Place &place) override;
  private:
......
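DecoratedAllocationPtr is a unique_ptr to the fluid-side Allocation (which decorates pten::Allocation with base_ptr()), so handing an AllocationPtr to the constructor needs a cast that preserves the custom deleter. A generic sketch of what a static_unique_ptr_cast helper can look like; the exact signature in the patch is not shown here, so treat this as an assumption:

#include <memory>
#include <utility>

// Rebind a unique_ptr with a custom deleter to a related pointee type.
// Assumes static_cast<To*>(From*) is valid for the stored pointer.
template <typename To, typename From, typename Deleter>
std::unique_ptr<To, Deleter> static_unique_ptr_cast(
    std::unique_ptr<From, Deleter>&& p) {
  Deleter d = p.get_deleter();
  return std::unique_ptr<To, Deleter>(static_cast<To*>(p.release()),
                                      std::move(d));
}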
@@ -32,12 +32,12 @@ struct StubAllocator : public Allocator {
   size_t AllocNum() const { return alloc_num_; }
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     ++alloc_num_;
     return new Allocation(new uint8_t[size], size, platform::CPUPlace());
   }
-  void FreeImpl(Allocation *allocation) override {
+  void FreeImpl(pten::Allocation *allocation) override {
     delete[] static_cast<uint8_t *>(allocation->ptr());
     delete allocation;
     --alloc_num_;
......
@@ -83,11 +83,11 @@ class ThreadLocalCUDAAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  Allocation* AllocateImpl(size_t size) override {
+  pten::Allocation* AllocateImpl(size_t size) override {
     return ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->AllocateImpl(
         size);
   }
-  void FreeImpl(Allocation* allocation) override {
+  void FreeImpl(pten::Allocation* allocation) override {
     auto* tl_allocation = static_cast<ThreadLocalAllocation*>(allocation);
     auto allocator_impl = tl_allocation->GetAllocator();
     allocator_impl->FreeImpl(tl_allocation);
......
@@ -35,7 +35,8 @@ VirtualMemoryAutoGrowthBestFitAllocator::
       alignment_(alignment),
       place_(place) {}
-Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
+pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(
+    size_t size) {
   std::lock_guard<SpinLock> guard(spinlock_);
   size = AlignedSize(size, alignment_);
   auto result = AllocFromFreeBlocks(size);
@@ -48,7 +49,8 @@ Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
   return result;
 }
-void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
+void VirtualMemoryAutoGrowthBestFitAllocator::FreeImpl(
+    pten::Allocation *allocation) {
   std::lock_guard<SpinLock> guard(spinlock_);
   auto block_it = static_cast<BlockAllocation *>(allocation)->block_it_;
   TryMergeBlock2Blocks(block_it);
@@ -225,7 +227,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) {
   }
 }
-Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
+pten::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
     size_t size) {
   auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
   if (iter != free_blocks_.end()) {
......
@@ -60,12 +60,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
   bool IsAllocThreadSafe() const override { return true; }
  protected:
-  Allocation *AllocateImpl(size_t size) override;
-  void FreeImpl(Allocation *allocation) override;
+  pten::Allocation *AllocateImpl(size_t size) override;
+  void FreeImpl(pten::Allocation *allocation) override;
  private:
-  Allocation *AllocFromFreeBlocks(size_t size);
+  pten::Allocation *AllocFromFreeBlocks(size_t size);
   void ExtendAndMerge(size_t size);
   void TryMergeBlock2Blocks(std::list<Block>::iterator iter);
......
@@ -28,7 +28,7 @@ class DeviceContext;
 namespace memory {
-using allocation::Allocation;
+using pten::Allocation;
 using allocation::Allocator;
 using allocation::AllocationPtr;
......
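This `using pten::Allocation;` line is what keeps most callers compiling unchanged: the shorthand `memory::Allocation` now names the pten type rather than the old `allocation::Allocation`. A sketch of the consequence (not code from the patch; assumes the relevant headers are included):

#include <type_traits>

// After this hunk, both spellings refer to one type.
static_assert(
    std::is_same<paddle::memory::Allocation, pten::Allocation>::value,
    "memory::Allocation is now an alias of pten::Allocation");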
@@ -336,9 +336,8 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
     auto* data_alloc_released = data_alloc.release();
     auto* col_alloc_released = col_alloc.release();
     context.AddStreamCallback([data_alloc_released, col_alloc_released] {
-      memory::allocation::AllocationDeleter deleter;
-      deleter(data_alloc_released);
-      deleter(col_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(data_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(col_alloc_released);
     });
 #endif
   }
@@ -466,9 +465,8 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
     auto* data_alloc_released = data_alloc.release();
     auto* cols_alloc_released = cols_alloc.release();
     context.AddStreamCallback([data_alloc_released, cols_alloc_released] {
-      memory::allocation::AllocationDeleter deleter;
-      deleter(data_alloc_released);
-      deleter(cols_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(data_alloc_released);
+      memory::allocation::Allocator::AllocationDeleter(cols_alloc_released);
     });
 #endif
   }
......
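Both hunks above make the same mechanical substitution: AllocationDeleter is no longer a functor type the caller instantiates but a static member function of Allocator. A self-contained toy showing the refactoring pattern (the types here are stand-ins, not Paddle's):

struct Allocation { /* stand-in */ };

// Old shape: a stateless functor the caller must construct.
struct AllocationDeleterFunctor {
  void operator()(Allocation *a) const { delete a; }
};

// New shape: a static method, callable without an object.
struct Allocator {
  static void AllocationDeleter(Allocation *a) { delete a; }
};

int main() {
  AllocationDeleterFunctor deleter;
  deleter(new Allocation);                       // before
  Allocator::AllocationDeleter(new Allocation);  // after
  return 0;
}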
@@ -55,7 +55,7 @@ class MLUDeviceContextAllocation : public Allocation {
            << p_allocation;
     dev_ctx_->AddStreamCallback([p_allocation] {
       VLOG(4) << "Delete MLUDeviceContextAllocation at " << p_allocation;
-      AllocationDeleter()(p_allocation);
+      Allocator::AllocationDeleter(p_allocation);
     });
   }
@@ -91,7 +91,7 @@ class MLUDeviceContextAllocator : public Allocator {
   }
  protected:
-  Allocation *AllocateImpl(size_t size) override {
+  pten::Allocation *AllocateImpl(size_t size) override {
     PADDLE_ENFORCE_NOT_NULL(
         default_stream_,
         platform::errors::PreconditionNotMet(
@@ -105,7 +105,7 @@ class MLUDeviceContextAllocator : public Allocator {
     return allocation;
   }
-  void FreeImpl(Allocation *allocation) override { delete allocation; }
+  void FreeImpl(pten::Allocation *allocation) override { delete allocation; }
  private:
   platform::MLUPlace place_;
......
@@ -158,8 +158,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) {
         paddle::memory::allocation::AllocatorFacade::Instance()
             .GetAllocator(npu_pinned_place)
             .get());
-    paddle::memory::allocation::Allocation *allocation =
-        npu_pinned_tensor.Holder().get();
+    pten::Allocation *allocation = npu_pinned_tensor.Holder().get();
     npu_pinned_allocator->RecordEvent(allocation, GetCurrentNPUStream());
   } else {
......
@@ -53,7 +53,7 @@ size_t PyArray_Size_(PyObject* numpy_data) {
   return res;
 }
-class EagerNumpyAllocation : public paddle::memory::allocation::Allocation {
+class EagerNumpyAllocation : public pten::Allocation {
  public:
   explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype)
       : Allocation(
......
-cc_library(pten_api_utils SRCS allocator.cc storage.cc tensor_utils.cc DEPS
+cc_library(pten_api_utils SRCS storage.cc tensor_utils.cc DEPS
   tensor_base convert_utils dense_tensor lod_tensor selected_rows place var_type_traits)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/api/lib/utils/allocator.h"
namespace paddle {
namespace experimental {
memory::Allocator::AllocationDeleter DefaultAllocator::deleter_;
} // namespace experimental
} // namespace paddle
@@ -22,14 +22,15 @@ limitations under the License. */
 namespace paddle {
 namespace experimental {
-class DefaultAllocator : public pten::Allocator {
+class DefaultAllocator : public pten::deprecated::Allocator {
  public:
-  using Allocation = pten::Allocation;
+  using Allocation = pten::deprecated::Allocation;
   explicit DefaultAllocator(const paddle::platform::Place& place)
       : place_(place) {}
   static void Delete(Allocation* allocation) {
-    deleter_(allocation->CastContextWithoutCheck<paddle::memory::Allocation>());
+    paddle::memory::allocation::Allocator::AllocationDeleter(
+        allocation->CastContextWithoutCheck<paddle::memory::Allocation>());
   }
   Allocation Allocate(size_t bytes_size) override {
@@ -42,7 +43,6 @@ class DefaultAllocator : public pten::Allocator {
  private:
   paddle::platform::Place place_;
-  static paddle::memory::Allocator::AllocationDeleter deleter_;
 };
 }  // namespace experimental
......
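This hunk also explains the CMake change and the file shown above it: the only thing allocator.cc defined was the static `deleter_` member, and a non-inline static data member needs exactly one out-of-line definition in some translation unit. Once Delete calls the static `Allocator::AllocationDeleter` directly, both the member and the source file can go. The C++ rule in miniature (an illustrative toy, pre-C++17 inline variables; file names are hypothetical):

// widget.h
struct Widget {
  static int counter_;  // declaration only
};

// widget.cc -- this file exists solely to provide the definition.
int Widget::counter_ = 0;

// Remove the member and widget.cc loses its reason to exist,
// exactly like allocator.cc in this patch.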
@@ -20,14 +20,13 @@ namespace experimental {
 ExternalStorage::ExternalStorage(void* ptr,
                                  size_t size,
                                  const paddle::platform::Place& place)
-    : pten::Storage(
-          std::make_shared<paddle::memory::Allocation>(ptr, size, place)),
+    : pten::Storage(std::make_shared<pten::Allocation>(ptr, size, place)),
       size_(size) {}
 ExternalStorage::ExternalStorage(const pten::intrusive_ptr<pten::Storage>& root,
                                  size_t delta,
                                  size_t size)
-    : Storage(std::make_shared<paddle::memory::Allocation>(
+    : Storage(std::make_shared<pten::Allocation>(
           static_cast<uint8_t*>(root->data()) + delta, size, root->place())),
       size_(size) {
   PADDLE_ENFORCE_LE(static_cast<size_t>(delta + size),
......
@@ -307,7 +307,7 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
   dst->Resize(src->dims());
   dst->set_type(pten::TransToProtoVarType(src->dtype()));
   auto storage = src->release();
-  std::shared_ptr<paddle::memory::allocation::Allocation> holder(
+  std::shared_ptr<pten::Allocation> holder(
       new TensorStorage(std::move(storage)));
   dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype()));
   dst->set_offset(src->meta().offset);
......
@@ -16,8 +16,10 @@ limitations under the License. */
 #include <cstdint>
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/core/candidate/allocator.h"
 namespace pten {
+namespace deprecated {
 /// \brief Encapsulates strategies for access/addressing, allocation/
 /// deallocation and construction/destruction of objects.
@@ -147,4 +149,5 @@ inline Allocation Allocate(const std::shared_ptr<Allocator>& a, size_t n) {
   return a->Allocate(n);
 }
+}  // namespace deprecated
 }  // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <functional>
#include "paddle/fluid/platform/place.h"
namespace pten {
/// \brief Fancy pointer with deleter. This data type exists to stay
/// compatible with allocators from different frameworks without
/// significant performance loss. This class does not support inheritance.
class Allocation {
public:
using Place = paddle::platform::Place;
using DeleterFnPtr = void (*)(Allocation*);
Allocation() = default;
// Don't own resources, only provide access.
Allocation(void* data, size_t size, const Place& place)
: ptr_(data), size_(size), place_(place) {}
// Own resources.
Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
: ptr_(data), size_(size), deleter_(deleter), place_(place) {}
Allocation(Allocation&& other) noexcept { swap(*this, other); }
Allocation& operator=(Allocation&& other) noexcept {
// Exchange explicitly, so that moving does not degenerate into copying.
swap(*this, other);
return *this;
}
virtual ~Allocation() {
if (deleter_) {
deleter_(this);
}
}
// Returns the holding pointer.
// NOTE: For performance reasons, it is better not to make this method
// virtual. If we want to implement `defragmentation` later, we might need
// to make the `ptr_` field protected and add a virtual method like
// `defragmentation` to change `ptr_`.
void* ptr() const noexcept { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocators might allocate more memory than requested, so the
// size can be larger than the request. For example, the AlignedAllocator
// always allocates memory of size + kAlignment. The raw pointer might not
// be aligned, so an offset may be added to the raw pointer; the size of
// this allocation will then be `size + kAlignment - offset`.
size_t size() const noexcept { return size_; }
void* operator->() const noexcept { return ptr_; }
operator bool() const noexcept { return ptr_; }
const Place& place() const noexcept { return place_; }
DeleterFnPtr deleter() const noexcept { return deleter_; }
protected:
friend void swap(Allocation& a, Allocation& b) noexcept;
void* ptr_{nullptr};
size_t size_{};
DeleterFnPtr deleter_{nullptr};
// TODO(Shixiaowei02): Use an enum instead to reduce the construction
// overhead by more than 50%.
Place place_;
};
inline void swap(Allocation& a, Allocation& b) noexcept {
::std::swap(a.ptr_, b.ptr_);
::std::swap(a.deleter_, b.deleter_);
::std::swap(a.place_, b.place_);
::std::swap(a.size_, b.size_);
}
class Allocator {
public:
using DeleterType = std::function<void(Allocation*)>;
using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
virtual ~Allocator() = default;
virtual AllocationPtr Allocate(size_t bytes_size) = 0;
virtual bool IsAllocThreadSafe() const { return false; }
};
} // namespace pten
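The new pten::Allocation above is a "fancy pointer": without a deleter it merely views memory, and with a DeleterFnPtr it owns it, invoking the deleter from the destructor; moves swap internals, so ownership transfers without double frees. A small usage sketch against the class as defined above (the malloc/free pairing and CPUPlace are illustrative choices, not from the patch):

#include <cstdlib>
#include <utility>

// The deleter receives the Allocation being destroyed; it frees the
// underlying buffer, not the Allocation object itself.
void FreeBuffer(pten::Allocation* a) { std::free(a->ptr()); }

int main() {
  // Non-owning view: no deleter, so destruction leaves the buffer alone.
  int scratch[16];
  pten::Allocation view(scratch, sizeof(scratch),
                        paddle::platform::CPUPlace());

  // Owning allocation: FreeBuffer runs when `moved` is destroyed.
  pten::Allocation owned(std::malloc(256), 256, &FreeBuffer,
                         paddle::platform::CPUPlace());
  pten::Allocation moved = std::move(owned);  // `owned` is left empty

  return (view && moved) ? 0 : 1;
}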
@@ -60,6 +60,8 @@ class TensorInplaceVersion {
 class DenseTensor : public TensorBase,
                     public TypeInfoTraits<TensorBase, DenseTensor> {
  public:
+  using Allocator = deprecated::Allocator;
   /// \brief Construct a dense tensor and allocate space.
   /// \param a The allocator used to allocate space.
   /// \param meta The meta data of dense tensor.
......
@@ -91,6 +91,7 @@ class Storage : public intrusive_ref_counter<Storage> {
 class TensorStorage : public Storage {
  public:
   using Place = paddle::platform::Place;
+  using Allocator = deprecated::Allocator;
   explicit TensorStorage(const std::shared_ptr<Allocator>& a) : alloc_(a) {}
......
@@ -21,7 +21,7 @@ limitations under the License. */
 namespace pten {
 namespace tests {
-class HostAllocatorSample : public pten::RawAllocator {
+class HostAllocatorSample : public pten::deprecated::RawAllocator {
  public:
   using Place = paddle::platform::Place;
   void* Allocate(size_t bytes_size) override {
@@ -36,8 +36,9 @@ class HostAllocatorSample : public pten::RawAllocator {
   Place place_{paddle::platform::CPUPlace()};
 };
-class FancyAllocator : public pten::Allocator {
+class FancyAllocator : public pten::deprecated::Allocator {
  public:
+  using Allocation = pten::deprecated::Allocation;
   static void Delete(Allocation* allocation) {
     ::operator delete(allocation->ptr());
   }
@@ -55,7 +56,7 @@ class FancyAllocator : public pten::Allocator {
 template <typename T>
 struct CustomAllocator {
   using value_type = T;
-  using Allocator = pten::RawAllocator;
+  using Allocator = pten::deprecated::RawAllocator;
   explicit CustomAllocator(const std::shared_ptr<Allocator>& a) noexcept
       : alloc_(a) {}
......
@@ -24,6 +24,10 @@ limitations under the License. */
 namespace pten {
 namespace tests {
+using RawAllocator = pten::deprecated::RawAllocator;
+using Allocator = pten::deprecated::Allocator;
+using Allocation = pten::deprecated::Allocation;
 template <typename T>
 bool host_allocator_test(size_t vector_size) {
   std::vector<T> src(vector_size);
......
@@ -226,7 +226,7 @@ if [ "${HAS_MODIFIED_DEMO_CMAKE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
 HAS_MODIFIED_ALLOCATION=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/memory/allocation" || true`
 if [ "${HAS_MODIFIED_ALLOCATION}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
   echo_line="You must be approved by zhiqiu and Shixiaowei02 for paddle/fluid/memory/allocation.\nIt is being modularized and refactored. Thanks!\n"
-  check_approval 2 6888866 39303645
+  check_approval 1 6888866 39303645
 fi
 HAS_MODIFIED_TENSOR=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/framework/tensor" || true`
@@ -241,23 +241,6 @@ if [ "${HAS_MODIFIED_TENSOR}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
   check_approval 1 22561442 22334008
 fi
-ALLOCSHARED_FILE_CHANGED=`git diff --name-only --diff-filter=AM upstream/$BRANCH |grep -E "*\.(h|cc)" || true`
-if [ "${ALLOCSHARED_FILE_CHANGED}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
-  ERROR_LINES=""
-  for TEST_FILE in ${ALLOCSHARED_FILE_CHANGED};
-  do
-    HAS_SKIP_CHECK_ALLOC_CI=`git diff -U0 upstream/$BRANCH ${PADDLE_ROOT}/${TEST_FILE} |grep "AllocShared" || true`
-    if [ "${HAS_SKIP_CHECK_ALLOC_CI}" != "" ]; then
-      ERROR_LINES="${ERROR_LINES}\n${TEST_FILE}\n${HAS_SKIP_CHECK_ALLOC_CI}\n"
-    fi
-  done
-  if [ "${ERROR_LINES}" != "" ]; then
-    ERROR_LINES=${ERROR_LINES//+/'\n+\t'}
-    echo_line="memory::AllocShared is not recommended, because it is being modularized and refactored. Please use memory::Alloc here. Otherwise, please request zhiqiu and Shixiaowei02 review and approve.\n"
-    check_approval 2 6888866 39303645
-  fi
-fi
 ALL_PADDLE_ENFORCE=`git diff -U0 upstream/$BRANCH |grep "^+" |grep -zoE "PADDLE_ENFORCE\(.[^,\);]+.[^;]*\);\s" || true`
 if [ "${ALL_PADDLE_ENFORCE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
   echo_line="PADDLE_ENFORCE is not recommended. Please use PADDLE_ENFORCE_EQ/NE/GT/GE/LT/LE or PADDLE_ENFORCE_NOT_NULL or PADDLE_ENFORCE_GPU_SUCCESS instead, see [ https://github.com/PaddlePaddle/Paddle/wiki/PADDLE_ENFORCE-Rewriting-Specification ] for details.\nYou must have one RD (chenwhql (Recommend) , luotao1 (Recommend) or lanxianghit) approval for the usage (either add or delete) of PADDLE_ENFORCE.\n${ALL_PADDLE_ENFORCE}\n"
......