// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include "paddle/fluid/framework/inlined_vector.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" DECLARE_string(allocator_strategy); namespace paddle { namespace memory { namespace allocation { // Exception when `Alloc`/`AllocShared` failed struct BadAlloc : public std::exception { inline explicit BadAlloc(std::string err_msg, const char* file, int line) : err_str_(platform::GetTraceBackString(std::move(err_msg), file, line)) { } const char* what() const noexcept override { return err_str_.c_str(); } std::string err_str_; }; class Allocator; // Allocation is the object holding the actually pointer. Use // `Allocation::ptr()` will returns the pointer that allocated. // // NOTE: this is the base class of Allocation. Each allocator can use its own // allocation object. // NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0 /** * Allocation is returned by Allocator::Allocate() method. * * An allocator may be decorated by another allocator. For example, we can * decorate a RetryAllocator to any allocator to perform allocation retry when * first allocation request fails. * * Explanations of Allocator design are as follows: * * Suppose we have an allocator which is decorated by several allocators: * * A(1) <- A(2) <- A(3) <- ... <- A(n) * * , and the public allocator is A(1). * * The allocation process would be: * * A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate() * * , and the free process would be: * * A(1).Free() -> A(2).Free() -> ... -> A(n).Free() * * Therefore, we should record the allocator chain when allocating, so * that we can free the allocation in the reverse order of allocator chain. * The field `decorated_allocators_` is used to record this chain. * * Another example is that we want to add additional fields in Allocation, * e.g., something what is done in AlignedAllocator, etc. * In this case, we should declare a derived class of Allocation, which * contains an underlying Allocation allocated by the underlying allocator. * Therefore, `decorated_allocators_` of the new Allocation object would * be a new chain, differing from the underlying Allocation object. */ class Allocation { public: inline Allocation(void* ptr, size_t size, platform::Place place) : ptr_(ptr), base_ptr_(ptr), size_(size), place_(place) {} inline Allocation(void* ptr, void* base_ptr, size_t size, platform::Place place) : ptr_(ptr), base_ptr_(base_ptr), size_(size), place_(place) {} Allocation(const Allocation& o) = delete; Allocation& operator=(const Allocation& o) = delete; Allocation(Allocation&& o) = delete; Allocation& operator=(Allocation&& o) = delete; // Returns the holding pointer. // NOTE: For performance consideration, it is better not to make this method // as a virtual method. If we want to implement a `defragmentation` later, // we might need to make `ptr_` field as a protected field, and add a virtual // method like `defragmentation` to change `ptr_`. inline void* ptr() const { return ptr_; } inline void* base_ptr() const { PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth", paddle::platform::errors::Unimplemented( "base_ptr() is only implemented for auto_growth " "strategy, not support %s strategy", FLAGS_allocator_strategy)); return base_ptr_; } // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the // last valid element. // // NOTE: Some allocator might alloc more memory than request. The size // could larger than its request. For example, // the AlignedAllocator will always allocate memory as size + kAlignment. // The raw pointer might not aligned, so an offset might be added to raw // the pointer. The size of this allocation will be // `size + kAlignemnt - offset`. inline size_t size() const { return size_; } inline const platform::Place& place() const { return place_; } virtual ~Allocation() {} private: inline void RegisterDecoratedAllocator(Allocator* allocator) { decorated_allocators_.emplace_back(allocator); } inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); } inline Allocator* TopDecoratedAllocator() { return decorated_allocators_.back(); } private: void* ptr_; void* base_ptr_; // the point that directly requested from system size_t size_; platform::Place place_; /** * NOTE(zjl): Since decorated_allocators_ is usually a small vector. * We reserve a small buffer to it to prevent frequent heap allocation * * Instead, we can use a std::vector here, and reserve * kReserveAllocatorNum in constructor of Allocation. * But using std::vector would make ocr recognition model * fail in CE. The train duration is 8% slower than KPI. */ static constexpr size_t kReserveAllocatorNum = 8; using DecoratedAllocatorStack = framework::InlinedVector; DecoratedAllocatorStack decorated_allocators_; friend class Allocator; }; // Base interface class of memory Allocator. class Allocator { public: virtual ~Allocator() {} class AllocationDeleter { public: inline void operator()(Allocation* allocation) const { Allocator* allocator = allocation->TopDecoratedAllocator(); allocator->Free(allocation); } }; using AllocationPtr = std::unique_ptr; // Allocate an allocation. // size may be 0, but it would be too complex if we handle size == 0 // in each Allocator. So we handle size == 0 inside AllocatorFacade // in our design. inline AllocationPtr Allocate(size_t size) { auto ptr = AllocateImpl(size); ptr->RegisterDecoratedAllocator(this); return AllocationPtr(ptr); } // This function should not be called outside Allocator class inline void Free(Allocation* allocation) { allocation->PopDecoratedAllocator(); FreeImpl(allocation); } inline uint64_t Release(const platform::Place& place) { return ReleaseImpl(place); } // True if the `Allocate` is thread safe. virtual bool IsAllocThreadSafe() const; protected: virtual Allocation* AllocateImpl(size_t size) = 0; virtual void FreeImpl(Allocation* allocation); virtual uint64_t ReleaseImpl(const platform::Place& place) { return 0; } }; using AllocationDeleter = Allocator::AllocationDeleter; using AllocationPtr = Allocator::AllocationPtr; inline size_t AlignedSize(size_t size, size_t alignment) { auto remaining = size % alignment; return remaining == 0 ? size : size + alignment - remaining; } inline size_t AlignedPtrOffset(const void* ptr, size_t alignment) { auto ptr_addr = reinterpret_cast(ptr); auto diff = ptr_addr % alignment; return diff == 0 ? 0 : alignment - diff; } } // namespace allocation } // namespace memory } // namespace paddle