Commit bbd3eab7 authored by liaogang

ENH: Add Alloc for buddy Allocator

* Free will be added soon
Parent 929f9cbd
...
@@ -12,22 +12,161 @@
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/memory/detail/buddy_allocator.h" #include "paddle/memory/detail/buddy_allocator.h"
#include "glog/logging.h"
namespace paddle {
namespace memory {
namespace detail {
BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size, size_t max_chunk_size) {
PADDLE_ASSERT(min_chunk_size > 0);
PADDLE_ASSERT(max_chunk_size > 0);
PADDLE_ASSERT(system_allocator != nullptr);
system_allocator_ = std::move(system_allocator);
min_chunk_size_ = min_chunk_size;
max_chunk_size_ = max_chunk_size;
}
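// Rounds |size| up to the next multiple of |alignment|; for example,
// align(100, 64) returns 128, while align(128, 64) returns 128 unchanged.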
inline size_t align(size_t size, size_t alignment) {
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
void* BuddyAllocator::Alloc(size_t unaligned_size) {
// adjust allocation alignment
size_t size = align(unaligned_size + sizeof(Metadata), min_chunk_size_);
// acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
DLOG(INFO) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size;
// if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) {
DLOG(INFO) << "Allocate from system allocator.";
return SystemAlloc(size);
}
// query and allocate from the existing chunk
auto it = FindExistChunk(size);
// refill the pool if failure
if (it == pool_.end()) {
it = RefillPool();
} else {
DLOG(INFO) << " Allocation from existing memory block " << std::get<2>(*it)
<< " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
}
// if refilling the pool also failed, give up and return nullptr
if (it == pool_.end()) {
return nullptr;
}
total_used_ += size;
total_free_ -= size;
// split the allocation and return data for use
return reinterpret_cast<MemoryBlock*>(SplitToAlloc(it, size))->data();
}
void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0;
void* p = system_allocator_->Alloc(index, size);
DLOG(INFO) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr;
static_cast<MemoryBlock*>(p)->init(cache_, MemoryBlock::HUGE_CHUNK, index,
size, nullptr, nullptr);
return static_cast<MemoryBlock*>(p)->data();
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
#ifndef PADDLE_ONLY_CPU
if (system_allocator_->UseGpu()) {
if ((total_used_ + total_free_) == 0) {
// Compute the maximum allocation size for the first allocation.
max_chunk_size_ = platform::GpuMaxChunkSize();
}
}
#endif // PADDLE_ONLY_CPU
// Allocate a new maximum sized block
size_t index = 0;
void* p = system_allocator_->Alloc(index, max_chunk_size_);
if (p == nullptr) return pool_.end();
DLOG(INFO) << " Creating and inserting new block " << p
<< " from system allocator";
static_cast<MemoryBlock*>(p)->init(cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr);
total_free_ += max_chunk_size_;
// dump the block into pool
return pool_.insert({index, max_chunk_size_, p}).first;
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) {
size_t index = 0;
while (1) {
auto it = pool_.lower_bound({index, size, nullptr});
if (it == pool_.end()) return it;
if (std::get<0>(*it) > index) {
if (std::get<1>(*it) >= size) {
return it;
}
index = std::get<0>(*it);
continue;
}
return it;
}
}
void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
size_t size) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it);
DLOG(INFO) << " Split block (" << block << ", " << block->total_size(cache_)
<< ") into";
block->split(cache_, size);
DLOG(INFO) << " Left block (" << block << ", " << block->total_size(cache_)
<< ")";
block->set_type(cache_, MemoryBlock::ARENA_CHUNK);
// if the split produced a free right buddy, return it to the pool
if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
DLOG(INFO) << " Insert right block (" << block->right_buddy(cache_)
<< ", " << block->right_buddy(cache_)->total_size(cache_)
<< ")";
pool_.insert({block->right_buddy(cache_)->index(cache_),
block->right_buddy(cache_)->total_size(cache_),
block->right_buddy(cache_)});
}
}
return block;
}
} // namespace detail
......
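For orientation, a minimal sketch (not part of the commit) of how the new constructor and Alloc are meant to be driven; the namespace placement of CPUAllocator is assumed from the includes above:

// Illustrative sketch only -- not part of this change.
namespace pd = paddle::memory::detail;  // assumed namespace of both allocator classes
pd::BuddyAllocator cpu_allocator(new pd::CPUAllocator, paddle::platform::CpuMinChunkSize(), paddle::platform::CpuMaxChunkSize());
void* p = cpu_allocator.Alloc(1024);  // request is rounded up internally via align()
// Free(p) is declared but, per the commit message, not implemented in this commit.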
...
@@ -15,9 +15,15 @@
#pragma once
#include "paddle/memory/detail/system_allocator.h"
#include "paddle/memory/detail/metadata.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cpu_info.h"
#include "paddle/platform/gpu_info.h"
#include <set>
#include <mutex>
#include <vector>
#include <unordered_map>
namespace paddle {
namespace memory {
...
@@ -25,55 +31,83 @@ namespace detail {
class BuddyAllocator {
public:
BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size, size_t max_chunk_size);
~BuddyAllocator();
public:
void* Alloc(size_t unaligned_size);
void Free(void*);
size_t Used();
public:
// Disable copy and assignment.
BuddyAllocator(const BuddyAllocator&) = delete;
BuddyAllocator& operator=(const BuddyAllocator&) = delete;
private:
// Tuple type: allocator index, memory size, memory address
using IndexSizeAddress = std::tuple<size_t, size_t, void*>;
using PoolSet = std::set<IndexSizeAddress>;
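// Note: std::set orders these tuples lexicographically by (index, size,
// address), so FindExistChunk can call lower_bound({index, size, nullptr})
// to reach the smallest free chunk of at least `size` for a given index.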
/*! \brief Allocate fixed-size memory from the system allocator */
void* SystemAlloc(size_t size);

/*! \brief If no existing chunk is suitable, refill the pool */
PoolSet::iterator RefillPool();

/**
* \brief Split the chunk pointed to by the iterator and allocate from it
*
* \param it pool iterator which points to a suitable free chunk.
* \param size the size of the allocation.
*/
void* SplitToAlloc(PoolSet::iterator it, size_t size);

/*! \brief Find an existing chunk that can satisfy the allocation */
PoolSet::iterator FindExistChunk(size_t size);
private:
size_t total_used_ = 0; // the total size of used memory
size_t total_free_ = 0; // the total size of free memory
size_t min_chunk_size_; // the minimum size of each chunk
size_t max_chunk_size_; // the maximum size of each chunk
private:
PoolSet pool_;
private:
// Unify the metadata format between GPU and CPU allocations
using MetadataCache = std::unordered_map<const MemoryBlock*, Metadata>;
MetadataCache cache_;
private:
SystemAllocator* system_allocator_;
std::mutex mutex_;
};
BuddyAllocator* GetCPUBuddyAllocator() {
static BuddyAllocator* a = nullptr;
if (a == nullptr) {
a = new BuddyAllocator(new CPUAllocator, platform::CpuMinChunkSize(), platform::CpuMaxChunkSize());
}
return a;
}
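// For example (illustrative, not part of this commit):
//   void* p = GetCPUBuddyAllocator()->Alloc(256);
// hands out at least 256 bytes from the shared CPU pool; the matching Free()
// path is not implemented yet.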
#ifndef PADDLE_ONLY_CPU  // The following code is for CUDA.
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
static BuddyAllocator** as = NULL;
if (as == NULL) {
int gpu_num = platform::GpuDeviceCount();
as = new BuddyAllocator*[gpu_num];
for (int gpu = 0; gpu < gpu_num; gpu++) {
as[gpu] = new BuddyAllocator(new GPUAllocator, platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
}
}
return as[gpu_id];
......