提交 9dcddf92 编写于 作者: Y Yu Yang

Polish best_fit_allocator

上级 0c25da39
...@@ -41,8 +41,7 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation) ...@@ -41,8 +41,7 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
chunk.offset_ = 0; chunk.offset_ = 0;
chunk.is_free = true; chunk.is_free = true;
chunks_.emplace_back(chunk); chunks_.emplace_back(chunk);
free_chunks_[HighestBitPos(chunk.size_)].insert( InsertFreeNode(chunks_.begin());
{chunk.size_, chunks_.begin()});
} }
std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) { std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) {
...@@ -86,35 +85,33 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size, ...@@ -86,35 +85,33 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
details::Chunk remaining; details::Chunk remaining;
to_use.size_ = request_size; to_use.size_ = request_size;
to_use.is_free = false; to_use.is_free = false;
remaining.size_ = remaining_size;
remaining.is_free = true;
// calc offsets // calc offsets
to_use.offset_ = to_split_it->offset_; to_use.offset_ = to_split_it->offset_;
remaining.offset_ = to_use.offset_ + to_use.size_;
// insert to chunk list // insert to chunk list
auto to_use_it = chunks_.insert(to_split_it, to_use); auto to_use_it = chunks_.insert(to_split_it, to_use);
if (remaining.size_ != 0) { if (remaining_size != 0) {
auto bit_size = static_cast<size_t>(HighestBitPos(remaining.size_)); remaining.size_ = remaining_size;
free_chunks_[bit_size].insert( remaining.is_free = true;
{remaining.size_, chunks_.insert(to_split_it, remaining)}); remaining.offset_ = to_use.offset_ + to_use.size_;
auto remaining_it = chunks_.insert(to_split_it, remaining);
InsertFreeNode(remaining_it);
} }
chunks_.erase(to_split_it); chunks_.erase(to_split_it);
return to_use_it; return to_use_it;
} }
void BestFitAllocator::Free(Allocation* allocation) { void BestFitAllocator::Free(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation); auto* bf_allocation = reinterpret_cast<BestFitAllocation*>(allocation);
auto chunk_it = bf_allocation->ChunkIterator(); auto chunk_it = bf_allocation->ChunkIterator();
PADDLE_ENFORCE(!chunk_it->is_free); PADDLE_ENFORCE(!chunk_it->is_free);
chunk_it->is_free = true; chunk_it->is_free = true;
if (chunk_it != chunks_.begin()) { if (chunk_it != chunks_.begin()) { // not the first chunk, try to merge prev.
auto prev_it = chunk_it; auto prev_it = chunk_it;
--prev_it; --prev_it;
if (prev_it->is_free) { if (prev_it->is_free) {
// Merge Left. // Merge Prev.
EraseFreeNode(prev_it); EraseFreeNode(prev_it);
prev_it->size_ += chunk_it->size_; prev_it->size_ += chunk_it->size_;
chunks_.erase(chunk_it); chunks_.erase(chunk_it);
...@@ -125,6 +122,7 @@ void BestFitAllocator::Free(Allocation* allocation) { ...@@ -125,6 +122,7 @@ void BestFitAllocator::Free(Allocation* allocation) {
auto next_it = chunk_it; auto next_it = chunk_it;
++next_it; ++next_it;
if (next_it != chunks_.end() && next_it->is_free) { if (next_it != chunks_.end() && next_it->is_free) {
// not the last chunk, try to merge next
EraseFreeNode(next_it); EraseFreeNode(next_it);
chunk_it->size_ += next_it->size_; chunk_it->size_ += next_it->size_;
chunks_.erase(next_it); chunks_.erase(next_it);
...@@ -139,9 +137,11 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) { ...@@ -139,9 +137,11 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
free_map.insert({it->size_, it}); free_map.insert({it->size_, it});
} }
void BestFitAllocator::EraseFreeNode(const ListIt& it) { void BestFitAllocator::EraseFreeNode(const ListIt& it) {
size_t pos = static_cast<size_t>(HighestBitPos(it->size_)); auto pos = static_cast<size_t>(HighestBitPos(it->size_));
auto& free_map = free_chunks_[pos]; auto& free_map = free_chunks_[pos];
auto map_it = free_map.find(it->size_); auto map_it = free_map.find(it->size_);
// The while loop is needed because free_map is a multi-map: several chunks can share the same size key.
while (map_it->second != it && map_it != free_map.end()) { while (map_it->second != it && map_it != free_map.end()) {
++map_it; ++map_it;
} }
......
...@@ -37,8 +37,8 @@ struct Chunk { ...@@ -37,8 +37,8 @@ struct Chunk {
// | Chunk | prev_ pointer | next_ pointer | payload .... | // | Chunk | prev_ pointer | next_ pointer | payload .... |
// *-------*---------------*---------------*--------------* // *-------*---------------*---------------*--------------*
// This implementation can just return a raw pointer, and we can get the list // This implementation can just return a raw pointer, and we can get the list
// structure by it. However, we cannot use the same code on GPU since CPU // structure by the raw pointer. However, we cannot use the same code on GPU
// cannot access GPU memory directly. // since CPU cannot access GPU memory directly.
// //
// So we choose to use `std::list` and return an allocation instance, which // So we choose to use `std::list` and return an allocation instance, which
// contains the list node iterator, then we can unify CPU/GPU code. // contains the list node iterator, then we can unify CPU/GPU code.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册