diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc
index 1d9e7177f9547af8a083b2304163497b987fac46..706216c8bfdcc85a891572c4290b5ba8ac76f360 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc
@@ -41,8 +41,7 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
   chunk.offset_ = 0;
   chunk.is_free = true;
   chunks_.emplace_back(chunk);
-  free_chunks_[HighestBitPos(chunk.size_)].insert(
-      {chunk.size_, chunks_.begin()});
+  InsertFreeNode(chunks_.begin());
 }
 
 std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) {
@@ -86,35 +85,33 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
   details::Chunk remaining;
   to_use.size_ = request_size;
   to_use.is_free = false;
-  remaining.size_ = remaining_size;
-  remaining.is_free = true;
-
   // calc offsets
   to_use.offset_ = to_split_it->offset_;
-  remaining.offset_ = to_use.offset_ + to_use.size_;
 
   // insert to chunk list
   auto to_use_it = chunks_.insert(to_split_it, to_use);
-  if (remaining.size_ != 0) {
-    auto bit_size = static_cast<size_t>(HighestBitPos(remaining.size_));
-    free_chunks_[bit_size].insert(
-        {remaining.size_, chunks_.insert(to_split_it, remaining)});
+  if (remaining_size != 0) {
+    remaining.size_ = remaining_size;
+    remaining.is_free = true;
+    remaining.offset_ = to_use.offset_ + to_use.size_;
+    auto remaining_it = chunks_.insert(to_split_it, remaining);
+    InsertFreeNode(remaining_it);
   }
   chunks_.erase(to_split_it);
   return to_use_it;
 }
 
 void BestFitAllocator::Free(Allocation* allocation) {
-  auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
+  auto* bf_allocation = reinterpret_cast<BestFitAllocation*>(allocation);
   auto chunk_it = bf_allocation->ChunkIterator();
   PADDLE_ENFORCE(!chunk_it->is_free);
   chunk_it->is_free = true;
-  if (chunk_it != chunks_.begin()) {
+  if (chunk_it != chunks_.begin()) {  // not the first chunk, try to merge prev.
     auto prev_it = chunk_it;
     --prev_it;
 
     if (prev_it->is_free) {
-      // Merge Left.
+      // Merge Prev.
       EraseFreeNode(prev_it);
       prev_it->size_ += chunk_it->size_;
       chunks_.erase(chunk_it);
@@ -125,6 +122,7 @@ void BestFitAllocator::Free(Allocation* allocation) {
   auto next_it = chunk_it;
   ++next_it;
   if (next_it != chunks_.end() && next_it->is_free) {
+    // not the last chunk, try to merge next.
     EraseFreeNode(next_it);
     chunk_it->size_ += next_it->size_;
     chunks_.erase(next_it);
@@ -139,9 +137,11 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
   free_map.insert({it->size_, it});
 }
 void BestFitAllocator::EraseFreeNode(const ListIt& it) {
-  size_t pos = static_cast<size_t>(HighestBitPos(it->size_));
+  auto pos = static_cast<size_t>(HighestBitPos(it->size_));
   auto& free_map = free_chunks_[pos];
   auto map_it = free_map.find(it->size_);
+
+  // free_chunks_[pos] is a multimap; linearly find the entry that refers to it.
   while (map_it->second != it && map_it != free_map.end()) {
     ++map_it;
   }
diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h
index 309a2a77088d6d2d182590f8b2671ea0b5a474f2..da62bc4bb61e09f8b49cdc5e67f7932aa21a860e 100644
--- a/paddle/fluid/memory/allocation/best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/best_fit_allocator.h
@@ -37,8 +37,8 @@ struct Chunk {
 // | Chunk | prev_ pointer | next_ pointer | payload .... |
 // *-------*---------------*---------------*--------------*
 // This implementation can just return a raw pointer, and we can get the list
-// structure by it. However, we cannot use the same code on GPU since CPU
-// cannot access GPU memory directly.
+// structure by the raw pointer. However, we cannot use the same code on GPU
+// since CPU cannot access GPU memory directly.
 //
 // So we choose to use `std::list` and return an allocation instance, which
 // contains the list node iterator, then we can unify CPU/GPU code.
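
For context on the bookkeeping that InsertFreeNode/EraseFreeNode maintain, below is a minimal, self-contained sketch (not part of this patch; all names such as ChunkIt, HighestBit, and bins are illustrative): free chunks are kept in one multimap per highest-bit bin, keyed by chunk size, so a best-fit style lookup scans bins from the request's highest bit upward and takes the smallest chunk that is large enough.

#include <array>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <list>
#include <map>

struct Chunk {
  std::size_t size = 0;
  std::size_t offset = 0;
  bool is_free = true;
};

using ChunkList = std::list<Chunk>;
using ChunkIt = ChunkList::iterator;

// Position of the highest set bit (illustrative stand-in for HighestBitPos).
static std::size_t HighestBit(std::size_t n) {
  std::size_t pos = 0;
  while (n >>= 1) ++pos;
  return pos;
}

int main() {
  ChunkList chunks;
  // One bin per possible highest-bit position; each bin maps size -> chunk.
  std::array<std::multimap<std::size_t, ChunkIt>, sizeof(std::size_t) * 8> bins;

  // Start with a single free chunk covering 1 MiB.
  chunks.push_back({1u << 20, 0, true});
  ChunkIt it = std::prev(chunks.end());
  bins[HighestBit(it->size)].emplace(it->size, it);

  // Lookup for a 300 KiB request: scan bins from the request's highest bit
  // upward and take the smallest chunk that fits.
  const std::size_t request = 300 * 1024;
  for (std::size_t b = HighestBit(request); b < bins.size(); ++b) {
    auto cand = bins[b].lower_bound(request);
    if (cand != bins[b].end()) {
      std::cout << "found free chunk of " << cand->first << " bytes\n";
      break;
    }
  }
  return 0;
}

Because each bin is a multimap, several free chunks of the same size can coexist in one bin; that is why EraseFreeNode in the patch has to scan linearly from find() until it reaches the entry whose iterator matches the chunk being removed.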