提交 71c846ef 作者: Yu Yang

Revert buggy changes

test=develop
上级 dbf9f6f4
...@@ -26,7 +26,7 @@ static int HighestBitPos(size_t N) { ...@@ -26,7 +26,7 @@ static int HighestBitPos(size_t N) {
if (UNLIKELY(N == 0)) { if (UNLIKELY(N == 0)) {
return 0; return 0;
} else { } else {
#ifdef __GNUC__ #ifdef __GNUCC__
return sizeof(unsigned int) * 8 - __builtin_clz(N); return sizeof(unsigned int) * 8 - __builtin_clz(N);
#else #else
return static_cast<int>(std::log2(N) + 1); return static_cast<int>(std::log2(N) + 1);
...@@ -41,7 +41,8 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation) ...@@ -41,7 +41,8 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
chunk.offset_ = 0; chunk.offset_ = 0;
chunk.is_free = true; chunk.is_free = true;
chunks_.emplace_back(chunk); chunks_.emplace_back(chunk);
InsertFreeNode(chunks_.begin()); free_chunks_[HighestBitPos(chunk.size_)].insert(
{chunk.size_, chunks_.begin()});
} }
std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) { std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) {
...@@ -85,33 +86,35 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size, ...@@ -85,33 +86,35 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
details::Chunk remaining; details::Chunk remaining;
to_use.size_ = request_size; to_use.size_ = request_size;
to_use.is_free = false; to_use.is_free = false;
remaining.size_ = remaining_size;
remaining.is_free = true;
// calc offsets // calc offsets
to_use.offset_ = to_split_it->offset_; to_use.offset_ = to_split_it->offset_;
remaining.offset_ = to_use.offset_ + to_use.size_;
// insert to chunk list // insert to chunk list
auto to_use_it = chunks_.insert(to_split_it, to_use); auto to_use_it = chunks_.insert(to_split_it, to_use);
if (remaining_size != 0) { if (remaining.size_ != 0) {
remaining.size_ = remaining_size; auto bit_size = static_cast<size_t>(HighestBitPos(remaining.size_));
remaining.is_free = true; free_chunks_[bit_size].insert(
remaining.offset_ = to_use.offset_ + to_use.size_; {remaining.size_, chunks_.insert(to_split_it, remaining)});
auto remaining_it = chunks_.insert(to_split_it, remaining);
InsertFreeNode(remaining_it);
} }
chunks_.erase(to_split_it); chunks_.erase(to_split_it);
return to_use_it; return to_use_it;
} }
void BestFitAllocator::Free(Allocation* allocation) { void BestFitAllocator::Free(Allocation* allocation) {
auto* bf_allocation = reinterpret_cast<BestFitAllocation*>(allocation); auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
auto chunk_it = bf_allocation->ChunkIterator(); auto chunk_it = bf_allocation->ChunkIterator();
PADDLE_ENFORCE(!chunk_it->is_free); PADDLE_ENFORCE(!chunk_it->is_free);
chunk_it->is_free = true; chunk_it->is_free = true;
if (chunk_it != chunks_.begin()) { // not the first chunk, try to merge prev. if (chunk_it != chunks_.begin()) {
auto prev_it = chunk_it; auto prev_it = chunk_it;
--prev_it; --prev_it;
if (prev_it->is_free) { if (prev_it->is_free) {
// Merge Prev. // Merge Left.
EraseFreeNode(prev_it); EraseFreeNode(prev_it);
prev_it->size_ += chunk_it->size_; prev_it->size_ += chunk_it->size_;
chunks_.erase(chunk_it); chunks_.erase(chunk_it);
...@@ -122,7 +125,6 @@ void BestFitAllocator::Free(Allocation* allocation) { ...@@ -122,7 +125,6 @@ void BestFitAllocator::Free(Allocation* allocation) {
auto next_it = chunk_it; auto next_it = chunk_it;
++next_it; ++next_it;
if (next_it != chunks_.end() && next_it->is_free) { if (next_it != chunks_.end() && next_it->is_free) {
// not the last chunk, try to merge next
EraseFreeNode(next_it); EraseFreeNode(next_it);
chunk_it->size_ += next_it->size_; chunk_it->size_ += next_it->size_;
chunks_.erase(next_it); chunks_.erase(next_it);
...@@ -137,11 +139,9 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) { ...@@ -137,11 +139,9 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
free_map.insert({it->size_, it}); free_map.insert({it->size_, it});
} }
void BestFitAllocator::EraseFreeNode(const ListIt& it) { void BestFitAllocator::EraseFreeNode(const ListIt& it) {
auto pos = static_cast<size_t>(HighestBitPos(it->size_)); size_t pos = static_cast<size_t>(HighestBitPos(it->size_));
auto& free_map = free_chunks_[pos]; auto& free_map = free_chunks_[pos];
auto map_it = free_map.find(it->size_); auto map_it = free_map.find(it->size_);
// This while loop because it is a multi-map
while (map_it->second != it && map_it != free_map.end()) { while (map_it->second != it && map_it != free_map.end()) {
++map_it; ++map_it;
} }
......
...@@ -42,8 +42,7 @@ static std::shared_ptr<memory::Allocation> GetCommunicationAllocationFromTensor( ...@@ -42,8 +42,7 @@ static std::shared_ptr<memory::Allocation> GetCommunicationAllocationFromTensor(
memory::Copy(cuda_pinned, result->ptr(), memory::Copy(cuda_pinned, result->ptr(),
boost::get<platform::CUDAPlace>(tensor.place()), boost::get<platform::CUDAPlace>(tensor.place()),
reinterpret_cast<const void*>(tensor.data<void>()), copy_size, tensor.data<void>(), copy_size, gpu_dev_ctx.stream());
gpu_dev_ctx.stream());
ctx.Wait(); ctx.Wait();
return result; return result;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册