buddy_allocator.cc 10.6 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
14

Y
Yi Wang 已提交
15
#include "paddle/fluid/memory/detail/buddy_allocator.h"
16
#include <algorithm>
17

18
#include "gflags/gflags.h"
L
liaogang 已提交
19
#include "glog/logging.h"
20

21
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
22 23
DECLARE_uint64(reallocate_gpu_memory_in_mb);
#endif
24 25 26
#ifdef PADDLE_WITH_ASCEND_CL
DECLARE_uint64(reallocate_gpu_memory_in_mb);
#endif
27

28 29 30 31
namespace paddle {
namespace memory {
namespace detail {

G
gongweibao 已提交
32 33
// Constructs an allocator that obtains raw memory from `system_allocator`.
//
//   system_allocator    backing allocator; ownership moves into this object.
//   min_chunk_size      stored in min_chunk_size_; Alloc() rounds every
//                       request up to a multiple of it.
//   max_chunk_size      stored in max_chunk_size_; requests larger than this
//                       bypass the pool and go to the system allocator.
//   extra_padding_size  stored in extra_padding_size_; added to every request
//                       before rounding.
BuddyAllocator::BuddyAllocator(
    std::unique_ptr<SystemAllocator> system_allocator, size_t min_chunk_size,
    size_t max_chunk_size, size_t extra_padding_size)
    : min_chunk_size_(min_chunk_size),
      max_chunk_size_(max_chunk_size),
      extra_padding_size_(extra_padding_size),
      // Reads UseGpu() through the parameter, which the next initializer
      // moves from.
      // NOTE(review): this relies on cache_ being declared before
      // system_allocator_ in the class -- confirm in the header.
      cache_(system_allocator->UseGpu()),
      system_allocator_(std::move(system_allocator)) {
}

// Hands every block still sitting in the free pool back to the system
// allocator and drops its cached descriptor.
BuddyAllocator::~BuddyAllocator() {
  VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
              "have actually been freed";

  // Always work on the first pool entry; each pass removes it, so the loop
  // ends once the pool is empty.
  for (auto head = pool_.begin(); head != pool_.end(); head = pool_.begin()) {
    auto* free_block = static_cast<MemoryBlock*>(std::get<2>(*head));
    auto* block_desc = cache_.LoadDesc(free_block);

    VLOG(10) << "Free from block (" << free_block << ", "
             << block_desc->get_total_size() << ")";

    system_allocator_->Free(free_block, block_desc->get_total_size(),
                            block_desc->get_index());
    cache_.Invalidate(free_block);
    pool_.erase(head);
  }
}

// Rounds `size` up to the nearest multiple of `alignment`.
// A `size` that is already a multiple is returned unchanged.
// `alignment` must be non-zero (it is used as a modulus).
inline size_t align(size_t size, size_t alignment) {
  const size_t overshoot = size % alignment;
  if (overshoot == 0) {
    return size;
  }
  // Drop the partial chunk, then add one whole alignment unit.
  return size - overshoot + alignment;
}

void* BuddyAllocator::Alloc(size_t unaligned_size) {
  // adjust allocation alignment

64 65 66 67 68 69 70
  size_t size =
      align(unaligned_size + sizeof(MemoryBlock::Desc) + extra_padding_size_,
            min_chunk_size_);
  VLOG(10) << "alloc: " << unaligned_size
           << ", padding for desc: " << sizeof(MemoryBlock::Desc)
           << ", extra padding: " << extra_padding_size_
           << ", alignment: " << min_chunk_size_;
L
liaogang 已提交
71 72 73
  // acquire the allocator lock
  std::lock_guard<std::mutex> lock(mutex_);

M
minqiyang 已提交
74 75
  VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
           << size;
L
liaogang 已提交
76 77 78

  // if the allocation is huge, send directly to the system allocator
  if (size > max_chunk_size_) {
M
minqiyang 已提交
79
    VLOG(10) << "Allocate from system allocator.";
L
liaogang 已提交
80 81 82 83 84 85 86 87
    return SystemAlloc(size);
  }

  // query and allocate from the existing chunk
  auto it = FindExistChunk(size);

  // refill the pool if failure
  if (it == pool_.end()) {
88
    it = RefillPool(size);
L
liaogang 已提交
89 90 91 92
    // if still failure, fail fatally
    if (it == pool_.end()) {
      return nullptr;
    }
L
liaogang 已提交
93
  } else {
M
minqiyang 已提交
94 95
    VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
             << " at address "
96
             << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->Data();
L
liaogang 已提交
97 98 99 100 101 102
  }

  total_used_ += size;
  total_free_ -= size;

  // split the allocation and return data for use
103
  return reinterpret_cast<MemoryBlock*>(SplitToAlloc(it, size))->Data();
L
liaogang 已提交
104 105
}

L
liaogang 已提交
106
void BuddyAllocator::Free(void* p) {
L
liaogang 已提交
107
  // Point back to metadata
108
  auto block = static_cast<MemoryBlock*>(p)->Metadata();
L
liaogang 已提交
109

L
liaogang 已提交
110
  // Acquire the allocator lock
L
liaogang 已提交
111
  std::lock_guard<std::mutex> lock(mutex_);
L
liaogang 已提交
112

M
minqiyang 已提交
113
  VLOG(10) << "Free from address " << block;
L
liaogang 已提交
114

115 116
  auto* desc = cache_.LoadDesc(block);
  if (desc->get_type() == MemoryBlock::HUGE_CHUNK) {
M
minqiyang 已提交
117
    VLOG(10) << "Free directly from system allocator";
118
    system_allocator_->Free(block, desc->get_total_size(), desc->get_index());
L
liaogang 已提交
119 120

    // Invalidate GPU allocation from cache
121
    cache_.Invalidate(block);
122

L
liaogang 已提交
123 124 125
    return;
  }

126
  block->MarkAsFree(&cache_);
L
liaogang 已提交
127

128 129
  total_used_ -= desc->get_total_size();
  total_free_ += desc->get_total_size();
L
liaogang 已提交
130 131

  // Trying to merge the right buddy
132 133
  MemoryBlock* right_buddy = block->GetRightBuddy(&cache_);
  if (right_buddy) {
M
minqiyang 已提交
134
    VLOG(10) << "Merging this block " << block << " with its right buddy "
135
             << right_buddy;
136

137 138
    auto rb_desc = cache_.LoadDesc(right_buddy);
    if (rb_desc->get_type() == MemoryBlock::FREE_CHUNK) {
139
      // Take away right buddy from pool
140 141
      pool_.erase(IndexSizeAddress(rb_desc->get_index(),
                                   rb_desc->get_total_size(), right_buddy));
142 143

      // merge its right buddy to the block
144
      block->Merge(&cache_, right_buddy);
145
    }
L
liaogang 已提交
146 147 148
  }

  // Trying to merge the left buddy
149 150
  MemoryBlock* left_buddy = block->GetLeftBuddy(&cache_);
  if (left_buddy) {
M
minqiyang 已提交
151
    VLOG(10) << "Merging this block " << block << " with its left buddy "
152
             << left_buddy;
153

154 155 156
    // auto left_buddy = block->left_buddy(cache_);
    auto* lb_desc = cache_.LoadDesc(left_buddy);
    if (lb_desc->get_type() == MemoryBlock::FREE_CHUNK) {
157
      // Take away right buddy from pool
158 159
      pool_.erase(IndexSizeAddress(lb_desc->get_index(),
                                   lb_desc->get_total_size(), left_buddy));
160 161

      // merge the block to its left buddy
162
      left_buddy->Merge(&cache_, block);
163
      block = left_buddy;
164
      desc = lb_desc;
165
    }
L
liaogang 已提交
166 167 168
  }

  // Dumping this block into pool
M
minqiyang 已提交
169
  VLOG(10) << "Inserting free block (" << block << ", "
170
           << desc->get_total_size() << ")";
L
liaogang 已提交
171
  pool_.insert(
172
      IndexSizeAddress(desc->get_index(), desc->get_total_size(), block));
L
liaogang 已提交
173 174
}

W
Wilber 已提交
175
uint64_t BuddyAllocator::Release() {
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
  std::lock_guard<std::mutex> lock(mutex_);
  int num = 0;
  uint64_t bytes = 0;
  bool del_flag = false;
  for (auto iter = pool_.begin(); iter != pool_.end();) {
    auto remain_size = std::get<1>(*iter);
    auto remain_ptr = std::get<2>(*iter);
    for (auto& chunk : chunks_) {
      auto init_size = std::get<1>(chunk);
      auto init_ptr = std::get<2>(chunk);

      if (init_size == remain_size && init_ptr == remain_ptr) {
        ++num;
        bytes += init_size;
        total_free_ -= init_size;
        auto block = static_cast<MemoryBlock*>(std::get<2>(chunk));
        system_allocator_->Free(init_ptr, init_size, std::get<0>(chunk));
        cache_.Invalidate(block);
        del_flag = true;
        break;
      }
    }

    if (del_flag) {
      iter = pool_.erase(iter);
    } else {
      iter++;
    }
  }
  VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes.";
W
Wilber 已提交
206
  return bytes;
207 208
}

L
liaogang 已提交
209
// Returns the current value of total_used_ (read without taking mutex_).
size_t BuddyAllocator::Used() {
  return total_used_;
}
D
Dun Liang 已提交
210 211
// Returns the min_chunk_size value supplied at construction.
size_t BuddyAllocator::GetMinChunkSize() {
  return min_chunk_size_;
}
// Returns the max_chunk_size value supplied at construction.
size_t BuddyAllocator::GetMaxChunkSize() {
  return max_chunk_size_;
}
L
liaogang 已提交
212

L
liaogang 已提交
213 214
void* BuddyAllocator::SystemAlloc(size_t size) {
  size_t index = 0;
Y
Update  
Yi Wang 已提交
215
  void* p = system_allocator_->Alloc(&index, size);
L
liaogang 已提交
216

M
minqiyang 已提交
217
  VLOG(10) << "Allocated " << p << " from system allocator.";
L
liaogang 已提交
218 219 220

  if (p == nullptr) return nullptr;

221
  static_cast<MemoryBlock*>(p)->Init(&cache_, MemoryBlock::HUGE_CHUNK, index,
L
liaogang 已提交
222 223
                                     size, nullptr, nullptr);

224
  return static_cast<MemoryBlock*>(p)->Data();
L
liaogang 已提交
225 226
}

227 228
// Obtains a fresh chunk from the system allocator and adds it to the pool.
//
// The chunk size defaults to max_chunk_size_. In CUDA/HIP or Ascend builds,
// when the system allocator reports UseGpu(), it is instead the larger of
// `request_bytes` and the platform's initial or re-allocation size.
//
// Returns an iterator to the new pool entry, or pool_.end() when the system
// allocator fails.
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
    size_t request_bytes) {
  size_t chunk_bytes = max_chunk_size_;
  size_t index = 0;

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (system_allocator_->UseGpu()) {
    if ((total_used_ + total_free_) != 0) {
      // Subsequent refill. The cached re-allocation size is reused only while
      // FLAGS_reallocate_gpu_memory_in_mb is non-zero; otherwise it is
      // recomputed on every refill.
      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
        realloc_size_ = platform::GpuReallocSize();
      }
      chunk_bytes = std::max(realloc_size_, request_bytes);
    } else {
      // Nothing is tracked yet, so this is the very first chunk.
      chunk_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
    }
  }
#endif
#ifdef PADDLE_WITH_ASCEND_CL
  if (system_allocator_->UseGpu()) {
    if ((total_used_ + total_free_) != 0) {
      // Subsequent refill. The cached re-allocation size is reused only while
      // FLAGS_reallocate_gpu_memory_in_mb is non-zero; otherwise it is
      // recomputed on every refill.
      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
        realloc_size_ = platform::NPUReallocSize();
      }
      chunk_bytes = std::max(realloc_size_, request_bytes);
    } else {
      // Nothing is tracked yet, so this is the very first chunk.
      chunk_bytes = std::max(platform::NPUInitAllocSize(), request_bytes);
    }
  }
#endif

  // Ask the system allocator for the chunk.
  void* raw = system_allocator_->Alloc(&index, chunk_bytes);
  if (raw == nullptr) {
    return pool_.end();
  }

  VLOG(10) << "Creating and inserting new block " << raw
           << " from system allocator";

  // The whole chunk starts out as a single free block with no buddies.
  static_cast<MemoryBlock*>(raw)->Init(&cache_, MemoryBlock::FREE_CHUNK, index,
                                       chunk_bytes, nullptr, nullptr);

  total_free_ += chunk_bytes;

  // Remember the chunk boundaries, then publish the block in the pool.
  const IndexSizeAddress fresh_chunk(index, chunk_bytes, raw);
  chunks_.insert(fresh_chunk);
  return pool_.insert(fresh_chunk).first;
}

// Searches the pool for a free block that can hold `size` bytes.
//
// Starting at index 0, takes the first pool entry not less than
// (index, size, nullptr). If that entry belongs to a higher index and is too
// small, the search restarts from that index.
//
// Returns the matching entry, or pool_.end() when none exists.
BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) {
  size_t index = 0;

  for (;;) {
    auto candidate = pool_.lower_bound(IndexSizeAddress(index, size, nullptr));

    // Ran off the end of the pool: nothing fits.
    if (candidate == pool_.end()) {
      return candidate;
    }

    const auto candidate_index = std::get<0>(*candidate);

    // Accept an entry at the searched index, or one at a higher index that is
    // large enough.
    if (!(candidate_index > index) || std::get<1>(*candidate) >= size) {
      return candidate;
    }

    // Too small and at a higher index: retry from that index.
    index = candidate_index;
  }
}

// Removes the pool entry `it`, splits its block at `size` bytes and marks the
// block as ARENA_CHUNK. If the split left a right buddy of type FREE_CHUNK,
// that remainder is inserted into the pool.
//
// Returns the block (not its data pointer).
void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
                                   size_t size) {
  auto* block = static_cast<MemoryBlock*>(std::get<2>(*it));
  auto* desc = cache_.LoadDesc(block);

  // The entry is consumed; `it` must not be used after this point.
  pool_.erase(it);

  VLOG(10) << "Split block (" << block << ", " << desc->get_total_size()
           << ") into";
  block->Split(&cache_, size);

  VLOG(10) << "Left block (" << block << ", " << desc->get_total_size() << ")";
  desc->set_type(MemoryBlock::ARENA_CHUNK);

  // Put whatever is left over back into the pool.
  if (MemoryBlock* remainder = block->GetRightBuddy(&cache_)) {
    auto* remainder_desc = cache_.LoadDesc(remainder);
    if (remainder_desc->get_type() == MemoryBlock::FREE_CHUNK) {
      VLOG(10) << "Insert right block (" << remainder << ", "
               << remainder_desc->get_total_size() << ")";

      pool_.insert(IndexSizeAddress(remainder_desc->get_index(),
                                    remainder_desc->get_total_size(),
                                    remainder));
    }
  }

  return block;
}

}  // namespace detail
}  // namespace memory
}  // namespace paddle