[NPU] add 32 extra bytes for npu memory slot (#35347)

668bfb35 · Leo Chen · GitHub · e913796c · 668bfb35 · 668bfb35
4 changed files
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -225,6 +225,7 @@ size_t Used<platform::XPUPlace>(const platform::XPUPlace &place) {
 // For Ascend NPU
 #ifdef PADDLE_WITH_ASCEND_CL
+constexpr int EXTRA_PADDING_SIZE = 32;
 class NPUBuddyAllocatorList {
 private:
  NPUBuddyAllocatorList() : devices_(platform::GetSelectedNPUDevices()) {
@@ -257,10 +258,11 @@ class NPUBuddyAllocatorList {
    std::call_once(*init_flags_[pos], [this, pos] {
      platform::SetNPUDeviceId(devices_[pos]);
-      allocators_[pos].reset(new BuddyAllocator(
+      allocators_[pos].reset(
-          std::unique_ptr<detail::SystemAllocator>(
+          new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
                                 new detail::NPUAllocator(devices_[pos])),
-          platform::NPUMinChunkSize(), platform::NPUMaxChunkSize()));
+                             platform::NPUMinChunkSize(),
+                             platform::NPUMaxChunkSize(), EXTRA_PADDING_SIZE));
      VLOG(10) << "\n\nNOTE:\n"
               << "You can set GFlags environment variable "
               << "'FLAGS_fraction_of_gpu_memory_to_use' "

--- a/paddle/fluid/memory/detail/buddy_allocator.cc
+++ b/paddle/fluid/memory/detail/buddy_allocator.cc
@@ -31,9 +31,10 @@ namespace detail {
 BuddyAllocator::BuddyAllocator(
    std::unique_ptr<SystemAllocator> system_allocator, size_t min_chunk_size,
-    size_t max_chunk_size)
+    size_t max_chunk_size, size_t extra_padding_size)
    : min_chunk_size_(min_chunk_size),
      max_chunk_size_(max_chunk_size),
+      extra_padding_size_(extra_padding_size),
      cache_(system_allocator->UseGpu()),
      system_allocator_(std::move(system_allocator)) {}
@@ -59,9 +60,14 @@ inline size_t align(size_t size, size_t alignment) {
 void* BuddyAllocator::Alloc(size_t unaligned_size) {
  // adjust allocation alignment
-  size_t size =
-      align(unaligned_size + sizeof(MemoryBlock::Desc), min_chunk_size_);
+  size_t size =
+      align(unaligned_size + sizeof(MemoryBlock::Desc) + extra_padding_size_,
+            min_chunk_size_);
+  VLOG(10) << "alloc: " << unaligned_size
+           << ", padding for desc: " << sizeof(MemoryBlock::Desc)
+           << ", extra padding: " << extra_padding_size_
+           << ", alignment: " << min_chunk_size_;
  // acquire the allocator lock
  std::lock_guard<std::mutex> lock(mutex_);

--- a/paddle/fluid/memory/detail/buddy_allocator.h
+++ b/paddle/fluid/memory/detail/buddy_allocator.h
@@ -35,7 +35,8 @@ namespace detail {
 class BuddyAllocator {
 public:
  BuddyAllocator(std::unique_ptr<SystemAllocator> system_allocator,
-                 size_t min_chunk_size, size_t max_chunk_size);
+                 size_t min_chunk_size, size_t max_chunk_size,
+                 size_t extra_padding_size = 0);
  ~BuddyAllocator();
@@ -87,6 +88,8 @@ class BuddyAllocator {
  size_t max_chunk_size_;  // the maximum size of each chunk
  size_t realloc_size_ = 0;        // the size of re-allocated chunk
+  size_t extra_padding_size_ = 0;  // the size of padding to the size of memory
+                                   // to alloc, especially used in NPU
 private:
  /**

--- a/paddle/fluid/platform/device_memory_aligment.cc
+++ b/paddle/fluid/platform/device_memory_aligment.cc
@@ -37,6 +37,9 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) {
 #endif
    }
  }
+  if (is_npu_place(place)) {
+    size += 32;  // required by ascendcl
+  }
  size_t remaining = size % alignment;
  return remaining == 0 ? size : size + (alignment - remaining);
 }