diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index 6c2fb82cb7cbe1f8600e177e1843ddc134f0c443..9cd35ad8ad9da959606d895063fe1981c5ade18f 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -225,6 +225,7 @@ size_t Used(const platform::XPUPlace &place) { // For Ascend NPU #ifdef PADDLE_WITH_ASCEND_CL +constexpr int EXTRA_PADDING_SIZE = 32; class NPUBuddyAllocatorList { private: NPUBuddyAllocatorList() : devices_(platform::GetSelectedNPUDevices()) { @@ -257,10 +258,11 @@ class NPUBuddyAllocatorList { std::call_once(*init_flags_[pos], [this, pos] { platform::SetNPUDeviceId(devices_[pos]); - allocators_[pos].reset(new BuddyAllocator( - std::unique_ptr( - new detail::NPUAllocator(devices_[pos])), - platform::NPUMinChunkSize(), platform::NPUMaxChunkSize())); + allocators_[pos].reset( + new BuddyAllocator(std::unique_ptr( + new detail::NPUAllocator(devices_[pos])), + platform::NPUMinChunkSize(), + platform::NPUMaxChunkSize(), EXTRA_PADDING_SIZE)); VLOG(10) << "\n\nNOTE:\n" << "You can set GFlags environment variable " << "'FLAGS_fraction_of_gpu_memory_to_use' " diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc index 55436f451a41ff2a77acddfaff3c5a7c290b7ac2..e714a020165d17a0ceb5c93ccf01ee99a147cd95 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.cc +++ b/paddle/fluid/memory/detail/buddy_allocator.cc @@ -31,9 +31,10 @@ namespace detail { BuddyAllocator::BuddyAllocator( std::unique_ptr system_allocator, size_t min_chunk_size, - size_t max_chunk_size) + size_t max_chunk_size, size_t extra_padding_size) : min_chunk_size_(min_chunk_size), max_chunk_size_(max_chunk_size), + extra_padding_size_(extra_padding_size), cache_(system_allocator->UseGpu()), system_allocator_(std::move(system_allocator)) {} @@ -59,9 +60,14 @@ inline size_t align(size_t size, size_t alignment) { void* BuddyAllocator::Alloc(size_t unaligned_size) { // adjust allocation alignment - size_t size = - align(unaligned_size + sizeof(MemoryBlock::Desc), min_chunk_size_); + size_t size = + align(unaligned_size + sizeof(MemoryBlock::Desc) + extra_padding_size_, + min_chunk_size_); + VLOG(10) << "alloc: " << unaligned_size + << ", padding for desc: " << sizeof(MemoryBlock::Desc) + << ", extra padding: " << extra_padding_size_ + << ", alignment: " << min_chunk_size_; // acquire the allocator lock std::lock_guard lock(mutex_); diff --git a/paddle/fluid/memory/detail/buddy_allocator.h b/paddle/fluid/memory/detail/buddy_allocator.h index 135c3b6d04f346d361530ad5586e8f11e023d05c..2ded5dccf6ee084b1214efb251c94f55680ed982 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.h +++ b/paddle/fluid/memory/detail/buddy_allocator.h @@ -35,7 +35,8 @@ namespace detail { class BuddyAllocator { public: BuddyAllocator(std::unique_ptr system_allocator, - size_t min_chunk_size, size_t max_chunk_size); + size_t min_chunk_size, size_t max_chunk_size, + size_t extra_padding_size = 0); ~BuddyAllocator(); @@ -86,7 +87,9 @@ class BuddyAllocator { size_t min_chunk_size_; // the minimum size of each chunk size_t max_chunk_size_; // the maximum size of each chunk - size_t realloc_size_ = 0; // the size of re-allocated chunk + size_t realloc_size_ = 0; // the size of re-allocated chunk + size_t extra_padding_size_ = 0; // the size of padding to the size of memory + // to alloc, especially used in NPU private: /** diff --git a/paddle/fluid/platform/device_memory_aligment.cc b/paddle/fluid/platform/device_memory_aligment.cc index 383dbd23ca0a59ab6c7289ae18d04ec11d429661..8261c866d073d69206fbdac09ec49567daef3e50 100644 --- a/paddle/fluid/platform/device_memory_aligment.cc +++ b/paddle/fluid/platform/device_memory_aligment.cc @@ -37,6 +37,9 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) { #endif } } + if (is_npu_place(place)) { + size += 32; // required by ascendcl + } size_t remaining = size % alignment; return remaining == 0 ? size : size + (alignment - remaining); }