未验证 提交 668bfb35 编写于 作者: L Leo Chen 提交者: GitHub

[NPU] add 32 extra bytes for npu memory slot (#35347)

上级 e913796c
...@@ -225,6 +225,7 @@ size_t Used<platform::XPUPlace>(const platform::XPUPlace &place) { ...@@ -225,6 +225,7 @@ size_t Used<platform::XPUPlace>(const platform::XPUPlace &place) {
// For Ascend NPU // For Ascend NPU
#ifdef PADDLE_WITH_ASCEND_CL #ifdef PADDLE_WITH_ASCEND_CL
constexpr int EXTRA_PADDING_SIZE = 32;
class NPUBuddyAllocatorList { class NPUBuddyAllocatorList {
private: private:
NPUBuddyAllocatorList() : devices_(platform::GetSelectedNPUDevices()) { NPUBuddyAllocatorList() : devices_(platform::GetSelectedNPUDevices()) {
...@@ -257,10 +258,11 @@ class NPUBuddyAllocatorList { ...@@ -257,10 +258,11 @@ class NPUBuddyAllocatorList {
std::call_once(*init_flags_[pos], [this, pos] { std::call_once(*init_flags_[pos], [this, pos] {
platform::SetNPUDeviceId(devices_[pos]); platform::SetNPUDeviceId(devices_[pos]);
allocators_[pos].reset(new BuddyAllocator( allocators_[pos].reset(
std::unique_ptr<detail::SystemAllocator>( new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
new detail::NPUAllocator(devices_[pos])), new detail::NPUAllocator(devices_[pos])),
platform::NPUMinChunkSize(), platform::NPUMaxChunkSize())); platform::NPUMinChunkSize(),
platform::NPUMaxChunkSize(), EXTRA_PADDING_SIZE));
VLOG(10) << "\n\nNOTE:\n" VLOG(10) << "\n\nNOTE:\n"
<< "You can set GFlags environment variable " << "You can set GFlags environment variable "
<< "'FLAGS_fraction_of_gpu_memory_to_use' " << "'FLAGS_fraction_of_gpu_memory_to_use' "
......
...@@ -31,9 +31,10 @@ namespace detail { ...@@ -31,9 +31,10 @@ namespace detail {
BuddyAllocator::BuddyAllocator( BuddyAllocator::BuddyAllocator(
std::unique_ptr<SystemAllocator> system_allocator, size_t min_chunk_size, std::unique_ptr<SystemAllocator> system_allocator, size_t min_chunk_size,
size_t max_chunk_size) size_t max_chunk_size, size_t extra_padding_size)
: min_chunk_size_(min_chunk_size), : min_chunk_size_(min_chunk_size),
max_chunk_size_(max_chunk_size), max_chunk_size_(max_chunk_size),
extra_padding_size_(extra_padding_size),
cache_(system_allocator->UseGpu()), cache_(system_allocator->UseGpu()),
system_allocator_(std::move(system_allocator)) {} system_allocator_(std::move(system_allocator)) {}
...@@ -59,9 +60,14 @@ inline size_t align(size_t size, size_t alignment) { ...@@ -59,9 +60,14 @@ inline size_t align(size_t size, size_t alignment) {
void* BuddyAllocator::Alloc(size_t unaligned_size) { void* BuddyAllocator::Alloc(size_t unaligned_size) {
// adjust allocation alignment // adjust allocation alignment
size_t size =
align(unaligned_size + sizeof(MemoryBlock::Desc), min_chunk_size_);
size_t size =
align(unaligned_size + sizeof(MemoryBlock::Desc) + extra_padding_size_,
min_chunk_size_);
VLOG(10) << "alloc: " << unaligned_size
<< ", padding for desc: " << sizeof(MemoryBlock::Desc)
<< ", extra padding: " << extra_padding_size_
<< ", alignment: " << min_chunk_size_;
// acquire the allocator lock // acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
......
...@@ -35,7 +35,8 @@ namespace detail { ...@@ -35,7 +35,8 @@ namespace detail {
class BuddyAllocator { class BuddyAllocator {
public: public:
BuddyAllocator(std::unique_ptr<SystemAllocator> system_allocator, BuddyAllocator(std::unique_ptr<SystemAllocator> system_allocator,
size_t min_chunk_size, size_t max_chunk_size); size_t min_chunk_size, size_t max_chunk_size,
size_t extra_padding_size = 0);
~BuddyAllocator(); ~BuddyAllocator();
...@@ -87,6 +88,8 @@ class BuddyAllocator { ...@@ -87,6 +88,8 @@ class BuddyAllocator {
size_t max_chunk_size_; // the maximum size of each chunk size_t max_chunk_size_; // the maximum size of each chunk
size_t realloc_size_ = 0; // the size of re-allocated chunk size_t realloc_size_ = 0; // the size of re-allocated chunk
size_t extra_padding_size_ = 0; // extra bytes added to each requested size
// before alignment, especially used in NPU
private: private:
/** /**
......
...@@ -37,6 +37,9 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) { ...@@ -37,6 +37,9 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) {
#endif #endif
} }
} }
if (is_npu_place(place)) {
size += 32; // extra 32 bytes required by AscendCL
}
size_t remaining = size % alignment; size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining); return remaining == 0 ? size : size + (alignment - remaining);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册