未验证 提交 668bfb35 编写于 作者: L Leo Chen 提交者: GitHub

[NPU] add 32 extra bytes for npu memory slot (#35347)

上级 e913796c
......@@ -225,6 +225,7 @@ size_t Used<platform::XPUPlace>(const platform::XPUPlace &place) {
// For Ascend NPU
#ifdef PADDLE_WITH_ASCEND_CL
// Extra bytes added to every NPU allocation request; passed to
// BuddyAllocator as its extra_padding_size argument.
constexpr int EXTRA_PADDING_SIZE = 32;
class NPUBuddyAllocatorList {
private:
NPUBuddyAllocatorList() : devices_(platform::GetSelectedNPUDevices()) {
......@@ -257,10 +258,11 @@ class NPUBuddyAllocatorList {
std::call_once(*init_flags_[pos], [this, pos] {
platform::SetNPUDeviceId(devices_[pos]);
allocators_[pos].reset(new BuddyAllocator(
std::unique_ptr<detail::SystemAllocator>(
new detail::NPUAllocator(devices_[pos])),
platform::NPUMinChunkSize(), platform::NPUMaxChunkSize()));
allocators_[pos].reset(
new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
new detail::NPUAllocator(devices_[pos])),
platform::NPUMinChunkSize(),
platform::NPUMaxChunkSize(), EXTRA_PADDING_SIZE));
VLOG(10) << "\n\nNOTE:\n"
<< "You can set GFlags environment variable "
<< "'FLAGS_fraction_of_gpu_memory_to_use' "
......
......@@ -31,9 +31,10 @@ namespace detail {
// Builds a BuddyAllocator that takes ownership of `system_allocator`.
// Stores the min/max chunk sizes and the extra padding that Alloc() adds to
// each requested size; cache_ is constructed from system_allocator->UseGpu().
// NOTE(review): the two consecutive `size_t max_chunk_size` lines below look
// like the old and new signature of a diff shown together — confirm which
// one the real file contains.
BuddyAllocator::BuddyAllocator(
std::unique_ptr<SystemAllocator> system_allocator, size_t min_chunk_size,
size_t max_chunk_size)
size_t max_chunk_size, size_t extra_padding_size)
: min_chunk_size_(min_chunk_size),
max_chunk_size_(max_chunk_size),
extra_padding_size_(extra_padding_size),
cache_(system_allocator->UseGpu()),
system_allocator_(std::move(system_allocator)) {}
......@@ -59,9 +60,14 @@ inline size_t align(size_t size, size_t alignment) {
void* BuddyAllocator::Alloc(size_t unaligned_size) {
// adjust allocation alignment
size_t size =
align(unaligned_size + sizeof(MemoryBlock::Desc), min_chunk_size_);
size_t size =
align(unaligned_size + sizeof(MemoryBlock::Desc) + extra_padding_size_,
min_chunk_size_);
VLOG(10) << "alloc: " << unaligned_size
<< ", padding for desc: " << sizeof(MemoryBlock::Desc)
<< ", extra padding: " << extra_padding_size_
<< ", alignment: " << min_chunk_size_;
// acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
......
......@@ -35,7 +35,8 @@ namespace detail {
class BuddyAllocator {
public:
BuddyAllocator(std::unique_ptr<SystemAllocator> system_allocator,
size_t min_chunk_size, size_t max_chunk_size);
size_t min_chunk_size, size_t max_chunk_size,
size_t extra_padding_size = 0);
~BuddyAllocator();
......@@ -86,7 +87,9 @@ class BuddyAllocator {
size_t min_chunk_size_; // the minimum size of each chunk
size_t max_chunk_size_; // the maximum size of each chunk
size_t realloc_size_ = 0; // the size of re-allocated chunk
size_t realloc_size_ = 0; // the size of re-allocated chunk
size_t extra_padding_size_ = 0; // extra bytes added to each requested
// allocation size before alignment; 0 by default, non-zero for NPU
private:
/**
......
......@@ -37,6 +37,9 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) {
#endif
}
}
if (is_npu_place(place)) {
size += 32; // required by ascendcl
}
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册