Commit 8fe1cb72 (unverified)
Authored by Yang on Dec 22, 2021; committed by GitHub on Dec 22, 2021

optimize buddy_allocator (#38312)

Parent: 64e2f670
3 changed files with 91 additions and 70 deletions (+91 −70):

paddle/fluid/memory/detail/buddy_allocator.cc       +49 −67
paddle/fluid/memory/detail/buddy_allocator.h        +13 −2
paddle/fluid/memory/detail/buddy_allocator_test.cc  +29 −1
paddle/fluid/memory/detail/buddy_allocator.cc
@@ -13,18 +13,18 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/memory/detail/buddy_allocator.h"
 
 #include <algorithm>
 
 #include "gflags/gflags.h"
 #include "glog/logging.h"
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
-    defined(PADDLE_WITH_MLU)
-DECLARE_uint64(reallocate_gpu_memory_in_mb);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
+    defined(PADDLE_WITH_MLU) || defined(PADDLE_WITH_ASCEND_CL)
+#define USE_DEVICE
 DECLARE_uint64(reallocate_gpu_memory_in_mb);
 #endif
 
 #ifdef PADDLE_WITH_MLU
 #include "paddle/fluid/platform/device/mlu/mlu_info.h"
 #endif
@@ -180,33 +180,24 @@ uint64_t BuddyAllocator::Release() {
   std::lock_guard<std::mutex> lock(mutex_);
   int num = 0;
   uint64_t bytes = 0;
-  bool del_flag = false;
   for (auto iter = pool_.begin(); iter != pool_.end();) {
     auto remain_size = std::get<1>(*iter);
     auto remain_ptr = std::get<2>(*iter);
-    for (auto& chunk : chunks_) {
-      auto init_size = std::get<1>(chunk);
-      auto init_ptr = std::get<2>(chunk);
-      if (init_size == remain_size && init_ptr == remain_ptr) {
-        ++num;
-        bytes += init_size;
-        total_free_ -= init_size;
-        auto block = static_cast<MemoryBlock*>(std::get<2>(chunk));
-        system_allocator_->Free(init_ptr, init_size, std::get<0>(chunk));
-        cache_.Invalidate(block);
-        del_flag = true;
-        break;
-      }
-    }
-    if (del_flag) {
+    auto found = chunks_.find({remain_size, remain_ptr});
+    if (found != chunks_.end()) {
+      size_t index = found->second;
+      ++num;
+      bytes += remain_size;
+      total_free_ -= remain_size;
+      auto block = static_cast<MemoryBlock*>(remain_ptr);
+      system_allocator_->Free(remain_ptr, remain_size, index);
+      cache_.Invalidate(block);
       iter = pool_.erase(iter);
     } else {
       iter++;
     }
   }
-  VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes.";
+  VLOG(10) << "Release " << num << " chunks, Free " << bytes << " bytes.";
   return bytes;
 }
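The hunk above is the heart of the optimization: instead of scanning every recorded chunk for each pool entry, Release() resolves the allocator index with one keyed lookup in the new PoolMap (declared in buddy_allocator.h below). The following is a minimal standalone sketch of the two lookup patterns, illustrative only, with hypothetical names, not code from this commit:

#include <cstddef>
#include <map>
#include <set>
#include <tuple>
#include <utility>

using IndexSizeAddress = std::tuple<std::size_t, std::size_t, void*>;

// Old pattern: O(m) scan of a tuple set for a matching (size, ptr).
bool FindLinear(const std::set<IndexSizeAddress>& chunks, std::size_t size,
                void* ptr, std::size_t* index_out) {
  for (const auto& chunk : chunks) {
    if (std::get<1>(chunk) == size && std::get<2>(chunk) == ptr) {
      *index_out = std::get<0>(chunk);  // index is the first tuple field
      return true;
    }
  }
  return false;
}

// New pattern: O(log m) lookup in a map keyed by (size, ptr).
bool FindKeyed(const std::map<std::pair<std::size_t, void*>, std::size_t>& chunks,
               std::size_t size, void* ptr, std::size_t* index_out) {
  auto found = chunks.find({size, ptr});
  if (found == chunks.end()) return false;
  *index_out = found->second;  // index is the mapped value
  return true;
}

Since Release() performs this lookup once per pool entry, the old form cost O(n·m) over n pool entries and m chunks; the keyed map brings it to O(n·log m).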
@@ -234,49 +225,15 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
   size_t index = 0;
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  if (system_allocator_->UseGpu()) {
-    if ((total_used_ + total_free_) == 0) {
-      // Compute the allocation size for gpu for the first allocation.
-      allocate_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
-    } else {
-      // Compute the re-allocation size, we store the re-allocation size when
-      // user set FLAGS_reallocate_gpu_memory_in_mb to fix value.
-      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
-        realloc_size_ = platform::GpuReallocSize();
-      }
-      allocate_bytes = std::max(realloc_size_, request_bytes);
-    }
-  }
-#endif
-
-#ifdef PADDLE_WITH_ASCEND_CL
-  if (system_allocator_->UseGpu()) {
-    if ((total_used_ + total_free_) == 0) {
-      // Compute the allocation size for gpu for the first allocation.
-      allocate_bytes = std::max(platform::NPUInitAllocSize(), request_bytes);
-    } else {
-      // Compute the re-allocation size, we store the re-allocation size when
-      // user set FLAGS_reallocate_gpu_memory_in_mb to fix value.
-      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
-        realloc_size_ = platform::NPUReallocSize();
-      }
-      allocate_bytes = std::max(realloc_size_, request_bytes);
-    }
-  }
-#endif
-
-#ifdef PADDLE_WITH_MLU
-  if (system_allocator_->UseGpu()) {
-    if ((total_used_ + total_free_) == 0) {
-      // Compute the allocation size for mlu for the first allocation.
-      allocate_bytes = std::max(platform::MLUInitAllocSize(), request_bytes);
-    } else {
-      // Compute the re-allocation size, we store the re-allocation size when
-      // user set FLAGS_reallocate_gpu_memory_in_mb to fix value.
-      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
-        realloc_size_ = platform::MLUReallocSize();
-      }
-      allocate_bytes = std::max(realloc_size_, request_bytes);
-    }
-  }
+  allocate_bytes = DeviceAllocateSize(&platform::GpuInitAllocSize,
+                                      &platform::GpuReallocSize, request_bytes);
+#elif defined(PADDLE_WITH_ASCEND_CL)
+  allocate_bytes = DeviceAllocateSize(&platform::NPUInitAllocSize,
+                                      &platform::NPUReallocSize, request_bytes);
+#elif defined(PADDLE_WITH_MLU)
+  allocate_bytes = DeviceAllocateSize(&platform::MLUInitAllocSize(),
+                                      &platform::MLUReallocSize(), request_bytes);
 #endif
 
   // Allocate a new block
@@ -293,7 +250,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
   total_free_ += allocate_bytes;
 
   // record the chunk.
-  chunks_.insert(IndexSizeAddress(index, allocate_bytes, p));
+  chunks_.insert({{allocate_bytes, p}, index});
 
   // dump the block into pool
   return pool_.insert(IndexSizeAddress(index, allocate_bytes, p)).first;
@@ -350,6 +307,31 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
   return block;
 }
 
+size_t BuddyAllocator::DeviceAllocateSize(
+    std::function<size_t()> init_allocate_size_func,
+    std::function<size_t()> re_allocate_size_func, size_t request_bytes) {
+  size_t allocate_bytes = max_chunk_size_;
+#if defined(USE_DEVICE)
+  const bool use_gpu = system_allocator_->UseGpu();
+  VLOG(10) << "use_gpu " << use_gpu << ", total_used " << total_used_
+           << ", total_free " << total_free_;
+  if (use_gpu) {
+    if (total_used_ == 0 && total_free_ == 0) {
+      // Compute the allocation size for gpu for the first allocation.
+      allocate_bytes = std::max(init_allocate_size_func(), request_bytes);
+    } else {
+      // Compute the re-allocation size, we store the re-allocation size when
+      // user set FLAGS_reallocate_gpu_memory_in_mb to fix value.
+      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
+        realloc_size_ = re_allocate_size_func();
+      }
+      allocate_bytes = std::max(realloc_size_, request_bytes);
+    }
+  }
+#endif
+  return allocate_bytes;
+}
+
 }  // namespace detail
 }  // namespace memory
 }  // namespace paddle
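The three removed per-device blocks in RefillPool differed only in which platform sizing functions they called; the commit folds them into the single DeviceAllocateSize helper above, parameterized over std::function<size_t()> callbacks. Below is a minimal, self-contained sketch of that deduplication pattern, with hypothetical names, independent of the Paddle code:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>

// One copy of the sizing policy, parameterized over device-specific
// callbacks, replaces a duplicated block per device backend.
std::size_t ComputeAllocateSize(std::function<std::size_t()> init_size_func,
                                std::function<std::size_t()> realloc_size_func,
                                std::size_t request_bytes,
                                std::size_t total_in_use) {
  if (total_in_use == 0) {
    // First allocation: start from the device's initial pool size.
    return std::max(init_size_func(), request_bytes);
  }
  // Later refills: grow by the device's reallocation size.
  return std::max(realloc_size_func(), request_bytes);
}

int main() {
  auto gpu_init = [] { return std::size_t{512} << 20; };     // 512 MB
  auto gpu_realloc = [] { return std::size_t{256} << 20; };  // 256 MB
  std::cout << ComputeAllocateSize(gpu_init, gpu_realloc, 1 << 20, 0) << "\n"
            << ComputeAllocateSize(gpu_init, gpu_realloc, 1 << 20, 1) << "\n";
}

Each backend then supplies its own pair of sizing functions (GpuInitAllocSize/GpuReallocSize, NPU..., MLU...) at a single call site selected by the preprocessor.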
paddle/fluid/memory/detail/buddy_allocator.h
@@ -15,11 +15,14 @@ limitations under the License. */
 #pragma once
 
 #include <stdint.h>
+#include <functional>
+#include <map>
 #include <memory>
 #include <mutex>  // NOLINT
 #include <set>
 #include <tuple>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
 #include "paddle/fluid/memory/detail/memory_block.h"
@@ -59,6 +62,9 @@ class BuddyAllocator {
   using IndexSizeAddress = std::tuple<size_t, size_t, void*>;
   // Each element in PoolSet is a free allocation
   using PoolSet = std::set<IndexSizeAddress>;
+  // Each element in PoolMap is an allocation record
+  // key: <size, ptr>, value: index
+  using PoolMap = std::map<std::pair<size_t, void*>, size_t>;
 
   /*! \brief Allocate fixed-size memory from system */
   void* SystemAlloc(size_t size);
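The new PoolMap keys each recorded chunk by its (size, address) pair and stores the allocator index as the value; std::pair's lexicographic operator< already satisfies std::map's ordering requirement, so no custom comparator is needed. A small self-contained usage sketch (hypothetical values, not from the commit):

#include <cstddef>
#include <map>
#include <utility>

using PoolMap = std::map<std::pair<std::size_t, void*>, std::size_t>;

int main() {
  PoolMap chunks;
  char buffer[1024];  // stand-in for a chunk returned by a system allocator
  void* p = buffer;

  // Record the chunk: key is <size, ptr>, value is the allocator index.
  chunks.insert({{sizeof(buffer), p}, /*index=*/0});

  // Resolve the index for a known (size, ptr) pair in O(log n).
  auto found = chunks.find({sizeof(buffer), p});
  return found != chunks.end() ? 0 : 1;
}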
@@ -80,6 +86,11 @@ class BuddyAllocator {
   /*! \brief Find the existing chunk which used to allocation */
   PoolSet::iterator FindExistChunk(size_t size);
 
+  /*! \brief Allocate bytes from the device */
+  size_t DeviceAllocateSize(std::function<size_t()> init_allocate_size_func,
+                            std::function<size_t()> re_allocate_size_func,
+                            size_t request_bytes);
+
  private:
   size_t total_used_ = 0;  // the total size of used memory
   size_t total_free_ = 0;  // the total size of free memory
@@ -102,7 +113,7 @@ class BuddyAllocator {
   /**
    * \brief Record the allocated chunks when Refill pool.
    */
-  PoolSet chunks_;
+  PoolMap chunks_;
 
  private:
   /*! Unify the metadata format between GPU and CPU allocations */
paddle/fluid/memory/detail/buddy_allocator_test.cc
@@ -189,6 +189,35 @@ TEST(BuddyAllocator, FractionRefillPool) {
   buddy_allocator.Free(p1);
 }
 
+TEST(BuddyAllocator, DeviceRefillPool) {
+  const size_t malloc_size = 10;
+  const size_t malloc_bytes = malloc_size << 20;
+  FLAGS_initial_gpu_memory_in_mb = malloc_size;
+  FLAGS_reallocate_gpu_memory_in_mb = malloc_size;
+
+  EXPECT_EQ(platform::GpuMaxChunkSize(), malloc_bytes);
+
+  size_t max_chunk_size = platform::GpuMaxChunkSize();
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), max_chunk_size);
+
+  int* p0 = TestBuddyAllocator(&buddy_allocator, malloc_bytes - 1000,
+                               /* use_system_allocator = */ false,
+                               /* free_ptr = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  int* p1 = TestBuddyAllocator(&buddy_allocator, malloc_bytes - 1000,
+                               /* use_system_allocator = */ false,
+                               /* free_ptr = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  buddy_allocator.Free(p0);
+  buddy_allocator.Free(p1);
+}
+
 TEST(BuddyAllocator, AllocFromAvailable) {
   FLAGS_fraction_of_gpu_memory_to_use = 0.7;
   FLAGS_initial_gpu_memory_in_mb = 0;
@@ -350,7 +379,6 @@ TEST(BuddyAllocator, Release) {
 #ifdef PADDLE_WITH_ASCEND_CL
 TEST(BuddyAllocator, NpuFraction) {
   // In a 16 GB machine, the pool size will be about 160 MB
   FLAGS_fraction_of_gpu_memory_to_use = 0.005;
-  FLAGS_fraction_of_gpu_memory_to_use = 0.92;
   FLAGS_initial_gpu_memory_in_mb = 0;
   FLAGS_reallocate_gpu_memory_in_mb = 0;