PaddlePaddle / Paddle

Commit 737f21b8 (unverified)
Authored by Zeng Jinle on Aug 21, 2019; committed by GitHub on Aug 21, 2019

cherry-pick refine default gpu memory, test=release/1.5 (#19281)

Parent: 71168dad
Showing 4 changed files with 181 additions and 76 deletions (+181 -76):

- paddle/fluid/memory/detail/buddy_allocator.cc (+7 -2)
- paddle/fluid/memory/detail/buddy_allocator_test.cc (+134 -25)
- paddle/fluid/platform/gpu_info.cc (+36 -49)
- paddle/fluid/platform/gpu_info.h (+4 -0)
paddle/fluid/memory/detail/buddy_allocator.cc
@@ -23,6 +23,10 @@ DEFINE_bool(free_idle_memory, false,
             "If it is true, Paddle will try to free idle memory trunks during "
             "running time.");
 
+#ifdef PADDLE_WITH_CUDA
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
+#endif
+
 namespace paddle {
 namespace memory {
 namespace detail {
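For readers unfamiliar with the gflags idiom used here: DEFINE_uint64 creates the flag in exactly one translation unit, and DECLARE_uint64 pulls an extern declaration of it into others, which is why this file only declares the flag. A minimal sketch of the pairing (the default and help text below are illustrative, not Paddle's actual definition):

// flags.cc: define the flag once, in a single translation unit.
#include <cstdint>
#include <gflags/gflags.h>

DEFINE_uint64(reallocate_gpu_memory_in_mb, 0,
              "Illustrative help text: chunk size in MB for re-allocation; "
              "0 means size each refill by a fraction of available memory.");

// consumer.cc: any other translation unit only declares it.
DECLARE_uint64(reallocate_gpu_memory_in_mb);

uint64_t ReallocBytes() {
  // gflags exposes the flag as a global named FLAGS_<flag name>.
  return FLAGS_reallocate_gpu_memory_in_mb << 20;  // MB -> bytes
}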
@@ -200,8 +204,9 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
       // Compute the allocation size for gpu for the first allocation.
       allocate_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
     } else {
-      // Reallocation size
-      if (realloc_size_ == 0) {
+      // Compute the re-allocation size, we store the re-allocation size when
+      // user set FLAGS_reallocate_gpu_memory_in_mb to fix value.
+      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
         realloc_size_ = platform::GpuReallocSize();
       }
       allocate_bytes = std::max(realloc_size_, request_bytes);
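Read together, the two branches implement this policy: the first refill is sized by GpuInitAllocSize(), later refills by GpuReallocSize(), and with the change above the realloc size is recomputed on every refill unless the user pinned it with FLAGS_reallocate_gpu_memory_in_mb. A standalone sketch of that control flow (mock sizes and names, not Paddle's API):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Mock stand-ins for platform::GpuInitAllocSize()/GpuReallocSize() and the
// gflag; all values are illustrative only.
uint64_t flag_realloc_mb = 0;  // 0 => size by fraction, recomputed per refill
uint64_t GpuInitAllocSize() { return 500ull << 20; }   // pretend 500 MiB
uint64_t GpuReallocSize() { return 100ull << 20; }     // pretend 100 MiB

struct Refiller {
  bool first_alloc = true;
  uint64_t realloc_size = 0;

  uint64_t NextChunk(uint64_t request_bytes) {
    uint64_t allocate_bytes;
    if (first_alloc) {
      // First allocation: at least the configured initial pool size.
      allocate_bytes = std::max(GpuInitAllocSize(), request_bytes);
      first_alloc = false;
    } else {
      // Re-allocation: cache the size only when the flag pins it; when the
      // flag is 0 the size tracks currently available memory, so recompute.
      if (realloc_size == 0 || flag_realloc_mb == 0) {
        realloc_size = GpuReallocSize();
      }
      allocate_bytes = std::max(realloc_size, request_bytes);
    }
    return allocate_bytes;
  }
};

int main() {
  Refiller r;
  std::cout << (r.NextChunk(1 << 20) >> 20) << " MiB\n";  // 500 MiB
  std::cout << (r.NextChunk(1 << 20) >> 20) << " MiB\n";  // 100 MiB
}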
paddle/fluid/memory/detail/buddy_allocator_test.cc
@@ -22,6 +22,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/gpu_info.h"
 
 #ifdef PADDLE_WITH_CUDA
 #include <cuda_runtime.h>
 DECLARE_double(fraction_of_gpu_memory_to_use);
+DECLARE_uint64(initial_gpu_memory_in_mb);
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
@@ -31,9 +33,11 @@ namespace paddle {
 namespace memory {
 namespace detail {
 
-constexpr static int test_gpu_id = 0;
+constexpr static int TEST_GPU_ID = 0;
 
-void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
+int* TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes,
+                        bool use_system_allocator = false,
+                        bool free_ptr = true) {
   bool freed = false;
   size_t used_bytes = allocator->Used();
@@ -41,19 +45,25 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
     void* p = allocator->Alloc(size_bytes);
     EXPECT_NE(p, nullptr);
 
 #ifdef PADDLE_WITH_CUDA
-    if (size_bytes < platform::GpuMaxChunkSize()) {
+    if (size_bytes < allocator->GetMaxChunkSize()) {
 #else
-    if (size_bytes < platform::CpuMaxChunkSize()) {
+    if (size_bytes < allocator->GetMaxChunkSize()) {
 #endif
+      // Not allocate from SystemAllocator
+      EXPECT_FALSE(use_system_allocator);
       EXPECT_GE(allocator->Used(), used_bytes + size_bytes);
+    } else {
+      // Allocate from SystemAllocator doesn't count in Used()
+      EXPECT_TRUE(use_system_allocator);
+      EXPECT_EQ(allocator->Used(), used_bytes);
     }
 
+    int* intp = static_cast<int*>(p);
+    if (!free_ptr) {
+      return intp;
+    }
+
+    std::shared_ptr<int> ptr(intp, [&](void* p) {
       allocator->Free(intp);
       freed = true;
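The helper relies on a std::shared_ptr with a custom deleter so that Free() runs exactly once when the pointer goes out of scope, even if an assertion bails out of the happy path. A self-contained illustration of that pattern (toy allocator, not Paddle's BuddyAllocator):

#include <cstddef>
#include <iostream>
#include <memory>

// Toy allocator standing in for BuddyAllocator; illustrative only.
struct ToyAllocator {
  void* Alloc(std::size_t n) { return ::operator new(n); }
  void Free(void* p) {
    ::operator delete(p);
    std::cout << "freed\n";
  }
};

int main() {
  ToyAllocator allocator;
  bool freed = false;

  int* intp = static_cast<int*>(allocator.Alloc(sizeof(int) * 256));
  {
    // The deleter runs when the last shared_ptr copy goes away, so the
    // allocation is always returned to the allocator.
    std::shared_ptr<int> ptr(intp, [&](void*) {
      allocator.Free(intp);
      freed = true;
    });
  }  // ptr leaves scope here -> Free() is called

  std::cout << std::boolalpha << freed << "\n";  // true
}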
@@ -64,20 +74,30 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
   EXPECT_EQ(used_bytes, allocator->Used());
   EXPECT_TRUE(freed);
+  return nullptr;
 }
 
 #ifdef PADDLE_WITH_CUDA
 TEST(BuddyAllocator, GpuFraction) {
   // In a 16 GB machine, the pool size will be about 160 MB
   FLAGS_fraction_of_gpu_memory_to_use = 0.01;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
 
   BuddyAllocator buddy_allocator(
-      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
   // Less than pool size
   TestBuddyAllocator(&buddy_allocator, 10);
   TestBuddyAllocator(&buddy_allocator, 10 << 10);
   TestBuddyAllocator(&buddy_allocator, 10 << 20);
-  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+
+  // Greater than max chunk size
+  TestBuddyAllocator(&buddy_allocator, 499 << 20,
+                     /* use_system_allocator = */ true);
+  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                     /* use_system_allocator = */ true);
 }
 
 TEST(BuddyAllocator, InitRealloc) {
@@ -87,19 +107,19 @@ TEST(BuddyAllocator, InitRealloc) {
   EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(100 << 20));
 
   BuddyAllocator buddy_allocator(
-      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
   // Less then initial size and reallocate size
   TestBuddyAllocator(&buddy_allocator, 10 << 20);
   // Between initial size and reallocate size and not exceed pool
   TestBuddyAllocator(&buddy_allocator, 80 << 20);
-  // Less then reallocate size and exceed pool
-  TestBuddyAllocator(&buddy_allocator, 40 << 20);
-  // Greater then reallocate size and exceed pool
-  TestBuddyAllocator(&buddy_allocator, 80 << 20);
-  // Greater then initial size and reallocate size
-  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+  TestBuddyAllocator(&buddy_allocator, 99 << 20);
+  // Greater than max chunk size
+  TestBuddyAllocator(&buddy_allocator, 101 << 20,
+                     /* use_system_allocator = */ true);
+  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                     /* use_system_allocator = */ true);
 }
 
 TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
@@ -109,23 +129,112 @@ TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
   EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(10 << 20));
 
   BuddyAllocator buddy_allocator(
-      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
-  // Less then initial size and reallocate size
+  // Less than initial size and reallocate size
   TestBuddyAllocator(&buddy_allocator, 1 << 20);
   // Between initial size and reallocate size and not exceed pool
   TestBuddyAllocator(&buddy_allocator, 3 << 20);
-  // Less then initial size and exceed pool
-  TestBuddyAllocator(&buddy_allocator, 3 << 20);
-  // Less then reallocate size and not exceed pool (now pool is 15 MB, used 7
-  // MB)
-  TestBuddyAllocator(&buddy_allocator, 7 << 20);
-  // Less then reallocate size and exceed pool
+  // Between initial size and reallocate size and exceed pool
+  TestBuddyAllocator(&buddy_allocator, 6 << 20);
   TestBuddyAllocator(&buddy_allocator, 8 << 20);
-  // Greater then initial size and reallocate size
-  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+  TestBuddyAllocator(&buddy_allocator, 9 << 20);
+  // Greater than max trunk size
+  TestBuddyAllocator(&buddy_allocator, 11 << 20,
+                     /* use_system_allocator = */ true);
+  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                     /* use_system_allocator = */ true);
 }
 
+TEST(BuddyAllocator, FractionRefillPool) {
+  FLAGS_fraction_of_gpu_memory_to_use = 0.6;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  size_t max_chunk_size = platform::GpuMaxChunkSize();
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), max_chunk_size);
+
+  // Less than pool size
+  int* p0 = TestBuddyAllocator(&buddy_allocator, max_chunk_size - 1000,
+                               /* use_system_allocator = */ false,
+                               /* free_ptr = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  size_t alloc =
+      platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
+  // Exceed pool trigger refilling size of fraction of avaiable gpu, and should
+  // be able to alloc 60% of the remaining GPU
+  int* p1 = TestBuddyAllocator(&buddy_allocator, alloc,
+                               /* use_system_allocator = */ false,
+                               /* free_ptr = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  alloc =
+      platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
+  // Exceed pool trigger refilling size of fraction of avaiable gpu, and should
+  // be able to alloc 60% of the remaining GPU
+  TestBuddyAllocator(&buddy_allocator, alloc,
+                     /* use_system_allocator = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  buddy_allocator.Free(p0);
+  buddy_allocator.Free(p1);
+}
+
+TEST(BuddyAllocator, AllocFromAvailable) {
+  FLAGS_fraction_of_gpu_memory_to_use = 0.7;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  size_t total = 0, available = 0;
+  platform::SetDeviceId(TEST_GPU_ID);
+  platform::GpuMemoryUsage(&available, &total);
+
+  // Take half of available GPU
+  void* p;
+  cudaError_t result = cudaMalloc(&p, available >> 1);
+  EXPECT_TRUE(result == cudaSuccess);
+
+  // BuddyAllocator should be able to alloc the remaining GPU
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+  TestBuddyAllocator(&buddy_allocator, 10);
+  TestBuddyAllocator(&buddy_allocator, 10 << 10);
+  TestBuddyAllocator(&buddy_allocator, 10 << 20);
+  TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1 << 30));
+
+  if (p) {
+    EXPECT_TRUE(cudaFree(p) == cudaSuccess);
+  }
+}
+
+TEST(BuddyAllocator, AllocFromAvailableWhenFractionIsOne) {
+  FLAGS_fraction_of_gpu_memory_to_use = 1.0;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  void* p = nullptr;
+  EXPECT_TRUE(cudaMalloc(&p, static_cast<size_t>(4) << 30) == cudaSuccess);
+
+  // BuddyAllocator should be able to alloc the remaining GPU
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+  TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1) << 30);
+  TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(5) << 30);
+
+  if (p) {
+    EXPECT_TRUE(cudaFree(p) == cudaSuccess);
+  }
+}
+
 #endif
 }  // namespace detail
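One detail worth noting in these tests: large shifts are written as static_cast<size_t>(4) << 30 rather than 4 << 30, because shifting a plain 32-bit int that far overflows it (undefined behavior), while promoting to a 64-bit size_t first keeps the arithmetic well-defined. A small demonstration of the pitfall (my example, not from the commit; assumes a 64-bit size_t):

#include <cstddef>
#include <iostream>

int main() {
  // 4 << 30 would overflow int: 4 * 2^30 = 2^32 does not fit in 32 bits.
  // Casting to size_t before shifting keeps the value well-defined.
  std::size_t four_gib = static_cast<std::size_t>(4) << 30;
  std::cout << four_gib << "\n";  // 4294967296

  // Same idea with multiplication: 1 << 30 still fits in int, so promote
  // before multiplying by 2.
  std::size_t two_gib = 2 * static_cast<std::size_t>(1 << 30);
  std::cout << two_gib << "\n";  // 2147483648
}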
paddle/fluid/platform/gpu_info.cc
@@ -215,45 +215,47 @@ void GpuMemoryUsage(size_t *available, size_t *total) {
                  "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
 }
 
-size_t GpuMaxAllocSize() {
-  return std::max(GpuInitAllocSize(), GpuReallocSize());
-}
-
-size_t GpuInitAllocSize() {
-  if (FLAGS_initial_gpu_memory_in_mb > 0ul) {
-    // Initial memory will be allocated by FLAGS_initial_gpu_memory_in_mb
-    return static_cast<size_t>(FLAGS_initial_gpu_memory_in_mb << 20);
-  }
-
-  // FLAGS_initial_gpu_memory_in_mb is 0, initial memory will be allocated by
-  // fraction
+size_t GpuAvailableMemToAlloc() {
   size_t total = 0;
   size_t available = 0;
   GpuMemoryUsage(&available, &total);
-  size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
-
-  return static_cast<size_t>((total - reserving) *
-                             FLAGS_fraction_of_gpu_memory_to_use);
+  size_t reserving =
+      static_cast<size_t>(fraction_reserve_gpu_memory * available);
+  // If available size is less than minimum chunk size, no usable memory exists
+  size_t available_to_alloc = available - reserving;
+  size_t min_chunk_size = GpuMinChunkSize();
+  if (available_to_alloc < min_chunk_size) {
+    available_to_alloc = 0;
+  }
+  VLOG(10) << "GPU usage " << (available >> 20) << "M/" << (total >> 20)
+           << "M, " << (available_to_alloc >> 20) << "M available to allocate";
+  return available_to_alloc;
 }
 
-size_t GpuReallocSize() {
-  if (FLAGS_reallocate_gpu_memory_in_mb > 0ul) {
-    // Additional memory will be allocated by FLAGS_reallocate_gpu_memory_in_mb
-    return static_cast<size_t>(FLAGS_reallocate_gpu_memory_in_mb << 20);
-  }
+size_t GpuMaxAllocSize() {
+  return std::max(GpuInitAllocSize(), GpuReallocSize());
+}
 
-  // FLAGS_reallocate_gpu_memory_in_mb is 0, additional memory will be allocated
-  // by fraction
-  size_t total = 0;
-  size_t available = 0;
+static size_t GpuAllocSize(bool realloc) {
+  size_t available_to_alloc = GpuAvailableMemToAlloc();
+  PADDLE_ENFORCE_GT(available_to_alloc, 0, "No enough available GPU memory");
+  // If FLAGS_initial_gpu_memory_in_mb is 0, then initial memory will be
+  // allocated by fraction
+  size_t flag_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb
+                           : FLAGS_initial_gpu_memory_in_mb;
+  size_t alloc_bytes =
+      (flag_mb > 0ul ? flag_mb << 20
+                     : available_to_alloc * FLAGS_fraction_of_gpu_memory_to_use);
+  PADDLE_ENFORCE_GE(available_to_alloc, alloc_bytes,
+                    "No enough available GPU memory");
+  VLOG(10) << "Alloc size is " << (alloc_bytes >> 20)
+           << " MiB, is it Re-alloc: " << realloc;
+  return alloc_bytes;
+}
 
-  GpuMemoryUsage(&available, &total);
-  size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
+size_t GpuInitAllocSize() { return GpuAllocSize(/* realloc = */ false); }
 
-  return static_cast<size_t>((total - reserving) *
-                             FLAGS_fraction_of_gpu_memory_to_use);
-}
+size_t GpuReallocSize() { return GpuAllocSize(/* realloc = */ true); }
 
 size_t GpuMinChunkSize() {
   // Allow to allocate the minimum chunk size is 256 bytes.
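As a concrete walk-through of the new sizing logic: the reserve is now carved out of currently available memory rather than total memory, and the flag, when nonzero, pins the allocation size outright. A standalone model with made-up numbers (the 5% reserve mirrors fraction_reserve_gpu_memory but is an assumption here, as are all device sizes):

#include <cstdint>
#include <iostream>

// Standalone model of GpuAvailableMemToAlloc/GpuAllocSize; illustrative only.
constexpr double kFractionReserve = 0.05;  // assumed reserve fraction
constexpr double kFractionToUse = 0.5;     // FLAGS_fraction_of_gpu_memory_to_use
constexpr uint64_t kFlagInitMb = 0;        // FLAGS_initial_gpu_memory_in_mb
constexpr uint64_t kMinChunk = 1 << 8;     // pretend minimum chunk size

uint64_t AvailableMemToAlloc(uint64_t available) {
  // Reserve a slice of *available* memory (the change above: previously the
  // reserve was a slice of *total*).
  uint64_t reserving = static_cast<uint64_t>(kFractionReserve * available);
  uint64_t available_to_alloc = available - reserving;
  return available_to_alloc < kMinChunk ? 0 : available_to_alloc;
}

uint64_t AllocSize(uint64_t available) {
  uint64_t available_to_alloc = AvailableMemToAlloc(available);
  // A nonzero flag pins the size in MB; zero falls back to the fraction.
  return kFlagInitMb > 0
             ? kFlagInitMb << 20
             : static_cast<uint64_t>(available_to_alloc * kFractionToUse);
}

int main() {
  uint64_t available = 12ull << 30;  // 12 GiB currently free
  // 12288 MiB * 0.95 * 0.5 -> prints roughly 5836 (MiB).
  std::cout << (AllocSize(available) >> 20) << " MiB\n";
}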
@@ -261,24 +263,9 @@ size_t GpuMinChunkSize() {
 }
 
 size_t GpuMaxChunkSize() {
-  size_t total = 0;
-  size_t available = 0;
-
-  GpuMemoryUsage(&available, &total);
-  VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
-           << total / 1024 / 1024 << "M";
-  size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
-  // If available less than minimum chunk size, no usable memory exists.
-  available =
-      std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(),
-               total - reserving);
-
-  size_t allocating = GpuMaxAllocSize();
-
-  PADDLE_ENFORCE_LE(allocating, available,
-                    "Insufficient GPU memory to allocation.");
-
-  return allocating;
+  size_t max_chunk_size = GpuMaxAllocSize();
+  VLOG(10) << "Max chunk size " << (max_chunk_size >> 20) << "M";
+  return max_chunk_size;
 }
 
 void GpuMemcpyAsync(void *dst, const void *src, size_t count,
paddle/fluid/platform/gpu_info.h
@@ -57,6 +57,10 @@ void SetDeviceId(int device_id);
 //! Get the memory usage of current GPU device.
 void GpuMemoryUsage(size_t *available, size_t *total);
 
+//! Get the available memory to allocate, which is the size of available gpu
+//! minus reserving.
+size_t GpuAvailableMemToAlloc();
+
 //! Get the maximum allocation size of current GPU device.
 size_t GpuMaxAllocSize();
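The relationship the header implies, and the .cc change relies on, is that GpuMaxAllocSize() is just the larger of the initial and re-allocation sizes, which is also what GpuMaxChunkSize() now returns. A tiny mock demonstrating that invariant (hypothetical sizes; the real implementations live in gpu_info.cc):

#include <algorithm>
#include <cassert>
#include <cstddef>

// Mock implementations matching the declared relationships above.
size_t GpuInitAllocSize() { return 500u << 20; }  // hypothetical: 500 MiB
size_t GpuReallocSize() { return 100u << 20; }    // hypothetical: 100 MiB
size_t GpuMaxAllocSize() {
  return std::max(GpuInitAllocSize(), GpuReallocSize());
}

int main() {
  // With these sizes the max allocation equals the initial allocation size.
  assert(GpuMaxAllocSize() == GpuInitAllocSize());
  return 0;
}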