BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 737f21b8 (unverified)
Authored Aug 21, 2019 by Zeng Jinle; committed by GitHub on Aug 21, 2019
Parent: 71168dad

cherry-pick refine default gpu memory, test=release/1.5 (#19281)
Showing 4 changed files with 181 additions and 76 deletions (+181 −76)
paddle/fluid/memory/detail/buddy_allocator.cc        +7   −2
paddle/fluid/memory/detail/buddy_allocator_test.cc   +134 −25
paddle/fluid/platform/gpu_info.cc                    +36  −49
paddle/fluid/platform/gpu_info.h                     +4   −0
paddle/fluid/memory/detail/buddy_allocator.cc
@@ -23,6 +23,10 @@ DEFINE_bool(free_idle_memory, false,
             "If it is true, Paddle will try to free idle memory trunks during "
             "running time.");
 
+#ifdef PADDLE_WITH_CUDA
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
+#endif
+
 namespace paddle {
 namespace memory {
 namespace detail {
@@ -200,8 +204,9 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
       // Compute the allocation size for gpu for the first allocation.
       allocate_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
     } else {
-      // Reallocation size
-      if (realloc_size_ == 0) {
+      // Compute the re-allocation size, we store the re-allocation size when
+      // user set FLAGS_reallocate_gpu_memory_in_mb to fix value.
+      if (realloc_size_ == 0 || FLAGS_reallocate_gpu_memory_in_mb == 0ul) {
         realloc_size_ = platform::GpuReallocSize();
       }
       allocate_bytes = std::max(realloc_size_, request_bytes);
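Editor's note on the hunk above: when FLAGS_reallocate_gpu_memory_in_mb is 0, the refill size is derived from the fraction of *currently* free GPU memory, so the cached realloc_size_ must be refreshed on every refill; a non-zero flag pins the size, so the first computed value can be reused. A minimal self-contained sketch of that caching policy (hypothetical names; only FLAGS_reallocate_gpu_memory_in_mb, realloc_size_, and platform::GpuReallocSize() come from the diff):

#include <algorithm>
#include <cstddef>
#include <cstdio>

namespace sketch {

std::size_t g_free_bytes = 512u << 20;  // pretend free GPU memory
std::size_t g_realloc_flag_mb = 0;      // stands in for FLAGS_reallocate_gpu_memory_in_mb

// Stands in for platform::GpuReallocSize(): the flag value when set,
// otherwise a fraction (here 1/2) of the memory that is currently free.
std::size_t QueryReallocSize() {
  return g_realloc_flag_mb > 0 ? g_realloc_flag_mb << 20 : g_free_bytes / 2;
}

struct RefillPolicy {
  std::size_t realloc_size_ = 0;

  std::size_t ComputeAllocateBytes(std::size_t request_bytes) {
    // Flag unset: the size tracks current free memory, so recompute on
    // every refill. Flag set: compute once and reuse the cached value.
    if (realloc_size_ == 0 || g_realloc_flag_mb == 0) {
      realloc_size_ = QueryReallocSize();
    }
    return std::max(realloc_size_, request_bytes);
  }
};

}  // namespace sketch

int main() {
  sketch::RefillPolicy policy;
  std::printf("%zu MiB\n", policy.ComputeAllocateBytes(1u << 20) >> 20);  // 256
  sketch::g_free_bytes = 128u << 20;  // free memory shrinks between refills
  std::printf("%zu MiB\n", policy.ComputeAllocateBytes(1u << 20) >> 20);  // 64
  return 0;
}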
paddle/fluid/memory/detail/buddy_allocator_test.cc
@@ -22,6 +22,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/gpu_info.h"
 
 #ifdef PADDLE_WITH_CUDA
 #include <cuda_runtime.h>
 DECLARE_double(fraction_of_gpu_memory_to_use);
+DECLARE_uint64(initial_gpu_memory_in_mb);
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
@@ -31,9 +33,11 @@ namespace paddle {
 namespace memory {
 namespace detail {
 
-constexpr static int test_gpu_id = 0;
+constexpr static int TEST_GPU_ID = 0;
 
-void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
+int* TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes,
+                        bool use_system_allocator = false,
+                        bool free_ptr = true) {
   bool freed = false;
   size_t used_bytes = allocator->Used();
@@ -41,19 +45,25 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
   void* p = allocator->Alloc(size_bytes);
   EXPECT_NE(p, nullptr);
 
 #ifdef PADDLE_WITH_CUDA
-  if (size_bytes < platform::GpuMaxChunkSize()) {
+  if (size_bytes < allocator->GetMaxChunkSize()) {
 #else
-  if (size_bytes < platform::CpuMaxChunkSize()) {
+  if (size_bytes < allocator->GetMaxChunkSize()) {
 #endif
+    // Not allocate from SystemAllocator
+    EXPECT_FALSE(use_system_allocator);
     EXPECT_GE(allocator->Used(), used_bytes + size_bytes);
+  } else {
+    // Allocate from SystemAllocator doesn't count in Used()
+    EXPECT_TRUE(use_system_allocator);
+    EXPECT_EQ(allocator->Used(), used_bytes);
   }
 
   int* intp = static_cast<int*>(p);
+  if (!free_ptr) {
+    return intp;
+  }
   std::shared_ptr<int> ptr(intp, [&](void* p) {
     allocator->Free(intp);
     freed = true;
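Editor's note: the reworked helper now encodes a contract: requests below the allocator's max chunk size must be served from the managed pool (and counted in Used()), while larger ones must come from the SystemAllocator (and leave Used() untouched). A toy model of that accounting, with hypothetical names, for readers without a CUDA build:

#include <cassert>
#include <cstddef>
#include <cstdlib>

// Toy allocator mirroring the accounting the test asserts on: pool
// allocations add to used_, system-allocator fallbacks bypass it.
struct ToyAllocator {
  std::size_t max_chunk_size_;
  std::size_t used_ = 0;

  void* Alloc(std::size_t bytes) {
    if (bytes < max_chunk_size_) used_ += bytes;  // pool path: counted
    return std::malloc(bytes);  // system path leaves used_ unchanged
  }
  void Free(void* p, std::size_t bytes, bool from_pool) {
    if (from_pool) used_ -= bytes;
    std::free(p);
  }
  std::size_t Used() const { return used_; }
};

int main() {
  ToyAllocator a{1u << 20};       // 1 MiB max chunk size
  void* small = a.Alloc(512);     // below the threshold: pool path
  assert(a.Used() == 512);
  void* big = a.Alloc(2u << 20);  // above the threshold: system path
  assert(a.Used() == 512);        // Used() unchanged, as the test expects
  a.Free(small, 512, /* from_pool = */ true);
  a.Free(big, 2u << 20, /* from_pool = */ false);
  assert(a.Used() == 0);
  return 0;
}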
@@ -64,20 +74,30 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
   EXPECT_EQ(used_bytes, allocator->Used());
   EXPECT_TRUE(freed);
+  return nullptr;
 }
 
 #ifdef PADDLE_WITH_CUDA
 TEST(BuddyAllocator, GpuFraction) {
+  // In a 16 GB machine, the pool size will be about 160 MB
   FLAGS_fraction_of_gpu_memory_to_use = 0.01;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
 
   BuddyAllocator buddy_allocator(
-      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
+  // Less than pool size
   TestBuddyAllocator(&buddy_allocator, 10);
   TestBuddyAllocator(&buddy_allocator, 10 << 10);
   TestBuddyAllocator(&buddy_allocator, 10 << 20);
-  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
 
+  // Greater than max chunk size
+  TestBuddyAllocator(&buddy_allocator, 499 << 20,
+                     /* use_system_allocator = */ true);
+  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                     /* use_system_allocator = */ true);
 }
 
 TEST(BuddyAllocator, InitRealloc) {
@@ -87,19 +107,19 @@ TEST(BuddyAllocator, InitRealloc) {
   EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(100 << 20));
 
   BuddyAllocator buddy_allocator(
-      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
   // Less then initial size and reallocate size
   TestBuddyAllocator(&buddy_allocator, 10 << 20);
   // Between initial size and reallocate size and not exceed pool
   TestBuddyAllocator(&buddy_allocator, 80 << 20);
-  // Less then reallocate size and exceed pool
-  TestBuddyAllocator(&buddy_allocator, 40 << 20);
-  // Greater then reallocate size and exceed pool
-  TestBuddyAllocator(&buddy_allocator, 80 << 20);
-  // Greater then initial size and reallocate size
-  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+  TestBuddyAllocator(&buddy_allocator, 99 << 20);
+  // Greater than max chunk size
+  TestBuddyAllocator(&buddy_allocator, 101 << 20,
+                     /* use_system_allocator = */ true);
+  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                     /* use_system_allocator = */ true);
 }
 
 TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
@@ -109,23 +129,112 @@ TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
   EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(10 << 20));
 
   BuddyAllocator buddy_allocator(
-      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
-  // Less then initial size and reallocate size
+  // Less than initial size and reallocate size
   TestBuddyAllocator(&buddy_allocator, 1 << 20);
   // Between initial size and reallocate size and not exceed pool
   TestBuddyAllocator(&buddy_allocator, 3 << 20);
-  // Less then initial size and exceed pool
-  TestBuddyAllocator(&buddy_allocator, 3 << 20);
-  // Less then reallocate size and not exceed pool (now pool is 15 MB, used 7
-  // MB)
-  TestBuddyAllocator(&buddy_allocator, 7 << 20);
-  // Less then reallocate size and exceed pool
+  // Between initial size and reallocate size and exceed pool
+  TestBuddyAllocator(&buddy_allocator, 6 << 20);
   TestBuddyAllocator(&buddy_allocator, 8 << 20);
-  // Greater then initial size and reallocate size
-  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+  TestBuddyAllocator(&buddy_allocator, 9 << 20);
+  // Greater than max trunk size
+  TestBuddyAllocator(&buddy_allocator, 11 << 20,
+                     /* use_system_allocator = */ true);
+  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                     /* use_system_allocator = */ true);
 }
 
+TEST(BuddyAllocator, FractionRefillPool) {
+  FLAGS_fraction_of_gpu_memory_to_use = 0.6;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  size_t max_chunk_size = platform::GpuMaxChunkSize();
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), max_chunk_size);
+
+  // Less than pool size
+  int* p0 = TestBuddyAllocator(&buddy_allocator, max_chunk_size - 1000,
+                               /* use_system_allocator = */ false,
+                               /* free_ptr = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  size_t alloc =
+      platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
+  // Exceed pool trigger refilling size of fraction of available gpu, and
+  // should be able to alloc 60% of the remaining GPU
+  int* p1 = TestBuddyAllocator(&buddy_allocator, alloc,
+                               /* use_system_allocator = */ false,
+                               /* free_ptr = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  alloc =
+      platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
+  // Exceed pool trigger refilling size of fraction of available gpu, and
+  // should be able to alloc 60% of the remaining GPU
+  TestBuddyAllocator(&buddy_allocator, alloc,
+                     /* use_system_allocator = */ false);
+  // Max chunk size should be same during allocation
+  EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+  buddy_allocator.Free(p0);
+  buddy_allocator.Free(p1);
+}
+
+TEST(BuddyAllocator, AllocFromAvailable) {
+  FLAGS_fraction_of_gpu_memory_to_use = 0.7;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  size_t total = 0, available = 0;
+  platform::SetDeviceId(TEST_GPU_ID);
+  platform::GpuMemoryUsage(&available, &total);
+
+  // Take half of available GPU
+  void* p;
+  cudaError_t result = cudaMalloc(&p, available >> 1);
+  EXPECT_TRUE(result == cudaSuccess);
+
+  // BuddyAllocator should be able to alloc the remaining GPU
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+  TestBuddyAllocator(&buddy_allocator, 10);
+  TestBuddyAllocator(&buddy_allocator, 10 << 10);
+  TestBuddyAllocator(&buddy_allocator, 10 << 20);
+  TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1 << 30));
+
+  if (p) {
+    EXPECT_TRUE(cudaFree(p) == cudaSuccess);
+  }
+}
+
+TEST(BuddyAllocator, AllocFromAvailableWhenFractionIsOne) {
+  FLAGS_fraction_of_gpu_memory_to_use = 1.0;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  void* p = nullptr;
+  EXPECT_TRUE(cudaMalloc(&p, static_cast<size_t>(4) << 30) == cudaSuccess);
+
+  // BuddyAllocator should be able to alloc the remaining GPU
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+  TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1) << 30);
+  TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(5) << 30);
+
+  if (p) {
+    EXPECT_TRUE(cudaFree(p) == cudaSuccess);
+  }
+}
+
 #endif
 }  // namespace detail
paddle/fluid/platform/gpu_info.cc
@@ -215,70 +215,57 @@ void GpuMemoryUsage(size_t *available, size_t *total) {
       "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
 }
 
-size_t GpuMaxAllocSize() {
-  return std::max(GpuInitAllocSize(), GpuReallocSize());
-}
-
-size_t GpuInitAllocSize() {
-  if (FLAGS_initial_gpu_memory_in_mb > 0ul) {
-    // Initial memory will be allocated by FLAGS_initial_gpu_memory_in_mb
-    return static_cast<size_t>(FLAGS_initial_gpu_memory_in_mb << 20);
-  }
-
-  // FLAGS_initial_gpu_memory_in_mb is 0, initial memory will be allocated by
-  // fraction
+size_t GpuAvailableMemToAlloc() {
   size_t total = 0;
   size_t available = 0;
   GpuMemoryUsage(&available, &total);
-  size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
-
-  return static_cast<size_t>((total - reserving) *
-                             FLAGS_fraction_of_gpu_memory_to_use);
-}
-
-size_t GpuReallocSize() {
-  if (FLAGS_reallocate_gpu_memory_in_mb > 0ul) {
-    // Additional memory will be allocated by FLAGS_reallocate_gpu_memory_in_mb
-    return static_cast<size_t>(FLAGS_reallocate_gpu_memory_in_mb << 20);
-  }
-
-  // FLAGS_reallocate_gpu_memory_in_mb is 0, additional memory will be
-  // allocated by fraction
-  size_t total = 0;
-  size_t available = 0;
-  GpuMemoryUsage(&available, &total);
-  size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
-
-  return static_cast<size_t>((total - reserving) *
-                             FLAGS_fraction_of_gpu_memory_to_use);
+  size_t reserving =
+      static_cast<size_t>(fraction_reserve_gpu_memory * available);
+  // If available size is less than minimum chunk size, no usable memory exists
+  size_t available_to_alloc = available - reserving;
+  size_t min_chunk_size = GpuMinChunkSize();
+  if (available_to_alloc < min_chunk_size) {
+    available_to_alloc = 0;
+  }
+  VLOG(10) << "GPU usage " << (available >> 20) << "M/" << (total >> 20)
+           << "M, " << (available_to_alloc >> 20) << "M available to allocate";
+  return available_to_alloc;
+}
+
+size_t GpuMaxAllocSize() {
+  return std::max(GpuInitAllocSize(), GpuReallocSize());
+}
+
+static size_t GpuAllocSize(bool realloc) {
+  size_t available_to_alloc = GpuAvailableMemToAlloc();
+  PADDLE_ENFORCE_GT(available_to_alloc, 0, "No enough available GPU memory");
+  // If FLAGS_initial_gpu_memory_in_mb is 0, then initial memory will be
+  // allocated by fraction
+  size_t flag_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb
+                           : FLAGS_initial_gpu_memory_in_mb;
+  size_t alloc_bytes =
+      (flag_mb > 0ul ? flag_mb << 20
+                     : available_to_alloc * FLAGS_fraction_of_gpu_memory_to_use);
+  PADDLE_ENFORCE_GE(available_to_alloc, alloc_bytes,
+                    "No enough available GPU memory");
+  VLOG(10) << "Alloc size is " << (alloc_bytes >> 20)
+           << " MiB, is it Re-alloc: " << realloc;
+  return alloc_bytes;
+}
+
+size_t GpuInitAllocSize() { return GpuAllocSize(/* realloc = */ false); }
+
+size_t GpuReallocSize() { return GpuAllocSize(/* realloc = */ true); }
 
 size_t GpuMinChunkSize() {
   // Allow to allocate the minimum chunk size is 256 bytes.
   return 1 << 8;
 }
 
 size_t GpuMaxChunkSize() {
-  size_t total = 0;
-  size_t available = 0;
-
-  GpuMemoryUsage(&available, &total);
-  VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
-           << total / 1024 / 1024 << "M";
-  size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
-  // If available less than minimum chunk size, no usable memory exists.
-  available =
-      std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(),
-               total - reserving);
-
-  size_t allocating = GpuMaxAllocSize();
-
-  PADDLE_ENFORCE_LE(allocating, available,
-                    "Insufficient GPU memory to allocation.");
-
-  return allocating;
+  size_t max_chunk_size = GpuMaxAllocSize();
+  VLOG(10) << "Max chunk size " << (max_chunk_size >> 20) << "M";
+  return max_chunk_size;
 }
 
 void GpuMemcpyAsync(void *dst, const void *src, size_t count,
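Editor's note: the net effect of this hunk is that GpuInitAllocSize() and GpuReallocSize() now share one policy: use the corresponding flag when it is non-zero, otherwise take FLAGS_fraction_of_gpu_memory_to_use of what GpuAvailableMemToAlloc() reports (free memory minus the reserved slice). A condensed, self-contained sketch of that decision (hypothetical stand-ins for the flags and memory queries; the real code aborts via PADDLE_ENFORCE when the result exceeds what is available, rather than clamping):

#include <cstddef>
#include <cstdio>

namespace sketch {

double g_fraction = 0.5;                    // FLAGS_fraction_of_gpu_memory_to_use
std::size_t g_initial_mb = 0;               // FLAGS_initial_gpu_memory_in_mb
std::size_t g_realloc_mb = 100;             // FLAGS_reallocate_gpu_memory_in_mb
std::size_t g_free_bytes = 4096ul << 20;    // pretend free GPU memory
std::size_t g_reserve_bytes = 256ul << 20;  // pretend reserved slice

// Free memory minus the reserved slice; zero when nothing usable remains.
std::size_t AvailableMemToAlloc() {
  return g_free_bytes > g_reserve_bytes ? g_free_bytes - g_reserve_bytes : 0;
}

// One policy for both the initial and the re-allocation size.
std::size_t AllocSize(bool realloc) {
  std::size_t available = AvailableMemToAlloc();
  std::size_t flag_mb = realloc ? g_realloc_mb : g_initial_mb;
  // A non-zero flag wins; otherwise fall back to the fraction policy.
  return flag_mb > 0 ? flag_mb << 20
                     : static_cast<std::size_t>(available * g_fraction);
}

}  // namespace sketch

int main() {
  // Initial size: flag is 0, so 50% of (4096 - 256) MiB = 1920 MiB.
  std::printf("init:    %zu MiB\n", sketch::AllocSize(false) >> 20);
  // Realloc size: flag is 100, so exactly 100 MiB.
  std::printf("realloc: %zu MiB\n", sketch::AllocSize(true) >> 20);
  return 0;
}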
paddle/fluid/platform/gpu_info.h
@@ -57,6 +57,10 @@ void SetDeviceId(int device_id);
 //! Get the memory usage of current GPU device.
 void GpuMemoryUsage(size_t *available, size_t *total);
 
+//! Get the available memory to allocate, which is the size of available gpu
+//! minus reserving.
+size_t GpuAvailableMemToAlloc();
+
 //! Get the maximum allocation size of current GPU device.
 size_t GpuMaxAllocSize();
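Editor's note: a typical call site for the new query, mirroring the FractionRefillPool test added in this commit (platform::GpuAvailableMemToAlloc and FLAGS_fraction_of_gpu_memory_to_use are the real identifiers; the snippet is a fragment, not a standalone program):

// Size the next pool refill as a fraction of what is still allocatable.
size_t next_refill = static_cast<size_t>(
    platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use);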