Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c64d9593
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c64d9593
编写于
3月 25, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
3月 25, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16295 from zhhsplendid/zhenghuihuang-dev-2
Add support for init_memory and re-allocate_memory
上级
b55dd32e
124f1df4
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
378 addition
and
45 deletion
+378
-45
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+3
-1
paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc
...luid/memory/allocation/allocator_facade_abs_flags_test.cc
+100
-0
paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc
...uid/memory/allocation/allocator_facade_frac_flags_test.cc
+14
-7
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+14
-6
paddle/fluid/memory/detail/CMakeLists.txt
paddle/fluid/memory/detail/CMakeLists.txt
+2
-0
paddle/fluid/memory/detail/buddy_allocator.cc
paddle/fluid/memory/detail/buddy_allocator.cc
+30
-16
paddle/fluid/memory/detail/buddy_allocator.h
paddle/fluid/memory/detail/buddy_allocator.h
+3
-1
paddle/fluid/memory/detail/buddy_allocator_test.cc
paddle/fluid/memory/detail/buddy_allocator_test.cc
+133
-0
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+15
-5
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+54
-8
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+6
-0
paddle/testing/paddle_gtest_main.cc
paddle/testing/paddle_gtest_main.cc
+2
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-1
未找到文件。
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
c64d9593
...
@@ -61,4 +61,6 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
...
@@ -61,4 +61,6 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc
0 → 100644
浏览文件 @
c64d9593
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
//! Run allocate test cases for different places.
//!
//! Requests memory from AllocatorFacade for the CPU place and, when built with
//! CUDA, for GPU 0 (a small and a 2 GB request) and for the CUDA pinned place.
//! Every case checks that the allocation and its pointer are non-null, that it
//! reports the requested place, and that its size covers the request (exactly
//! equal on CPU, at least as large on the CUDA places).
void AllocateTestCases() {
  auto &instance = AllocatorFacade::Instance();
  platform::Place place;
  size_t size = 1024;

  {
    place = platform::CPUPlace();
    size = 1024;
    auto cpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(cpu_allocation, nullptr);
    ASSERT_NE(cpu_allocation->ptr(), nullptr);
    ASSERT_EQ(cpu_allocation->place(), place);
    ASSERT_EQ(cpu_allocation->size(), size);
  }

#ifdef PADDLE_WITH_CUDA
  {
    place = platform::CUDAPlace(0);
    size = 1024;
    auto gpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), place);
    ASSERT_GE(gpu_allocation->size(), size);
  }

  {
    // Allocate 2GB gpu memory
    place = platform::CUDAPlace(0);
    size = 2 * static_cast<size_t>(1 << 30);
    auto gpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), place);
    ASSERT_GE(gpu_allocation->size(), size);
  }

  {
    place = platform::CUDAPinnedPlace();
    size = (1 << 20);
    // Pass the same `place` and `size` that the assertions below compare
    // against, instead of repeating the literals in the call.
    auto cuda_pinned_allocation = instance.Alloc(place, size);
    ASSERT_NE(cuda_pinned_allocation, nullptr);
    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
    ASSERT_EQ(cuda_pinned_allocation->place(), place);
    ASSERT_GE(cuda_pinned_allocation->size(), size);
  }
#endif
}
// Runs the shared allocation cases with the GPU pool sized by absolute flags
// (megabytes) rather than by a fraction of device memory.
TEST(Allocator, SpecifyGpuMemory) {
#ifdef PADDLE_WITH_CUDA
  FLAGS_gpu_allocator_retry_time = 500;
  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;

  // The fraction is zeroed so that the two absolute-size flags below are the
  // settings under test.
  FLAGS_fraction_of_gpu_memory_to_use = 0.0;
  FLAGS_initial_gpu_memory_in_mb = 512;   // first chunk: 512 MB
  FLAGS_reallocate_gpu_memory_in_mb = 4;  // follow-up chunks: 4 MB
#endif

  AllocateTestCases();
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/allocator_facade_test.cc
→
paddle/fluid/memory/allocation/allocator_facade_
frac_flags_
test.cc
浏览文件 @
c64d9593
...
@@ -19,6 +19,8 @@
...
@@ -19,6 +19,8 @@
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
DECLARE_int64
(
gpu_allocator_retry_time
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
#endif
...
@@ -26,13 +28,8 @@ namespace paddle {
...
@@ -26,13 +28,8 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
TEST
(
allocator
,
allocator
)
{
//! Run allocate test cases for different places
#ifdef PADDLE_WITH_CUDA
void
AllocateTestCases
()
{
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
auto
&
instance
=
AllocatorFacade
::
Instance
();
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
platform
::
Place
place
;
size_t
size
=
1024
;
size_t
size
=
1024
;
...
@@ -82,6 +79,16 @@ TEST(allocator, allocator) {
...
@@ -82,6 +79,16 @@ TEST(allocator, allocator) {
#endif
#endif
}
}
TEST
(
Allocator
,
Allocator
)
{
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
AllocateTestCases
();
}
}
// namespace allocation
}
// namespace allocation
}
// namespace memory
}
// namespace memory
}
// namespace paddle
}
// namespace paddle
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
c64d9593
...
@@ -37,6 +37,8 @@ DEFINE_bool(init_allocated_mem, false,
...
@@ -37,6 +37,8 @@ DEFINE_bool(init_allocated_mem, false,
"that initializing the allocated memory with a small value "
"that initializing the allocated memory with a small value "
"during unit testing."
);
"during unit testing."
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
DECLARE_bool
(
benchmark
);
DECLARE_bool
(
benchmark
);
namespace
paddle
{
namespace
paddle
{
...
@@ -153,12 +155,18 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
...
@@ -153,12 +155,18 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
platform
::
GpuMinChunkSize
(),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"You can set GFlags environment variable "
<<
"% of GPU memory.
\n
"
<<
"'FLAGS_fraction_of_gpu_memory_to_use' "
<<
"You can set GFlags environment variable '"
<<
"or 'FLAGS_initial_gpu_memory_in_mb' "
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"or 'FLAGS_reallocate_gpu_memory_in_mb' "
<<
"' to change the fraction of GPU usage.
\n\n
"
;
<<
"to change the memory size for GPU usage.
\n
"
<<
"Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
<<
FLAGS_fraction_of_gpu_memory_to_use
<<
". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
}
}
});
});
...
...
paddle/fluid/memory/detail/CMakeLists.txt
浏览文件 @
c64d9593
...
@@ -9,3 +9,5 @@ endif(${WITH_GPU})
...
@@ -9,3 +9,5 @@ endif(${WITH_GPU})
cc_test
(
system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator
)
cc_test
(
system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator
)
cc_library
(
buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog
)
cc_library
(
buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog
)
cc_test
(
buddy_allocator_test SRCS buddy_allocator_test.cc DEPS buddy_allocator
)
paddle/fluid/memory/detail/buddy_allocator.cc
浏览文件 @
c64d9593
...
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include <algorithm>
#include <utility>
#include "glog/logging.h"
#include "glog/logging.h"
DEFINE_bool
(
free_idle_memory
,
false
,
DEFINE_bool
(
free_idle_memory
,
false
,
...
@@ -36,9 +40,10 @@ BuddyAllocator::~BuddyAllocator() {
...
@@ -36,9 +40,10 @@ BuddyAllocator::~BuddyAllocator() {
"have actually been freed"
;
"have actually been freed"
;
while
(
!
pool_
.
empty
())
{
while
(
!
pool_
.
empty
())
{
auto
block
=
static_cast
<
MemoryBlock
*>
(
std
::
get
<
2
>
(
*
pool_
.
begin
()));
auto
block
=
static_cast
<
MemoryBlock
*>
(
std
::
get
<
2
>
(
*
pool_
.
begin
()));
VLOG
(
10
)
<<
"Free from block ("
<<
block
<<
", "
<<
max_chunk_size_
<<
")"
;
VLOG
(
10
)
<<
"Free from block ("
<<
block
<<
", "
<<
block
->
size
(
cache_
)
<<
")"
;
system_allocator_
->
Free
(
block
,
max_chunk_size_
,
block
->
index
(
cache_
));
system_allocator_
->
Free
(
block
,
block
->
size
(
cache_
)
,
block
->
index
(
cache_
));
cache_
.
invalidate
(
block
);
cache_
.
invalidate
(
block
);
pool_
.
erase
(
pool_
.
begin
());
pool_
.
erase
(
pool_
.
begin
());
}
}
...
@@ -71,7 +76,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
...
@@ -71,7 +76,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// refill the pool if failure
// refill the pool if failure
if
(
it
==
pool_
.
end
())
{
if
(
it
==
pool_
.
end
())
{
it
=
RefillPool
();
it
=
RefillPool
(
size
);
// if still failure, fail fatally
// if still failure, fail fatally
if
(
it
==
pool_
.
end
())
{
if
(
it
==
pool_
.
end
())
{
return
nullptr
;
return
nullptr
;
...
@@ -184,19 +189,28 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
...
@@ -184,19 +189,28 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
return
static_cast
<
MemoryBlock
*>
(
p
)
->
data
();
return
static_cast
<
MemoryBlock
*>
(
p
)
->
data
();
}
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
(
size_t
request_bytes
)
{
size_t
allocate_bytes
=
max_chunk_size_
;
size_t
index
=
0
;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
system_allocator_
->
UseGpu
())
{
if
(
system_allocator_
->
UseGpu
())
{
if
((
total_used_
+
total_free_
)
==
0
)
{
if
((
total_used_
+
total_free_
)
==
0
)
{
// Compute the maximum allocation size for the first allocation.
// Compute the allocation size for gpu for the first allocation.
max_chunk_size_
=
platform
::
GpuMaxChunkSize
();
allocate_bytes
=
std
::
max
(
platform
::
GpuInitAllocSize
(),
request_bytes
);
}
else
{
// Reallocation size
if
(
realloc_size_
==
0
)
{
realloc_size_
=
platform
::
GpuReallocSize
();
}
allocate_bytes
=
std
::
max
(
realloc_size_
,
request_bytes
);
}
}
}
}
#endif
#endif
// Allocate a new maximum sized block
// Allocate a new block
size_t
index
=
0
;
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
allocate_bytes
);
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
max_chunk_size_
);
if
(
p
==
nullptr
)
return
pool_
.
end
();
if
(
p
==
nullptr
)
return
pool_
.
end
();
...
@@ -204,7 +218,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
...
@@ -204,7 +218,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
<<
" from system allocator"
;
<<
" from system allocator"
;
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
MemoryBlock
::
FREE_CHUNK
,
index
,
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
MemoryBlock
::
FREE_CHUNK
,
index
,
max_chunk_size_
,
nullptr
,
nullptr
);
allocate_bytes
,
nullptr
,
nullptr
);
// gpu fallback allocation
// gpu fallback allocation
if
(
system_allocator_
->
UseGpu
()
&&
if
(
system_allocator_
->
UseGpu
()
&&
...
@@ -212,10 +226,10 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
...
@@ -212,10 +226,10 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
fallback_alloc_count_
++
;
fallback_alloc_count_
++
;
}
}
total_free_
+=
max_chunk_size_
;
total_free_
+=
allocate_bytes
;
// dump the block into pool
// dump the block into pool
return
pool_
.
insert
(
IndexSizeAddress
(
index
,
max_chunk_size_
,
p
)).
first
;
return
pool_
.
insert
(
IndexSizeAddress
(
index
,
allocate_bytes
,
p
)).
first
;
}
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
FindExistChunk
(
size_t
size
)
{
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
FindExistChunk
(
size_t
size
)
{
...
@@ -286,12 +300,12 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
...
@@ -286,12 +300,12 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
VLOG
(
10
)
<<
"Return block "
<<
block
<<
" to fallback allocator."
;
VLOG
(
10
)
<<
"Return block "
<<
block
<<
" to fallback allocator."
;
system_allocator_
->
Free
(
block
,
max_chunk_size_
,
block
->
index
(
cache_
));
system_allocator_
->
Free
(
block
,
block
->
size
(
cache_
)
,
block
->
index
(
cache_
));
cache_
.
invalidate
(
block
);
cache_
.
invalidate
(
block
);
pool
=
PoolSet
::
reverse_iterator
(
pool_
.
erase
(
std
::
next
(
pool
).
base
()));
pool
=
PoolSet
::
reverse_iterator
(
pool_
.
erase
(
std
::
next
(
pool
).
base
()));
total_free_
-=
max_chunk_size_
;
total_free_
-=
block
->
size
(
cache_
)
;
fallback_alloc_count_
--
;
fallback_alloc_count_
--
;
// If no fall allocation exists, return directly
// If no fall allocation exists, return directly
...
@@ -322,12 +336,12 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
...
@@ -322,12 +336,12 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
VLOG
(
10
)
<<
"Return block "
<<
block
<<
" to base allocator."
;
VLOG
(
10
)
<<
"Return block "
<<
block
<<
" to base allocator."
;
system_allocator_
->
Free
(
block
,
max_chunk_size_
,
block
->
index
(
cache_
));
system_allocator_
->
Free
(
block
,
block
->
size
(
cache_
)
,
block
->
index
(
cache_
));
cache_
.
invalidate
(
block
);
cache_
.
invalidate
(
block
);
pool
=
PoolSet
::
reverse_iterator
(
pool_
.
erase
(
std
::
next
(
pool
).
base
()));
pool
=
PoolSet
::
reverse_iterator
(
pool_
.
erase
(
std
::
next
(
pool
).
base
()));
total_free_
-=
max_chunk_size_
;
total_free_
-=
block
->
size
(
cache_
)
;
if
(
!
shall_free_alloc
())
return
;
if
(
!
shall_free_alloc
())
return
;
}
}
...
...
paddle/fluid/memory/detail/buddy_allocator.h
浏览文件 @
c64d9593
...
@@ -60,7 +60,7 @@ class BuddyAllocator {
...
@@ -60,7 +60,7 @@ class BuddyAllocator {
void
*
SystemAlloc
(
size_t
size
);
void
*
SystemAlloc
(
size_t
size
);
/*! \brief If existing chunks are not suitable, refill pool */
/*! \brief If existing chunks are not suitable, refill pool */
PoolSet
::
iterator
RefillPool
();
PoolSet
::
iterator
RefillPool
(
size_t
request_bytes
);
/**
/**
* \brief Find the suitable chunk from existing pool and split
* \brief Find the suitable chunk from existing pool and split
...
@@ -89,6 +89,8 @@ class BuddyAllocator {
...
@@ -89,6 +89,8 @@ class BuddyAllocator {
size_t
min_chunk_size_
;
// the minimum size of each chunk
size_t
min_chunk_size_
;
// the minimum size of each chunk
size_t
max_chunk_size_
;
// the maximum size of each chunk
size_t
max_chunk_size_
;
// the maximum size of each chunk
size_t
realloc_size_
=
0
;
// the size of re-allocated chunk
private:
private:
/**
/**
* \brief A list of free allocation
* \brief A list of free allocation
...
...
paddle/fluid/memory/detail/buddy_allocator_test.cc
0 → 100644
浏览文件 @
c64d9593
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include <memory>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
#endif
namespace
paddle
{
namespace
memory
{
namespace
detail
{
// Index of the GPU device that the GPUAllocator instances in this file use.
static constexpr int test_gpu_id = 0;
/**
 * \brief Allocate `size_bytes` from `allocator`, check how Used() reacts,
 *        free the memory again and check that Used() is back at its
 *        starting value.
 *
 * \param allocator  Buddy allocator under test; dereferenced unconditionally.
 * \param size_bytes Number of bytes to request. With 0 no allocation is made
 *                   and only the stability of Used() is checked.
 */
void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
  bool freed = false;
  const size_t used_bytes = allocator->Used();

  if (size_bytes > 0) {
    void* p = allocator->Alloc(size_bytes);
    // Fatal assertion: without a valid pointer the remaining checks (and the
    // Free() call in the deleter below) would operate on nullptr.
    ASSERT_NE(p, nullptr);

#ifdef PADDLE_WITH_CUDA
    const size_t max_chunk_size = platform::GpuMaxChunkSize();
#else
    const size_t max_chunk_size = platform::CpuMaxChunkSize();
#endif
    if (size_bytes < max_chunk_size) {
      // Requests below the maximum chunk size are expected to show up in
      // Used().
      EXPECT_GE(allocator->Used(), used_bytes + size_bytes);
    } else {
      // Requests of at least the maximum chunk size are expected to leave
      // Used() untouched.
      EXPECT_EQ(allocator->Used(), used_bytes);
    }

    // Hand the memory to a shared_ptr whose deleter returns it to the buddy
    // allocator; the deleter runs when `ptr` goes out of scope at the end of
    // this branch.
    int* intp = static_cast<int*>(p);
    std::shared_ptr<int> ptr(intp, [&](void* raw) {
      allocator->Free(raw);
      freed = true;
    });
  } else {
    freed = true;
  }

  EXPECT_EQ(used_bytes, allocator->Used());
  EXPECT_TRUE(freed);
}
#ifdef PADDLE_WITH_CUDA
// Exercises a GPU buddy allocator whose chunk sizes are derived from
// FLAGS_fraction_of_gpu_memory_to_use.
TEST(BuddyAllocator, GpuFraction) {
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
  // A non-zero value in either of these flags overrides the fraction (see
  // platform::GpuInitAllocSize / platform::GpuReallocSize). Other tests in
  // this file set them, so reset both to keep this test independent of the
  // order in which tests run.
  FLAGS_initial_gpu_memory_in_mb = 0;
  FLAGS_reallocate_gpu_memory_in_mb = 0;

  BuddyAllocator buddy_allocator(
      std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

  TestBuddyAllocator(&buddy_allocator, 10);
  TestBuddyAllocator(&buddy_allocator, 10 << 10);
  TestBuddyAllocator(&buddy_allocator, 10 << 20);
  TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
}
// Exercises a GPU buddy allocator configured with an initial chunk of 100 MB
// and re-allocation chunks of 50 MB.
TEST(BuddyAllocator, InitRealloc) {
  FLAGS_initial_gpu_memory_in_mb = 100;
  FLAGS_reallocate_gpu_memory_in_mb = 50;

  EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(100 << 20));

  std::unique_ptr<SystemAllocator> system_allocator(
      new GPUAllocator(test_gpu_id));
  BuddyAllocator buddy_allocator(std::move(system_allocator),
                                 platform::GpuMinChunkSize(),
                                 platform::GpuMaxChunkSize());

  // Request sizes, issued in this order.
  const size_t request_sizes[] = {
      10 << 20,  // 10 MB: less than both the initial and the reallocate size
      80 << 20,  // 80 MB: between the reallocate and the initial size
      40 << 20,  // 40 MB: less than the reallocate size
      80 << 20,  // 80 MB: greater than the reallocate size
      2 * static_cast<size_t>(1 << 30),  // 2 GB: greater than both sizes
  };
  for (const size_t request : request_sizes) {
    TestBuddyAllocator(&buddy_allocator, request);
  }
}
// Exercises a GPU buddy allocator whose re-allocation chunk (10 MB) is larger
// than its initial chunk (5 MB).
TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
  FLAGS_initial_gpu_memory_in_mb = 5;
  FLAGS_reallocate_gpu_memory_in_mb = 10;

  EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(10 << 20));

  std::unique_ptr<SystemAllocator> system_allocator(
      new GPUAllocator(test_gpu_id));
  BuddyAllocator buddy_allocator(std::move(system_allocator),
                                 platform::GpuMinChunkSize(),
                                 platform::GpuMaxChunkSize());

  // Request sizes, issued in this order.
  const size_t request_sizes[] = {
      1 << 20,  // 1 MB: less than both the initial and the reallocate size
      3 << 20,  // 3 MB: less than both the initial and the reallocate size
      3 << 20,  // 3 MB again
      7 << 20,  // 7 MB: between the initial and the reallocate size
      8 << 20,  // 8 MB: between the initial and the reallocate size
      2 * static_cast<size_t>(1 << 30),  // 2 GB: greater than both sizes
  };
  for (const size_t request : request_sizes) {
    TestBuddyAllocator(&buddy_allocator, request);
  }
}
#endif
}
// namespace detail
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
c64d9593
...
@@ -32,6 +32,9 @@ limitations under the License. */
...
@@ -32,6 +32,9 @@ limitations under the License. */
DECLARE_bool
(
use_pinned_memory
);
DECLARE_bool
(
use_pinned_memory
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
namespace
paddle
{
namespace
paddle
{
namespace
memory
{
namespace
memory
{
namespace
detail
{
namespace
detail
{
...
@@ -119,11 +122,18 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
...
@@ -119,11 +122,18 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
gpu_alloc_size_
+=
size
;
gpu_alloc_size_
+=
size
;
return
p
;
return
p
;
}
else
{
}
else
{
LOG
(
WARNING
)
LOG
(
WARNING
)
<<
"Cannot malloc "
<<
size
/
1024.0
/
1024.0
<<
"Cannot malloc "
<<
size
/
1024.0
/
1024.0
<<
" MB GPU memory. Please shrink "
<<
" MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use "
"FLAGS_fraction_of_gpu_memory_to_use or "
"environment variable to a lower value. Current value is "
"FLAGS_initial_gpu_memory_in_mb or "
<<
FLAGS_fraction_of_gpu_memory_to_use
;
"FLAGS_reallocate_gpu_memory_in_mb"
"environment variable to a lower value. "
<<
"Current FLAGS_fraction_of_gpu_memory_to_use value is "
<<
FLAGS_fraction_of_gpu_memory_to_use
<<
". Current FLAGS_initial_gpu_memory_in_mb value is "
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current FLAGS_reallocate_gpu_memory_in_mb value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
;
return
nullptr
;
return
nullptr
;
}
}
}
}
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
c64d9593
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include <algorithm>
#include <algorithm>
#include <cstdlib>
#include <cstdlib>
#include <string>
#include <string>
...
@@ -31,6 +30,8 @@ constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
...
@@ -31,6 +30,8 @@ constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
constexpr
static
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
constexpr
static
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
#endif
#endif
// Fraction of the total GPU memory that is subtracted as a reserve before the
// usage fraction is applied.
constexpr static float fraction_reserve_gpu_memory = 0.05f;
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
fraction_of_gpu_memory_to_use
,
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
fraction_of_gpu_memory_to_use
,
"Allocate a trunk of gpu memory that is this fraction of the "
"Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated "
"total gpu memory size. Future memory usage will be allocated "
...
@@ -38,6 +39,24 @@ DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
...
@@ -38,6 +39,24 @@ DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
"additional trunks of the same size will be requested from gpu "
"additional trunks of the same size will be requested from gpu "
"until the gpu has no memory left for another trunk."
);
"until the gpu has no memory left for another trunk."
);
// Size of the first GPU memory chunk. GpuInitAllocSize() shifts the value left
// by 20 bits, i.e. it is given in MB. The default 0 makes GpuInitAllocSize()
// fall back to FLAGS_fraction_of_gpu_memory_to_use.
DEFINE_uint64(
    initial_gpu_memory_in_mb, 0ul,
    "Allocate a trunk of gpu memory whose byte size is specified by "
    "the flag. Future memory usage will be allocated from the "
    "trunk. If the trunk doesn't have enough gpu memory, additional "
    "trunks of the gpu memory will be requested from gpu with size "
    "specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has "
    "no memory left for the additional trunk. Note: if you set this "
    "flag, the memory size set by "
    "FLAGS_fraction_of_gpu_memory_to_use will be overridden by this "
    "flag. If you don't set this flag, PaddlePaddle will use "
    "FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory");
// Size of every GPU memory chunk requested after the first one. The default 0
// leaves the size to be derived from FLAGS_fraction_of_gpu_memory_to_use.
DEFINE_uint64(
    reallocate_gpu_memory_in_mb, 0ul,
    "If this flag is set, Paddle will reallocate the gpu memory "
    "with size specified by this flag. Else Paddle will "
    "reallocate by FLAGS_fraction_of_gpu_memory_to_use");
DEFINE_bool
(
DEFINE_bool
(
enable_cublas_tensor_op_math
,
false
,
enable_cublas_tensor_op_math
,
false
,
"The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
"The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
...
@@ -180,13 +199,43 @@ void GpuMemoryUsage(size_t *available, size_t *total) {
...
@@ -180,13 +199,43 @@ void GpuMemoryUsage(size_t *available, size_t *total) {
}
}
size_t
GpuMaxAllocSize
()
{
size_t
GpuMaxAllocSize
()
{
return
std
::
max
(
GpuInitAllocSize
(),
GpuReallocSize
());
}
size_t
GpuInitAllocSize
()
{
if
(
FLAGS_initial_gpu_memory_in_mb
>
0ul
)
{
// Initial memory will be allocated by FLAGS_initial_gpu_memory_in_mb
return
static_cast
<
size_t
>
(
FLAGS_initial_gpu_memory_in_mb
<<
20
);
}
// FLAGS_initial_gpu_memory_in_mb is 0, initial memory will be allocated by
// fraction
size_t
total
=
0
;
size_t
total
=
0
;
size_t
available
=
0
;
size_t
available
=
0
;
GpuMemoryUsage
(
&
available
,
&
total
);
GpuMemoryUsage
(
&
available
,
&
total
);
size_t
reserving
=
static_cast
<
size_t
>
(
fraction_reserve_gpu_memory
*
total
);
// Reserve the rest for page tables, etc.
return
static_cast
<
size_t
>
((
total
-
reserving
)
*
return
static_cast
<
size_t
>
(
total
*
FLAGS_fraction_of_gpu_memory_to_use
);
FLAGS_fraction_of_gpu_memory_to_use
);
}
// Returns the size in bytes of a GPU memory chunk requested after the initial
// one.
size_t GpuReallocSize() {
  // A non-zero FLAGS_reallocate_gpu_memory_in_mb decides the size directly;
  // shifting by 20 bits converts the MB value to bytes.
  const bool size_given_by_flag = FLAGS_reallocate_gpu_memory_in_mb > 0ul;
  if (size_given_by_flag) {
    return static_cast<size_t>(FLAGS_reallocate_gpu_memory_in_mb << 20);
  }

  // Otherwise apply FLAGS_fraction_of_gpu_memory_to_use to the total device
  // memory minus the reserved share.
  size_t total_bytes = 0;
  size_t available_bytes = 0;
  GpuMemoryUsage(&available_bytes, &total_bytes);

  const size_t reserved_bytes =
      static_cast<size_t>(fraction_reserve_gpu_memory * total_bytes);
  const size_t usable_bytes = total_bytes - reserved_bytes;
  return static_cast<size_t>(usable_bytes *
                             FLAGS_fraction_of_gpu_memory_to_use);
}
}
size_t
GpuMinChunkSize
()
{
size_t
GpuMinChunkSize
()
{
...
@@ -201,16 +250,13 @@ size_t GpuMaxChunkSize() {
...
@@ -201,16 +250,13 @@ size_t GpuMaxChunkSize() {
GpuMemoryUsage
(
&
available
,
&
total
);
GpuMemoryUsage
(
&
available
,
&
total
);
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
<<
total
/
1024
/
1024
<<
"M"
;
<<
total
/
1024
/
1024
<<
"M"
;
size_t
reserving
=
static_cast
<
size_t
>
(
0.05
*
total
);
size_t
reserving
=
static_cast
<
size_t
>
(
fraction_reserve_gpu_memory
*
total
);
// If available less than minimum chunk size, no usable memory exists.
// If available less than minimum chunk size, no usable memory exists.
available
=
available
=
std
::
min
(
std
::
max
(
available
,
GpuMinChunkSize
())
-
GpuMinChunkSize
(),
std
::
min
(
std
::
max
(
available
,
GpuMinChunkSize
())
-
GpuMinChunkSize
(),
total
-
reserving
);
total
-
reserving
);
// Reserving the rest memory for page tables, etc.
size_t
allocating
=
GpuMaxAllocSize
();
size_t
allocating
=
static_cast
<
size_t
>
(
FLAGS_fraction_of_gpu_memory_to_use
*
(
total
-
reserving
));
PADDLE_ENFORCE_LE
(
allocating
,
available
,
PADDLE_ENFORCE_LE
(
allocating
,
available
,
"Insufficient GPU memory to allocation."
);
"Insufficient GPU memory to allocation."
);
...
...
paddle/fluid/platform/gpu_info.h
浏览文件 @
c64d9593
...
@@ -60,6 +60,12 @@ void GpuMemoryUsage(size_t *available, size_t *total);
...
@@ -60,6 +60,12 @@ void GpuMemoryUsage(size_t *available, size_t *total);
//! Get the maximum allocation size of current GPU device.
//! Get the maximum allocation size of current GPU device.
size_t
GpuMaxAllocSize
();
size_t
GpuMaxAllocSize
();
//! Get the initial allocation size of current GPU device.
size_t
GpuInitAllocSize
();
//! Get the re-allocation size of current GPU device.
size_t
GpuReallocSize
();
//! Get the minimum chunk size for GPU buddy allocator.
//! Get the minimum chunk size for GPU buddy allocator.
size_t
GpuMinChunkSize
();
size_t
GpuMinChunkSize
();
...
...
paddle/testing/paddle_gtest_main.cc
浏览文件 @
c64d9593
...
@@ -41,6 +41,8 @@ int main(int argc, char** argv) {
...
@@ -41,6 +41,8 @@ int main(int argc, char** argv) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
envs
.
push_back
(
"fraction_of_gpu_memory_to_use"
);
envs
.
push_back
(
"fraction_of_gpu_memory_to_use"
);
envs
.
push_back
(
"initial_gpu_memory_in_mb"
);
envs
.
push_back
(
"reallocate_gpu_memory_in_mb"
);
envs
.
push_back
(
"allocator_strategy"
);
envs
.
push_back
(
"allocator_strategy"
);
#elif __clang__
#elif __clang__
envs
.
push_back
(
"use_mkldnn"
);
envs
.
push_back
(
"use_mkldnn"
);
...
...
python/paddle/fluid/__init__.py
浏览文件 @
c64d9593
...
@@ -163,7 +163,8 @@ def __bootstrap__():
...
@@ -163,7 +163,8 @@ def __bootstrap__():
if
core
.
is_compiled_with_cuda
():
if
core
.
is_compiled_with_cuda
():
read_env_flags
+=
[
read_env_flags
+=
[
'fraction_of_gpu_memory_to_use'
,
'cudnn_deterministic'
,
'fraction_of_gpu_memory_to_use'
,
'initial_gpu_memory_in_mb'
,
'reallocate_gpu_memory_in_mb'
,
'cudnn_deterministic'
,
'enable_cublas_tensor_op_math'
,
'conv_workspace_size_limit'
,
'enable_cublas_tensor_op_math'
,
'conv_workspace_size_limit'
,
'cudnn_exhaustive_search'
,
'memory_optimize_debug'
,
'selected_gpus'
,
'cudnn_exhaustive_search'
,
'memory_optimize_debug'
,
'selected_gpus'
,
'sync_nccl_allreduce'
,
'limit_of_tmp_allocation'
,
'sync_nccl_allreduce'
,
'limit_of_tmp_allocation'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录