Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
953214ad
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
953214ad
编写于
3月 19, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add more unittest
modify allocator strategy remove changes of legacy buddy_allocator test=develop
上级
fd23262e
变更
34
隐藏空白更改
内联
并排
Showing
34 changed file
with
615 addition
and
306 deletion
+615
-306
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+2
-0
paddle/fluid/framework/inlined_vector.h
paddle/fluid/framework/inlined_vector.h
+19
-10
paddle/fluid/framework/inlined_vector_test.cc
paddle/fluid/framework/inlined_vector_test.cc
+53
-0
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+16
-21
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+1
-0
paddle/fluid/memory/allocation/allocator.cc
paddle/fluid/memory/allocation/allocator.cc
+4
-4
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+12
-14
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+37
-18
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+4
-2
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
...fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+4
-6
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
.../fluid/memory/allocation/auto_growth_best_fit_allocator.h
+2
-2
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
.../allocation/auto_growth_best_fit_allocator_facade_test.cc
+96
-0
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
.../memory/allocation/auto_growth_best_fit_allocator_test.cc
+3
-7
paddle/fluid/memory/allocation/buffered_allocator_test.cc
paddle/fluid/memory/allocation/buffered_allocator_test.cc
+1
-0
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+11
-27
paddle/fluid/memory/allocation/locked_allocator.cc
paddle/fluid/memory/allocation/locked_allocator.cc
+2
-0
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
...e/fluid/memory/allocation/multi_bin_buffered_allocator.cc
+99
-55
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
...le/fluid/memory/allocation/multi_bin_buffered_allocator.h
+11
-3
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
...id/memory/allocation/multi_bin_buffered_allocator_test.cc
+23
-1
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+94
-0
paddle/fluid/memory/allocation/retry_allocator.h
paddle/fluid/memory/allocation/retry_allocator.h
+1
-0
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
...cation/test_multi_bin_buffered_allocator_division_plan.cc
+56
-0
paddle/fluid/memory/allocation/zero_size_allocator.cc
paddle/fluid/memory/allocation/zero_size_allocator.cc
+9
-8
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+1
-6
paddle/fluid/memory/detail/buddy_allocator.cc
paddle/fluid/memory/detail/buddy_allocator.cc
+36
-39
paddle/fluid/memory/detail/buddy_allocator.h
paddle/fluid/memory/detail/buddy_allocator.h
+3
-8
paddle/fluid/memory/detail/memory_block.h
paddle/fluid/memory/detail/memory_block.h
+4
-5
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+1
-58
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+0
-6
paddle/fluid/platform/temporary_allocator.cc
paddle/fluid/platform/temporary_allocator.cc
+1
-0
paddle/fluid/platform/temporary_allocator.h
paddle/fluid/platform/temporary_allocator.h
+1
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-0
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+2
-4
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-2
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
953214ad
...
...
@@ -202,6 +202,8 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test
(
tuple_test SRCS tuple_test.cc
)
cc_test
(
inlined_vector_test SRCS inlined_vector_test.cc
)
if
(
NOT WIN32
)
cc_test
(
rw_lock_test SRCS rw_lock_test.cc
)
endif
(
NOT WIN32
)
...
...
paddle/fluid/framework/inlined_
stack
.h
→
paddle/fluid/framework/inlined_
vector
.h
浏览文件 @
953214ad
...
...
@@ -14,18 +14,18 @@
#pragma once
#include <
deque
>
#include <
vector
>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
T
,
size_t
N
>
class
Inlined
Stack
{
class
Inlined
Vector
{
static_assert
(
N
>
0
,
"N must be larger than 0"
);
public:
inline
void
push
(
const
T
&
item
)
{
inline
void
push
_back
(
const
T
&
item
)
{
if
(
size_
<
N
)
{
head_
[
size_
]
=
item
;
}
else
{
...
...
@@ -34,21 +34,21 @@ class InlinedStack {
++
size_
;
}
inline
void
pop
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to pop
element from empty stack
."
);
inline
void
pop
_back
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to pop
back element from empty vector
."
);
if
(
size_
>
N
)
{
tail_
.
pop_back
();
}
--
size_
;
}
inline
const
T
&
top
()
const
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get
top element of empty stack
."
);
inline
const
T
&
back
()
const
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get
back element of empty vector
."
);
return
size_
<=
N
?
head_
[
size_
-
1
]
:
tail_
.
back
();
}
inline
T
&
top
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get
top element of empty stack
."
);
inline
T
&
back
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get
back element of empty vector
."
);
return
size_
<=
N
?
head_
[
size_
-
1
]
:
tail_
.
back
();
}
...
...
@@ -63,10 +63,19 @@ class InlinedStack {
return
i
<
N
?
head_
[
i
]
:
tail_
[
i
-
N
];
}
operator
std
::
vector
<
T
>
()
const
{
std
::
vector
<
T
>
ret
;
ret
.
reserve
(
size_
);
for
(
size_t
i
=
0
;
i
<
size_
;
++
i
)
{
ret
.
emplace_back
((
*
this
)[
i
]);
}
return
ret
;
}
private:
T
head_
[
N
];
size_t
size_
{
0
};
std
::
deque
<
T
>
tail_
;
std
::
vector
<
T
>
tail_
;
};
}
// namespace framework
...
...
paddle/fluid/framework/inlined_vector_test.cc
0 → 100644
浏览文件 @
953214ad
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <vector>
#include "gtest/gtest.h"
namespace
paddle
{
namespace
framework
{
TEST
(
inlined_stack
,
inlined_stack
)
{
size_t
max_num
=
10
;
InlinedVector
<
size_t
,
5
>
stack
;
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
.
size
(),
i
);
stack
.
push_back
(
i
);
ASSERT_EQ
(
stack
.
size
(),
i
+
1
);
}
std
::
vector
<
size_t
>
vec
=
stack
;
ASSERT_EQ
(
stack
.
size
(),
vec
.
size
());
for
(
size_t
i
=
0
;
i
<
vec
.
size
();
++
i
)
{
ASSERT_EQ
(
stack
[
i
],
vec
[
i
]);
}
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
[
i
],
i
);
}
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
.
back
(),
max_num
-
1
-
i
);
stack
.
pop_back
();
ASSERT_EQ
(
stack
.
size
(),
max_num
-
1
-
i
);
}
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
953214ad
...
...
@@ -3,13 +3,18 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library
(
best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator
)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator
)
cc_library
(
multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator
gflags
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator
)
cc_library
(
auto_increment_best_fit_allocator SRCS auto_increment_best_fit_allocator.cc DEPS allocator
)
cc_test
(
auto_increment_best_fit_allocator_test SRCS auto_increment_best_fit_allocator_test.cc DEPS cpu_allocator auto_increment_best_fit_allocator
)
cc_library
(
auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator
)
cc_test
(
auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator
)
if
(
NOT WIN32
)
cc_test
(
test_multi_bin_buffered_allocator_division_plan SRCS test_multi_bin_buffered_allocator_division_plan.cc DEPS multi_bin_buffered_allocator
)
endif
()
if
(
WITH_GPU
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
...
...
@@ -42,30 +47,20 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator multi_bin_buffered_allocator auto_growth_best_fit_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_library
(
conditional_allocator SRCS conditional_allocator.cc DEPS allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS
${
AllocatorFacadeDeps
}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
multi_bin_buffered_allocator
auto_increment_best_fit_allocator
allocator_strategy
legacy_allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
${
AllocatorFacadeDeps
}
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy
)
nv_test
(
allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade
)
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
953214ad
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
...
...
paddle/fluid/memory/allocation/allocator.cc
浏览文件 @
953214ad
...
...
@@ -27,24 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr
Allocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
ptr
=
AllocateImpl
(
size
,
attr
);
ptr
->
Register
AllocatorChain
(
this
);
ptr
->
Register
DecoratedAllocator
(
this
);
return
AllocationPtr
(
ptr
);
}
void
Allocator
::
FreeImpl
(
Allocation
*
allocation
)
{
Allocator
*
allocator
=
allocation
->
TopAllocator
();
Allocator
*
allocator
=
allocation
->
Top
Decorated
Allocator
();
allocator
->
Free
(
allocation
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
allocation
->
PopAllocator
();
allocation
->
Pop
Decorated
Allocator
();
FreeImpl
(
allocation
);
}
const
char
*
BadAlloc
::
what
()
const
noexcept
{
return
msg_
.
c_str
();
}
void
AllocationDeleter
::
operator
()(
Allocation
*
allocation
)
const
{
Allocator
*
allocator
=
allocation
->
TopAllocator
();
Allocator
*
allocator
=
allocation
->
Top
Decorated
Allocator
();
allocator
->
Free
(
allocation
);
}
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
953214ad
...
...
@@ -15,8 +15,9 @@
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/inlined_
stack
.h"
#include "paddle/fluid/framework/inlined_
vector
.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -78,29 +79,26 @@ class Allocation {
virtual
~
Allocation
();
// This function should only be used in unittest
std
::
vector
<
Allocator
*>
GetAllocatorChain
()
const
{
std
::
vector
<
Allocator
*>
allocators
;
for
(
size_t
i
=
0
;
i
<
allocator_chain_
.
size
();
++
i
)
{
allocators
.
push_back
(
allocator_chain_
[
i
]);
}
return
allocators
;
private:
std
::
vector
<
Allocator
*>
DecoratedAllocators
()
const
{
return
static_cast
<
std
::
vector
<
Allocator
*>>
(
decorated_allocators_
);
}
private:
inline
void
RegisterAllocatorChain
(
Allocator
*
allocator
)
{
allocator_chain_
.
push
(
allocator
);
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
decorated_allocators_
.
push_back
(
allocator
);
}
inline
void
Pop
Allocator
()
{
allocator_chain_
.
pop
();
}
inline
void
Pop
DecoratedAllocator
()
{
decorated_allocators_
.
pop_back
();
}
inline
Allocator
*
TopAllocator
()
{
return
allocator_chain_
.
top
();
}
inline
Allocator
*
TopDecoratedAllocator
()
{
return
decorated_allocators_
.
back
();
}
private:
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
framework
::
Inlined
Stack
<
Allocator
*
,
8
>
allocator_chain
_
;
framework
::
Inlined
Vector
<
Allocator
*
,
8
>
decorated_allocators
_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
953214ad
...
...
@@ -17,12 +17,13 @@
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
...
...
@@ -32,6 +33,7 @@
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
...
...
@@ -51,6 +53,21 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAndBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
,
bool
enable_buffered
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
if
(
enable_buffered
)
{
allocator
.
reset
(
new
MultiBinBufferedAllocator
(
allocator
));
}
return
allocator
;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime.
class
CPUManagedAllocator
:
public
Allocator
{
public:
...
...
@@ -117,17 +134,10 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
if
(
retry_time_
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time_
);
allocator
.
reset
(
retry_allocator
);
}
allocator
=
WrapRetryAndBufferedAllocator
(
allocator
,
retry_time_
,
FLAGS_enable_buffered_allocator
);
if
(
FLAGS_enable_buffered_allocator
)
{
allocator
.
reset
(
new
MultiBinBufferedAllocator
(
allocator
));
}
return
std
::
make_shared
<
AlignedAllocator
<
64u
>>
(
std
::
move
(
allocator
));
return
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
move
(
allocator
));
}
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
...
...
@@ -210,7 +220,7 @@ class AllocatorFacadePrivate {
break
;
}
case
AllocatorStrategy
::
kAutoGrowthBestFit
:
{
InitCPUAllocator
();
Init
AutoGrowth
CPUAllocator
();
InitAutoGrowthCUDAAllocator
();
InitAutoGrowthCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
...
...
@@ -224,15 +234,25 @@ class AllocatorFacadePrivate {
}
private:
void
InitAutoGrowthCPUAllocator
()
{
auto
cpu_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUAllocator
>
());
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cpu_allocator
,
platform
::
CpuMaxChunkSize
(),
4096
);
}
void
InitAutoGrowthCUDAAllocator
()
{
#ifdef PADDLE_WITH_CUDA
int
dev_cnt
=
platform
::
GetCUDADeviceCount
();
for
(
int
dev_id
=
0
;
dev_id
<
dev_cnt
;
++
dev_id
)
{
auto
cuda_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CUDAAllocator
>
(
platform
::
CUDAPlace
(
dev_id
)));
allocators_
[
platform
::
CUDAPlace
(
dev_id
)]
=
std
::
make_shared
<
AutoIncrementBestFitAllocator
>
(
cuda_allocator
,
platform
::
GpuMaxChunkSize
(),
4096
);
auto
allocator
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cuda_allocator
,
platform
::
GpuMaxChunkSize
(),
4096
);
allocators_
[
platform
::
CUDAPlace
(
dev_id
)]
=
WrapRetryAndBufferedAllocator
(
allocator
,
FLAGS_gpu_allocator_retry_time
,
false
);
}
#endif
}
...
...
@@ -242,7 +262,7 @@ class AllocatorFacadePrivate {
auto
cuda_pinned_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUPinnedAllocator
>
());
allocators_
[
platform
::
CUDAPinnedPlace
()]
=
std
::
make_shared
<
Auto
Increment
BestFitAllocator
>
(
std
::
make_shared
<
Auto
Growth
BestFitAllocator
>
(
cuda_pinned_allocator
,
platform
::
CUDAPinnedMaxChunkSize
(),
4096
);
#endif
}
...
...
@@ -300,8 +320,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
std
::
shared_ptr
<
Allocation
>
AllocatorFacade
::
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
).
release
(),
AllocationDeleter
());
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
));
}
AllocationPtr
AllocatorFacade
::
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
953214ad
...
...
@@ -19,7 +19,9 @@
DEFINE_string
(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"New means the experimental allocators of Fluid. in [legacy, new]"
);
"naive_best_fit means the experimental best fit allocator. "
"auto_growth_best_fit means the experimental auto growth best fit "
"allocator. Enum in [legacy, naive_best_fit, auto_growth_best_fit]."
);
namespace
paddle
{
namespace
memory
{
...
...
@@ -28,7 +30,7 @@ namespace allocation {
static
AllocatorStrategy
GetStrategyFromFlag
()
{
if
(
FLAGS_allocator_strategy
==
"legacy"
)
{
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"na
vi
e_best_fit"
)
{
}
else
if
(
FLAGS_allocator_strategy
==
"na
iv
e_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
if
(
FLAGS_allocator_strategy
==
"auto_growth_best_fit"
)
{
return
AllocatorStrategy
::
kAutoGrowthBestFit
;
...
...
paddle/fluid/memory/allocation/auto_
increment
_best_fit_allocator.cc
→
paddle/fluid/memory/allocation/auto_
growth
_best_fit_allocator.cc
浏览文件 @
953214ad
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_
increment
_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_
growth
_best_fit_allocator.h"
#include <algorithm>
#include <list>
#include <map>
...
...
@@ -29,16 +29,14 @@ static size_t align(size_t size, size_t alignment) {
return
remaining
==
0
?
size
:
size
+
alignment
-
remaining
;
}
Auto
IncrementBestFitAllocator
::
AutoIncrement
BestFitAllocator
(
Auto
GrowthBestFitAllocator
::
AutoGrowth
BestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
)
:
underlying_allocator_
(
underlying_allocator
),
chunk_size_
(
align
(
chunk_size
,
alignment
)),
alignment_
(
alignment
)
{}
Allocation
*
AutoIncrementBestFitAllocator
::
AllocateImpl
(
size_t
size
,
Attr
attr
)
{
if
(
size
==
0
)
return
nullptr
;
Allocation
*
AutoGrowthBestFitAllocator
::
AllocateImpl
(
size_t
size
,
Attr
attr
)
{
size
=
align
(
size
,
alignment_
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
auto
iter
=
free_blocks_
.
lower_bound
(
std
::
make_pair
(
size
,
nullptr
));
...
...
@@ -95,7 +93,7 @@ Allocation *AutoIncrementBestFitAllocator::AllocateImpl(size_t size,
return
new
Chunk
::
BlockAllocation
(
block_it
);
}
void
Auto
Increment
BestFitAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
void
Auto
Growth
BestFitAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
auto
&
block_it
=
static_cast
<
Chunk
::
BlockAllocation
*>
(
allocation
)
->
block_it_
;
auto
&
blocks
=
block_it
->
chunk_
->
blocks_
;
...
...
paddle/fluid/memory/allocation/auto_
increment
_best_fit_allocator.h
→
paddle/fluid/memory/allocation/auto_
growth
_best_fit_allocator.h
浏览文件 @
953214ad
...
...
@@ -25,9 +25,9 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
class
Auto
Increment
BestFitAllocator
:
public
Allocator
{
class
Auto
Growth
BestFitAllocator
:
public
Allocator
{
public:
explicit
Auto
Increment
BestFitAllocator
(
explicit
Auto
Growth
BestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
);
...
...
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
0 → 100644
浏览文件 @
953214ad
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
inline
size_t
AlignTo
(
size_t
size
,
size_t
alignment
=
4096
)
{
auto
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
alignment
-
remaining
;
}
TEST
(
allocator
,
allocator
)
{
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
FLAGS_allocator_strategy
=
"auto_growth_best_fit"
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
size_t
size
=
1024
;
{
place
=
platform
::
CPUPlace
();
size
=
1024
;
auto
cpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
cpu_allocation
,
nullptr
);
ASSERT_NE
(
cpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cpu_allocation
->
place
(),
place
);
ASSERT_EQ
(
cpu_allocation
->
size
(),
AlignTo
(
size
));
}
#ifdef PADDLE_WITH_CUDA
{
place
=
platform
::
CUDAPlace
(
0
);
size
=
1024
;
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
AlignTo
(
size
));
}
{
// Allocate 2GB gpu memory
place
=
platform
::
CUDAPlace
(
0
);
size
=
2
*
static_cast
<
size_t
>
(
1
<<
30
);
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
AlignTo
(
size
));
}
{
place
=
platform
::
CUDAPinnedPlace
();
size
=
(
1
<<
20
);
auto
cuda_pinned_allocation
=
instance
.
Alloc
(
platform
::
CUDAPinnedPlace
(),
1
<<
20
);
ASSERT_NE
(
cuda_pinned_allocation
,
nullptr
);
ASSERT_NE
(
cuda_pinned_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cuda_pinned_allocation
->
place
(),
place
);
ASSERT_GE
(
cuda_pinned_allocation
->
size
(),
AlignTo
(
size
));
}
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_
increment
_best_fit_allocator_test.cc
→
paddle/fluid/memory/allocation/auto_
growth
_best_fit_allocator_test.cc
浏览文件 @
953214ad
...
...
@@ -22,18 +22,18 @@
#include <iostream>
#include "paddle/fluid/memory/allocation/auto_
increment
_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_
growth
_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
TEST
(
allocator
,
auto_
increment
_best_fit_allocator
)
{
TEST
(
allocator
,
auto_
growth
_best_fit_allocator
)
{
auto
cpu_allocator
=
std
::
make_shared
<
CPUAllocator
>
();
auto
allocator
=
std
::
make_shared
<
Auto
Increment
BestFitAllocator
>
(
cpu_allocator
,
0
,
4096
);
std
::
make_shared
<
Auto
Growth
BestFitAllocator
>
(
cpu_allocator
,
0
,
4096
);
std
::
mutex
mtx
;
std
::
condition_variable
cv
;
...
...
@@ -60,13 +60,9 @@ TEST(allocator, auto_increment_best_fit_allocator) {
}
cv
.
notify_all
();
thread_main
();
for
(
auto
&
th
:
ths
)
{
th
.
join
();
}
std
::
cout
<<
"test ends"
<<
std
::
endl
;
}
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/buffered_allocator_test.cc
浏览文件 @
953214ad
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
953214ad
...
...
@@ -37,8 +37,6 @@ DEFINE_bool(init_allocated_mem, false,
"that initializing the allocated memory with a small value "
"during unit testing."
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
initial_gpu_memory_in_mb
);
DECLARE_double
(
reallocate_gpu_memory_in_mb
);
DECLARE_bool
(
benchmark
);
namespace
paddle
{
...
...
@@ -72,8 +70,7 @@ BuddyAllocator *GetCPUBuddyAllocator() {
std
::
call_once
(
init_flag
,
[]()
{
a
=
new
detail
::
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CPUAllocator
),
platform
::
CpuMinChunkSize
(),
platform
::
CpuMaxChunkSize
(),
platform
::
CpuMaxChunkSize
());
platform
::
CpuMinChunkSize
(),
platform
::
CpuMaxChunkSize
());
});
return
a
;
...
...
@@ -147,28 +144,16 @@ class GPUBuddyAllocatorList {
PADDLE_ENFORCE
(
dev_id
<
flags_
.
size
(),
"Invalid device id %s"
,
dev_id
);
std
::
call_once
(
flags_
[
dev_id
],
[
this
,
dev_id
]
{
platform
::
SetDeviceId
(
dev_id
);
size_t
first_size
=
platform
::
GpuFirstAllocateChunkSize
();
size_t
re_size
=
platform
::
GpuReAllocateChunkSize
();
allocators_
[
dev_id
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
first_size
,
re_size
);
VLOG
(
2
)
<<
"
\n\n
NOTE: each GPU device use "
<<
string
::
HumanReadableSize
(
first_size
)
<<
"(initial chunk) "
<<
string
::
HumanReadableSize
(
re_size
)
<<
"(reallocate chunk) "
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' or "
"'FLAGS_initial_gpu_memory_in_mb/"
"FLAGS_reallocate_gpu_memory_in_mb' to change the fraction "
"of GPU usage.
\n\n
"
;
VLOG
(
2
)
<<
"Currently, FLAGS_fraction_of_gpu_memory_to_use="
<<
FLAGS_fraction_of_gpu_memory_to_use
<<
", "
<<
"FLAGS_initial_gpu_memory_in_mb="
<<
FLAGS_initial_gpu_memory_in_mb
<<
", "
<<
"FLAGS_reallocate_gpu_memory_in_mb="
<<
FLAGS_reallocate_gpu_memory_in_mb
;
allocators_
[
dev_id
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
});
return
allocators_
[
dev_id
];
}
...
...
@@ -251,7 +236,6 @@ BuddyAllocator *GetCUDAPinnedBuddyAllocator() {
ba
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CUDAPinnedAllocator
),
platform
::
CUDAPinnedMinChunkSize
(),
platform
::
CUDAPinnedMaxChunkSize
(),
platform
::
CUDAPinnedMaxChunkSize
());
});
...
...
paddle/fluid/memory/allocation/locked_allocator.cc
浏览文件 @
953214ad
...
...
@@ -14,8 +14,10 @@
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
...
...
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
浏览文件 @
953214ad
...
...
@@ -17,20 +17,37 @@
#include <cctype>
#include <fstream>
#include <limits>
#include <mutex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include "paddle/fluid/platform/lock_guard_ptr.h"
DEFINE_double
(
buffered_allocator_excess_times
,
2
,
"Tolerant memory size times of buffered_allocator"
);
DEFINE_double
(
buffered_allocator_excess_times
,
2
,
"Excess memory size times of buffered_allocator. BufferedAllocator"
" would try to reuse memory freed previously, but the size of freed"
" allocation may not be exactly the same as the requested. Here, we"
" use a flag to control the excess times of reused memory size. "
"Not quite sure what is the best excess times value."
);
DEFINE_string
(
division_plan_path
,
""
,
"Division plan file path"
);
DEFINE_string
(
buffered_allocator_division_plan_path
,
""
,
"The file path which "
"determines the memory size division plans of BufferedAllocator."
"If it is empty, use the default division plan. The file must be a "
"text file which each lines indicates the bound of division plan. "
"For example, if the text file has 3 lines, which are '500M', '1G', "
" '2G', the division plan would be [0, 500M), [500M, 1G), [1G, 2G) "
"and [2G, +inf). Allocation request whose requested memory size is "
"inside the last interval of division plan would be dispatched to "
" underlying_allocator directly without caching when freed."
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
st
d
::
string
TrimStringAndToLow
erCase
(
const
std
::
string
&
str
)
{
st
atic
std
::
string
TrimStringAndToUpp
erCase
(
const
std
::
string
&
str
)
{
auto
not_space
=
[](
char
ch
)
{
return
std
::
isspace
(
ch
)
==
0
;
};
auto
first_idx
=
static_cast
<
size_t
>
(
std
::
find_if
(
str
.
begin
(),
str
.
end
(),
not_space
)
-
str
.
begin
());
...
...
@@ -38,41 +55,69 @@ std::string TrimStringAndToLowerCase(const std::string &str) {
std
::
find_if
(
str
.
rbegin
(),
str
.
rend
(),
not_space
)
-
str
.
rbegin
());
if
(
first_idx
==
str
.
size
()
||
last_idx
==
str
.
size
())
return
""
;
last_idx
=
str
.
size
()
-
1
-
last_idx
;
last_idx
=
str
.
size
()
-
last_idx
;
auto
ret
=
str
.
substr
(
first_idx
,
last_idx
-
first_idx
);
std
::
for_each
(
ret
.
begin
(),
ret
.
end
(),
[](
char
&
ch
)
{
ch
=
std
::
to
low
er
(
ch
);
});
[](
char
&
ch
)
{
ch
=
std
::
to
upp
er
(
ch
);
});
return
ret
;
}
static
size_t
ParseStringToBytes
(
const
std
::
string
&
str
)
{
std
::
string
ret
=
str
;
if
(
ret
.
back
()
==
'b'
)
{
ret
.
pop_back
();
namespace
{
enum
DivisionPlanFileStatus
{
kEOF
,
kException
,
kNormal
};
}
// NOLINT
static
size_t
ParseStringToBytes
(
const
std
::
string
&
original_str
,
DivisionPlanFileStatus
*
ret_code
)
{
std
::
string
str
=
TrimStringAndToUpperCase
(
original_str
);
if
(
str
.
empty
())
{
*
ret_code
=
kEOF
;
return
0
;
}
if
(
str
.
back
()
==
'B'
)
{
str
.
pop_back
();
if
(
str
.
empty
())
{
*
ret_code
=
kException
;
return
0
;
}
}
PADDLE_ENFORCE
(
!
ret
.
empty
(),
"Wrong format: %s"
,
str
);
size_t
multiples
=
1
;
switch
(
ret
.
back
())
{
case
'
g
'
:
switch
(
str
.
back
())
{
case
'
G
'
:
multiples
*=
(
static_cast
<
size_t
>
(
1
)
<<
30
);
break
;
case
'
m
'
:
case
'
M
'
:
multiples
*=
(
static_cast
<
size_t
>
(
1
)
<<
20
);
break
;
case
'
k
'
:
case
'
K
'
:
multiples
*=
(
static_cast
<
size_t
>
(
1
)
<<
10
);
break
;
default:
break
;
}
if
(
multiples
!=
1
)
ret
.
pop_back
();
ret
=
TrimStringAndToLowerCase
(
ret
);
double
ret_val
=
0.0
;
std
::
stringstream
ss
(
ret
);
PADDLE_ENFORCE
((
ss
>>
ret_val
).
good
(),
"Wrong format %s"
,
str
);
return
static_cast
<
size_t
>
(
ret_val
*
multiples
);
if
(
multiples
!=
1
)
{
str
.
pop_back
();
if
(
str
.
empty
())
{
*
ret_code
=
kException
;
return
0
;
}
}
str
=
TrimStringAndToUpperCase
(
str
);
double
mem_val
=
-
1.0
;
std
::
stringstream
ss
(
str
);
if
(
!
(
ss
>>
mem_val
)
||
mem_val
<
0
)
{
*
ret_code
=
kException
;
return
0
;
}
*
ret_code
=
kNormal
;
return
static_cast
<
size_t
>
(
mem_val
*
multiples
);
}
static
std
::
string
GetDebugStringOfPlan
(
const
std
::
vector
<
size_t
>
&
plan
)
{
...
...
@@ -84,16 +129,27 @@ static std::string GetDebugStringOfPlan(const std::vector<size_t> &plan) {
return
ret
+
"]"
;
}
st
atic
std
::
vector
<
size_t
>
Read
DivisionPlanFromFile
(
st
d
::
vector
<
size_t
>
ReadBufferedAllocator
DivisionPlanFromFile
(
const
std
::
string
&
filepath
)
{
std
::
ifstream
is
(
filepath
.
c_str
());
PADDLE_ENFORCE
(
is
.
good
(),
"File
not exist"
);
PADDLE_ENFORCE
(
is
.
good
(),
"File
%s not exist"
,
filepath
);
std
::
string
str
;
std
::
vector
<
size_t
>
plan
;
size_t
line_num
=
1
;
while
(
std
::
getline
(
is
,
str
).
good
())
{
str
=
TrimStringAndToLowerCase
(
str
);
if
(
str
.
empty
())
break
;
plan
.
push_back
(
ParseStringToBytes
(
str
));
DivisionPlanFileStatus
status
;
size_t
ret
=
ParseStringToBytes
(
str
,
&
status
);
if
(
status
==
kEOF
)
{
break
;
}
if
(
status
==
kException
)
{
PADDLE_THROW
(
"Invalid format in line %d of file %s: '%s'. Only support B, KB, MB, "
"GB."
,
line_num
,
filepath
,
str
);
}
plan
.
push_back
(
ret
);
++
line_num
;
}
return
plan
;
}
...
...
@@ -110,11 +166,12 @@ static void CheckAndModifyMemoryDivisionPlan(
}
PADDLE_ENFORCE
(
is_strictly_sorted
,
"Divison plan must be stricted sorted"
);
// Insert 0
and remove MAX
to division plan for clean binary searching code
// Insert 0 into the division plan for clean binary searching code
if
(
division_plan
->
empty
()
||
division_plan
->
front
()
!=
0
)
{
division_plan
->
insert
(
division_plan
->
begin
(),
0
);
}
// Remove MAX from the division plan for clean binary searching code
constexpr
auto
kSizeTypeMax
=
std
::
numeric_limits
<
size_t
>::
max
();
if
(
division_plan
->
back
()
==
kSizeTypeMax
)
{
division_plan
->
pop_back
();
...
...
@@ -124,21 +181,17 @@ static void CheckAndModifyMemoryDivisionPlan(
}
static
std
::
vector
<
size_t
>
GetDefaultDivisionPlan
()
{
if
(
!
FLAGS_division_plan_path
.
empty
())
{
return
ReadDivisionPlanFromFile
(
FLAGS_division_plan_path
);
if
(
!
FLAGS_buffered_allocator_division_plan_path
.
empty
())
{
return
ReadBufferedAllocatorDivisionPlanFromFile
(
FLAGS_buffered_allocator_division_plan_path
);
}
// Default division plan is 4K, 8K, 16K, ..., 512M, 1G (every power of two from 2^12 to 2^30)
constexpr
size_t
kMaxLogSize
=
30
;
std
::
vector
<
size_t
>
plan
;
for
(
size_t
i
=
12
;
i
<=
kMaxLogSize
;
++
i
)
{
plan
.
push_back
(
static_cast
<
size_t
>
(
1
)
<<
i
);
}
/*
for (size_t i = 0; i < sizeof(size_t) * 8; ++i) {
plan.push_back(static_cast<size_t>(1) << i);
}
*/
return
plan
;
}
...
...
@@ -164,6 +217,7 @@ MultiBinBufferedAllocator::MultiBinBufferedAllocator(
division_plan_
(
division_plan
)
{
CheckAndModifyMemoryDivisionPlan
(
&
division_plan_
);
allocations_
.
resize
(
division_plan_
.
size
()
-
1
);
accumulated_cache_size_
.
assign
(
division_plan_
.
size
()
-
1
,
0UL
);
mtx_
.
resize
(
division_plan_
.
size
()
-
1
);
if
(
underlying_allocator_
->
IsAllocThreadSafe
())
{
for
(
auto
&
mtx
:
mtx_
)
{
...
...
@@ -182,28 +236,22 @@ void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
[
bin_index
]);
allocations_
[
bin_index
].
emplace
(
allocation
->
size
(),
AllocationPtr
(
allocation
));
accumulated_cache_size_
[
bin_index
]
+=
allocation
->
size
();
}
else
{
underlying_allocator_
->
Free
(
allocation
);
}
}
// bin_index is not used currently.
// Maybe we can design more flexible FreeCache strategy based on bin_index
size_t
MultiBinBufferedAllocator
::
FreeCache
(
size_t
size
,
size_t
bin_index
)
{
// and require size.
size_t
MultiBinBufferedAllocator
::
ClearCache
()
{
size_t
accumulated_size
=
0
;
// FIXME(zjl): free the largest first when there is no extra
for
(
size_t
i
=
allocations_
.
size
()
-
1
;
i
!=
static_cast
<
size_t
>
(
-
1
);
--
i
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
lock
(
mtx_
[
i
]);
if
(
allocations_
[
i
].
empty
())
continue
;
auto
it
=
--
allocations_
[
i
].
end
();
do
{
accumulated_size
+=
it
->
second
->
size
();
underlying_allocator_
->
Free
(
it
->
second
.
release
());
allocations_
[
i
].
erase
(
it
--
);
if
(
accumulated_size
>=
size
)
{
return
accumulated_size
;
}
}
while
(
!
allocations_
[
i
].
empty
());
allocations_
[
i
].
clear
();
accumulated_size
+=
accumulated_cache_size_
[
i
];
accumulated_cache_size_
[
i
]
=
0
;
}
return
accumulated_size
;
}
...
...
@@ -212,10 +260,6 @@ Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
auto
bin_index
=
FindDivisionPlanBinIndex
(
division_plan_
,
size
);
auto
upper_size
=
TolerantUpperSize
(
size
);
// if (bin_index >= allocations_.size()) {
// VLOG(2) << "Allocate " << size << " from underlying directly";
//}
for
(;
bin_index
<
allocations_
.
size
()
&&
upper_size
>=
division_plan_
[
bin_index
];
++
bin_index
)
{
...
...
@@ -226,6 +270,7 @@ Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
size_t
sz
=
it
->
second
->
size
();
auto
ret
=
std
::
move
(
it
->
second
);
allocation
.
erase
(
it
);
accumulated_cache_size_
[
bin_index
]
-=
sz
;
VLOG
(
3
)
<<
"Allocate "
<<
sz
<<
"(required "
<<
size
<<
") from cache directly"
;
return
ret
.
release
();
...
...
@@ -239,10 +284,7 @@ Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
VLOG
(
2
)
<<
"Allocate "
<<
size
<<
" from underlying directly"
;
return
ret
;
}
catch
(
BadAlloc
&
)
{
VLOG
(
1
)
<<
retry_time
<<
"-th BadAlloc raises, try to free "
<<
size
<<
" bytes caches"
;
// size_t actual_free_size = FreeCache(size, bin_index);
size_t
actual_free_size
=
FreeCache
(
-
1UL
,
bin_index
);
size_t
actual_free_size
=
ClearCache
();
VLOG
(
1
)
<<
retry_time
<<
"-th free "
<<
actual_free_size
<<
" bytes caches"
;
if
(
actual_free_size
==
0
)
throw
;
...
...
@@ -251,6 +293,8 @@ Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
}
}
void
UseMultiBinBufferedAllocatorGFlags
()
{}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
浏览文件 @
953214ad
...
...
@@ -16,6 +16,8 @@
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
...
...
@@ -24,6 +26,9 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
std
::
vector
<
size_t
>
ReadBufferedAllocatorDivisionPlanFromFile
(
const
std
::
string
&
filepath
);
class
MultiBinBufferedAllocator
:
public
Allocator
{
public:
explicit
MultiBinBufferedAllocator
(
...
...
@@ -34,21 +39,24 @@ class MultiBinBufferedAllocator : public Allocator {
bool
IsAllocThreadSafe
()
const
override
{
return
mtx_
.
front
()
!=
nullptr
;
}
void
ClearCache
()
{
FreeCache
(
static_cast
<
size_t
>
(
-
1
),
0
);
}
size_t
ClearCache
();
const
std
::
vector
<
size_t
>&
DivisionPlan
()
const
{
return
division_plan_
;
}
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
size_t
FreeCache
(
size_t
size
,
size_t
bin_index
);
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
std
::
vector
<
std
::
multimap
<
size_t
,
AllocationPtr
>>
allocations_
;
std
::
vector
<
size_t
>
accumulated_cache_size_
;
std
::
vector
<
size_t
>
division_plan_
;
std
::
vector
<
std
::
unique_ptr
<
std
::
mutex
>>
mtx_
;
};
extern
void
UseMultiBinBufferedAllocatorGFlags
();
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
浏览文件 @
953214ad
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
...
...
@@ -123,10 +124,31 @@ TEST(buffered_allocator, lazy_free) {
{
underlying_allocator
->
ResetCounter
();
allocator
->
ClearCache
();
size_t
cache_size
=
allocator
->
ClearCache
();
ASSERT_EQ
(
cache_size
,
static_cast
<
size_t
>
(
alloc_size
+
2048
));
ASSERT_EQ
(
underlying_allocator
->
GetAllocCount
(),
kZero
);
ASSERT_EQ
(
underlying_allocator
->
GetFreeCount
(),
kTwo
);
}
{
underlying_allocator
->
ResetCounter
();
auto
p
=
allocator
->
Allocate
(
allocator
->
DivisionPlan
().
back
(),
allocator
->
kDefault
);
ASSERT_EQ
(
underlying_allocator
->
GetAllocCount
(),
kOne
);
ASSERT_EQ
(
underlying_allocator
->
GetFreeCount
(),
kZero
);
}
ASSERT_EQ
(
underlying_allocator
->
GetFreeCount
(),
kOne
);
{
underlying_allocator
->
ResetCounter
();
auto
p
=
allocator
->
Allocate
(
allocator
->
DivisionPlan
().
back
()
-
1
,
allocator
->
kDefault
);
ASSERT_EQ
(
underlying_allocator
->
GetAllocCount
(),
kOne
);
ASSERT_EQ
(
underlying_allocator
->
GetFreeCount
(),
kZero
);
}
ASSERT_EQ
(
underlying_allocator
->
GetFreeCount
(),
kZero
);
}
}
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
0 → 100644
浏览文件 @
953214ad
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_bool
(
enable_buffered_allocator
);
DECLARE_string
(
allocator_strategy
);
namespace paddle {
namespace memory {
namespace allocation {

// Smoke test for AllocatorFacade with FLAGS_allocator_strategy set to
// "naive_best_fit" and FLAGS_enable_buffered_allocator turned on.
//
// For every place that is exercised, the test requests a buffer and checks
// that the returned allocation is non-null, carries a non-null pointer, and
// reports the place it was requested on. The CPU allocation must report
// exactly the requested size; the CUDA and CUDA-pinned allocations only need
// to be at least as large as the request.
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
  // GPU-related flags are set before the facade singleton is first obtained.
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
  FLAGS_gpu_allocator_retry_time = 500;
  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif

  FLAGS_allocator_strategy = "naive_best_fit";
  FLAGS_enable_buffered_allocator = true;

  auto &facade = AllocatorFacade::Instance();
  platform::Place place;
  size_t size = 1024;

  {
    // 1 KB of host memory; the reported size must equal the request.
    place = platform::CPUPlace();
    size = 1024;
    auto host_buf = facade.Alloc(place, size);
    ASSERT_NE(host_buf, nullptr);
    ASSERT_NE(host_buf->ptr(), nullptr);
    ASSERT_EQ(host_buf->place(), place);
    ASSERT_EQ(host_buf->size(), size);
  }

#ifdef PADDLE_WITH_CUDA
  {
    // 1 KB on GPU 0; the reported size may exceed the request.
    place = platform::CUDAPlace(0);
    size = 1024;
    auto small_gpu_buf = facade.Alloc(place, size);
    ASSERT_NE(small_gpu_buf, nullptr);
    ASSERT_NE(small_gpu_buf->ptr(), nullptr);
    ASSERT_EQ(small_gpu_buf->place(), place);
    ASSERT_GE(small_gpu_buf->size(), size);
  }

  {
    // Allocate 2GB gpu memory
    place = platform::CUDAPlace(0);
    size = static_cast<size_t>(2) << 30;
    auto large_gpu_buf = facade.Alloc(place, size);
    ASSERT_NE(large_gpu_buf, nullptr);
    ASSERT_NE(large_gpu_buf->ptr(), nullptr);
    ASSERT_EQ(large_gpu_buf->place(), place);
    ASSERT_GE(large_gpu_buf->size(), size);
  }

  {
    // 1 MB of CUDA pinned host memory.
    place = platform::CUDAPinnedPlace();
    size = static_cast<size_t>(1) << 20;
    auto pinned_buf = facade.Alloc(place, size);
    ASSERT_NE(pinned_buf, nullptr);
    ASSERT_NE(pinned_buf->ptr(), nullptr);
    ASSERT_EQ(pinned_buf->place(), place);
    ASSERT_GE(pinned_buf->size(), size);
  }
#endif
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/allocation/retry_allocator.h
浏览文件 @
953214ad
...
...
@@ -18,6 +18,7 @@
#include <condition_variable> // NOLINT
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
...
...
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
0 → 100644
浏览文件 @
953214ad
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
DECLARE_string
(
buffered_allocator_division_plan_path
);
namespace paddle {
namespace memory {
namespace allocation {

// Writes a division-plan text file (one bound per line) and checks that
// ReadBufferedAllocatorDivisionPlanFromFile turns each line into a byte count.
//
// The lines mix upper- and lower-case unit suffixes, include an optional
// trailing 'b'/'B', and use fractional values. The expected results are the
// numeric value multiplied by 1024 (K), 1024^2 (M) or 1024^3 (G) and then
// truncated to size_t.
TEST(buffered_allocator, division_plan) {
  const std::string path = "/tmp/buffered_allocator_divison_plan";
  FLAGS_buffered_allocator_division_plan_path = path;

  {
    // Write the plan file; the stream is closed before it is read back.
    const std::vector<std::string> lines(
        {"100b", "300.7K", "500.3m", "1.02gB", "2g", "4G"});
    std::ofstream os(path);
    for (const auto &line : lines) {
      os << line << std::endl;
    }
    os.close();
  }

  const auto plan = ReadBufferedAllocatorDivisionPlanFromFile(
      FLAGS_buffered_allocator_division_plan_path);

  // Expected byte counts, in the same order as the lines written above.
  const std::vector<size_t> expected = {
      100UL,
      static_cast<size_t>(300.7 * 1024),
      static_cast<size_t>(500.3 * 1024 * 1024),
      static_cast<size_t>(1.02 * 1024 * 1024 * 1024),
      static_cast<size_t>(2.0 * 1024 * 1024 * 1024),
      static_cast<size_t>(4.0 * 1024 * 1024 * 1024)};

  // ASSERT_EQ aborts the test on a size mismatch, so the indexing below
  // never runs past the end of `plan`.
  ASSERT_EQ(plan.size(), 6UL);
  for (size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(plan[i], expected[i]);
  }
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/allocation/zero_size_allocator.cc
浏览文件 @
953214ad
...
...
@@ -22,21 +22,22 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
return
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
ZeroSizeAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
if
(
dynamic_cast
<
ZeroSizeAllocation
*>
(
allocation
)
)
{
delete
allocation
;
Allocation
*
ZeroSizeAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
if
(
size
==
0
)
{
return
new
Allocation
(
nullptr
,
0
,
place_
)
;
}
else
{
underlying_allocator_
->
Free
(
allocation
);
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
(
);
}
}
Allocation
*
ZeroSizeAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
if
(
size
==
0
)
{
return
new
ZeroSizeAllocation
(
place_
)
;
void
ZeroSizeAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
if
(
allocation
->
size
()
==
0
)
{
delete
allocation
;
}
else
{
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
(
);
underlying_allocator_
->
Free
(
allocation
);
}
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
953214ad
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
...
...
@@ -23,12 +24,6 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr
class
ZeroSizeAllocation
:
public
Allocation
{
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
:
Allocation
(
nullptr
,
0
,
p
)
{}
};
class
ZeroSizeAllocator
:
public
Allocator
{
public:
ZeroSizeAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
,
...
...
paddle/fluid/memory/detail/buddy_allocator.cc
浏览文件 @
953214ad
...
...
@@ -25,11 +25,9 @@ namespace detail {
BuddyAllocator
::
BuddyAllocator
(
std
::
unique_ptr
<
SystemAllocator
>
system_allocator
,
size_t
min_chunk_size
,
size_t
first_allocate_chunk_size
,
size_t
reallocate
_chunk_size
)
size_t
max
_chunk_size
)
:
min_chunk_size_
(
min_chunk_size
),
first_allocate_chunk_size_
(
first_allocate_chunk_size
),
reallocate_chunk_size_
(
reallocate_chunk_size
),
max_chunk_size_
(
first_allocate_chunk_size
),
max_chunk_size_
(
max_chunk_size
),
cache_
(
system_allocator
->
UseGpu
()),
system_allocator_
(
std
::
move
(
system_allocator
))
{}
...
...
@@ -38,10 +36,9 @@ BuddyAllocator::~BuddyAllocator() {
"have actually been freed"
;
while
(
!
pool_
.
empty
())
{
auto
block
=
static_cast
<
MemoryBlock
*>
(
std
::
get
<
2
>
(
*
pool_
.
begin
()));
auto
desc
=
cache_
.
load
(
block
);
VLOG
(
10
)
<<
"Free from block ("
<<
block
<<
", "
<<
desc
.
size
<<
")"
;
VLOG
(
10
)
<<
"Free from block ("
<<
block
<<
", "
<<
max_chunk_size_
<<
")"
;
system_allocator_
->
Free
(
block
,
desc
.
size
,
desc
.
index
);
system_allocator_
->
Free
(
block
,
max_chunk_size_
,
block
->
index
(
cache_
)
);
cache_
.
invalidate
(
block
);
pool_
.
erase
(
pool_
.
begin
());
}
...
...
@@ -66,7 +63,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// if the allocation is huge, send directly to the system allocator
if
(
size
>
max_chunk_size_
)
{
VLOG
(
10
)
<<
"Allocate from system allocator."
;
return
SystemAlloc
(
size
,
false
);
return
SystemAlloc
(
size
);
}
// query and allocate from the existing chunk
...
...
@@ -75,9 +72,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// refill the pool if failure
if
(
it
==
pool_
.
end
())
{
it
=
RefillPool
();
// if still failure,
try to allocate from SystemAllocator
// if still failure,
fail fatally
if
(
it
==
pool_
.
end
())
{
return
SystemAlloc
(
size
,
false
)
;
return
nullptr
;
}
}
else
{
VLOG
(
10
)
<<
"Allocation from existing memory block "
<<
std
::
get
<
2
>
(
*
it
)
...
...
@@ -101,7 +98,7 @@ void BuddyAllocator::Free(void* p) {
VLOG
(
10
)
<<
"Free from address "
<<
block
;
if
(
block
->
type
(
cache_
)
==
MemoryBlock
::
UNMANAGED_
HUGE_CHUNK
)
{
if
(
block
->
type
(
cache_
)
==
MemoryBlock
::
HUGE_CHUNK
)
{
VLOG
(
10
)
<<
"Free directly from system allocator"
;
system_allocator_
->
Free
(
block
,
block
->
total_size
(
cache_
),
block
->
index
(
cache_
));
...
...
@@ -171,12 +168,9 @@ void BuddyAllocator::Free(void* p) {
size_t
BuddyAllocator
::
Used
()
{
return
total_used_
;
}
size_t
BuddyAllocator
::
GetMinChunkSize
()
{
return
min_chunk_size_
;
}
size_t
BuddyAllocator
::
GetMaxChunkSize
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
return
max_chunk_size_
;
}
size_t
BuddyAllocator
::
GetMaxChunkSize
()
{
return
max_chunk_size_
;
}
void
*
BuddyAllocator
::
SystemAlloc
(
size_t
size
,
bool
is_managed
)
{
void
*
BuddyAllocator
::
SystemAlloc
(
size_t
size
)
{
size_t
index
=
0
;
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
size
);
...
...
@@ -184,23 +178,25 @@ void* BuddyAllocator::SystemAlloc(size_t size, bool is_managed) {
if
(
p
==
nullptr
)
return
nullptr
;
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
is_managed
?
MemoryBlock
::
MANAGED_HUGE_CHUNK
:
MemoryBlock
::
UNMANAGED_HUGE_CHUNK
,
index
,
size
,
nullptr
,
nullptr
);
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
MemoryBlock
::
HUGE_CHUNK
,
index
,
size
,
nullptr
,
nullptr
);
return
static_cast
<
MemoryBlock
*>
(
p
)
->
data
();
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
if
(
total_used_
+
total_free_
>
0
)
{
max_chunk_size_
=
reallocate_chunk_size_
;
#ifdef PADDLE_WITH_CUDA
if
(
system_allocator_
->
UseGpu
())
{
if
((
total_used_
+
total_free_
)
==
0
)
{
// Compute the maximum allocation size for the first allocation.
max_chunk_size_
=
platform
::
GpuMaxChunkSize
();
}
}
#endif
// Allocate a new maximum sized block
size_t
index
=
0
;
size_t
chunk_size
=
max_chunk_size_
;
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
chunk_size
);
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
max_chunk_size_
);
if
(
p
==
nullptr
)
return
pool_
.
end
();
...
...
@@ -208,7 +204,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
<<
" from system allocator"
;
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
MemoryBlock
::
FREE_CHUNK
,
index
,
chunk_size
,
nullptr
,
nullptr
);
max_chunk_size_
,
nullptr
,
nullptr
);
// gpu fallback allocation
if
(
system_allocator_
->
UseGpu
()
&&
...
...
@@ -216,10 +212,10 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
fallback_alloc_count_
++
;
}
total_free_
+=
chunk_size
;
total_free_
+=
max_chunk_size_
;
// dump the block into pool
return
pool_
.
insert
(
IndexSizeAddress
(
index
,
chunk_size
,
p
)).
first
;
return
pool_
.
insert
(
IndexSizeAddress
(
index
,
max_chunk_size_
,
p
)).
first
;
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
FindExistChunk
(
size_t
size
)
{
...
...
@@ -275,24 +271,27 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
void
BuddyAllocator
::
CleanIdleFallBackAlloc
()
{
// If fallback allocation does not exist, return directly
if
(
!
fallback_alloc_count_
||
!
system_allocator_
->
UseGpu
()
)
return
;
if
(
!
fallback_alloc_count_
)
return
;
for
(
auto
pool
=
pool_
.
rbegin
();
pool
!=
pool_
.
rend
();)
{
// If free memory block less than max_chunk_size_, return directly
if
(
std
::
get
<
1
>
(
*
pool
)
<
max_chunk_size_
)
return
;
MemoryBlock
*
block
=
static_cast
<
MemoryBlock
*>
(
std
::
get
<
2
>
(
*
pool
));
auto
desc
=
cache_
.
load
(
block
);
if
(
desc
.
index
==
0
)
{
// If no GPU fallback allocator, return
if
(
!
system_allocator_
->
UseGpu
()
||
block
->
index
(
cache_
)
==
0
)
{
return
;
}
VLOG
(
10
)
<<
"Return block "
<<
block
<<
" to fallback allocator."
;
system_allocator_
->
Free
(
block
,
desc
.
size
,
block
->
index
(
cache_
));
system_allocator_
->
Free
(
block
,
max_chunk_size_
,
block
->
index
(
cache_
));
cache_
.
invalidate
(
block
);
pool
=
PoolSet
::
reverse_iterator
(
pool_
.
erase
(
std
::
next
(
pool
).
base
()));
total_free_
-=
desc
.
size
;
total_free_
-=
max_chunk_size_
;
fallback_alloc_count_
--
;
// If no fall allocation exists, return directly
...
...
@@ -316,21 +315,19 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
if
(
!
shall_free_alloc
())
return
;
for
(
auto
pool
=
pool_
.
rbegin
();
pool
!=
pool_
.
rend
();)
{
MemoryBlock
*
block
=
static_cast
<
MemoryBlock
*>
(
std
::
get
<
2
>
(
*
pool
));
auto
desc
=
cache_
.
load
(
block
)
;
// If free memory block less than max_chunk_size_, return directly
if
(
std
::
get
<
1
>
(
*
pool
)
<
max_chunk_size_
)
return
;
if
(
desc
.
type
!=
MemoryBlock
::
MANAGED_HUGE_CHUNK
)
{
return
;
}
MemoryBlock
*
block
=
static_cast
<
MemoryBlock
*>
(
std
::
get
<
2
>
(
*
pool
));
VLOG
(
10
)
<<
"Return block "
<<
block
<<
" to base allocator."
;
system_allocator_
->
Free
(
block
,
desc
.
size
,
desc
.
index
);
system_allocator_
->
Free
(
block
,
max_chunk_size_
,
block
->
index
(
cache_
)
);
cache_
.
invalidate
(
block
);
pool
=
PoolSet
::
reverse_iterator
(
pool_
.
erase
(
std
::
next
(
pool
).
base
()));
total_free_
-=
desc
.
size
;
total_free_
-=
max_chunk_size_
;
if
(
!
shall_free_alloc
())
return
;
}
...
...
paddle/fluid/memory/detail/buddy_allocator.h
浏览文件 @
953214ad
...
...
@@ -34,8 +34,7 @@ namespace detail {
class
BuddyAllocator
{
public:
BuddyAllocator
(
std
::
unique_ptr
<
SystemAllocator
>
system_allocator
,
size_t
min_chunk_size
,
size_t
first_allocate_chunk_size
,
size_t
reallocate_chunk_size
);
size_t
min_chunk_size
,
size_t
max_chunk_size
);
~
BuddyAllocator
();
...
...
@@ -58,7 +57,7 @@ class BuddyAllocator {
using
PoolSet
=
std
::
set
<
IndexSizeAddress
>
;
/*! \brief Allocate fixed-size memory from system */
void
*
SystemAlloc
(
size_t
size
,
bool
is_managed
=
true
);
void
*
SystemAlloc
(
size_t
size
);
/*! \brief If existing chunks are not suitable, refill pool */
PoolSet
::
iterator
RefillPool
();
...
...
@@ -88,11 +87,7 @@ class BuddyAllocator {
size_t
total_free_
=
0
;
// the total size of free memory
size_t
min_chunk_size_
;
// the minimum size of each chunk
size_t
first_allocate_chunk_size_
;
size_t
reallocate_chunk_size_
;
size_t
max_chunk_size_
;
size_t
max_chunk_size_
;
// the maximum size of each chunk
private:
/**
...
...
paddle/fluid/memory/detail/memory_block.h
浏览文件 @
953214ad
...
...
@@ -27,11 +27,10 @@ class MetadataCache;
// MemoryBlock::Desc and the payload.
struct
MemoryBlock
{
enum
Type
{
FREE_CHUNK
,
// memory is free and idle
ARENA_CHUNK
,
// memory is being occupied
MANAGED_HUGE_CHUNK
,
// memory is huge and managed by allocator
UNMANAGED_HUGE_CHUNK
,
// memory is huge and out of management
INVALID_CHUNK
// memory is invalid
FREE_CHUNK
,
// memory is free and idle
ARENA_CHUNK
,
// memory is being occupied
HUGE_CHUNK
,
// memory is out of management
INVALID_CHUNK
// memory is invalid
};
// init saves the MemoryBlock::Desc of the memory block in a MetadataCache.
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
953214ad
...
...
@@ -38,22 +38,6 @@ DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
"additional trunks of the same size will be requested from gpu "
"until the gpu has no memory left for another trunk."
);
DEFINE_double
(
initial_gpu_memory_in_mb
,
-
1.0
,
"GPU memory chunk size in MB."
"Allocator would allocate FLAGS_initial_gpu_memory_in_mb size "
"chunk first and reallocate FLAGS_reallocate_gpu_memory_in_mb size "
"chunk when the first chunk is not enough. This flag has higher priority "
"than FLAGS_fraction_of_gpu_memory_to_use. Disable when less than 0."
);
DEFINE_double
(
reallocate_gpu_memory_in_mb
,
-
1.0
,
"GPU memory chunk size in MB."
"If FLAGS_initial_gpu_memory_in_mb is set and "
"FLAGS_reallocate_gpu_memory_in_mb "
"is less than 0, it would be replaced by "
"FLAGS_initial_gpu_memory_in_mb. Disable "
"when FLAGS_initial_gpu_memory_in_mb is less than 0."
);
DEFINE_bool
(
enable_cublas_tensor_op_math
,
false
,
"The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
...
...
@@ -227,54 +211,13 @@ size_t GpuMaxChunkSize() {
size_t
allocating
=
static_cast
<
size_t
>
(
FLAGS_fraction_of_gpu_memory_to_use
*
(
total
-
reserving
));
PADDLE_ENFORCE_LE
(
allocating
,
available
,
"Insufficient GPU memory to allocation."
);
return
allocating
;
}
size_t
GpuFirstAllocateChunkSize
()
{
if
(
FLAGS_initial_gpu_memory_in_mb
<=
0
)
{
return
GpuMaxChunkSize
();
}
size_t
total
=
0
;
size_t
available
=
0
;
GpuMemoryUsage
(
&
available
,
&
total
);
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
<<
total
/
1024
/
1024
<<
"M"
;
size_t
initial_mem
=
static_cast
<
size_t
>
(
FLAGS_initial_gpu_memory_in_mb
*
(
1
<<
20
));
PADDLE_ENFORCE_LE
(
initial_mem
,
available
,
"Insufficient GPU memory to allocation."
);
return
initial_mem
;
}
size_t
GpuReAllocateChunkSize
()
{
if
(
FLAGS_initial_gpu_memory_in_mb
<=
0
)
{
return
GpuMaxChunkSize
();
}
double
reallocate_mem
=
FLAGS_reallocate_gpu_memory_in_mb
;
if
(
reallocate_mem
<
0
)
{
PADDLE_ENFORCE
(
FLAGS_initial_gpu_memory_in_mb
>
0
,
"FLAGS_init_gpu_memory_to_use_mb must be larger than 0"
);
reallocate_mem
=
FLAGS_initial_gpu_memory_in_mb
;
}
size_t
total
=
0
;
size_t
available
=
0
;
GpuMemoryUsage
(
&
available
,
&
total
);
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
<<
total
/
1024
/
1024
<<
"M"
;
size_t
realloc_mem
=
static_cast
<
size_t
>
(
reallocate_mem
*
(
1
<<
20
));
PADDLE_ENFORCE_LE
(
realloc_mem
,
available
,
"Insufficient GPU memory to allocation."
);
return
realloc_mem
;
}
void
GpuMemcpyAsync
(
void
*
dst
,
const
void
*
src
,
size_t
count
,
enum
cudaMemcpyKind
kind
,
cudaStream_t
stream
)
{
PADDLE_ENFORCE
(
cudaMemcpyAsync
(
dst
,
src
,
count
,
kind
,
stream
),
...
...
paddle/fluid/platform/gpu_info.h
浏览文件 @
953214ad
...
...
@@ -66,12 +66,6 @@ size_t GpuMinChunkSize();
//! Get the maximum chunk size, in bytes, for the GPU buddy allocator.
size_t
GpuMaxChunkSize
();
//! Get the size, in bytes, of the first chunk for the GPU buddy allocator.
//! Returns GpuMaxChunkSize() when FLAGS_initial_gpu_memory_in_mb is not
//! positive; otherwise that flag converted from MB to bytes.
size_t
GpuFirstAllocateChunkSize
();
//! Get the size, in bytes, of subsequent (re-allocated) chunks for the GPU
//! buddy allocator. Returns GpuMaxChunkSize() when
//! FLAGS_initial_gpu_memory_in_mb is not positive; otherwise the size is
//! derived from FLAGS_reallocate_gpu_memory_in_mb (MB converted to bytes).
size_t
GpuReAllocateChunkSize
();
//! Copy memory from address src to dst asynchronously.
//! The arguments are forwarded unchanged to cudaMemcpyAsync: `count` is the
//! copy size, `kind` the copy direction and `stream` the CUDA stream on which
//! the copy is enqueued.
void
GpuMemcpyAsync
(
void
*
dst
,
const
void
*
src
,
size_t
count
,
enum
cudaMemcpyKind
kind
,
cudaStream_t
stream
);
...
...
paddle/fluid/platform/temporary_allocator.cc
浏览文件 @
953214ad
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/platform/temporary_allocator.h"
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64
(
limit_of_tmp_allocation
,
-
1
,
...
...
paddle/fluid/platform/temporary_allocator.h
浏览文件 @
953214ad
...
...
@@ -16,6 +16,7 @@
#include <condition_variable> // NOLINT
#include <deque>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
953214ad
...
...
@@ -39,6 +39,7 @@ limitations under the License. */
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
...
...
@@ -133,6 +134,9 @@ PYBIND11_MODULE(core, m) {
paddle
::
platform
::
CpuTotalPhysicalMemory
();
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
paddle
::
memory
::
allocation
::
UseMultiBinBufferedAllocatorGFlags
();
m
.
doc
()
=
"C++ core of PaddlePaddle"
;
// using framework in this function. Since it is inside a function, it will
...
...
paddle/fluid/string/printf.h
浏览文件 @
953214ad
...
...
@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf
(
std
::
cout
,
fmt
,
args
...);
}
template
<
typename
T
>
std
::
string
HumanReadableSize
(
T
size
)
{
inline
std
::
string
HumanReadableSize
(
double
f_size
)
{
size_t
i
=
0
;
double
f_size
=
static_cast
<
double
>
(
size
);
double
orig
=
f_size
;
const
std
::
vector
<
std
::
string
>
units
(
{
"B"
,
"kB"
,
"MB"
,
"GB"
,
"TB"
,
"PB"
,
"EB"
,
"ZB"
,
"YB"
});
while
(
f_size
>
1024
)
{
while
(
f_size
>
=
1024
)
{
f_size
/=
1024
;
i
++
;
}
...
...
python/paddle/fluid/__init__.py
浏览文件 @
953214ad
...
...
@@ -130,7 +130,8 @@ def __bootstrap__():
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'memory_fraction_of_eager_deletion'
,
'allocator_strategy'
,
'enable_buffered_allocator'
,
'buffered_allocator_excess_times'
,
'reader_queue_speed_test_mode'
,
'buffered_allocator_excess_times'
,
'buffered_allocator_division_plan_path'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'inner_op_parallelism'
,
'enable_parallel_graph'
,
'multiple_of_cupti_buffer_size'
,
'enable_subgraph_optimize'
,
...
...
@@ -163,7 +164,6 @@ def __bootstrap__():
if
core
.
is_compiled_with_cuda
():
read_env_flags
+=
[
'initial_gpu_memory_in_mb'
,
'reallocate_gpu_memory_in_mb'
,
'fraction_of_gpu_memory_to_use'
,
'cudnn_deterministic'
,
'enable_cublas_tensor_op_math'
,
'conv_workspace_size_limit'
,
'cudnn_exhaustive_search'
,
'memory_optimize_debug'
,
'selected_gpus'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录