Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
644e8af4
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
644e8af4
编写于
3月 27, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
3月 27, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16424 from sneaxiy/fix_allocator_bug
Fix allocator bug
上级
679a4c28
318072c2
变更
31
显示空白变更内容
内联
并排
Showing
31 changed file
with
350 addition
and
223 deletion
+350
-223
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+0
-3
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+7
-16
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+3
-0
paddle/fluid/memory/allocation/allocator.cc
paddle/fluid/memory/allocation/allocator.cc
+11
-3
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+68
-6
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+34
-16
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+11
-4
paddle/fluid/memory/allocation/best_fit_allocator.cc
paddle/fluid/memory/allocation/best_fit_allocator.cc
+1
-1
paddle/fluid/memory/allocation/best_fit_allocator.h
paddle/fluid/memory/allocation/best_fit_allocator.h
+1
-1
paddle/fluid/memory/allocation/buffered_allocator.cc
paddle/fluid/memory/allocation/buffered_allocator.cc
+10
-12
paddle/fluid/memory/allocation/buffered_allocator.h
paddle/fluid/memory/allocation/buffered_allocator.h
+3
-3
paddle/fluid/memory/allocation/buffered_allocator_test.cc
paddle/fluid/memory/allocation/buffered_allocator_test.cc
+2
-1
paddle/fluid/memory/allocation/cpu_allocator.cc
paddle/fluid/memory/allocation/cpu_allocator.cc
+15
-13
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+2
-8
paddle/fluid/memory/allocation/cuda_allocator.cc
paddle/fluid/memory/allocation/cuda_allocator.cc
+5
-5
paddle/fluid/memory/allocation/cuda_allocator.h
paddle/fluid/memory/allocation/cuda_allocator.h
+1
-8
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+27
-25
paddle/fluid/memory/allocation/legacy_allocator.h
paddle/fluid/memory/allocation/legacy_allocator.h
+1
-1
paddle/fluid/memory/allocation/locked_allocator.cc
paddle/fluid/memory/allocation/locked_allocator.cc
+10
-10
paddle/fluid/memory/allocation/locked_allocator.h
paddle/fluid/memory/allocation/locked_allocator.h
+3
-3
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+91
-0
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+2
-7
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+1
-7
paddle/fluid/memory/allocation/retry_allocator.cc
paddle/fluid/memory/allocation/retry_allocator.cc
+4
-14
paddle/fluid/memory/allocation/retry_allocator.h
paddle/fluid/memory/allocation/retry_allocator.h
+8
-16
paddle/fluid/memory/allocation/zero_size_allocator.cc
paddle/fluid/memory/allocation/zero_size_allocator.cc
+10
-1
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+2
-6
paddle/fluid/platform/temporary_allocator.cc
paddle/fluid/platform/temporary_allocator.cc
+10
-18
paddle/fluid/platform/temporary_allocator.h
paddle/fluid/platform/temporary_allocator.h
+4
-11
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-0
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+2
-4
未找到文件。
paddle/fluid/framework/operator.h
浏览文件 @
644e8af4
...
...
@@ -365,9 +365,6 @@ class ExecutionContext {
auto
shared_allocation
=
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
(
allocation_ptr
,
deleter
);
PADDLE_ENFORCE
(
dynamic_cast
<
platform
::
TemporaryAllocation
*>
(
allocation_ptr
)
!=
nullptr
,
"The AllocationPtr must be TemporaryAllocation."
);
PADDLE_ENFORCE_GE
(
allocation_ptr
->
size
(),
framework
::
product
(
dim
)
*
sizeof
(
T
));
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
644e8af4
...
...
@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
if
(
WITH_GPU
)
...
...
@@ -37,30 +38,20 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_library
(
conditional_allocator SRCS conditional_allocator.cc DEPS allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS
${
AllocatorFacadeDeps
}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
${
AllocatorFacadeDeps
}
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy
)
nv_test
(
allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade
)
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
644e8af4
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
...
...
@@ -93,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_
->
Allocate
(
size
+
kAlignment
,
attr
);
return
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
);
}
void
FreeImpl
(
Allocation
*
allocation
)
override
{
delete
allocation
;
}
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocator.cc
浏览文件 @
644e8af4
...
...
@@ -27,16 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr
Allocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
ptr
=
AllocateImpl
(
size
,
attr
);
ptr
->
set_a
llocator
(
this
);
ptr
->
RegisterDecoratedA
llocator
(
this
);
return
AllocationPtr
(
ptr
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
delete
allocation
;
}
void
Allocator
::
FreeImpl
(
Allocation
*
allocation
)
{
Allocator
*
allocator
=
allocation
->
TopDecoratedAllocator
();
allocator
->
Free
(
allocation
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
allocation
->
PopDecoratedAllocator
();
FreeImpl
(
allocation
);
}
const
char
*
BadAlloc
::
what
()
const
noexcept
{
return
msg_
.
c_str
();
}
void
AllocationDeleter
::
operator
()(
Allocation
*
allocation
)
const
{
auto
*
allocator
=
allocation
->
a
llocator
();
Allocator
*
allocator
=
allocation
->
TopDecoratedA
llocator
();
allocator
->
Free
(
allocation
);
}
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
644e8af4
...
...
@@ -15,6 +15,8 @@
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -44,13 +46,56 @@ class Allocator;
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate
* a RetryAllocator to any allocator to perform allocation retry when first
* allocation request fails.
*
* Explanations of Allocator design is as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., something what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
:
allocator_
(
nullptr
),
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{}
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{
// NOTE(zjl): Since decorated_allocators_ is usually a small vector
// We reserve a small buffer to it to prevent frequent heap allocation
// Not quite sure whether we need something like gtl vector.
decorated_allocators_
.
reserve
(
8
);
}
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
Allocation
(
Allocation
&&
o
)
=
delete
;
Allocation
&
operator
=
(
Allocation
&&
o
)
=
delete
;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
...
...
@@ -72,17 +117,31 @@ class Allocation {
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
Allocator
*
allocator
()
{
return
allocator_
;
}
virtual
~
Allocation
();
void
set_allocator
(
Allocator
*
allocator
)
{
allocator_
=
allocator
;
}
private:
const
std
::
vector
<
Allocator
*>&
DecoratedAllocators
()
const
{
return
decorated_allocators_
;
}
virtual
~
Allocation
();
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
decorated_allocators_
.
push_back
(
allocator
);
}
inline
void
PopDecoratedAllocator
()
{
decorated_allocators_
.
pop_back
();
}
inline
Allocator
*
TopDecoratedAllocator
()
{
return
decorated_allocators_
.
back
();
}
private:
Allocator
*
allocator_
;
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
std
::
vector
<
Allocator
*>
decorated_allocators_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
};
using
AllocationPtr
=
std
::
unique_ptr
<
Allocation
,
AllocationDeleter
>
;
...
...
@@ -132,9 +191,12 @@ class Allocator {
// True if the `Allocate` is thread safe.
virtual
bool
IsAllocThreadSafe
()
const
;
// This function should not be called outside
void
Free
(
Allocation
*
allocation
);
protected:
virtual
void
Free
(
Allocation
*
allocation
);
virtual
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
=
0
;
virtual
void
FreeImpl
(
Allocation
*
allocation
);
private:
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
644e8af4
...
...
@@ -17,6 +17,7 @@
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
...
...
@@ -30,6 +31,7 @@
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
...
...
@@ -47,6 +49,17 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
return
allocator
;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime.
class
CPUManagedAllocator
:
public
Allocator
{
public:
...
...
@@ -110,14 +123,10 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
CreateAllocatorWithChunk
()
{
chunks_
.
emplace_back
(
raw_allocator_
->
Allocate
(
max_chunk_size_
));
auto
*
allocation
=
chunks_
.
back
().
get
();
std
::
unique
_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
unique
_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
std
::
shared
_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared
_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
if
(
retry_time_
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time_
);
allocator
.
reset
(
retry_allocator
);
}
allocator
=
WrapRetryAllocator
(
allocator
,
retry_time_
);
return
std
::
make_shared
<
AlignedAllocator
<
64u
>>
(
std
::
move
(
allocator
));
}
...
...
@@ -188,13 +197,23 @@ class AllocatorFacadePrivate {
~
AllocatorFacadePrivate
()
=
default
;
AllocatorFacadePrivate
()
{
if
(
GetAllocatorStrategy
()
==
AllocatorStrategy
::
kLegacy
)
{
auto
strategy
=
GetAllocatorStrategy
();
switch
(
strategy
)
{
case
AllocatorStrategy
::
kLegacy
:
{
InitLegacyAllocator
();
}
else
{
break
;
}
case
AllocatorStrategy
::
kNaiveBestFit
:
{
InitCPUAllocator
();
InitCUDAAllocator
();
InitCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported allocator strategy: %d"
,
static_cast
<
int
>
(
strategy
));
}
}
}
...
...
@@ -252,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
std
::
shared_ptr
<
Allocation
>
AllocatorFacade
::
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
).
release
(),
AllocationDeleter
());
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
));
}
AllocationPtr
AllocatorFacade
::
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
644e8af4
...
...
@@ -14,20 +14,27 @@
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"New means the experimental allocators of Fluid. in [legacy, new]"
);
"naive_best_fit means the experimental best fit allocator. "
"allocator. Enum in [legacy, naive_best_fit]."
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
AllocatorStrategy
GetStrategyFromFlag
()
{
return
FLAGS_allocator_strategy
==
"legacy"
?
AllocatorStrategy
::
kLegacy
:
AllocatorStrategy
::
kNaiveBestFit
;
if
(
FLAGS_allocator_strategy
==
"legacy"
)
{
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"naive_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
{
PADDLE_THROW
(
"Unsupported allocator strategy: %s"
,
FLAGS_allocator_strategy
);
}
}
AllocatorStrategy
GetAllocatorStrategy
()
{
...
...
paddle/fluid/memory/allocation/best_fit_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
}
return
num
;
}
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
void
BestFitAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
auto
*
bf_allocation
=
dynamic_cast
<
BestFitAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
bf_allocation
,
"The input allocation is not BestFitAllocation."
);
...
...
paddle/fluid/memory/allocation/best_fit_allocator.h
浏览文件 @
644e8af4
...
...
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void
InsertFreeNode
(
const
ListIt
&
it
);
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
...
...
paddle/fluid/memory/allocation/buffered_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -22,11 +22,11 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
BufferedAllocator
::
BufferedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
allocator
)
BufferedAllocator
::
BufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
)
:
underlying_allocator_
(
std
::
move
(
allocator
))
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
,
"Underlying allocator of BufferedAllocator must
be unmanaged
"
);
"Underlying allocator of BufferedAllocator must
not be null
"
);
if
(
underlying_allocator_
->
IsAllocThreadSafe
())
{
mtx_
.
reset
(
new
std
::
mutex
());
}
...
...
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while
(
!
allocations_
.
empty
())
{
// free the largest
auto
it
=
--
allocations_
.
end
();
cur
+=
it
->
second
->
size
();
delete
it
->
second
.
release
(
);
underlying_allocator_
->
Free
(
it
->
second
.
release
()
);
allocations_
.
erase
(
it
);
if
(
cur
>=
size
)
return
;
}
}
bool
BufferedAllocator
::
IsAllocThreadSafe
()
const
{
return
this
->
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
BufferedAllocator
::
Free
(
Allocation
*
allocation
)
{
bool
BufferedAllocator
::
IsAllocThreadSafe
()
const
{
return
mtx_
!=
nullptr
;
}
void
BufferedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
allocations_
.
emplace
(
allocation
->
size
(),
AllocationPtr
(
allocation
));
}
Allocation
*
BufferedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
...
...
@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if
(
it
!=
allocations_
.
end
()
&&
it
->
first
<
size
*
2
)
{
AllocationPtr
result
(
std
::
move
(
it
->
second
));
allocations_
.
erase
(
it
);
return
new
AllocationWithUnderlying
(
std
::
move
(
result
)
);
return
result
.
release
(
);
}
}
try
{
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
catch
(
BadAlloc
&
)
{
FreeCache
(
size
);
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
...
...
paddle/fluid/memory/allocation/buffered_allocator.h
浏览文件 @
644e8af4
...
...
@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_
class
BufferedAllocator
:
public
Allocator
{
public:
explicit
BufferedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
allocator
);
explicit
BufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
);
~
BufferedAllocator
();
...
...
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void
FreeCache
(
size_t
size
);
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
multimap
<
size_t
,
AllocationPtr
>
allocations_
;
std
::
unique_ptr
<
std
::
mutex
>
mtx_
;
};
...
...
paddle/fluid/memory/allocation/buffered_allocator_test.cc
浏览文件 @
644e8af4
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
...
...
@@ -64,7 +65,7 @@ class StubAllocator : public Allocator {
size_t
GetFreeCount
()
const
{
return
destruct_count_
;
}
protected:
void
Free
(
Allocation
*
allocation
)
override
{
void
Free
Impl
(
Allocation
*
allocation
)
override
{
auto
*
alloc
=
dynamic_cast
<
StubAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
alloc
);
if
(
alloc
->
ptr
())
delete
[]
static_cast
<
uint8_t
*>
(
alloc
->
ptr
());
...
...
paddle/fluid/memory/allocation/cpu_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -20,25 +20,27 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
CPUAllocation
::
CPUAllocation
(
void
*
ptr
,
size_t
size
)
:
Allocation
(
ptr
,
size
,
platform
::
CPUPlace
())
{}
bool
CPUAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CPUAllocator
::
Free
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE_NOT_NULL
(
dynamic_cast
<
CPUAllocation
*>
(
allocation
));
free
(
allocation
->
ptr
());
void
CPUAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
void
*
p
=
allocation
->
ptr
();
#ifdef _WIN32
_aligned_free
(
p
);
#else
free
(
p
);
#endif
delete
allocation
;
}
Allocation
*
CPUAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
void
*
ptr
;
auto
status
=
posix_memalign
(
&
ptr
,
kAlignment
,
size
);
if
(
UNLIKELY
(
status
)
!=
0
)
{
throw
BadAlloc
(
string
::
Sprintf
(
"Cannot allocate cpu memory %d. Errno is %d"
,
size
,
status
));
}
return
new
CPUAllocation
(
ptr
,
size
);
void
*
p
;
#ifdef _WIN32
p
=
_aligned_malloc
(
size
,
kAlignment
);
#else
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
kAlignment
,
size
),
0
,
"Alloc %ld error!"
,
size
);
#endif
return
new
Allocation
(
p
,
size
,
platform
::
CPUPlace
());
}
}
// namespace allocation
}
// namespace memory
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
644e8af4
...
...
@@ -31,19 +31,13 @@ namespace allocation {
//
// NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
// an open-sourced allocator into Paddle.
class
CPUAllocator
;
class
CPUAllocation
:
public
Allocation
{
public:
CPUAllocation
(
void
*
ptr
,
size_t
size
);
};
class
CPUAllocator
:
public
Allocator
{
public:
constexpr
static
size_t
kAlignment
=
64u
;
constexpr
static
size_t
kAlignment
=
4096UL
;
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/cuda_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -23,15 +23,14 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
CUDAAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CUDAAllocator
::
Free
(
Allocation
*
allocation
)
{
void
CUDAAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
auto
*
cuda_allocation
=
dynamic_cast
<
CUDAAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
cuda_allocation
);
PADDLE_ENFORCE_EQ
(
boost
::
get
<
platform
::
CUDAPlace
>
(
cuda_allocation
->
place
()),
PADDLE_ENFORCE_EQ
(
boost
::
get
<
platform
::
CUDAPlace
>
(
allocation
->
place
()),
place_
);
PADDLE_ENFORCE
(
cudaFree
(
allocation
->
ptr
()));
delete
allocation
;
}
Allocation
*
CUDAAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
void
*
ptr
;
...
...
@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s"
,
size
,
place_
.
device
,
status
,
cudaGetErrorString
(
status
)));
}
return
new
CUDA
Allocation
(
ptr
,
size
,
platform
::
Place
(
place_
));
return
new
Allocation
(
ptr
,
size
,
platform
::
Place
(
place_
));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_allocator.h
浏览文件 @
644e8af4
...
...
@@ -20,13 +20,6 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// CUDA System allocator and allocation.
// Just a flag type.
class
CUDAAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
};
class
CUDAAllocator
:
public
Allocator
{
public:
explicit
CUDAAllocator
(
const
platform
::
CUDAPlace
&
place
)
:
place_
(
place
)
{}
...
...
@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator {
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -134,26 +134,22 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
}
#ifdef PADDLE_WITH_CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
**
a_arr
=
nullptr
;
static
std
::
vector
<
int
>
devices
;
std
::
call_once
(
init_flag
,
[
gpu_id
]()
{
devices
=
platform
::
GetSelectedDevices
();
int
gpu_num
=
devices
.
size
();
allocation
::
GPUMemMonitor
.
Initialize
(
devices
.
size
());
class
GPUBuddyAllocatorList
{
public:
GPUBuddyAllocatorList
()
:
allocators_
(
platform
::
GetCUDADeviceCount
()),
flags_
(
platform
::
GetCUDADeviceCount
())
{
allocation
::
GPUMemMonitor
.
Initialize
(
allocators_
.
size
());
}
a_arr
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
size_t
i
=
0
;
i
<
devices
.
size
();
++
i
)
{
int
dev_id
=
devices
[
i
];
a_arr
[
i
]
=
nullptr
;
BuddyAllocator
*
Get
(
size_t
dev_id
)
{
PADDLE_ENFORCE
(
dev_id
<
flags_
.
size
(),
"Invalid device id %s"
,
dev_id
);
std
::
call_once
(
flags_
[
dev_id
],
[
this
,
dev_id
]
{
platform
::
SetDeviceId
(
dev_id
);
a_arr
[
i
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
allocators_
[
dev_id
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
"You can set GFlags environment variable "
...
...
@@ -167,13 +163,19 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
}
});
return
allocators_
[
dev_id
];
}
private:
std
::
vector
<
BuddyAllocator
*>
allocators_
;
std
::
vector
<
std
::
once_flag
>
flags_
;
};
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
GPUBuddyAllocatorList
allocators
;
platform
::
SetDeviceId
(
gpu_id
);
auto
pos
=
std
::
distance
(
devices
.
begin
(),
std
::
find
(
devices
.
begin
(),
devices
.
end
(),
gpu_id
));
return
a_arr
[
pos
];
return
allocators
.
Get
(
gpu_id
);
}
#endif
...
...
@@ -192,7 +194,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
#ifdef PADDLE_WITH_CUDA
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
if
(
ptr
==
nullptr
&&
size
>
0
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
...
...
@@ -347,7 +349,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return
tmp_alloc
;
}
void
LegacyAllocator
::
Free
(
Allocation
*
allocation
)
{
void
LegacyAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
boost
::
apply_visitor
(
legacy
::
FreeVisitor
(
allocation
->
ptr
(),
allocation
->
size
()),
allocation
->
place
());
...
...
paddle/fluid/memory/allocation/legacy_allocator.h
浏览文件 @
644e8af4
...
...
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
private:
platform
::
Place
place_
;
...
...
paddle/fluid/memory/allocation/locked_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -14,8 +14,10 @@
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
...
...
@@ -23,26 +25,24 @@ namespace allocation {
bool
LockedAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
LockedAllocator
::
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
underlying_allocator
)
std
::
shared_ptr
<
Allocator
>
underlying_allocator
)
:
underlying_allocator_
(
std
::
move
(
underlying_allocator
))
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
);
if
(
!
underlying_allocator_
->
IsAllocThreadSafe
())
{
mtx_
.
reset
(
new
std
::
mutex
());
}
}
void
LockedAllocator
::
Free
(
Allocation
*
allocation
)
{
{
void
LockedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
reinterpret_cast
<
AllocationWithUnderlying
*>
(
allocation
)
->
allocation_
.
reset
();
// Destroy inner allocation
}
delete
allocation
;
underlying_allocator_
->
Free
(
allocation
);
}
Allocation
*
LockedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/locked_allocator.h
浏览文件 @
644e8af4
...
...
@@ -24,15 +24,15 @@ namespace allocation {
// A allocator to make underlying allocator thread safe.
class
LockedAllocator
:
public
Allocator
{
public:
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
underlying_allocator
);
explicit
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
);
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
unique_ptr
<
std
::
mutex
>
mtx_
;
};
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
0 → 100644
浏览文件 @
644e8af4
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
TEST
(
allocator
,
allocator
)
{
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
FLAGS_allocator_strategy
=
"naive_best_fit"
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
size_t
size
=
1024
;
{
place
=
platform
::
CPUPlace
();
size
=
1024
;
auto
cpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
cpu_allocation
,
nullptr
);
ASSERT_NE
(
cpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cpu_allocation
->
place
(),
place
);
ASSERT_EQ
(
cpu_allocation
->
size
(),
size
);
}
#ifdef PADDLE_WITH_CUDA
{
place
=
platform
::
CUDAPlace
(
0
);
size
=
1024
;
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
size
);
}
{
// Allocate 2GB gpu memory
place
=
platform
::
CUDAPlace
(
0
);
size
=
2
*
static_cast
<
size_t
>
(
1
<<
30
);
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
size
);
}
{
place
=
platform
::
CUDAPinnedPlace
();
size
=
(
1
<<
20
);
auto
cuda_pinned_allocation
=
instance
.
Alloc
(
platform
::
CUDAPinnedPlace
(),
1
<<
20
);
ASSERT_NE
(
cuda_pinned_allocation
,
nullptr
);
ASSERT_NE
(
cuda_pinned_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cuda_pinned_allocation
->
place
(),
place
);
ASSERT_GE
(
cuda_pinned_allocation
->
size
(),
size
);
}
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/pinned_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -20,20 +20,15 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
CPUPinnedAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CPUPinnedAllocator
::
Free
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE_NOT_NULL
(
dynamic_cast
<
CPUPinnedAllocation
*>
(
allocation
));
void
CPUPinnedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE
(
cudaFreeHost
(
allocation
->
ptr
()));
delete
allocation
;
}
Allocation
*
CPUPinnedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void
*
ptr
;
PADDLE_ENFORCE
(
cudaHostAlloc
(
&
ptr
,
size
,
cudaHostAllocPortable
));
return
new
CPUPinnedAllocation
(
ptr
,
size
);
return
new
Allocation
(
ptr
,
size
,
platform
::
CUDAPinnedPlace
()
);
}
}
// namespace allocation
}
// namespace memory
...
...
paddle/fluid/memory/allocation/pinned_allocator.h
浏览文件 @
644e8af4
...
...
@@ -20,18 +20,12 @@ namespace memory {
namespace
allocation
{
// Allocator uses `cudaHostAlloc`
class
CPUPinnedAllocation
:
public
Allocation
{
public:
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
:
Allocation
(
ptr
,
size
,
platform
::
CUDAPinnedPlace
())
{}
};
class
CPUPinnedAllocator
:
public
Allocator
{
public:
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
};
...
...
paddle/fluid/memory/allocation/retry_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -18,25 +18,15 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
RetryAllocator
::
IsAllocThreadSafe
()
const
{
return
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
RetryAllocator
::
Free
(
Allocation
*
allocation
)
{
void
RetryAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
// Delete underlying allocation first.
reinterpret_cast
<
AllocationWithUnderlying
*>
(
allocation
)
->
allocation_
.
reset
();
{
// notify all waited allocators, they can try to allocate memory after free.
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
underlying_allocator_
->
Free
(
allocation
);
cv_
.
notify_all
();
}
delete
allocation
;
}
Allocation
*
RetryAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
alloc_func
=
[
&
,
this
]()
{
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
};
// In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time
...
...
paddle/fluid/memory/allocation/retry_allocator.h
浏览文件 @
644e8af4
...
...
@@ -18,38 +18,32 @@
#include <condition_variable> // NOLINT
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
class
RetryAllocator
;
class
RetryAllocator
:
public
Allocator
{
public:
RetryAllocator
(
std
::
unique_ptr
<
Allocator
>&&
allocator
,
size_t
retry_ms
)
RetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
size_t
retry_ms
)
:
underlying_allocator_
(
std
::
move
(
allocator
)),
retry_time_
(
retry_ms
)
{
EnforceCheck
();
}
bool
IsAllocThreadSafe
()
const
override
;
private:
void
EnforceCheck
()
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
.
get
()
,
"UnderlyingAllocator of RetryAllocator must
be UnmanagedAllocator
"
);
underlying_allocator_
,
"UnderlyingAllocator of RetryAllocator must
not be null
"
);
PADDLE_ENFORCE
(
underlying_allocator_
->
IsAllocThreadSafe
(),
"UnderlyingAllocator of RetryAllocator must be thread-safe"
);
}
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
chrono
::
milliseconds
retry_time_
;
std
::
mutex
mutex_
;
std
::
condition_variable
cv_
;
...
...
@@ -57,8 +51,6 @@ class RetryAllocator : public Allocator {
// For debug, We can add an atomic integer to record how many memory sizes are
// waited to allocate
// std::atomic<size_t> waited_allocate_size_{0};
friend
class
RetryAllocation
;
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/zero_size_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation
*
ZeroSizeAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
if
(
size
==
0
)
{
return
new
ZeroSizeAllocation
(
place_
);
return
new
Allocation
(
nullptr
,
0
,
place_
);
}
else
{
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
void
ZeroSizeAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
if
(
allocation
->
size
()
==
0
)
{
delete
allocation
;
}
else
{
underlying_allocator_
->
Free
(
allocation
);
}
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
644e8af4
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
...
...
@@ -23,12 +24,6 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr
class
ZeroSizeAllocation
:
public
Allocation
{
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
:
Allocation
(
nullptr
,
0
,
p
)
{}
};
class
ZeroSizeAllocator
:
public
Allocator
{
public:
ZeroSizeAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
,
...
...
@@ -39,6 +34,7 @@ class ZeroSizeAllocator : public Allocator {
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
...
...
paddle/fluid/platform/temporary_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/platform/temporary_allocator.h"
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64
(
limit_of_tmp_allocation
,
-
1
,
...
...
@@ -29,38 +30,31 @@ namespace paddle {
namespace
platform
{
namespace
alloc
=
memory
::
allocation
;
TemporaryAllocation
::
TemporaryAllocation
(
alloc
::
AllocationPtr
&&
underlying_allocation
)
:
Allocation
(
underlying_allocation
->
ptr
(),
underlying_allocation
->
size
(),
underlying_allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
TemporaryAllocator
::
TemporaryAllocator
(
platform
::
Place
place
)
:
place_
(
place
)
{
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
Temporary
Allocation
*>
());
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>
());
}
bool
TemporaryAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
TemporaryAllocator
::
Release
(
const
std
::
function
<
void
()
>
&
callback
)
{
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
Temporary
Allocation
*>>
t_allocations
;
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>>
t_allocations
;
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mtx_
);
callback
();
t_allocations
.
swap
(
temp_mem_map_
);
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
Temporary
Allocation
*>
());
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>
());
wait_delete_mem_
=
0
;
}
alloc
::
AllocationDeleter
deleter
;
for
(
auto
tmp
:
*
t_allocations
)
{
VLOG
(
10
)
<<
"Delete temporary allocation "
<<
tmp
.
second
->
ptr
()
<<
" size: "
<<
tmp
.
second
->
size
();
delete
tmp
.
second
;
delete
r
(
tmp
.
second
)
;
}
}
void
TemporaryAllocator
::
Free
(
alloc
::
Allocation
*
allocation
)
{
auto
*
temp_allocation
=
dynamic_cast
<
TemporaryAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
temp_allocation
);
void
TemporaryAllocator
::
FreeImpl
(
alloc
::
Allocation
*
temp_allocation
)
{
if
(
platform
::
is_gpu_place
(
temp_allocation
->
place
()))
{
PADDLE_ENFORCE
(
platform
::
is_same_place
(
temp_allocation
->
place
(),
place_
),
"The place should be the same."
);
...
...
@@ -84,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) {
}
VLOG
(
10
)
<<
"Delete temporary allocation "
<<
temp_allocation
->
ptr
()
<<
" size: "
<<
temp_allocation
->
size
();
delete
temp_allocation
;
alloc
::
AllocationDeleter
()(
temp_allocation
)
;
}
size_t
TemporaryAllocator
::
TemporaryAllocationQueueSize
()
{
...
...
@@ -119,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
}
// If not find the the available allocation, get allocation from
// AllocatorFacadeInstance.
auto
raw_allocation
=
alloc
::
AllocatorFacade
::
Instance
().
Alloc
(
place_
,
size
,
attr
);
auto
temp_mem
=
new
TemporaryAllocation
(
std
::
move
(
raw_allocation
));
auto
temp_mem
=
alloc
::
AllocatorFacade
::
Instance
().
Alloc
(
place_
,
size
,
attr
);
VLOG
(
10
)
<<
"Alloc temporary allocation: "
<<
temp_mem
->
ptr
()
<<
": "
<<
size
;
return
temp_mem
;
return
temp_mem
.
release
()
;
}
}
// namespace platform
...
...
paddle/fluid/platform/temporary_allocator.h
浏览文件 @
644e8af4
...
...
@@ -16,20 +16,13 @@
#include <condition_variable> // NOLINT
#include <deque>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace
paddle
{
namespace
platform
{
class
TemporaryAllocation
:
public
memory
::
allocation
::
Allocation
{
public:
explicit
TemporaryAllocation
(
memory
::
allocation
::
AllocationPtr
&&
underlying_allocation
);
memory
::
allocation
::
AllocationPtr
underlying_allocation_
;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation.
*
...
...
@@ -56,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void
SetCallback
(
const
std
::
function
<
void
()
>
&
callback
);
protected:
void
Free
(
memory
::
allocation
::
Allocation
*
allocation
)
override
;
void
Free
Impl
(
memory
::
allocation
::
Allocation
*
allocation
)
override
;
memory
::
allocation
::
Allocation
*
AllocateImpl
(
size_t
size
,
memory
::
allocation
::
Allocator
::
Attr
attr
)
override
;
...
...
@@ -65,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform
::
Place
place_
;
// When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately.
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
TemporaryAllocation
*>>
temp_mem_map_
{
nullptr
};
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
memory
::
allocation
::
Allocation
*>>
temp_mem_map_
{
nullptr
};
std
::
mutex
mtx_
;
size_t
wait_delete_mem_
{
0
};
std
::
function
<
void
()
>
callback_
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
644e8af4
...
...
@@ -324,6 +324,7 @@ PYBIND11_MODULE(core, m) {
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
})
.
def
(
"_clear"
,
&
Tensor
::
clear
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
...
...
paddle/fluid/string/printf.h
浏览文件 @
644e8af4
...
...
@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf
(
std
::
cout
,
fmt
,
args
...);
}
template
<
typename
T
>
std
::
string
HumanReadableSize
(
T
size
)
{
inline
std
::
string
HumanReadableSize
(
double
f_size
)
{
size_t
i
=
0
;
double
f_size
=
static_cast
<
double
>
(
size
);
double
orig
=
f_size
;
const
std
::
vector
<
std
::
string
>
units
(
{
"B"
,
"kB"
,
"MB"
,
"GB"
,
"TB"
,
"PB"
,
"EB"
,
"ZB"
,
"YB"
});
while
(
f_size
>
1024
)
{
while
(
f_size
>
=
1024
)
{
f_size
/=
1024
;
i
++
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录