Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
644e8af4
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
644e8af4
编写于
3月 27, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
3月 27, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16424 from sneaxiy/fix_allocator_bug
Fix allocator bug
上级
679a4c28
318072c2
变更
31
隐藏空白更改
内联
并排
Showing
31 changed file
with
350 addition
and
223 deletion
+350
-223
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+0
-3
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+7
-16
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+3
-0
paddle/fluid/memory/allocation/allocator.cc
paddle/fluid/memory/allocation/allocator.cc
+11
-3
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+68
-6
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+34
-16
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+11
-4
paddle/fluid/memory/allocation/best_fit_allocator.cc
paddle/fluid/memory/allocation/best_fit_allocator.cc
+1
-1
paddle/fluid/memory/allocation/best_fit_allocator.h
paddle/fluid/memory/allocation/best_fit_allocator.h
+1
-1
paddle/fluid/memory/allocation/buffered_allocator.cc
paddle/fluid/memory/allocation/buffered_allocator.cc
+10
-12
paddle/fluid/memory/allocation/buffered_allocator.h
paddle/fluid/memory/allocation/buffered_allocator.h
+3
-3
paddle/fluid/memory/allocation/buffered_allocator_test.cc
paddle/fluid/memory/allocation/buffered_allocator_test.cc
+2
-1
paddle/fluid/memory/allocation/cpu_allocator.cc
paddle/fluid/memory/allocation/cpu_allocator.cc
+15
-13
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+2
-8
paddle/fluid/memory/allocation/cuda_allocator.cc
paddle/fluid/memory/allocation/cuda_allocator.cc
+5
-5
paddle/fluid/memory/allocation/cuda_allocator.h
paddle/fluid/memory/allocation/cuda_allocator.h
+1
-8
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+27
-25
paddle/fluid/memory/allocation/legacy_allocator.h
paddle/fluid/memory/allocation/legacy_allocator.h
+1
-1
paddle/fluid/memory/allocation/locked_allocator.cc
paddle/fluid/memory/allocation/locked_allocator.cc
+10
-10
paddle/fluid/memory/allocation/locked_allocator.h
paddle/fluid/memory/allocation/locked_allocator.h
+3
-3
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+91
-0
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+2
-7
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+1
-7
paddle/fluid/memory/allocation/retry_allocator.cc
paddle/fluid/memory/allocation/retry_allocator.cc
+4
-14
paddle/fluid/memory/allocation/retry_allocator.h
paddle/fluid/memory/allocation/retry_allocator.h
+8
-16
paddle/fluid/memory/allocation/zero_size_allocator.cc
paddle/fluid/memory/allocation/zero_size_allocator.cc
+10
-1
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+2
-6
paddle/fluid/platform/temporary_allocator.cc
paddle/fluid/platform/temporary_allocator.cc
+10
-18
paddle/fluid/platform/temporary_allocator.h
paddle/fluid/platform/temporary_allocator.h
+4
-11
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-0
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+2
-4
未找到文件。
paddle/fluid/framework/operator.h
浏览文件 @
644e8af4
...
...
@@ -365,9 +365,6 @@ class ExecutionContext {
auto
shared_allocation
=
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
(
allocation_ptr
,
deleter
);
PADDLE_ENFORCE
(
dynamic_cast
<
platform
::
TemporaryAllocation
*>
(
allocation_ptr
)
!=
nullptr
,
"The AllocationPtr must be TemporaryAllocation."
);
PADDLE_ENFORCE_GE
(
allocation_ptr
->
size
(),
framework
::
product
(
dim
)
*
sizeof
(
T
));
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
644e8af4
...
...
@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
if
(
WITH_GPU
)
...
...
@@ -37,30 +38,20 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_library
(
conditional_allocator SRCS conditional_allocator.cc DEPS allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS
${
AllocatorFacadeDeps
}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
${
AllocatorFacadeDeps
}
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy
)
nv_test
(
allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade
)
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
644e8af4
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
...
...
@@ -93,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_
->
Allocate
(
size
+
kAlignment
,
attr
);
return
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
);
}
void
FreeImpl
(
Allocation
*
allocation
)
override
{
delete
allocation
;
}
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocator.cc
浏览文件 @
644e8af4
...
...
@@ -27,16 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr
Allocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
ptr
=
AllocateImpl
(
size
,
attr
);
ptr
->
set_a
llocator
(
this
);
ptr
->
RegisterDecoratedA
llocator
(
this
);
return
AllocationPtr
(
ptr
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
delete
allocation
;
}
void
Allocator
::
FreeImpl
(
Allocation
*
allocation
)
{
Allocator
*
allocator
=
allocation
->
TopDecoratedAllocator
();
allocator
->
Free
(
allocation
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
allocation
->
PopDecoratedAllocator
();
FreeImpl
(
allocation
);
}
const
char
*
BadAlloc
::
what
()
const
noexcept
{
return
msg_
.
c_str
();
}
void
AllocationDeleter
::
operator
()(
Allocation
*
allocation
)
const
{
auto
*
allocator
=
allocation
->
a
llocator
();
Allocator
*
allocator
=
allocation
->
TopDecoratedA
llocator
();
allocator
->
Free
(
allocation
);
}
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
644e8af4
...
...
@@ -15,6 +15,8 @@
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -44,13 +46,56 @@ class Allocator;
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate any allocator with a RetryAllocator to retry the allocation when
* the first allocation request fails.
*
* The design of Allocator is explained as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., as is done in AlignedAllocator.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
:
allocator_
(
nullptr
),
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{}
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{
// NOTE(zjl): Since decorated_allocators_ is usually a small vector,
// we reserve a small buffer for it to prevent frequent heap allocation.
// Not quite sure whether we need something like gtl vector.
decorated_allocators_
.
reserve
(
8
);
}
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
Allocation
(
Allocation
&&
o
)
=
delete
;
Allocation
&
operator
=
(
Allocation
&&
o
)
=
delete
;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
...
...
@@ -72,17 +117,31 @@ class Allocation {
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
Allocator
*
allocator
()
{
return
allocator_
;
}
virtual
~
Allocation
();
void
set_allocator
(
Allocator
*
allocator
)
{
allocator_
=
allocator
;
}
private:
const
std
::
vector
<
Allocator
*>&
DecoratedAllocators
()
const
{
return
decorated_allocators_
;
}
virtual
~
Allocation
();
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
decorated_allocators_
.
push_back
(
allocator
);
}
inline
void
PopDecoratedAllocator
()
{
decorated_allocators_
.
pop_back
();
}
inline
Allocator
*
TopDecoratedAllocator
()
{
return
decorated_allocators_
.
back
();
}
private:
Allocator
*
allocator_
;
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
std
::
vector
<
Allocator
*>
decorated_allocators_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
};
using
AllocationPtr
=
std
::
unique_ptr
<
Allocation
,
AllocationDeleter
>
;
...
...
@@ -132,9 +191,12 @@ class Allocator {
// True if the `Allocate` is thread safe.
virtual
bool
IsAllocThreadSafe
()
const
;
// This function should not be called by external code.
void
Free
(
Allocation
*
allocation
);
protected:
virtual
void
Free
(
Allocation
*
allocation
);
virtual
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
=
0
;
virtual
void
FreeImpl
(
Allocation
*
allocation
);
private:
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
644e8af4
...
...
@@ -17,6 +17,7 @@
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
...
...
@@ -30,6 +31,7 @@
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
...
...
@@ -47,6 +49,17 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
return
allocator
;
}
// TODO(yy): Dirty code here. This class should be configurable at runtime.
class
CPUManagedAllocator
:
public
Allocator
{
public:
...
...
@@ -110,14 +123,10 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
CreateAllocatorWithChunk
()
{
chunks_
.
emplace_back
(
raw_allocator_
->
Allocate
(
max_chunk_size_
));
auto
*
allocation
=
chunks_
.
back
().
get
();
std
::
unique
_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
unique
_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
std
::
shared
_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared
_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
if
(
retry_time_
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time_
);
allocator
.
reset
(
retry_allocator
);
}
allocator
=
WrapRetryAllocator
(
allocator
,
retry_time_
);
return
std
::
make_shared
<
AlignedAllocator
<
64u
>>
(
std
::
move
(
allocator
));
}
...
...
@@ -188,13 +197,23 @@ class AllocatorFacadePrivate {
~
AllocatorFacadePrivate
()
=
default
;
AllocatorFacadePrivate
()
{
if
(
GetAllocatorStrategy
()
==
AllocatorStrategy
::
kLegacy
)
{
InitLegacyAllocator
();
}
else
{
InitCPUAllocator
();
InitCUDAAllocator
();
InitCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
auto
strategy
=
GetAllocatorStrategy
();
switch
(
strategy
)
{
case
AllocatorStrategy
::
kLegacy
:
{
InitLegacyAllocator
();
break
;
}
case
AllocatorStrategy
::
kNaiveBestFit
:
{
InitCPUAllocator
();
InitCUDAAllocator
();
InitCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported allocator strategy: %d"
,
static_cast
<
int
>
(
strategy
));
}
}
}
...
...
@@ -252,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
std
::
shared_ptr
<
Allocation
>
AllocatorFacade
::
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
).
release
(),
AllocationDeleter
());
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
));
}
AllocationPtr
AllocatorFacade
::
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
644e8af4
...
...
@@ -14,20 +14,27 @@
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"New means the experimental allocators of Fluid. in [legacy, new]"
);
"naive_best_fit means the experimental best fit allocator. "
"allocator. Enum in [legacy, naive_best_fit]."
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
AllocatorStrategy
GetStrategyFromFlag
()
{
return
FLAGS_allocator_strategy
==
"legacy"
?
AllocatorStrategy
::
kLegacy
:
AllocatorStrategy
::
kNaiveBestFit
;
if
(
FLAGS_allocator_strategy
==
"legacy"
)
{
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"naive_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
{
PADDLE_THROW
(
"Unsupported allocator strategy: %s"
,
FLAGS_allocator_strategy
);
}
}
AllocatorStrategy
GetAllocatorStrategy
()
{
...
...
paddle/fluid/memory/allocation/best_fit_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
}
return
num
;
}
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
void
BestFitAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
auto
*
bf_allocation
=
dynamic_cast
<
BestFitAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
bf_allocation
,
"The input allocation is not BestFitAllocation."
);
...
...
paddle/fluid/memory/allocation/best_fit_allocator.h
浏览文件 @
644e8af4
...
...
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void
InsertFreeNode
(
const
ListIt
&
it
);
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
...
...
paddle/fluid/memory/allocation/buffered_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -22,11 +22,11 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
BufferedAllocator
::
BufferedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
allocator
)
BufferedAllocator
::
BufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
)
:
underlying_allocator_
(
std
::
move
(
allocator
))
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
,
"Underlying allocator of BufferedAllocator must
be unmanaged
"
);
"Underlying allocator of BufferedAllocator must
not be null
"
);
if
(
underlying_allocator_
->
IsAllocThreadSafe
())
{
mtx_
.
reset
(
new
std
::
mutex
());
}
...
...
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while
(
!
allocations_
.
empty
())
{
// free the largest
auto
it
=
--
allocations_
.
end
();
cur
+=
it
->
second
->
size
();
delete
it
->
second
.
release
(
);
underlying_allocator_
->
Free
(
it
->
second
.
release
()
);
allocations_
.
erase
(
it
);
if
(
cur
>=
size
)
return
;
}
}
bool
BufferedAllocator
::
IsAllocThreadSafe
()
const
{
return
this
->
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
BufferedAllocator
::
Free
(
Allocation
*
allocation
)
{
bool
BufferedAllocator
::
IsAllocThreadSafe
()
const
{
return
mtx_
!=
nullptr
;
}
void
BufferedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
allocations_
.
emplace
(
allocation
->
size
(),
AllocationPtr
(
allocation
));
}
Allocation
*
BufferedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
...
...
@@ -61,17 +61,15 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if
(
it
!=
allocations_
.
end
()
&&
it
->
first
<
size
*
2
)
{
AllocationPtr
result
(
std
::
move
(
it
->
second
));
allocations_
.
erase
(
it
);
return
new
AllocationWithUnderlying
(
std
::
move
(
result
)
);
return
result
.
release
(
);
}
}
try
{
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
catch
(
BadAlloc
&
)
{
FreeCache
(
size
);
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
...
...
paddle/fluid/memory/allocation/buffered_allocator.h
浏览文件 @
644e8af4
...
...
@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_
class
BufferedAllocator
:
public
Allocator
{
public:
explicit
BufferedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
allocator
);
explicit
BufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
);
~
BufferedAllocator
();
...
...
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void
FreeCache
(
size_t
size
);
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
multimap
<
size_t
,
AllocationPtr
>
allocations_
;
std
::
unique_ptr
<
std
::
mutex
>
mtx_
;
};
...
...
paddle/fluid/memory/allocation/buffered_allocator_test.cc
浏览文件 @
644e8af4
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
...
...
@@ -64,7 +65,7 @@ class StubAllocator : public Allocator {
size_t
GetFreeCount
()
const
{
return
destruct_count_
;
}
protected:
void
Free
(
Allocation
*
allocation
)
override
{
void
Free
Impl
(
Allocation
*
allocation
)
override
{
auto
*
alloc
=
dynamic_cast
<
StubAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
alloc
);
if
(
alloc
->
ptr
())
delete
[]
static_cast
<
uint8_t
*>
(
alloc
->
ptr
());
...
...
paddle/fluid/memory/allocation/cpu_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -20,25 +20,27 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
CPUAllocation
::
CPUAllocation
(
void
*
ptr
,
size_t
size
)
:
Allocation
(
ptr
,
size
,
platform
::
CPUPlace
())
{}
bool
CPUAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CPUAllocator
::
Free
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE_NOT_NULL
(
dynamic_cast
<
CPUAllocation
*>
(
allocation
));
free
(
allocation
->
ptr
());
void
CPUAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
void
*
p
=
allocation
->
ptr
();
#ifdef _WIN32
_aligned_free
(
p
);
#else
free
(
p
);
#endif
delete
allocation
;
}
Allocation
*
CPUAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
void
*
ptr
;
auto
status
=
posix_memalign
(
&
ptr
,
kAlignment
,
size
);
if
(
UNLIKELY
(
status
)
!=
0
)
{
throw
BadAlloc
(
string
::
Sprintf
(
"Cannot allocate cpu memory %d. Errno is %d"
,
size
,
status
));
}
return
new
CPUAllocation
(
ptr
,
size
);
void
*
p
;
#ifdef _WIN32
p
=
_aligned_malloc
(
size
,
kAlignment
);
#else
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
kAlignment
,
size
),
0
,
"Alloc %ld error!"
,
size
);
#endif
return
new
Allocation
(
p
,
size
,
platform
::
CPUPlace
());
}
}
// namespace allocation
}
// namespace memory
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
644e8af4
...
...
@@ -31,19 +31,13 @@ namespace allocation {
//
// NOTE(yy): There is no need to use `BestFitAllocator` on CPU. We can import
// an open-source allocator into Paddle.
class
CPUAllocator
;
class
CPUAllocation
:
public
Allocation
{
public:
CPUAllocation
(
void
*
ptr
,
size_t
size
);
};
class
CPUAllocator
:
public
Allocator
{
public:
constexpr
static
size_t
kAlignment
=
64u
;
constexpr
static
size_t
kAlignment
=
4096UL
;
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/cuda_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -23,15 +23,14 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
CUDAAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CUDAAllocator
::
Free
(
Allocation
*
allocation
)
{
void
CUDAAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
auto
*
cuda_allocation
=
dynamic_cast
<
CUDAAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
cuda_allocation
);
PADDLE_ENFORCE_EQ
(
boost
::
get
<
platform
::
CUDAPlace
>
(
cuda_allocation
->
place
()),
PADDLE_ENFORCE_EQ
(
boost
::
get
<
platform
::
CUDAPlace
>
(
allocation
->
place
()),
place_
);
PADDLE_ENFORCE
(
cudaFree
(
allocation
->
ptr
()));
delete
allocation
;
}
Allocation
*
CUDAAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
void
*
ptr
;
...
...
@@ -41,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s"
,
size
,
place_
.
device
,
status
,
cudaGetErrorString
(
status
)));
}
return
new
CUDA
Allocation
(
ptr
,
size
,
platform
::
Place
(
place_
));
return
new
Allocation
(
ptr
,
size
,
platform
::
Place
(
place_
));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_allocator.h
浏览文件 @
644e8af4
...
...
@@ -20,13 +20,6 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// CUDA System allocator and allocation.
// Just a flag type.
class
CUDAAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
};
class
CUDAAllocator
:
public
Allocator
{
public:
explicit
CUDAAllocator
(
const
platform
::
CUDAPlace
&
place
)
:
place_
(
place
)
{}
...
...
@@ -35,7 +28,7 @@ class CUDAAllocator : public Allocator {
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -134,26 +134,22 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
}
#ifdef PADDLE_WITH_CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
**
a_arr
=
nullptr
;
static
std
::
vector
<
int
>
devices
;
std
::
call_once
(
init_flag
,
[
gpu_id
]()
{
devices
=
platform
::
GetSelectedDevices
();
int
gpu_num
=
devices
.
size
();
allocation
::
GPUMemMonitor
.
Initialize
(
devices
.
size
());
class
GPUBuddyAllocatorList
{
public:
GPUBuddyAllocatorList
()
:
allocators_
(
platform
::
GetCUDADeviceCount
()),
flags_
(
platform
::
GetCUDADeviceCount
())
{
allocation
::
GPUMemMonitor
.
Initialize
(
allocators_
.
size
());
}
a_arr
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
size_t
i
=
0
;
i
<
devices
.
size
();
++
i
)
{
int
dev_id
=
devices
[
i
];
a_arr
[
i
]
=
nullptr
;
BuddyAllocator
*
Get
(
size_t
dev_id
)
{
PADDLE_ENFORCE
(
dev_id
<
flags_
.
size
(),
"Invalid device id %s"
,
dev_id
);
std
::
call_once
(
flags_
[
dev_id
],
[
this
,
dev_id
]
{
platform
::
SetDeviceId
(
dev_id
);
a
_arr
[
i
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(
),
platform
::
GpuMaxChunkSize
());
a
llocators_
[
dev_id
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)
),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
"You can set GFlags environment variable "
...
...
@@ -167,13 +163,19 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
}
});
});
return
allocators_
[
dev_id
];
}
private:
std
::
vector
<
BuddyAllocator
*>
allocators_
;
std
::
vector
<
std
::
once_flag
>
flags_
;
};
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
GPUBuddyAllocatorList
allocators
;
platform
::
SetDeviceId
(
gpu_id
);
auto
pos
=
std
::
distance
(
devices
.
begin
(),
std
::
find
(
devices
.
begin
(),
devices
.
end
(),
gpu_id
));
return
a_arr
[
pos
];
return
allocators
.
Get
(
gpu_id
);
}
#endif
...
...
@@ -192,7 +194,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
#ifdef PADDLE_WITH_CUDA
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
if
(
ptr
==
nullptr
&&
size
>
0
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
...
...
@@ -347,7 +349,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return
tmp_alloc
;
}
void
LegacyAllocator
::
Free
(
Allocation
*
allocation
)
{
void
LegacyAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
boost
::
apply_visitor
(
legacy
::
FreeVisitor
(
allocation
->
ptr
(),
allocation
->
size
()),
allocation
->
place
());
...
...
paddle/fluid/memory/allocation/legacy_allocator.h
浏览文件 @
644e8af4
...
...
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
private:
platform
::
Place
place_
;
...
...
paddle/fluid/memory/allocation/locked_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -14,8 +14,10 @@
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
...
...
@@ -23,26 +25,24 @@ namespace allocation {
bool
LockedAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
LockedAllocator
::
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
underlying_allocator
)
std
::
shared_ptr
<
Allocator
>
underlying_allocator
)
:
underlying_allocator_
(
std
::
move
(
underlying_allocator
))
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
);
if
(
!
underlying_allocator_
->
IsAllocThreadSafe
())
{
mtx_
.
reset
(
new
std
::
mutex
());
}
}
void
LockedAllocator
::
Free
(
Allocation
*
allocation
)
{
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
reinterpret_cast
<
AllocationWithUnderlying
*>
(
allocation
)
->
allocation_
.
reset
();
// Destroy inner allocation
}
delete
allocation
;
void
LockedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
underlying_allocator_
->
Free
(
allocation
);
}
Allocation
*
LockedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/locked_allocator.h
浏览文件 @
644e8af4
...
...
@@ -24,15 +24,15 @@ namespace allocation {
// An allocator that makes the underlying allocator thread-safe.
class
LockedAllocator
:
public
Allocator
{
public:
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
underlying_allocator
);
explicit
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
);
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
unique_ptr
<
std
::
mutex
>
mtx_
;
};
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
0 → 100644
浏览文件 @
644e8af4
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Smoke test for AllocatorFacade with FLAGS_allocator_strategy set to
// "naive_best_fit". It allocates on the CPU and, when built with CUDA, on
// GPU 0 and in CUDA-pinned host memory, checking for every allocation that
// the handle and its pointer are non-null, that it reports the requested
// place, and that its size is sufficient (exactly equal on CPU, at least the
// requested size on the CUDA places).
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
  // Flags must be set before the first AllocatorFacade::Instance() call below.
  // NOTE(review): presumably the singleton reads them on construction — confirm.
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
  FLAGS_gpu_allocator_retry_time = 500;
  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif

  FLAGS_allocator_strategy = "naive_best_fit";

  auto &instance = AllocatorFacade::Instance();
  platform::Place place;
  size_t size = 1024;

  {
    // CPU: the reported size must match the request exactly.
    place = platform::CPUPlace();
    size = 1024;
    auto cpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(cpu_allocation, nullptr);
    ASSERT_NE(cpu_allocation->ptr(), nullptr);
    ASSERT_EQ(cpu_allocation->place(), place);
    ASSERT_EQ(cpu_allocation->size(), size);
  }

#ifdef PADDLE_WITH_CUDA
  {
    // Small GPU allocation: the reported size may exceed the request.
    place = platform::CUDAPlace(0);
    size = 1024;
    auto gpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), place);
    ASSERT_GE(gpu_allocation->size(), size);
  }

  {
    // Allocate 2GB gpu memory
    // NOTE(review): presumably larger than what the 0.01 fraction above
    // reserves up front — confirm this is the intent of the case.
    place = platform::CUDAPlace(0);
    size = 2 * static_cast<size_t>(1 << 30);
    auto gpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), place);
    ASSERT_GE(gpu_allocation->size(), size);
  }

  {
    // CUDA-pinned host memory (1 MiB). Allocate with the same `place` and
    // `size` that the assertions compare against, as the other cases do.
    place = platform::CUDAPinnedPlace();
    size = (1 << 20);
    auto cuda_pinned_allocation = instance.Alloc(place, size);
    ASSERT_NE(cuda_pinned_allocation, nullptr);
    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
    ASSERT_EQ(cuda_pinned_allocation->place(), place);
    ASSERT_GE(cuda_pinned_allocation->size(), size);
  }
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/pinned_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -20,20 +20,15 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// Thread-safety query for CPUPinnedAllocator: unconditionally returns true.
// NOTE(review): presumably callers use this to decide whether the allocator
// needs to be wrapped with a lock — confirm against Allocator's contract.
bool
CPUPinnedAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CPUPinnedAllocator
::
Free
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE_NOT_NULL
(
dynamic_cast
<
CPUPinnedAllocation
*>
(
allocation
));
void
CPUPinnedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE
(
cudaFreeHost
(
allocation
->
ptr
()));
delete
allocation
;
}
Allocation
*
CPUPinnedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void
*
ptr
;
PADDLE_ENFORCE
(
cudaHostAlloc
(
&
ptr
,
size
,
cudaHostAllocPortable
));
return
new
CPUPinnedAllocation
(
ptr
,
size
);
return
new
Allocation
(
ptr
,
size
,
platform
::
CUDAPinnedPlace
()
);
}
}
// namespace allocation
}
// namespace memory
...
...
paddle/fluid/memory/allocation/pinned_allocator.h
浏览文件 @
644e8af4
...
...
@@ -20,18 +20,12 @@ namespace memory {
namespace
allocation
{
// Allocator uses `cudaHostAlloc`
class
CPUPinnedAllocation
:
public
Allocation
{
public:
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
:
Allocation
(
ptr
,
size
,
platform
::
CUDAPinnedPlace
())
{}
};
class
CPUPinnedAllocator
:
public
Allocator
{
public:
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
};
...
...
paddle/fluid/memory/allocation/retry_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -18,25 +18,15 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
RetryAllocator
::
IsAllocThreadSafe
()
const
{
return
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
RetryAllocator
::
Free
(
Allocation
*
allocation
)
{
void
RetryAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
// Delete underlying allocation first.
reinterpret_cast
<
AllocationWithUnderlying
*>
(
allocation
)
->
allocation_
.
reset
();
{
// notify all waited allocators, they can try to allocate memory after free.
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
cv_
.
notify_all
();
}
delete
allocation
;
underlying_allocator_
->
Free
(
allocation
);
cv_
.
notify_all
();
}
Allocation
*
RetryAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
alloc_func
=
[
&
,
this
]()
{
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
};
// In fact, we could unify the code paths for allocation success and failure,
// but that would take the lock even when allocation succeeds on the first try.
...
...
paddle/fluid/memory/allocation/retry_allocator.h
浏览文件 @
644e8af4
...
...
@@ -18,38 +18,32 @@
#include <condition_variable> // NOLINT
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
class
RetryAllocator
;
class
RetryAllocator
:
public
Allocator
{
public:
RetryAllocator
(
std
::
unique_ptr
<
Allocator
>&&
allocator
,
size_t
retry_ms
)
RetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
size_t
retry_ms
)
:
underlying_allocator_
(
std
::
move
(
allocator
)),
retry_time_
(
retry_ms
)
{
EnforceCheck
();
}
bool
IsAllocThreadSafe
()
const
override
;
private:
void
EnforceCheck
()
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
.
get
()
,
"UnderlyingAllocator of RetryAllocator must
be UnmanagedAllocator
"
);
underlying_allocator_
,
"UnderlyingAllocator of RetryAllocator must
not be null
"
);
PADDLE_ENFORCE
(
underlying_allocator_
->
IsAllocThreadSafe
(),
"UnderlyingAllocator of RetryAllocator must be thread-safe"
);
}
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
protected:
void
Free
(
Allocation
*
allocation
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
chrono
::
milliseconds
retry_time_
;
std
::
mutex
mutex_
;
std
::
condition_variable
cv_
;
...
...
@@ -57,8 +51,6 @@ class RetryAllocator : public Allocator {
// For debugging, we could add an atomic integer that records how much memory
// is waiting to be allocated:
// std::atomic<size_t> waited_allocate_size_{0};
friend
class
RetryAllocation
;
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/zero_size_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -24,11 +24,20 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation
*
ZeroSizeAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
if
(
size
==
0
)
{
return
new
ZeroSizeAllocation
(
place_
);
return
new
Allocation
(
nullptr
,
0
,
place_
);
}
else
{
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
// Releases `allocation`. A non-empty allocation (size() != 0) is handed back
// to underlying_allocator_->Free(); an allocation whose size() is 0 is
// destroyed here with `delete`.
void ZeroSizeAllocator::FreeImpl(Allocation *allocation) {
  const bool has_storage = (allocation->size() != 0);
  if (has_storage) {
    // Forward to the wrapped allocator's Free().
    underlying_allocator_->Free(allocation);
    return;
  }
  // Zero-size case: destroy the object directly.
  delete allocation;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
644e8af4
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
...
...
@@ -23,12 +24,6 @@ namespace allocation {
// This allocator handles requests whose size is zero. It always returns an
// allocation, even when the requested size is zero; in that case
// allocation.ptr() is nullptr.
class
ZeroSizeAllocation
:
public
Allocation
{
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
:
Allocation
(
nullptr
,
0
,
p
)
{}
};
class
ZeroSizeAllocator
:
public
Allocator
{
public:
ZeroSizeAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
,
...
...
@@ -39,6 +34,7 @@ class ZeroSizeAllocator : public Allocator {
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
...
...
paddle/fluid/platform/temporary_allocator.cc
浏览文件 @
644e8af4
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/platform/temporary_allocator.h"
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64
(
limit_of_tmp_allocation
,
-
1
,
...
...
@@ -29,38 +30,31 @@ namespace paddle {
namespace
platform
{
namespace
alloc
=
memory
::
allocation
;
TemporaryAllocation
::
TemporaryAllocation
(
alloc
::
AllocationPtr
&&
underlying_allocation
)
:
Allocation
(
underlying_allocation
->
ptr
(),
underlying_allocation
->
size
(),
underlying_allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
TemporaryAllocator
::
TemporaryAllocator
(
platform
::
Place
place
)
:
place_
(
place
)
{
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
Temporary
Allocation
*>
());
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>
());
}
// Thread-safety query for TemporaryAllocator: unconditionally returns true.
// NOTE(review): presumably justified by the mtx_ locking used elsewhere in
// this class — confirm that every mutating path takes the lock.
bool
TemporaryAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
TemporaryAllocator
::
Release
(
const
std
::
function
<
void
()
>
&
callback
)
{
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
Temporary
Allocation
*>>
t_allocations
;
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>>
t_allocations
;
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mtx_
);
callback
();
t_allocations
.
swap
(
temp_mem_map_
);
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
Temporary
Allocation
*>
());
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>
());
wait_delete_mem_
=
0
;
}
alloc
::
AllocationDeleter
deleter
;
for
(
auto
tmp
:
*
t_allocations
)
{
VLOG
(
10
)
<<
"Delete temporary allocation "
<<
tmp
.
second
->
ptr
()
<<
" size: "
<<
tmp
.
second
->
size
();
delete
tmp
.
second
;
delete
r
(
tmp
.
second
)
;
}
}
void
TemporaryAllocator
::
Free
(
alloc
::
Allocation
*
allocation
)
{
auto
*
temp_allocation
=
dynamic_cast
<
TemporaryAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
temp_allocation
);
void
TemporaryAllocator
::
FreeImpl
(
alloc
::
Allocation
*
temp_allocation
)
{
if
(
platform
::
is_gpu_place
(
temp_allocation
->
place
()))
{
PADDLE_ENFORCE
(
platform
::
is_same_place
(
temp_allocation
->
place
(),
place_
),
"The place should be the same."
);
...
...
@@ -84,7 +78,7 @@ void TemporaryAllocator::Free(alloc::Allocation *allocation) {
}
VLOG
(
10
)
<<
"Delete temporary allocation "
<<
temp_allocation
->
ptr
()
<<
" size: "
<<
temp_allocation
->
size
();
delete
temp_allocation
;
alloc
::
AllocationDeleter
()(
temp_allocation
)
;
}
size_t
TemporaryAllocator
::
TemporaryAllocationQueueSize
()
{
...
...
@@ -119,11 +113,9 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
}
// If no available allocation is found, get a new allocation from
// AllocatorFacade::Instance().
auto
raw_allocation
=
alloc
::
AllocatorFacade
::
Instance
().
Alloc
(
place_
,
size
,
attr
);
auto
temp_mem
=
new
TemporaryAllocation
(
std
::
move
(
raw_allocation
));
auto
temp_mem
=
alloc
::
AllocatorFacade
::
Instance
().
Alloc
(
place_
,
size
,
attr
);
VLOG
(
10
)
<<
"Alloc temporary allocation: "
<<
temp_mem
->
ptr
()
<<
": "
<<
size
;
return
temp_mem
;
return
temp_mem
.
release
()
;
}
}
// namespace platform
...
...
paddle/fluid/platform/temporary_allocator.h
浏览文件 @
644e8af4
...
...
@@ -16,20 +16,13 @@
#include <condition_variable> // NOLINT
#include <deque>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace
paddle
{
namespace
platform
{
class
TemporaryAllocation
:
public
memory
::
allocation
::
Allocation
{
public:
explicit
TemporaryAllocation
(
memory
::
allocation
::
AllocationPtr
&&
underlying_allocation
);
memory
::
allocation
::
AllocationPtr
underlying_allocation_
;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation.
*
...
...
@@ -56,7 +49,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void
SetCallback
(
const
std
::
function
<
void
()
>
&
callback
);
protected:
void
Free
(
memory
::
allocation
::
Allocation
*
allocation
)
override
;
void
Free
Impl
(
memory
::
allocation
::
Allocation
*
allocation
)
override
;
memory
::
allocation
::
Allocation
*
AllocateImpl
(
size_t
size
,
memory
::
allocation
::
Allocator
::
Attr
attr
)
override
;
...
...
@@ -65,8 +58,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform
::
Place
place_
;
// When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately.
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
TemporaryAllocation
*>>
temp_mem_map_
{
nullptr
};
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
memory
::
allocation
::
Allocation
*>>
temp_mem_map_
{
nullptr
};
std
::
mutex
mtx_
;
size_t
wait_delete_mem_
{
0
};
std
::
function
<
void
()
>
callback_
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
644e8af4
...
...
@@ -324,6 +324,7 @@ PYBIND11_MODULE(core, m) {
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
})
.
def
(
"_clear"
,
&
Tensor
::
clear
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
...
...
paddle/fluid/string/printf.h
浏览文件 @
644e8af4
...
...
@@ -105,14 +105,12 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf
(
std
::
cout
,
fmt
,
args
...);
}
template
<
typename
T
>
std
::
string
HumanReadableSize
(
T
size
)
{
inline
std
::
string
HumanReadableSize
(
double
f_size
)
{
size_t
i
=
0
;
double
f_size
=
static_cast
<
double
>
(
size
);
double
orig
=
f_size
;
const
std
::
vector
<
std
::
string
>
units
(
{
"B"
,
"kB"
,
"MB"
,
"GB"
,
"TB"
,
"PB"
,
"EB"
,
"ZB"
,
"YB"
});
while
(
f_size
>
1024
)
{
while
(
f_size
>
=
1024
)
{
f_size
/=
1024
;
i
++
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录