Commit 64d94596
Authored Oct 15, 2018 by sneaxiy
Parent: e2780623

    fix allocator_facade bug

Showing 3 changed files with 62 additions and 29 deletions (+62 -29):

    paddle/fluid/memory/allocation/allocator_facade.cc          +20  -4
    paddle/fluid/memory/allocation/auto_increment_allocator.h   +38  -22
    paddle/fluid/memory/allocation/best_fit_allocator.cc        +4   -3
paddle/fluid/memory/allocation/allocator_facade.cc

@@ -74,10 +74,24 @@ class CUDAManagedAllocator : public ManagedAllocator {
   explicit CUDAManagedAllocator(int dev_id) {
     platform::CUDADeviceGuard guard(dev_id);
     max_chunk_size_ = platform::GpuMaxChunkSize();
     raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
         new CUDAAllocator(platform::CUDAPlace(dev_id))));
-    default_allocator_ = std::make_shared<AutoIncrementAllocator>(
-        [this] { return std::move(BestFitAllocatorCreator()); });
+
+    if (max_chunk_size_ == 0) {
+      default_allocator_ = raw_allocator_;
+    } else {
+      size_t available, total;
+      platform::GpuMemoryUsage(&available, &total);
+      size_t capacity = available / max_chunk_size_;
+
+      if (capacity == 1) {
+        default_allocator_ = BestFitAllocatorCreator();
+      } else {
+        default_allocator_ = std::make_shared<AutoIncrementAllocator>(
+            [this] { return std::move(BestFitAllocatorCreator()); }, capacity);
+      }
+    }
+
     auto* cond_allocator = new ConditionalAllocator();
     cond_allocator
 ...
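This hunk changes how CUDAManagedAllocator chooses its default allocator: when max_chunk_size_ is zero it falls back to the raw CUDA allocator, and otherwise it bounds the number of best-fit chunk allocators by the free GPU memory divided by the chunk size, passing that bound to AutoIncrementAllocator as its capacity. A minimal standalone sketch of that arithmetic (simplified names and placeholder values, not the PaddlePaddle API):

    #include <cstddef>
    #include <cstdio>

    // `available` stands for the free memory that platform::GpuMemoryUsage
    // would report; both arguments here are placeholder values, not real
    // device queries.
    static std::size_t ChooseCapacity(std::size_t available,
                                      std::size_t max_chunk_size) {
      if (max_chunk_size == 0) return 0;  // caller falls back to the raw allocator
      return available / max_chunk_size;  // upper bound on best-fit chunks
    }

    int main() {
      // e.g. 15 GiB free with 4 GiB chunks -> at most 3 chunk allocators,
      // so AutoIncrementAllocator can be constructed with capacity 3.
      std::printf("%zu\n", ChooseCapacity(15ULL << 30, 4ULL << 30));  // prints 3
      return 0;
    }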
@@ -110,9 +124,11 @@ class CUDAManagedAllocator : public ManagedAllocator {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
     return std::make_shared<AlignedAllocator<64u>>(
         NaiveManagedAllocator::Create(
             std::unique_ptr<Allocator>(
-                new BestFitAllocator(allocation))));
+                new LockedAllocator(std::unique_ptr<Allocator>(
+                    new BestFitAllocator(allocation))))));
   }

   bool IsAllocThreadSafe() const override { return true; }

  private:
 ...
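This second hunk also wraps each per-chunk BestFitAllocator in a LockedAllocator before handing it to NaiveManagedAllocator::Create, since a best-fit allocator managing a shared chunk is not safe to call from several threads at once. A minimal sketch of that mutex-decorator idea, using generic stand-in types rather than the real Paddle allocator interfaces:

    #include <cstddef>
    #include <memory>
    #include <mutex>

    struct AllocatorLike {  // stand-in interface, not paddle::memory::Allocator
      virtual ~AllocatorLike() = default;
      virtual void* Allocate(std::size_t size) = 0;
      virtual void Free(void* p) = 0;
    };

    class LockedDecorator : public AllocatorLike {
     public:
      explicit LockedDecorator(std::unique_ptr<AllocatorLike> underlying)
          : underlying_(std::move(underlying)) {}

      void* Allocate(std::size_t size) override {
        std::lock_guard<std::mutex> guard(mtx_);  // serialize concurrent callers
        return underlying_->Allocate(size);
      }
      void Free(void* p) override {
        std::lock_guard<std::mutex> guard(mtx_);
        underlying_->Free(p);
      }

     private:
      std::unique_ptr<AllocatorLike> underlying_;  // e.g. a best-fit allocator
      std::mutex mtx_;
    };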
paddle/fluid/memory/allocation/auto_increment_allocator.h

@@ -40,13 +40,18 @@ namespace allocation {
 // allocator. The allocation requests from many threads may be dispatched
 // to the same underlying allocator. So the underlying allocator must be
 // thread safe.
+//
+// NOTE(zjl): Add capacity parameters to constructor. A high-performance
+// thread-safe std::vector with varying size is hard to implement.
+// Fortunately, we can get the total GPU memory and each chunk size.
+// Therefore, we can get the suitable capacity of AutoIncrementAllocator.
 class AutoIncrementAllocator : public ManagedAllocator {
  public:
   // Creator is the method to create ManagedAllocator
   using AllocatorCreator = std::function<std::shared_ptr<ManagedAllocator>()>;

-  explicit AutoIncrementAllocator(AllocatorCreator&& creator)
-      : creator_(std::move(creator)), prev_success_allocator_{0} {}
+  explicit AutoIncrementAllocator(AllocatorCreator&& creator, size_t capacity)
+      : creator_(std::move(creator)), underlying_allocators_(capacity) {}

   std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;
   std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
   bool IsAllocThreadSafe() const override;
 ...
@@ -56,15 +61,13 @@ class AutoIncrementAllocator : public ManagedAllocator {
   template <typename Callback>
   inline typename std::result_of<Callback(ManagedAllocator&)>::type
   InvokeOrCreateUnderlyingAllocator(Callback callback) {
-    std::shared_ptr<std::vector<AllocatorCreator::result_type>>
-        underlying_allocators = underlying_allocators_;
-    size_t retry_count = underlying_allocators->size();
-    size_t allocator_num = retry_count;
     auto cur = prev_success_allocator_.load();
+    size_t retry_count = allocator_num_.load();
+    size_t allocator_num = retry_count;
     while (retry_count-- > 0) {  // until there retry count is zero
       try {
-        auto res = callback(*((*underlying_allocators)[cur]));
-        prev_success_allocator_.store(cur);
+        auto res = callback(*underlying_allocators_[cur]);
+        prev_success_allocator_ = cur;
         return std::move(res);
       } catch (BadAlloc&) {
         if (++cur >= allocator_num) {
 ...
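The retry loop above starts from the allocator that satisfied the previous request, walks forward through the fixed-size table, and gives up after one full pass. A minimal sketch of that strategy with placeholder types (TryEach and BadAllocSketch are illustrative names, not from the patch):

    #include <cstddef>
    #include <functional>
    #include <stdexcept>
    #include <vector>

    struct BadAllocSketch : std::runtime_error {
      using std::runtime_error::runtime_error;
    };

    // `allocators` stands in for underlying_allocators_, `start` for
    // prev_success_allocator_; each element throws BadAllocSketch when exhausted.
    inline int TryEach(const std::vector<std::function<int()>>& allocators,
                       std::size_t* start) {
      std::size_t cur = *start;
      std::size_t retry_count = allocators.size();
      while (retry_count-- > 0) {
        try {
          int result = allocators[cur]();
          *start = cur;  // remember the winner for the next call
          return result;
        } catch (BadAllocSketch&) {
          if (++cur >= allocators.size()) cur = 0;  // wrap around
        }
      }
      throw BadAllocSketch("all allocators exhausted");
    }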
@@ -77,20 +80,34 @@ class AutoIncrementAllocator : public ManagedAllocator {
       }
     }
     // No suitable allocator
+
+    // This happens when the first allocator is exhausted and
+    // there are more than 1 allocation requests
+    // In this situation, the first allocation request would success
+    // and the second allocation request would fail if we do not use
+    // the newly created allocator by the first allocation request.
+    for (size_t new_allocator_num = allocator_num_.load();
+         allocator_num < new_allocator_num; ++allocator_num) {
+      try {
+        auto ret = callback(*underlying_allocators_[allocator_num]);
+        prev_success_allocator_ = allocator_num;
+        return std::move(ret);
+      } catch (BadAlloc&) {
+      } catch (...) {
+        std::rethrow_exception(std::current_exception());
+      }
+    }
+
     ManagedAllocator* new_allocator;
     {
       std::lock_guard<std::mutex> guard(mtx_);
-      auto old_size = underlying_allocators_->size();
-      decltype(underlying_allocators_) new_allocators(
-          new std::vector<AllocatorCreator::result_type>(old_size + 1));
-      for (size_t i = 0; i < old_size; ++i) {
-        (*new_allocators)[i] = (*underlying_allocators_)[i];
-      }
-      (*new_allocators)[old_size] = creator_();
-      new_allocator = (*new_allocators)[old_size].get();
-      underlying_allocators_ = new_allocators;
-      prev_success_allocator_.store(old_size);
+      auto old_size = allocator_num_.load();
+      PADDLE_ENFORCE_LT(old_size, underlying_allocators_.size(),
+                        "Allocator number exceeds capacity %d",
+                        underlying_allocators_.size());
+      underlying_allocators_[old_size] = creator_();
+      new_allocator = underlying_allocators_[old_size].get();
+      prev_success_allocator_ = old_size;
+      allocator_num_.fetch_add(1);
     }
     PADDLE_ENFORCE(
 ...
@@ -102,9 +119,8 @@ class AutoIncrementAllocator : public ManagedAllocator {
   AllocatorCreator creator_;

-  // Use std::shared_ptr to ensure thread-safety
-  std::shared_ptr<std::vector<AllocatorCreator::result_type>>
-      underlying_allocators_;
+  std::vector<AllocatorCreator::result_type> underlying_allocators_;
+  std::atomic<size_t> allocator_num_{0};

   // Use std::atomic rather than std::mutex, since std::atomic is usually
   // lock-free
 ...
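Taken together, these hunks replace the copy-on-write std::shared_ptr<std::vector<...>> with a vector pre-sized to the constructor's capacity plus an atomic element count: readers only index slots below allocator_num_, while writers append under mtx_ and then bump the counter to publish the new slot. A minimal sketch of that publication pattern with placeholder element types (FixedCapacityRegistry is illustrative, not a Paddle class):

    #include <atomic>
    #include <cstddef>
    #include <memory>
    #include <mutex>
    #include <stdexcept>
    #include <vector>

    class FixedCapacityRegistry {
     public:
      explicit FixedCapacityRegistry(std::size_t capacity) : slots_(capacity) {}

      // Fast path: a slot below size_ was fully written before size_ was
      // incremented, so readers need no lock.
      std::shared_ptr<int> Get(std::size_t i) const { return slots_[i]; }
      std::size_t Size() const { return size_.load(); }

      // Slow path: writers serialize on a mutex, fill the next slot, then bump
      // the atomic counter to publish it (mirroring the PADDLE_ENFORCE_LT +
      // fetch_add sequence in the hunk above).
      std::size_t Append(std::shared_ptr<int> value) {
        std::lock_guard<std::mutex> guard(mtx_);
        std::size_t old_size = size_.load();
        if (old_size >= slots_.size()) {
          throw std::length_error("registry capacity exceeded");
        }
        slots_[old_size] = std::move(value);
        size_.fetch_add(1);
        return old_size;
      }

     private:
      std::vector<std::shared_ptr<int>> slots_;  // never resized after construction
      std::atomic<std::size_t> size_{0};
      std::mutex mtx_;
    };

    int main() {
      FixedCapacityRegistry registry(3);
      std::size_t idx = registry.Append(std::make_shared<int>(42));
      return *registry.Get(idx) == 42 ? 0 : 1;
    }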
paddle/fluid/memory/allocation/best_fit_allocator.cc

@@ -26,10 +26,11 @@ static int HighestBitPos(size_t N) {
   if (UNLIKELY(N == 0)) {
     return 0;
   } else {
-    // NOTE: here we can use __builtin_clz in GCC.
-    // However, let's use std::log2 for better readability
-    // and trust std::log2's performance.
+#ifdef __GNUC__
+    return sizeof(unsigned int) * 8 - __builtin_clz(N);
+#else
     return static_cast<int>(std::log2(N) + 1);
+#endif
   }
 }
 ...
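This last hunk switches HighestBitPos to the __builtin_clz intrinsic under GCC-compatible compilers while keeping the std::log2 formula as the portable fallback. A small self-contained check that sizeof(unsigned int) * 8 - __builtin_clz(n) matches floor(log2(n)) + 1 for inputs that fit in an unsigned int (HighestBitPosSketch restates the helper for illustration only):

    #include <cassert>
    #include <cmath>
    #include <cstddef>

    // Illustrative restatement of the patched helper, not the file's function.
    static int HighestBitPosSketch(std::size_t n) {
      if (n == 0) return 0;
    #ifdef __GNUC__
      // __builtin_clz counts leading zeros of an unsigned int, so this branch
      // assumes n fits in unsigned int (the original passes a size_t, which is
      // implicitly converted).
      return static_cast<int>(sizeof(unsigned int) * 8 -
                              __builtin_clz(static_cast<unsigned int>(n)));
    #else
      return static_cast<int>(std::log2(n) + 1);
    #endif
    }

    int main() {
      // 1-based index of the highest set bit: 1 -> 1, 7 -> 3, 8 -> 4, 1023 -> 10.
      assert(HighestBitPosSketch(1) == 1);
      assert(HighestBitPosSketch(7) == 3);
      assert(HighestBitPosSketch(8) == 4);
      assert(HighestBitPosSketch(1023) == 10);
      return 0;
    }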