Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
9dcddf92
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9dcddf92
编写于
10月 22, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Polish best_fit_allocator
上级
0c25da39
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
16 addition
and
16 deletion
+16
-16
paddle/fluid/memory/allocation/best_fit_allocator.cc
paddle/fluid/memory/allocation/best_fit_allocator.cc
+14
-14
paddle/fluid/memory/allocation/best_fit_allocator.h
paddle/fluid/memory/allocation/best_fit_allocator.h
+2
-2
未找到文件。
paddle/fluid/memory/allocation/best_fit_allocator.cc
浏览文件 @
9dcddf92
...
@@ -41,8 +41,7 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
...
@@ -41,8 +41,7 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
chunk
.
offset_
=
0
;
chunk
.
offset_
=
0
;
chunk
.
is_free
=
true
;
chunk
.
is_free
=
true
;
chunks_
.
emplace_back
(
chunk
);
chunks_
.
emplace_back
(
chunk
);
free_chunks_
[
HighestBitPos
(
chunk
.
size_
)].
insert
(
InsertFreeNode
(
chunks_
.
begin
());
{
chunk
.
size_
,
chunks_
.
begin
()});
}
}
std
::
unique_ptr
<
Allocation
>
BestFitAllocator
::
Allocate
(
size_t
size
,
Attr
attr
)
{
std
::
unique_ptr
<
Allocation
>
BestFitAllocator
::
Allocate
(
size_t
size
,
Attr
attr
)
{
...
@@ -86,35 +85,33 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
...
@@ -86,35 +85,33 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
details
::
Chunk
remaining
;
details
::
Chunk
remaining
;
to_use
.
size_
=
request_size
;
to_use
.
size_
=
request_size
;
to_use
.
is_free
=
false
;
to_use
.
is_free
=
false
;
remaining
.
size_
=
remaining_size
;
remaining
.
is_free
=
true
;
// calc offsets
// calc offsets
to_use
.
offset_
=
to_split_it
->
offset_
;
to_use
.
offset_
=
to_split_it
->
offset_
;
remaining
.
offset_
=
to_use
.
offset_
+
to_use
.
size_
;
// insert to chunk list
// insert to chunk list
auto
to_use_it
=
chunks_
.
insert
(
to_split_it
,
to_use
);
auto
to_use_it
=
chunks_
.
insert
(
to_split_it
,
to_use
);
if
(
remaining
.
size_
!=
0
)
{
if
(
remaining_size
!=
0
)
{
auto
bit_size
=
static_cast
<
size_t
>
(
HighestBitPos
(
remaining
.
size_
));
remaining
.
size_
=
remaining_size
;
free_chunks_
[
bit_size
].
insert
(
remaining
.
is_free
=
true
;
{
remaining
.
size_
,
chunks_
.
insert
(
to_split_it
,
remaining
)});
remaining
.
offset_
=
to_use
.
offset_
+
to_use
.
size_
;
auto
remaining_it
=
chunks_
.
insert
(
to_split_it
,
remaining
);
InsertFreeNode
(
remaining_it
);
}
}
chunks_
.
erase
(
to_split_it
);
chunks_
.
erase
(
to_split_it
);
return
to_use_it
;
return
to_use_it
;
}
}
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
auto
*
bf_allocation
=
dynamic
_cast
<
BestFitAllocation
*>
(
allocation
);
auto
*
bf_allocation
=
reinterpret
_cast
<
BestFitAllocation
*>
(
allocation
);
auto
chunk_it
=
bf_allocation
->
ChunkIterator
();
auto
chunk_it
=
bf_allocation
->
ChunkIterator
();
PADDLE_ENFORCE
(
!
chunk_it
->
is_free
);
PADDLE_ENFORCE
(
!
chunk_it
->
is_free
);
chunk_it
->
is_free
=
true
;
chunk_it
->
is_free
=
true
;
if
(
chunk_it
!=
chunks_
.
begin
())
{
if
(
chunk_it
!=
chunks_
.
begin
())
{
// not the first chunk, try to merge prev.
auto
prev_it
=
chunk_it
;
auto
prev_it
=
chunk_it
;
--
prev_it
;
--
prev_it
;
if
(
prev_it
->
is_free
)
{
if
(
prev_it
->
is_free
)
{
// Merge
Left
.
// Merge
Prev
.
EraseFreeNode
(
prev_it
);
EraseFreeNode
(
prev_it
);
prev_it
->
size_
+=
chunk_it
->
size_
;
prev_it
->
size_
+=
chunk_it
->
size_
;
chunks_
.
erase
(
chunk_it
);
chunks_
.
erase
(
chunk_it
);
...
@@ -125,6 +122,7 @@ void BestFitAllocator::Free(Allocation* allocation) {
...
@@ -125,6 +122,7 @@ void BestFitAllocator::Free(Allocation* allocation) {
auto
next_it
=
chunk_it
;
auto
next_it
=
chunk_it
;
++
next_it
;
++
next_it
;
if
(
next_it
!=
chunks_
.
end
()
&&
next_it
->
is_free
)
{
if
(
next_it
!=
chunks_
.
end
()
&&
next_it
->
is_free
)
{
// not the last chunk, try to merge next
EraseFreeNode
(
next_it
);
EraseFreeNode
(
next_it
);
chunk_it
->
size_
+=
next_it
->
size_
;
chunk_it
->
size_
+=
next_it
->
size_
;
chunks_
.
erase
(
next_it
);
chunks_
.
erase
(
next_it
);
...
@@ -139,9 +137,11 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
...
@@ -139,9 +137,11 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
free_map
.
insert
({
it
->
size_
,
it
});
free_map
.
insert
({
it
->
size_
,
it
});
}
}
void
BestFitAllocator
::
EraseFreeNode
(
const
ListIt
&
it
)
{
void
BestFitAllocator
::
EraseFreeNode
(
const
ListIt
&
it
)
{
size_t
pos
=
static_cast
<
size_t
>
(
HighestBitPos
(
it
->
size_
));
auto
pos
=
static_cast
<
size_t
>
(
HighestBitPos
(
it
->
size_
));
auto
&
free_map
=
free_chunks_
[
pos
];
auto
&
free_map
=
free_chunks_
[
pos
];
auto
map_it
=
free_map
.
find
(
it
->
size_
);
auto
map_it
=
free_map
.
find
(
it
->
size_
);
// This while loop because it is a multi-map
while
(
map_it
->
second
!=
it
&&
map_it
!=
free_map
.
end
())
{
while
(
map_it
->
second
!=
it
&&
map_it
!=
free_map
.
end
())
{
++
map_it
;
++
map_it
;
}
}
...
...
paddle/fluid/memory/allocation/best_fit_allocator.h
浏览文件 @
9dcddf92
...
@@ -37,8 +37,8 @@ struct Chunk {
...
@@ -37,8 +37,8 @@ struct Chunk {
// | Chunk | prev_ pointer | next_ pointer | payload .... |
// | Chunk | prev_ pointer | next_ pointer | payload .... |
// *-------*---------------*---------------*--------------*
// *-------*---------------*---------------*--------------*
// This implementation can just return a raw pointer, and we can get the list
// This implementation can just return a raw pointer, and we can get the list
// structure by
it. However, we cannot use the same code on GPU since C
PU
// structure by
the raw pointer. However, we cannot use the same code on G
PU
// cannot access GPU memory directly.
//
since CPU
cannot access GPU memory directly.
//
//
// So we choose to use `std::list` and return an allocation instance, which
// So we choose to use `std::list` and return an allocation instance, which
// contains the list node iterator, then we can unify CPU/GPU code.
// contains the list node iterator, then we can unify CPU/GPU code.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录