Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
71c846ef
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
71c846ef
编写于
10月 23, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert buggy changes
test=develop
上级
dbf9f6f4
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
16 addition
and
17 deletion
+16
-17
paddle/fluid/memory/allocation/best_fit_allocator.cc
paddle/fluid/memory/allocation/best_fit_allocator.cc
+15
-15
paddle/fluid/operators/distributed/sendrecvop_utils.cc
paddle/fluid/operators/distributed/sendrecvop_utils.cc
+1
-2
未找到文件。
paddle/fluid/memory/allocation/best_fit_allocator.cc
浏览文件 @
71c846ef
...
@@ -26,7 +26,7 @@ static int HighestBitPos(size_t N) {
...
@@ -26,7 +26,7 @@ static int HighestBitPos(size_t N) {
if
(
UNLIKELY
(
N
==
0
))
{
if
(
UNLIKELY
(
N
==
0
))
{
return
0
;
return
0
;
}
else
{
}
else
{
#ifdef __GNUC__
#ifdef __GNUC
C
__
return
sizeof
(
unsigned
int
)
*
8
-
__builtin_clz
(
N
);
return
sizeof
(
unsigned
int
)
*
8
-
__builtin_clz
(
N
);
#else
#else
return
static_cast
<
int
>
(
std
::
log2
(
N
)
+
1
);
return
static_cast
<
int
>
(
std
::
log2
(
N
)
+
1
);
...
@@ -41,7 +41,8 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
...
@@ -41,7 +41,8 @@ BestFitAllocator::BestFitAllocator(Allocation* allocation)
chunk
.
offset_
=
0
;
chunk
.
offset_
=
0
;
chunk
.
is_free
=
true
;
chunk
.
is_free
=
true
;
chunks_
.
emplace_back
(
chunk
);
chunks_
.
emplace_back
(
chunk
);
InsertFreeNode
(
chunks_
.
begin
());
free_chunks_
[
HighestBitPos
(
chunk
.
size_
)].
insert
(
{
chunk
.
size_
,
chunks_
.
begin
()});
}
}
std
::
unique_ptr
<
Allocation
>
BestFitAllocator
::
Allocate
(
size_t
size
,
Attr
attr
)
{
std
::
unique_ptr
<
Allocation
>
BestFitAllocator
::
Allocate
(
size_t
size
,
Attr
attr
)
{
...
@@ -85,33 +86,35 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
...
@@ -85,33 +86,35 @@ BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
details
::
Chunk
remaining
;
details
::
Chunk
remaining
;
to_use
.
size_
=
request_size
;
to_use
.
size_
=
request_size
;
to_use
.
is_free
=
false
;
to_use
.
is_free
=
false
;
remaining
.
size_
=
remaining_size
;
remaining
.
is_free
=
true
;
// calc offsets
// calc offsets
to_use
.
offset_
=
to_split_it
->
offset_
;
to_use
.
offset_
=
to_split_it
->
offset_
;
remaining
.
offset_
=
to_use
.
offset_
+
to_use
.
size_
;
// insert to chunk list
// insert to chunk list
auto
to_use_it
=
chunks_
.
insert
(
to_split_it
,
to_use
);
auto
to_use_it
=
chunks_
.
insert
(
to_split_it
,
to_use
);
if
(
remaining_size
!=
0
)
{
if
(
remaining
.
size_
!=
0
)
{
remaining
.
size_
=
remaining_size
;
auto
bit_size
=
static_cast
<
size_t
>
(
HighestBitPos
(
remaining
.
size_
));
remaining
.
is_free
=
true
;
free_chunks_
[
bit_size
].
insert
(
remaining
.
offset_
=
to_use
.
offset_
+
to_use
.
size_
;
{
remaining
.
size_
,
chunks_
.
insert
(
to_split_it
,
remaining
)});
auto
remaining_it
=
chunks_
.
insert
(
to_split_it
,
remaining
);
InsertFreeNode
(
remaining_it
);
}
}
chunks_
.
erase
(
to_split_it
);
chunks_
.
erase
(
to_split_it
);
return
to_use_it
;
return
to_use_it
;
}
}
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
auto
*
bf_allocation
=
reinterpret
_cast
<
BestFitAllocation
*>
(
allocation
);
auto
*
bf_allocation
=
dynamic
_cast
<
BestFitAllocation
*>
(
allocation
);
auto
chunk_it
=
bf_allocation
->
ChunkIterator
();
auto
chunk_it
=
bf_allocation
->
ChunkIterator
();
PADDLE_ENFORCE
(
!
chunk_it
->
is_free
);
PADDLE_ENFORCE
(
!
chunk_it
->
is_free
);
chunk_it
->
is_free
=
true
;
chunk_it
->
is_free
=
true
;
if
(
chunk_it
!=
chunks_
.
begin
())
{
// not the first chunk, try to merge prev.
if
(
chunk_it
!=
chunks_
.
begin
())
{
auto
prev_it
=
chunk_it
;
auto
prev_it
=
chunk_it
;
--
prev_it
;
--
prev_it
;
if
(
prev_it
->
is_free
)
{
if
(
prev_it
->
is_free
)
{
// Merge
Prev
.
// Merge
Left
.
EraseFreeNode
(
prev_it
);
EraseFreeNode
(
prev_it
);
prev_it
->
size_
+=
chunk_it
->
size_
;
prev_it
->
size_
+=
chunk_it
->
size_
;
chunks_
.
erase
(
chunk_it
);
chunks_
.
erase
(
chunk_it
);
...
@@ -122,7 +125,6 @@ void BestFitAllocator::Free(Allocation* allocation) {
...
@@ -122,7 +125,6 @@ void BestFitAllocator::Free(Allocation* allocation) {
auto
next_it
=
chunk_it
;
auto
next_it
=
chunk_it
;
++
next_it
;
++
next_it
;
if
(
next_it
!=
chunks_
.
end
()
&&
next_it
->
is_free
)
{
if
(
next_it
!=
chunks_
.
end
()
&&
next_it
->
is_free
)
{
// not the last chunk, try to merge next
EraseFreeNode
(
next_it
);
EraseFreeNode
(
next_it
);
chunk_it
->
size_
+=
next_it
->
size_
;
chunk_it
->
size_
+=
next_it
->
size_
;
chunks_
.
erase
(
next_it
);
chunks_
.
erase
(
next_it
);
...
@@ -137,11 +139,9 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
...
@@ -137,11 +139,9 @@ void BestFitAllocator::InsertFreeNode(const ListIt& it) {
free_map
.
insert
({
it
->
size_
,
it
});
free_map
.
insert
({
it
->
size_
,
it
});
}
}
void
BestFitAllocator
::
EraseFreeNode
(
const
ListIt
&
it
)
{
void
BestFitAllocator
::
EraseFreeNode
(
const
ListIt
&
it
)
{
auto
pos
=
static_cast
<
size_t
>
(
HighestBitPos
(
it
->
size_
));
size_t
pos
=
static_cast
<
size_t
>
(
HighestBitPos
(
it
->
size_
));
auto
&
free_map
=
free_chunks_
[
pos
];
auto
&
free_map
=
free_chunks_
[
pos
];
auto
map_it
=
free_map
.
find
(
it
->
size_
);
auto
map_it
=
free_map
.
find
(
it
->
size_
);
// This while loop because it is a multi-map
while
(
map_it
->
second
!=
it
&&
map_it
!=
free_map
.
end
())
{
while
(
map_it
->
second
!=
it
&&
map_it
!=
free_map
.
end
())
{
++
map_it
;
++
map_it
;
}
}
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.cc
浏览文件 @
71c846ef
...
@@ -42,8 +42,7 @@ static std::shared_ptr<memory::Allocation> GetCommunicationAllocationFromTensor(
...
@@ -42,8 +42,7 @@ static std::shared_ptr<memory::Allocation> GetCommunicationAllocationFromTensor(
memory
::
Copy
(
cuda_pinned
,
result
->
ptr
(),
memory
::
Copy
(
cuda_pinned
,
result
->
ptr
(),
boost
::
get
<
platform
::
CUDAPlace
>
(
tensor
.
place
()),
boost
::
get
<
platform
::
CUDAPlace
>
(
tensor
.
place
()),
reinterpret_cast
<
const
void
*>
(
tensor
.
data
<
void
>
()),
copy_size
,
tensor
.
data
<
void
>
(),
copy_size
,
gpu_dev_ctx
.
stream
());
gpu_dev_ctx
.
stream
());
ctx
.
Wait
();
ctx
.
Wait
();
return
result
;
return
result
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录