Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
734c498d
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
734c498d
编写于
7月 30, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(mgb/core): improve DevMemAlloc when it has single stream
GitOrigin-RevId: 61874faa6d3be40ff9984efceb44a0cd0b4f2435
上级
39bd66fc
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
143 addition
and
41 deletion
+143
-41
src/core/impl/comp_node/mem_alloc/impl.cpp
src/core/impl/comp_node/mem_alloc/impl.cpp
+67
-34
src/core/impl/comp_node/mem_alloc/impl.h
src/core/impl/comp_node/mem_alloc/impl.h
+15
-1
src/core/test/mem_alloc.cpp
src/core/test/mem_alloc.cpp
+61
-6
未找到文件。
src/core/impl/comp_node/mem_alloc/impl.cpp
浏览文件 @
734c498d
...
...
@@ -267,45 +267,59 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) {
}
size_t
DevMemAllocImpl
::
gather_stream_free_blk_and_release_full
()
{
size_t
gathered_size
=
0
;
MGB_LOCK_GUARD
(
m_mutex
);
for
(
auto
&&
pair
:
m_stream_alloc
)
{
auto
ch
=
pair
.
second
.
get
();
auto
&&
chmtx
=
ch
->
m_mutex
;
MGB_LOCK_GUARD
(
chmtx
);
for
(
auto
&&
i
:
ch
->
m_free_blk_size
)
{
merge_free_unsafe
(
i
.
first
);
gathered_size
+=
i
.
first
.
size
;
}
ch
->
m_free_blk_addr
.
clear
();
ch
->
m_free_blk_size
.
clear
();
}
mgb_assert
(
gathered_size
<=
m_used_size
.
load
());
m_used_size
-=
gathered_size
;
size_t
free_size
=
0
;
using
Iter
=
decltype
(
m_free_blk_size
.
begin
());
std
::
vector
<
void
*>
to_free_by_raw
;
for
(
Iter
i
=
m_free_blk_size
.
begin
(),
inext
;
i
!=
m_free_blk_size
.
end
();
i
=
inext
)
{
inext
=
i
;
++
inext
;
auto
&&
blk
=
i
->
first
;
if
(
blk
.
addr
.
is_head
)
{
auto
riter
=
m_alloc_from_raw
.
find
(
blk
.
addr
.
addr_ptr
());
mgb_assert
(
riter
!=
m_alloc_from_raw
.
end
()
&&
blk
.
size
<=
riter
->
second
);
if
(
blk
.
size
==
riter
->
second
)
{
to_free_by_raw
.
push_back
(
blk
.
addr
.
addr_ptr
());
free_size
+=
blk
.
size
;
auto
j
=
i
->
second
.
aiter
;
m_free_blk_size
.
erase
(
i
);
m_free_blk_addr
.
erase
(
j
);
m_alloc_from_raw
.
erase
(
riter
);
MGB_LOCK_GUARD
(
m_mutex
);
auto
return_full_free_blk_unsafe
=
[
&
](
MemAllocImplHelper
*
alloc
)
{
auto
&&
free_blk_size
=
alloc
->
m_free_blk_size
;
auto
&&
free_blk_addr
=
alloc
->
m_free_blk_addr
;
using
Iter
=
decltype
(
m_free_blk_size
.
begin
());
for
(
Iter
i
=
free_blk_size
.
begin
(),
inext
;
i
!=
free_blk_size
.
end
();
i
=
inext
)
{
inext
=
i
;
++
inext
;
auto
&&
blk
=
i
->
first
;
if
(
blk
.
addr
.
is_head
)
{
auto
riter
=
m_alloc_from_raw
.
find
(
blk
.
addr
.
addr_ptr
());
mgb_assert
(
riter
!=
m_alloc_from_raw
.
end
()
&&
blk
.
size
<=
riter
->
second
);
if
(
blk
.
size
==
riter
->
second
)
{
to_free_by_raw
.
push_back
(
blk
.
addr
.
addr_ptr
());
free_size
+=
blk
.
size
;
auto
j
=
i
->
second
.
aiter
;
free_blk_size
.
erase
(
i
);
free_blk_addr
.
erase
(
j
);
m_alloc_from_raw
.
erase
(
riter
);
}
}
}
};
if
(
auto
child
=
get_single_child_stream_unsafe
())
{
MGB_LOCK_GUARD
(
child
->
m_mutex
);
return_full_free_blk_unsafe
(
child
);
mgb_assert
(
free_size
<=
m_used_size
.
load
());
m_used_size
-=
free_size
;
}
else
{
size_t
gathered_size
=
0
;
for
(
auto
&&
pair
:
m_stream_alloc
)
{
auto
ch
=
pair
.
second
.
get
();
auto
&&
chmtx
=
ch
->
m_mutex
;
MGB_LOCK_GUARD
(
chmtx
);
for
(
auto
&&
i
:
ch
->
m_free_blk_size
)
{
merge_free_unsafe
(
i
.
first
);
gathered_size
+=
i
.
first
.
size
;
}
ch
->
m_free_blk_addr
.
clear
();
ch
->
m_free_blk_size
.
clear
();
}
mgb_assert
(
gathered_size
<=
m_used_size
.
load
());
m_used_size
-=
gathered_size
;
}
return_full_free_blk_unsafe
(
this
);
m_tot_allocated_from_raw
-=
free_size
;
// we have to sync to ensure no kernel on the child stream still uses
...
...
@@ -359,6 +373,25 @@ FreeMemStat DevMemAllocImpl::get_free_memory_dev() {
return
ret
;
}
void
DevMemAllocImpl
::
insert_free_unsafe
(
const
FreeBlock
&
block
)
{
if
(
auto
child
=
get_single_child_stream_unsafe
())
{
{
MGB_LOCK_GUARD
(
child
->
m_mutex
);
child
->
insert_free_unsafe
(
block
);
}
m_used_size
+=
block
.
size
;
}
else
{
MemAllocImplHelper
::
insert_free_unsafe
(
block
);
}
}
/*!
 * \brief get the only registered stream allocator, if there is exactly one
 * \return the allocator stored in the sole entry of m_stream_alloc when its
 *      size is 1; nullptr for any other size
 *
 * NOTE(review): m_stream_alloc is read without taking any lock here; the
 * "_unsafe" suffix suggests the caller must already hold m_mutex -- confirm.
 */
StreamMemAllocImpl* DevMemAllocImpl::get_single_child_stream_unsafe() {
    if (m_stream_alloc.size() != 1) {
        return nullptr;
    }
    return m_stream_alloc.begin()->second.get();
}
DevMemAllocImpl
::~
DevMemAllocImpl
()
{
for
(
auto
&&
i
:
m_alloc_from_raw
)
m_raw_allocator
->
free
(
i
.
first
);
...
...
src/core/impl/comp_node/mem_alloc/impl.h
浏览文件 @
734c498d
...
...
@@ -94,7 +94,7 @@ class MemAllocImplHelper: virtual public MemAllocBase {
* \brief directly insert a free block into m_free_blk_size and
* m_free_blk_addr, without merging
*/
inline
void
insert_free_unsafe
(
const
FreeBlock
&
block
);
virtual
void
insert_free_unsafe
(
const
FreeBlock
&
block
);
/*!
* \brief allocate from parent allocator; this method must either return
...
...
@@ -153,6 +153,12 @@ class StreamMemAllocImpl final: public StreamMemAlloc,
{}
};
/*!
* \note DevMemAlloc has a two-level structure. However, when only one stream
* is registered with it, DevMemAlloc behaves like a single-level allocator
* (i.e. only the FreeBlock pool of its child stream allocator is used) for
* better performance.
*/
class
DevMemAllocImpl
final
:
public
DevMemAlloc
,
public
MemAllocImplHelper
{
friend
class
StreamMemAllocImpl
;
...
...
@@ -193,6 +199,14 @@ class DevMemAllocImpl final: public DevMemAlloc,
size_t
get_used_memory
()
override
{
return
m_used_size
.
load
();
}
void
insert_free_unsafe
(
const
FreeBlock
&
block
)
override
;
/*!
* \brief return stream allocator if DevMemAlloc has single child,
* otherwise return nullptr
*/
StreamMemAllocImpl
*
get_single_child_stream_unsafe
();
public:
DevMemAllocImpl
(
int
device
,
size_t
reserve_size
,
...
...
src/core/test/mem_alloc.cpp
浏览文件 @
734c498d
...
...
@@ -209,18 +209,73 @@ TEST(TestMemAlloc, Alloc) {
auto
ptr
=
strm_alloc
->
alloc_shared
(
REQ
);
EXPECT_EQ
(
REQ
,
strm_alloc
->
get_used_memory
());
EXPECT_EQ
(
0u
,
strm_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
REQ
,
dev_alloc
->
get_used_memory
());
EXPECT_EQ
(
TOT
-
REQ
,
dev_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
TOT
-
REQ
,
strm_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
TOT
,
dev_alloc
->
get_used_memory
());
EXPECT_EQ
(
0u
,
dev_alloc
->
get_free_memory
().
tot
);
auto
addr
=
ptr
.
get
();
ptr
.
reset
();
EXPECT_EQ
(
0u
,
strm_alloc
->
get_used_memory
());
EXPECT_EQ
(
REQ
,
strm_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
REQ
,
dev_alloc
->
get_used_memory
());
EXPECT_EQ
(
TOT
-
REQ
,
dev_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
TOT
,
strm_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
TOT
,
dev_alloc
->
get_used_memory
());
EXPECT_EQ
(
0u
,
dev_alloc
->
get_free_memory
().
tot
);
EXPECT_EQ
(
addr
,
strm_alloc
->
alloc_shared
(
REQ
).
get
());
}
/*
 * One device allocator with a single registered stream, backed by a
 * 7000-byte DummyAllocator with a 7000-byte reserve.
 *
 * A 2000-byte block is allocated and released, then a 3000-byte block is
 * requested and must come back at the same address as the released block.
 * Presumably this only works if the released block is merged with the free
 * space following it (hence the test name) -- confirm against the allocator.
 */
TEST(TestMemAlloc, MergeFreeBlock) {
    using StreamKey = DevMemAlloc::StreamKey;
    constexpr size_t CAPACITY = 7000, SMALL = 2000, MEDIUM = 3000,
                     REMAINDER = 4000;

    auto backing = std::make_shared<DummyAllocator>(CAPACITY);
    auto policy = std::make_shared<DummyRuntimePolicy>(0);
    auto dev = DevMemAlloc::make(0, CAPACITY, backing, policy);

    // the address of this local is what gets passed as the stream key
    StreamKey key_storage = nullptr;
    auto stream = dev->add_stream(static_cast<StreamKey>(&key_storage));

    // allocate, remember where the block landed, then give it back
    auto block = stream->alloc_shared(SMALL);
    auto first_addr = block.get();
    block.reset();

    // a larger request must reuse the very same start address
    block = stream->alloc_shared(MEDIUM);
    EXPECT_EQ(first_addr, block.get());

    // result is discarded and nothing is asserted on it; presumably this
    // checks that the allocation itself goes through -- TODO confirm
    stream->alloc_shared(REMAINDER);
}
/*
 * Two streams registered on one device allocator: checks the used/free
 * counters reported by each stream allocator and by the device allocator
 * after every allocate/release step.
 */
TEST(TestMemAlloc, AllocTwoStream) {
    constexpr size_t TOT = 2048, REQ0 = 1000, REQ1 = 2000;
    using StreamKey = DevMemAlloc::StreamKey;

    auto backing = std::make_shared<DummyAllocator>(TOT);
    auto policy = std::make_shared<DummyRuntimePolicy>(0);
    auto dev = DevMemAlloc::make(0, TOT, backing, policy);

    // only the addresses of these locals are used, as two distinct keys
    StreamKey key_a, key_b;
    auto stream_a = dev->add_stream(static_cast<StreamKey>(&key_a));
    auto stream_b = dev->add_stream(static_cast<StreamKey>(&key_b));
    ASSERT_NE(stream_a, stream_b);

    // step 1: first stream allocates REQ0
    auto block_a = stream_a->alloc_shared(REQ0);
    EXPECT_EQ(REQ0, stream_a->get_used_memory());
    EXPECT_EQ(0u, stream_a->get_free_memory().tot);
    EXPECT_EQ(REQ0, dev->get_used_memory());
    EXPECT_EQ(TOT - REQ0, dev->get_free_memory().tot);

    // step 2: release it; the block shows up as free in the first stream
    // while the device-level counters stay unchanged
    block_a.reset();
    EXPECT_EQ(0u, stream_a->get_used_memory());
    EXPECT_EQ(REQ0, stream_a->get_free_memory().tot);
    EXPECT_EQ(REQ0, dev->get_used_memory());
    EXPECT_EQ(TOT - REQ0, dev->get_free_memory().tot);

    // step 3: second stream allocates REQ1; afterwards the first stream
    // reports no free memory any more
    auto block_b = stream_b->alloc_shared(REQ1);
    EXPECT_EQ(0u, stream_a->get_free_memory().tot);
    EXPECT_EQ(REQ1, stream_b->get_used_memory());
    EXPECT_EQ(0u, stream_b->get_free_memory().tot);
    EXPECT_EQ(REQ1, dev->get_used_memory());
    EXPECT_EQ(0u, dev->get_free_memory().tot);

    // step 4: release it; the block shows up as free in the second stream
    block_b.reset();
    EXPECT_EQ(0u, stream_b->get_used_memory());
    EXPECT_EQ(REQ1, stream_b->get_free_memory().tot);
    EXPECT_EQ(REQ1, dev->get_used_memory());
    EXPECT_EQ(0u, dev->get_free_memory().tot);
}
TEST
(
TestMemAlloc
,
AllocMoreThanReserve
)
{
constexpr
size_t
RES
=
1000
,
TOT
=
2048
,
REQ
=
2048
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录