MegEngine 天元 / MegEngine
Commit 9ffc2c0a
Authored Mar 24, 2022 by Megvii Engine Team
fix(mge): fix host performance loss caused by dtr
GitOrigin-RevId: ee8b729e8087cb42e904fb33f59043b73b5d2262
Parent 69673f14
Showing 4 changed files with 18 additions and 17 deletions (+18 -17)
imperative/src/impl/interpreter/commands.h            +0  -1
imperative/src/impl/interpreter/interpreter_impl.cpp  +16 -12
imperative/src/impl/interpreter/interpreter_impl.h    +1  -1
imperative/src/impl/interpreter/tensor_info.h         +1  -3
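The commit message is terse, so the sketch below restates the idea of the change in isolation. It uses simplified stand-in types (Desc, Info) instead of MegEngine's LogicalTensorDesc / TensorInfo / ApplyOp and is not the project's actual code; it only illustrates why carrying a copied vector of output descriptors inside every queued command adds host-side work per dispatched op, while keeping only TensorInfo pointers does not, since each descriptor is already stored on its tensor.

// Minimal sketch with hypothetical stand-in types; compile with -std=c++14.
#include <cstddef>
#include <string>
#include <vector>

struct Desc {                        // stand-in for LogicalTensorDesc
    std::vector<size_t> shape;
    std::string dtype;
};
struct Info {                        // stand-in for TensorInfo
    Desc desc;                       // the desc already lives on the tensor
};

// Before: every dispatched op copies its output descs into the command.
struct ApplyOpBefore {
    std::vector<Info*> inputs, outputs;
    std::vector<Desc> outputs_descs;  // per-dispatch host copy
    bool validated = false;
};

// After: the command keeps only pointers; descs are read from the
// TensorInfos when (and only when) the worker needs them.
struct ApplyOpAfter {
    std::vector<Info*> inputs, outputs;
    bool validated = false;
};

int main() {
    Info x{{{128, 128}, "float32"}};
    ApplyOpAfter cmd{{}, {&x}, true};
    // A descriptor is still reachable through the output tensor:
    return cmd.outputs[0]->desc.shape.size() == 2 ? 0 : 1;
}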
imperative/src/impl/interpreter/commands.h
@@ -49,7 +49,6 @@ struct ApplyOp {
     std::shared_ptr<OpDef> op;
     SmallVector<TensorInfo*> inputs;
     SmallVector<TensorInfo*> outputs;
-    SmallVector<LogicalTensorDesc> outputs_descs;
     bool validated = false;
 
     template <typename TFunctor>
imperative/src/impl/interpreter/interpreter_impl.cpp
@@ -355,7 +355,7 @@ void ChannelImpl::dispatch_kernel(
     for (int i = 0; i < output_descs.size(); ++i) {
         auto&& desc = output_descs[i];
         auto info = alloc();
-        init(info, desc);
+        init(info, std::move(desc));
         // make sure desc's value is consistent with h_value
         if (!info->desc.value.empty()) {
             info->h_value = HostTensorND::make_proxy(desc.value)
@@ -364,9 +364,9 @@ void ChannelImpl::dispatch_kernel(
         output_infos.push_back(info);
         outputs->push_back(reinterpret_cast<Handle>(info));
     }
-    ApplyOp cmd{Profiler::next_id(), std::move(op),
-            std::move(input_infos), std::move(output_infos),
-            std::move(output_descs), validated};
+    ApplyOp cmd{
+            Profiler::next_id(), std::move(op), std::move(input_infos),
+            std::move(output_infos), validated};
     if (Profiler::is_profiling()) {
         auto op_info_getter = [op = cmd.op] {
             std::unordered_map<std::string, std::string> op_info;
@@ -594,7 +594,7 @@ TensorInfo* ChannelImpl::alloc() {
     return info;
 }
 
-void ChannelImpl::init(TensorInfo* info, LogicalTensorDesc desc) {
+void ChannelImpl::init(TensorInfo* info, LogicalTensorDesc&& desc) {
     m_valid_handle.insert(reinterpret_cast<Handle>(info));
     MGB_RECORD_EVENT(TensorDeclareEvent, info->id, info->name);
     info->status = TensorInfo::Allocated;
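The hunk above (together with the matching declaration change in interpreter_impl.h further down) turns init()'s descriptor parameter from pass-by-value into an rvalue reference, so the dispatch loop now hands the desc over with std::move instead of copying it. The sketch below shows just that difference with simplified placeholder types; it is not the real ChannelImpl::init.

// Minimal sketch of by-value vs. rvalue-reference parameter passing.
#include <cstddef>
#include <utility>
#include <vector>

struct Desc {
    std::vector<size_t> shape;
    std::vector<float> value;   // host-side value; copying this is the cost
};

struct Info {
    Desc desc;
};

// By value: an lvalue argument such as output_descs[i] gets copied.
void init_by_value(Info* info, Desc desc) {
    info->desc = std::move(desc);
}

// By rvalue reference: the caller must give up ownership explicitly,
// so the same argument is moved instead of copied.
void init_by_rref(Info* info, Desc&& desc) {
    info->desc = std::move(desc);
}

int main() {
    Info info;
    std::vector<Desc> output_descs(1);
    output_descs[0].value.assign(1024, 0.f);

    // init_by_value(&info, output_descs[0]);          // would copy 1024 floats
    init_by_rref(&info, std::move(output_descs[0]));   // steals the buffer
    return info.desc.value.size() == 1024 ? 0 : 1;
}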
@@ -724,9 +724,8 @@ void ChannelImpl::regenerate(TensorInfo* dest) {
     if (dest->evict_type == EvictType::DROP) {
         auto&& path = dest->producer;
         m_apply_stack.push(
-                {ApplyOp{path->id, path->op, path->inputs, path->outputs,
-                         path->outputs_descs},
-                 0, dest, "dtr"});
+                {ApplyOp{path->id, path->op, path->inputs, path->outputs}, 0, dest,
+                 "dtr"});
         if (!m_applying)
             flush_apply_stack();
     }
@@ -819,13 +818,18 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) {
     }
     // Apply op
     SmallVector<LogicalTensorDesc> output_descs;
-    for (auto i : cmd.outputs_descs) {
-        output_descs.push_back(i);
-    }
+    bool validated = cmd.validated;
+    if (!state.options.enable_dtr_auto_drop) {
+        for (auto i : cmd.outputs) {
+            output_descs.push_back(i->desc);
+        }
+    } else {
+        validated = false;
+    }
     // Here std::move is REQUIRED for removing duplicated references.
     auto outputs = apply_on_physical_tensor(
             apply_on_physical_tensor, *cmd.op, std::move(inputs), output_descs,
-            cmd.validated);
+            validated);
     // After execute
     for (auto&& [device, kernel_id] : kernels) {
         MGB_RECORD_EVENT_IF(
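The do_apply_op hunk above replaces the copy from cmd.outputs_descs with a branch on the DTR setting. The sketch below mirrors that branch with simplified types and a hypothetical helper name (prepare_descs); it is not the actual ChannelImpl code. When enable_dtr_auto_drop is off, the descriptors are rebuilt from the output TensorInfos; when it is on, they are left empty and validated is forced to false, presumably so the execution path does not trust descriptors that evictions may have invalidated.

// Minimal sketch; names and types are placeholders, compile with -std=c++14.
#include <vector>

struct Desc { int ndim = 0; };
struct Info { Desc desc; };

struct Cmd {
    std::vector<Info*> outputs;
    bool validated = false;
};

struct Options {
    bool enable_dtr_auto_drop = false;
};

std::vector<Desc> prepare_descs(const Cmd& cmd, const Options& opt, bool* validated) {
    std::vector<Desc> output_descs;
    *validated = cmd.validated;
    if (!opt.enable_dtr_auto_drop) {
        for (auto* i : cmd.outputs) {
            output_descs.push_back(i->desc);  // descs come from the tensors
        }
    } else {
        *validated = false;  // empty descs; do not claim they are validated
    }
    return output_descs;
}

int main() {
    Info out{{2}};
    Cmd cmd{{&out}, true};
    bool validated = false;
    auto descs = prepare_descs(cmd, Options{}, &validated);
    return (descs.size() == 1 && validated) ? 0 : 1;
}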
@@ -1154,7 +1158,7 @@ void ChannelImpl::process_one_task(Command& icmd) {
             if (!inplace && !cross_cn && !m_dtr.is_bad_op(get_name(*cmd.op))) {
                 TensorInfo::ComputePath::make(
-                        cmd.id, cmd.op, cmd.inputs, cmd.outputs, cmd.outputs_descs);
+                        cmd.id, cmd.op, cmd.inputs, cmd.outputs);
                 size_t detach_cnt = 0;
                 if (!strcmp(get_name(*cmd.op), "BatchNorm") &&
                     cmd.outputs.size() == 6) {
imperative/src/impl/interpreter/interpreter_impl.h
@@ -77,7 +77,7 @@ private:
     struct State;
 
     TensorInfo* alloc();
-    void init(TensorInfo*, LogicalTensorDesc desc);
+    void init(TensorInfo*, LogicalTensorDesc&& desc);
     void free(TensorInfo*);
     void real_free(TensorInfo*);
     void recursive_free(TensorInfo*);
imperative/src/impl/interpreter/tensor_info.h
@@ -99,14 +99,12 @@ struct TensorInfo {
     static ComputePath* make(
             uint64_t id, std::shared_ptr<OpDef> op, SmallVector<TensorInfo*> inputs,
-            SmallVector<TensorInfo*> outputs,
-            SmallVector<LogicalTensorDesc> outputs_descs) {
+            SmallVector<TensorInfo*> outputs) {
         auto* path = new TensorInfo::ComputePath();
         path->id = id;
         path->op = op;
        path->inputs = inputs;
         path->outputs = outputs;
-        path->outputs_descs = outputs_descs;
         // dedup
         SmallVector<TensorInfo*> unique_inputs = inputs;
         std::sort(unique_inputs.begin(), unique_inputs.end());
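Read together with the regenerate() and do_apply_op() hunks above, the net effect is that a ComputePath no longer carries its own copy of the output descriptors: when a dropped tensor is recomputed, the rebuilt ApplyOp holds only the TensorInfo pointers, and the worker either reads the descriptors back from those tensors or, with DTR auto-drop enabled, proceeds with empty descriptors and validated set to false.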