Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
84466261
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
84466261
编写于
3月 03, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(imperative/src): improve elemwise
GitOrigin-RevId: 78aa487277b20bf08698ef1e100a7d4b0cc4df15
上级
e400b7ff
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
53 addition
and
7 deletion
+53
-7
imperative/src/impl/ops/elemwise.cpp
imperative/src/impl/ops/elemwise.cpp
+36
-7
imperative/src/impl/physical_tensor.cpp
imperative/src/impl/physical_tensor.cpp
+5
-0
imperative/src/include/megbrain/imperative/physical_tensor.h
imperative/src/include/megbrain/imperative/physical_tensor.h
+2
-0
src/opr/impl/basic_arith.cpp
src/opr/impl/basic_arith.cpp
+6
-0
src/opr/include/megbrain/opr/basic_arith.h
src/opr/include/megbrain/opr/basic_arith.h
+4
-0
未找到文件。
imperative/src/impl/ops/elemwise.cpp
浏览文件 @
84466261
...
@@ -114,15 +114,44 @@ void apply_on_device_tensornd(
...
@@ -114,15 +114,44 @@ void apply_on_device_tensornd(
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
SmallVector
<
DeviceTensorND
>
inp_tensornds
(
inputs
.
size
());
auto
comp_node
=
inputs
[
0
]
->
comp_node
();
using
Mode
=
Elemwise
::
Mode
;
using
TensorND
=
megdnn
::
TensorND
;
auto
&&
op_def
=
def
.
cast_final_safe
<
Elemwise
>
();
SmallVector
<
TensorND
>
inp_tensornds
;
TensorShapeArray
inp_shapes
(
inputs
.
size
());
inp_tensornds
.
reserve
(
inputs
.
size
());
TensorLayout
layout
{
inputs
[
0
]
->
layout
().
dtype
};
bool
is_empty
=
false
;
for
(
unsigned
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
for
(
unsigned
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
inp_tensornds
[
i
]
=
inputs
[
i
]
->
dev_tensor
();
if
(
inputs
[
i
]
->
layout
().
is_empty
())
{
is_empty
=
true
;
}
inp_tensornds
.
push_back
(
inputs
[
i
]
->
dnn_tensor
());
inp_shapes
[
i
]
=
inputs
[
i
]
->
layout
();
}
megdnn
::
Elemwise
::
deduce_shape
(
inp_shapes
,
layout
);
layout
.
init_contiguous_stride
();
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
layout
);
if
(
is_empty
)
{
return
{
Tensor
::
make
(
out
)};
}
}
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
auto
&&
dnn_opr
=
opr
::
intl
::
create_megdnn_opr
<
megdnn
::
Elemwise
>
(
comp_node
);
inp_tensornds
[
0
].
comp_node
(),
output_descs
[
0
].
layout
);
SmallVector
<
DeviceTensorND
>
oup_tensornds
=
{
out
};
dnn_opr
->
param
()
=
op_def
.
param
();
apply_on_device_tensornd
(
def
,
inp_tensornds
,
&
oup_tensornds
);
if
(
dnn_opr
->
param
().
mode
==
Mode
::
FUSE_MUL_ADD3
||
return
{
Tensor
::
make
(
oup_tensornds
[
0
])};
dnn_opr
->
param
().
mode
==
Mode
::
FUSE_MUL_ADD4
||
(
inp_tensornds
.
size
()
&&
inp_tensornds
[
0
].
layout
.
dtype
.
category
()
==
DTypeCategory
::
QUANTIZED
))
{
opr
::
Elemwise
::
perform_dnn
(
comp_node
,
out
,
inp_tensornds
,
dnn_opr
);
}
else
{
dnn_opr
->
exec
(
inp_tensornds
,
out
.
as_megdnn
());
}
return
{
Tensor
::
make
(
out
)};
}
}
MGB_DEFINE_OPR_CLASS
(
MGB_DEFINE_OPR_CLASS
(
...
...
imperative/src/impl/physical_tensor.cpp
浏览文件 @
84466261
...
@@ -212,6 +212,11 @@ DeviceTensorND Tensor::dev_tensor(bool contiguous) {
...
@@ -212,6 +212,11 @@ DeviceTensorND Tensor::dev_tensor(bool contiguous) {
return
ret
;
return
ret
;
}
}
megdnn
::
TensorND
Tensor
::
dnn_tensor
()
{
mgb_assert
(
m_blob
,
"uninitialized tensor."
);
return
{
m_layout
,
{
m_blob
->
storage
().
get
(),
m_offset
}};
}
void
Tensor
::
fetch_value
()
{
void
Tensor
::
fetch_value
()
{
MGB_LOCK_GUARD
(
m_blob_mtx
);
MGB_LOCK_GUARD
(
m_blob_mtx
);
MGB_LOCK_GUARD
(
m_value_mtx
);
MGB_LOCK_GUARD
(
m_value_mtx
);
...
...
imperative/src/include/megbrain/imperative/physical_tensor.h
浏览文件 @
84466261
...
@@ -110,6 +110,8 @@ public:
...
@@ -110,6 +110,8 @@ public:
void
assign_from_dev_tensor
(
DeviceTensorND
);
void
assign_from_dev_tensor
(
DeviceTensorND
);
megdnn
::
TensorND
dnn_tensor
();
static
TensorPtr
make_scalar
(
DTypeScalar
value
,
CompNode
cn
);
static
TensorPtr
make_scalar
(
DTypeScalar
value
,
CompNode
cn
);
TensorPtr
make_scalar
(
DTypeScalar
value
)
const
{
TensorPtr
make_scalar
(
DTypeScalar
value
)
const
{
...
...
src/opr/impl/basic_arith.cpp
浏览文件 @
84466261
...
@@ -268,6 +268,12 @@ void Elemwise::perform(
...
@@ -268,6 +268,12 @@ void Elemwise::perform(
call_megdnn_opr_exec
(
out_cn
,
dnn_inputs
,
dest
.
as_megdnn
(),
opr
.
get
(),
nullptr
);
call_megdnn_opr_exec
(
out_cn
,
dnn_inputs
,
dest
.
as_megdnn
(),
opr
.
get
(),
nullptr
);
}
}
void
Elemwise
::
perform_dnn
(
CompNode
cn
,
DeviceTensorND
&
dest
,
megdnn
::
TensorNDArray
&
inputs
,
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
)
{
call_megdnn_opr_exec
(
cn
,
inputs
,
dest
.
as_megdnn
(),
opr
.
get
(),
nullptr
);
}
TensorLayoutArray
Elemwise
::
collective_collapse
(
const
TensorLayoutArray
&
layouts
)
{
TensorLayoutArray
Elemwise
::
collective_collapse
(
const
TensorLayoutArray
&
layouts
)
{
TensorLayoutPtrArray
inp
(
layouts
.
size
());
TensorLayoutPtrArray
inp
(
layouts
.
size
());
TensorLayoutArray
result
(
inp
.
size
());
TensorLayoutArray
result
(
inp
.
size
());
...
...
src/opr/include/megbrain/opr/basic_arith.h
浏览文件 @
84466261
...
@@ -88,6 +88,10 @@ public:
...
@@ -88,6 +88,10 @@ public:
Mode
mode
,
DeviceTensorND
&
dest
,
const
SmallVector
<
DeviceTensorND
>&
inputs
,
Mode
mode
,
DeviceTensorND
&
dest
,
const
SmallVector
<
DeviceTensorND
>&
inputs
,
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
);
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
);
MGE_WIN_DECLSPEC_FUC
static
void
perform_dnn
(
CompNode
cn
,
DeviceTensorND
&
dest
,
megdnn
::
TensorNDArray
&
inputs
,
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
);
using
TensorLayoutPtrArray
=
SmallVector
<
TensorLayout
*>
;
using
TensorLayoutPtrArray
=
SmallVector
<
TensorLayout
*>
;
/*!
/*!
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录