Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
c59be192
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
c59be192
编写于
4月 17, 2020
作者:
M
Megvii Engine Team
提交者:
Xinran Xu
5月 06, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn/arm_common/elemwise): add arm_common support chw44 elemwise
GitOrigin-RevId: aba44e01233107168b1a2c5e8dbc0c0ef1e71687
上级
6ffcfb4c
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
25 additions
and
13 deletions
+25
-13
dnn/src/common/elemwise/opr_impl_helper.cpp
dnn/src/common/elemwise/opr_impl_helper.cpp
+12
-4
dnn/src/common/elemwise/opr_impl_helper.h
dnn/src/common/elemwise/opr_impl_helper.h
+3
-1
dnn/src/x86/elemwise/opr_impl.cpp
dnn/src/x86/elemwise/opr_impl.cpp
+6
-4
dnn/src/x86/elemwise_op.h
dnn/src/x86/elemwise_op.h
+4
-4
未找到文件。
dnn/src/common/elemwise/opr_impl_helper.cpp
浏览文件 @
c59be192
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "./opr_impl_helper.h"
...
...
@@ -79,18 +80,19 @@ bool ElemwiseLayoutHelper::is_broadcasted_scalar(const TensorLayout& layout) {
}
return
true
;
}
template
<
size_t
slice_size
>
bool
ElemwiseLayoutHelper
::
is_broadcastedx_channel_like
(
const
TensorLayout
&
layout
,
BroadcastChannelInfo
&
info
)
{
if
(
layout
.
format
.
type
()
==
TensorFormat
::
Type
::
DEFAULT
&&
layout
.
ndim
==
3
&&
layout
.
stride
[
0
]
==
8
&&
layout
.
stride
[
1
]
==
0
&&
layout
.
stride
[
2
]
==
1
)
{
layout
.
ndim
==
3
&&
layout
.
stride
[
0
]
==
slice_size
&&
layout
.
stride
[
1
]
==
0
&&
layout
.
stride
[
2
]
==
1
)
{
info
.
x
=
layout
.
shape
[
0
];
info
.
y
=
layout
.
shape
[
1
];
info
.
z
=
layout
.
shape
[
2
];
return
true
;
}
else
if
(
layout
.
format
.
type
()
==
TensorFormat
::
Type
::
DEFAULT
&&
layout
.
ndim
==
4
&&
layout
.
stride
[
0
]
==
0
&&
layout
.
stride
[
1
]
==
8
&&
layout
.
stride
[
2
]
==
0
&&
layout
.
stride
[
1
]
==
slice_size
&&
layout
.
stride
[
2
]
==
0
&&
layout
.
stride
[
3
]
==
1
)
{
info
.
x
=
layout
.
shape
[
1
];
info
.
y
=
layout
.
shape
[
2
];
...
...
@@ -99,6 +101,12 @@ bool ElemwiseLayoutHelper::is_broadcastedx_channel_like(
}
return
false
;
}
#define INST(n) \
template bool ElemwiseLayoutHelper::is_broadcastedx_channel_like<n>( \
const TensorLayout& layout, BroadcastChannelInfo& info)
INST
(
4
);
INST
(
8
);
#undef INST
bool
ElemwiseLayoutHelper
::
is_broadcasted_channel_like
(
const
TensorLayout
&
layout
,
BroadcastChannelInfo
&
info
)
{
...
...
dnn/src/common/elemwise/opr_impl_helper.h
浏览文件 @
c59be192
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
...
...
@@ -87,6 +88,7 @@ public:
* Note that Input can also be 3-dimensional, and must be [x, 1, z]
* broadcasted into [x, y, z]
*/
template
<
size_t
slice_size
>
static
bool
is_broadcastedx_channel_like
(
const
TensorLayout
&
layout
,
BroadcastChannelInfo
&
info
);
};
...
...
dnn/src/x86/elemwise/opr_impl.cpp
浏览文件 @
c59be192
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/x86/elemwise/opr_impl.h"
#include "src/x86/elemwise_op.h"
...
...
@@ -360,13 +361,14 @@ bool ElemwiseImpl::exec_binary() {
return true; \
}
{
bool
normal_case
=
is_vector
(
src1
.
layout
)
&&
is_broadcastedx_channel_like
(
src0
.
layout
,
binfo
);
bool
normal_case
=
is_vector
(
src1
.
layout
)
&&
is_broadcastedx_channel_like
<
8
>
(
src0
.
layout
,
binfo
);
bool
swap_case
=
false
;
bool
commutable
=
mode_trait
().
commutable
;
if
(
!
normal_case
&&
commutable
)
{
swap_case
=
is_vector
(
src0
.
layout
)
&&
is_broadcastedx_channel_like
(
src1
.
layout
,
binfo
);
is_broadcastedx_channel_like
<
8
>
(
src1
.
layout
,
binfo
);
}
if
((
swap_case
||
normal_case
)
&&
...
...
dnn/src/x86/elemwise_op.h
浏览文件 @
c59be192
...
...
@@ -414,7 +414,7 @@ struct OpCallerBinary<Op, SIMDType::AVX2, BCAST101x_VEC> {
const
typename
Op
::
src_ctype
*
src1
,
typename
Op
::
dst_ctype
*
dst
,
DType
src0_dtype
,
DType
src1_dtype
,
DType
dst_dtype
,
size_t
batch
,
size_t
nr_
blocks_in_channel
,
size_t
channel_stride
,
size_t
nr_
channel_blocks
,
size_t
channel_stride
,
size_t
channel_block_dim
)
{
megdnn_assert
(
channel_block_dim
==
8
,
"avx2 only support nchw88"
);
Op
op
(
src0_dtype
,
src1_dtype
,
dst_dtype
);
...
...
@@ -422,7 +422,7 @@ struct OpCallerBinary<Op, SIMDType::AVX2, BCAST101x_VEC> {
ParamElemVisitor
<
typename
Op
::
src_ctype
,
SIMDType
::
AVX2
>
vis1
;
for
(
size_t
b
=
0
;
b
<
batch
;
b
++
)
{
auto
src0_ptr
=
src0
;
for
(
size_t
cb
=
0
;
cb
<
nr_
blocks_in_channel
;
cb
++
)
{
for
(
size_t
cb
=
0
;
cb
<
nr_
channel_blocks
;
cb
++
)
{
auto
src0_block_ptr
=
src0_ptr
+
cb
*
channel_block_dim
;
auto
channel_block_vec
=
vis0
(
src0_block_ptr
);
size_t
img_index
=
0
;
...
...
@@ -451,12 +451,12 @@ struct OpCallerBinary<Op, SIMDType::NONE, BCAST101x_VEC> {
const
typename
Op
::
src_ctype
*
src1
,
typename
Op
::
dst_ctype
*
dst
,
DType
src0_dtype
,
DType
src1_dtype
,
DType
dst_dtype
,
size_t
batch
,
size_t
nr_
blocks_in_channel
,
size_t
channel_stride
,
size_t
nr_
channel_blocks
,
size_t
channel_stride
,
size_t
channel_block_dim
)
{
Op
op
(
src0_dtype
,
src1_dtype
,
dst_dtype
);
for
(
size_t
b
=
0
;
b
<
batch
;
b
++
)
{
auto
src0_ptr
=
src0
;
for
(
size_t
cb
=
0
;
cb
<
nr_
blocks_in_channel
;
cb
++
)
{
for
(
size_t
cb
=
0
;
cb
<
nr_
channel_blocks
;
cb
++
)
{
auto
src0_block_ptr
=
src0_ptr
+
cb
*
channel_block_dim
;
for
(
size_t
i
=
0
;
i
<
channel_stride
;
i
++
)
{
for
(
size_t
c_iter
=
0
;
c_iter
<
channel_block_dim
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录