Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
09eaa398
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
09eaa398
编写于
7月 15, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(mgb/dnn): fix case fallthrough compile error for gcc7
GitOrigin-RevId: ab6c9644da676bd85d7459a9eaf3c6b1570d0e88
上级
b689b4ce
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
435 addition
and
301 deletion
+435
-301
dnn/src/armv7/matrix_mul/fp32/strategy_4x12.cpp
dnn/src/armv7/matrix_mul/fp32/strategy_4x12.cpp
+236
-228
dnn/src/armv7/matrix_mul/int16x16x32/kernel_12x4x1.h
dnn/src/armv7/matrix_mul/int16x16x32/kernel_12x4x1.h
+4
-1
dnn/src/armv7/matrix_mul/int8/kernel_4x2x16.h
dnn/src/armv7/matrix_mul/int8/kernel_4x2x16.h
+34
-33
dnn/src/armv7/matrix_mul/int8/kernel_4x8x8.h
dnn/src/armv7/matrix_mul/int8/kernel_4x8x8.h
+40
-2
dnn/src/armv7/matrix_mul/int8/kernel_mk4_4x2x16.h
dnn/src/armv7/matrix_mul/int8/kernel_mk4_4x2x16.h
+6
-1
dnn/src/armv7/matrix_mul/int8x8x16/kernel_4x2x16.h
dnn/src/armv7/matrix_mul/int8x8x16/kernel_4x2x16.h
+34
-33
dnn/src/armv7/matrix_mul/int8x8x16/kernel_4x8x8.h
dnn/src/armv7/matrix_mul/int8x8x16/kernel_4x8x8.h
+41
-2
dnn/src/armv7/matrix_mul/quint8/kernel_4x8x8.h
dnn/src/armv7/matrix_mul/quint8/kernel_4x8x8.h
+40
-1
未找到文件。
dnn/src/armv7/matrix_mul/fp32/strategy_4x12.cpp
浏览文件 @
09eaa398
...
...
@@ -6,12 +6,13 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/armv7/matrix_mul/fp32/strategy.h"
#include "src/armv7/matrix_mul/asm/common.h"
#include "src/arm_common/simd_macro/marm_neon.h"
#include "src/armv7/matrix_mul/asm/common.h"
#include "src/armv7/matrix_mul/fp32/strategy.h"
#include "src/common/utils.h"
using
namespace
megdnn
;
...
...
@@ -42,8 +43,8 @@ namespace {
// +--+ - - - - +--------+--------+--------+
//
// Accumulator
void
kern_4x12
(
const
float
*
packA
,
const
float
*
packB
,
int
K
,
float
*
output
,
int
LDC
,
bool
is_first_k
,
int
m_remain
)
{
void
kern_4x12
(
const
float
*
packA
,
const
float
*
packB
,
int
K
,
float
*
output
,
int
LDC
,
bool
is_first_k
,
int
m_remain
)
{
const
float
*
a_ptr
=
packA
;
const
float
*
b_ptr
=
packB
;
int
oddk
=
(
K
&
1
);
...
...
@@ -209,15 +210,14 @@ void kern_4x12(const float* packA, const float* packB, int K,
"6:
\n
"
STORE_C
:
[
a_ptr
]
"+r"
(
a_ptr
),
[
b_ptr
]
"+r"
(
b_ptr
),
[
K
]
"+r"
(
K
),
[
LDC
]
"+r"
(
LDC
),
[
is_first_k
]
"+r"
(
is_first_k
),
[
oddk
]
"+r"
(
oddk
),
[
m_remain
]
"+r"
(
m_remain
),
[
outptr
]
"+r"
(
outptr
)
[
LDC
]
"+r"
(
LDC
),
[
is_first_k
]
"+r"
(
is_first_k
),
[
oddk
]
"+r"
(
oddk
),
[
m_remain
]
"+r"
(
m_remain
),
[
outptr
]
"+r"
(
outptr
)
:
:
"d0"
,
"d1"
,
"d2"
,
"d3"
,
"d4"
,
"d5"
,
"d6"
,
"d7"
,
"d8
"
,
"d9"
,
"d10"
,
"d11"
,
"d12"
,
"d13"
,
"d14"
,
"d15"
,
"d16
"
,
"d17"
,
"d18"
,
"d19"
,
"d20"
,
"d21"
,
"d22"
,
"d23"
,
"d24
"
,
"d25"
,
"d26"
,
"d27"
,
"d28"
,
"d29"
,
"d30"
,
"d31"
,
"r1
"
,
"r2"
,
"r3"
,
"r9"
,
"r10"
,
"cc"
,
"memory"
);
:
"d0"
,
"d1"
,
"d2"
,
"d3"
,
"d4"
,
"d5"
,
"d6"
,
"d7"
,
"d8"
,
"d9"
,
"d10
"
,
"d11"
,
"d12"
,
"d13"
,
"d14"
,
"d15"
,
"d16"
,
"d17"
,
"d18"
,
"d19
"
,
"d20"
,
"d21"
,
"d22"
,
"d23"
,
"d24"
,
"d25"
,
"d26"
,
"d27"
,
"d28
"
,
"d29"
,
"d30"
,
"d31"
,
"r1"
,
"r2"
,
"r3"
,
"r9"
,
"r10"
,
"cc
"
,
"memory"
);
#undef LOAD_LINE
#undef LOAD_C
...
...
@@ -388,20 +388,20 @@ void kern_4x4(const float* packA, const float* packB, int K, float* output,
"6:
\n
"
STORE_C
:
[
a_ptr
]
"+r"
(
a_ptr
),
[
b_ptr
]
"+r"
(
b_ptr
),
[
K
]
"+r"
(
K
),
[
LDC
]
"+r"
(
LDC
),
[
is_first_k
]
"+r"
(
is_first_
k
),
[
oddk
]
"+r"
(
oddk
),
[
m_remain
]
"+r"
(
m
_remain
),
[
n_remain
]
"+r"
(
n_remain
),
[
outptr
]
"+r"
(
outptr
)
[
LDC
]
"+r"
(
LDC
),
[
is_first_k
]
"+r"
(
is_first_k
),
[
oddk
]
"+r"
(
odd
k
),
[
m_remain
]
"+r"
(
m_remain
),
[
n_remain
]
"+r"
(
n
_remain
),
[
outptr
]
"+r"
(
outptr
)
:
:
"d0"
,
"d1"
,
"d2"
,
"d3"
,
"d4"
,
"d5"
,
"d6"
,
"d7"
,
"d8
"
,
"d9"
,
"d10"
,
"d11"
,
"d12"
,
"d13"
,
"d14"
,
"d15"
,
"r1
"
,
"r2"
,
"r3"
,
"r10"
,
"cc"
,
"memory"
);
:
"d0"
,
"d1"
,
"d2"
,
"d3"
,
"d4"
,
"d5"
,
"d6"
,
"d7"
,
"d8"
,
"d9"
,
"d10
"
,
"d11"
,
"d12"
,
"d13"
,
"d14"
,
"d15"
,
"r1"
,
"r2"
,
"r3"
,
"r10"
,
"cc
"
,
"memory"
);
#undef LOAD_LINE
#undef LOAD_C
#undef STORE_LINE
#undef STORE_C
}
void
sgemm_4x12_pack_A_n
(
float
*
outptr
,
const
float
*
inptr
,
int
ldin
,
int
y0
,
void
sgemm_4x12_pack_A_n
(
float
*
outptr
,
const
float
*
inptr
,
int
ldin
,
int
y0
,
int
ymax
,
int
k0
,
int
kmax
)
{
float
zerobuff
[
4
];
std
::
memset
(
zerobuff
,
0
,
sizeof
(
float
)
*
4
);
...
...
@@ -444,8 +444,10 @@ void sgemm_4x12_pack_A_n(float * outptr, const float * inptr, int ldin, int y0,
/* Everything falls through in here */
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -463,8 +465,10 @@ void sgemm_4x12_pack_A_n(float * outptr, const float * inptr, int ldin, int y0,
/* Everything falls through in here */
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -530,8 +534,8 @@ void sgemm_4x12_pack_A_t(float* out, const float* in, int ldin, int x0,
}
}
void
sgemm_4x12_pack_B_n
(
float
*
out
,
const
float
*
in
,
int
ldin
,
int
x
0
,
int
x
max
,
int
k0
,
int
kmax
)
{
void
sgemm_4x12_pack_B_n
(
float
*
out
,
const
float
*
in
,
int
ldin
,
int
x0
,
int
xmax
,
int
k0
,
int
kmax
)
{
int
ksize
=
kmax
-
k0
;
int
ksize12
=
ksize
*
12
;
int
ksize4
=
(
ksize
<<
2
);
...
...
@@ -600,8 +604,8 @@ void sgemm_4x12_pack_B_n(float* out, const float* in, int ldin,
}
}
void
sgemm_4x12_pack_B_t
(
float
*
out
,
const
float
*
in
,
int
ldin
,
int
y
0
,
int
y
max
,
int
k0
,
int
kmax
)
{
void
sgemm_4x12_pack_B_t
(
float
*
out
,
const
float
*
in
,
int
ldin
,
int
y0
,
int
ymax
,
int
k0
,
int
kmax
)
{
float
*
outptr
=
out
;
const
float
*
inptr
=
in
;
float
zerobuff
[
4
];
...
...
@@ -660,8 +664,10 @@ void sgemm_4x12_pack_B_t(float* out, const float* in, int ldin,
/* Everything falls through in here */
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -679,8 +685,10 @@ void sgemm_4x12_pack_B_t(float* out, const float* in, int ldin,
/* Everything falls through in here */
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -697,8 +705,8 @@ void sgemm_4x12_pack_B_t(float* out, const float* in, int ldin,
MEGDNN_REG_GEMM_STRATEGY_IMPL
(
sgemm_4x12
);
void
sgemm_4x12
::
pack_A
(
float
*
out
,
const
float
*
in
,
int
ldin
,
int
y0
,
int
ymax
,
int
k0
,
int
kmax
,
bool
transpose_A
)
const
{
void
sgemm_4x12
::
pack_A
(
float
*
out
,
const
float
*
in
,
int
ldin
,
int
y0
,
int
ymax
,
int
k0
,
int
kmax
,
bool
transpose_A
)
const
{
if
(
transpose_A
)
{
sgemm_4x12_pack_A_t
(
out
,
in
,
ldin
,
y0
,
ymax
,
k0
,
kmax
);
}
else
{
...
...
@@ -715,9 +723,9 @@ void sgemm_4x12::pack_B(float* out, const float* in, int ldin, int x0, int xmax,
}
}
void
sgemm_4x12
::
kern
(
const
float
*
packA
,
const
float
*
packB
,
size_t
M
,
size_t
N
,
size_t
K
,
float
*
C
,
size_t
LDC
,
bool
is_first_k
,
const
float
*
,
float
*
)
const
{
void
sgemm_4x12
::
kern
(
const
float
*
packA
,
const
float
*
packB
,
size_t
M
,
size_t
N
,
size_t
K
,
float
*
C
,
size_t
LDC
,
bool
is_first_k
,
const
float
*
,
float
*
)
const
{
megdnn_assert
(
A_dtype
.
enumv
()
==
B_dtype
.
enumv
()
&&
A_dtype
.
enumv
()
==
C_dtype
.
enumv
()
&&
A_dtype
.
enumv
()
==
DTypeEnum
::
Float32
);
...
...
dnn/src/armv7/matrix_mul/int16x16x32/kernel_12x4x1.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -988,8 +989,10 @@ static void gemm_s16x16x32_12x4_transpose_pack_B_n(dt_int16* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
dnn/src/armv7/matrix_mul/int8/kernel_4x2x16.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -330,9 +331,9 @@ static void gemm_s8_4x2_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
if
(
y
+
3
>=
ymax
)
{
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -347,9 +348,9 @@ static void gemm_s8_4x2_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
if
(
y
+
3
>=
ymax
)
{
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -390,19 +391,19 @@ static void gemm_s8_4x2_pack_A_t(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -421,19 +422,19 @@ static void gemm_s8_4x2_pack_A_t(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -488,19 +489,19 @@ static void gemm_s8_4x2_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -519,19 +520,19 @@ static void gemm_s8_4x2_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
dnn/src/armv7/matrix_mul/int8/kernel_4x8x8.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -378,7 +379,6 @@ static void kern_4x4(const int8_t* packA, const int8_t* packB, int K,
#undef STORE_C
}
static
void
gemm_s8_4x8_pack_A_n
(
dt_int8
*
outptr
,
const
dt_int8
*
inptr
,
int
ldin
,
int
y0
,
int
ymax
,
int
k0
,
int
kmax
)
{
int8_t
zerobuff
[
16
];
...
...
@@ -402,8 +402,10 @@ static void gemm_s8_4x8_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -420,8 +422,10 @@ static void gemm_s8_4x8_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -472,16 +476,22 @@ static void gemm_s8_4x8_transpose_pack_A_n(dt_int8* out, const dt_int8* in,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -500,16 +510,22 @@ static void gemm_s8_4x8_transpose_pack_A_n(dt_int8* out, const dt_int8* in,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -565,16 +581,22 @@ static void gemm_s8_4x8_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -594,16 +616,22 @@ static void gemm_s8_4x8_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -623,16 +651,22 @@ static void gemm_s8_4x8_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -710,8 +744,10 @@ static void gemm_s8_4x8_transpose_pack_B_n(dt_int8* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -729,8 +765,10 @@ static void gemm_s8_4x8_transpose_pack_B_n(dt_int8* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
dnn/src/armv7/matrix_mul/int8/kernel_mk4_4x2x16.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -323,8 +324,10 @@ static void gemm_mk4_s8_4x2_pack_B(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
3
-
ICB
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -340,8 +343,10 @@ static void gemm_mk4_s8_4x2_pack_B(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
3
-
ICB
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
dnn/src/armv7/matrix_mul/int8x8x16/kernel_4x2x16.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -278,9 +279,9 @@ static void gemm_s8x8x16_4x2_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
if
(
y
+
3
>=
ymax
)
{
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -295,9 +296,9 @@ static void gemm_s8x8x16_4x2_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
if
(
y
+
3
>=
ymax
)
{
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -338,19 +339,19 @@ static void gemm_s8x8x16_4x2_pack_A_t(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -369,19 +370,19 @@ static void gemm_s8x8x16_4x2_pack_A_t(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -436,19 +437,19 @@ static void gemm_s8x8x16_4x2_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -467,19 +468,19 @@ static void gemm_s8x8x16_4x2_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
if
(
remain
>=
0
)
{
switch
(
remain
)
{
case
7
:
inptr0
=
zerobuff
;
inptr0
=
zerobuff
;
MEGDNN_FALLTHRU
case
6
:
inptr1
=
zerobuff
;
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
dnn/src/armv7/matrix_mul/int8x8x16/kernel_4x8x8.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -76,7 +77,7 @@ static void kern_4x8(const int8_t* packA, const int8_t* packB, int K,
STORE_LINE("14", "15", "3") \
"101:\n"
// clang-format on
// clang-format on
register
int16_t
*
outptr
asm
(
"r0"
)
=
output
;
asm
volatile
(
...
...
@@ -406,8 +407,10 @@ static void gemm_s8x8x16_4x8_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -424,8 +427,10 @@ static void gemm_s8x8x16_4x8_pack_A_n(dt_int8* outptr, const dt_int8* inptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -476,16 +481,22 @@ static void gemm_s8x8x16_4x8_transpose_pack_A_n(dt_int8* out, const dt_int8* in,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -504,16 +515,22 @@ static void gemm_s8x8x16_4x8_transpose_pack_A_n(dt_int8* out, const dt_int8* in,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -569,16 +586,22 @@ static void gemm_s8x8x16_4x8_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -598,16 +621,22 @@ static void gemm_s8x8x16_4x8_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -627,16 +656,22 @@ static void gemm_s8x8x16_4x8_pack_B_n(dt_int8* out, const dt_int8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -715,8 +750,10 @@ static void gemm_s8x8x16_4x8_transpose_pack_B_n(dt_int8* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -734,8 +771,10 @@ static void gemm_s8x8x16_4x8_transpose_pack_B_n(dt_int8* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
dnn/src/armv7/matrix_mul/quint8/kernel_4x8x8.h
浏览文件 @
09eaa398
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/simd_macro/marm_neon.h"
...
...
@@ -453,8 +454,10 @@ static void gemm_u8_4x8_pack_A_n(dt_uint8* outptr, const dt_uint8* inptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -471,8 +474,10 @@ static void gemm_u8_4x8_pack_A_n(dt_uint8* outptr, const dt_uint8* inptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -523,16 +528,22 @@ static void gemm_u8_4x8_transpose_pack_A_n(dt_uint8* out, const dt_uint8* in,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -551,16 +562,22 @@ static void gemm_u8_4x8_transpose_pack_A_n(dt_uint8* out, const dt_uint8* in,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -617,16 +634,22 @@ static void gemm_u8_4x8_pack_B_n(dt_uint8* out, const dt_uint8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -646,16 +669,22 @@ static void gemm_u8_4x8_pack_B_n(dt_uint8* out, const dt_uint8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -675,16 +704,22 @@ static void gemm_u8_4x8_pack_B_n(dt_uint8* out, const dt_uint8* in, int ldin,
switch
(
k
+
7
-
kmax
)
{
case
6
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
5
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
4
:
inptr3
=
zerobuff
;
MEGDNN_FALLTHRU
case
3
:
inptr4
=
zerobuff
;
MEGDNN_FALLTHRU
case
2
:
inptr5
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr6
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr7
=
zerobuff
;
break
;
...
...
@@ -763,8 +798,10 @@ static void gemm_u8_4x8_transpose_pack_B_n(dt_uint8* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
@@ -782,8 +819,10 @@ static void gemm_u8_4x8_transpose_pack_B_n(dt_uint8* outptr,
switch
(
y
+
3
-
ymax
)
{
case
2
:
inptr1
=
zerobuff
;
MEGDNN_FALLTHRU
case
1
:
inptr2
=
zerobuff
;
MEGDNN_FALLTHRU
case
0
:
inptr3
=
zerobuff
;
break
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录