Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
270b7488
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
270b7488
编写于
5月 14, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn/fallback): support mk4 fp32 im2col
GitOrigin-RevId: 178d7231726c18bbc2586797f2b14e4ef3fdb969
上级
45e2beea
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
143 addition
and
164 deletion
+143
-164
dnn/src/aarch64/matrix_mul/int8/kernel_mk4_4x4x16.h
dnn/src/aarch64/matrix_mul/int8/kernel_mk4_4x4x16.h
+2
-2
dnn/src/armv7/matrix_mul/int8/kernel_mk4_4x2x16.h
dnn/src/armv7/matrix_mul/int8/kernel_mk4_4x2x16.h
+2
-2
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
+0
-14
dnn/src/fallback/conv_bias/im2col/algos.cpp
dnn/src/fallback/conv_bias/im2col/algos.cpp
+23
-25
dnn/src/fallback/conv_bias/im2col/factory.h
dnn/src/fallback/conv_bias/im2col/factory.h
+11
-2
dnn/src/fallback/conv_bias/im2col/strategy_base.h
dnn/src/fallback/conv_bias/im2col/strategy_base.h
+5
-9
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
+19
-21
dnn/src/fallback/conv_bias/im2col/strategy_default_nchw44.cpp
...src/fallback/conv_bias/im2col/strategy_default_nchw44.cpp
+2
-14
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
+12
-13
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
+8
-8
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+12
-0
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+5
-0
dnn/src/fallback/convolution/img2col_helper.h
dnn/src/fallback/convolution/img2col_helper.h
+9
-40
dnn/test/arm_common/conv_bias_multi_thread.cpp
dnn/test/arm_common/conv_bias_multi_thread.cpp
+33
-14
未找到文件。
dnn/src/aarch64/matrix_mul/int8/kernel_mk4_4x4x16.h
浏览文件 @
270b7488
...
...
@@ -749,7 +749,7 @@ static void gemm_mk4_s8_4x4_pack_A(dt_int8* outptr, const dt_int8* inptr,
const
int8_t
*
inptr1
=
inptr0
+
ldin
;
const
int8_t
*
inptr2
=
inptr1
+
ldin
;
const
int8_t
*
inptr3
=
inptr2
+
ldin
;
int8_t
*
output
=
outptr
+
start_y
*
out_offset
;
int8_t
*
output
=
outptr
+
(
y
-
y0
)
/
4
*
out_offset
;
prefetch_2x
(
inptr0
);
prefetch_2x
(
inptr1
);
prefetch_2x
(
inptr2
);
...
...
@@ -776,7 +776,7 @@ static void gemm_mk4_s8_4x4_pack_A(dt_int8* outptr, const dt_int8* inptr,
}
for
(;
y
+
3
<
ymax
;
y
+=
4
,
start_y
++
)
{
const
int8_t
*
inptr0
=
inptr
+
start_y
*
ldin
+
k0
*
4
;
int8_t
*
output
=
outptr
+
start_y
*
out_offset
;
int8_t
*
output
=
outptr
+
(
y
-
y0
)
/
4
*
out_offset
;
prefetch_2x
(
inptr0
);
int
K
=
kmax
-
k0
;
for
(;
K
>
15
;
K
-=
16
)
{
...
...
dnn/src/armv7/matrix_mul/int8/kernel_mk4_4x2x16.h
浏览文件 @
270b7488
...
...
@@ -227,7 +227,7 @@ static void gemm_mk4_s8_4x2_pack_A(dt_int8* outptr, const dt_int8* inptr,
const
int8_t
*
inptr1
=
inptr0
+
ldin
;
const
int8_t
*
inptr2
=
inptr1
+
ldin
;
const
int8_t
*
inptr3
=
inptr2
+
ldin
;
int8_t
*
output
=
outptr
+
start_y
*
out_offset
;
int8_t
*
output
=
outptr
+
(
y
-
y0
)
/
4
*
out_offset
;
prefetch_2x
(
inptr0
);
prefetch_2x
(
inptr1
);
prefetch_2x
(
inptr2
);
...
...
@@ -254,7 +254,7 @@ static void gemm_mk4_s8_4x2_pack_A(dt_int8* outptr, const dt_int8* inptr,
}
for
(;
y
+
3
<
ymax
;
y
+=
4
,
start_y
++
)
{
const
int8_t
*
inptr0
=
inptr
+
start_y
*
ldin
+
k0
*
4
;
int8_t
*
output
=
outptr
+
start_y
*
out_offset
;
int8_t
*
output
=
outptr
+
(
y
-
y0
)
/
4
*
out_offset
;
prefetch_2x
(
inptr0
);
int
K
=
kmax
-
k0
;
for
(;
K
>
15
;
K
-=
16
)
{
...
...
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
浏览文件 @
270b7488
...
...
@@ -22,20 +22,6 @@ namespace conv1x1 {
namespace
{
//! Map a ConvBias tensor format to its channel pack size.
//! NCHW44 and NCHW4 pack 4 channels, NCHW88 packs 8 and plain NCHW packs 1;
//! every other format is rejected via megdnn_throw.
size_t get_format_pack_size(param::ConvBias::Format format) {
    using Format = param::ConvBias::Format;
    if (format == Format::NCHW44 || format == Format::NCHW4) {
        return 4_z;
    }
    if (format == Format::NCHW88) {
        return 8_z;
    }
    if (format == Format::NCHW) {
        return 1_z;
    }
    megdnn_throw("unknow pack size of the format");
}
struct
StrategyHashParam
{
ConvBiasImpl
::
NCBKernSizeParam
param
;
param
::
ConvBias
::
Format
format
;
...
...
dnn/src/fallback/conv_bias/im2col/algos.cpp
浏览文件 @
270b7488
...
...
@@ -125,13 +125,10 @@ public:
size_t
oc_tile_size
)
{
size_t
IC
=
param
.
filter_meta
.
icpg
,
FH
=
param
.
filter_meta
.
spatial
[
0
],
FW
=
param
.
filter_meta
.
spatial
[
1
];
size_t
pack_oc_size
=
1
;
size_t
pack_oc_size
=
get_format_pack_size
(
param
.
filter_meta
.
format
)
;
size_t
im2col
=
0
,
packb
=
0
,
bias_temp
=
0
;
bool
default_pack
=
matmul_algo
->
packmode
()
==
Pack_Mode
::
DEFAULT
;
megdnn_assert
(
default_pack
,
"only support default packa"
);
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
pack_oc_size
=
4
;
}
size_t
im2col_dst_size
=
IC
*
FH
*
FW
*
ohw_tile_size
*
sizeof
(
param
.
src_type
);
size_t
matmul_dst_size
=
pack_oc_size
*
oc_tile_size
*
ohw_tile_size
*
...
...
@@ -321,14 +318,17 @@ fallback::MatrixMulImpl::KernSizeParam
ConvBiasImpl
::
AlgoIm2col
::
get_matmul_kern_param
(
const
NCBKernSizeParam
&
param
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
const
{
bool
is_nchw44
=
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
;
auto
format
=
param
::
MatrixMul
::
Format
::
DEFAULT
;
size_t
pack_oc_size
=
get_format_pack_size
(
param
.
filter_meta
.
format
);
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
format
=
param
::
MatrixMul
::
Format
::
MK4
;
}
size_t
M
=
oc_tile_size
;
size_t
N
=
ohw_tile_size
;
size_t
K
=
param
.
filter_meta
.
icpg
*
param
.
filter_meta
.
spatial
[
0
]
*
param
.
filter_meta
.
spatial
[
1
];
size_t
pack_oc_size
=
is_nchw44
?
4
:
1
;
size_t
LDA
=
pack_oc_size
*
K
,
LDB
=
pack_oc_size
*
N
,
LDC
=
N
;
size_t
LDA
=
pack_oc_size
*
K
,
LDB
=
pack_oc_size
*
N
,
LDC
=
N
*
pack_oc_size
;
bool
is_dst_8bit
=
(
param
.
src_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
(
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
...
...
@@ -345,8 +345,7 @@ ConvBiasImpl::AlgoIm2col ::get_matmul_kern_param(const NCBKernSizeParam& param,
false
,
false
,
param
::
MatrixMul
::
ComputeMode
::
DEFAULT
,
is_nchw44
?
param
::
MatrixMul
::
Format
::
MK4
:
param
::
MatrixMul
::
Format
::
DEFAULT
};
format
};
}
void
ConvBiasImpl
::
AlgoIm2col
::
choice_ohw_oc_block
(
...
...
@@ -356,11 +355,7 @@ void ConvBiasImpl::AlgoIm2col::choice_ohw_oc_block(
size_t
nr_threads
=
param
.
nr_threads
;
size_t
OC
=
param
.
filter_meta
.
ocpg
;
size_t
ohw
=
param
.
osz
[
0
]
*
param
.
osz
[
1
];
//! NOTE: do not change the following two lines. The opr reuses the same
//! im2col algo, and choice_ohw_oc_block may change m_ohw_tile_size and
//! m_oc_tile_size; if those two values change, the workspace size may change
//! and a workspace mismatch will occur, so always initialize them from the
//! original values to avoid the problem
oc_tile_size
=
DEFAULT_OC_TILE_SIZE
;
ohw_tile_size
=
m_ohw_tile_size
;
...
...
@@ -505,14 +500,13 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns(
size_t
ohw_parallel_times
=
div_ceil
(
ohw
,
ohw_tile_size
);
size_t
oc_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
size_t
packa_parallel_times
=
0
;
size_t
pack_oc_size
=
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
?
1
:
4
);
size_t
pack_oc_size
=
get_format_pack_size
(
param
.
filter_meta
.
format
);
if
(
only_packA
)
{
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
}
else
if
(
default_pack
)
{
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
m_matmul_algo
->
get_inner_block_size
().
m
*
pack_oc_size
);
OC
,
m_matmul_algo
->
get_inner_block_size
().
m
);
}
auto
matmul_param
=
get_matmul_kern_param
(
...
...
@@ -659,12 +653,16 @@ bool ConvBiasImpl::AlgoIm2col::usable(
param
.
nonlineMode
!=
megdnn
::
NonlineMode
::
IDENTITY
)
{
return
false
;
}
//! for nchw44, im2col currently only supports int8 and quantized s8
if
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44
&&
(
param
.
src_type
.
enumv
()
==
param
.
filter_type
.
enumv
()
&&
(
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
Int8
)
&&
(
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
QuantizedS8
)))
{
if
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
//! currently NCHW44 im2col only supports DEFAULT mode matmul
if
(
m_matmul_algo
->
packmode
()
!=
Pack_Mode
::
DEFAULT
)
{
return
false
;
//! nchw44 hybrid mode and channel wise are not supported
}
else
if
(
param
.
filter_meta
.
icpg
<
4
_z
||
param
.
filter_meta
.
icpg
==
1
||
param
.
filter_meta
.
ocpg
==
1
)
{
return
false
;
}
}
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
...
...
dnn/src/fallback/conv_bias/im2col/factory.h
浏览文件 @
270b7488
...
...
@@ -221,8 +221,17 @@ public:
param
::
ConvBias
::
Format
format
=
param
.
filter_meta
.
format
;
switch
(
strategytype
)
{
case
StrategyType
::
FLOAT
:
if
(
format
==
param
::
ConvBias
::
Format
::
NCHW
)
{
cb1
(
NCHW
,
DEFAULT
,
dt_float32
,
dt_float32
,
PostprocessMode
::
FLOAT
,
"DefaultStrategyType::FLOAT"
_hash
);
PostprocessMode
::
FLOAT
,
"DefaultStrategyType::FLOAT"
_hash
);
}
else
if
(
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
cb1
(
NCHW44
,
DEFAULT
,
dt_float32
,
dt_float32
,
PostprocessMode
::
FLOAT
,
"DefaultStrategyTypeNCHW44::FLOAT"
_hash
);
}
else
{
megdnn_throw
(
"not support format except nchw44 and nchw
\n
"
);
}
break
;
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case
StrategyType
::
FLOAT_FP16
:
...
...
dnn/src/fallback/conv_bias/im2col/strategy_base.h
浏览文件 @
270b7488
...
...
@@ -75,15 +75,14 @@ public:
template
<
typename
src_ctype
,
typename
bias_ctype
,
typename
dst_ctype
,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
,
PackMode
packmode
,
FormatMode
format
>
FormatMode
format
=
FormatMode
::
NCHW
>
class
Strategy
;
template
<
typename
src_ctype
,
typename
bias_ctype
,
typename
dst_ctype
,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
class
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>
:
public
StrategyBase
{
postprocess_mode
,
PackMode
::
DEFAULT
>
:
public
StrategyBase
{
public:
constexpr
static
size_t
BUNDLE_PADDING_INDEX
=
0
;
constexpr
static
size_t
BUNDLE_PACKA_INDEX
=
1
;
...
...
@@ -142,8 +141,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
class
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW44
>
:
public
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>
{
postprocess_mode
,
PackMode
::
DEFAULT
>
{
public:
const
size_t
BUNDLE_PADDING_INDEX
=
0
;
const
size_t
BUNDLE_PACKA_INDEX
=
1
;
...
...
@@ -164,8 +162,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
class
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>
:
public
StrategyBase
{
postprocess_mode
,
PackMode
::
NO_PACK
>
:
public
StrategyBase
{
public:
constexpr
static
size_t
BUNDLE_PADDING_INDEX
=
0
;
constexpr
static
size_t
BUNDLE_PACKA_INDEX
=
1
;
...
...
@@ -231,8 +228,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
class
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>
:
public
StrategyBase
{
postprocess_mode
,
PackMode
::
ONLY_PACKA
>
:
public
StrategyBase
{
public:
constexpr
static
size_t
BUNDLE_PADDING_INDEX
=
0
;
constexpr
static
size_t
BUNDLE_PACKA_INDEX
=
1
;
...
...
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
浏览文件 @
270b7488
...
...
@@ -26,7 +26,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
copy_padding_kern
(
WorkspaceBundle
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
...
...
@@ -93,13 +93,13 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
packA_kern
(
WorkspaceBundle
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
matmulparam
,
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
size_t
pack_oc_size
)
{
size_t
)
{
bundle
.
set
(
param
.
workspace_ptr
);
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
;
size_t
group_id
=
ncb_index
.
ndrange_id
[
0
];
...
...
@@ -112,19 +112,18 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
matmul_algo
->
get_packA_type_size
();
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
packed_per_oc_block_size
;
int8_t
*
a_panel
=
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
group_id
*
packA_group_size
+
(
pack_oc_size
==
4
?
0
:
a_panel_offset
);
group_id
*
packA_group_size
+
a_panel_offset
;
matmul_param
.
A_ptr
=
const_cast
<
src_ctype
*>
(
param
.
filter
<
src_ctype
>
(
group_id
));
matmul_algo
->
pack_A
(
matmul_param
,
a_panel
,
ncb_index
.
ndrange_id
[
1
],
matmul_algo
->
get_inner_block_size
().
m
*
pack_oc_size
);
matmul_algo
->
get_inner_block_size
().
m
);
}
template
<
typename
src_ctype
,
typename
bias_ctype
,
typename
dst_ctype
,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
exec_im2col
(
WorkspaceBundle
bundle
,
WorkspaceBundle
bundle_thread
,
const
StrategyParam
&
sparam
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
...
...
@@ -193,7 +192,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
*
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
get_matmul_dst_ptr
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
WorkspaceBundle
&
bundle_thread
,
const
StrategyParam
&
sparam
)
{
...
...
@@ -212,7 +211,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
exec_matmul
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
WorkspaceBundle
bundle
,
WorkspaceBundle
bundle_thread
,
...
...
@@ -249,7 +248,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
exec_postprocess
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
WorkspaceBundle
bundle_thread
)
{
...
...
@@ -264,12 +263,12 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
?
bias_temp_ptr
:
static_cast
<
void
*>
(
const_cast
<
bias_ctype
*>
(
bias_ptr
+
sparam
.
oc_cur_index
)));
size_t
pack_oc_size
=
sparam
.
pack_oc_size
;
PostProcess
<
op_ctype
,
op_dtype
,
postprocess_mode
>::
run
(
matmul_dst
,
bias_preprocess_ptr
,
matmul_dst
,
param
.
bias_mode
,
param
.
nonlineMode
,
param
.
bias_type
,
param
.
dst_type
,
1
_z
,
sparam
.
output_block_oc_size
,
1
_z
,
sparam
.
output_block_size
,
sparam
.
pack_oc_size
);
sparam
.
output_block_oc_size
/
pack_oc_size
,
1
_z
,
sparam
.
output_block_size
,
pack_oc_size
);
copy_dst
(
param
,
matmul_dst
,
sparam
);
}
...
...
@@ -277,7 +276,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
copy_dst
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
void
*
matmul_dst
,
const
StrategyParam
&
sparam
)
{
if
(
!
sparam
.
skip_copy_dst
)
{
...
...
@@ -303,7 +302,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
*
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
get_bias_temp_ptr
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
WorkspaceBundle
&
bundle_thread
)
{
bias_ctype
*
bias_tmp_ptr
=
...
...
@@ -318,7 +317,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
DEFAULT
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
DEFAULT
>::
copy_bias
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
WorkspaceBundle
bundle_thread
,
const
StrategyParam
&
sparam
)
{
const
bias_ctype
*
bias_ptr
=
static_cast
<
const
bias_ctype
*>
(
...
...
@@ -342,8 +341,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
#define INSTANTIAL_CLASS(_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode) \
template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode, PackMode::DEFAULT, \
FormatMode::NCHW>;
_op_dtype, _postprocess_mode, PackMode::DEFAULT>;
INSTANTIAL_CLASS
(
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
megdnn
::
PostprocessMode
::
FLOAT
)
...
...
dnn/src/fallback/conv_bias/im2col/strategy_default_nchw44.cpp
浏览文件 @
270b7488
...
...
@@ -12,10 +12,9 @@
#include "src/fallback/convolution/img2col_helper.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#elif (MEGDNN_ARMV7 || MEGDNN_AARCH64)
#include "src/arm_common/conv_bias/postprocess_helper.h"
#endif
using
namespace
megdnn
;
#if MEGDNN_X86
using
namespace
x86
;
...
...
@@ -101,23 +100,12 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
INSTANTIAL_CLASS
(
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
megdnn
::
PostprocessMode
::
FLOAT
)
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
INSTANTIAL_CLASS
(
dt_float16
,
dt_float16
,
dt_float16
,
__fp16
,
__fp16
,
megdnn
::
PostprocessMode
::
FLOAT
)
#else
#if !MEGDNN_DISABLE_FLOAT16
INSTANTIAL_CLASS
(
dt_float16
,
dt_float16
,
dt_float16
,
dt_float16
,
dt_float16
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#endif
#endif
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
//! x86 do not have uint8 matmul so only armv7 armv8 support uint8
INSTANTIAL_CLASS
(
dt_uint8
,
dt_int32
,
dt_uint8
,
dt_qint32
,
dt_quint8
,
megdnn
::
PostprocessMode
::
QUANTIZED
)
INSTANTIAL_CLASS
(
dt_uint8
,
dt_int32
,
dt_int32
,
dt_qint32
,
dt_qint32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#endif
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int8
,
dt_qint32
,
dt_qint8
,
megdnn
::
PostprocessMode
::
QUANTIZED
)
...
...
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
浏览文件 @
270b7488
...
...
@@ -27,7 +27,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
copy_padding_kern
(
WorkspaceBundle
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
...
...
@@ -90,7 +90,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
packA_kern
(
WorkspaceBundle
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
matmulparam
,
...
...
@@ -110,7 +110,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
*
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
get_matmul_dst_ptr
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
WorkspaceBundle
&
bundle_thread
,
const
StrategyParam
&
sparam
)
{
...
...
@@ -129,7 +129,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
exec_matmul
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
WorkspaceBundle
bundle
,
WorkspaceBundle
bundle_thread
,
...
...
@@ -162,7 +162,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
exec_im2col
(
WorkspaceBundle
bundle
,
WorkspaceBundle
bundle_thread
,
const
StrategyParam
&
sparam
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
...
...
@@ -224,7 +224,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
exec_postprocess
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
WorkspaceBundle
bundle_thread
)
{
...
...
@@ -252,7 +252,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
copy_dst
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
void
*
matmul_dst
,
const
StrategyParam
&
sparam
)
{
if
(
!
sparam
.
skip_copy_dst
)
{
...
...
@@ -274,7 +274,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
NO_PACK
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
NO_PACK
>::
copy_bias
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
WorkspaceBundle
bundle_thread
,
const
StrategyParam
&
sparam
)
{
const
bias_ctype
*
bias_ptr
=
static_cast
<
const
bias_ctype
*>
(
...
...
@@ -298,8 +298,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
#define INSTANTIAL_CLASS(_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode) \
template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode, PackMode::NO_PACK, \
FormatMode::NCHW>;
_op_dtype, _postprocess_mode, PackMode::NO_PACK>;
INSTANTIAL_CLASS
(
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
megdnn
::
PostprocessMode
::
FLOAT
)
...
...
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
浏览文件 @
270b7488
...
...
@@ -27,7 +27,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
copy_padding_kern
(
WorkspaceBundle
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
...
...
@@ -90,7 +90,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
packA_kern
(
WorkspaceBundle
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
matmulparam
,
...
...
@@ -124,7 +124,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
*
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
get_matmul_dst_ptr
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
WorkspaceBundle
&
bundle_thread
,
const
StrategyParam
&
sparam
)
{
...
...
@@ -143,7 +143,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
exec_matmul
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
WorkspaceBundle
bundle
,
WorkspaceBundle
bundle_thread
,
...
...
@@ -181,7 +181,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
exec_im2col
(
WorkspaceBundle
bundle
,
WorkspaceBundle
bundle_thread
,
const
StrategyParam
&
sparam
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
...
...
@@ -242,7 +242,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
exec_postprocess
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
WorkspaceBundle
bundle_thread
)
{
...
...
@@ -283,7 +283,7 @@ template <typename src_ctype, typename bias_ctype, typename dst_ctype,
typename
op_ctype
,
typename
op_dtype
,
megdnn
::
PostprocessMode
postprocess_mode
>
void
Strategy
<
src_ctype
,
bias_ctype
,
dst_ctype
,
op_ctype
,
op_dtype
,
postprocess_mode
,
PackMode
::
ONLY_PACKA
,
FormatMode
::
NCHW
>::
postprocess_mode
,
PackMode
::
ONLY_PACKA
>::
copy_dst
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
void
*
matmul_dst
,
const
StrategyParam
&
sparam
)
{
if
(
!
sparam
.
skip_copy_dst
)
{
...
...
@@ -305,7 +305,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
_op_dtype, _postprocess_mode) \
template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode, \
PackMode::ONLY_PACKA
, FormatMode::NCHW
>;
PackMode::ONLY_PACKA>;
INSTANTIAL_CLASS
(
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
megdnn
::
PostprocessMode
::
FLOAT
)
...
...
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
270b7488
...
...
@@ -26,6 +26,18 @@
using
namespace
megdnn
;
using
namespace
fallback
;
//! Return the channel pack size implied by a ConvBias format:
//! 8 for NCHW88, 4 for NCHW44 / NCHW4, and 1 for every other format.
size_t megdnn::fallback::get_format_pack_size(param::ConvBias::Format format) {
    using Format = param::ConvBias::Format;
    if (format == Format::NCHW88) {
        return 8_z;
    }
    const bool packs_four =
            format == Format::NCHW44 || format == Format::NCHW4;
    return packs_four ? 4_z : 1_z;
}
namespace
{
template
<
typename
T
>
void
incr_ptr
(
T
*&
dst
,
ptrdiff_t
delta
)
{
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
270b7488
...
...
@@ -21,6 +21,11 @@
namespace
megdnn
{
namespace
fallback
{
/*!
 * \brief get the channel pack size for the given format: 4 for NCHW44 and
 * NCHW4, 8 for NCHW88, and 1 for any other format
 */
size_t
get_format_pack_size
(
param
::
ConvBias
::
Format
format
);
/*!
* \brief fallback conv bias forward impl
*
...
...
dnn/src/fallback/convolution/img2col_helper.h
浏览文件 @
270b7488
...
...
@@ -9,9 +9,8 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/common/utils.h"
#if MEGDNN_ARMV7 || MEGDNN_AARCH64
#include "src/arm_common/simd_macro/marm_neon.h"
#endif
namespace
{
template
<
bool
is_xcorr
,
typename
dtype
>
...
...
@@ -268,12 +267,13 @@ void img2col_nchw4(const dtype* __restrict src, dtype* __restrict dst,
}
for
(
int
w
=
cur_remain_w
;
w
<
OW
;
w
++
)
{
size_t
index
=
ic
*
IH
*
IW
+
(
start_h
+
fh2
)
*
IW
+
(
w
+
fw2
);
dst
[
i
++
]
=
src
[
4
*
index
];
dst
[
i
++
]
=
src
[
4
*
index
+
1
];
dst
[
i
++
]
=
src
[
4
*
index
+
2
];
dst
[
i
++
]
=
src
[
4
*
index
+
3
];
size_t
index
=
4
*
(
ic
*
IH
*
IW
+
(
start_h
+
fh2
)
*
IW
+
(
w
+
fw2
));
dst
[
i
++
]
=
src
[
index
];
dst
[
i
++
]
=
src
[
index
+
1
];
dst
[
i
++
]
=
src
[
index
+
2
];
dst
[
i
++
]
=
src
[
index
+
3
];
}
for
(
int
h
=
start_h
+
1
;
h
<
end_h
;
h
++
)
{
...
...
@@ -317,26 +317,11 @@ void img2col_nchw4(const dtype* __restrict src, dtype* __restrict dst,
fh2
=
FH
-
fh
-
1
;
fw2
=
FW
-
fw
-
1
;
}
#if MEGDNN_ARMV7 || MEGDNN_AARCH64
int
w
=
cur_remain_w
;
size_t
index
=
(
ic
*
IH
*
IW
+
(
start_h
+
fh2
)
*
IW
+
(
w
+
fw2
));
for
(;
w
+
3
<
end_remain_w
;
w
+=
4
)
{
vst1q_u32
(
&
output
[
i
],
vld1q_u32
(
&
uint32_src
[
index
]));
i
+=
4
;
index
+=
4
;
}
for
(;
w
<
end_remain_w
;
w
++
)
{
output
[
i
++
]
=
uint32_src
[
index
];
}
#else
for
(
int
w
=
cur_remain_w
;
w
<
end_remain_w
;
w
++
)
{
size_t
index
=
(
ic
*
IH
*
IW
+
(
start_h
+
fh2
)
*
IW
+
(
w
+
fw2
));
output
[
i
++
]
=
uint32_src
[
index
];
}
#endif
}
}
}
...
...
@@ -360,27 +345,11 @@ void img2col_nchw4(const dtype* __restrict src, dtype* __restrict dst,
}
for
(
int
h
=
start_h
+
1
;
h
<
end_h
;
h
++
)
{
#if MEGDNN_ARMV7 || MEGDNN_AARCH64
int
ow
=
0
;
size_t
index
=
(
ic
*
IH
*
IW
+
(
h
+
fh2
)
*
IW
+
(
ow
+
fw2
));
for
(;
ow
+
3
<
OW
;
ow
+=
4
)
{
vst1q_u32
(
&
output
[
i
],
vld1q_u32
(
&
uint32_src
[
index
]));
i
+=
4
;
index
+=
4
;
}
for
(;
ow
<
OW
;
ow
++
)
{
output
[
i
++
]
=
uint32_src
[
index
++
];
}
#else
rep
(
ow
,
OW
)
{
size_t
index
=
(
ic
*
IH
*
IW
+
(
h
+
fh2
)
*
IW
+
(
ow
+
fw2
));
output
[
i
++
]
=
uint32_src
[
index
];
}
#endif
}
for
(
int
w
=
0
;
w
<
end_remain_w
;
w
++
)
{
...
...
dnn/test/arm_common/conv_bias_multi_thread.cpp
浏览文件 @
270b7488
...
...
@@ -1173,10 +1173,10 @@ void checker_conv_bias_mul_int8x8x32(std::vector<conv_bias::TestArg> args,
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
#if !__ARM_FEATURE_DOTPROD
TEST_F
(
ARM_COMMON
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44
)
{
TEST_F
(
ARM_COMMON
_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S2
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
({
2
,
3
,
4
,
5
,
6
,
7
},
1
,
false
,
true
,
true
);
get_nchw44_conv_bias_args
({
2
,
5
,
7
},
2
,
false
,
true
,
true
);
#define cb(name) checker_conv_bias_mul_int8x8x32(args, handle(), name);
#if MEGDNN_AARCH64
...
...
@@ -1187,10 +1187,10 @@ TEST_F(ARM_COMMON, CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_
MULTI
)
{
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_
S1
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
({
2
,
3
,
4
,
5
,
6
,
7
},
1
,
false
,
true
,
true
);
get_nchw44_conv_bias_args
({
3
,
4
,
6
},
1
,
false
,
true
,
true
);
#define cb(name) checker_conv_bias_mul_int8x8x32(args, handle(), name);
#if MEGDNN_AARCH64
...
...
@@ -1202,12 +1202,13 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_MULTI) {
#undef cb
}
TEST_F
(
ARM_COMMON
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44
)
{
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_S2
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
checker_conv_bias(get_nchw44_conv_bias_args({
2, 3, 4, 5, 6, 7}, 1),
\
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f),
\
checker_conv_bias(get_nchw44_conv_bias_args({
3, 4, 6}, 2), handle(), &rng,
\
epsilon, dtype::QuantizedS8(2.5f),
\
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
...
...
@@ -1220,12 +1221,12 @@ TEST_F(ARM_COMMON, CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44) {
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_
MULTI
)
{
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_
S1
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
checker_conv_bias(get_nchw44_conv_bias_args({2,
3, 4, 5, 6, 7}, 1),
\
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f),
\
checker_conv_bias(get_nchw44_conv_bias_args({2,
5, 7}, 1), handle(), &rng,
\
epsilon, dtype::QuantizedS8(2.5f),
\
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
...
...
@@ -1286,6 +1287,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32) {
#undef cb
}
#if MEGDNN_AARCH64
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S1_MK4_PACK_F32
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
({
2
,
4
,
7
},
1
);
check_conv_bias
(
args
,
handle
(),
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
}
#endif
#if MEGDNN_AARCH64
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S2_MK4_PACK_F32
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
({
3
,
5
,
6
},
2
);
check_conv_bias
(
args
,
handle
(),
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
}
#endif
/***************************** Conv1x1 Algo Test ***********************/
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_1X1_S1_F32
)
{
using
namespace
conv_bias
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录