Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
edd7e167
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4704
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
edd7e167
编写于
7月 02, 2020
作者:
M
Megvii Engine Team
提交者:
Xu Xinran
8月 04, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn/fallback): add im2col filterpreprocess function
GitOrigin-RevId: 61c54ad258a42301711d3efdae0caef47d7b0584
上级
9e9e8ca0
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
1207 addition
and
252 deletion
+1207
-252
dnn/src/fallback/conv_bias/im2col/algos.cpp
dnn/src/fallback/conv_bias/im2col/algos.cpp
+358
-193
dnn/src/fallback/conv_bias/im2col/algos.h
dnn/src/fallback/conv_bias/im2col/algos.h
+9
-24
dnn/src/fallback/conv_bias/im2col/strategy_base.h
dnn/src/fallback/conv_bias/im2col/strategy_base.h
+6
-4
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
+17
-8
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
+1
-1
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
+21
-14
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+2
-1
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+5
-0
dnn/src/fallback/convolution/opr_impl.cpp
dnn/src/fallback/convolution/opr_impl.cpp
+3
-1
dnn/test/arm_common/conv_bias_multi_thread.cpp
dnn/test/arm_common/conv_bias_multi_thread.cpp
+471
-4
dnn/test/common/conv_bias.cpp
dnn/test/common/conv_bias.cpp
+24
-0
dnn/test/common/conv_bias.h
dnn/test/common/conv_bias.h
+4
-1
dnn/test/x86/conv_bias.cpp
dnn/test/x86/conv_bias.cpp
+286
-1
未找到文件。
dnn/src/fallback/conv_bias/im2col/algos.cpp
浏览文件 @
edd7e167
...
...
@@ -31,35 +31,10 @@ using namespace im2col;
* *Through witch can convenient get the needed ptr
*/
struct
Im2colBundelIndex
{
static
constexpr
size_t
BUNDLE_PADDING_INDEX
=
0
_z
;
static
constexpr
size_t
BUNDLE_PACKA_INDEX
=
1
_z
;
static
constexpr
size_t
BUNDLE_THREAD_INDEX
=
2
_z
;
};
using
Pack_Mode
=
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
;
//! Process one input channel copy padding
static
void
copy_padding_kern
(
WorkspaceBundle
&
bundle
,
const
ConvBiasImpl
::
NCBKernParam
&
param
,
const
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
StrategyBase
*
im2colstrategy
,
size_t
pack_oc_size
)
{
im2colstrategy
->
copy_padding_kern
(
bundle
,
param
,
ncb_index
,
pack_oc_size
);
}
//! packA_kern
static
void
packA_kern
(
WorkspaceBundle
&
bundle
,
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
matmulparam
,
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
StrategyBase
*
im2colstrategy
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
pack_oc_size
)
{
im2colstrategy
->
packA_kern
(
bundle
,
param
,
matmulparam
,
matmul_algo
,
ncb_index
,
matmul_desc
,
pack_oc_size
);
}
/*!
* *\brief Im2colKerns collects all the im2col kerns in it
*/
...
...
@@ -124,8 +99,8 @@ public:
WorkspaceBundle
get_thread_bundle
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
im2col_kern_param
,
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
,
const
fallback
::
MatrixMulImpl
::
KernSizeParam
&
im2col_kern_param
,
const
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
{
size_t
IC
=
param
.
filter_meta
.
icpg
,
FH
=
param
.
filter_meta
.
spatial
[
0
],
FW
=
param
.
filter_meta
.
spatial
[
1
];
...
...
@@ -205,8 +180,8 @@ public:
}
WorkspaceBundle
get_thread_bundle
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
im2col_kern_param
,
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
,
const
fallback
::
MatrixMulImpl
::
KernSizeParam
&
im2col_kern_param
,
const
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
{
size_t
IC
=
param
.
filter_meta
.
icpg
,
FH
=
param
.
filter_meta
.
spatial
[
0
],
FW
=
param
.
filter_meta
.
spatial
[
1
];
...
...
@@ -288,8 +263,8 @@ public:
}
WorkspaceBundle
get_thread_bundle
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
fallback
::
MatrixMulImpl
::
KernSizeParam
im2col_kern_param
,
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
,
const
fallback
::
MatrixMulImpl
::
KernSizeParam
&
im2col_kern_param
,
const
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
{
size_t
IC
=
param
.
filter_meta
.
icpg
,
FH
=
param
.
filter_meta
.
spatial
[
0
],
FW
=
param
.
filter_meta
.
spatial
[
1
];
...
...
@@ -322,15 +297,16 @@ public:
}
};
fallback
::
MatrixMulImpl
::
KernSizeParam
ConvBiasImpl
::
AlgoIm2col
::
get_matmul_kern_param
(
const
NCBKernSizeParam
&
param
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
const
{
namespace
{
static
fallback
::
MatrixMulImpl
::
KernSizeParam
get_matmul_kern_param
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
{
auto
format
=
param
::
MatrixMul
::
Format
::
DEFAULT
;
size_t
pack_oc_size
=
pack_size
(
param
.
filter_meta
.
format
);
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
format
=
param
::
MatrixMul
::
Format
::
MK4
;
}
else
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
){
}
else
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
format
=
param
::
MatrixMul
::
Format
::
MK4_DOT
;
}
size_t
M
=
oc_tile_size
;
...
...
@@ -358,10 +334,23 @@ ConvBiasImpl::AlgoIm2col ::get_matmul_kern_param(const NCBKernSizeParam& param,
format
};
}
void
ConvBiasImpl
::
AlgoIm2col
::
choice_ohw_oc_block
(
const
NCBKernSizeParam
&
param
,
size_t
&
oc_tile_size
,
size_t
&
ohw_tile_size
,
size_t
block_m
,
size_t
block_n
,
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
pack_mode
)
const
{
static
void
choice_ohw_oc_block
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
size_t
&
oc_tile_size
,
size_t
&
ohw_tile_size
,
size_t
block_m
,
size_t
block_n
,
const
size_t
m_ohw_tile_size
,
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
pack_mode
)
{
//! calculate m_oc_tile_size in choice_ohw_oc_block() fucntion,
//! when ohw_tile_size < this value ohw_tile_size = ohw
static
constexpr
size_t
DEFAULT_OHW_MIN_TILE_SIZE
=
32
;
//! when nr_threads > 1 and round(ohw,nr_threads)>nr_threads,
//! oc_tile_size = DEFAULT_OC_TILE_SIZE
static
constexpr
size_t
DEFAULT_OC_TILE_SIZE
=
512
;
//! when oc_tile_size > this value m_oc_tile_size =
//! DEFAULT_OC_MAX_TILE_SIZE
static
constexpr
size_t
DEFAULT_OC_MAX_TILE_SIZE
=
1024
;
//! when oc_tile_size < this value oc_tile_size =
//! DEFAULT_OC_MIN_TILE_SIZE the purpose is aligning the calculation
static
constexpr
size_t
DEFAULT_OC_MIN_TILE_SIZE
=
128
;
size_t
nr_threads
=
param
.
nr_threads
;
size_t
OC
=
param
.
filter_meta
.
ocpg
;
size_t
ohw
=
param
.
osz
[
0
]
*
param
.
osz
[
1
];
...
...
@@ -393,8 +382,74 @@ void ConvBiasImpl::AlgoIm2col::choice_ohw_oc_block(
}
}
WorkspaceBundle
ConvBiasImpl
::
AlgoIm2col
::
get_bundle
(
const
NCBKernSizeParam
&
param
)
const
{
static
size_t
packA_group_size
(
const
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
MatrixMulImpl
::
KernSizeParam
&
matmul_param
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
packa_parallel_times
)
{
if
(
matmul_desc
.
packmode
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
DEFAULT
)
{
return
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
}
else
if
(
matmul_desc
.
packmode
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
ONLY_PACKA
)
{
return
packa_parallel_times
*
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
}
megdnn_assert
(
matmul_desc
.
packmode
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
NO_PACK
);
//! nopack mode return 0;
return
0
;
}
static
WorkspaceBundle
get_thread_bundle
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
const
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
MatrixMulImpl
::
KernSizeParam
&
matmul_param
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
oc_tile_size
,
size_t
ohw_tile_size
)
{
if
(
matmul_desc
.
packmode
==
Pack_Mode
::
DEFAULT
)
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::get_bundle_dft"
_hash
))
{
Im2colKerns
<
Pack_Mode
::
DEFAULT
>
defaultkern
;
return
defaultkern
.
get_thread_bundle
(
param
,
matmul_param
,
matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
MIDOUT_END
();
}
else
if
(
matmul_desc
.
packmode
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
ONLY_PACKA
)
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::get_bundle_onlypacka"
_hash
))
{
Im2colKerns
<
Pack_Mode
::
ONLY_PACKA
>
onlypackakern
;
return
onlypackakern
.
get_thread_bundle
(
param
,
matmul_param
,
matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
MIDOUT_END
();
}
else
{
megdnn_assert
(
matmul_desc
.
packmode
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
NO_PACK
);
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::get_thread_bundle_nopack"
_hash
))
{
Im2colKerns
<
Pack_Mode
::
NO_PACK
>
nopackkern
;
return
nopackkern
.
get_thread_bundle
(
param
,
matmul_param
,
matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
MIDOUT_END
();
}
return
{
nullptr
,
{}};
}
static
WorkspaceBundle
get_bundle
(
const
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
&
param
,
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
oc_tile_size
,
size_t
ohw_tile_size
)
{
UNPACK_CONV_F32_NCB_KERN_SIZES
(
param
);
MEGDNN_MARK_USED_VAR
(
OC
);
MEGDNN_MARK_USED_VAR
(
OH
);
...
...
@@ -410,23 +465,20 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
size_t
padding
=
0
,
packa_size
=
0
,
packa_group_size
=
0
;
size_t
nr_threads
=
param
.
nr_threads
;
size_t
GROUP
=
param
.
filter_meta
.
group
;
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
mdesc
=
m_matmul_algo
->
matmul_description
();
bool
need_pack
=
mdesc
.
packmode
==
Pack_Mode
::
DEFAULT
;
bool
only_packA
=
mdesc
.
packmode
==
Pack_Mode
::
ONLY_PACKA
;
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
choice_ohw_oc_block
(
param
,
oc_tile_size
,
ohw_tile_size
,
mdesc
.
innerblocksize
.
m
,
mdesc
.
innerblocksize
.
n
,
mdesc
.
packmode
);
if
(
need_pack
||
only_packA
)
{
auto
im2col_kern_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
only_packA
?
oc_tile_size
:
OC
);
size_t
oc_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
WorkspaceBundle
wb
=
m_matmul_algo
->
get_bundle
(
im2col_kern_param
);
packa_group_size
=
only_packA
?
oc_parallel_times
*
wb
.
get_size
(
0
)
:
wb
.
get_size
(
0
);
}
else
{
//! not support pack,not need pack
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
matmul_desc
=
matmul_algo
->
matmul_description
();
bool
default_pack
=
matmul_desc
.
packmode
==
Pack_Mode
::
DEFAULT
;
//! packmode is default should use oc
//! packmode is onlypackA should use oc_tile_size
auto
im2col_kern_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
default_pack
?
OC
:
oc_tile_size
);
if
(
is_enable_filter_preprocess
(
param
))
{
packa_group_size
=
0
;
}
else
{
size_t
oc_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
packa_group_size
=
packA_group_size
(
matmul_algo
,
im2col_kern_param
,
matmul_desc
,
oc_parallel_times
);
}
if
(
no_need_pading
)
{
...
...
@@ -437,50 +489,27 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
}
packa_size
=
GROUP
*
packa_group_size
;
//! for packA size = GROUP * a_size
WorkspaceBundle
ws
=
{
nullptr
,
{}};
auto
im2col_kern_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
oc_tile_size
);
if
(
m_matmul_algo
->
packmode
()
==
Pack_Mode
::
DEFAULT
)
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::get_bundle_dft"
_hash
))
{
Im2colKerns
<
Pack_Mode
::
DEFAULT
>
defaultkern
;
ws
=
defaultkern
.
get_thread_bundle
(
param
,
im2col_kern_param
,
m_matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
MIDOUT_END
();
}
else
if
(
m_matmul_algo
->
packmode
()
==
Pack_Mode
::
ONLY_PACKA
)
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::get_bundle_packa"
_hash
))
{
Im2colKerns
<
Pack_Mode
::
ONLY_PACKA
>
onlypackakern
;
ws
=
onlypackakern
.
get_thread_bundle
(
param
,
im2col_kern_param
,
m_matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
MIDOUT_END
();
}
else
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::get_bundle_other"
_hash
))
{
Im2colKerns
<
Pack_Mode
::
NO_PACK
>
nopackkern
;
ws
=
nopackkern
.
get_thread_bundle
(
param
,
im2col_kern_param
,
m_matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
MIDOUT_END
();
}
WorkspaceBundle
ws
=
get_thread_bundle
(
param
,
matmul_algo
,
im2col_kern_param
,
matmul_desc
,
oc_tile_size
,
ohw_tile_size
);
return
{
nullptr
,
{
padding
,
packa_size
,
ws
.
total_size_in_bytes
()
*
nr_threads
}};
}
}
// namespace
size_t
ConvBiasImpl
::
AlgoIm2col
::
get_workspace
(
const
NCBKernSizeParam
&
p
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
0
,
0
)
{
return
get_bundle
(
p
).
total_size_in_bytes
();
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
matmul_desc
=
m_matmul_algo
->
matmul_description
();
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
choice_ohw_oc_block
(
p
,
oc_tile_size
,
ohw_tile_size
,
matmul_desc
.
innerblocksize
.
m
,
matmul_desc
.
innerblocksize
.
n
,
m_ohw_tile_size
,
matmul_desc
.
packmode
);
return
get_bundle
(
p
,
m_matmul_algo
,
oc_tile_size
,
ohw_tile_size
)
.
total_size_in_bytes
();
}
MIDOUT_END
();
return
0
;
...
...
@@ -499,22 +528,21 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns(
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
size_t
ohw
=
OH
*
OW
;
size_t
GROUP
=
param
.
filter_meta
.
group
;
WorkspaceBundle
bundle
=
get_bundle
(
param
);
WorkspaceBundle
bundle_thread
=
{
nullptr
,
{}};
bool
need_padding
=
(
PH
!=
0
||
PW
!=
0
);
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
mdesc
=
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
m
atmul_
desc
=
m_matmul_algo
->
matmul_description
();
Pack_Mode
packmode
=
mdesc
.
packmode
;
bool
default_pack
=
packmode
==
Pack_Mode
::
DEFAULT
;
bool
no_pack
=
packmode
==
Pack_Mode
::
NO_PACK
;
bool
only_packA
=
packmode
==
Pack_Mode
::
ONLY_PACKA
;
bool
default_pack
=
matmul_desc
.
packmode
==
Pack_Mode
::
DEFAULT
;
bool
no_pack
=
matmul_desc
.
packmode
==
Pack_Mode
::
NO_PACK
;
bool
only_packA
=
matmul_desc
.
packmode
==
Pack_Mode
::
ONLY_PACKA
;
bool
enable_filter_preprocess
=
is_enable_filter_preprocess
(
param
);
choice_ohw_oc_block
(
param
,
oc_tile_size
,
ohw_tile_size
,
mdesc
.
innerblocksize
.
m
,
mdesc
.
innerblocksize
.
n
,
mdesc
.
packmode
);
matmul_desc
.
innerblocksize
.
m
,
matmul_desc
.
innerblocksize
.
n
,
m_ohw_tile_size
,
matmul_desc
.
packmode
);
WorkspaceBundle
bundle
=
get_bundle
(
param
,
m_matmul_algo
,
oc_tile_size
,
ohw_tile_size
);
size_t
ohw_parallel_times
=
div_ceil
(
ohw
,
ohw_tile_size
);
size_t
oc_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
size_t
packa_parallel_times
=
0
;
...
...
@@ -523,28 +551,16 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns(
if
(
only_packA
)
{
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
}
else
if
(
default_pack
)
{
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
mdesc
.
innerblocksize
.
m
);
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
matmul_desc
.
innerblocksize
.
m
);
}
auto
matmul_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
only_packA
?
oc_tile_size
:
OC
);
if
(
mdesc
.
packmode
==
Pack_Mode
::
DEFAULT
)
{
Im2colKerns
<
Pack_Mode
::
DEFAULT
>
defaultkern
;
bundle_thread
=
defaultkern
.
get_thread_bundle
(
param
,
matmul_param
,
m_matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
else
if
(
mdesc
.
packmode
==
Pack_Mode
::
ONLY_PACKA
)
{
Im2colKerns
<
Pack_Mode
::
ONLY_PACKA
>
onlypackakern
;
bundle_thread
=
onlypackakern
.
get_thread_bundle
(
param
,
matmul_param
,
m_matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
else
{
Im2colKerns
<
Pack_Mode
::
NO_PACK
>
nopackkern
;
bundle_thread
=
nopackkern
.
get_thread_bundle
(
param
,
matmul_param
,
m_matmul_algo
,
ohw_tile_size
,
oc_tile_size
);
}
param
,
ohw_tile_size
,
default_pack
?
OC
:
oc_tile_size
);
WorkspaceBundle
bundle_thread
=
get_thread_bundle
(
param
,
m_matmul_algo
,
matmul_param
,
matmul_desc
,
oc_tile_size
,
ohw_tile_size
);
StrategyParam
strategyparam
;
strategyparam
.
ohw
=
ohw
;
strategyparam
.
is_dst_8bit
=
...
...
@@ -557,6 +573,9 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns(
strategyparam
.
is_ohw_size_bigger
&&
!
strategyparam
.
is_dst_8bit
;
strategyparam
.
oc_tile_size
=
oc_tile_size
;
strategyparam
.
pack_oc_size
=
pack_oc_size
;
strategyparam
.
enable_filter_preprocess
=
enable_filter_preprocess
;
strategyparam
.
packA_group_size
=
packA_group_size
(
m_matmul_algo
,
matmul_param
,
matmul_desc
,
packa_parallel_times
);
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ret_kern
;
MIDOUT_BEGIN
(
...
...
@@ -569,88 +588,126 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns(
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
copy_padding_kern
(
bundle
,
param
,
ncb_index
,
im2colstrategy
,
pack_oc_size
);
im2colstrategy
->
copy_padding_kern
(
bundle
,
param
,
ncb_index
,
pack_oc_size
);
};
auto
kern_packA
=
[
bundle
,
matmul_algo
=
m_matmul_algo
,
matmul_param
,
im2colstrategy
,
pack_oc_size
=
pack_oc_size
,
mdesc
=
mdesc
](
strategyparam
=
strategyparam
,
matmul_desc
=
matmul_desc
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
packA_kern
(
bundle
,
param
,
matmul_param
,
matmul_algo
,
ncb_index
,
im2colstrategy
,
mdesc
,
pack_oc_size
);
im2colstrategy
->
packA_kern
(
bundle
,
param
,
matmul_param
,
matmul_algo
,
ncb_index
,
matmul_desc
,
strategyparam
);
};
if
(
default_pack
)
{
auto
kern_compute_default
=
[
bundle
,
bundle_thread
,
matmul_param
,
matmul_algo
=
m_matmul_algo
,
ohw_tile_size
=
ohw_tile_size
,
strategyparam
=
strategyparam
,
matmul_desc
=
mdesc
,
im2colstrategy
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
Im2colKerns
<
Pack_Mode
::
DEFAULT
>::
kerns
(
bundle
,
bundle_thread
,
param
,
matmul_param
,
matmul_algo
,
matmul_desc
,
strategyparam
,
ncb_index
,
ohw_tile_size
,
im2colstrategy
);
};
ret_kern
.
push_back
({
kern_packA
,
{
GROUP
,
packa_parallel_times
}});
if
(
need_padding
)
{
ret_kern
.
push_back
({
kern_padding
,
{
param
.
n
,
GROUP
,
IC
/
pack_oc_size
}});
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::dispatch_kerns_default_pack"
_hash
))
{
auto
kern_compute_default
=
[
bundle
,
bundle_thread
,
matmul_param
,
matmul_algo
=
m_matmul_algo
,
ohw_tile_size
=
ohw_tile_size
,
strategyparam
=
strategyparam
,
matmul_desc
=
matmul_desc
,
im2colstrategy
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
Im2colKerns
<
Pack_Mode
::
DEFAULT
>::
kerns
(
bundle
,
bundle_thread
,
param
,
matmul_param
,
matmul_algo
,
matmul_desc
,
strategyparam
,
ncb_index
,
ohw_tile_size
,
im2colstrategy
);
};
if
(
!
enable_filter_preprocess
)
{
ret_kern
.
push_back
(
{
kern_packA
,
{
GROUP
,
packa_parallel_times
}});
}
if
(
need_padding
)
{
ret_kern
.
push_back
(
{
kern_padding
,
{
param
.
n
,
GROUP
,
IC
/
pack_oc_size
}});
}
ret_kern
.
push_back
({
kern_compute_default
,
{
N
,
GROUP
,
ohw_parallel_times
,
oc_parallel_times
}});
return
ret_kern
;
}
ret_kern
.
push_back
(
{
kern_compute_default
,
{
N
,
GROUP
,
ohw_parallel_times
,
oc_parallel_times
}});
MIDOUT_END
();
return
{};
}
else
if
(
only_packA
)
{
auto
kern_compute_onlypackA
=
[
bundle
,
bundle_thread
,
matmul_param
,
matmul_algo
=
m_matmul_algo
,
strategyparam
=
strategyparam
,
ohw_tile_size
=
ohw_tile_size
,
matmul_desc
=
mdesc
,
im2colstrategy
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
Im2colKerns
<
Pack_Mode
::
ONLY_PACKA
>::
kerns
(
bundle
,
bundle_thread
,
param
,
matmul_param
,
matmul_algo
,
matmul_desc
,
strategyparam
,
ncb_index
,
ohw_tile_size
,
im2colstrategy
);
};
ret_kern
.
push_back
({
kern_packA
,
{
GROUP
,
packa_parallel_times
}});
if
(
need_padding
)
{
ret_kern
.
push_back
({
kern_padding
,
{
param
.
n
,
GROUP
,
IC
}});
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::dispatch_kerns_onlypacka"
_hash
))
{
auto
kern_compute_onlypackA
=
[
bundle
,
bundle_thread
,
matmul_param
,
matmul_algo
=
m_matmul_algo
,
strategyparam
=
strategyparam
,
ohw_tile_size
=
ohw_tile_size
,
matmul_desc
=
matmul_desc
,
im2colstrategy
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
Im2colKerns
<
Pack_Mode
::
ONLY_PACKA
>::
kerns
(
bundle
,
bundle_thread
,
param
,
matmul_param
,
matmul_algo
,
matmul_desc
,
strategyparam
,
ncb_index
,
ohw_tile_size
,
im2colstrategy
);
};
if
(
!
enable_filter_preprocess
)
{
ret_kern
.
push_back
(
{
kern_packA
,
{
GROUP
,
packa_parallel_times
}});
}
if
(
need_padding
)
{
ret_kern
.
push_back
(
{
kern_padding
,
{
param
.
n
,
GROUP
,
IC
}});
}
ret_kern
.
push_back
({
kern_compute_onlypackA
,
{
N
,
GROUP
,
ohw_parallel_times
,
oc_parallel_times
}});
return
ret_kern
;
}
ret_kern
.
push_back
(
{
kern_compute_onlypackA
,
{
N
,
GROUP
,
ohw_parallel_times
,
oc_parallel_times
}});
MIDOUT_END
();
return
{};
}
else
if
(
no_pack
)
{
auto
kern_compute_nopack
=
[
bundle
,
bundle_thread
,
matmul_param
,
matmul_algo
=
m_matmul_algo
,
strategyparam
=
strategyparam
,
ohw_tile_size
=
ohw_tile_size
,
matmul_desc
=
mdesc
,
im2colstrategy
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
Im2colKerns
<
Pack_Mode
::
NO_PACK
>::
kerns
(
bundle
,
bundle_thread
,
param
,
matmul_param
,
matmul_algo
,
matmul_desc
,
strategyparam
,
ncb_index
,
ohw_tile_size
,
im2colstrategy
);
};
if
(
need_padding
)
{
ret_kern
.
push_back
({
kern_padding
,
{
param
.
n
,
GROUP
,
IC
}});
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::dispatch_kerns_no_pack"
_hash
))
{
auto
kern_compute_nopack
=
[
bundle
,
bundle_thread
,
matmul_param
,
matmul_algo
=
m_matmul_algo
,
strategyparam
=
strategyparam
,
ohw_tile_size
=
ohw_tile_size
,
matmul_desc
=
matmul_desc
,
im2colstrategy
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
Im2colKerns
<
Pack_Mode
::
NO_PACK
>::
kerns
(
bundle
,
bundle_thread
,
param
,
matmul_param
,
matmul_algo
,
matmul_desc
,
strategyparam
,
ncb_index
,
ohw_tile_size
,
im2colstrategy
);
};
if
(
need_padding
)
{
ret_kern
.
push_back
(
{
kern_padding
,
{
param
.
n
,
GROUP
,
IC
}});
}
ret_kern
.
push_back
({
kern_compute_nopack
,
{
N
,
GROUP
,
ohw_parallel_times
,
oc_parallel_times
}});
return
ret_kern
;
}
ret_kern
.
push_back
(
{
kern_compute_nopack
,
{
N
,
GROUP
,
ohw_parallel_times
,
oc_parallel_times
}});
MIDOUT_END
();
return
{};
}
return
ret_kern
;
return
{}
;
}
MIDOUT_END
();
return
{};
...
...
@@ -694,12 +751,19 @@ bool ConvBiasImpl::AlgoIm2col::usable(
return
false
;
}
}
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
mdesc
=
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
m
atmul_
desc
=
m_matmul_algo
->
matmul_description
();
//! only matmul's packmode is packa or default support weight preprocess
if
(
is_enable_filter_preprocess
(
param
)
&&
(
matmul_desc
.
packmode
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
NO_PACK
))
{
return
false
;
}
if
(
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
//! current NCHW44 im2col only support DEFAULT mode matmul
if
(
mdesc
.
packmode
!=
Pack_Mode
::
DEFAULT
)
{
if
(
m
atmul_
desc
.
packmode
!=
Pack_Mode
::
DEFAULT
)
{
return
false
;
//! nchw44 hybird mode and channel wise is not support
}
else
if
(
param
.
filter_meta
.
icpg
<
4
_z
||
...
...
@@ -711,8 +775,9 @@ bool ConvBiasImpl::AlgoIm2col::usable(
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
choice_ohw_oc_block
(
param
,
oc_tile_size
,
ohw_tile_size
,
mdesc
.
innerblocksize
.
m
,
mdesc
.
innerblocksize
.
n
,
m_matmul_algo
->
packmode
());
matmul_desc
.
innerblocksize
.
m
,
matmul_desc
.
innerblocksize
.
n
,
m_ohw_tile_size
,
matmul_desc
.
packmode
);
fallback
::
MatrixMulImpl
::
KernSizeParam
matmul_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
oc_tile_size
);
bool
matmulusable
=
m_matmul_algo
->
usable
(
matmul_param
);
...
...
@@ -731,4 +796,104 @@ bool ConvBiasImpl::AlgoIm2col::usable(
return
false
;
}
SmallVector
<
TensorLayout
>
ConvBiasImpl
::
AlgoIm2col
::
deduce_preprocessed_filter_layout
(
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
midout_iv
(
"ConvBiasImpl::AlgoIm2col::deduce_preprocessed_filter_layout"
_hash
))
{
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
matmul_desc
=
m_matmul_algo
->
matmul_description
();
//! only support default_pack and only_packa mode
if
(
matmul_desc
.
packmode
==
Pack_Mode
::
NO_PACK
)
{
return
{};
}
size_t
GROUP
=
param
.
filter_meta
.
group
;
bool
default_pack
=
matmul_desc
.
packmode
==
Pack_Mode
::
DEFAULT
;
size_t
OC
=
param
.
filter_meta
.
ocpg
;
SmallVector
<
TensorLayout
>
preprocessed_layouts
;
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
choice_ohw_oc_block
(
param
,
oc_tile_size
,
ohw_tile_size
,
matmul_desc
.
innerblocksize
.
m
,
matmul_desc
.
innerblocksize
.
n
,
m_ohw_tile_size
,
matmul_desc
.
packmode
);
auto
matmul_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
default_pack
?
OC
:
oc_tile_size
);
size_t
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
default_pack
?
matmul_desc
.
innerblocksize
.
m
:
oc_tile_size
);
size_t
packa_group_size
=
packA_group_size
(
m_matmul_algo
,
matmul_param
,
matmul_desc
,
packa_parallel_times
);
preprocessed_layouts
.
push_back
(
{{
GROUP
,
packa_group_size
},
dtype
::
Int8
()});
return
preprocessed_layouts
;
}
MIDOUT_END
();
return
{};
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoIm2col
::
dispatch_preprocess_kerns
(
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
0
,
3
)
{
size_t
OC
=
param
.
filter_meta
.
ocpg
;
size_t
oc_tile_size
=
0
,
ohw_tile_size
=
0
;
size_t
GROUP
=
param
.
filter_meta
.
group
;
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
matmul_desc
=
m_matmul_algo
->
matmul_description
();
choice_ohw_oc_block
(
param
,
oc_tile_size
,
ohw_tile_size
,
matmul_desc
.
innerblocksize
.
m
,
matmul_desc
.
innerblocksize
.
n
,
m_ohw_tile_size
,
matmul_desc
.
packmode
);
WorkspaceBundle
bundle
=
get_bundle
(
param
,
m_matmul_algo
,
oc_tile_size
,
ohw_tile_size
);
Pack_Mode
packmode
=
matmul_desc
.
packmode
;
bool
default_pack
=
packmode
==
Pack_Mode
::
DEFAULT
;
bool
only_packA
=
packmode
==
Pack_Mode
::
ONLY_PACKA
;
size_t
packa_parallel_times
=
0
;
if
(
only_packA
)
{
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
oc_tile_size
);
}
else
if
(
default_pack
)
{
packa_parallel_times
=
div_ceil
<
size_t
>
(
OC
,
matmul_desc
.
innerblocksize
.
m
);
}
else
{
//! if nopack return null so that OprWeightPreprocessProxy can run
//! with nopack mode
return
{};
}
auto
matmul_param
=
get_matmul_kern_param
(
param
,
ohw_tile_size
,
default_pack
?
OC
:
oc_tile_size
);
StrategyParam
strategyparam
;
strategyparam
.
enable_filter_preprocess
=
is_enable_filter_preprocess
(
param
);
strategyparam
.
packA_group_size
=
packA_group_size
(
m_matmul_algo
,
matmul_param
,
matmul_desc
,
packa_parallel_times
);
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ret_kern
;
StrategyBase
*
im2colstrategy
=
Factory
::
get_im2col_strategy
(
param
,
m_matmul_algo
);
auto
kern_packA
=
[
bundle
,
matmul_algo
=
m_matmul_algo
,
matmul_param
,
im2colstrategy
,
strategyparam
=
strategyparam
,
matmul_desc
=
matmul_desc
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
bundle
.
set
(
param
.
workspace_ptr
);
im2colstrategy
->
packA_kern
(
bundle
,
param
,
matmul_param
,
matmul_algo
,
ncb_index
,
matmul_desc
,
strategyparam
);
};
ret_kern
.
push_back
({
kern_packA
,
{
GROUP
,
packa_parallel_times
}});
return
ret_kern
;
}
MIDOUT_END
();
return
{};
}
// vim: syntax=cpp.doxygen
dnn/src/fallback/conv_bias/im2col/algos.h
浏览文件 @
edd7e167
...
...
@@ -22,27 +22,6 @@ namespace megdnn {
namespace
fallback
{
class
ConvBiasImpl
::
AlgoIm2col
final
:
public
AlgoBase
{
//! calculate m_oc_tile_size in choice_ohw_oc_block() fucntion,
//! when m_oc_tile_size < this value m_oc_tile_size = ohw
static
constexpr
size_t
DEFAULT_OHW_MIN_TILE_SIZE
=
32
;
//! when nr_threads > 1 and round(ohw,nr_threads)>nr_threads,
//! m_oc_tile_size = DEFAULT_OC_TILE_SIZE
static
constexpr
size_t
DEFAULT_OC_TILE_SIZE
=
512
;
//! when m_oc_tile_size > this value m_oc_tile_size =
//! DEFAULT_OC_MAX_TILE_SIZE
static
constexpr
size_t
DEFAULT_OC_MAX_TILE_SIZE
=
1024
;
//! when m_oc_tile_size < this value m_oc_tile_size =
//! DEFAULT_OC_MIN_TILE_SIZE the purpose is aligning the calculation
static
constexpr
size_t
DEFAULT_OC_MIN_TILE_SIZE
=
128
;
fallback
::
MatrixMulImpl
::
KernSizeParam
get_matmul_kern_param
(
const
NCBKernSizeParam
&
param
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
const
;
WorkspaceBundle
get_bundle
(
const
NCBKernSizeParam
&
param
)
const
;
void
choice_ohw_oc_block
(
const
NCBKernSizeParam
&
param
,
size_t
&
oc_tile_size
,
size_t
&
ohw_tile_size
,
size_t
block_m
,
size_t
block_n
,
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
pack_mode
)
const
;
public:
AlgoIm2col
(
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
)
:
m_matmul_algo
(
matmul_algo
),
...
...
@@ -59,10 +38,16 @@ public:
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_preprocess_workspace
(
const
NCBKernSizeParam
&
/*param*/
)
const
override
{
return
0
;
}
SmallVector
<
NCBKern
>
dispatch_preprocess_kerns
(
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
if
(
param
.
src_type
.
category
()
==
DTypeCategory
::
QUANTIZED
)
{
static
CpuOprDelegationStorage
<
1
>
storage
;
auto
conv_bias_opr
=
storage
.
get
<
ConvBias
,
0
>
();
...
...
dnn/src/fallback/conv_bias/im2col/strategy_base.h
浏览文件 @
edd7e167
...
...
@@ -40,9 +40,11 @@ struct StrategyParam {
size_t
block_n
;
size_t
block_k
;
size_t
pack_oc_size
;
size_t
packA_group_size
;
bool
skip_copy_dst
;
bool
is_dst_8bit
;
bool
is_ohw_size_bigger
;
bool
enable_filter_preprocess
;
};
class
StrategyBase
{
...
...
@@ -62,7 +64,7 @@ public:
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desec
,
size_t
pack_size
)
=
0
;
const
StrategyParam
&
sparam
)
=
0
;
virtual
void
exec_im2col
(
const
WorkspaceBundle
&
bundle
,
const
WorkspaceBundle
&
bundle_thread
,
...
...
@@ -296,7 +298,7 @@ public:
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
pack_size
)
override
;
const
StrategyParam
&
sparam
)
override
;
virtual
void
exec_im2col
(
const
WorkspaceBundle
&
bundle
,
const
WorkspaceBundle
&
bundle_thread
,
const
StrategyParam
&
sparam
,
...
...
@@ -375,7 +377,7 @@ public:
const
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
MDsec
,
size_t
pack_size
)
override
;
const
StrategyParam
&
sparam
)
override
;
void
exec_matmul
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
const
WorkspaceBundle
&
bundle
,
...
...
@@ -431,7 +433,7 @@ public:
const
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
MDsec
,
size_t
pack_size
)
override
;
const
StrategyParam
&
sparam
)
override
;
void
exec_im2col
(
const
WorkspaceBundle
&
bundle
,
const
WorkspaceBundle
&
bundle_thread
,
...
...
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
浏览文件 @
edd7e167
...
...
@@ -25,19 +25,23 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
)
{
const
StrategyParam
&
sparam
)
{
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
;
size_t
group_id
=
ncb_index
.
ndrange_id
[
0
];
static_cast
<
fallback
::
MatrixMulImpl
::
KernSizeParam
&>
(
matmul_param
)
=
matmulparam
;
size_t
packA_group_size
=
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
size_t
packed_per_oc_block_size
=
round_up
(
matmul_param
.
K
,
matmul_desc
.
innerblocksize
.
k
)
*
matmul_desc
.
innerblocksize
.
m
*
matmul_desc
.
packa_type_size
;
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
packed_per_oc_block_size
;
int8_t
*
a_panel
=
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
group_id
*
packA_group_size
+
a_panel_offset
;
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
int8_t
*
a_panel
=
tmp_ptr
+
group_id
*
sparam
.
packA_group_size
+
a_panel_offset
;
matmul_param
.
A_ptr
=
const_cast
<
src_ctype
*>
(
param
.
filter
<
src_ctype
>
(
group_id
));
matmul_algo
->
pack_A
(
matmul_param
,
a_panel
,
ncb_index
.
ndrange_id
[
1
],
...
...
@@ -149,15 +153,20 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
size_t
packA_per_oc_block_size
=
round_up
(
matmul_param
.
K
,
matmul_desc
.
innerblocksize
.
k
)
*
sparam
.
oc_tile_size
*
matmul_desc
.
packa_type_size
;
size_t
packA_group_size
=
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
)
;
size_t
packA_group_size
=
sparam
.
packA_group_size
;
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
packA_group_size
+
ncb_index
.
ndrange_id
[
3
]
*
packA_per_oc_block_size
;
void
*
matmul_dst
=
get_matmul_dst_ptr
(
param
,
bundle_thread
,
sparam
);
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
a_panel_offset
);
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
tmp_ptr
+
a_panel_offset
);
src_ctype
*
b_panel
=
reinterpret_cast
<
src_ctype
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle_thread
.
get
(
THREAD_BUNDLE_PACKB_INDEX
)));
...
...
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
浏览文件 @
edd7e167
...
...
@@ -26,7 +26,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_dsec*/
,
size_t
)
{
const
StrategyParam
&
)
{
MEGDNN_MARK_USED_VAR
(
bundle
);
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
matmulparam
);
...
...
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
浏览文件 @
edd7e167
...
...
@@ -26,7 +26,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_desc*/
,
size_t
)
{
const
StrategyParam
&
sparam
)
{
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
;
static_cast
<
fallback
::
MatrixMulImpl
::
KernSizeParam
&>
(
matmul_param
)
=
matmulparam
;
...
...
@@ -36,12 +36,17 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
size_t
output_block_oc_size
=
std
::
min
(
oc_tile_size
,
OC
-
ncb_index
.
ndrange_id
[
1
]
*
oc_tile_size
);
size_t
oc_cur_index
=
ncb_index
.
ndrange_id
[
1
]
*
oc_tile_size
;
size_t
packA_group_size
=
bundle
.
get_size
(
BUNDLE_PACKA_INDEX
)
/
param
.
filter_meta
.
group
;
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
int8_t
*
a_panel
=
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
group_id
*
packA_group_size
+
a_panel_offset
;
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
int8_t
*
a_panel
=
tmp_ptr
+
group_id
*
sparam
.
packA_group_size
+
a_panel_offset
;
matmul_param
.
A_ptr
=
const_cast
<
src_ctype
*>
(
param
.
filter
<
src_ctype
>
(
group_id
))
+
oc_cur_index
*
matmul_param
.
K
;
...
...
@@ -60,20 +65,22 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_desc*/
)
{
size_t
packA_group_size
=
bundle
.
get_size
(
BUNDLE_PACKA_INDEX
)
/
param
.
filter_meta
.
group
;
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_desc*/
)
{
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
3
]
*
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
a_panel_offset
=
sparam
.
group_id
*
packA_group_size
+
a_panel_offset
;
a_panel_offset
=
sparam
.
group_id
*
sparam
.
packA_group_size
+
a_panel_offset
;
void
*
matmul_dst
=
get_matmul_dst_ptr
(
param
,
bundle_thread
,
sparam
);
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
a_panel_offset
);
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
tmp_ptr
+
a_panel_offset
);
src_ctype
*
b_panel
=
nullptr
;
src_ctype
*
im2col_dst
=
static_cast
<
src_ctype
*>
(
...
...
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
edd7e167
...
...
@@ -154,7 +154,8 @@ void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
bias
{
nullptr
,
bias_layout
};
auto
fparam
=
make_ncb_kern_param
(
src
,
filter
,
bias
,
dst
,
workspace
,
preprocessed_filter
);
ConvolutionImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
,
workspace
.
size
);
//! should not pass workspace_size limit otherwise can not find match algo
ConvBiasImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
);
if
(
!
is_naive_algo
(
algo
)
&&
NCB_ALGO_FUNC
(
get_preprocess_workspace
,
algo
,
fparam
)
<=
workspace
.
size
)
{
exec_preprocess_with_ncb_kern
(
fparam
,
algo
);
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
edd7e167
...
...
@@ -299,6 +299,11 @@ private:
const
PreprocessedFilter
*
preprocessed_filter
);
};
inline
bool
is_enable_filter_preprocess
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
param
)
{
return
param
.
preprocessed_filter
&&
param
.
preprocessed_filter
->
tensors
.
size
()
>=
1
;
}
}
// namespace fallback
}
// namespace megdnn
...
...
dnn/src/fallback/convolution/opr_impl.cpp
浏览文件 @
edd7e167
...
...
@@ -109,7 +109,9 @@ void ConvolutionImpl::exec_preprocess(const TensorLayout& src_layout,
TensorND
src
{
nullptr
,
src_layout
},
dst
{
nullptr
,
dst_layout
};
auto
fparam
=
make_ncb_kern_param
(
src
,
filter
,
dst
,
preprocessed_filter
,
workspace
);
ConvolutionImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
,
workspace
.
size
);
//! should not pass workspace_size limit otherwise can not find match algo
ConvolutionImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
);
if
(
!
is_naive_algo
(
algo
)
&&
NCB_ALGO_FUNC
(
get_preprocess_workspace
,
algo
,
fparam
)
<=
workspace
.
size
)
{
exec_preprocess_with_ncb_kern
(
fparam
,
algo
);
...
...
dnn/test/arm_common/conv_bias_multi_thread.cpp
浏览文件 @
edd7e167
...
...
@@ -1837,6 +1837,21 @@ void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
{
arg
.
src
,
arg
.
filter
,
arg
.
bias
,
{},
{}});
}
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COL_FP32_STRIDE2_PREPROCESS
)
{
#define cb(name) \
check_conv_bias_preprocess( \
get_conv_bias_args({1, 2, 3, 4, 5, 6, 7}, 2, false, false, false), \
handle(), nullptr, 0.001, dtype::Float32(), dtype::Float32(), \
dtype::Float32(), dtype::Float32(), name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_F32K8X12X1"
)
cb
(
"IM2COLMATMUL:AARCH64_F32K4X16X1"
)
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARMV7_F32"
)
#endif
#undef cb
}
// clang-format off
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COL_FP32_STRIDE2
)
{
#define cb(name) \
...
...
@@ -1851,6 +1866,22 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_IM2COL_FP32_STRIDE2) {
cb
(
"IM2COLMATMUL:ARMV7_F32"
)
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COL_FP32_STRIDE1_PREPROCESS
)
{
#define cb(name) \
check_conv_bias_preprocess( \
get_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, false, false), \
handle(), nullptr, 0.001, dtype::Float32(), dtype::Float32(), \
dtype::Float32(), dtype::Float32(), name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_F32K8X12X1"
)
cb
(
"IM2COLMATMUL:AARCH64_F32K4X16X1"
)
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARMV7_F32"
)
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COL_FP32_STRIDE1
)
{
...
...
@@ -1899,6 +1930,37 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess(get_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, false, \
false, true, true), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name); \
check_conv_bias_preprocess( \
get_conv_bias_args({1}, 2, false, false, false, true, true), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
#if __ARM_FEATURE_DOTPROD
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_K8X12X4_DOTPROD"
);
#else
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_K8X8X8"
);
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_K4X4X16"
);
#endif
#elif MEGDNN_ARMV7
epsilon
=
1
;
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_K4X8X8"
);
#endif
#undef cb
}
#if __ARM_FEATURE_DOTPROD
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_MK4_DOT
)
{
...
...
@@ -1924,6 +1986,29 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_MK4_DOT) {
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_MK4_DOT_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess(get_nchw44_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, \
false, false, false, true), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name); \
checker_conv_bias( \
get_nchw44_conv_bias_args({1}, 2, false, true, true, false, true), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_MK4_DOT_S2_FUSE
)
{
UniformIntRNG
rng
{
-
50
,
50
};
...
...
@@ -1968,6 +2053,31 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_S8x8x32_MK4_DOT) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_S8x8x32_MK4_DOT_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, false, \
true, false, true, false, false, true), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), {}, name); \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({1}, 2, false, true, true, false, true, \
false, false, true), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), {}, name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32_MK4_DOT
)
{
UniformIntRNG
rng
{
-
50
,
50
};
...
...
@@ -1992,6 +2102,30 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32_MK4_DOT) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32_MK4_DOT_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, false, \
true, false, true, false, false, true), \
handle(), &rng, epsilon, dtype::Int8(), dtype::Int8(), \
dtype::Int32(), {}, name); \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({1}, 2, false, true, true, false, true, \
false, false, true), \
handle(), &rng, epsilon, dtype::Int8(), dtype::Int8(), \
dtype::Int32(), {}, name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_CONV1x1_QUANTIZEDSYM_MK4_DOT
)
{
UniformIntRNG
rng
{
-
50
,
50
};
...
...
@@ -2055,6 +2189,41 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QUANTIZEDASYM) {
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDASYM_FILTERPREPROCESS
)
{
NormalRNG
rng
(
128.
f
);
#define cb(name) \
check_conv_bias_preprocess( \
get_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, false, false, \
true, true), \
handle(), &rng, epsilon, \
dtype::Quantized8Asymm(1.2f, (uint8_t)125), \
dtype::Quantized8Asymm(1.3f, (uint8_t)129), \
dtype::QuantizedS32(1.2 * 1.3), \
dtype::Quantized8Asymm(50.3f, (uint8_t)120), name); \
check_conv_bias_preprocess( \
get_conv_bias_args({1}, 2, false, false, false, true, true), \
handle(), &rng, epsilon, \
dtype::Quantized8Asymm(1.2f, (uint8_t)125), \
dtype::Quantized8Asymm(1.3f, (uint8_t)129), \
dtype::QuantizedS32(1.2 * 1.3), \
dtype::Quantized8Asymm(50.3f, (uint8_t)120), name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
#if __ARM_FEATURE_DOTPROD
cb
(
"IM2COLMATMUL:AARCH64_QUINT8_K8X8X4_DOTPROD"
);
#else
cb
(
"IM2COLMATMUL:AARCH64_QUINT8_K8X8X8"
);
#endif
#elif MEGDNN_ARMV7
epsilon
=
1
;
cb
(
"IM2COLMATMUL:ARMV7_QUINT8_K4X8X8"
);
#endif
#undef cb
}
#endif
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
...
...
@@ -2088,6 +2257,39 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QUINT8x8x32) {
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUINT8x8x32_FILTERPREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
float
epsilon
=
0.001
;
#define cb(name) \
check_conv_bias_preprocess( \
get_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, true, true), \
handle(), &rng, epsilon, \
dtype::Quantized8Asymm(1.2f, (uint8_t)125), \
dtype::Quantized8Asymm(1.3f, (uint8_t)129), \
dtype::QuantizedS32(1.2 * 1.3), {}, name); \
check_conv_bias_preprocess(get_conv_bias_args({1}, 2, false, true, true), \
handle(), &rng, epsilon, \
dtype::Quantized8Asymm(1.2f, (uint8_t)125), \
dtype::Quantized8Asymm(1.3f, (uint8_t)129), \
dtype::QuantizedS32(1.2 * 1.3), {}, name);
#if MEGDNN_AARCH64
#if __ARM_FEATURE_DOTPROD
cb
(
"IM2COLMATMUL:AARCH64_QUINT8_K8X8X4_DOTPROD"
);
#else
cb
(
"IM2COLMATMUL:AARCH64_QUINT8_K8X8X8"
);
#endif
#elif MEGDNN_ARMV7
#if __ARM_FEATURE_DOTPROD
cb
(
"IM2COLMATMUL:AARCH32_QUINT8_K4X8X4"
);
#endif
cb
(
"IM2COLMATMUL:ARMV7_QUINT8_K4X8X8"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COLMATMUL_INT8x8x16
)
{
UniformIntRNG
rng
{
-
50
,
50
};
float
epsilon
=
0.001
;
...
...
@@ -2127,6 +2329,51 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_IM2COLMATMUL_INT8x8x16) {
#undef cb
#undef cb_nchw44
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COLMATMUL_INT8x8x16_FILTERPREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
float
epsilon
=
0.001
;
#define cb(name) \
check_conv_bias_preprocess( \
get_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, true, true), \
handle(), &rng, epsilon, dtype::Int8{}, dtype::Int8{}, \
dtype::Int16{}, dtype::Int16{}, name); \
check_conv_bias_preprocess(get_conv_bias_args({1}, 2, false, true, true), \
handle(), &rng, epsilon, dtype::Int8{}, \
dtype::Int8{}, dtype::Int16{}, dtype::Int16{}, \
name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X16_K8X8X8"
);
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X16_K4X4X16"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X16_K4X8X8"
);
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X16_K4X2X16"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONVBIAS_IM2COLMATMUL_INT8x8x16_NOPACK_FILTERPREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
float
epsilon
=
0.001
;
#define cb(name) \
check_conv_bias_preprocess( \
get_conv_bias_args({2, 3, 4, 5, 6, 7}, 1, false, true, true), \
handle(), &rng, epsilon, dtype::Int8{}, dtype::Int8{}, \
dtype::Int16{}, dtype::Int16{}, name); \
check_conv_bias_preprocess(get_conv_bias_args({1}, 2, false, true, true), \
handle(), &rng, epsilon, dtype::Int8{}, \
dtype::Int8{}, dtype::Int16{}, dtype::Int16{}, \
name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:ARM_COMMON_INT8X8X16"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARM_COMMON_INT8X8X16"
);
#endif
#undef cb
}
#endif
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
...
...
@@ -2147,6 +2394,31 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP16) {
dtype::Float16{}, dtype::Float16{}, dtype::Float16{}, \
name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_F16_K8X24X1"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:AARCH32_F16_K4X16X1"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_FP16_FILTERPREPROCESS
)
{
using
namespace
conv_bias
;
param
::
ConvBias
cur_param
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_conv_bias_args
({
2
,
3
,
4
,
5
,
6
,
7
},
1
,
false
,
false
,
false
);
std
::
vector
<
conv_bias
::
TestArg
>
args1
=
get_conv_bias_args
({
1
},
2
,
false
,
false
,
false
);
args
.
insert
(
args
.
begin
(),
args1
.
begin
(),
args1
.
end
());
NormalRNG
rng
(
1
);
#define cb(name) \
check_conv_bias_preprocess(args, handle(), &rng, 0.03, dtype::Float16{}, \
dtype::Float16{}, dtype::Float16{}, \
dtype::Float16{}, name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_F16_K8X24X1"
);
#elif MEGDNN_ARMV7
...
...
@@ -2185,6 +2457,36 @@ void checker_conv_bias_mul_int8x8x32(std::vector<conv_bias::TestArg> args,
}
}
void
checker_conv_bias_int8x8x32_preprocess
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
const
char
*
algo_name
)
{
using
namespace
conv_bias
;
Checker
<
ConvBiasForward
,
OprWeightPreprocessProxy
<
ConvBiasForward
>>
checker
(
handle
);
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
algo_name
));
checker
.
set_dtype
(
0
,
dtype
::
Int8
());
checker
.
set_dtype
(
1
,
dtype
::
Int8
());
checker
.
set_dtype
(
2
,
dtype
::
Int32
());
checker
.
set_dtype
(
4
,
dtype
::
Int32
());
for
(
auto
&&
arg
:
args
)
{
checker
.
set_param
(
arg
.
param
).
execs
({
arg
.
src
,
arg
.
filter
,
{},
{},
{}});
}
UniformIntRNG
rng
{
-
50
,
50
};
for
(
auto
&&
arg
:
args
)
{
checker
.
set_dtype
(
0
,
dtype
::
QuantizedS8
(
2.5
f
))
.
set_dtype
(
1
,
dtype
::
QuantizedS8
(
2.5
f
))
.
set_dtype
(
2
,
dtype
::
QuantizedS32
(
6.25
f
))
.
set_dtype
(
4
,
{})
.
set_rng
(
0
,
&
rng
)
.
set_rng
(
1
,
&
rng
)
.
set_rng
(
2
,
&
rng
)
.
set_param
(
arg
.
param
)
.
execs
({
arg
.
src
,
arg
.
filter
,
{},
{},
{}});
}
}
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
#if !__ARM_FEATURE_DOTPROD
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S2
)
{
...
...
@@ -2201,6 +2503,20 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S2) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S2_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
({
2
,
5
,
7
},
2
,
false
,
true
,
true
);
#define cb(name) checker_conv_bias_int8x8x32_preprocess(args, handle(), name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_4X4X16:96"
);
#else
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_MK4_4X2X16:96"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S1
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
...
...
@@ -2216,6 +2532,21 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S1) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8x8x32NCHW44_S1_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
({
3
,
4
,
6
},
1
,
false
,
true
,
true
);
#define cb(name) checker_conv_bias_int8x8x32_preprocess(args, handle(), name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_4X4X16:96"
);
#else
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_MK4_4X2X16:96"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_S2
)
{
UniformIntRNG
rng
{
-
50
,
50
};
...
...
@@ -2234,6 +2565,25 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_S2_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({3, 4, 6}, 2), handle(), &rng, epsilon, \
dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), \
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_4X4X16:96"
);
#else
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_MK4_4X2X16:96"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_S1
)
{
UniformIntRNG
rng
{
-
50
,
50
};
...
...
@@ -2252,6 +2602,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_S1_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({2, 5, 7}, 1), handle(), &rng, epsilon, \
dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), \
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_4X4X16:96"
);
#else
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_MK4_4X2X16:96"
);
#endif
#undef cb
}
#if MEGDNN_AARCH64
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_FUSE
)
{
...
...
@@ -2266,6 +2634,21 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_4X4X16:96"
);
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44_FUSE_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({3}, 1), handle(), &rng, epsilon, \
dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), \
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_4X4X16:96"
);
#undef cb
}
#endif
#endif
#endif
...
...
@@ -2287,6 +2670,23 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"
);
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44DOT_FUSE_PREPROCESS
)
{
UniformIntRNG
rng
{
-
50
,
50
};
#define cb(name) \
check_conv_bias_preprocess( \
get_nchw44_conv_bias_args({3}, 1, false, false, false, false, \
true, false, false, false), \
handle(), &rng, epsilon, dtype::QuantizedS8(2.5f), \
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), \
dtype::QuantizedS8(60.25f), name);
float
epsilon
=
0.001
;
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"
);
#undef cb
}
#endif
#endif
...
...
@@ -2320,6 +2720,36 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32) {
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8X8X32_FILTER_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_conv_bias_args
({
2
,
3
,
4
,
5
,
6
,
7
},
1
,
false
,
true
,
true
);
std
::
vector
<
conv_bias
::
TestArg
>
args1
=
get_conv_bias_args
({
1
},
2
,
false
,
true
,
true
);
args
.
insert
(
args
.
begin
(),
args1
.
begin
(),
args1
.
end
());
#define cb(name) checker_conv_bias_int8x8x32_preprocess(args, handle(), name);
#if MEGDNN_AARCH64
#if __ARM_FEATURE_DOTPROD
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_K8X12X4_DOTPROD"
);
#else
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_K8X8X8"
);
cb
(
"IM2COLMATMUL:AARCH64_INT8X8X32_K4X4X16"
);
#endif
#elif MEGDNN_ARMV7
#if __ARM_FEATURE_DOTPROD
cb
(
"IM2COLMATMUL:AARCH32_INT8_K6X8X4"
);
#endif
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_K4X8X8"
);
#endif
#if MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARMV7_INT8X8X32_K4X2X16"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S1_MK4_PACK_F32
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
(
...
...
@@ -2331,25 +2761,62 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COL_S1_MK4_PACK_F32) {
#endif
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S1_MK4_PACK_F32_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
(
{
2
,
4
,
7
},
1
,
false
,
false
,
false
,
false
,
false
,
true
,
true
);
#define cb(name) \
check_conv_bias_preprocess(args, handle(), nullptr, 0.001, \
dtype::Float32(), dtype::Float32(), \
dtype::Float32(), dtype::Float32(), name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARMV7_F32_MK4_PACK_4X12"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S2_MK4_PACK_F32
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
(
{
3
,
5
,
6
},
2
,
false
,
false
,
false
,
false
,
false
,
true
,
true
);
#define cb(name) check_conv_bias(args, handle(), name);
#if MEGDNN_AARCH64
c
heck_conv_bias
(
args
,
handle
(),
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
c
b
(
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
#elif MEGDNN_ARMV7
c
heck_conv_bias
(
args
,
handle
(),
"IM2COLMATMUL:ARMV7_F32_MK4_PACK_4X12"
);
c
b
(
"IM2COLMATMUL:ARMV7_F32_MK4_PACK_4X12"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S2_MK4_PACK_F32_FUSE_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
(
{
3
},
2
,
false
,
false
,
false
,
false
,
false
,
true
,
true
,
false
);
#define cb(name) \
check_conv_bias_preprocess(args, handle(), nullptr, 0.001, \
dtype::Float32(), dtype::Float32(), \
dtype::Float32(), dtype::Float32(), name);
#if MEGDNN_AARCH64
cb
(
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
#elif MEGDNN_ARMV7
cb
(
"IM2COLMATMUL:ARMV7_F32_MK4_PACK_4X12"
);
#endif
#undef cb
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_IM2COL_S2_MK4_PACK_F32_FUSE
)
{
using
namespace
conv_bias
;
std
::
vector
<
conv_bias
::
TestArg
>
args
=
get_nchw44_conv_bias_args
(
{
3
},
2
,
false
,
false
,
false
,
false
,
false
,
true
,
true
,
false
);
#define cb(name) check_conv_bias(args, handle(), name);
#if MEGDNN_AARCH64
c
heck_conv_bias
(
args
,
handle
(),
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
c
b
(
"IM2COLMATMUL:AARCH64_F32_MK4_K8X12X1"
);
#elif MEGDNN_ARMV7
c
heck_conv_bias
(
args
,
handle
(),
"IM2COLMATMUL:ARMV7_F32_MK4_PACK_4X12"
);
c
b
(
"IM2COLMATMUL:ARMV7_F32_MK4_PACK_4X12"
);
#endif
#undef cb
}
/***************************** Conv1x1 Algo Test ***********************/
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
CONV_BIAS_1X1_S1_F32
)
{
...
...
dnn/test/common/conv_bias.cpp
浏览文件 @
edd7e167
...
...
@@ -1118,6 +1118,30 @@ void checker_conv_bias_int8x8x16(std::vector<conv_bias::TestArg> args,
}
}
void
check_conv_bias_preprocess
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
RNG
*
rng
,
float
epsilon
,
DType
type0
,
DType
type1
,
DType
type2
,
DType
type3
,
const
char
*
algo_name
)
{
using
namespace
conv_bias
;
Checker
<
ConvBiasForward
,
OprWeightPreprocessProxy
<
ConvBiasForward
>>
checker
(
handle
);
checker
.
set_dtype
(
0
,
type0
);
checker
.
set_dtype
(
1
,
type1
);
checker
.
set_dtype
(
2
,
type2
);
checker
.
set_dtype
(
4
,
type3
);
checker
.
set_epsilon
(
epsilon
);
if
(
NULL
!=
rng
)
{
checker
.
set_rng
(
0
,
rng
).
set_rng
(
1
,
rng
).
set_rng
(
2
,
rng
).
set_rng
(
3
,
rng
);
}
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
algo_name
));
for
(
auto
&&
arg
:
args
)
{
checker
.
set_param
(
arg
.
param
).
execs
(
{
arg
.
src
,
arg
.
filter
,
arg
.
bias
,
{},
{}});
}
}
void
winograd_algo_extra_impl
(
const
TensorNDArray
&
tensors
,
uint32_t
m
,
param
::
ConvBias
param
,
Handle
*
handle
,
...
...
dnn/test/common/conv_bias.h
浏览文件 @
edd7e167
...
...
@@ -58,7 +58,10 @@ std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size);
std
::
vector
<
TestArg
>
get_int8_nchw44_args
(
size_t
kernel_size
,
size_t
pack_size
,
bool
compute_float32
=
false
,
bool
group_mode
=
false
);
void
check_conv_bias_preprocess
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
RNG
*
rng
,
float
epsilon
,
DType
type0
,
DType
type1
,
DType
type2
,
DType
type3
,
const
char
*
algo_name
);
template
<
typename
Opr
>
using
ConvBiasAlgoChecker
=
AlgoChecker
<
Opr
>
;
...
...
dnn/test/x86/conv_bias.cpp
浏览文件 @
edd7e167
...
...
@@ -752,7 +752,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_DIRECT_STRIDE2) {
}
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8X8X
)
{
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8X8X
32
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
;
...
...
@@ -842,6 +842,98 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8X8X) {
#undef cb2
}
//! Weight-preprocess check for the im2col int8x8x32/int8x8x16 matmul algos:
//! each supported x86 backend is run once with plain integer dtypes and
//! (for the x32 variants) once with quantized dtypes.
//! NOTE: the former function-like `cb`/`cb2` macros are replaced by
//! capturing lambdas -- identical behavior, but debuggable, scoped, and
//! without the #undef hygiene burden.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8X8X32_FILTER_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! Append one bias-free NCHW testcase; geometries the filter cannot
    //! cover (padded input smaller than the kernel) are skipped.
    auto run = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                   size_t p, NonlineMode nonline_mode) {
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;
        param.nonlineMode = nonline_mode;
        //! no bias
        args.emplace_back(param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7})
        for (size_t ic : {1, 4, 8, 16})
            for (size_t oc : {1, 4, 8})
                for (size_t p : {0, 2})
                    for (size_t size : {20, 21, 24})
                        for (NonlineMode nonline_mode :
                             {NonlineMode::IDENTITY}) {
                            run(oc, ic, size, size, kernel, p, nonline_mode);
                        }
    //! test OC block
    run(2046, 1, 8, 8, 2, 0, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    UniformIntRNG rng{-50, 50};

    //! Former `cb` macro: run all testcases with Int8/Int32 dtypes, then
    //! again with quantized dtypes (slot 4 dtype cleared so the output dtype
    //! is deduced).
    auto check_int8x8x32 = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        checker.set_dtype(0, dtype::Int8());
        checker.set_dtype(1, dtype::Int8());
        checker.set_dtype(2, dtype::Int32());
        checker.set_dtype(4, dtype::Int32());
        for (auto&& arg : args) {
            checker.set_param(arg.param).execs(
                    {arg.src, arg.filter, {}, {}, {}});
        }
        for (auto&& arg : args) {
            checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                    .set_dtype(1, dtype::QuantizedS8(2.5f))
                    .set_dtype(2, dtype::QuantizedS32(6.25f))
                    .set_dtype(4, {})
                    .set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_rng(2, &rng)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, {}, {}, {}});
        }
    };
    //! Former `cb2` macro: int8x8x16 variant, integer dtypes only.
    auto check_int8x8x16 = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        checker.set_dtype(0, dtype::Int8());
        checker.set_dtype(1, dtype::Int8());
        checker.set_dtype(2, dtype::Int16());
        checker.set_dtype(4, dtype::Int16());
        for (auto&& arg : args) {
            checker.set_param(arg.param).execs(
                    {arg.src, arg.filter, {}, {}, {}});
        }
    };

#if MEGDNN_X86_WITH_MKL_DNN
    if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) {
        check_int8x8x32("IM2COLMATMUL:X86_INT8X8X32_MKLDNN");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) {
        check_int8x8x32("IM2COLMATMUL:X86_INT8X8X32_VNNI");
    }
#endif
    if (megdnn::x86::is_supported(x86::SIMDType::AVX2)) {
        check_int8x8x32("IM2COLMATMUL:X86_INT8X8X32_AVX2_2X4X16");
        check_int8x8x32("IM2COLMATMUL:X86_INT8X8X32_AVX2_4X16X2");
        check_int8x8x16("IM2COLMATMUL:X86_INT8X8X16_AVX2");
    }
    if (::megdnn::x86::is_supported(::megdnn::x86::SIMDType::SSE4_2)) {
        check_int8x8x32("IM2COLMATMUL:X86_INT8X8X32_SSE_4X8X2");
        check_int8x8x16("IM2COLMATMUL:X86_INT8X8X16_SSE");
    }
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_FP32
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
;
...
...
@@ -950,6 +1042,61 @@ TEST_F(X86, CONV_BIAS_IM2COLMATMUL_FP32) {
#undef cb
}
//! Weight-preprocess check for the no-pack fp32 im2col matmul algorithm
//! (X86_F32_BLAS) over dense convolutions with three bias layouts.
TEST_F(X86, CONV_BIAS_IM2COLMATMUL_FP32_NOPACK_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! Append three dense testcases (no bias, shape {1,oc,1,1} bias, bias
    //! with the full output spatial shape) for one geometry; shapes the
    //! filter cannot cover are skipped.
    auto add_testcases = [&](size_t oc, size_t ic, size_t w, size_t h,
                             size_t kernel, size_t p,
                             NonlineMode nonline_mode) {
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias conv_param;
        conv_param.stride_h = 1;
        conv_param.stride_w = 1;
        conv_param.pad_h = p;
        conv_param.pad_w = p;
        conv_param.nonlineMode = nonline_mode;

        //! output spatial extent for stride-1 convolution with padding p
        const size_t oh = (h + 2 * p - kernel) / conv_param.stride_h + 1;
        const size_t ow = (w + 2 * p - kernel) / conv_param.stride_w + 1;

        //! no bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        //! per-channel bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, 1, 1});
        //! bias with the full output spatial shape
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, oh, ow});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7})
        for (size_t ic : {1, 4, 8, 16})
            for (size_t oc : {1, 4, 8, 16, 300})
                for (size_t p : {0, 2})
                    for (size_t size : {8, 24})
                        for (NonlineMode nonline_mode :
                             {NonlineMode::IDENTITY, NonlineMode::RELU}) {
                            add_testcases(oc, ic, size, size, kernel, p,
                                          nonline_mode);
                        }
    add_testcases(2046, 8, 20, 20, 3, 1, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! the former single-use `cb` macro, inlined
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            "IM2COLMATMUL:X86_F32_BLAS"));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
#endif
...
...
@@ -1020,6 +1167,73 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) {
#undef cb
}
//! Weight-preprocess check for the fp32 im2col matmul algorithm with MKL
//! pack-A (X86_F32_MKL_PACKA), covering both dense and two-group sparse
//! convolutions with three bias layouts each.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA_FILTER_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! Append six testcases for one geometry: {no bias, per-channel bias,
    //! full-output-shape bias} x {dense, GROUP with 2 groups}; shapes the
    //! filter cannot cover are skipped.
    auto add_testcases = [&](size_t oc, size_t ic, size_t w, size_t h,
                             size_t kernel, size_t p,
                             NonlineMode nonline_mode) {
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias conv_param;
        conv_param.stride_h = 1;
        conv_param.stride_w = 1;
        conv_param.pad_h = p;
        conv_param.pad_w = p;
        conv_param.nonlineMode = nonline_mode;

        //! output spatial extent for stride-1 convolution with padding p
        const size_t oh = (h + 2 * p - kernel) / conv_param.stride_h + 1;
        const size_t ow = (w + 2 * p - kernel) / conv_param.stride_w + 1;

        //! dense: no bias / per-channel bias / full-output-shape bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, 1, 1});
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, oh, ow});

        //! the same three bias layouts with two groups
        conv_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(conv_param, TensorShape{1, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{});
        args.emplace_back(conv_param, TensorShape{1, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{1, oc * 2, 1, 1});
        args.emplace_back(conv_param, TensorShape{1, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{1, 2 * oc, oh, ow});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7})
        for (size_t ic : {1, 4, 8, 16})
            for (size_t oc : {1, 4, 8, 16})
                for (size_t p : {0, 1})
                    for (size_t size : {8, 24})
                        for (NonlineMode nonline_mode :
                             {NonlineMode::IDENTITY, NonlineMode::RELU}) {
                            add_testcases(oc, ic, size, size, kernel, p,
                                          nonline_mode);
                        }
    add_testcases(2046, 8, 20, 20, 3, 1, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! the former single-use `cb` macro, inlined
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            "IM2COLMATMUL:X86_F32_MKL_PACKA:192"));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
/**************************** Conv1x1 PackA *************************/
namespace
{
void
checker_conv_bias
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
...
...
@@ -1169,6 +1383,77 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) {
#undef cb
}
//! Weight-preprocess check for the im2col quantized-int8 matmul algos on
//! each supported x86 backend.
//! NOTE: the former function-like `cb` macro is replaced by a capturing
//! lambda -- identical behavior, but debuggable and without #undef hygiene.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8_FILTER_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! Append a bias-free testcase and a batch-2 per-channel-bias testcase
    //! for one geometry; shapes the filter cannot cover are skipped.
    auto run = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                   size_t p, NonlineMode nonline_mode) {
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;
        param.nonlineMode = nonline_mode;

        //! no bias
        args.emplace_back(param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        //! bias channel
        args.emplace_back(param, TensorShape{2, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, 1, 1});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7})
        for (size_t ic : {1, 4, 8, 16})
            for (size_t oc : {1, 4, 8})
                for (size_t p : {0, 2})
                    for (size_t size : {20, 21, 24})
                        for (NonlineMode nonline_mode :
                             {NonlineMode::IDENTITY, NonlineMode::RELU,
                              NonlineMode::H_SWISH}) {
                            run(oc, ic, size, size, kernel, p, nonline_mode);
                        }

    run(2046, 8, 20, 20, 3, 1, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! Former `cb` macro: run every testcase with quantized dtypes.
    //! The output scale was previously the double literal 60.25; 60.25f is
    //! the same exact value and matches the other float scales (2.5f, 6.25f).
    auto check_quantized = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        UniformIntRNG rng{-50, 50};
        for (auto&& arg : args) {
            checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                    .set_dtype(1, dtype::QuantizedS8(2.5f))
                    .set_dtype(2, dtype::QuantizedS32(6.25f))
                    .set_dtype(4, dtype::QuantizedS8(60.25f))
                    .set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_rng(2, &rng)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, {}, {}, {}});
        }
    };

#if MEGDNN_X86_WITH_MKL_DNN
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        check_quantized("IM2COLMATMUL:X86_INT8X8X32_MKLDNN");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        check_quantized("IM2COLMATMUL:X86_INT8X8X32_VNNI");
    }
#endif
    if (x86::is_supported(x86::SIMDType::AVX2)) {
        check_quantized("IM2COLMATMUL:X86_INT8X8X32_AVX2_2X4X16");
    }
}
TEST_F
(
X86
,
CONV_BIAS_MATMUL
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录