Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
edd7e167
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
edd7e167
编写于
7月 02, 2020
作者:
M
Megvii Engine Team
提交者:
Xu Xinran
8月 04, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn/fallback): add im2col filterpreprocess function
GitOrigin-RevId: 61c54ad258a42301711d3efdae0caef47d7b0584
上级
9e9e8ca0
变更
13
展开全部
显示空白变更内容
内联
并排
Showing
13 changed file
with
1207 addition
and
252 deletion
+1207
-252
dnn/src/fallback/conv_bias/im2col/algos.cpp
dnn/src/fallback/conv_bias/im2col/algos.cpp
+358
-193
dnn/src/fallback/conv_bias/im2col/algos.h
dnn/src/fallback/conv_bias/im2col/algos.h
+9
-24
dnn/src/fallback/conv_bias/im2col/strategy_base.h
dnn/src/fallback/conv_bias/im2col/strategy_base.h
+6
-4
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
+17
-8
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
+1
-1
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
+21
-14
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+2
-1
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+5
-0
dnn/src/fallback/convolution/opr_impl.cpp
dnn/src/fallback/convolution/opr_impl.cpp
+3
-1
dnn/test/arm_common/conv_bias_multi_thread.cpp
dnn/test/arm_common/conv_bias_multi_thread.cpp
+471
-4
dnn/test/common/conv_bias.cpp
dnn/test/common/conv_bias.cpp
+24
-0
dnn/test/common/conv_bias.h
dnn/test/common/conv_bias.h
+4
-1
dnn/test/x86/conv_bias.cpp
dnn/test/x86/conv_bias.cpp
+286
-1
未找到文件。
dnn/src/fallback/conv_bias/im2col/algos.cpp
浏览文件 @
edd7e167
此差异已折叠。
点击以展开。
dnn/src/fallback/conv_bias/im2col/algos.h
浏览文件 @
edd7e167
...
...
@@ -22,27 +22,6 @@ namespace megdnn {
namespace
fallback
{
class
ConvBiasImpl
::
AlgoIm2col
final
:
public
AlgoBase
{
//! calculate m_oc_tile_size in choice_ohw_oc_block() function,
//! when m_oc_tile_size < this value m_oc_tile_size = ohw
static
constexpr
size_t
DEFAULT_OHW_MIN_TILE_SIZE
=
32
;
//! when nr_threads > 1 and round(ohw,nr_threads)>nr_threads,
//! m_oc_tile_size = DEFAULT_OC_TILE_SIZE
static
constexpr
size_t
DEFAULT_OC_TILE_SIZE
=
512
;
//! when m_oc_tile_size > this value m_oc_tile_size =
//! DEFAULT_OC_MAX_TILE_SIZE
static
constexpr
size_t
DEFAULT_OC_MAX_TILE_SIZE
=
1024
;
//! when m_oc_tile_size < this value m_oc_tile_size =
//! DEFAULT_OC_MIN_TILE_SIZE the purpose is aligning the calculation
static
constexpr
size_t
DEFAULT_OC_MIN_TILE_SIZE
=
128
;
fallback
::
MatrixMulImpl
::
KernSizeParam
get_matmul_kern_param
(
const
NCBKernSizeParam
&
param
,
size_t
ohw_tile_size
,
size_t
oc_tile_size
)
const
;
WorkspaceBundle
get_bundle
(
const
NCBKernSizeParam
&
param
)
const
;
void
choice_ohw_oc_block
(
const
NCBKernSizeParam
&
param
,
size_t
&
oc_tile_size
,
size_t
&
ohw_tile_size
,
size_t
block_m
,
size_t
block_n
,
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
pack_mode
)
const
;
public:
AlgoIm2col
(
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
size_t
ohw_tile_size
)
:
m_matmul_algo
(
matmul_algo
),
...
...
@@ -59,10 +38,16 @@ public:
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_preprocess_workspace
(
const
NCBKernSizeParam
&
/*param*/
)
const
override
{
return
0
;
}
SmallVector
<
NCBKern
>
dispatch_preprocess_kerns
(
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
if
(
param
.
src_type
.
category
()
==
DTypeCategory
::
QUANTIZED
)
{
static
CpuOprDelegationStorage
<
1
>
storage
;
auto
conv_bias_opr
=
storage
.
get
<
ConvBias
,
0
>
();
...
...
dnn/src/fallback/conv_bias/im2col/strategy_base.h
浏览文件 @
edd7e167
...
...
@@ -40,9 +40,11 @@ struct StrategyParam {
size_t
block_n
;
size_t
block_k
;
size_t
pack_oc_size
;
size_t
packA_group_size
;
bool
skip_copy_dst
;
bool
is_dst_8bit
;
bool
is_ohw_size_bigger
;
bool
enable_filter_preprocess
;
};
class
StrategyBase
{
...
...
@@ -62,7 +64,7 @@ public:
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desec
,
size_t
pack_size
)
=
0
;
const
StrategyParam
&
sparam
)
=
0
;
virtual
void
exec_im2col
(
const
WorkspaceBundle
&
bundle
,
const
WorkspaceBundle
&
bundle_thread
,
...
...
@@ -296,7 +298,7 @@ public:
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
pack_size
)
override
;
const
StrategyParam
&
sparam
)
override
;
virtual
void
exec_im2col
(
const
WorkspaceBundle
&
bundle
,
const
WorkspaceBundle
&
bundle_thread
,
const
StrategyParam
&
sparam
,
...
...
@@ -375,7 +377,7 @@ public:
const
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
MDsec
,
size_t
pack_size
)
override
;
const
StrategyParam
&
sparam
)
override
;
void
exec_matmul
(
const
fallback
::
ConvBiasImpl
::
NCBKernParam
&
param
,
const
StrategyParam
&
sparam
,
const
WorkspaceBundle
&
bundle
,
...
...
@@ -431,7 +433,7 @@ public:
const
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
MDsec
,
size_t
pack_size
)
override
;
const
StrategyParam
&
sparam
)
override
;
void
exec_im2col
(
const
WorkspaceBundle
&
bundle
,
const
WorkspaceBundle
&
bundle_thread
,
...
...
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
浏览文件 @
edd7e167
...
...
@@ -25,19 +25,23 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
matmul_desc
,
size_t
)
{
const
StrategyParam
&
sparam
)
{
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
;
size_t
group_id
=
ncb_index
.
ndrange_id
[
0
];
static_cast
<
fallback
::
MatrixMulImpl
::
KernSizeParam
&>
(
matmul_param
)
=
matmulparam
;
size_t
packA_group_size
=
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
size_t
packed_per_oc_block_size
=
round_up
(
matmul_param
.
K
,
matmul_desc
.
innerblocksize
.
k
)
*
matmul_desc
.
innerblocksize
.
m
*
matmul_desc
.
packa_type_size
;
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
packed_per_oc_block_size
;
int8_t
*
a_panel
=
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
group_id
*
packA_group_size
+
a_panel_offset
;
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
int8_t
*
a_panel
=
tmp_ptr
+
group_id
*
sparam
.
packA_group_size
+
a_panel_offset
;
matmul_param
.
A_ptr
=
const_cast
<
src_ctype
*>
(
param
.
filter
<
src_ctype
>
(
group_id
));
matmul_algo
->
pack_A
(
matmul_param
,
a_panel
,
ncb_index
.
ndrange_id
[
1
],
...
...
@@ -149,15 +153,20 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
size_t
packA_per_oc_block_size
=
round_up
(
matmul_param
.
K
,
matmul_desc
.
innerblocksize
.
k
)
*
sparam
.
oc_tile_size
*
matmul_desc
.
packa_type_size
;
size_t
packA_group_size
=
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
)
;
size_t
packA_group_size
=
sparam
.
packA_group_size
;
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
packA_group_size
+
ncb_index
.
ndrange_id
[
3
]
*
packA_per_oc_block_size
;
void
*
matmul_dst
=
get_matmul_dst_ptr
(
param
,
bundle_thread
,
sparam
);
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
a_panel_offset
);
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
tmp_ptr
+
a_panel_offset
);
src_ctype
*
b_panel
=
reinterpret_cast
<
src_ctype
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle_thread
.
get
(
THREAD_BUNDLE_PACKB_INDEX
)));
...
...
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
浏览文件 @
edd7e167
...
...
@@ -26,7 +26,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_dsec*/
,
size_t
)
{
const
StrategyParam
&
)
{
MEGDNN_MARK_USED_VAR
(
bundle
);
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
matmulparam
);
...
...
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
浏览文件 @
edd7e167
...
...
@@ -26,7 +26,7 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_desc*/
,
size_t
)
{
const
StrategyParam
&
sparam
)
{
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
;
static_cast
<
fallback
::
MatrixMulImpl
::
KernSizeParam
&>
(
matmul_param
)
=
matmulparam
;
...
...
@@ -36,12 +36,17 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
size_t
output_block_oc_size
=
std
::
min
(
oc_tile_size
,
OC
-
ncb_index
.
ndrange_id
[
1
]
*
oc_tile_size
);
size_t
oc_cur_index
=
ncb_index
.
ndrange_id
[
1
]
*
oc_tile_size
;
size_t
packA_group_size
=
bundle
.
get_size
(
BUNDLE_PACKA_INDEX
)
/
param
.
filter_meta
.
group
;
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
1
]
*
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
int8_t
*
a_panel
=
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
group_id
*
packA_group_size
+
a_panel_offset
;
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
int8_t
*
a_panel
=
tmp_ptr
+
group_id
*
sparam
.
packA_group_size
+
a_panel_offset
;
matmul_param
.
A_ptr
=
const_cast
<
src_ctype
*>
(
param
.
filter
<
src_ctype
>
(
group_id
))
+
oc_cur_index
*
matmul_param
.
K
;
...
...
@@ -60,20 +65,22 @@ void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
fallback
::
MatrixMulImpl
::
KernParam
matmul_param
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
const
fallback
::
ConvBiasImpl
::
NCBKernIndex
&
ncb_index
,
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_desc*/
)
{
size_t
packA_group_size
=
bundle
.
get_size
(
BUNDLE_PACKA_INDEX
)
/
param
.
filter_meta
.
group
;
const
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
&
/*matmul_desc*/
)
{
size_t
a_panel_offset
=
ncb_index
.
ndrange_id
[
3
]
*
matmul_algo
->
get_bundle
(
matmul_param
).
get_size
(
0
);
a_panel_offset
=
sparam
.
group_id
*
packA_group_size
+
a_panel_offset
;
a_panel_offset
=
sparam
.
group_id
*
sparam
.
packA_group_size
+
a_panel_offset
;
void
*
matmul_dst
=
get_matmul_dst_ptr
(
param
,
bundle_thread
,
sparam
);
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
))
+
a_panel_offset
);
int8_t
*
tmp_ptr
=
sparam
.
enable_filter_preprocess
?
static_cast
<
int8_t
*>
(
param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
)
:
static_cast
<
int8_t
*>
(
bundle
.
get
(
BUNDLE_PACKA_INDEX
));
src_ctype
*
a_panel
=
reinterpret_cast
<
src_ctype
*>
(
tmp_ptr
+
a_panel_offset
);
src_ctype
*
b_panel
=
nullptr
;
src_ctype
*
im2col_dst
=
static_cast
<
src_ctype
*>
(
...
...
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
edd7e167
...
...
@@ -154,7 +154,8 @@ void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
bias
{
nullptr
,
bias_layout
};
auto
fparam
=
make_ncb_kern_param
(
src
,
filter
,
bias
,
dst
,
workspace
,
preprocessed_filter
);
ConvolutionImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
,
workspace
.
size
);
//! do not pass a workspace_size limit here, otherwise no matching algo can be found
ConvBiasImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
);
if
(
!
is_naive_algo
(
algo
)
&&
NCB_ALGO_FUNC
(
get_preprocess_workspace
,
algo
,
fparam
)
<=
workspace
.
size
)
{
exec_preprocess_with_ncb_kern
(
fparam
,
algo
);
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
edd7e167
...
...
@@ -299,6 +299,11 @@ private:
const
PreprocessedFilter
*
preprocessed_filter
);
};
//! Tell whether \p param carries a preprocessed filter: true only when
//! param.preprocessed_filter is set and it holds at least one tensor.
inline bool is_enable_filter_preprocess(
        const ConvBiasImpl::NCBKernSizeParam& param) {
    if (!param.preprocessed_filter) {
        return false;
    }
    return param.preprocessed_filter->tensors.size() >= 1;
}
}
// namespace fallback
}
// namespace megdnn
...
...
dnn/src/fallback/convolution/opr_impl.cpp
浏览文件 @
edd7e167
...
...
@@ -109,7 +109,9 @@ void ConvolutionImpl::exec_preprocess(const TensorLayout& src_layout,
TensorND
src
{
nullptr
,
src_layout
},
dst
{
nullptr
,
dst_layout
};
auto
fparam
=
make_ncb_kern_param
(
src
,
filter
,
dst
,
preprocessed_filter
,
workspace
);
ConvolutionImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
,
workspace
.
size
);
//! do not pass a workspace_size limit here, otherwise no matching algo can be found
ConvolutionImpl
::
Algorithm
*
algo
=
get_algorithm
(
fparam
);
if
(
!
is_naive_algo
(
algo
)
&&
NCB_ALGO_FUNC
(
get_preprocess_workspace
,
algo
,
fparam
)
<=
workspace
.
size
)
{
exec_preprocess_with_ncb_kern
(
fparam
,
algo
);
...
...
dnn/test/arm_common/conv_bias_multi_thread.cpp
浏览文件 @
edd7e167
此差异已折叠。
点击以展开。
dnn/test/common/conv_bias.cpp
浏览文件 @
edd7e167
...
...
@@ -1118,6 +1118,30 @@ void checker_conv_bias_int8x8x16(std::vector<conv_bias::TestArg> args,
}
}
//! Run every case in \p args through a ConvBiasForward checker that uses
//! OprWeightPreprocessProxy, restricted to the algorithm named \p algo_name.
//!
//! \param args      test cases; param, src, filter and bias of each are used
//! \param handle    handle the checker is constructed with
//! \param rng       optional generator; when non-null it is installed for
//!                  tensor indices 0..3
//! \param epsilon   tolerance forwarded to Checker::set_epsilon
//! \param type0     dtype set for tensor index 0
//! \param type1     dtype set for tensor index 1
//! \param type2     dtype set for tensor index 2
//! \param type3     dtype set for tensor index 4
//! \param algo_name algorithm name handed to ConvBiasAlgoChecker
void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args,
                                Handle* handle, RNG* rng, float epsilon,
                                DType type0, DType type1, DType type2,
                                DType type3, const char* algo_name) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle);
    checker.set_dtype(0, type0)
            .set_dtype(1, type1)
            .set_dtype(2, type2)
            .set_dtype(4, type3);
    checker.set_epsilon(epsilon);

    if (rng != nullptr) {
        for (size_t tensor_idx = 0; tensor_idx < 4; ++tensor_idx) {
            checker.set_rng(tensor_idx, rng);
        }
    }

    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));

    for (auto& test_case : args) {
        checker.set_param(test_case.param);
        checker.execs({test_case.src, test_case.filter, test_case.bias, {}, {}});
    }
}
void
winograd_algo_extra_impl
(
const
TensorNDArray
&
tensors
,
uint32_t
m
,
param
::
ConvBias
param
,
Handle
*
handle
,
...
...
dnn/test/common/conv_bias.h
浏览文件 @
edd7e167
...
...
@@ -58,7 +58,10 @@ std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size);
std
::
vector
<
TestArg
>
get_int8_nchw44_args
(
size_t
kernel_size
,
size_t
pack_size
,
bool
compute_float32
=
false
,
bool
group_mode
=
false
);
void
check_conv_bias_preprocess
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
RNG
*
rng
,
float
epsilon
,
DType
type0
,
DType
type1
,
DType
type2
,
DType
type3
,
const
char
*
algo_name
);
template
<
typename
Opr
>
using
ConvBiasAlgoChecker
=
AlgoChecker
<
Opr
>
;
...
...
dnn/test/x86/conv_bias.cpp
浏览文件 @
edd7e167
...
...
@@ -752,7 +752,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_DIRECT_STRIDE2) {
}
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8X8X
)
{
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_INT8X8X
32
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
;
...
...
@@ -842,6 +842,98 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8X8X) {
#undef cb2
}
//! Check the int8 im2col+matmul conv-bias algorithms through the
//! weight-preprocess proxy. Every listed algorithm is run on the same set of
//! bias-free, stride-1 cases; which algorithms run depends on the build flags
//! and on the SIMD support reported at runtime.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8X8X32_FILTER_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! append one bias-free case; nothing is added when the padded input is
    //! smaller than the kernel in either dimension
    auto add_case = [&args](size_t oc, size_t ic, size_t w, size_t h,
                            size_t kernel, size_t pad, NonlineMode mode) {
        const bool kernel_fits =
                w + 2 * pad >= kernel && h + 2 * pad >= kernel;
        if (!kernel_fits) {
            return;
        }
        param::ConvBias conv_param;
        conv_param.stride_h = 1;
        conv_param.stride_w = 1;
        conv_param.pad_h = pad;
        conv_param.pad_w = pad;
        conv_param.nonlineMode = mode;

        //! no bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7}) {
        for (size_t ic : {1, 4, 8, 16}) {
            for (size_t oc : {1, 4, 8}) {
                for (size_t pad : {0, 2}) {
                    for (size_t size : {20, 21, 24}) {
                        for (NonlineMode mode : {NonlineMode::IDENTITY}) {
                            add_case(oc, ic, size, size, kernel, pad, mode);
                        }
                    }
                }
            }
        }
    }
    //! test OC block
    add_case(2046, 1, 8, 8, 2, 0, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    UniformIntRNG rng{-50, 50};

    //! int8 x int8 -> int32: first with plain Int8/Int32 dtypes, then with
    //! the quantized dtypes and the integer RNG installed
    auto check_int32_algo = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        checker.set_dtype(0, dtype::Int8());
        checker.set_dtype(1, dtype::Int8());
        checker.set_dtype(2, dtype::Int32());
        checker.set_dtype(4, dtype::Int32());
        for (auto&& arg : args) {
            checker.set_param(arg.param).execs(
                    {arg.src, arg.filter, {}, {}, {}});
        }
        for (auto&& arg : args) {
            checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                    .set_dtype(1, dtype::QuantizedS8(2.5f))
                    .set_dtype(2, dtype::QuantizedS32(6.25f))
                    .set_dtype(4, {})
                    .set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_rng(2, &rng)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, {}, {}, {}});
        }
    };

    //! int8 x int8 -> int16 with plain integer dtypes only
    auto check_int16_algo = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        checker.set_dtype(0, dtype::Int8());
        checker.set_dtype(1, dtype::Int8());
        checker.set_dtype(2, dtype::Int16());
        checker.set_dtype(4, dtype::Int16());
        for (auto&& arg : args) {
            checker.set_param(arg.param).execs(
                    {arg.src, arg.filter, {}, {}, {}});
        }
    };

#if MEGDNN_X86_WITH_MKL_DNN
    if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) {
        check_int32_algo("IM2COLMATMUL:X86_INT8X8X32_MKLDNN");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) {
        check_int32_algo("IM2COLMATMUL:X86_INT8X8X32_VNNI");
    }
#endif
    if (megdnn::x86::is_supported(x86::SIMDType::AVX2)) {
        check_int32_algo("IM2COLMATMUL:X86_INT8X8X32_AVX2_2X4X16");
        check_int32_algo("IM2COLMATMUL:X86_INT8X8X32_AVX2_4X16X2");
        check_int16_algo("IM2COLMATMUL:X86_INT8X8X16_AVX2");
    }
    if (::megdnn::x86::is_supported(::megdnn::x86::SIMDType::SSE4_2)) {
        check_int32_algo("IM2COLMATMUL:X86_INT8X8X32_SSE_4X8X2");
        check_int16_algo("IM2COLMATMUL:X86_INT8X8X16_SSE");
    }
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_FP32
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
;
...
...
@@ -950,6 +1042,61 @@ TEST_F(X86, CONV_BIAS_IM2COLMATMUL_FP32) {
#undef cb
}
//! Check the "IM2COLMATMUL:X86_F32_BLAS" conv-bias algorithm through the
//! weight-preprocess proxy, on stride-1 cases with no bias, a per-channel
//! bias and a full-size bias.
TEST_F(X86, CONV_BIAS_IM2COLMATMUL_FP32_NOPACK_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! append the three bias variants for one shape; nothing is added when
    //! the padded input is smaller than the kernel in either dimension
    auto add_cases = [&args](size_t oc, size_t ic, size_t w, size_t h,
                             size_t kernel, size_t pad, NonlineMode mode) {
        const bool kernel_fits =
                w + 2 * pad >= kernel && h + 2 * pad >= kernel;
        if (!kernel_fits) {
            return;
        }
        param::ConvBias conv_param;
        conv_param.stride_h = 1;
        conv_param.stride_w = 1;
        conv_param.pad_h = pad;
        conv_param.pad_w = pad;
        conv_param.nonlineMode = mode;

        //! no bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        //! one bias value per output channel
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, 1, 1});
        //! bias with the spatial size computed from input, pad and kernel
        args.emplace_back(
                conv_param, TensorShape{1, ic, h, w},
                TensorShape{oc, ic, kernel, kernel},
                TensorShape{1, oc,
                            (h + 2 * pad - kernel) / conv_param.stride_h + 1,
                            (w + 2 * pad - kernel) / conv_param.stride_w + 1});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7}) {
        for (size_t ic : {1, 4, 8, 16}) {
            for (size_t oc : {1, 4, 8, 16, 300}) {
                for (size_t pad : {0, 2}) {
                    for (size_t size : {8, 24}) {
                        for (NonlineMode mode :
                             {NonlineMode::IDENTITY, NonlineMode::RELU}) {
                            add_cases(oc, ic, size, size, kernel, pad, mode);
                        }
                    }
                }
            }
        }
    }
    add_cases(2046, 8, 20, 20, 3, 1, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());

    //! pin the algorithm, then execute every collected case with its bias
    auto check_algo = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        for (auto&& arg : args) {
            checker.set_param(arg.param).execs(
                    {arg.src, arg.filter, arg.bias, {}, {}});
        }
    };

    check_algo("IM2COLMATMUL:X86_F32_BLAS");
}
#endif
...
...
@@ -1020,6 +1167,73 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) {
#undef cb
}
//! Check the "IM2COLMATMUL:X86_F32_MKL_PACKA:192" conv-bias algorithm through
//! the weight-preprocess proxy. Each shape is tried dense and with GROUP
//! sparsity (two groups), each with no bias, a per-channel bias and a
//! full-size bias.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA_FILTER_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! append the six variants for one shape; nothing is added when the
    //! padded input is smaller than the kernel in either dimension
    auto add_cases = [&args](size_t oc, size_t ic, size_t w, size_t h,
                             size_t kernel, size_t pad, NonlineMode mode) {
        const bool kernel_fits =
                w + 2 * pad >= kernel && h + 2 * pad >= kernel;
        if (!kernel_fits) {
            return;
        }
        param::ConvBias conv_param;
        conv_param.stride_h = 1;
        conv_param.stride_w = 1;
        conv_param.pad_h = pad;
        conv_param.pad_w = pad;
        conv_param.nonlineMode = mode;

        //! dense: no bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        //! dense: one bias value per output channel
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, 1, 1});
        //! dense: full-size bias
        args.emplace_back(
                conv_param, TensorShape{1, ic, h, w},
                TensorShape{oc, ic, kernel, kernel},
                TensorShape{1, oc,
                            (h + 2 * pad - kernel) / conv_param.stride_h + 1,
                            (w + 2 * pad - kernel) / conv_param.stride_w + 1});

        //! the remaining cases use GROUP sparsity with two groups
        conv_param.sparse = param::ConvBias::Sparse::GROUP;
        //! group: no bias
        args.emplace_back(conv_param, TensorShape{1, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{});
        //! group: one bias value per output channel
        args.emplace_back(conv_param, TensorShape{1, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{1, oc * 2, 1, 1});
        //! group: full-size bias
        args.emplace_back(
                conv_param, TensorShape{1, 2 * ic, h, w},
                TensorShape{2, oc, ic, kernel, kernel},
                TensorShape{1, 2 * oc,
                            (h + 2 * conv_param.pad_h - kernel) / 1 + 1,
                            (w + 2 * conv_param.pad_w - kernel) / 1 + 1});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7}) {
        for (size_t ic : {1, 4, 8, 16}) {
            for (size_t oc : {1, 4, 8, 16}) {
                for (size_t pad : {0, 1}) {
                    for (size_t size : {8, 24}) {
                        for (NonlineMode mode :
                             {NonlineMode::IDENTITY, NonlineMode::RELU}) {
                            add_cases(oc, ic, size, size, kernel, pad, mode);
                        }
                    }
                }
            }
        }
    }
    add_cases(2046, 8, 20, 20, 3, 1, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());

    //! pin the algorithm, then execute every collected case with its bias
    auto check_algo = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        for (auto&& arg : args) {
            checker.set_param(arg.param).execs(
                    {arg.src, arg.filter, arg.bias, {}, {}});
        }
    };

    check_algo("IM2COLMATMUL:X86_F32_MKL_PACKA:192");
}
/**************************** Conv1x1 PackA *************************/
namespace
{
void
checker_conv_bias
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
...
...
@@ -1169,6 +1383,77 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) {
#undef cb
}
//! Check the quantized-int8 im2col+matmul conv-bias algorithms through the
//! weight-preprocess proxy. Which algorithms run depends on the build flags
//! and on the SIMD support reported at runtime.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8_FILTER_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args;

    //! append two cases for one shape; nothing is added when the padded
    //! input is smaller than the kernel in either dimension
    auto add_cases = [&args](size_t oc, size_t ic, size_t w, size_t h,
                             size_t kernel, size_t pad, NonlineMode mode) {
        const bool kernel_fits =
                w + 2 * pad >= kernel && h + 2 * pad >= kernel;
        if (!kernel_fits) {
            return;
        }
        param::ConvBias conv_param;
        conv_param.stride_h = 1;
        conv_param.stride_w = 1;
        conv_param.pad_h = pad;
        conv_param.pad_w = pad;
        conv_param.nonlineMode = mode;

        //! no bias
        args.emplace_back(conv_param, TensorShape{1, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        //! bias channel
        args.emplace_back(conv_param, TensorShape{2, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel},
                          TensorShape{1, oc, 1, 1});
    };

    for (size_t kernel : {2, 3, 4, 5, 6, 7}) {
        for (size_t ic : {1, 4, 8, 16}) {
            for (size_t oc : {1, 4, 8}) {
                for (size_t pad : {0, 2}) {
                    for (size_t size : {20, 21, 24}) {
                        for (NonlineMode mode :
                             {NonlineMode::IDENTITY, NonlineMode::RELU,
                              NonlineMode::H_SWISH}) {
                            add_cases(oc, ic, size, size, kernel, pad, mode);
                        }
                    }
                }
            }
        }
    }
    add_cases(2046, 8, 20, 20, 3, 1, NonlineMode::IDENTITY);

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());

    //! pin the algorithm and run every case with quantized dtypes and an
    //! integer RNG created for this call.
    //! NOTE(review): execs() receives {} in the bias slot, so the per-channel
    //! bias shape stored in the cases is not forwarded -- confirm intended.
    auto check_algo = [&](const char* algo_name) {
        checker.set_before_exec_callback(
                conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
        UniformIntRNG rng{-50, 50};
        for (auto&& arg : args) {
            checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                    .set_dtype(1, dtype::QuantizedS8(2.5f))
                    .set_dtype(2, dtype::QuantizedS32(6.25f))
                    .set_dtype(4, dtype::QuantizedS8(60.25))
                    .set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_rng(2, &rng)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, {}, {}, {}});
        }
    };

#if MEGDNN_X86_WITH_MKL_DNN
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        check_algo("IM2COLMATMUL:X86_INT8X8X32_MKLDNN");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        check_algo("IM2COLMATMUL:X86_INT8X8X32_VNNI");
    }
#endif
    if (x86::is_supported(x86::SIMDType::AVX2)) {
        check_algo("IM2COLMATMUL:X86_INT8X8X32_AVX2_2X4X16");
    }
}
TEST_F
(
X86
,
CONV_BIAS_MATMUL
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录