Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
fff2cdc7
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
fff2cdc7
编写于
6月 24, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn/fallback): add winograd weight preprocess
GitOrigin-RevId: 4741298e44a94ec439df1a4d372ac9fff2075e3f
上级
d37229fa
变更
16
展开全部
显示空白变更内容
内联
并排
Showing
16 changed file
with
896 addition
and
869 deletion
+896
-869
dnn/src/arm_common/conv_bias/f16/algos.cpp
dnn/src/arm_common/conv_bias/f16/algos.cpp
+26
-143
dnn/src/arm_common/conv_bias/f16/algos.h
dnn/src/arm_common/conv_bias/f16/algos.h
+4
-65
dnn/src/arm_common/conv_bias/fp32/algos.cpp
dnn/src/arm_common/conv_bias/fp32/algos.cpp
+41
-252
dnn/src/arm_common/conv_bias/fp32/algos.h
dnn/src/arm_common/conv_bias/fp32/algos.h
+9
-96
dnn/src/arm_common/conv_bias/int8/algos.cpp
dnn/src/arm_common/conv_bias/int8/algos.cpp
+16
-108
dnn/src/arm_common/conv_bias/int8/algos.h
dnn/src/arm_common/conv_bias/int8/algos.h
+3
-44
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
+0
-1
dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp
dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp
+2
-2
dnn/src/fallback/conv_bias/algos.cpp
dnn/src/fallback/conv_bias/algos.cpp
+19
-35
dnn/src/fallback/conv_bias/common.h
dnn/src/fallback/conv_bias/common.h
+24
-0
dnn/src/fallback/conv_bias/winograd/winograd.h
dnn/src/fallback/conv_bias/winograd/winograd.h
+232
-27
dnn/src/x86/conv_bias/f32/algos.h
dnn/src/x86/conv_bias/f32/algos.h
+2
-26
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
+10
-69
dnn/test/arm_common/conv_bias.cpp
dnn/test/arm_common/conv_bias.cpp
+17
-0
dnn/test/arm_common/conv_bias_multi_thread.cpp
dnn/test/arm_common/conv_bias_multi_thread.cpp
+459
-0
dnn/test/x86/conv_bias.cpp
dnn/test/x86/conv_bias.cpp
+32
-1
未找到文件。
dnn/src/arm_common/conv_bias/f16/algos.cpp
浏览文件 @
fff2cdc7
...
@@ -34,10 +34,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
...
@@ -34,10 +34,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
0
,
0
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
0
,
0
)
{
using
Strategy
=
winograd
::
winograd_2x3_4x4_f16
;
using
Strategy
=
winograd
::
winograd_2x3_4x4_f16
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
...
@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP16WinogradF23
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP16WinogradF23
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_2x3_4x4_f16
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp16
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
0
,
1
)
{
param
::
MatrixMul
::
Format
::
DEFAULT
);
winograd
::
winograd_2x3_4x4_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_4x4_f16
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP16WinogradF23
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
0
,
2
)
{
winograd
::
winograd_2x3_4x4_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_4x4_f16
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP16WinogradF45 ======================== */
/* ======================= AlgoFP16WinogradF45 ======================== */
...
@@ -106,10 +76,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
...
@@ -106,10 +76,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
1
,
0
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
1
,
0
)
{
using
Strategy
=
winograd
::
winograd_4x5_1x1_f16
;
using
Strategy
=
winograd
::
winograd_4x5_1x1_f16
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
...
@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP16WinogradF45
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP16WinogradF45
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_4x5_1x1_f16
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp16
,
winograd
::
winograd_4x5_1x1_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
::
MatrixMul
::
Format
::
DEFAULT
);
param
.
dst_type
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
1
,
1
)
{
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_4x5_1x1_f16
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP16WinogradF45
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
1
,
2
)
{
winograd
::
winograd_4x5_1x1_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_4x5_1x1_f16
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP16WinogradF63 ======================== */
/* ======================= AlgoFP16WinogradF63 ======================== */
bool
ConvBiasImpl
::
AlgoFP16WinogradF63
::
usable
(
bool
ConvBiasImpl
::
AlgoFP16WinogradF63
::
usable
(
...
@@ -174,10 +116,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
...
@@ -174,10 +116,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
2
,
0
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
2
,
0
)
{
using
Strategy
=
winograd
::
winograd_6x3_1x1_f16
;
using
Strategy
=
winograd
::
winograd_6x3_1x1_f16
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
...
@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP16WinogradF63
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP16WinogradF63
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_6x3_1x1_f16
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp16
,
winograd
::
winograd_6x3_1x1_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
::
MatrixMul
::
Format
::
DEFAULT
);
param
.
dst_type
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
2
,
1
)
{
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_6x3_1x1_f16
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP16WinogradF63
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
2
,
2
)
{
winograd
::
winograd_6x3_1x1_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_6x3_1x1_f16
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP16WinogradF23_8x8 ======================== */
/* ======================= AlgoFP16WinogradF23_8x8 ======================== */
...
@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
...
@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
...
@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
...
@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP16WinogradF23_8x8
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP16WinogradF23_8x8
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_2x3_8x8_f16
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp16
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
3
,
1
)
{
param
::
MatrixMul
::
Format
::
MK8
);
winograd
::
winograd_2x3_8x8_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_8x8_f16
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP16WinogradF23_8x8
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
3
,
2
)
{
winograd
::
winograd_2x3_8x8_f16
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_8x8_f16
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/*========================from Convolution=============================*/
/*========================from Convolution=============================*/
...
...
dnn/src/arm_common/conv_bias/f16/algos.h
浏览文件 @
fff2cdc7
...
@@ -22,7 +22,6 @@ public:
...
@@ -22,7 +22,6 @@ public:
AlgoFP16WinogradF23
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP16WinogradF23
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -30,22 +29,7 @@ public:
...
@@ -30,22 +29,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
static
std
::
vector
<
fallback
::
MatrixMulImpl
::
Algorithm
*>
get_avaiable_matmul_algos
(
const
NCBKernSizeParam
&
param
);
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP16WinogradF45
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP16WinogradF45
final
:
public
AlgoBase
{
...
@@ -53,7 +37,6 @@ public:
...
@@ -53,7 +37,6 @@ public:
AlgoFP16WinogradF45
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP16WinogradF45
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -61,30 +44,14 @@ public:
...
@@ -61,30 +44,14 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
static
std
::
vector
<
fallback
::
MatrixMulImpl
::
Algorithm
*>
get_avaiable_matmul_algos
(
const
NCBKernSizeParam
&
param
);
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP16WinogradF63
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP16WinogradF63
final
:
public
AlgoBase
{
public:
public:
AlgoFP16WinogradF63
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP16WinogradF63
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -93,29 +60,13 @@ public:
...
@@ -93,29 +60,13 @@ public:
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
static
std
::
vector
<
fallback
::
MatrixMulImpl
::
Algorithm
*>
get_avaiable_matmul_algos
(
const
NCBKernSizeParam
&
param
);
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP16WinogradF23_8x8
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP16WinogradF23_8x8
final
:
public
AlgoBase
{
public:
public:
AlgoFP16WinogradF23_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP16WinogradF23_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -123,19 +74,7 @@ public:
...
@@ -123,19 +74,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoF16Direct
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoF16Direct
final
:
public
AlgoBase
{
...
...
dnn/src/arm_common/conv_bias/fp32/algos.cpp
浏览文件 @
fff2cdc7
...
@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
...
@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
...
@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
...
@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF23_4x4
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_2x3_4x4_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
0
,
1
)
{
param
::
MatrixMul
::
Format
::
MK4
);
winograd
::
winograd_2x3_4x4_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_4x4_f
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
0
,
2
)
{
winograd
::
winograd_2x3_4x4_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_4x4_f
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP32WinogradF63 ======================== */
/* ======================= AlgoFP32WinogradF63 ======================== */
...
@@ -113,10 +83,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
...
@@ -113,10 +83,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
1
,
0
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
1
,
0
)
{
using
Strategy
=
winograd
::
winograd_6x3_1x1_f
;
using
Strategy
=
winograd
::
winograd_6x3_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
...
@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF63
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF63
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_6x3_1x1_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
1
,
1
)
{
param
::
MatrixMul
::
Format
::
DEFAULT
);
winograd
::
winograd_6x3_1x1_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_6x3_1x1_f
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF63
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
1
,
2
)
{
winograd
::
winograd_6x3_1x1_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_6x3_1x1_f
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP32WinogradF54 ======================== */
/* ======================= AlgoFP32WinogradF54 ======================== */
...
@@ -182,10 +123,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
...
@@ -182,10 +123,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
2
,
0
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
2
,
0
)
{
using
Strategy
=
winograd
::
winograd_5x4_1x1_f
;
using
Strategy
=
winograd
::
winograd_5x4_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
...
@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF54
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF54
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_5x4_1x1_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
2
,
1
)
{
param
::
MatrixMul
::
Format
::
DEFAULT
);
winograd
::
winograd_5x4_1x1_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_5x4_1x1_f
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF54
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
2
,
2
)
{
winograd
::
winograd_5x4_1x1_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_5x4_1x1_f
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP32WinogradF45 ======================== */
/* ======================= AlgoFP32WinogradF45 ======================== */
...
@@ -251,10 +163,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
...
@@ -251,10 +163,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
3
,
0
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
3
,
0
)
{
using
Strategy
=
winograd
::
winograd_4x5_1x1_f
;
using
Strategy
=
winograd
::
winograd_4x5_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
...
@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF45
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF45
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_4x5_1x1_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
3
,
1
)
{
param
::
MatrixMul
::
Format
::
DEFAULT
);
winograd
::
winograd_4x5_1x1_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_4x5_1x1_f
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF45
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
3
,
2
)
{
winograd
::
winograd_4x5_1x1_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_4x5_1x1_f
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP32WinogradF63_4x4 ======================== */
/* ======================= AlgoFP32WinogradF63_4x4 ======================== */
...
@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
...
@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
...
@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
...
@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF63_4x4
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_6x3_4x4_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
4
,
1
)
{
param
::
MatrixMul
::
Format
::
MK4
);
winograd
::
winograd_6x3_4x4_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_6x3_4x4_f
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
4
,
2
)
{
winograd
::
winograd_6x3_4x4_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_6x3_4x4_f
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */
/* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */
...
@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
...
@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
m_matmul_algo
->
packmode
()
==
...
@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
...
@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4_NCHW44
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF23_4x4_NCHW44
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_F23_mk4_f_nchw44
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
param
::
MatrixMul
::
Format
::
MK4
);
midout_iv
(
"AlgoFP32WinogradF23_4x4_NCHW44"
_hash
))
{
winograd
::
winograd_F23_mk4_f_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_F23_mk4_f_nchw44
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4_NCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
midout_iv
(
"AlgoFP32WinogradF23_4x4_NCHW44"
_hash
))
{
winograd
::
winograd_F23_mk4_f_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_F23_mk4_f_nchw44
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */
/* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */
...
@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
...
@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
m_matmul_algo
->
packmode
()
==
...
@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
...
@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4_NCHW44
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF63_4x4_NCHW44
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_F63_mk4_f_nchw44
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_arm_common_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
param
::
MatrixMul
::
Format
::
MK4
);
midout_iv
(
"AlgoFP32WinogradF63_4x4_NCHW44"
_hash
))
{
winograd
::
winograd_F63_mk4_f_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_F63_mk4_f_nchw44
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4_NCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
midout_iv
(
"AlgoFP32WinogradF63_4x4_NCHW44"
_hash
))
{
winograd
::
winograd_F63_mk4_f_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_F63_mk4_f_nchw44
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ===================== direct algo ===================== */
/* ===================== direct algo ===================== */
MIDOUT_DECL
(
megdnn_arm_common_conv_bias_f32_kimpl
);
MIDOUT_DECL
(
megdnn_arm_common_conv_bias_f32_kimpl
);
...
...
dnn/src/arm_common/conv_bias/fp32/algos.h
浏览文件 @
fff2cdc7
...
@@ -17,13 +17,11 @@
...
@@ -17,13 +17,11 @@
namespace
megdnn
{
namespace
megdnn
{
namespace
arm_common
{
namespace
arm_common
{
class
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4
final
:
public
AlgoBase
{
public:
public:
AlgoFP32WinogradF23_4x4
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF23_4x4
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -31,18 +29,7 @@ public:
...
@@ -31,18 +29,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP32WinogradF63
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF63
final
:
public
AlgoBase
{
...
@@ -50,7 +37,6 @@ public:
...
@@ -50,7 +37,6 @@ public:
AlgoFP32WinogradF63
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF63
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -58,19 +44,7 @@ public:
...
@@ -58,19 +44,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4
final
:
public
AlgoBase
{
...
@@ -78,7 +52,6 @@ public:
...
@@ -78,7 +52,6 @@ public:
AlgoFP32WinogradF63_4x4
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF63_4x4
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -86,19 +59,7 @@ public:
...
@@ -86,19 +59,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP32WinogradF54
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF54
final
:
public
AlgoBase
{
...
@@ -106,7 +67,6 @@ public:
...
@@ -106,7 +67,6 @@ public:
AlgoFP32WinogradF54
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF54
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -114,19 +74,7 @@ public:
...
@@ -114,19 +74,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP32WinogradF45
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF45
final
:
public
AlgoBase
{
...
@@ -134,7 +82,6 @@ public:
...
@@ -134,7 +82,6 @@ public:
AlgoFP32WinogradF45
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF45
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -142,19 +89,7 @@ public:
...
@@ -142,19 +89,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
//===================== NCHW44 Winograd Support =====================//
//===================== NCHW44 Winograd Support =====================//
...
@@ -163,7 +98,6 @@ public:
...
@@ -163,7 +98,6 @@ public:
AlgoFP32WinogradF23_4x4_NCHW44
(
AlgoFP32WinogradF23_4x4_NCHW44
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -172,18 +106,7 @@ public:
...
@@ -172,18 +106,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4_NCHW44
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4_NCHW44
final
:
public
AlgoBase
{
...
@@ -191,7 +114,6 @@ public:
...
@@ -191,7 +114,6 @@ public:
AlgoFP32WinogradF63_4x4_NCHW44
(
AlgoFP32WinogradF63_4x4_NCHW44
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -200,18 +122,7 @@ public:
...
@@ -200,18 +122,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
// ================================================================= //
// ================================================================= //
...
@@ -329,4 +240,6 @@ public:
...
@@ -329,4 +240,6 @@ public:
}
// namespace arm_common
}
// namespace arm_common
}
// namespace megdnn
}
// namespace megdnn
#undef MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
// vim: syntax=cpp.doxygen
// vim: syntax=cpp.doxygen
dnn/src/arm_common/conv_bias/int8/algos.cpp
浏览文件 @
fff2cdc7
...
@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
...
@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
...
@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
...
@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
;
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
;
}
}
size_t
ConvBiasImpl
::
AlgoS8WinogradF23_8x8
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoS8WinogradF23_8x8
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_2x3_8x8_s8
,
winograd
::
winograd_2x3_8x8_s8
strategy
(
param
.
src_type
,
param
.
filter_type
,
megdnn_arm_common_conv_bias_int8
,
param
.
dst_type
);
param
::
MatrixMul
::
Format
::
MK8
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_8x8_s8
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8WinogradF23_8x8
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
0
,
2
)
{
winograd
::
winograd_2x3_8x8_s8
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_8x8_s8
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
//=========================== input int8 compute float32 =========
//=========================== input int8 compute float32 =========
bool
ConvBiasImpl
::
AlgoS8CF32WinogradF23_4x4_NCHW44
::
usable
(
bool
ConvBiasImpl
::
AlgoS8CF32WinogradF23_4x4_NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
...
@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
...
@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
is_matmul_usable
=
m_matmul_algo
->
usable
(
is_matmul_usable
=
m_matmul_algo
->
usable
(
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
));
.
get_matmul_kern_param
(
param
));
return
is_matmul_usable
&&
return
is_matmul_usable
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
...
@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
...
@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoS8CF32WinogradF23_4x4_NCHW44
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoS8CF32WinogradF23_4x4_NCHW44
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_2x3_4x4_s8_f32_nchw44
,
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"arm_common_AlgoS8CF32WinogradF23_4x4::get_workspace"
_hash
))
{
param
::
MatrixMul
::
Format
::
MK4
);
winograd
::
winograd_2x3_4x4_s8_f32_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_4x4_s8_f32_nchw44
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8CF32WinogradF23_4x4_NCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"arm_common_AlgoS8CF32WinogradF23_4x4::dispatch_kerns"
_hash
))
{
winograd
::
winograd_2x3_4x4_s8_f32_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_4x4_s8_f32_nchw44
,
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
/* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
bool
ConvBiasImpl
::
AlgoS8WinogradF23_8x8_NCHW44
::
usable
(
bool
ConvBiasImpl
::
AlgoS8WinogradF23_8x8_NCHW44
::
usable
(
...
@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
...
@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
using
Strategy
=
winograd
::
winograd_2x3_8x8_s8_nchw44
;
using
Strategy
=
winograd
::
winograd_2x3_8x8_s8_nchw44
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
)
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
bool
is_matmul_usable
=
m_matmul_algo
->
usable
(
matmul_param
);
bool
is_matmul_usable
=
m_matmul_algo
->
usable
(
matmul_param
);
return
is_matmul_usable
&&
return
is_matmul_usable
&&
...
@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
...
@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoS8WinogradF23_8x8_NCHW44
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoS8WinogradF23_8x8_NCHW44
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_2x3_8x8_s8_nchw44
,
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
param
::
MatrixMul
::
Format
::
MK8
);
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::get_workspace"
_hash
))
{
winograd
::
winograd_2x3_8x8_s8_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_8x8_s8_nchw44
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8WinogradF23_8x8_NCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns"
_hash
))
{
winograd
::
winograd_2x3_8x8_s8_nchw44
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_2x3_8x8_s8_nchw44
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
// vim: syntax=cpp.doxygen
// vim: syntax=cpp.doxygen
dnn/src/arm_common/conv_bias/int8/algos.h
浏览文件 @
fff2cdc7
...
@@ -201,7 +201,6 @@ public:
...
@@ -201,7 +201,6 @@ public:
AlgoS8WinogradF23_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoS8WinogradF23_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -209,20 +208,7 @@ public:
...
@@ -209,20 +208,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
static
std
::
vector
<
fallback
::
MatrixMulImpl
::
Algorithm
*>
get_avaiable_matmul_algos
(
const
NCBKernSizeParam
&
param
);
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
//=======================input int8 compute fp32 output int8============
//=======================input int8 compute fp32 output int8============
...
@@ -231,7 +217,6 @@ public:
...
@@ -231,7 +217,6 @@ public:
AlgoS8CF32WinogradF23_4x4_NCHW44
(
AlgoS8CF32WinogradF23_4x4_NCHW44
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -240,20 +225,7 @@ public:
...
@@ -240,20 +225,7 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
static
std
::
vector
<
fallback
::
MatrixMulImpl
::
Algorithm
*>
get_avaiable_matmul_algos
(
const
NCBKernSizeParam
&
param
);
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
//=======================input int8 compute int16 output int8============
//=======================input int8 compute int16 output int8============
...
@@ -262,7 +234,6 @@ public:
...
@@ -262,7 +234,6 @@ public:
AlgoS8WinogradF23_8x8_NCHW44
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoS8WinogradF23_8x8_NCHW44
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -271,20 +242,8 @@ public:
...
@@ -271,20 +242,8 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
static
std
::
vector
<
fallback
::
MatrixMulImpl
::
Algorithm
*>
get_avaiable_matmul_algos
(
const
NCBKernSizeParam
&
param
);
private:
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
}
// namespace arm_common
}
// namespace arm_common
...
...
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
浏览文件 @
fff2cdc7
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#include "src/arm_common/conv_bias/int8/algos.h"
#include "src/arm_common/conv_bias/int8/algos.h"
#include "src/arm_common/conv_bias/int8/direct.h"
#include "src/arm_common/conv_bias/int8/direct.h"
#include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h"
#include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h"
#include "src/arm_common/conv_bias/int8/strategy.h"
#include "src/arm_common/elemwise_op.h"
#include "src/arm_common/elemwise_op.h"
#include "src/common/opr_delegate.h"
#include "src/common/opr_delegate.h"
...
...
dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp
浏览文件 @
fff2cdc7
...
@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
...
@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
auto run = [=]() { \
auto run = [=]() { \
_strategy strategy(src.layout.dtype, src.layout.dtype, \
_strategy strategy(src.layout.dtype, src.layout.dtype, \
src.layout.dtype); \
src.layout.dtype); \
megdnn::winograd::ConvBias<_strategy, _format>(
\
megdnn::winograd::ConvBias<_strategy, _format>(
strategy,
\
strategy, 1, 1, 1, 1, 1)
\
1_z)
\
.filter_process(src_ptr, dst_ptr, workspace_ptr, \
.filter_process(src_ptr, dst_ptr, workspace_ptr, \
OC, IC); \
OC, IC); \
}; \
}; \
...
...
dnn/src/fallback/conv_bias/algos.cpp
浏览文件 @
fff2cdc7
...
@@ -242,10 +242,8 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
...
@@ -242,10 +242,8 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
1
,
0
)
{
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
1
,
0
)
{
using
Strategy
=
fallback
::
winograd
::
winograd_2x3_1x1_f
;
using
Strategy
=
fallback
::
winograd
::
winograd_2x3_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
UNIT_TILE_SIZE
,
param
)
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
...
@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
return
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_1x1_f
>
(
fallback
::
winograd
::
winograd_2x3_1x1_f
>
(
strategy
,
UNIT_TILE_SIZE
,
p
.
nr_threads
,
p
.
osz
[
0
],
strategy
,
UNIT_TILE_SIZE
,
p
)
p
.
osz
[
1
],
p
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
p
,
m_matmul_algo
);
.
get_workspace_size
(
p
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
...
@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_1x1_f
>
(
fallback
::
winograd
::
winograd_2x3_1x1_f
>
(
strategy
,
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
param
.
osz
[
0
],
UNIT_TILE_SIZE
,
param
);
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
...
@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
strategy
,
UNIT_TILE_SIZE
,
param
)
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
...
@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
return
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_4x4_f
,
fallback
::
winograd
::
winograd_2x3_4x4_f
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
UNIT_TILE_SIZE
,
strategy
,
UNIT_TILE_SIZE
,
p
.
nr_threads
,
p
.
osz
[
0
],
p
)
p
.
osz
[
1
],
p
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
p
,
m_matmul_algo
);
.
get_workspace_size
(
p
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(
...
@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_4x4_f
,
fallback
::
winograd
::
winograd_2x3_4x4_f
,
param
::
MatrixMul
::
Format
::
MK4
>
(
param
::
MatrixMul
::
Format
::
MK4
>
(
strategy
,
UNIT_TILE_SIZE
,
param
);
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -389,10 +381,8 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
...
@@ -389,10 +381,8 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
3
,
0
)
{
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
3
,
0
)
{
using
Strategy
=
fallback
::
winograd
::
winograd_2x3_1x1_qs8
;
using
Strategy
=
fallback
::
winograd
::
winograd_2x3_1x1_qs8
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
megdnn
::
winograd
::
ConvBias
<
Strategy
>
(
strategy
,
UNIT_TILE_SIZE
,
param
)
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
...
@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
...
@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
return
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_1x1_qs8
>
(
fallback
::
winograd
::
winograd_2x3_1x1_qs8
>
(
strategy
,
UNIT_TILE_SIZE
,
p
.
nr_threads
,
p
.
osz
[
0
],
strategy
,
UNIT_TILE_SIZE
,
p
)
p
.
osz
[
1
],
p
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
p
,
m_matmul_algo
);
.
get_workspace_size
(
p
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(
...
@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_1x1_qs8
>
(
fallback
::
winograd
::
winograd_2x3_1x1_qs8
>
(
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
UNIT_TILE_SIZE
,
param
);
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
...
@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
strategy
,
UNIT_TILE_SIZE
,
param
)
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW
||
...
@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
...
@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
p
.
src_type
,
p
.
filter_type
,
p
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
return
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_8x8_qs8
,
fallback
::
winograd
::
winograd_2x3_8x8_qs8
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
UNIT_TILE_SIZE
,
strategy
,
UNIT_TILE_SIZE
,
p
.
nr_threads
,
p
.
osz
[
0
],
p
)
p
.
osz
[
1
],
p
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
p
,
m_matmul_algo
);
.
get_workspace_size
(
p
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns(
...
@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns(
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
fallback
::
winograd
::
winograd_2x3_8x8_qs8
,
fallback
::
winograd
::
winograd_2x3_8x8_qs8
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
UNIT_TILE_SIZE
,
param
);
strategy
,
UNIT_TILE_SIZE
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
}
MIDOUT_END
();
MIDOUT_END
();
...
...
dnn/src/fallback/conv_bias/common.h
浏览文件 @
fff2cdc7
...
@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode;
...
@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode;
break; \
break; \
}
}
//! Declares the common member set of a winograd ConvBias algorithm class.
//!
//! Intended to be expanded inside the class body. It provides:
//!   - is_reproducible(), defined inline and always returning true;
//!   - declarations (no definitions) of the overrides usable(),
//!     get_workspace(), dispatch_kerns(),
//!     deduce_preprocessed_filter_layout(), get_preprocess_workspace() and
//!     dispatch_preprocess_kerns();
//!   - the data members m_matmul_algo, m_name (mutable) and m_tile_size.
//!
//! NOTE: the expansion contains a `private:` label, so the three data members
//! and every declaration that follows the macro in the class body are
//! private. The final member declaration already carries its own `;`, so the
//! macro can be used either with or without a trailing semicolon.
#define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \
bool is_reproducible() const override { return true; } \
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \
AlgoSelectionStrategy algo_selection_strategy) const override; \
size_t get_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) const override; \
virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \
const NCBKernSizeParam& param) \
const override; \
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \
const override; \
size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) \
const override; \
virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \
const override; \
\
private: \
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \
mutable std::string m_name; \
uint32_t m_tile_size;
enum
class
PostprocessMode
:
uint8_t
{
enum
class
PostprocessMode
:
uint8_t
{
FLOAT
=
0
,
///< support all biasmode and no_nonlinemode
FLOAT
=
0
,
///< support all biasmode and no_nonlinemode
NO_PROCESS
,
///<support non bias and identity
NO_PROCESS
,
///<support non bias and identity
...
...
dnn/src/fallback/conv_bias/winograd/winograd.h
浏览文件 @
fff2cdc7
...
@@ -88,7 +88,8 @@ class ConvBias {
...
@@ -88,7 +88,8 @@ class ConvBias {
size_t
filter_transform_buf_size
=
0
;
size_t
filter_transform_buf_size
=
0
;
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE,
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE,
//! OC_BLOCK_SIZE)
//! OC_BLOCK_SIZE)
if
(
param
.
filter_meta
.
format
!=
if
(
param
.
preprocessed_filter
==
nullptr
&&
param
.
filter_meta
.
format
!=
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
param
.
filter_meta
.
format
!=
param
.
filter_meta
.
format
!=
param
::
ConvBias
::
Format
::
NCHW88_WINOGRAD
&&
param
::
ConvBias
::
Format
::
NCHW88_WINOGRAD
&&
...
@@ -150,14 +151,30 @@ class ConvBias {
...
@@ -150,14 +151,30 @@ class ConvBias {
transform_mid_buf_size
,
matmul_workspace_size
});
transform_mid_buf_size
,
matmul_workspace_size
});
}
}
//! Describe the workspace needed while pre-transforming the filter.
//!
//! One scratch buffer is reserved for each of the param.nr_threads threads.
//! Every buffer is
//!   2 * ALPHA * ALPHA * sizeof(output_compute_type) *
//!   max(IC_BLOCK_SIZE, OC_BLOCK_SIZE)
//! bytes large. The bundle is created with a null base pointer.
WorkspaceBundle get_preprocess_wbundle(const NCBKernSizeParam& param) const {
    const auto widest_block =
            std::max(Strategy::IC_BLOCK_SIZE, Strategy::OC_BLOCK_SIZE);
    //! scratch space for the intermediate results of a single thread
    const size_t scratch_bytes_per_thread =
            2 * Strategy::ALPHA * Strategy::ALPHA *
            sizeof(output_compute_type) * widest_block;
    const size_t thread_count = param.nr_threads;
    SmallVector<size_t> per_thread_sizes(thread_count,
                                         scratch_bytes_per_thread);
    return WorkspaceBundle{nullptr, per_thread_sizes};
}
public:
public:
//! Get the m_unit_oc_size, according to the nr_threads and
//! Get the m_unit_oc_size, according to the nr_threads and
//! output_featuremap_size. When single thread the m_unit_oc_size is set
//! output_featuremap_size. When single thread the m_unit_oc_size is set
//! 2048 heuristically, When multi-threads, the m_unit_oc_size is set
//! 2048 heuristically, When multi-threads, the m_unit_oc_size is set
//! according to nr_threads and out_featuremap_size
//! according to nr_threads and out_featuremap_size
ConvBias
(
const
Strategy
&
strategy
,
size_t
unit_tile_size
,
size_t
nr_threads
,
ConvBias
(
const
Strategy
&
strategy
,
size_t
unit_tile_size
,
size_t
OH
,
size_t
OW
,
size_t
OC
)
const
NCBKernSizeParam
&
param
)
:
m_strategy
{
strategy
},
m_unit_tile_size
{
unit_tile_size
}
{
:
m_strategy
{
strategy
},
m_unit_tile_size
{
unit_tile_size
}
{
size_t
nr_threads
=
param
.
nr_threads
;
size_t
OC
=
param
.
filter_meta
.
ocpg
;
size_t
OH
=
param
.
osz
[
0
];
size_t
OW
=
param
.
osz
[
1
];
if
(
nr_threads
>
1
)
{
if
(
nr_threads
>
1
)
{
size_t
units_h
=
div_ceil
<
size_t
>
(
OH
,
Strategy
::
OUTPUT_BLOCK_SIZE
);
size_t
units_h
=
div_ceil
<
size_t
>
(
OH
,
Strategy
::
OUTPUT_BLOCK_SIZE
);
size_t
units_w
=
div_ceil
<
size_t
>
(
OW
,
Strategy
::
OUTPUT_BLOCK_SIZE
);
size_t
units_w
=
div_ceil
<
size_t
>
(
OW
,
Strategy
::
OUTPUT_BLOCK_SIZE
);
...
@@ -178,12 +195,55 @@ public:
...
@@ -178,12 +195,55 @@ public:
m_unit_oc_size
=
UNIT_OC_SIZE_DEFAULT
;
m_unit_oc_size
=
UNIT_OC_SIZE_DEFAULT
;
}
}
}
}
ConvBias
(
const
Strategy
&
strategy
,
size_t
unit_tile_size
)
:
m_strategy
{
strategy
},
m_unit_tile_size
{
unit_tile_size
}
{
m_unit_oc_size
=
UNIT_OC_SIZE_DEFAULT
;
}
size_t
get_workspace_size
(
size_t
get_workspace_size
(
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
)
const
{
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
)
const
{
return
get_wbundle
(
param
,
matmul_algo
).
total_size_in_bytes
();
return
get_wbundle
(
param
,
matmul_algo
).
total_size_in_bytes
();
}
}
size_t
get_preprocess_workspace_size
(
const
NCBKernSizeParam
&
param
,
fallback
::
MatrixMulImpl
::
AlgoBase
*
)
const
{
return
get_preprocess_wbundle
(
param
).
total_size_in_bytes
();
}
//! Deduce the layout of the tensor that stores the pre-transformed filter.
//!
//! Exactly one layout is returned; its shape depends on the matmul format:
//!   - DEFAULT: {GROUP, ALPHA, ALPHA, OC, IC}
//!   - MK4:     {GROUP, ALPHA, ALPHA, OC / 4, IC / 4, 4, 4}
//!   - MK8:     {GROUP, ALPHA, ALPHA, OC / 8, IC / 8, 8, 8}
//! where OC / IC are the per-group channel counts taken from
//! param.filter_meta. The dtype is the strategy's filter dtype, except that a
//! quantized filter dtype is replaced by Float32 for MK4 and by Int16 for
//! MK8. The matmul algorithm argument is not used.
SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    const size_t group = param.filter_meta.group;
    const size_t oc_per_group = param.filter_meta.ocpg;
    const size_t ic_per_group = param.filter_meta.icpg;

    //! pick the element type of the transformed filter
    DType transformed_dtype = m_strategy.filter_dtype;
    const bool is_quantized =
            transformed_dtype.category() == DTypeCategory::QUANTIZED;
    if (is_quantized && format == param::MatrixMul::Format::MK4) {
        transformed_dtype = dtype::Float32();
    } else if (is_quantized && format == param::MatrixMul::Format::MK8) {
        transformed_dtype = dtype::Int16();
    }

    SmallVector<TensorLayout> layouts;
    if (format == param::MatrixMul::Format::DEFAULT) {
        layouts.push_back({{group, Strategy::ALPHA, Strategy::ALPHA,
                            oc_per_group, ic_per_group},
                           transformed_dtype});
        return layouts;
    }
    if (format == param::MatrixMul::Format::MK4) {
        layouts.push_back({{group, Strategy::ALPHA, Strategy::ALPHA,
                            oc_per_group / 4, ic_per_group / 4, 4, 4},
                           transformed_dtype});
        return layouts;
    }
    //! the only remaining supported packing is MK8
    megdnn_assert(format == param::MatrixMul::Format::MK8);
    layouts.push_back({{group, Strategy::ALPHA, Strategy::ALPHA,
                        oc_per_group / 8, ic_per_group / 8, 8, 8},
                       transformed_dtype});
    return layouts;
}
//! Used by winograd_filter_preprocess opr
//! Used by winograd_filter_preprocess opr
void
filter_process
(
const
stype
*
filter_ptr
,
void
filter_process
(
const
stype
*
filter_ptr
,
input_filter_compute_type
*
filter_transform_buf
,
input_filter_compute_type
*
filter_transform_buf
,
...
@@ -199,7 +259,6 @@ public:
...
@@ -199,7 +259,6 @@ public:
const
WorkspaceBundle
&
bundle_compute
,
const
WorkspaceBundle
&
bundle_compute
,
const
NCBKernParam
&
kern_param
,
const
NCBKernParam
&
kern_param
,
const
NCBKernIndex
&
ncb_index
)
{
const
NCBKernIndex
&
ncb_index
)
{
size_t
compute_workspace_size_per_thread
=
size_t
compute_workspace_size_per_thread
=
bundle_compute
.
total_size_in_bytes
();
bundle_compute
.
total_size_in_bytes
();
size_t
thread_id
=
ncb_index
.
thread_id
;
size_t
thread_id
=
ncb_index
.
thread_id
;
...
@@ -235,6 +294,47 @@ public:
...
@@ -235,6 +294,47 @@ public:
IC
,
oc_start
,
oc_end
);
IC
,
oc_start
,
oc_end
);
}
}
//! Kernel body of the filter preprocess step.
//!
//! Transforms the slice of the original filter selected by \p ncb_index and
//! stores the result in \p preprocessed_tensor.
//!   - ncb_index.ndrange_id[0] selects the group;
//!   - ncb_index.ndrange_id[1] selects the output-channel unit, which spans
//!     8 channels for NCHW88, 4 channels for NCHW44 and 1 channel otherwise;
//!   - ncb_index.thread_id selects the scratch buffer inside \p bundle.
static void filter_preprocess(Strategy strategy, const WorkspaceBundle& bundle,
                              const TensorND& preprocessed_tensor,
                              const NCBKernParam& kern_param,
                              const NCBKernIndex& ncb_index) {
    const size_t group_id = ncb_index.ndrange_id[0];
    const size_t oc_id = ncb_index.ndrange_id[1];
    const size_t thread_id = ncb_index.thread_id;

    size_t oc_per_group = kern_param.filter_meta.ocpg;
    size_t ic_per_group = kern_param.filter_meta.icpg;
    //! bytes occupied by the transformed filter of one group
    const size_t bytes_per_group = Strategy::ALPHA * Strategy::ALPHA *
                                   oc_per_group * ic_per_group *
                                   sizeof(input_filter_compute_type);

    //! destination: this group's region of the preprocessed tensor
    const uintptr_t dst_base =
            reinterpret_cast<uintptr_t>(preprocessed_tensor.raw_ptr);
    input_filter_compute_type* transformed_filter =
            reinterpret_cast<input_filter_compute_type*>(
                    dst_base + group_id * bytes_per_group);

    //! per-thread scratch buffer used during the transform
    input_filter_compute_type* scratch =
            reinterpret_cast<input_filter_compute_type*>(
                    reinterpret_cast<uintptr_t>(bundle.get(thread_id)));

    //! source: the untransformed filter of this group
    const stype* original_filter = kern_param.filter<stype>(group_id);

    //! number of output channels covered by a single oc_id
    size_t oc_step = 1;
    const auto filter_format = kern_param.filter_meta.format;
    if (filter_format == param::ConvBias::Format::NCHW88) {
        oc_step = 8;
    } else if (filter_format == param::ConvBias::Format::NCHW44) {
        oc_step = 4;
    }
    size_t oc_start = oc_step * oc_id;
    size_t oc_end = oc_start + oc_step;

    strategy.filter(original_filter, transformed_filter, scratch, oc_per_group,
                    ic_per_group, oc_start, oc_end);
}
static
void
winograd_compute
(
static
void
winograd_compute
(
Strategy
strategy
,
const
WorkspaceBundle
&
bundle_top
,
Strategy
strategy
,
const
WorkspaceBundle
&
bundle_top
,
const
WorkspaceBundle
&
bundle_compute
,
const
WorkspaceBundle
&
bundle_compute
,
...
@@ -287,16 +387,29 @@ public:
...
@@ -287,16 +387,29 @@ public:
compute_workspace_size_per_thread
*
thread_id
);
compute_workspace_size_per_thread
*
thread_id
);
//! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset
//! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset
const
input_filter_compute_type
*
filter_transform_buf
=
const
input_filter_compute_type
*
filter_transform_buf
=
nullptr
;
if
(
nullptr
!=
ncb_param
.
preprocessed_filter
)
{
auto
preprocess_raw_ptr
=
ncb_param
.
preprocessed_filter
->
tensors
[
0
].
raw_ptr
;
filter_transform_buf
=
reinterpret_cast
<
input_filter_compute_type
*>
(
reinterpret_cast
<
uintptr_t
>
(
preprocess_raw_ptr
)
+
group_id
*
filter_group_size
);
}
else
{
filter_transform_buf
=
static_cast
<
const
input_filter_compute_type
*>
(
static_cast
<
const
input_filter_compute_type
*>
(
ncb_param
.
filter
<
input_filter_compute_type
>
(
group_id
));
ncb_param
.
filter
<
input_filter_compute_type
>
(
group_id
));
if
(
ncb_param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
if
(
ncb_param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
ncb_param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
ncb_param
.
filter_meta
.
format
==
ncb_param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
param
::
ConvBias
::
Format
::
NCHW88
||
filter_transform_buf
=
reinterpret_cast
<
input_filter_compute_type
*>
(
ncb_param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
filter_transform_buf
=
reinterpret_cast
<
input_filter_compute_type
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle_top
.
get
(
1
))
+
reinterpret_cast
<
uintptr_t
>
(
bundle_top
.
get
(
1
))
+
group_id
*
filter_group_size
);
group_id
*
filter_group_size
);
}
}
}
//! prepare matmul param
//! prepare matmul param
matmul_param
.
workspace_ptr
=
reinterpret_cast
<
void
*>
(
matmul_param
.
workspace_ptr
=
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
bundle_compute
.
get
(
3
))
+
reinterpret_cast
<
uintptr_t
>
(
bundle_compute
.
get
(
3
))
+
...
@@ -371,6 +484,47 @@ public:
...
@@ -371,6 +484,47 @@ public:
oc_start_idx
,
oc_end_idx
,
unit_start_idx
,
nr_tiles_in_unit
);
oc_start_idx
,
oc_end_idx
,
unit_start_idx
,
nr_tiles_in_unit
);
};
};
//! Build the kernel list that pre-transforms the filter.
//!
//! Requires the filter format to be NCHW, NCHW88 or NCHW44 and
//! param.preprocessed_filter to hold at least one tensor; tensors[0] is the
//! destination. A single kernel is returned whose ndrange is
//! {GROUP, oc_parallelism}, where oc_parallelism is OC / 8 for NCHW88,
//! OC / 4 for NCHW44 and OC otherwise. The matmul algorithm argument is not
//! used.
//!
//! NOTE(review): the kernel refers to the destination tensor by reference, so
//! it presumably relies on param.preprocessed_filter staying alive until the
//! kernel has run -- confirm against the caller.
SmallVector<NCBKern> get_preprocess_kerns(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    megdnn_assert(
            param.filter_meta.format == param::ConvBias::Format::NCHW ||
            param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
            param.filter_meta.format == param::ConvBias::Format::NCHW44);
    megdnn_assert(param.preprocessed_filter &&
                  param.preprocessed_filter->tensors.size() > 0);

    const size_t OC = param.filter_meta.ocpg;
    const size_t GROUP = param.filter_meta.group;
    const TensorND& preprocessed_dst = param.preprocessed_filter->tensors[0];
    WorkspaceBundle bundle = get_preprocess_wbundle(param);
    SmallVector<NCBKern> kerns;

    //! the closure owns copies of the strategy and the bundle; the bundle is
    //! bound to the real workspace pointer when the kernel runs
    auto preprocess_kern = [strategy = m_strategy, bundle, &preprocessed_dst](
                                   const NCBKernParam& ncb_param,
                                   const NCBKernIndex& ncb_index) mutable {
        MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
                     midout_iv("filter_preprocess"_hash)) {
            bundle.set(ncb_param.workspace_ptr);
            filter_preprocess(strategy, bundle, preprocessed_dst, ncb_param,
                              ncb_index);
        }
        MIDOUT_END();
    };

    //! one work item per packed output-channel block
    size_t oc_parallelism = OC;
    if (param.filter_meta.format == param::ConvBias::Format::NCHW88) {
        megdnn_assert(OC % 8 == 0);
        oc_parallelism = OC / 8;
    } else if (param.filter_meta.format == param::ConvBias::Format::NCHW44) {
        megdnn_assert(OC % 4 == 0);
        oc_parallelism = OC / 4;
    }

    kerns.push_back({preprocess_kern, {GROUP, oc_parallelism}});
    return kerns;
}
SmallVector
<
NCBKern
>
get_kerns
(
SmallVector
<
NCBKern
>
get_kerns
(
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
)
{
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
)
{
...
@@ -386,7 +540,6 @@ public:
...
@@ -386,7 +540,6 @@ public:
static_cast
<
fallback
::
MatrixMulImpl
::
KernSizeParam
&>
(
matmul_param
)
=
static_cast
<
fallback
::
MatrixMulImpl
::
KernSizeParam
&>
(
matmul_param
)
=
get_matmul_kern_param
(
param
,
m_unit_oc_size
);
get_matmul_kern_param
(
param
,
m_unit_oc_size
);
Strategy
strategy
=
m_strategy
;
size_t
unit_tile_size
=
m_unit_tile_size
;
size_t
unit_tile_size
=
m_unit_tile_size
;
size_t
unit_oc_size
=
m_unit_oc_size
;
size_t
unit_oc_size
=
m_unit_oc_size
;
size_t
units_h
=
div_ceil
<
size_t
>
(
OH
,
Strategy
::
OUTPUT_BLOCK_SIZE
);
size_t
units_h
=
div_ceil
<
size_t
>
(
OH
,
Strategy
::
OUTPUT_BLOCK_SIZE
);
...
@@ -411,20 +564,22 @@ public:
...
@@ -411,20 +564,22 @@ public:
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
));
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
));
SmallVector
<
NCBKern
>
kerns
;
SmallVector
<
NCBKern
>
kerns
;
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
if
(
param
.
preprocessed_filter
==
nullptr
&&
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
))
{
//! probably a gcc bug, labmda require capturing 'this' to call
//! static member function
auto
filter_process_kern
=
auto
filter_process_kern
=
[
this
,
strategy
,
bundle_top
,
bundle_compute
](
[
strategy
=
m_
strategy
,
bundle_top
,
bundle_compute
](
const
NCBKernParam
&
ncb_param
,
const
NCBKernParam
&
ncb_param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
const
NCBKernIndex
&
ncb_index
)
mutable
{
MEGDNN_MARK_USED_VAR
(
this
);
MIDOUT_BEGIN
(
megdnn_fallback_conv_bias_winograd_common
,
midout_iv
(
"filter_process"
_hash
))
{
bundle_top
.
set
(
ncb_param
.
workspace_ptr
);
bundle_top
.
set
(
ncb_param
.
workspace_ptr
);
bundle_compute
.
set
(
bundle_top
.
get
(
0
));
bundle_compute
.
set
(
bundle_top
.
get
(
0
));
filter_process
(
strategy
,
bundle_top
,
bundle_compute
,
filter_process
(
strategy
,
bundle_top
,
bundle_compute
,
ncb_param
,
std
::
move
(
ncb_index
));
ncb_param
,
std
::
move
(
ncb_index
));
}
MIDOUT_END
();
};
};
size_t
oc_parallelism
=
OC
;
size_t
oc_parallelism
=
OC
;
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
)
{
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
)
{
...
@@ -438,12 +593,12 @@ public:
...
@@ -438,12 +593,12 @@ public:
kerns
.
push_back
({
filter_process_kern
,
{
GROUP
,
1
,
oc_parallelism
}});
kerns
.
push_back
({
filter_process_kern
,
{
GROUP
,
1
,
oc_parallelism
}});
}
}
auto
winograd_compute_kern
=
auto
winograd_compute_kern
=
[
strategy
,
bundle_top
,
bundle_compute
,
matmul_algo
,
[
strategy
=
m_strategy
,
bundle_top
,
bundle_compute
,
matmul_algo
,
matmul_param
,
unit_tile_size
,
matmul_param
,
unit_tile_size
,
unit_oc_size
](
const
NCBKernParam
&
ncb_param
,
unit_oc_size
](
const
NCBKernParam
&
ncb_param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
const
NCBKernIndex
&
ncb_index
)
mutable
{
MIDOUT_BEGIN
(
megdnn_fallback_conv_bias_winograd_common
,
0
,
MIDOUT_BEGIN
(
megdnn_fallback_conv_bias_winograd_common
,
0
)
{
midout_iv
(
"winograd_compute"
_hash
)
)
{
bundle_top
.
set
(
ncb_param
.
workspace_ptr
);
bundle_top
.
set
(
ncb_param
.
workspace_ptr
);
bundle_compute
.
set
(
bundle_top
.
get
(
0
));
bundle_compute
.
set
(
bundle_top
.
get
(
0
));
winograd_compute
(
strategy
,
bundle_top
,
bundle_compute
,
winograd_compute
(
strategy
,
bundle_top
,
bundle_compute
,
...
@@ -562,4 +717,54 @@ public:
...
@@ -562,4 +717,54 @@ public:
filter_dtype(filter_dtype), \
filter_dtype(filter_dtype), \
dst_dtype(dst_dtype) {}
dst_dtype(dst_dtype) {}
//! Body fragment shared by the winograd algorithm member functions.
//!
//! Must be expanded inside a member function that has a parameter named
//! `param` and access to the members m_tile_size and m_matmul_algo. Inside a
//! MIDOUT region tagged with the hash of the stringized class and function
//! names, it constructs `_strategy` from param's src/filter/dst types, builds
//! megdnn::winograd::ConvBias<_strategy, _matmul_format>(strategy,
//! m_tile_size, param) and returns the result of calling `_fun(param,
//! m_matmul_algo)` on it.
//!
//!   _class          algorithm class name (used only for the midout tag)
//!   _fun            name of the ConvBias member function to forward to
//!   _strategy       winograd strategy type
//!   _midout_flag    midout tag passed to MIDOUT_BEGIN
//!   _matmul_format  matmul format template argument of ConvBias
//!
//! NOTE(review): callers place their own fallback `return` after the macro;
//! presumably it is only reached when the MIDOUT region is compiled out --
//! confirm against the midout implementation.
#define MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, _fun, _strategy, \
_midout_flag, _matmul_format) \
MEGDNN_MARK_USED_VAR(param); \
MIDOUT_BEGIN(_midout_flag, midout_iv(#_class #_fun##_hash)) { \
_strategy strategy(param.src_type, param.filter_type, param.dst_type); \
return megdnn::winograd::ConvBias<_strategy, _matmul_format>( \
strategy, m_tile_size, param) \
._fun(param, m_matmul_algo); \
} \
MIDOUT_END();
//! Defines, for ConvBiasImpl::_class, the member functions that simply
//! forward to megdnn::winograd::ConvBias:
//!   get_workspace                     -> get_workspace_size
//!   get_preprocess_workspace          -> get_preprocess_workspace_size
//!   deduce_preprocessed_filter_layout -> deduce_preprocessed_filter_layout
//!   dispatch_preprocess_kerns         -> get_preprocess_kerns
//!   dispatch_kerns                    -> get_kerns
//! Each body is produced by MEGDNN_WINOGRADS_ALGO_FUN_DEFINE and is followed
//! by a fallback return (0 or an empty container).
//!
//! usable() is NOT defined by this macro and has to be written by hand for
//! every algorithm class.
//!
//!   _class          algorithm class nested in ConvBiasImpl
//!   _strategy       winograd strategy type
//!   _midout_flag    midout tag passed to MIDOUT_BEGIN
//!   _matmul_format  matmul format template argument of ConvBias
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \
_matmul_format) \
size_t ConvBiasImpl::_class::get_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \
_strategy, _midout_flag, \
_matmul_format); \
return 0; \
} \
size_t ConvBiasImpl::_class::get_preprocess_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, get_preprocess_workspace_size, _strategy, \
_midout_flag, _matmul_format); \
return 0; \
} \
SmallVector<TensorLayout> \
ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, deduce_preprocessed_filter_layout, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> \
ConvBiasImpl::_class::dispatch_preprocess_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \
_strategy, _midout_flag, \
_matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
}
// vim: syntax=cpp.doxygen
// vim: syntax=cpp.doxygen
dnn/src/x86/conv_bias/f32/algos.h
浏览文件 @
fff2cdc7
...
@@ -94,7 +94,6 @@ public:
...
@@ -94,7 +94,6 @@ public:
AlgoFP32WinogradF63_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF63_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -102,19 +101,8 @@ public:
...
@@ -102,19 +101,8 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
void
*
type
()
const
override
;
void
*
type
()
const
override
;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
class
ConvBiasImpl
::
AlgoFP32WinogradF23_8x8
final
:
public
AlgoBase
{
class
ConvBiasImpl
::
AlgoFP32WinogradF23_8x8
final
:
public
AlgoBase
{
...
@@ -122,7 +110,6 @@ public:
...
@@ -122,7 +110,6 @@ public:
AlgoFP32WinogradF23_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
AlgoFP32WinogradF23_8x8
(
fallback
::
MatrixMulImpl
::
AlgoBase
*
matmul_algo
,
uint32_t
tile_size
)
uint32_t
tile_size
)
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
:
m_matmul_algo
{
matmul_algo
},
m_tile_size
{
tile_size
}
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
m_name
=
ConvBiasImpl
::
algo_name
<
ConvBias
::
WinogradParam
>
(
...
@@ -130,19 +117,8 @@ public:
...
@@ -130,19 +117,8 @@ public:
}
}
return
m_name
.
c_str
();
return
m_name
.
c_str
();
}
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
void
*
type
()
const
override
;
void
*
type
()
const
override
;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
();
private:
fallback
::
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
mutable
std
::
string
m_name
;
uint32_t
m_tile_size
;
};
};
/* ===================== matmul algo ===================== */
/* ===================== matmul algo ===================== */
...
...
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
浏览文件 @
fff2cdc7
...
@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
...
@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
...
@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
...
@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF63_8x8
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF63_8x8
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_nchw88_6x3_8x8_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_x86_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_x86_winograd_fp32
,
1
,
1
)
{
param
::
MatrixMul
::
Format
::
MK8
);
winograd
::
winograd_nchw88_6x3_8x8_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_nchw88_6x3_8x8_f
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF63_8x8
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
1
,
2
)
{
winograd
::
winograd_nchw88_6x3_8x8_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_nchw88_6x3_8x8_f
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
/* ======================= AlgoFP32WinogradF23_8*8 ======================== */
/* ======================= AlgoFP32WinogradF23_8*8 ======================== */
...
@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
...
@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
auto
&&
matmul_param
=
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
strategy
,
m_tile_size
,
param
)
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_matmul_kern_param
(
param
);
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
...
@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
...
@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
return
false
;
return
false
;
}
}
size_t
ConvBiasImpl
::
AlgoFP32WinogradF23_8x8
::
get_workspace
(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL
(
AlgoFP32WinogradF23_8x8
,
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
winograd
::
winograd_nchw88_2x3_8x8_f
,
MEGDNN_MARK_USED_VAR
(
param
);
megdnn_x86_winograd_fp32
,
MIDOUT_BEGIN
(
megdnn_x86_winograd_fp32
,
2
,
1
)
{
param
::
MatrixMul
::
Format
::
MK8
);
winograd
::
winograd_nchw88_2x3_8x8_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
return
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_nchw88_2x3_8x8_f
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
)
.
get_workspace_size
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoFP32WinogradF23_8x8
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
2
,
2
)
{
winograd
::
winograd_nchw88_2x3_8x8_f
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
winograd_impl
=
megdnn
::
winograd
::
ConvBias
<
winograd
::
winograd_nchw88_2x3_8x8_f
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
.
nr_threads
,
param
.
osz
[
0
],
param
.
osz
[
1
],
param
.
filter_meta
.
ocpg
);
return
winograd_impl
.
get_kerns
(
param
,
m_matmul_algo
);
}
MIDOUT_END
();
return
{};
}
// vim: syntax=cpp.doxygen
// vim: syntax=cpp.doxygen
dnn/test/arm_common/conv_bias.cpp
浏览文件 @
fff2cdc7
...
@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) {
...
@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) {
}
}
}
}
TEST_F
(
ARM_COMMON
,
CONV_BIAS_WINOGRAD_F63_4
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_packed_args
();
Checker
<
ConvBiasForward
>
checker
(
handle
());
check_winograd
(
"4:6:16"
,
checker
,
args
,
param
::
MatrixMul
::
Format
::
MK4
);
}
TEST_F
(
ARM_COMMON
,
CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_packed_args
();
Checker
<
ConvBiasForward
,
OprWeightPreprocessProxy
<
ConvBiasForward
>>
checker
(
handle
());
check_winograd
(
"4:6:16"
,
checker
,
args
,
param
::
MatrixMul
::
Format
::
MK4
);
}
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \
using namespace conv_bias; \
using namespace conv_bias; \
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \
...
...
dnn/test/arm_common/conv_bias_multi_thread.cpp
浏览文件 @
fff2cdc7
此差异已折叠。
点击以展开。
dnn/test/x86/conv_bias.cpp
浏览文件 @
fff2cdc7
...
@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() {
...
@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() {
TensorShape
{
oc
,
ic
,
3
,
3
,
8
,
8
},
TensorShape
{});
TensorShape
{
oc
,
ic
,
3
,
3
,
8
,
8
},
TensorShape
{});
//! bias
//! bias
args
.
emplace_back
(
cur_param
,
TensorShape
{
2
,
ic
,
i
,
i
,
8
},
args
.
emplace_back
(
cur_param
,
TensorShape
{
2
,
ic
,
i
,
i
,
8
},
TensorShape
{
oc
,
ic
,
3
,
3
,
8
,
8
},
TensorShape
{
2
,
oc
,
i
,
i
,
8
});
TensorShape
{
oc
,
ic
,
3
,
3
,
8
,
8
},
TensorShape
{
2
,
oc
,
i
,
i
,
8
});
/*cur_param.sparse = param::ConvBias::Sparse::GROUP;
/*cur_param.sparse = param::ConvBias::Sparse::GROUP;
args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8},
args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8},
...
@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) {
...
@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) {
}
}
}
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_WINOGRAD_NCHW88_F63_WEIGHT_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_nchw88_args
();
Checker
<
ConvBiasForward
,
OprWeightPreprocessProxy
<
ConvBiasForward
>>
checker
(
handle
());
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ssprintf
(
"WINOGRAD:X86_F32MK8_8X8:8:6"
).
c_str
()));
for
(
auto
&&
arg
:
args
)
{
checker
.
set_param
(
arg
.
param
).
execs
(
{
arg
.
src
,
arg
.
filter
,
arg
.
bias
,
{},
{}});
}
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_WINOGRAD_NCHW88_F23
)
{
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_WINOGRAD_NCHW88_F23
)
{
using
namespace
conv_bias
;
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_nchw88_args
();
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_nchw88_args
();
...
@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
...
@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
}
}
}
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_WINOGRAD_NCHW88_F23_WEIGHT_PREPROCESS
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_nchw88_args
();
Checker
<
ConvBiasForward
,
OprWeightPreprocessProxy
<
ConvBiasForward
>>
checker
(
handle
());
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ssprintf
(
"WINOGRAD:X86_F32MK8_8X8:8:2"
).
c_str
()));
for
(
auto
&&
arg
:
args
)
{
checker
.
set_param
(
arg
.
param
).
execs
(
{
arg
.
src
,
arg
.
filter
,
arg
.
bias
,
{},
{}});
}
}
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS
)
{
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS
)
{
using
namespace
conv_bias
;
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_nchw88_args
();
std
::
vector
<
TestArg
>
args
=
get_winograd_mk_nchw88_args
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录