Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
ba5a43b8
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
ba5a43b8
编写于
7月 10, 2020
作者:
M
Megvii Engine Team
提交者:
Xu Xinran
7月 23, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(dnn/fallback): delete ConvBias* opr param of conv_bias algo
GitOrigin-RevId: ee5a6874fb3698b0c79698c1bb4f90997be715a1
上级
55844d3e
变更
54
隐藏空白更改
内联
并排
Showing
54 changed file
with
675 addition
and
856 deletion
+675
-856
dnn/src/aarch64/conv_bias/fp16/algos.cpp
dnn/src/aarch64/conv_bias/fp16/algos.cpp
+5
-4
dnn/src/aarch64/conv_bias/fp16/algos.h
dnn/src/aarch64/conv_bias/fp16/algos.h
+4
-6
dnn/src/aarch64/conv_bias/fp32/algos.cpp
dnn/src/aarch64/conv_bias/fp32/algos.cpp
+3
-3
dnn/src/aarch64/conv_bias/fp32/algos.h
dnn/src/aarch64/conv_bias/fp32/algos.h
+4
-5
dnn/src/aarch64/conv_bias/int8/algos.cpp
dnn/src/aarch64/conv_bias/int8/algos.cpp
+1
-2
dnn/src/aarch64/conv_bias/int8/algos.h
dnn/src/aarch64/conv_bias/int8/algos.h
+8
-7
dnn/src/aarch64/conv_bias/quint8/algos.cpp
dnn/src/aarch64/conv_bias/quint8/algos.cpp
+1
-2
dnn/src/aarch64/conv_bias/quint8/algos.h
dnn/src/aarch64/conv_bias/quint8/algos.h
+7
-7
dnn/src/arm_common/conv_bias/f16/algos.cpp
dnn/src/arm_common/conv_bias/f16/algos.cpp
+26
-30
dnn/src/arm_common/conv_bias/f16/algos.h
dnn/src/arm_common/conv_bias/f16/algos.h
+4
-8
dnn/src/arm_common/conv_bias/fp32/algos.cpp
dnn/src/arm_common/conv_bias/fp32/algos.cpp
+48
-53
dnn/src/arm_common/conv_bias/fp32/algos.h
dnn/src/arm_common/conv_bias/fp32/algos.h
+12
-24
dnn/src/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp
...rc/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp
+4
-5
dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp
dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp
+3
-4
dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp
...arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp
+3
-4
dnn/src/arm_common/conv_bias/int8/algos.cpp
dnn/src/arm_common/conv_bias/int8/algos.cpp
+63
-60
dnn/src/arm_common/conv_bias/int8/algos.h
dnn/src/arm_common/conv_bias/int8/algos.h
+25
-51
dnn/src/arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp
.../arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp
+4
-4
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
+4
-6
dnn/src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp
...src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp
+3
-5
dnn/src/arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp
...arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp
+3
-4
dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp
dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp
+9
-9
dnn/src/arm_common/conv_bias/int8x8x16/algos.h
dnn/src/arm_common/conv_bias/int8x8x16/algos.h
+6
-12
dnn/src/arm_common/conv_bias/opr_impl.cpp
dnn/src/arm_common/conv_bias/opr_impl.cpp
+1
-1
dnn/src/arm_common/conv_bias/opr_impl.h
dnn/src/arm_common/conv_bias/opr_impl.h
+1
-1
dnn/src/arm_common/conv_bias/quint8/algos.cpp
dnn/src/arm_common/conv_bias/quint8/algos.cpp
+16
-15
dnn/src/arm_common/conv_bias/quint8/algos.h
dnn/src/arm_common/conv_bias/quint8/algos.h
+11
-18
dnn/src/armv7/conv_bias/int8/algos.cpp
dnn/src/armv7/conv_bias/int8/algos.cpp
+1
-2
dnn/src/armv7/conv_bias/int8/algos.h
dnn/src/armv7/conv_bias/int8/algos.h
+2
-4
dnn/src/armv7/conv_bias/quint8/algos.cpp
dnn/src/armv7/conv_bias/quint8/algos.cpp
+1
-2
dnn/src/armv7/conv_bias/quint8/algos.h
dnn/src/armv7/conv_bias/quint8/algos.h
+2
-4
dnn/src/fallback/conv_bias/algos.cpp
dnn/src/fallback/conv_bias/algos.cpp
+62
-57
dnn/src/fallback/conv_bias/algos.h
dnn/src/fallback/conv_bias/algos.h
+15
-25
dnn/src/fallback/conv_bias/common.h
dnn/src/fallback/conv_bias/common.h
+6
-11
dnn/src/fallback/conv_bias/conv1x1/algos.cpp
dnn/src/fallback/conv_bias/conv1x1/algos.cpp
+19
-19
dnn/src/fallback/conv_bias/conv1x1/algos.h
dnn/src/fallback/conv_bias/conv1x1/algos.h
+4
-5
dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp
dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp
+14
-14
dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h
dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h
+5
-8
dnn/src/fallback/conv_bias/im2col/algos.cpp
dnn/src/fallback/conv_bias/im2col/algos.cpp
+9
-8
dnn/src/fallback/conv_bias/im2col/algos.h
dnn/src/fallback/conv_bias/im2col/algos.h
+10
-6
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+6
-8
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+10
-14
dnn/src/fallback/conv_bias/winograd/winograd.h
dnn/src/fallback/conv_bias/winograd/winograd.h
+45
-42
dnn/src/fallback/convolution/algos.cpp
dnn/src/fallback/convolution/algos.cpp
+62
-105
dnn/src/fallback/convolution/algos.h
dnn/src/fallback/convolution/algos.h
+23
-35
dnn/src/fallback/convolution/opr_impl.cpp
dnn/src/fallback/convolution/opr_impl.cpp
+18
-23
dnn/src/fallback/convolution/opr_impl.h
dnn/src/fallback/convolution/opr_impl.h
+9
-13
dnn/src/x86/conv_bias/f32/algos.cpp
dnn/src/x86/conv_bias/f32/algos.cpp
+10
-8
dnn/src/x86/conv_bias/f32/algos.h
dnn/src/x86/conv_bias/f32/algos.h
+13
-21
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
+14
-15
dnn/src/x86/conv_bias/int8/algos.cpp
dnn/src/x86/conv_bias/int8/algos.cpp
+16
-19
dnn/src/x86/conv_bias/int8/algos.h
dnn/src/x86/conv_bias/int8/algos.h
+18
-36
dnn/src/x86/conv_bias/opr_impl.cpp
dnn/src/x86/conv_bias/opr_impl.cpp
+1
-1
dnn/src/x86/conv_bias/opr_impl.h
dnn/src/x86/conv_bias/opr_impl.h
+1
-1
未找到文件。
dnn/src/aarch64/conv_bias/fp16/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/aarch64/conv_bias/fp16/algos.h"
...
...
@@ -22,7 +23,7 @@ using namespace aarch64;
MIDOUT_DECL
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp16
)
bool
ConvBiasImpl
::
AlgoF16DirectStride2
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp16
,
0
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -47,7 +48,7 @@ bool ConvBiasImpl::AlgoF16DirectStride2::usable(
}
size_t
ConvBiasImpl
::
AlgoF16DirectStride2
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp16
,
0
,
1
)
{
auto
wbundle
=
arm_common
::
MultithreadDirectConvCommon
<
dt_float16
,
__fp16
>::
get_bundle_stride
(
param
,
m_large_group
);
...
...
@@ -59,7 +60,7 @@ size_t ConvBiasImpl::AlgoF16DirectStride2::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF16DirectStride2
::
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp32
,
0
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
dnn/src/aarch64/conv_bias/fp16/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -19,6 +19,7 @@ namespace aarch64 {
class
ConvBiasImpl
::
AlgoF16DirectStride2
final
:
public
AlgoBase
{
SmallVector
<
NCBKern
>
get_kimpls
(
const
NCBKernSizeParam
&
param
)
const
;
bool
m_large_group
;
public:
AlgoF16DirectStride2
(
bool
large_group
)
:
m_large_group
(
large_group
)
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
...
...
@@ -26,15 +27,12 @@ public:
return
m_large_group
?
"ARMV8F16STRD2_LARGE_GROUP"
:
"ARMV8F16STRD2_SMALL_GROUP"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
};
}
// namespace aarch64
}
// namespace megdnn
...
...
dnn/src/aarch64/conv_bias/fp32/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -22,7 +22,7 @@ using namespace aarch64;
MIDOUT_DECL
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp32
)
bool
ConvBiasImpl
::
AlgoF32DirectStride2
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp32
,
0
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -47,7 +47,7 @@ bool ConvBiasImpl::AlgoF32DirectStride2::usable(
}
size_t
ConvBiasImpl
::
AlgoF32DirectStride2
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp32
,
0
,
1
)
{
auto
wbundle
=
arm_common
::
MultithreadDirectConvCommon
<
float
,
float
>::
get_bundle_stride
(
param
,
m_large_group
);
...
...
@@ -58,7 +58,7 @@ size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32DirectStride2
::
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_aarch64_conv_bias_stride2_conv2357_fp32
,
0
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
dnn/src/aarch64/conv_bias/fp32/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -23,6 +23,7 @@ using FallbackConvBiasImpl = fallback::ConvBiasImpl;
class
ConvBiasImpl
::
AlgoF32DirectStride2
final
:
public
AlgoBase
{
SmallVector
<
NCBKern
>
get_kimpls
(
const
NCBKernSizeParam
&
param
)
const
;
bool
m_large_group
;
public:
AlgoF32DirectStride2
(
bool
large_group
)
:
m_large_group
(
large_group
)
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
...
...
@@ -31,14 +32,12 @@ public:
:
"ARMV8F32STRD2_SMALL_GROUP"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
};
}
// namespace aarch64
...
...
dnn/src/aarch64/conv_bias/int8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -30,9 +30,8 @@ using megdnn::arm_common::TypeCvtOp;
/* ===================== matrix mul algo ===================== */
bool
ConvBiasImpl
::
AlgoS8MatrixMul
::
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
opr
);
auto
&&
fm
=
param
.
filter_meta
;
return
param
.
src_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
...
...
dnn/src/aarch64/conv_bias/int8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -13,6 +13,7 @@
#include "src/aarch64/conv_bias/opr_impl.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/common/opr_delegate.h"
namespace
megdnn
{
namespace
aarch64
{
...
...
@@ -27,21 +28,21 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"S8MATMUL"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
return
{{
kimpl
,
{
group
,
1
_z
,
1
_z
}}};
}
//! select matmul to the highest preference
bool
is_preferred
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
static_cast
<
arm_common
::
ConvBiasImpl
*>
(
opr
)
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
static
CpuOprDelegationStorage
<
1
>
storage
;
auto
conv_bias_opr
=
storage
.
get
<
ConvBias
,
0
>
();
return
static_cast
<
ConvBiasImpl
*>
(
conv_bias_opr
)
->
is_matmul_quantized_prefer
(
param
);
}
};
...
...
dnn/src/aarch64/conv_bias/quint8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -32,9 +32,8 @@ using megdnn::arm_common::TypeCvtOp;
/* ===================== matrix mul algo ===================== */
bool
ConvBiasImpl
::
AlgoQU8MatrixMul
::
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
opr
);
auto
&&
fm
=
param
.
filter_meta
;
return
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
...
...
dnn/src/aarch64/conv_bias/quint8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -13,6 +13,7 @@
#include "src/aarch64/conv_bias/opr_impl.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/common/opr_delegate.h"
namespace
megdnn
{
namespace
aarch64
{
...
...
@@ -27,22 +28,21 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"QU8MATMUL"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
return
{{
kimpl
,
{
group
,
1
_z
,
1
_z
}}};
}
//! select matmul to the highest preference
bool
is_preferred
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
static_cast
<
arm_common
::
ConvBiasImpl
*>
(
opr
)
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
static
CpuOprDelegationStorage
<
1
>
storage
;
auto
conv_bias_opr
=
storage
.
get
<
ConvBias
,
0
>
();
return
static_cast
<
ConvBiasImpl
*>
(
conv_bias_opr
)
->
is_matmul_quantized_prefer
(
param
);
}
};
...
...
dnn/src/arm_common/conv_bias/f16/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -27,10 +27,9 @@ using namespace arm_common;
/* ======================= AlgoFP16WinogradF23 ======================== */
bool
ConvBiasImpl
::
AlgoFP16WinogradF23
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
0
,
0
)
{
using
Strategy
=
winograd
::
winograd_2x3_4x4_f16
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -38,13 +37,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -69,10 +68,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23,
/* ======================= AlgoFP16WinogradF45 ======================== */
bool
ConvBiasImpl
::
AlgoFP16WinogradF45
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
1
,
0
)
{
using
Strategy
=
winograd
::
winograd_4x5_1x1_f16
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -80,13 +78,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
4
&&
param
.
output_block_size
==
4
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
5
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -109,10 +107,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45,
/* ======================= AlgoFP16WinogradF63 ======================== */
bool
ConvBiasImpl
::
AlgoFP16WinogradF63
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
2
,
0
)
{
using
Strategy
=
winograd
::
winograd_6x3_1x1_f16
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -120,13 +117,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
6
&&
param
.
output_block_size
==
6
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -149,10 +146,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63,
/* ======================= AlgoFP16WinogradF23_8x8 ======================== */
bool
ConvBiasImpl
::
AlgoFP16WinogradF23_8x8
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp16
,
3
,
0
)
{
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
param
.
filter_meta
.
ocpg
%
8
!=
0
)
return
false
;
...
...
@@ -166,13 +162,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -197,7 +193,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8,
MIDOUT_DECL
(
megdnn_arm_common_conv_bias_fp16_kimpl
)
bool
ConvBiasImpl
::
AlgoF16Direct
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_fp16_kimpl
,
0
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -227,7 +223,7 @@ bool ConvBiasImpl::AlgoF16Direct::usable(
}
size_t
ConvBiasImpl
::
AlgoF16Direct
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_fp16_kimpl
,
0
,
1
)
{
auto
wbundle
=
MultithreadDirectConvCommon
<
dt_float16
,
__fp16
>::
get_bundle
(
...
...
@@ -310,7 +306,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF16Direct::get_kimpls(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF16Direct
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_fp16_kimpl
,
0
,
1
)
{
return
get_kimpls
(
param
);
}
...
...
@@ -321,7 +317,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF16Direct::dispatch_kerns(
/* ===================== stride-1 algo ===================== */
bool
ConvBiasImpl
::
AlgoF16DirectStride1
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_fp16_kimpl
,
1
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -425,7 +421,7 @@ ConvBiasImpl::AlgoF16DirectStride1::get_kimpls(
}
size_t
ConvBiasImpl
::
AlgoF16DirectStride1
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_fp16_kimpl
,
1
,
1
)
{
auto
bundle
=
MultithreadDirectConvCommon
<
dt_float16
,
__fp16
>::
get_bundle_stride
(
param
,
m_large_group
);
...
...
@@ -437,7 +433,7 @@ size_t ConvBiasImpl::AlgoF16DirectStride1::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF16DirectStride1
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_fp16_kimpl
,
1
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
dnn/src/arm_common/conv_bias/f16/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -88,14 +88,12 @@ public:
return
m_large_group
?
"F16DIRECT_LARGE_GROUP"
:
"F16DIRECT_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -109,12 +107,10 @@ public:
const
char
*
name
()
const
override
{
return
m_large_group
?
"F16STRD1_LARGE_GROUP"
:
"F16STRD1_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
dnn/src/arm_common/conv_bias/fp32/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/algos.h"
...
...
@@ -30,9 +31,8 @@ using namespace arm_common;
/* ======================= AlgoFP32WinogradF23_4x4 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
opr
);
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
0
,
0
)
{
if
(
param
.
filter_meta
.
icpg
%
4
!=
0
||
param
.
filter_meta
.
ocpg
%
4
!=
0
)
...
...
@@ -47,13 +47,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK4
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -76,10 +76,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4,
/* ======================= AlgoFP32WinogradF63 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF63
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
1
,
0
)
{
using
Strategy
=
winograd
::
winograd_6x3_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -87,13 +86,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
6
&&
param
.
output_block_size
==
6
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -116,10 +115,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63,
/* ======================= AlgoFP32WinogradF54 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF54
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
2
,
0
)
{
using
Strategy
=
winograd
::
winograd_5x4_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -127,13 +125,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
5
&&
param
.
output_block_size
==
5
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
4
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -156,10 +154,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54,
/* ======================= AlgoFP32WinogradF45 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF45
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
3
,
0
)
{
using
Strategy
=
winograd
::
winograd_4x5_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -167,13 +164,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
4
&&
param
.
output_block_size
==
4
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
5
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -196,10 +193,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45,
/* ======================= AlgoFP32WinogradF63_4x4 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
4
,
0
)
{
if
(
param
.
filter_meta
.
icpg
%
4
!=
0
||
param
.
filter_meta
.
ocpg
%
4
!=
0
)
return
false
;
...
...
@@ -213,13 +209,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
6
&&
param
.
output_block_size
==
6
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK4
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -244,9 +240,8 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4,
/* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF23_4x4_NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
opr
);
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
midout_iv
(
"AlgoFP32WinogradF23_4x4_NCHW44"
_hash
))
{
...
...
@@ -262,13 +257,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
NO_PACK
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK4
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -291,10 +286,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44,
/* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF63_4x4_NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_arm_common_winograd_fp32
,
midout_iv
(
"AlgoFP32WinogradF63_4x4_NCHW44"
_hash
))
{
if
(
param
.
filter_meta
.
icpg
%
4
!=
0
||
param
.
filter_meta
.
ocpg
%
4
!=
0
)
...
...
@@ -309,13 +303,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
::
NO_PACK
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
6
&&
param
.
output_block_size
==
6
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK4
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -341,7 +335,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44,
MIDOUT_DECL
(
megdnn_arm_common_conv_bias_f32_kimpl
);
bool
ConvBiasImpl
::
AlgoF32Direct
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
0
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -370,7 +364,7 @@ bool ConvBiasImpl::AlgoF32Direct::usable(
return
false
;
}
size_t
ConvBiasImpl
::
AlgoF32Direct
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
0
,
1
)
{
auto
wbundle
=
MultithreadDirectConvCommon
<
float
,
float
>::
get_bundle
(
param
,
m_large_group
);
...
...
@@ -409,7 +403,8 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::get_kimpls(
}
for
(
size_t
ic
=
0
;
ic
<
IC
;
ic
++
)
{
MultithreadDirectConvCommon
<
float
,
float
>::
copy_padding_kern
(
bundle
,
kern_param
,
ncb_index
,
{
ncb_index
.
thread_id
,
0
,
ic
});
bundle
,
kern_param
,
ncb_index
,
{
ncb_index
.
thread_id
,
0
,
ic
});
}
for
(
size_t
oc
=
0
;
oc
<
OC
;
oc
++
)
{
MultithreadDirectConvCommon
<
float
,
float
>::
do_conv_kern
(
...
...
@@ -449,7 +444,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::get_kimpls(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32Direct
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
0
,
1
)
{
return
get_kimpls
(
param
);
}
...
...
@@ -458,7 +453,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::dispatch_kerns(
}
/* ===================== stride-1 algo ===================== */
bool
ConvBiasImpl
::
AlgoF32DirectStride1
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
1
,
1
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -484,7 +479,7 @@ bool ConvBiasImpl::AlgoF32DirectStride1::usable(
}
size_t
ConvBiasImpl
::
AlgoF32DirectStride1
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
1
,
1
)
{
auto
bundle
=
MultithreadDirectConvCommon
<
float
,
float
>::
get_bundle_stride
(
...
...
@@ -575,7 +570,7 @@ ConvBiasImpl::AlgoF32DirectStride1::get_kimpls(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32DirectStride1
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
1
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
@@ -586,7 +581,7 @@ ConvBiasImpl::AlgoF32DirectStride1::dispatch_kerns(
/* ===================== stride-2 algo ===================== */
bool
ConvBiasImpl
::
AlgoF32DirectStride2
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
2
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -611,7 +606,7 @@ bool ConvBiasImpl::AlgoF32DirectStride2::usable(
return
false
;
}
size_t
ConvBiasImpl
::
AlgoF32DirectStride2
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
2
,
1
)
{
auto
bundle
=
MultithreadDirectConvCommon
<
float
,
float
>::
get_bundle_stride
(
...
...
@@ -701,7 +696,7 @@ ConvBiasImpl::AlgoF32DirectStride2::get_kimpls(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32DirectStride2
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_f32_kimpl
,
2
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
dnn/src/arm_common/conv_bias/fp32/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -137,13 +137,11 @@ public:
return
m_large_group
?
"F32DIRECT_LARGE_GROUP"
:
"F32DIRECT_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -157,13 +155,11 @@ public:
const
char
*
name
()
const
override
{
return
m_large_group
?
"F32STRD1_LARGE_GROUP"
:
"F32STRD1_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -177,13 +173,11 @@ public:
const
char
*
name
()
const
override
{
return
m_large_group
?
"F32STRD2_LARGE_GROUP"
:
"F32STRD2_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -194,13 +188,11 @@ public:
AlgoF32DirectNCHW44
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"F32_CONV_NCHW44_DIRECT"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -211,13 +203,11 @@ public:
AlgoF32DirectNCHWNCHW44
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"F32_CONV_NCHW_NCHW44"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -227,13 +217,11 @@ class ConvBiasImpl::AlgoF32ChannelWiseNCHW44 final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"F32_CHANNEL_WISE_NCHW44"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
dnn/src/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -10,8 +10,8 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/channel_wise_nchw44_kern.h"
#include "src/arm_common/conv_bias/fp32/algos.h"
#include "src/arm_common/conv_bias/fp32/channel_wise_nchw44_kern.h"
#include "src/arm_common/elemwise_op.h"
#include "midout.h"
...
...
@@ -26,8 +26,7 @@ using conv_fun = std::function<void(
MIDOUT_DECL
(
conv_bias_fp32_channel_wise_nchw44
)
bool
ConvBiasImpl
::
AlgoF32ChannelWiseNCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
auto
FH
=
fm
.
spatial
[
0
];
size_t
OC
=
fm
.
ocpg
;
...
...
@@ -49,13 +48,13 @@ bool ConvBiasImpl::AlgoF32ChannelWiseNCHW44::usable(
}
size_t
ConvBiasImpl
::
AlgoF32ChannelWiseNCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
{
const
NCBKernSizeParam
&
)
const
{
return
0
;
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32ChannelWiseNCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
const
constexpr
size_t
pack_group_size
=
4
_z
;
auto
fm
=
param
.
filter_meta
;
const
int
batch
=
param
.
n
;
...
...
dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -159,8 +159,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle,
}
// namespace
/* ===================== stride1 algo ===================== */
bool
ConvBiasImpl
::
AlgoF32DirectNCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
ConvBiasImpl
::
AlgoF32DirectNCHW44
::
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
auto
fh
=
fm
.
spatial
[
0
];
...
...
@@ -182,13 +181,13 @@ bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(fallback::ConvBiasImpl*,
}
size_t
ConvBiasImpl
::
AlgoF32DirectNCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32DirectNCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
fm
=
param
.
filter_meta
;
const
int
batch
=
param
.
n
;
const
int
group
=
fm
.
group
;
...
...
dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -188,8 +188,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle,
}
// namespace
bool
ConvBiasImpl
::
AlgoF32DirectNCHWNCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
auto
fh
=
fm
.
spatial
[
0
];
int
oc
=
fm
.
ocpg
;
...
...
@@ -209,13 +208,13 @@ bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44::usable(
}
size_t
ConvBiasImpl
::
AlgoF32DirectNCHWNCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoF32DirectNCHWNCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
fm
=
param
.
filter_meta
;
const
int
batch
=
param
.
n
;
const
int
group
=
fm
.
group
;
...
...
dnn/src/arm_common/conv_bias/int8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -28,7 +28,7 @@ using namespace arm_common;
MIDOUT_DECL
(
megdnn_arm_common_conv_bias_int8
)
/* ===================== stride1 algo ===================== */
bool
ConvBiasImpl
::
AlgoS8DirectStride1
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_int8_stride1
::
can_conv_direct_stride1_int8
(
param
);
auto
fm
=
param
.
filter_meta
;
...
...
@@ -40,7 +40,7 @@ bool ConvBiasImpl::AlgoS8DirectStride1::usable(
return
avaible
;
}
bool
ConvBiasImpl
::
AlgoS8DirectStride1
::
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
auto
FH
=
fm
.
spatial
[
0
];
auto
OC
=
fm
.
ocpg
;
...
...
@@ -53,14 +53,14 @@ bool ConvBiasImpl::AlgoS8DirectStride1::is_preferred(
}
size_t
ConvBiasImpl
::
AlgoS8DirectStride1
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_int8_stride1
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8DirectStride1
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
1
,
0
)
{
return
direct_int8_stride1
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -70,20 +70,20 @@ ConvBiasImpl::AlgoS8DirectStride1::dispatch_kerns(
/* ===================== stride1 algo ===================== */
bool
ConvBiasImpl
::
AlgoS8ChanWiseStride1NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
return
channel_wise_nchw44
::
stride1
::
is_available
(
param
);
}
size_t
ConvBiasImpl
::
AlgoS8ChanWiseStride1NCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
channel_wise_nchw44
::
stride1
::
get_bundle
(
param
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8ChanWiseStride1NCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"AlgoS8ChanWiseStride1NCHW44"
_hash
))
{
return
channel_wise_nchw44
::
stride1
::
get_kimpls
(
param
);
...
...
@@ -94,20 +94,20 @@ ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::dispatch_kerns(
/* ===================== stride2 algo ===================== */
bool
ConvBiasImpl
::
AlgoS8ChanWiseStride2NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
return
channel_wise_nchw44
::
stride2
::
is_available
(
param
);
}
size_t
ConvBiasImpl
::
AlgoS8ChanWiseStride2NCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
channel_wise_nchw44
::
stride2
::
get_bundle
(
param
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8ChanWiseStride2NCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"AlgoS8ChanWiseStride2NCHW44"
_hash
))
{
return
channel_wise_nchw44
::
stride2
::
get_kimpls
(
param
);
...
...
@@ -118,7 +118,7 @@ ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::dispatch_kerns(
/* ===================== stride2 algo ===================== */
bool
ConvBiasImpl
::
AlgoS8DirectStride2
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_int8_stride2
::
can_conv_direct_stride2_int8
(
param
);
if
(
algo_selection_strategy
==
...
...
@@ -130,14 +130,14 @@ bool ConvBiasImpl::AlgoS8DirectStride2::usable(
}
size_t
ConvBiasImpl
::
AlgoS8DirectStride2
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_int8_stride2
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8DirectStride2
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
1
,
1
)
{
return
direct_int8_stride2
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -148,7 +148,7 @@ ConvBiasImpl::AlgoS8DirectStride2::dispatch_kerns(
#if __ARM_FEATURE_DOTPROD
/* ===================== dot stride1 algo ======================== */
bool
ConvBiasImpl
::
AlgoDotS8DirectStride1
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_dotprod_int8_stride1
::
can_conv_direct_stride1_int8
(
param
);
...
...
@@ -163,14 +163,14 @@ bool ConvBiasImpl::AlgoDotS8DirectStride1::usable(
}
size_t
ConvBiasImpl
::
AlgoDotS8DirectStride1
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_dotprod_int8_stride1
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoDotS8DirectStride1
::
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
2
,
1
)
{
return
direct_dotprod_int8_stride1
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -180,7 +180,7 @@ ConvBiasImpl::AlgoDotS8DirectStride1::dispatch_kerns(
/* ===================== dot stride2 algo ======================== */
bool
ConvBiasImpl
::
AlgoDotS8DirectStride2
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_dotprod_int8_stride2
::
can_conv_direct_stride2_int8
(
param
);
...
...
@@ -193,14 +193,14 @@ bool ConvBiasImpl::AlgoDotS8DirectStride2::usable(
}
size_t
ConvBiasImpl
::
AlgoDotS8DirectStride2
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_dotprod_int8_stride2
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoDotS8DirectStride2
::
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
2
,
2
)
{
return
direct_dotprod_int8_stride2
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -212,7 +212,7 @@ ConvBiasImpl::AlgoDotS8DirectStride2::dispatch_kerns(
/* ======================= AlgoS8WinogradF23_8x8 ======================== */
bool
ConvBiasImpl
::
AlgoS8WinogradF23_8x8
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
param
.
filter_meta
.
ocpg
%
8
!=
0
)
return
false
;
...
...
@@ -225,13 +225,14 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
((
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
&&
((
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
().
output_block_size
==
2
&&
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS16
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -251,7 +252,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8,
//=========================== input int8 compute float32 =========
bool
ConvBiasImpl
::
AlgoS8CF32WinogradF23_4x4_NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
...
...
@@ -270,14 +271,14 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
.
get_matmul_kern_param
(
param
));
return
is_matmul_usable
&&
m_matmul_algo
->
packmode
()
==
PackMode
::
NO_PACK
&&
((
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
&&
((
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
((
opr
->
param
()
.
format
==
((
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
)
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK4
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -302,40 +303,42 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44,
/* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
bool
ConvBiasImpl
::
AlgoS8WinogradF23_8x8_NCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::usable"
_hash
))
{
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
param
.
filter_meta
.
ocpg
%
8
!=
0
)
return
false
;
using
Strategy
=
winograd
::
winograd_2x3_8x8_s8_nchw44
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
bool
is_matmul_usable
=
m_matmul_algo
->
usable
(
matmul_param
);
return
is_matmul_usable
&&
((
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
&&
opr
->
param
().
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS16
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
param
.
filter_meta
.
stride
[
0
]
==
1
)
&&
(
param
.
filter_meta
.
dilation
[
0
]
==
param
.
filter_meta
.
dilation
[
1
]
&&
param
.
filter_meta
.
dilation
[
0
]
==
1
)
&&
param
.
compute_mode
==
param
::
ConvBias
::
ComputeMode
::
DEFAULT
&&
param
.
src_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
param
.
bias_type
.
enumv
()
==
DTypeEnum
::
QuantizedS32
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
;
midout_iv
(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::usable"
_hash
))
{
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
param
.
filter_meta
.
ocpg
%
8
!=
0
)
return
false
;
using
Strategy
=
winograd
::
winograd_2x3_8x8_s8_nchw44
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
auto
&&
matmul_param
=
megdnn
::
winograd
::
ConvBias
<
Strategy
,
param
::
MatrixMul
::
Format
::
MK8
>
(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
bool
is_matmul_usable
=
m_matmul_algo
->
usable
(
matmul_param
);
return
is_matmul_usable
&&
((
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_WINOGRAD
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
QuantizedS16
))
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
param
.
filter_meta
.
stride
[
0
]
==
1
)
&&
(
param
.
filter_meta
.
dilation
[
0
]
==
param
.
filter_meta
.
dilation
[
1
]
&&
param
.
filter_meta
.
dilation
[
0
]
==
1
)
&&
param
.
compute_mode
==
param
::
ConvBias
::
ComputeMode
::
DEFAULT
&&
param
.
src_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
param
.
bias_type
.
enumv
()
==
DTypeEnum
::
QuantizedS32
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
;
}
MIDOUT_END
();
return
false
;
...
...
dnn/src/arm_common/conv_bias/int8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -26,16 +26,13 @@ public:
const
char
*
name
()
const
override
{
return
m_large_group
?
"S8STRD1_LARGE_GROUP"
:
"S8STRD1_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
class
ConvBiasImpl
::
AlgoS8DirectStride2
final
:
public
AlgoBase
{
...
...
@@ -47,13 +44,11 @@ public:
const
char
*
name
()
const
override
{
return
m_large_group
?
"S8STRD2_LARGE_GROUP"
:
"S8STRD2_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -62,15 +57,12 @@ public:
AlgoS8DirectNCHW44
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"S8_NCHW44_DIRECT"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
class
ConvBiasImpl
::
AlgoS8DirectNCHWNCHW44
final
:
public
AlgoBase
{
...
...
@@ -78,27 +70,22 @@ public:
AlgoS8DirectNCHWNCHW44
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"S8_CONV_NCHW_NCHW44"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
class
ConvBiasImpl
::
AlgoS8ChanWiseStride1NCHW44
final
:
public
AlgoBase
{
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"S8_CHAN_WISE_STRD1_NCHW44"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -106,12 +93,10 @@ class ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44 final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"S8_CHAN_WISE_STRD2_NCHW44"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -121,13 +106,11 @@ class ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44 final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"ARMDOTS8_NCHW_NCHW44"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
,
bool
usable
(
const
NCBKernSizeParam
&
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -142,13 +125,11 @@ public:
return
m_large_group
?
"ARMDOTS8STRD1_LARGE_GROUP"
:
"ARMDOTS8STRD1_SMALL_GROUP"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
,
bool
usable
(
const
NCBKernSizeParam
&
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -163,13 +144,11 @@ public:
:
"ARMDOTS8STRD2_SMALL_GROUP"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
,
bool
usable
(
const
NCBKernSizeParam
&
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -178,21 +157,16 @@ public:
AlgoDotS8Direct_NCHW44
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"ARMDOTS8DIRECT_NCHW44"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
,
const
char
*
name
()
const
override
{
return
"ARMDOTS8DIRECT_NCHW44"
;
}
bool
usable
(
const
NCBKernSizeParam
&
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
#endif
...
...
dnn/src/arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -161,7 +161,7 @@ static void conv_kern(const WorkspaceBundle& bundle,
}
// namespace
bool
ConvBiasImpl
::
AlgoDotS8Direct_NCHW44
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MEGDNN_MARK_USED_VAR
(
algo_selection_strategy
);
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -199,19 +199,19 @@ bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::usable(
}
bool
ConvBiasImpl
::
AlgoDotS8Direct_NCHW44
::
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
return
true
;
}
size_t
ConvBiasImpl
::
AlgoDotS8Direct_NCHW44
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoDotS8Direct_NCHW44
::
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8
,
midout_iv
(
"ALGODOTS8DIRECT_NCHW44"
_hash
))
{
auto
fm
=
param
.
filter_meta
;
...
...
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -189,7 +189,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle,
}
bool
ConvBiasImpl
::
AlgoS8DirectNCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MEGDNN_MARK_USED_VAR
(
algo_selection_strategy
);
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -213,22 +213,20 @@ bool ConvBiasImpl::AlgoS8DirectNCHW44::usable(
}
bool
ConvBiasImpl
::
AlgoS8DirectNCHW44
::
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
conv_bias_impl_ptr
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
// TODO: benchmark and fix
MEGDNN_MARK_USED_VAR
(
conv_bias_impl_ptr
);
MEGDNN_MARK_USED_VAR
(
param
);
return
false
;
}
size_t
ConvBiasImpl
::
AlgoS8DirectNCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8DirectNCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
fm
=
param
.
filter_meta
;
size_t
N
=
param
.
n
;
size_t
IC
=
fm
.
icpg
;
...
...
dnn/src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -214,7 +214,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle,
}
bool
ConvBiasImpl
::
AlgoS8DirectNCHWNCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MEGDNN_MARK_USED_VAR
(
algo_selection_strategy
);
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -236,22 +236,20 @@ bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::usable(
}
bool
ConvBiasImpl
::
AlgoS8DirectNCHWNCHW44
::
is_preferred
(
megdnn
::
fallback
::
ConvBiasImpl
*
conv_bias_impl_ptr
,
const
NCBKernSizeParam
&
param
)
const
{
// TODO: benchmark and fix
MEGDNN_MARK_USED_VAR
(
conv_bias_impl_ptr
);
MEGDNN_MARK_USED_VAR
(
param
);
return
false
;
}
size_t
ConvBiasImpl
::
AlgoS8DirectNCHWNCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoS8DirectNCHWNCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
fm
=
param
.
filter_meta
;
size_t
N
=
param
.
n
;
size_t
OC
=
fm
.
ocpg
;
...
...
dnn/src/arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -172,8 +172,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle,
}
// namespace
bool
ConvBiasImpl
::
AlgoDotS8DirectNCHWNCHW44
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
auto
fh
=
fm
.
spatial
[
0
];
int
oc
=
fm
.
ocpg
;
...
...
@@ -194,13 +193,13 @@ bool ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::usable(
}
size_t
ConvBiasImpl
::
AlgoDotS8DirectNCHWNCHW44
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoDotS8DirectNCHWNCHW44
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
fm
=
param
.
filter_meta
;
const
int
batch
=
param
.
n
;
const
int
group
=
fm
.
group
;
...
...
dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -83,7 +83,7 @@ void get_rectified_size_str2(size_t IH, size_t IW, size_t OH, size_t OW,
/* ===================== direct algo ===================== */
bool
ConvBiasImpl
::
AlgoI8x8x16Direct
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
1
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -129,7 +129,7 @@ WorkspaceBundle ConvBiasImpl::AlgoI8x8x16Direct::get_bundle(
return
{
nullptr
,
{
part0
,
part1
}};
}
size_t
ConvBiasImpl
::
AlgoI8x8x16Direct
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
1
,
1
)
{
auto
bundle
=
get_bundle
(
param
);
return
bundle
.
total_size_in_bytes
();
...
...
@@ -293,7 +293,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoI8x8x16Direct::get_kimpls(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoI8x8x16Direct
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
1
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
@@ -303,7 +303,7 @@ ConvBiasImpl::AlgoI8x8x16Direct::dispatch_kerns(
/* ===================== stride-2 algo ===================== */
bool
ConvBiasImpl
::
AlgoI8x8x16Stride2
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
2
,
0
)
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -350,7 +350,7 @@ WorkspaceBundle ConvBiasImpl::AlgoI8x8x16Stride2::get_bundle(
return
{
nullptr
,
{
part0
,
part1
}};
}
size_t
ConvBiasImpl
::
AlgoI8x8x16Stride2
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
2
,
1
)
{
auto
bundle
=
get_bundle
(
param
);
return
bundle
.
total_size_in_bytes
();
...
...
@@ -513,7 +513,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoI8x8x16Stride2::get_kimpls(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoI8x8x16Stride2
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
2
,
2
)
{
return
get_kimpls
(
param
);
}
...
...
@@ -521,7 +521,7 @@ ConvBiasImpl::AlgoI8x8x16Stride2::dispatch_kerns(
return
{};
}
bool
ConvBiasImpl
::
AlgoI8x8x16Stride2Filter2
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
3
,
0
)
{
return
param
.
bias_mode
==
BiasMode
::
NO_BIAS
&&
...
...
@@ -534,7 +534,7 @@ bool ConvBiasImpl::AlgoI8x8x16Stride2Filter2::usable(
}
size_t
ConvBiasImpl
::
AlgoI8x8x16Stride2Filter2
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
3
,
1
)
{
return
conv_bias
::
get_workspace_in_bytes_conv_int8x8x16_stride2_flt2
(
param
);
...
...
@@ -545,7 +545,7 @@ size_t ConvBiasImpl::AlgoI8x8x16Stride2Filter2::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoI8x8x16Stride2Filter2
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
// return {conv_bias::conv_int8x8x16_stride2_flt2,true};
auto
kern
=
[](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_int8816_kimpl
,
3
,
2
)
{
...
...
dnn/src/arm_common/conv_bias/int8x8x16/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -35,12 +35,10 @@ public:
return
m_large_group
?
"I8816DIRECT_LARGE_GROUP"
:
"I8816DIRECT_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -64,13 +62,11 @@ public:
return
m_large_group
?
"I8816STRD2_LARGE_GROUP"
:
"I8816STRD2_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -79,13 +75,11 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"I8816STRD2F2"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
dnn/src/arm_common/conv_bias/opr_impl.cpp
浏览文件 @
ba5a43b8
...
...
@@ -232,7 +232,7 @@ void* const ConvBiasImpl::sm_arm_common_algo_type =
&
arm_common_algo_type_storage
;
bool
ConvBiasImpl
::
is_matmul_quantized_prefer
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
param
)
{
const
ConvBiasImpl
::
NCBKernSizeParam
&
param
)
const
{
// fallback::ConvBiasImpl::NCBKernParam conv_ncb_param;
fallback
::
ConvBiasImpl
::
NCBKernSizeParam
conv_ncb_param
(
param
,
0
,
param
::
MatrixMul
::
Format
::
DEFAULT
,
{},
0
,
...
...
dnn/src/arm_common/conv_bias/opr_impl.h
浏览文件 @
ba5a43b8
...
...
@@ -27,7 +27,7 @@ public:
SmallVector
<
AlgoBase
*>
algo_pack
()
override
;
bool
is_matmul_quantized_prefer
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
ncb_param
)
override
;
const
ConvBiasImpl
::
NCBKernSizeParam
&
ncb_param
)
const
override
;
class
AlgoPack
;
protected:
...
...
dnn/src/arm_common/conv_bias/quint8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -6,17 +6,18 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/quint8/algos.h"
#include "midout.h"
#include "src/arm_common/conv_bias/quint8/stride1.h"
#include "src/arm_common/conv_bias/quint8/stride2.h"
#include "src/arm_common/conv_bias/quint8/stride1_dotprod.h"
#include "src/arm_common/conv_bias/quint8/stride2.h"
#include "src/arm_common/conv_bias/quint8/stride2_dotprod.h"
#include "src/arm_common/elemwise_op.h"
#include "src/fallback/conv_bias/common.h"
#include "midout.h"
MIDOUT_DECL
(
megdnn_arm_common_conv_bias_quint8
)
...
...
@@ -25,7 +26,7 @@ using namespace arm_common;
/* ===================== stride1 algo ===================== */
bool
ConvBiasImpl
::
AlgoQU8DirectStride1
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_quint8_stride1
::
can_conv_direct_stride1_quint8
(
param
);
if
(
algo_selection_strategy
==
...
...
@@ -37,14 +38,14 @@ bool ConvBiasImpl::AlgoQU8DirectStride1::usable(
}
size_t
ConvBiasImpl
::
AlgoQU8DirectStride1
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_quint8_stride1
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoQU8DirectStride1
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_quint8
,
0
,
0
)
{
return
direct_quint8_stride1
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -54,7 +55,7 @@ ConvBiasImpl::AlgoQU8DirectStride1::dispatch_kerns(
/* ===================== stride2 algo ===================== */
bool
ConvBiasImpl
::
AlgoQU8DirectStride2
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_quint8_stride2
::
can_conv_direct_stride2_quint8
(
param
);
if
(
algo_selection_strategy
==
...
...
@@ -66,14 +67,14 @@ bool ConvBiasImpl::AlgoQU8DirectStride2::usable(
}
size_t
ConvBiasImpl
::
AlgoQU8DirectStride2
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_quint8_stride2
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoQU8DirectStride2
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_quint8
,
0
,
1
)
{
return
direct_quint8_stride2
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -83,7 +84,7 @@ ConvBiasImpl::AlgoQU8DirectStride2::dispatch_kerns(
#if __ARM_FEATURE_DOTPROD
/* ===================== stride1 algo ===================== */
bool
ConvBiasImpl
::
AlgoDotU8DirectStride1
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_dotprod_quint8_stride1
::
can_conv_direct_stride1_quint8
(
...
...
@@ -97,7 +98,7 @@ bool ConvBiasImpl::AlgoDotU8DirectStride1::usable(
}
size_t
ConvBiasImpl
::
AlgoDotU8DirectStride1
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_dotprod_quint8_stride1
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
...
...
@@ -105,7 +106,7 @@ size_t ConvBiasImpl::AlgoDotU8DirectStride1::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoDotU8DirectStride1
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_quint8
,
1
,
0
)
{
return
direct_dotprod_quint8_stride1
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
@@ -115,7 +116,7 @@ ConvBiasImpl::AlgoDotU8DirectStride1::dispatch_kerns(
/* ===================== stride2 algo ===================== */
bool
ConvBiasImpl
::
AlgoDotU8DirectStride2
::
usable
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
bool
avaible
=
direct_dotprod_quint8_stride2
::
can_conv_direct_stride2_quint8
(
...
...
@@ -129,7 +130,7 @@ bool ConvBiasImpl::AlgoDotU8DirectStride2::usable(
}
size_t
ConvBiasImpl
::
AlgoDotU8DirectStride2
::
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
bundle
=
direct_dotprod_quint8_stride2
::
get_bundle
(
param
,
m_large_group
);
return
bundle
.
total_size_in_bytes
();
...
...
@@ -137,7 +138,7 @@ size_t ConvBiasImpl::AlgoDotU8DirectStride2::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoDotU8DirectStride2
::
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_arm_common_conv_bias_quint8
,
1
,
1
)
{
return
direct_dotprod_quint8_stride2
::
get_kimpls
(
param
,
m_large_group
);
}
...
...
dnn/src/arm_common/conv_bias/quint8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
...
...
@@ -26,13 +27,11 @@ public:
return
m_large_group
?
"QU8STRD1_LARGE_GROUP"
:
"QU8STRD1_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -45,16 +44,14 @@ public:
const
char
*
name
()
const
override
{
return
m_large_group
?
"QU8STRD2_LARGE_GROUP"
:
"QU8STRD2_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
#if
__ARM_FEATURE_DOTPROD
#if __ARM_FEATURE_DOTPROD
class
ConvBiasImpl
::
AlgoDotU8DirectStride1
final
:
public
AlgoBase
{
bool
m_large_group
;
...
...
@@ -66,13 +63,11 @@ public:
:
"ARMDOTU8STRD1_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
...
...
@@ -86,13 +81,11 @@ public:
return
m_large_group
?
"ARMDOTU8STRD2_LARGE_GROUP"
:
"ARMDOTU8STRD2_SMALL_GROUP"
;
}
bool
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
};
#endif
...
...
dnn/src/armv7/conv_bias/int8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -26,9 +26,8 @@ using namespace armv7;
/* ===================== matrix mul algo ===================== */
bool
ConvBiasImpl
::
AlgoS8MatrixMul
::
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
opr
);
auto
&&
fm
=
param
.
filter_meta
;
return
param
.
src_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
...
...
dnn/src/armv7/conv_bias/int8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -27,14 +27,12 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"S8MATMUL"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
return
{{
kimpl
,
{
group
,
1
_z
,
1
_z
}}};
...
...
dnn/src/armv7/conv_bias/quint8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -26,9 +26,8 @@ using namespace armv7;
/* ===================== matrix mul algo ===================== */
bool
ConvBiasImpl
::
AlgoQU8MatrixMul
::
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
opr
);
auto
&&
fm
=
param
.
filter_meta
;
return
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
...
...
dnn/src/armv7/conv_bias/quint8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -27,15 +27,13 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"QU8MATMUL"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
fallback
::
ConvBiasImpl
::
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
return
{{
kimpl
,
{
group
,
1
_z
,
1
_z
}}};
...
...
dnn/src/fallback/conv_bias/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -10,6 +10,7 @@
*/
#include "src/fallback/conv_bias/algos.h"
#include "megdnn/opr_param_defs.h"
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/winograd/strategy.h"
#include "src/naive/convolution/helper.h"
...
...
@@ -21,18 +22,28 @@ using namespace fallback;
namespace
{
param
::
Convolution
get_param_convolution
(
const
param
::
ConvBias
param
)
{
param
::
Convolution
ret
{
param
.
mode
,
param
.
pad_h
,
param
.
pad_w
,
param
.
stride_h
,
param
.
stride_w
,
param
.
dilate_h
,
param
.
dilate_w
,
param
::
Convolution
::
Sparse
::
DENSE
,
param
.
format
};
return
ret
;
/*!
 * \brief Build a param::Convolution from the canonized filter meta carried by
 *      a conv-bias kernel size param.
 *
 * The mode is CONVOLUTION when filter_meta.should_flip is set and
 * CROSS_CORRELATION otherwise. Padding, stride, dilation and format are copied
 * from filter_meta, and the sparse mode is always DENSE.
 *
 * \param param kernel size param whose filter_meta is read
 * \return the equivalent param::Convolution
 */
param::Convolution get_param_convolution(
        const ConvBiasImpl::NCBKernSizeParam& param) {
    const auto& fm = param.filter_meta;
    const param::Convolution::Mode mode =
            fm.should_flip ? param::Convolution::Mode::CONVOLUTION
                           : param::Convolution::Mode::CROSS_CORRELATION;
    // Positional order is: mode, pad_h, pad_w, stride_h, stride_w, dilate_h,
    // dilate_w, sparse, format. Dilation is passed as [0], [1] to match the
    // padding and stride arguments (it was previously passed swapped).
    // NOTE(review): assumes index 0 is the height axis for all three arrays,
    // as the padding/stride arguments imply -- confirm against
    // CanonizedFilterMeta.
    return param::Convolution{mode,
                              fm.padding[0],
                              fm.padding[1],
                              fm.stride[0],
                              fm.stride[1],
                              fm.dilation[0],
                              fm.dilation[1],
                              param::Convolution::Sparse::DENSE,
                              fm.format};
}
TensorLayoutArray
get_layouts
(
const
param
::
ConvBias
&
param
,
const
ConvBiasImpl
::
NCBKernSizeParam
&
p
)
{
megdnn_assert
(
param
.
format
==
param
::
ConvBias
::
Format
::
NCHW
);
TensorLayoutArray
get_layouts
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
p
)
{
megdnn_assert
(
p
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
);
UNPACK_CONV_NCB_KERN_SIZES
(
p
);
MEGDNN_MARK_USED_VAR
(
SH
);
MEGDNN_MARK_USED_VAR
(
SW
);
...
...
@@ -53,14 +64,14 @@ TensorLayoutArray get_layouts(const param::ConvBias& param,
return
{
src_layout
,
filter_layout
,
bias_layout
,
dst_layout
};
}
void
kern_default
(
param
::
ConvBias
param
,
const
ConvBiasImpl
::
NCBKernParam
&
p
)
{
void
kern_default
(
const
ConvBiasImpl
::
NCBKernParam
&
p
)
{
dt_byte
*
workspace_ptr
=
static_cast
<
dt_byte
*>
(
p
.
workspace_ptr
);
auto
filter_meta_ptr
=
reinterpret_cast
<
const
ConvBiasForward
::
CanonizedFilterMeta
*>
(
&
p
.
filter_meta
);
auto
filter_meta
=
*
filter_meta_ptr
;
auto
layouts
=
get_layouts
(
p
aram
,
p
);
auto
layouts
=
get_layouts
(
p
);
TensorND
src
{
reinterpret_cast
<
dt_byte
*>
(
const_cast
<
void
*>
(
p
.
src_ptr
)),
layouts
[
0
]};
...
...
@@ -83,7 +94,7 @@ void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) {
bias.layout.dtype.enumv() == \
DTypeTrait<dtype::bias_dt>::enumv) && \
sfb.layout.dtype.enumv() == DTypeTrait<dtype::out_dt>::enumv && \
p
aram.compute_mode == param::ConvBias::ComputeMode::cmode) {
\
p
.compute_mode == param::ConvBias::ComputeMode::cmode) {
\
func(src, filter, bias, sfb, workspace_ptr, filter_meta); \
}
#define DISPATCH(in_dt, out_dt) \
...
...
@@ -118,7 +129,7 @@ void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) {
auto
res
=
sfb
;
using
NonlineMode
=
param
::
ConvBias
::
NonlineMode
;
switch
(
p
aram
.
nonlineMode
)
{
switch
(
p
.
nonlineMode
)
{
#define cb(_mode) \
case NonlineMode::_mode: { \
if (res.layout.dtype.category() != DTypeCategory::QUANTIZED) { \
...
...
@@ -168,24 +179,23 @@ MIDOUT_DECL(megdnn_fallback_naive)
/* ======================= AlgoNaive ======================== */
bool
ConvBiasImpl
::
AlgoNaive
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_naive
,
0
)
{
return
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
;
return
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
;
}
MIDOUT_END
();
return
false
;
}
size_t
ConvBiasImpl
::
AlgoNaive
::
get_workspace
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
p
)
const
{
size_t
ConvBiasImpl
::
AlgoNaive
::
get_workspace
(
const
NCBKernSizeParam
&
p
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_naive
,
1
)
{
auto
layouts
=
get_layouts
(
opr
->
param
(),
p
);
auto
layouts
=
get_layouts
(
p
);
//! When group>1 or n>1, this algo will parallel by group and n
size_t
nr_threads
=
p
.
nr_threads
;
auto
conv_opr
=
inplace_cpu_handle
()
->
create_operator
<
ConvolutionForward
>
();
conv_opr
->
param
()
=
get_param_convolution
(
opr
->
param
()
);
conv_opr
->
param
()
=
get_param_convolution
(
p
);
if
(
p
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
||
p
.
dst_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
)
{
TensorLayout
conv_dst_layout
;
...
...
@@ -201,15 +211,14 @@ size_t ConvBiasImpl::AlgoNaive::get_workspace(ConvBiasImpl* opr,
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoNaive
::
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
p
)
const
{
param
::
ConvBias
opr_param
=
opr
->
param
();
size_t
workspace_size
=
get_workspace
(
opr
,
p
);
const
NCBKernSizeParam
&
p
)
const
{
size_t
workspace_size
=
get_workspace
(
p
);
//! When group>1 or n>1, this algo will parallel by group and n
size_t
nr_threads
=
p
.
nr_threads
;
size_t
GROUP
=
p
.
filter_meta
.
group
;
size_t
N
=
p
.
n
;
size_t
workspace_per_thread
=
workspace_size
/
nr_threads
;
auto
kern
=
[
opr_param
,
workspace_per_thread
](
auto
kern
=
[
workspace_per_thread
](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
{
MIDOUT_BEGIN
(
megdnn_fallback_naive
,
2
)
{
...
...
@@ -224,7 +233,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoNaive::dispatch_kerns(
thread_param
.
dst_ptr
=
param
.
dst
<
void
>
(
batch_id
,
group_id
);
thread_param
.
src_ptr
=
param
.
src
<
void
>
(
batch_id
,
group_id
);
thread_param
.
bias_ptr
=
param
.
bias
<
void
>
(
batch_id
,
group_id
);
kern_default
(
opr_param
,
thread_param
);
kern_default
(
thread_param
);
}
MIDOUT_END
();
};
...
...
@@ -235,10 +244,9 @@ MIDOUT_DECL(megdnn_fallback_winograd)
/* ======================= AlgoWinogradF32 ======================== */
bool
ConvBiasImpl
::
AlgoWinogradF32
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
1
,
0
)
{
using
Strategy
=
fallback
::
winograd
::
winograd_2x3_1x1_f
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -246,13 +254,13 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
strategy
,
UNIT_TILE_SIZE
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -268,7 +276,7 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
}
size_t
ConvBiasImpl
::
AlgoWinogradF32
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
p
)
const
{
const
NCBKernSizeParam
&
p
)
const
{
MEGDNN_MARK_USED_VAR
(
p
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
1
,
1
)
{
fallback
::
winograd
::
winograd_2x3_1x1_f
strategy
(
...
...
@@ -284,7 +292,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoWinogradF32
::
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
1
,
2
)
{
fallback
::
winograd
::
winograd_2x3_1x1_f
strategy
(
...
...
@@ -302,10 +310,9 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
/* ======================= AlgoWinogradF32 4x4 ======================== */
bool
ConvBiasImpl
::
AlgoWinogradF32_4x4
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
2
,
0
)
{
if
(
param
.
filter_meta
.
icpg
%
4
!=
0
||
param
.
filter_meta
.
ocpg
%
4
!=
0
)
return
false
;
...
...
@@ -317,13 +324,13 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
strategy
,
UNIT_TILE_SIZE
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK4
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -339,7 +346,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
}
size_t
ConvBiasImpl
::
AlgoWinogradF32_4x4
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
p
)
const
{
const
NCBKernSizeParam
&
p
)
const
{
MEGDNN_MARK_USED_VAR
(
p
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
2
,
1
)
{
fallback
::
winograd
::
winograd_2x3_4x4_f
strategy
(
...
...
@@ -356,7 +363,7 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoWinogradF32_4x4
::
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
2
,
2
)
{
fallback
::
winograd
::
winograd_2x3_4x4_f
strategy
(
...
...
@@ -374,10 +381,9 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(
/* ======================= AlgoWinogradQS8 ======================== */
bool
ConvBiasImpl
::
AlgoWinogradQS8
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
3
,
0
)
{
using
Strategy
=
fallback
::
winograd
::
winograd_2x3_1x1_qs8
;
Strategy
strategy
(
param
.
src_type
,
param
.
filter_type
,
param
.
dst_type
);
...
...
@@ -386,13 +392,13 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
DEFAULT
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -408,7 +414,7 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
}
size_t
ConvBiasImpl
::
AlgoWinogradQS8
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
p
)
const
{
const
NCBKernSizeParam
&
p
)
const
{
MEGDNN_MARK_USED_VAR
(
p
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
3
,
1
)
{
fallback
::
winograd
::
winograd_2x3_1x1_qs8
strategy
(
...
...
@@ -424,7 +430,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoWinogradQS8
::
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
3
,
2
)
{
fallback
::
winograd
::
winograd_2x3_1x1_qs8
strategy
(
...
...
@@ -442,10 +448,9 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(
/* ======================= AlgoWinogradQS8 8x8 ======================== */
bool
ConvBiasImpl
::
AlgoWinogradQS8_8x8
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
4
,
0
)
{
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
param
.
filter_meta
.
ocpg
%
8
!=
0
)
return
false
;
...
...
@@ -457,13 +462,13 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
strategy
,
UNIT_TILE_SIZE
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -479,7 +484,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
}
size_t
ConvBiasImpl
::
AlgoWinogradQS8_8x8
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
p
)
const
{
const
NCBKernSizeParam
&
p
)
const
{
MEGDNN_MARK_USED_VAR
(
p
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
4
,
1
)
{
fallback
::
winograd
::
winograd_2x3_8x8_qs8
strategy
(
...
...
@@ -496,7 +501,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoWinogradQS8_8x8
::
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MIDOUT_BEGIN
(
megdnn_fallback_winograd
,
4
,
2
)
{
fallback
::
winograd
::
winograd_2x3_8x8_qs8
strategy
(
...
...
dnn/src/fallback/conv_bias/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -22,12 +22,10 @@ class ConvBiasImpl::AlgoNaive final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"FALLBACK_NAIVE"
;
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
};
class
ConvBiasImpl
::
AlgoWinogradF32
final
:
public
AlgoBase
{
...
...
@@ -43,12 +41,10 @@ public:
}
return
m_name
.
c_str
();
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
private:
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
...
...
@@ -69,12 +65,10 @@ public:
}
return
m_name
.
c_str
();
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
private:
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
...
...
@@ -95,12 +89,10 @@ public:
}
return
m_name
.
c_str
();
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
private:
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
...
...
@@ -121,12 +113,10 @@ public:
}
return
m_name
.
c_str
();
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
const
NCBKernSizeParam
&
)
const
override
;
private:
MatrixMulImpl
::
AlgoBase
*
m_matmul_algo
;
...
...
dnn/src/fallback/conv_bias/common.h
浏览文件 @
ba5a43b8
...
...
@@ -140,22 +140,17 @@ using BiasMode = ConvBiasForward::BiasMode;
#define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \
bool is_reproducible() const override { return true; } \
bool usable(
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
\
bool usable(
const NCBKernSizeParam& param,
\
AlgoSelectionStrategy algo_selection_strategy) const override; \
size_t get_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) const override; \
virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \
const NCBKernSizeParam& param) \
size_t get_workspace(const NCBKernSizeParam& param) const override; \
virtual SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam& param) \
const override; \
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \
const override; \
size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) \
const NCBKernSizeParam& param) const override; \
size_t get_preprocess_workspace(const NCBKernSizeParam& param) \
const override; \
virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \
const override; \
const NCBKernSizeParam& param) const override; \
\
private: \
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \
...
...
dnn/src/fallback/conv_bias/conv1x1/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -48,7 +48,7 @@ size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic(
}
size_t
ConvBiasImpl
::
AlgoConv1x1
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
size_t
OH
=
param
.
osz
[
0
];
size_t
OW
=
param
.
osz
[
1
];
size_t
compt_oc_block_size
=
get_oc_tile_size_heuristic
(
param
);
...
...
@@ -90,7 +90,7 @@ size_t ConvBiasImpl::AlgoConv1x1::get_workspace(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoConv1x1
::
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ret_kern
;
size_t
OH
=
param
.
osz
[
0
];
size_t
OW
=
param
.
osz
[
1
];
...
...
@@ -138,11 +138,11 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
//! get thread bundle
thread_bundle
=
utils
::
get_thread_bundle
(
param
,
matmul_bundle
.
get_size
(
2
),
compt_oc_block_size
);
compt_oc_block_size
);
Conv1x1StrategyBase
*
conv1x1_strategy
=
Conv1x1Factory
::
make_conv1x1_strategy
(
param
,
pack_mode
,
opr
->
param
()
.
format
);
param
.
filter_meta
.
format
);
auto
kern_packA
=
[
this
,
whole_bundle
,
matmul_bundle
,
param
,
compt_oc_block_size
,
conv1x1_strategy
](
...
...
@@ -180,13 +180,12 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
return
ret_kern
;
}
bool
ConvBiasImpl
::
AlgoConv1x1
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
ConvBiasImpl
::
AlgoConv1x1
::
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_conv1x1
,
0
,
2
)
{
if
(
opr
->
param
()
.
format
!=
param
::
ConvBias
::
Format
::
NCHW
&&
opr
->
param
()
.
format
!=
param
::
ConvBias
::
Format
::
NCHW44
&&
opr
->
param
()
.
format
!=
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
if
(
param
.
filter_meta
.
format
!=
param
::
ConvBias
::
Format
::
NCHW
&&
param
.
filter_meta
.
format
!=
param
::
ConvBias
::
Format
::
NCHW44
&&
param
.
filter_meta
.
format
!=
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
return
false
;
size_t
FH
=
param
.
filter_meta
.
spatial
[
0
],
...
...
@@ -199,7 +198,7 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
if
(
FH
!=
1
||
FW
!=
1
||
PH
||
PW
||
SH
!=
1
||
SW
!=
1
)
return
false
;
if
(
param
.
src_type
.
enumv
()
!=
param
.
filter_type
.
enumv
())
{
if
(
param
.
src_type
.
enumv
()
!=
param
.
filter_type
.
enumv
())
{
return
false
;
}
...
...
@@ -225,8 +224,8 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
}
}
if
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
if
(
param
.
filter_meta
.
icpg
<
4
_z
||
param
.
filter_meta
.
icpg
==
1
||
param
.
filter_meta
.
ocpg
==
1
)
{
return
false
;
...
...
@@ -236,13 +235,14 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
size_t
OH
=
param
.
osz
[
0
];
size_t
OW
=
param
.
osz
[
1
];
MatrixMulImpl
::
KernSizeParam
matmul_param
=
utils
::
get_matmul_kern_param
(
param
,
OH
*
OW
,
get_oc_tile_size_heuristic
(
param
));
MatrixMulImpl
::
KernSizeParam
matmul_param
=
utils
::
get_matmul_kern_param
(
param
,
OH
*
OW
,
get_oc_tile_size_heuristic
(
param
));
bool
matmul_usable
=
m_matmul_algo
->
usable
(
matmul_param
);
auto
pack_mode
=
m_matmul_algo
->
packmode
();
bool
strategy_usable
=
Conv1x1Factory
::
can_make_conv1x1_strategy
(
param
,
pack_mode
,
opr
->
param
()
.
format
);
param
,
pack_mode
,
param
.
filter_meta
.
format
);
return
matmul_usable
&&
strategy_usable
&&
(
param
.
filter_meta
.
dilation
[
0
]
==
...
...
@@ -255,7 +255,7 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
}
bool
ConvBiasImpl
::
AlgoConv1x1
::
is_preferred
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
size_t
OH
=
param
.
osz
[
0
];
size_t
OW
=
param
.
osz
[
1
];
if
(
OH
*
OW
!=
1
)
{
...
...
@@ -265,8 +265,8 @@ bool ConvBiasImpl::AlgoConv1x1::is_preferred(
if
(
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Int8
&&
param
.
filter_type
.
enumv
()
==
DTypeEnum
::
Int8
&&
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
Int16
)
{
return
true
;
}
return
true
;
}
#elif MEGDNN_X86
size_t
OC
=
param
.
filter_meta
.
ocpg
;
if
(
OC
>
2
||
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Float32
)
...
...
@@ -276,4 +276,4 @@ bool ConvBiasImpl::AlgoConv1x1::is_preferred(
}
}
// vim: syntax=cpp.doxygen
\ No newline at end of file
// vim: syntax=cpp.doxygen
dnn/src/fallback/conv_bias/conv1x1/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -34,14 +34,13 @@ public:
return
m_name
.
c_str
();
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
)
const
override
;
protected:
size_t
get_oc_tile_size_heuristic
(
const
NCBKernSizeParam
&
param
)
const
;
...
...
dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp
浏览文件 @
ba5a43b8
...
...
@@ -249,7 +249,7 @@ size_t ConvBiasImpl::AlgoConv1x1Gemv::get_oc_tile_size_heuristic(
}
size_t
ConvBiasImpl
::
AlgoConv1x1Gemv
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_conv1x1_gemv
,
midout_iv
(
"AlgoConv1x1Gemv::get_workspace"
_hash
))
{
size_t
compt_oc_block_size
=
get_oc_tile_size_heuristic
(
param
);
...
...
@@ -265,7 +265,7 @@ size_t ConvBiasImpl::AlgoConv1x1Gemv::get_workspace(
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoConv1x1Gemv
::
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ret_kern
;
size_t
OC
=
param
.
filter_meta
.
ocpg
;
size_t
compt_oc_block_size
=
get_oc_tile_size_heuristic
(
param
);
...
...
@@ -311,7 +311,7 @@ ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns(
} \
MIDOUT_END()
switch
(
opr
->
param
()
.
format
)
{
switch
(
param
.
filter_meta
.
format
)
{
case
param
::
ConvBias
::
Format
::
NCHW
:
cb1
(
param
::
ConvBias
::
Format
::
NCHW
,
dt_float32
,
dt_float32
,
PostprocessMode
::
FLOAT
,
"NCHW::GEMV::FLOAT"
_hash
);
...
...
@@ -401,18 +401,18 @@ ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns(
return
ret_kern
;
}
bool
ConvBiasImpl
::
AlgoConv1x1Gemv
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
ConvBiasImpl
::
AlgoConv1x1Gemv
::
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_conv1x1_gemv
,
midout_iv
(
"AlgoConv1x1Gemv::usable"
_hash
))
{
auto
format
=
param
.
filter_meta
.
format
;
#if MEGDNN_X86
if
(
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW
)
if
(
format
!=
param
::
ConvBias
::
Format
::
NCHW
)
return
false
;
#elif MEGDNN_AARCH64 || MEGDNN_ARMV7
if
(
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW
&&
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW44
&&
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
if
(
format
!=
param
::
ConvBias
::
Format
::
NCHW
&&
format
!=
param
::
ConvBias
::
Format
::
NCHW44
&&
format
!=
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
return
false
;
#endif
...
...
@@ -469,13 +469,13 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr,
return
false
;
}
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
if
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
if
(
format
==
param
::
ConvBias
::
Format
::
NCHW44
)
{
if
(
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
Float32
&&
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
Int8
&&
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
QuantizedS8
)
{
return
false
;
}
}
else
if
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
}
else
if
(
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
if
(
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
Int8
&&
param
.
src_type
.
enumv
()
!=
DTypeEnum
::
QuantizedS8
)
{
return
false
;
...
...
@@ -492,11 +492,11 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr,
}
bool
ConvBiasImpl
::
AlgoConv1x1Gemv
::
is_preferred
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_conv1x1_gemv
,
midout_iv
(
"AlgoConv1x1Gemv::is_preferred"
_hash
))
{
#if (MEGDNN_ARMV7 || MEGDNN_AARCH64)
if
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW
&&
if
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW
&&
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
)
{
return
false
;
}
...
...
@@ -507,4 +507,4 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::is_preferred(
return
false
;
}
// vim: syntax=cpp.doxygen
\ No newline at end of file
// vim: syntax=cpp.doxygen
dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h
浏览文件 @
ba5a43b8
...
...
@@ -24,18 +24,15 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"CONV1x1_GEMV"
;
}
const
char
*
name
()
const
override
{
return
"CONV1x1_GEMV"
;
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
)
const
override
;
protected:
size_t
get_oc_tile_size_heuristic
(
const
NCBKernSizeParam
&
param
)
const
;
...
...
dnn/src/fallback/conv_bias/im2col/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -478,7 +478,7 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
}
size_t
ConvBiasImpl
::
AlgoIm2col
::
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
p
)
const
{
const
NCBKernSizeParam
&
p
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
0
,
0
)
{
return
get_bundle
(
p
).
total_size_in_bytes
();
}
...
...
@@ -487,7 +487,7 @@ size_t ConvBiasImpl::AlgoIm2col::get_workspace(
}
SmallVector
<
ConvBiasImpl
::
NCBKern
>
ConvBiasImpl
::
AlgoIm2col
::
dispatch_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
0
,
1
)
{
UNPACK_CONV_F32_NCB_KERN_SIZES
(
param
);
MEGDNN_MARK_USED_VAR
(
SH
);
...
...
@@ -660,12 +660,13 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns(
}
bool
ConvBiasImpl
::
AlgoIm2col
::
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MIDOUT_BEGIN
(
megdnn_fallback_im2col
,
0
,
2
)
{
if
(
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW
&&
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW44_DOT
&&
opr
->
param
().
format
!=
param
::
ConvBias
::
Format
::
NCHW44
)
{
auto
format
=
param
.
filter_meta
.
format
;
if
(
format
!=
param
::
ConvBias
::
Format
::
NCHW
&&
format
!=
param
::
ConvBias
::
Format
::
NCHW44_DOT
&&
format
!=
param
::
ConvBias
::
Format
::
NCHW44
)
{
return
false
;
}
...
...
@@ -695,8 +696,8 @@ bool ConvBiasImpl::AlgoIm2col::usable(
}
fallback
::
MatrixMulImpl
::
AlgoBase
::
MatmulDescription
mdesc
=
m_matmul_algo
->
matmul_description
();
if
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
if
(
format
==
param
::
ConvBias
::
Format
::
NCHW44
||
format
==
param
::
ConvBias
::
Format
::
NCHW44_DOT
)
{
//! current NCHW44 im2col only support DEFAULT mode matmul
if
(
mdesc
.
packmode
!=
Pack_Mode
::
DEFAULT
)
{
return
false
;
...
...
dnn/src/fallback/conv_bias/im2col/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -15,6 +15,8 @@
#include "src/common/utils.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h"
#include "src/common/opr_delegate.h"
namespace
megdnn
{
namespace
fallback
{
...
...
@@ -54,16 +56,18 @@ public:
}
return
m_name
.
c_str
();
}
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
{
if
(
param
.
src_type
.
category
()
==
DTypeCategory
::
QUANTIZED
)
{
return
opr
->
is_matmul_quantized_prefer
(
param
);
static
CpuOprDelegationStorage
<
1
>
storage
;
auto
conv_bias_opr
=
storage
.
get
<
ConvBias
,
0
>
();
return
static_cast
<
ConvBiasImpl
*>
(
conv_bias_opr
)
->
is_matmul_quantized_prefer
(
param
);
}
auto
&&
fm
=
param
.
filter_meta
;
auto
OC
=
fm
.
ocpg
,
IC
=
fm
.
icpg
;
...
...
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
ba5a43b8
...
...
@@ -54,7 +54,6 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj {
public:
AlgoPack
()
{
refhold
.
emplace_back
(
new
AlgoConv1x1Gemv
());
all_algos
.
emplace_back
(
refhold
.
back
().
get
());
...
...
@@ -121,7 +120,7 @@ bool ConvBiasImpl::is_naive_algo(ConvBiasImpl::Algorithm* algo) {
}
#define NCB_ALGO_FUNC(name, algo, param) \
static_cast<AlgoBase*>(algo)->name(
this,
param)
static_cast<AlgoBase*>(algo)->name(param)
void
ConvBiasImpl
::
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
bias
,
_megdnn_tensor_in
z
,
...
...
@@ -243,11 +242,10 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic_with_ncb(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
for
(
auto
i
:
get_all_algorithms_with_ncb
(
param
))
{
size_t
need_workspace
=
NCB_ALGO_FUNC
(
get_workspace
,
i
,
param
);
if
(
static_cast
<
AlgoBase
*>
(
i
)
->
usable_reproducible
(
this
,
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
reproducible
)
&&
need_workspace
<=
workspace_limit_in_bytes
)
{
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
reproducible
)
&&
NCB_ALGO_FUNC
(
get_workspace
,
i
,
param
)
<=
workspace_limit_in_bytes
)
{
return
i
;
}
}
...
...
@@ -392,8 +390,8 @@ std::vector<ConvBiasImpl::Algorithm*> ConvBiasImpl::get_all_algorithms_with_ncb(
std
::
vector
<
Algorithm
*>
algos
;
std
::
vector
<
Algorithm
*>
prefer_algos
;
for
(
auto
&&
algo
:
algo_pack
())
{
if
(
algo
->
usable
(
this
,
param
,
AlgoSelectionStrategy
::
FULL_RUN
))
{
if
(
algo
->
is_preferred
(
this
,
param
))
{
if
(
algo
->
usable
(
param
,
AlgoSelectionStrategy
::
FULL_RUN
))
{
if
(
algo
->
is_preferred
(
param
))
{
prefer_algos
.
push_back
(
algo
);
}
else
{
algos
.
push_back
(
algo
);
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
ba5a43b8
...
...
@@ -193,7 +193,7 @@ public:
//! move arm_common to fallback
virtual
bool
is_matmul_quantized_prefer
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
ncb_param
)
{
const
ConvBiasImpl
::
NCBKernSizeParam
&
ncb_param
)
const
{
MEGDNN_MARK_USED_VAR
(
ncb_param
);
return
true
;
};
...
...
@@ -209,43 +209,39 @@ public:
public:
virtual
~
AlgoBase
()
=
default
;
virtual
bool
usable
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
=
0
;
virtual
size_t
get_workspace
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
=
0
;
virtual
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
=
0
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
=
0
;
const
NCBKernSizeParam
&
param
)
const
=
0
;
virtual
SmallVector
<
NCBKern
>
dispatch_preprocess_kerns
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
{
const
NCBKernSizeParam
&
)
const
{
return
{};
};
//! get the layouts of weight_prerocess dst
virtual
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
{
const
NCBKernSizeParam
&
)
const
{
return
{};
};
//! get the workspace when weight_prerocess
virtual
size_t
get_preprocess_workspace
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
{
virtual
size_t
get_preprocess_workspace
(
const
NCBKernSizeParam
&
)
const
{
return
0
_z
;
};
//! Temporarily used to identify whether the matmul algorithm is
//! is_preferred.
virtual
bool
is_preferred
(
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
{
virtual
bool
is_preferred
(
const
NCBKernSizeParam
&
)
const
{
return
false
;
}
bool
usable_reproducible
(
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable_reproducible
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
,
bool
reproducible
=
true
)
const
{
return
(
!
reproducible
||
is_reproducible
())
&&
usable
(
opr
,
param
,
algo_selection_strategy
);
usable
(
param
,
algo_selection_strategy
);
}
};
...
...
dnn/src/fallback/conv_bias/winograd/winograd.h
浏览文件 @
ba5a43b8
...
...
@@ -501,9 +501,10 @@ public:
Strategy
strategy
=
m_strategy
;
SmallVector
<
NCBKern
>
kerns
;
auto
filter_process_kern
=
[
strategy
,
bundle
,
&
preprocessed_dst
](
[
strategy
,
bundle
,
&
preprocessed_dst
,
this
](
const
NCBKernParam
&
ncb_param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
MEGDNN_MARK_USED_VAR
(
this
);
MIDOUT_BEGIN
(
megdnn_fallback_conv_bias_winograd_common
,
midout_iv
(
"filter_preprocess"
_hash
))
{
bundle
.
set
(
ncb_param
.
workspace_ptr
);
...
...
@@ -569,9 +570,10 @@ public:
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW44
))
{
auto
filter_process_kern
=
[
strategy
=
m_strategy
,
bundle_top
,
bundle_compute
](
[
strategy
=
m_strategy
,
bundle_top
,
bundle_compute
,
this
](
const
NCBKernParam
&
ncb_param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
MEGDNN_MARK_USED_VAR
(
this
);
MIDOUT_BEGIN
(
megdnn_fallback_conv_bias_winograd_common
,
midout_iv
(
"filter_process"
_hash
))
{
bundle_top
.
set
(
ncb_param
.
workspace_ptr
);
...
...
@@ -594,9 +596,10 @@ public:
}
auto
winograd_compute_kern
=
[
strategy
=
m_strategy
,
bundle_top
,
bundle_compute
,
matmul_algo
,
matmul_param
,
unit_tile_size
,
unit_oc_size
](
const
NCBKernParam
&
ncb_param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
matmul_param
,
unit_tile_size
,
unit_oc_size
,
this
](
const
NCBKernParam
&
ncb_param
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
MEGDNN_MARK_USED_VAR
(
this
);
MIDOUT_BEGIN
(
megdnn_fallback_conv_bias_winograd_common
,
midout_iv
(
"winograd_compute"
_hash
))
{
bundle_top
.
set
(
ncb_param
.
workspace_ptr
);
...
...
@@ -728,43 +731,43 @@ public:
} \
MIDOUT_END();
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \
_matmul_format) \
size_t ConvBiasImpl::_class::get_workspace(
\
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \
_strategy, _midout_flag, \
_matmul_format); \
return 0; \
} \
size_t ConvBiasImpl::_class::get_preprocess_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, get_preprocess_workspace_size, _strategy, \
_midout_flag, _matmul_format); \
return 0; \
} \
SmallVector<TensorLayout> \
ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, deduce_preprocessed_filter_layout, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> \
ConvBiasImpl::_class::dispatch_preprocess_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \
_strategy, _midout_flag, \
_matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag,
\
_matmul_format)
\
size_t ConvBiasImpl::_class::get_workspace(
const NCBKernSizeParam& param)
\
const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size,
\
_strategy, _midout_flag,
\
_matmul_format);
\
return 0;
\
}
\
size_t ConvBiasImpl::_class::get_preprocess_workspace(
\
const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(
\
_class, get_preprocess_workspace_size, _strategy,
\
_midout_flag, _matmul_format);
\
return 0;
\
}
\
SmallVector<TensorLayout>
\
ConvBiasImpl::_class::deduce_preprocessed_filter_layout(
\
const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(
\
_class, deduce_preprocessed_filter_layout, _strategy,
\
_midout_flag, _matmul_format);
\
return {};
\
}
\
SmallVector<ConvBiasImpl::NCBKern>
\
ConvBiasImpl::_class::dispatch_preprocess_kerns(
\
const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns,
\
_strategy, _midout_flag,
\
_matmul_format);
\
return {};
\
}
\
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns(
\
const NCBKernSizeParam& param) const {
\
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy,
\
_midout_flag, _matmul_format);
\
return {};
\
}
// vim: syntax=cpp.doxygen
dnn/src/fallback/convolution/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -164,7 +164,7 @@ void kern_direct(const NCBKernParam& param) {
/* ===================== fallback algo ===================== */
bool
ConvolutionImpl
::
AlgoFallback
::
usable
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
return
fm
.
format
==
param
::
Convolution
::
Format
::
NCHW
&&
...
...
@@ -175,7 +175,7 @@ bool ConvolutionImpl::AlgoFallback::usable(
}
size_t
ConvolutionImpl
::
AlgoFallback
::
get_workspace
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
FH
=
param
.
filter_meta
.
spatial
[
0
],
FW
=
param
.
filter_meta
.
spatial
[
1
];
size_t
nr_threads
=
param
.
nr_threads
;
if
(
param
.
filter_meta
.
should_flip
)
{
...
...
@@ -190,11 +190,11 @@ size_t ConvolutionImpl::AlgoFallback::get_workspace(
SmallVector
<
ConvolutionImpl
::
NCBKern
>
ConvolutionImpl
::
AlgoFallback
::
dispatch_kern
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
size_t
group
=
param
.
filter_meta
.
group
;
size_t
N
=
param
.
n
;
size_t
nr_threads
=
param
.
nr_threads
;
size_t
workspace_per_thread
=
get_workspace
(
opr
,
param
)
/
nr_threads
;
size_t
workspace_per_thread
=
get_workspace
(
param
)
/
nr_threads
;
auto
kern_fallback
=
[
workspace_per_thread
](
const
NCBKernParam
&
p
,
const
NCBKernIndex
&
ncb_index
)
{
UNPACK_CONV_F32_NCB_KERN_SIZES
(
p
);
...
...
@@ -218,7 +218,7 @@ ConvolutionImpl::AlgoFallback::dispatch_kern(
/* ===================== naive algo ===================== */
bool
ConvolutionImpl
::
AlgoNaive
::
usable
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
bool
ret
=
false
;
...
...
@@ -241,7 +241,7 @@ bool ConvolutionImpl::AlgoNaive::usable(
}
SmallVector
<
ConvolutionImpl
::
NCBKern
>
ConvolutionImpl
::
AlgoNaive
::
dispatch_kern
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
size_t
N
=
param
.
n
;
size_t
group
=
param
.
filter_meta
.
group
;
#define cb(dt, cmode, compute_type) \
...
...
@@ -289,75 +289,42 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
/* ===================== default algo ===================== */
ConvolutionImpl
::
AlgoDefault
::
AlgoDefault
(
fallback
::
ConvBiasImpl
*
conv_bias_opr
,
ConvBiasImpl
::
AlgoBase
*
algorithm
)
:
m_conv_bias_opr
(
conv_bias_opr
),
m_algorithm
(
algorithm
)
{
ConvolutionImpl
::
AlgoDefault
::
AlgoDefault
(
ConvBiasImpl
::
AlgoBase
*
algorithm
)
:
m_algorithm
(
algorithm
)
{
megdnn_assert_internal
(
algorithm
);
m_name
=
ssprintf
(
"CONVOLUTION_DEFAULT_%s"
,
m_algorithm
->
name
());
}
ConvBiasImpl
::
NCBKernSizeParam
ConvolutionImpl
::
AlgoDefault
::
AlgoDefault
::
init_convbias_opr_and
_param
(
ConvBiasImpl
*
conv_bias_opr
,
const
NCBKernSizeParam
&
param
)
{
ConvolutionImpl
::
AlgoDefault
::
init_conv_bias
_param
(
const
NCBKernSizeParam
&
param
)
{
DType
bias_type
=
param
.
dst_type
;
if
(
bias_type
.
category
()
==
DTypeCategory
::
QUANTIZED
)
{
bias_type
=
dtype
::
QuantizedS32
(
mul_scale
(
param
.
src_type
,
param
.
filter_type
));
}
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_size_param
(
param
,
0
,
param
::
MatrixMul
::
Format
::
DEFAULT
,
bias_type
,
0
,
BiasMode
::
NO_BIAS
,
param
::
ConvBias
::
NonlineMode
::
IDENTITY
);
// nonline mode
conv_bias_opr
->
param
().
nonlineMode
=
conv_bias_size_param
.
nonlineMode
;
// convolution mode
if
(
conv_bias_size_param
.
filter_meta
.
should_flip
)
{
conv_bias_opr
->
param
().
mode
=
param
::
ConvolutionV0
::
Mode
::
CONVOLUTION
;
}
else
{
conv_bias_opr
->
param
().
mode
=
param
::
ConvolutionV0
::
Mode
::
CROSS_CORRELATION
;
}
// sparse
if
(
conv_bias_size_param
.
filter_meta
.
group
>
1
)
{
conv_bias_opr
->
param
().
sparse
=
param
::
ConvolutionV0
::
Sparse
::
GROUP
;
}
else
{
conv_bias_opr
->
param
().
sparse
=
param
::
ConvolutionV0
::
Sparse
::
DENSE
;
}
// format
conv_bias_opr
->
param
().
format
=
conv_bias_size_param
.
filter_meta
.
format
;
// pad stride dilate
conv_bias_opr
->
param
().
pad_h
=
conv_bias_size_param
.
filter_meta
.
padding
[
0
];
conv_bias_opr
->
param
().
pad_w
=
conv_bias_size_param
.
filter_meta
.
padding
[
1
];
conv_bias_opr
->
param
().
stride_h
=
conv_bias_size_param
.
filter_meta
.
stride
[
0
];
conv_bias_opr
->
param
().
stride_w
=
conv_bias_size_param
.
filter_meta
.
stride
[
1
];
conv_bias_opr
->
param
().
dilate_h
=
conv_bias_size_param
.
filter_meta
.
dilation
[
0
];
conv_bias_opr
->
param
().
dilate_w
=
conv_bias_size_param
.
filter_meta
.
dilation
[
1
];
// output_block_size
conv_bias_opr
->
param
().
output_block_size
=
conv_bias_size_param
.
output_block_size
;
// compute_mode
conv_bias_opr
->
param
().
compute_mode
=
conv_bias_size_param
.
compute_mode
;
return
conv_bias_size_param
;
return
{
param
,
0
,
param
::
MatrixMul
::
Format
::
DEFAULT
,
bias_type
,
0
,
BiasMode
::
NO_BIAS
,
param
::
ConvBias
::
NonlineMode
::
IDENTITY
};
}
bool
ConvolutionImpl
::
AlgoDefault
::
is_preferred
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_param
=
init_conv
bias_opr_and_param
(
m_conv_bias_opr
,
param
);
return
m_algorithm
->
is_preferred
(
m_conv_bias_opr
,
conv_bias_param
);
init_conv
_bias_param
(
param
);
return
m_algorithm
->
is_preferred
(
conv_bias_param
);
}
bool
ConvolutionImpl
::
AlgoDefault
::
usable
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_param
=
init_conv
bias_opr_and_param
(
m_conv_bias_opr
,
param
);
return
m_algorithm
->
usable
(
m_conv_bias_opr
,
conv_bias_param
,
init_conv
_bias_param
(
param
);
return
m_algorithm
->
usable
(
conv_bias_param
,
static_cast
<
ConvBiasImpl
::
AlgoSelectionStrategy
>
(
algo_selection_strategy
));
}
...
...
@@ -365,69 +332,62 @@ bool ConvolutionImpl::AlgoDefault::usable(
WorkspaceBundle
ConvolutionImpl
::
AlgoDefault
::
get_bundle
(
const
NCBKernSizeParam
&
param
)
const
{
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_param
=
init_convbias_opr_and_param
(
m_conv_bias_opr
,
param
);
m_conv_bias_opr
->
execution_policy
()
=
{
m_algorithm
};
init_conv_bias_param
(
param
);
return
WorkspaceBundle
(
nullptr
,
{
m_algorithm
->
get_workspace
(
m_conv_bias_opr
,
conv_bias_param
)});
conv_bias_param
)});
}
size_t
ConvolutionImpl
::
AlgoDefault
::
get_workspace
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
size_t
ConvolutionImpl
::
AlgoDefault
::
get_preprocess_workspace
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_param
=
init_convbias_opr_and_param
(
m_conv_bias_opr
,
param
);
m_conv_bias_opr
->
execution_policy
()
=
{
m_algorithm
};
return
m_algorithm
->
get_preprocess_workspace
(
m_conv_bias_opr
,
conv_bias_param
);
init_conv_bias_param
(
param
);
return
m_algorithm
->
get_preprocess_workspace
(
conv_bias_param
);
}
SmallVector
<
TensorLayout
>
ConvolutionImpl
::
AlgoDefault
::
deduce_preprocessed_filter_layout
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_param
=
init_convbias_opr_and_param
(
m_conv_bias_opr
,
param
);
m_conv_bias_opr
->
execution_policy
()
=
{
m_algorithm
};
return
m_algorithm
->
deduce_preprocessed_filter_layout
(
m_conv_bias_opr
,
conv_bias_param
);
init_conv_bias_param
(
param
);
return
m_algorithm
->
deduce_preprocessed_filter_layout
(
conv_bias_param
);
}
//! Return the implement preprocess kernel
SmallVector
<
ConvolutionImpl
::
NCBKern
>
ConvolutionImpl
::
AlgoDefault
::
get_preprocess_kimpl
(
::
ConvBiasImpl
*
conv_bias_opr
,
ConvBiasImpl
::
AlgoBase
*
algo
,
ConvBiasImpl
::
AlgoBase
*
algo
,
const
NCBKernSizeParam
&
param
)
{
MIDOUT_BEGIN
(
megdnn_fallback_conv
,
midout_iv
(
"get_preprocess_kimpl"
_hash
))
{
// construct the conv_bias kern param
::
ConvBiasImpl
::
NCBKernParam
conv_bias_param
;
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_size_param
=
init_convbias_opr_and_param
(
conv_bias_opr
,
param
);
static_cast
<::
ConvBiasImpl
::
NCBKernSizeParam
&>
(
conv_bias_param
)
=
conv_bias_size_param
;
init_conv_bias_param
(
param
)
;
auto
conv_bias_preprocess_kerns
=
algo
->
dispatch_preprocess_kerns
(
conv_bias_
opr
,
conv_bias_
param
);
algo
->
dispatch_preprocess_kerns
(
conv_bias_param
);
SmallVector
<
ConvolutionImpl
::
NCBKern
>
convolution_preprocess_kerns
;
//! Set the conv_bias param using convolution param
auto
set_
copy_
param_filter_workspace_ptr
=
auto
set_param_filter_workspace_ptr
=
[](
const
NCBKernParam
&
conv_param
,
::
ConvBiasImpl
::
NCBKernParam
&
co
pied
_param
)
{
co
pied
_param
.
filter_ptr
=
conv_param
.
filter_ptr
;
co
pied
_param
.
workspace_ptr
=
conv_param
.
workspace_ptr
;
co
pied
_param
.
workspace_size
=
conv_param
.
workspace_size
;
::
ConvBiasImpl
::
NCBKernParam
&
co
nv_bias
_param
)
{
co
nv_bias
_param
.
filter_ptr
=
conv_param
.
filter_ptr
;
co
nv_bias
_param
.
workspace_ptr
=
conv_param
.
workspace_ptr
;
co
nv_bias
_param
.
workspace_size
=
conv_param
.
workspace_size
;
};
for
(
size_t
i
=
0
;
i
<
conv_bias_preprocess_kerns
.
size
();
i
++
)
{
auto
kernel
=
conv_bias_preprocess_kerns
[
i
];
//! If the kerenl batch parallel
auto
run
=
[
=
](
const
NCBKernParam
&
p
,
const
NCBKernIndex
&
ncb_index
)
{
auto
copy_param
=
conv_bias_param
;
set_copy_param_filter_workspace_ptr
(
p
,
copy_param
);
kernel
.
kern
(
copy_param
,
{
ncb_index
.
thread_id
,
ncb_index
.
ndrange_id
});
auto
run
=
[
param
=
conv_bias_param
,
kernel
,
&
set_param_filter_workspace_ptr
](
const
NCBKernParam
&
p
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
set_param_filter_workspace_ptr
(
p
,
param
);
kernel
.
kern
(
param
,
{
ncb_index
.
thread_id
,
ncb_index
.
ndrange_id
});
};
convolution_preprocess_kerns
.
push_back
({
run
,
kernel
.
global_size
});
}
...
...
@@ -438,38 +398,35 @@ ConvolutionImpl::AlgoDefault::get_preprocess_kimpl(
//! Return the implement kernel
SmallVector
<
ConvolutionImpl
::
NCBKern
>
ConvolutionImpl
::
AlgoDefault
::
get_kimpl
(
::
ConvBiasImpl
*
conv_bias_opr
,
ConvBiasImpl
::
AlgoBase
*
algo
,
ConvBiasImpl
::
AlgoBase
*
algo
,
const
NCBKernSizeParam
&
param
)
{
MIDOUT_BEGIN
(
megdnn_fallback_conv
,
midout_iv
(
0
))
{
// construct the conv_bias kern param
::
ConvBiasImpl
::
NCBKernParam
conv_bias_param
;
::
ConvBiasImpl
::
NCBKernSizeParam
conv_bias_size_param
=
init_convbias_opr_and_param
(
conv_bias_opr
,
param
);
static_cast
<::
ConvBiasImpl
::
NCBKernSizeParam
&>
(
conv_bias_param
)
=
conv_bias_size_param
;
auto
conv_bias_kerns
=
algo
->
dispatch_kerns
(
conv_bias_opr
,
conv_bias_param
);
init_conv_bias_param
(
param
);
auto
&&
conv_bias_kerns
=
algo
->
dispatch_kerns
(
conv_bias_param
);
SmallVector
<
ConvolutionImpl
::
NCBKern
>
convolution_kerns
;
//! Set the conv_bias param using convolution param
auto
set_copy_param_compute_address
=
[](
const
NCBKernParam
&
conv_param
,
::
ConvBiasImpl
::
NCBKernParam
&
co
pied
_param
)
{
co
pied
_param
.
src_ptr
=
conv_param
.
src_ptr
;
co
pied
_param
.
filter_ptr
=
conv_param
.
filter_ptr
;
co
pied
_param
.
dst_ptr
=
conv_param
.
dst_ptr
;
co
pied
_param
.
workspace_ptr
=
conv_param
.
workspace_ptr
;
co
pied
_param
.
workspace_size
=
conv_param
.
workspace_size
;
::
ConvBiasImpl
::
NCBKernParam
&
co
nv_bias
_param
)
{
co
nv_bias
_param
.
src_ptr
=
conv_param
.
src_ptr
;
co
nv_bias
_param
.
filter_ptr
=
conv_param
.
filter_ptr
;
co
nv_bias
_param
.
dst_ptr
=
conv_param
.
dst_ptr
;
co
nv_bias
_param
.
workspace_ptr
=
conv_param
.
workspace_ptr
;
co
nv_bias
_param
.
workspace_size
=
conv_param
.
workspace_size
;
};
for
(
size_t
i
=
0
;
i
<
conv_bias_kerns
.
size
();
i
++
)
{
auto
kernel
=
conv_bias_kerns
[
i
];
auto
&&
kernel
=
conv_bias_kerns
[
i
];
//! If the kerenl batch parallel
auto
run
=
[
=
](
const
NCBKernParam
&
p
,
const
NCBKernIndex
&
ncb_index
)
{
auto
copy_param
=
conv_bias_param
;
set_copy_param_compute_address
(
p
,
copy_param
);
kernel
.
kern
(
copy_param
,
{
ncb_index
.
thread_id
,
ncb_index
.
ndrange_id
});
auto
run
=
[
param
=
conv_bias_param
,
kernel
,
&
set_copy_param_compute_address
](
const
NCBKernParam
&
p
,
const
NCBKernIndex
&
ncb_index
)
mutable
{
set_copy_param_compute_address
(
p
,
param
);
kernel
.
kern
(
param
,
{
ncb_index
.
thread_id
,
ncb_index
.
ndrange_id
});
};
convolution_kerns
.
push_back
({
run
,
kernel
.
global_size
});
}
...
...
dnn/src/fallback/convolution/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
...
...
@@ -35,10 +36,10 @@ void kern_naive_forward(const ConvolutionImpl::NCBKernParam& p,
src
.
layout
.
dtype
=
p
.
src_type
;
dst
.
layout
.
dtype
=
p
.
dst_type
;
if
(
p
.
filter_meta
.
format
==
param
::
Convolution
::
Format
::
NCHW
)
{
istrd
*=
p
.
isz
[
0
]
*
p
.
isz
[
1
];
ostrd
*=
p
.
osz
[
0
]
*
p
.
osz
[
1
];
src
.
layout
.
init_contiguous_stride
({
1
,
IC
,
IH
,
IW
});
dst
.
layout
.
init_contiguous_stride
({
1
,
OC
,
OH
,
OW
});
istrd
*=
p
.
isz
[
0
]
*
p
.
isz
[
1
];
ostrd
*=
p
.
osz
[
0
]
*
p
.
osz
[
1
];
src
.
layout
.
init_contiguous_stride
({
1
,
IC
,
IH
,
IW
});
dst
.
layout
.
init_contiguous_stride
({
1
,
OC
,
OH
,
OW
});
}
else
{
// Must be NHWC
megdnn_assert
(
...
...
@@ -75,14 +76,12 @@ class ConvolutionImpl::AlgoFallback final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"FALLBACK_ALGO"
;
}
bool
usable
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kern
(
ConvolutionImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
/*param*/
)
const
override
;
};
...
...
@@ -90,66 +89,55 @@ class ConvolutionImpl::AlgoNaive final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"NAIVE_ALGO"
;
}
bool
usable
(
ConvolutionImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
/*param*/
,
bool
usable
(
const
NCBKernSizeParam
&
/*param*/
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
override
{
return
0
;
};
size_t
get_workspace
(
const
NCBKernSizeParam
&
)
const
override
{
return
0
;
};
SmallVector
<
NCBKern
>
dispatch_kern
(
ConvolutionImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
/*param*/
)
const
override
;
};
class
ConvolutionImpl
::
AlgoDefault
final
:
public
AlgoBase
{
static
ConvBiasImpl
::
NCBKernSizeParam
init_conv
bias_opr_and
_param
(
ConvBiasImpl
*
conv_bias_opr
,
const
NCBKernSizeParam
&
param
);
static
ConvBiasImpl
::
NCBKernSizeParam
init_conv
_bias
_param
(
const
NCBKernSizeParam
&
param
);
WorkspaceBundle
get_bundle
(
const
NCBKernSizeParam
&
param
)
const
;
static
SmallVector
<
NCBKern
>
get_kimpl
(
ConvBiasImpl
*
conv_bias_opr
,
ConvBiasImpl
::
AlgoBase
*
algo
,
static
SmallVector
<
NCBKern
>
get_kimpl
(
ConvBiasImpl
::
AlgoBase
*
algo
,
const
NCBKernSizeParam
&
param
);
static
SmallVector
<
NCBKern
>
get_preprocess_kimpl
(
ConvBiasImpl
*
conv_bias_opr
,
ConvBiasImpl
::
AlgoBase
*
algo
,
const
NCBKernSizeParam
&
param
);
ConvBiasImpl
::
AlgoBase
*
algo
,
const
NCBKernSizeParam
&
param
);
public:
AlgoDefault
(
fallback
::
ConvBiasImpl
*
conv_bias_opr
,
ConvBiasImpl
::
AlgoBase
*
);
AlgoDefault
(
ConvBiasImpl
::
AlgoBase
*
);
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
bool
usable
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_preprocess_workspace
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_preprocess_workspace
(
const
NCBKernSizeParam
&
)
const
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
const
NCBKernSizeParam
&
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_preprocess_kern
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_preprocess_kimpl
(
m_
conv_bias_opr
,
m_
algorithm
,
param
);
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_preprocess_kimpl
(
m_algorithm
,
param
);
}
SmallVector
<
NCBKern
>
dispatch_kern
(
ConvolutionImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpl
(
m_
conv_bias_opr
,
m_
algorithm
,
param
);
return
get_kimpl
(
m_algorithm
,
param
);
}
void
*
type
()
const
override
{
return
sm_fallback_conv_algo_type
;
}
//! select matmul to the highest preference
bool
is_preferred
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
private:
std
::
string
m_name
;
fallback
::
ConvBiasImpl
*
m_conv_bias_opr
;
ConvBiasImpl
::
AlgoBase
*
m_algorithm
;
};
...
...
dnn/src/fallback/convolution/opr_impl.cpp
浏览文件 @
ba5a43b8
...
...
@@ -59,8 +59,7 @@ public:
static_cast
<
ConvBiasImpl
*>
(
conv_bias_opr
)
->
algo_pack
();
for
(
auto
&&
algorithm
:
conv_bias_algo
)
{
// fallback algo
refhold
.
emplace_back
(
new
AlgoDefault
(
static_cast
<
ConvBiasImpl
*>
(
conv_bias_opr
),
algorithm
));
refhold
.
emplace_back
(
new
AlgoDefault
(
algorithm
));
all_algos
.
emplace_back
(
refhold
.
back
().
get
());
}
...
...
@@ -82,7 +81,7 @@ bool ConvolutionImpl::is_naive_algo(ConvolutionImpl::Algorithm* algo) {
}
#define NCB_ALGO_FUNC(name, algo, param) \
static_cast<AlgoBase*>(algo)->name(
this, f
param)
static_cast<AlgoBase*>(algo)->name(param)
void
ConvolutionImpl
::
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_out
dst
,
...
...
@@ -131,7 +130,7 @@ size_t ConvolutionImpl::get_workspace_in_bytes(
return
naive
::
ConvolutionForwardImpl
::
get_workspace_in_bytes
(
src
,
filter
,
dst
,
preprocessed_filter
);
}
else
{
return
static_cast
<
AlgoBase
*>
(
algo
)
->
get_workspace
(
this
,
fparam
);
return
NCB_ALGO_FUNC
(
get_workspace
,
algo
,
fparam
);
}
}
...
...
@@ -144,8 +143,7 @@ size_t ConvolutionImpl::get_preprocess_workspace_in_bytes(
return
naive
::
ConvolutionForwardImpl
::
get_preprocess_workspace_in_bytes
(
src
,
filter
,
dst
);
}
else
{
return
static_cast
<
AlgoBase
*>
(
algo
)
->
get_preprocess_workspace
(
this
,
fparam
);
return
NCB_ALGO_FUNC
(
get_preprocess_workspace
,
algo
,
fparam
);
}
}
...
...
@@ -158,8 +156,7 @@ SmallVector<TensorLayout> ConvolutionImpl::deduce_preprocessed_filter_layout(
return
naive
::
ConvolutionForwardImpl
::
deduce_preprocessed_filter_layout
(
src
,
filter
,
dst
);
}
else
{
return
static_cast
<
AlgoBase
*>
(
algo
)
->
deduce_preprocessed_filter_layout
(
this
,
fparam
);
return
NCB_ALGO_FUNC
(
deduce_preprocessed_filter_layout
,
algo
,
fparam
);
}
}
...
...
@@ -251,8 +248,7 @@ ConvolutionImpl::NCBKernParam ConvolutionImpl::make_ncb_kern_param(
void
ConvolutionImpl
::
exec_preprocess_with_ncb_kern
(
const
NCBKernParam
&
param
,
Algorithm
*
algo
)
{
auto
kerns
=
static_cast
<
AlgoBase
*>
(
algo
)
->
dispatch_preprocess_kern
(
this
,
param
);
auto
kerns
=
NCB_ALGO_FUNC
(
dispatch_preprocess_kern
,
algo
,
param
);
auto
fallback_handle
=
handle
();
for
(
auto
kernel
:
kerns
)
{
megdnn_assert
(
...
...
@@ -272,14 +268,15 @@ void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param,
void
ConvolutionImpl
::
exec_with_ncb_kern
(
const
NCBKernParam
&
param
,
Algorithm
*
algo
)
{
auto
kerns
=
static_cast
<
AlgoBase
*>
(
algo
)
->
dispatch_kern
(
this
,
param
);
auto
kerns
=
NCB_ALGO_FUNC
(
dispatch_kern
,
algo
,
param
);
auto
fallback_handle
=
handle
();
for
(
auto
kernel
:
kerns
)
{
megdnn_assert
(
param
.
filter_meta
.
format
==
Param
::
Format
::
NCHW
||
param
.
filter_meta
.
format
==
Param
::
Format
::
NHWC
||
param
.
filter_meta
.
format
==
Param
::
Format
::
NCHW88
||
param
.
filter_meta
.
format
==
Param
::
Format
::
NCHW44
,
"invalid conv format"
);
megdnn_assert
(
param
.
filter_meta
.
format
==
Param
::
Format
::
NCHW
||
param
.
filter_meta
.
format
==
Param
::
Format
::
NHWC
||
param
.
filter_meta
.
format
==
Param
::
Format
::
NCHW88
||
param
.
filter_meta
.
format
==
Param
::
Format
::
NCHW44
,
"invalid conv format"
);
auto
run
=
[
param
,
kernel
](
size_t
index
,
size_t
thread_id
)
{
CpuNDRange
ndrange_id
(
kernel
.
global_size
,
index
);
kernel
.
kern
(
param
,
{
thread_id
,
ndrange_id
});
...
...
@@ -293,13 +290,11 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm_heuristic_with_ncb(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
for
(
auto
i
:
get_all_algorithms_with_ncb
(
param
))
{
size_t
need_workspace
=
static_cast
<
AlgoBase
*>
(
i
)
->
get_workspace
(
this
,
param
);
bool
usable_reproducible
=
static_cast
<
AlgoBase
*>
(
i
)
->
usable_reproducible
(
this
,
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
reproducible
);
if
(
usable_reproducible
&&
need_workspace
<=
workspace_limit_in_bytes
)
{
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
reproducible
);
if
(
usable_reproducible
&&
NCB_ALGO_FUNC
(
get_workspace
,
i
,
param
)
<=
workspace_limit_in_bytes
)
{
return
i
;
}
}
...
...
@@ -311,8 +306,8 @@ ConvolutionImpl::get_all_algorithms_with_ncb(const NCBKernSizeParam& param) {
std
::
vector
<
Algorithm
*>
ret
;
std
::
vector
<
Algorithm
*>
prefer_algos
;
for
(
auto
&&
i
:
algo_pack
())
{
if
(
i
->
usable
(
this
,
param
,
AlgoSelectionStrategy
::
FULL_RUN
))
{
if
(
i
->
is_preferred
(
this
,
param
))
{
if
(
i
->
usable
(
param
,
AlgoSelectionStrategy
::
FULL_RUN
))
{
if
(
i
->
is_preferred
(
param
))
{
prefer_algos
.
push_back
(
i
);
}
else
{
ret
.
push_back
(
i
);
...
...
dnn/src/fallback/convolution/opr_impl.h
浏览文件 @
ba5a43b8
...
...
@@ -178,42 +178,38 @@ public:
class
AlgoBase
:
public
Algorithm
{
public:
virtual
~
AlgoBase
()
=
default
;
virtual
bool
usable
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
virtual
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
=
0
;
virtual
size_t
get_workspace
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
=
0
;
virtual
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
=
0
;
virtual
SmallVector
<
NCBKern
>
dispatch_kern
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
=
0
;
const
NCBKernSizeParam
&
param
)
const
=
0
;
virtual
SmallVector
<
NCBKern
>
dispatch_preprocess_kern
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
{
const
NCBKernSizeParam
&
)
const
{
return
{};
};
//! get the layouts of weight_prerocess dst
virtual
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
{
const
NCBKernSizeParam
&
)
const
{
return
{};
};
//! get the workspace when weight_prerocess
virtual
size_t
get_preprocess_workspace
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
{
virtual
size_t
get_preprocess_workspace
(
const
NCBKernSizeParam
&
)
const
{
return
0
_z
;
};
//! Temporarily used to identify whether the matmul algorithm is
//! is_preferred.
virtual
bool
is_preferred
(
ConvolutionImpl
*
,
const
NCBKernSizeParam
&
)
const
{
virtual
bool
is_preferred
(
const
NCBKernSizeParam
&
)
const
{
return
false
;
}
bool
usable_reproducible
(
ConvolutionImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable_reproducible
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
,
bool
reproducible
=
true
)
const
{
return
(
!
reproducible
||
is_reproducible
())
&&
usable
(
opr
,
param
,
algo_selection_strategy
);
usable
(
param
,
algo_selection_strategy
);
}
};
...
...
dnn/src/x86/conv_bias/f32/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/x86/conv_bias/f32/algos.h"
...
...
@@ -104,7 +105,7 @@ void get_rectified_size(size_t IH, size_t IW, size_t OH, size_t OW, size_t FH,
/* ===================== direct algo ===================== */
bool
ConvBiasImpl
::
AlgoDirect
::
usable
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
bool
aviliable
=
fm
.
format
==
Param
::
Format
::
NCHW
&&
fm
.
spatial_ndim
==
2
&&
...
...
@@ -142,7 +143,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirect::get_bundle(
return
{
nullptr
,
{
part0
,
part1
}};
}
size_t
ConvBiasImpl
::
AlgoDirect
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
...
...
@@ -280,7 +281,8 @@ void ConvBiasImpl::AlgoDirect::do_conv_kern(const WorkspaceBundle& bundle,
size_t
workspace_group_id
=
workspace_ids
[
0
],
workspace_batch_id
=
workspace_ids
[
1
],
oc
=
workspace_ids
[
2
];
const
float
*
sptr
=
kern_param
.
src
<
float
>
(
batch_id
,
group_id
);
const
float
*
filter
=
kern_param
.
filter
<
float
>
(
group_id
)
+
oc
*
FH
*
FW
*
IC
;
const
float
*
filter
=
kern_param
.
filter
<
float
>
(
group_id
)
+
oc
*
FH
*
FW
*
IC
;
const
float
*
bias_ptr
=
kern_param
.
bias
<
float
>
(
batch_id
,
group_id
)
+
oc
*
bias_offset
;
float
*
dst
=
kern_param
.
dst
<
float
>
(
batch_id
,
group_id
)
+
oc
*
OH
*
OW
;
...
...
@@ -318,7 +320,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoDirect::get_kimpls(
}
/* ===================== direct-stride2 algo ===================== */
bool
ConvBiasImpl
::
AlgoDirectStride2
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
auto
FH
=
fm
.
spatial
[
0
];
...
...
@@ -363,7 +365,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirectStride2::get_bundle(
}
size_t
ConvBiasImpl
::
AlgoDirectStride2
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
//! Process one input channel copy padding
...
...
@@ -528,7 +530,7 @@ WorkspaceBundle ConvBiasImpl::AlgoMatrixMul::get_bundle(
}
bool
ConvBiasImpl
::
AlgoMatrixMul
::
is_preferred
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
auto
&&
fm
=
param
.
filter_meta
;
if
(
fm
.
dilation
[
0
]
!=
1
||
fm
.
dilation
[
1
]
!=
1
)
{
return
false
;
...
...
@@ -550,7 +552,7 @@ bool ConvBiasImpl::AlgoMatrixMul::is_preferred(
int
ic
=
find_nearest_elem
<
int
>
(
fm
.
icpg
,
{
4
,
8
,
16
,
32
,
64
,
96
,
128
});
int
on
=
std
::
round
(
geometric_mean
(
param
.
osz
[
0
],
param
.
osz
[
1
]));
ProfileElement
cur
(
f
,
oc
,
ic
,
on
);
auto
H
=
static_cast
<
HandleImpl
*>
(
opr
->
handle
());
auto
H
=
static_cast
<
HandleImpl
*>
(
inplace_cpu_handle
().
get
());
auto
&&
target
=
std
::
lower_bound
(
H
->
profile_cache
().
begin
(),
H
->
profile_cache
().
end
(),
cur
);
megdnn_assert_internal
(
target
->
f
==
cur
.
f
);
...
...
dnn/src/x86/conv_bias/f32/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
...
...
@@ -37,14 +38,13 @@ public:
return
m_large_group
?
"X86_CONV_BIAS_DIRECT_STRIDE1_LARGE_GROUP"
:
"X86_CONV_BIAS_DIRECT_STRIDE1_SMALL_GROUP"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpls
(
param
);
}
...
...
@@ -74,14 +74,13 @@ public:
return
m_large_group
?
"X86_CONV_BIAS_DIRECT_STRIDE2_LARGE_GROUP"
:
"X86_CONV_BIAS_DIRECT_STRIDE2_SMALL_GROUP"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpls
(
param
);
}
...
...
@@ -131,7 +130,7 @@ public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"X86_CONV_BIAS_MATMUL"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
override
{
auto
&&
fm
=
param
.
filter_meta
;
return
fm
.
format
==
Param
::
Format
::
NCHW
&&
fm
.
spatial_ndim
==
2
&&
...
...
@@ -145,15 +144,12 @@ public:
param
.
nr_threads
==
1
_z
;
}
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
return
{{
kimpl
,
{
group
,
1
_z
,
1
_z
}}};
...
...
@@ -171,7 +167,7 @@ public:
AlgoMkldnnConv
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"MKLDNN_CONV_FP32"
;
}
bool
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
override
{
auto
&&
fm
=
param
.
filter_meta
;
...
...
@@ -184,13 +180,9 @@ public:
return
ok
;
};
size_t
get_workspace
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
)
const
override
{
return
0
;
}
size_t
get_workspace
(
const
NCBKernSizeParam
&
)
const
override
{
return
0
;
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
/*param*/
)
const
override
{
auto
kern
=
[](
const
NCBKernParam
&
param
,
const
NCBKernIndex
&
ncb_index
)
{
...
...
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
浏览文件 @
ba5a43b8
...
...
@@ -6,16 +6,17 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/x86/conv_bias/f32/algos.h"
#include "src/common/utils.h"
#include "src/x86/conv_bias/f32/algos.h"
#include "src/x86/conv_bias/f32/strategy.h"
#include "src/x86/conv_bias/opr_impl.h"
#include "src/x86/conv_bias/postprocess_helper.h"
#include "src/x86/handle.h"
#include "src/x86/profile.h"
#include "src/x86/conv_bias/f32/strategy.h"
#include "midout.h"
...
...
@@ -27,10 +28,9 @@ using namespace x86;
/* ======================= AlgoFP32WinogradF63_8*8 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF63_8x8
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_x86_winograd_fp32
,
1
,
0
)
{
//! TODO: now nchw88 winograd only support Dense mode
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
...
...
@@ -44,13 +44,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
6
&&
param
.
output_block_size
==
6
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
@@ -74,10 +74,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8,
/* ======================= AlgoFP32WinogradF23_8*8 ======================== */
bool
ConvBiasImpl
::
AlgoFP32WinogradF23_8x8
::
usable
(
fallback
::
ConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
MEGDNN_MARK_USED_VAR
(
param
);
MEGDNN_MARK_USED_VAR
(
opr
);
MIDOUT_BEGIN
(
megdnn_x86_winograd_fp32
,
2
,
0
)
{
//! TODO: now nchw88 winograd only support Dense mode
if
(
param
.
filter_meta
.
icpg
%
8
!=
0
||
...
...
@@ -91,13 +90,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
strategy
,
m_tile_size
,
param
)
.
get_matmul_kern_param
(
param
);
return
m_matmul_algo
->
usable
(
matmul_param
)
&&
(
opr
->
param
()
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
(
opr
->
param
()
.
format
==
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88
||
(
param
.
filter_meta
.
format
==
param
::
ConvBias
::
Format
::
NCHW88_WINOGRAD
&&
opr
->
param
()
.
output_block_size
==
2
&&
param
.
output_block_size
==
2
&&
param
.
winograd_matmul_format
==
param
::
MatrixMul
::
Format
::
MK8
))
&&
opr
->
param
().
mode
==
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
&&
!
param
.
filter_meta
.
should_flip
&&
(
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
]
&&
param
.
filter_meta
.
spatial
[
0
]
==
3
)
&&
(
param
.
filter_meta
.
stride
[
0
]
==
param
.
filter_meta
.
stride
[
1
]
&&
...
...
dnn/src/x86/conv_bias/int8/algos.cpp
浏览文件 @
ba5a43b8
...
...
@@ -36,7 +36,7 @@ using namespace megdnn;
using
namespace
x86
;
bool
ConvBiasImpl
::
AlgoChanWiseAvx2Stride1Qint8
::
usable
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
return
chanwise_avx2_stride1_qint8_usable
(
param
);
}
...
...
@@ -66,7 +66,7 @@ WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle(
}
size_t
ConvBiasImpl
::
AlgoChanWiseAvx2Stride1Qint8
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
...
...
@@ -78,12 +78,12 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_kimpls(
}
bool
ConvBiasImpl
::
AlgoChanWiseAvx2Stride1Qint8
::
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
chanwise_avx2_stride1_qint8_preferred
(
param
);
}
bool
ConvBiasImpl
::
AlgoChanWiseAvx2Stride2Qint8
::
usable
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
return
chanwise_avx2_stride2_qint8_usable
(
param
);
}
...
...
@@ -113,7 +113,7 @@ WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle(
}
size_t
ConvBiasImpl
::
AlgoChanWiseAvx2Stride2Qint8
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
...
...
@@ -125,12 +125,12 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_kimpls(
}
bool
ConvBiasImpl
::
AlgoChanWiseAvx2Stride2Qint8
::
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
chanwise_avx2_stride2_qint8_preferred
(
param
);
}
bool
ConvBiasImpl
::
AlgoDirectAvx2Stride1Int8
::
usable
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
/*algo_selection_strategy*/
)
const
{
return
direct_avx2_stride1_int8_usable
(
param
);
}
...
...
@@ -170,7 +170,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle(
}
size_t
ConvBiasImpl
::
AlgoDirectAvx2Stride1Int8
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
...
...
@@ -182,14 +182,13 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls(
}
bool
ConvBiasImpl
::
AlgoDirectAvx2Stride1Int8
::
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
direct_avx2_stride1_int8_preferred
(
param
);
}
/* ===================== avx2 int8 stride 2 ===================== */
bool
ConvBiasImpl
::
AlgoAVX2DirectConvStride2
::
usable
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
return
direct_avx2_stride2_int8_usable
(
param
);
}
...
...
@@ -229,7 +228,7 @@ WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle(
}
size_t
ConvBiasImpl
::
AlgoAVX2DirectConvStride2
::
get_workspace
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
...
...
@@ -241,13 +240,12 @@ ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls(
}
bool
ConvBiasImpl
::
AlgoAVX2DirectConvStride2
::
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
direct_avx2_stride2_int8_preferred
(
param
);
}
#if MEGDNN_X86_WITH_MKL_DNN
bool
ConvBiasImpl
::
AlgoMkldnnQint8
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
ConvBiasImpl
::
AlgoMkldnnQint8
::
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
return
mkldnn_qint8_usable
(
param
);
}
...
...
@@ -426,19 +424,18 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32(
#undef REORDER_MEMORY
bool
ConvBiasImpl
::
AlgoMkldnnQint8
::
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
mkldnn_qint8_preferred
(
param
);
}
/* ===================== mkldnn qint8 matmul algo ===================== */
bool
ConvBiasImpl
::
AlgoMkldnnMatmulQint8
::
usable
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
,
bool
ConvBiasImpl
::
AlgoMkldnnMatmulQint8
::
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
{
return
mkldnn_matmul_qint8_usable
(
param
);
}
bool
ConvBiasImpl
::
AlgoMkldnnMatmulQint8
::
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
{
const
NCBKernSizeParam
&
param
)
const
{
return
mkldnn_matmul_qint8_preferred
(
param
);
}
...
...
dnn/src/x86/conv_bias/int8/algos.h
浏览文件 @
ba5a43b8
...
...
@@ -25,18 +25,15 @@ public:
const
char
*
name
()
const
override
{
return
"X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE1"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpls
(
param
);
}
void
*
type
()
const
override
;
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
/* ===================== avx2 stride2 chanwise algo ===================== */
...
...
@@ -49,18 +46,15 @@ public:
const
char
*
name
()
const
override
{
return
"X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE2"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpls
(
param
);
}
void
*
type
()
const
override
;
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
/* ===================== avx2 stride1 direct algo ===================== */
...
...
@@ -73,18 +67,15 @@ public:
const
char
*
name
()
const
override
{
return
"X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE1"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
virtual
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpls
(
param
);
}
void
*
type
()
const
override
;
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
/* ================== avx2 int8 direct conv stride2 algo ================== */
...
...
@@ -97,18 +88,15 @@ public:
const
char
*
name
()
const
override
{
return
"X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
override
;
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
;
SmallVector
<
NCBKern
>
dispatch_kerns
(
fallback
::
ConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_kimpls
(
param
);
}
void
*
type
()
const
override
;
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
#if MEGDNN_X86_WITH_MKL_DNN
...
...
@@ -122,16 +110,14 @@ public:
AlgoMkldnnQint8
()
{}
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"MKLDNN_INT8"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
nr_threads
=
param
.
nr_threads
;
return
get_bundle
(
param
).
total_size_in_bytes
()
*
nr_threads
;
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
size_t
n
=
param
.
n
;
...
...
@@ -147,8 +133,7 @@ public:
return
{{
kern
,
{
group
,
n
,
1
_z
}}};
}
void
*
type
()
const
override
;
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
};
/* ===================== mkldnn qint8 matmul algo ===================== */
class
ConvBiasImpl
::
AlgoMkldnnMatmulQint8
final
:
public
AlgoBase
{
...
...
@@ -160,22 +145,19 @@ class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase {
public:
bool
is_reproducible
()
const
override
{
return
true
;
}
const
char
*
name
()
const
override
{
return
"MKLDNN_MATMUL_INT8"
;
}
bool
usable
(
FallbackConvBiasImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
usable
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
)
const
override
;
size_t
get_workspace
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
get_workspace
(
const
NCBKernSizeParam
&
param
)
const
override
{
return
get_bundle
(
param
).
total_size_in_bytes
();
}
SmallVector
<
NCBKern
>
dispatch_kerns
(
FallbackConvBiasImpl
*
/*opr*/
,
const
NCBKernSizeParam
&
param
)
const
override
{
size_t
group
=
param
.
filter_meta
.
group
;
return
{{
kern_mkldnn_matmul_s8x8x32
,
{
group
,
1
_z
,
1
_z
}}};
}
//! select matmul to the highest preference
bool
is_preferred
(
FallbackConvBiasImpl
*
,
const
NCBKernSizeParam
&
param
)
const
override
;
bool
is_preferred
(
const
NCBKernSizeParam
&
param
)
const
override
;
void
*
type
()
const
override
;
};
...
...
dnn/src/x86/conv_bias/opr_impl.cpp
浏览文件 @
ba5a43b8
...
...
@@ -163,7 +163,7 @@ const char* ConvBiasImpl::get_algorithm_set_name() const {
}
bool
ConvBiasImpl
::
is_matmul_quantized_prefer
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
param
)
{
const
ConvBiasImpl
::
NCBKernSizeParam
&
param
)
const
{
bool
conv_direct_chanwise_mkldnn_usable
=
true
;
if
(
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS8
||
param
.
dst_type
.
enumv
()
==
DTypeEnum
::
QuantizedS32
)
{
...
...
dnn/src/x86/conv_bias/opr_impl.h
浏览文件 @
ba5a43b8
...
...
@@ -55,7 +55,7 @@ public:
const
char
*
get_algorithm_set_name
()
const
override
;
bool
is_matmul_quantized_prefer
(
const
ConvBiasImpl
::
NCBKernSizeParam
&
ncb_param
)
override
;
const
ConvBiasImpl
::
NCBKernSizeParam
&
ncb_param
)
const
override
;
};
}
// namespace x86
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录