MegEngine · Commit df356635
Authored Aug 14, 2020 by Megvii Engine Team
Committed by Xinran Xu on Aug 25, 2020
fix(mgb/fallback): delete im2col duplicate code and fix nchw44 usable
GitOrigin-RevId: 1aa250e9e715639364746144139d712edd610c6e
Parent: 4a227083
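Background, not part of the commit: im2col-based convolution rewrites the convolution as a matrix multiply by copying each receptive field into a column of a temporary matrix; the kernels touched here tile and execute exactly that. A minimal single-channel sketch, assuming unit stride and no padding (illustrative only, not MegEngine's API):

#include <cstddef>
#include <vector>

// Minimal single-channel im2col: src is IH x IW, filter is FH x FW.
// The real kernels below tile this over OC/OHW blocks and handle
// packed layouts such as NCHW44.
std::vector<float> im2col(const std::vector<float>& src, size_t IH, size_t IW,
                          size_t FH, size_t FW) {
    size_t OH = IH - FH + 1, OW = IW - FW + 1;
    std::vector<float> dst(FH * FW * OH * OW);
    for (size_t fh = 0; fh < FH; ++fh)
        for (size_t fw = 0; fw < FW; ++fw)
            for (size_t oh = 0; oh < OH; ++oh)
                for (size_t ow = 0; ow < OW; ++ow)
                    // row = filter tap, column = output position; the conv
                    // then becomes an (OC x FH*FW) x (FH*FW x OH*OW) matmul.
                    dst[(fh * FW + fw) * OH * OW + oh * OW + ow] =
                            src[(oh + fh) * IW + (ow + fw)];
    return dst;
}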
Showing 7 changed files with 459 additions and 495 deletions (+459 −495)
dnn/src/arm_common/conv_bias/postprocess_helper.h              +20  −18
dnn/src/fallback/conv_bias/im2col/algos.cpp                    +69  −458
dnn/src/fallback/conv_bias/im2col/factory.h                     +4  −12
dnn/src/fallback/conv_bias/im2col/im2col_kerns.h              +364  −0
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp          +1  −2
dnn/src/fallback/conv_bias/im2col/strategy_default_nchw44.cpp   +1  −2
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp           +0  −3
dnn/src/arm_common/conv_bias/postprocess_helper.h

@@ -100,6 +100,7 @@ namespace {
             MIDOUT_END();                          \
             break;                                 \
         default:                                   \
+            megdnn_throw("unknow biasmode");       \
             break;                                 \
     }
@@ -282,24 +283,25 @@ struct PostProcess<opctype, opdtype, megdnn::PostprocessMode::QUANTIZED> {
             reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type,  \
             dst_type, N* OC* OH* OW* pack_oc_size);

 #define FOR_BIAS(_bias_mode, OH, OW)                            \
     switch (_bias_mode) {                                       \
         case megdnn::BiasMode::NO_BIAS:                         \
             break;                                              \
         case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS:          \
             if (pack_oc_size == 1) {                            \
                 FOR_BINARY_BROADCAST(CONCAT_OP(AddOp));         \
             } else {                                            \
                 megdnn_assert(pack_oc_size == 4,                \
                               "Only support nchw44 in ARM");    \
                 FOR_BINARY_BROADCAST_NCHW44(CONCAT_OP(AddOp));  \
             }                                                   \
             break;                                              \
         case megdnn::BiasMode::BIAS:                            \
             FOR_BINARY(CONCAT_OP(AddOp));                       \
             break;                                              \
         default:                                                \
+            megdnn_throw("unknow biasmode");                    \
             break;                                              \
     }

 template <typename ctype, typename dtype>
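For context on the NCHW44 branch above: NCHW44 packs 4 output channels into the innermost dimension, so a per-channel bias cannot be broadcast with the plain NCHW kernel and a dedicated FOR_BINARY_BROADCAST_NCHW44 path is required. A rough scalar sketch of what that broadcast does (illustrative, not MegEngine's vectorized implementation):

#include <cstddef>

// Scalar sketch of adding a per-channel bias to an NCHW44 tensor:
// layout is (N, C/4, H, W, 4), with 4 consecutive channels innermost.
void broadcast_channel_bias_nchw44(float* dst, const float* bias, size_t N,
                                   size_t C, size_t H, size_t W) {
    const size_t pack = 4;
    for (size_t n = 0; n < N; ++n)
        for (size_t cb = 0; cb < C / pack; ++cb)
            for (size_t hw = 0; hw < H * W; ++hw)
                for (size_t i = 0; i < pack; ++i)
                    // bias index is the true channel cb*4+i, while dst walks
                    // the packed innermost dimension.
                    dst[((n * (C / pack) + cb) * H * W + hw) * pack + i] +=
                            bias[cb * pack + i];
}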
dnn/src/fallback/conv_bias/im2col/algos.cpp

(Diff collapsed in the original page view: +69 −458. Presumably the duplicated kernel bodies deleted here are the ones consolidated into the new im2col_kerns.h below.)
dnn/src/fallback/conv_bias/im2col/factory.h

@@ -26,10 +26,9 @@ enum class StrategyType : uint32_t {
     FLOAT = 0,
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     FLOAT_FP16 = 1,
-#else
+#endif
 #if !MEGDNN_DISABLE_FLOAT16
     FLOAT16_FLOAT16 = 2,
 #endif
-#endif
     INT8x8x32 = 3,
     INT8x8x16 = 4,
@@ -153,12 +152,10 @@ public:
         cb1(dt_float32, dt_float32, StrategyType::FLOAT);
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         cb1(dt_float16, __fp16, StrategyType::FLOAT_FP16);
-#else
+#endif
 #if !MEGDNN_DISABLE_FLOAT16
         cb1(dt_float16, dt_float16, StrategyType::FLOAT16_FLOAT16);
 #endif
-#endif
         cb2(dt_int8, dt_int32, dt_int32, dt_int8, dt_int32, dt_int32,
             StrategyType::INT8x8x32);
@@ -256,8 +253,7 @@ public:
             !param.filter_meta.should_flip) {
             MIDOUT_BEGIN(
                     megdnn_fallback_im2col_factory_make_strategy,
-                    midout_iv(
-                            "DefaultStrategyType::8x12x1_fuse_packb_s2_nchw44"_hash)) {
+                    midout_iv("8x12x1_fuse_packb_s2_nchw44"_hash)) {
                 return std::make_unique<StrategyFuseXx12x1Nchw44K3x3S2<
                         float, float,
@@ -284,14 +280,13 @@ public:
             cb1(NCHW, DEFAULT, dt_float16, __fp16, PostprocessMode::FLOAT,
                 "DefaultStrategyType::FLOAT_FP16"_hash);
             break;
-#else
+#endif
 #if !MEGDNN_DISABLE_FLOAT16
         case StrategyType::FLOAT16_FLOAT16:
             cb1(NCHW, DEFAULT, dt_float16, dt_float16,
                 PostprocessMode::NO_PROCESS,
                 "DefaultStrategyType::FLOAT16_FLOAT16"_hash);
             break;
 #endif
-#endif
         case StrategyType::INT8x8x32:
             if (format == param::ConvBias::Format::NCHW) {
@@ -472,15 +467,12 @@ public:
             cb1(NCHW, NO_PACK, dt_float32, dt_float32, PostprocessMode::FLOAT,
                 "NoPackStrategyType::FLOAT"_hash);
             break;
-#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#else
 #if !MEGDNN_DISABLE_FLOAT16
         case StrategyType::FLOAT16_FLOAT16:
             cb1(NCHW, NO_PACK, dt_float16, dt_float16,
                 PostprocessMode::NO_PROCESS,
                 "NoPackStrategyType::FLOAT16_FLOAT16"_hash);
             break;
 #endif
-#endif
         case StrategyType::INT8x8x16:
             cb3(NCHW, NO_PACK, dt_int8, dt_int16, dt_int16, dt_int8,
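The recurring change in this file is the "fix nchw44 usable" part of the commit message: under the old `#else` nesting, the plain-`dt_float16` (FLOAT16_FLOAT16) strategies were compiled only when `__ARM_FEATURE_FP16_VECTOR_ARITHMETIC` was absent; after the fix the two guards are independent, so both strategy sets can coexist unless float16 is disabled. A minimal standalone sketch of the two patterns (macro names here are stand-ins, not MegEngine's):

#include <cstdio>

#define HAS_FP16_VECTOR 1  // stand-in for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#define DISABLE_FP16 0     // stand-in for MEGDNN_DISABLE_FLOAT16

int main() {
    // Old pattern: the second block lives in the #else branch, so it is
    // unreachable whenever HAS_FP16_VECTOR is set.
#if HAS_FP16_VECTOR
    std::puts("old: __fp16 strategy only");
#else
#if !DISABLE_FP16
    std::puts("old: dt_float16 strategy only");
#endif
#endif

    // New pattern: two independent guards; both strategies can coexist.
#if HAS_FP16_VECTOR
    std::puts("new: __fp16 strategy");
#endif
#if !DISABLE_FP16
    std::puts("new: dt_float16 strategy");
#endif
    return 0;
}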
dnn/src/fallback/conv_bias/im2col/im2col_kerns.h  (new file, 0 → 100644)
/**
* \file dnn/src/fallback/conv_bias/im2col/im2col_kerns.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/naive/convolution/helper.h"
#include "src/fallback/conv_bias/im2col/factory.h"
#include "midout.h"
MIDOUT_DECL(megdnn_fallback_im2col)
namespace megdnn {
namespace fallback {
namespace im2col {

/*!
 * \brief The index of each part of the workspace in the im2col workspace
 * bundle, through which the needed pointers can be obtained conveniently.
 */
struct Im2colBundelIndex {
    static constexpr size_t BUNDLE_THREAD_INDEX = 2_z;
};
using Pack_Mode = fallback::MatrixMulImpl::AlgoBase::PackMode;
/*!
 * \brief Im2colKerns collects all the im2col kerns in it.
 */

namespace {
//! conv kernel
static void kerns(
        const WorkspaceBundle& bundle, WorkspaceBundle bundle_thread,
        const ConvBiasImpl::NCBKernParam& param,
        fallback::MatrixMulImpl::KernSizeParam matmul_kernsize_param,
        const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
        const fallback::MatrixMulImpl::AlgoBase::MatmulDescription& matmul_desc,
        StrategyParam strategyparam,
        fallback::ConvBiasImpl::NCBKernIndex ncb_index, size_t ohw_tile_size,
        StrategyBase* im2colstrategy) {
    size_t OC = param.filter_meta.ocpg;
    size_t output_block_size = std::min(
            ohw_tile_size,
            strategyparam.ohw - ncb_index.ndrange_id[2] * ohw_tile_size);
    size_t output_block_oc_size = std::min(
            strategyparam.oc_tile_size,
            OC - ncb_index.ndrange_id[3] * strategyparam.oc_tile_size);

    bundle_thread.set(
            static_cast<int8_t*>(
                    bundle.get(Im2colBundelIndex::BUNDLE_THREAD_INDEX)) +
            bundle_thread.total_size_in_bytes() * ncb_index.thread_id);

    fallback::MatrixMulImpl::KernParam matmul_param;
    static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
            matmul_kernsize_param;

    strategyparam.batch_id = ncb_index.ndrange_id[0];
    strategyparam.group_id = ncb_index.ndrange_id[1];
    strategyparam.oc_cur_index =
            ncb_index.ndrange_id[3] * strategyparam.oc_tile_size;
    strategyparam.oc_end_index =
            strategyparam.oc_cur_index + output_block_oc_size;
    strategyparam.ohw_cur_index = ncb_index.ndrange_id[2] * ohw_tile_size;
    strategyparam.output_block_oc_size = output_block_oc_size;
    strategyparam.output_block_size = output_block_size;

    //! 1. Im2col
    im2colstrategy->exec_im2col(bundle, bundle_thread, strategyparam, param,
                                matmul_param, matmul_algo);

    //! 2. packb and matmul compute
    im2colstrategy->exec_matmul(param, strategyparam, bundle, bundle_thread,
                                matmul_param, matmul_algo, ncb_index,
                                matmul_desc);

    //! 3. postprocess and copy dst if need
    im2colstrategy->exec_postprocess(param, strategyparam, bundle_thread);
}
}  // namespace
template <Pack_Mode packmode>
class Im2colKerns;
template <>
class Im2colKerns<Pack_Mode::DEFAULT> {
public:
    SmallVector<ConvBiasImpl::NCBKern> get_kerns(
            const ConvBiasImpl::NCBKernSizeParam& param,
            WorkspaceBundle& bundle, WorkspaceBundle& bundle_thread,
            const StrategyParam& strategyparam,
            fallback::MatrixMulImpl::KernSizeParam& matmul_param,
            StrategyBase* im2colstrategy, MatrixMulImpl::AlgoBase* matmul_algo,
            size_t ohw_tile_size, size_t oc_tile_size, size_t pack_oc_size) {
        auto matmul_desc = matmul_algo->matmul_description();
        auto kern_padding =
                [bundle, im2colstrategy, pack_oc_size = pack_oc_size](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    im2colstrategy->copy_padding_kern(bundle, param, ncb_index,
                                                      pack_oc_size);
                };

        auto kern_packA =
                [bundle, matmul_algo, matmul_param, im2colstrategy,
                 strategyparam = strategyparam, matmul_desc = matmul_desc](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    im2colstrategy->packA_kern(bundle, param, matmul_param,
                                               matmul_algo, ncb_index,
                                               matmul_desc, strategyparam);
                };

        auto kern_compute_default =
                [bundle, bundle_thread, matmul_param, matmul_algo,
                 ohw_tile_size, strategyparam, matmul_desc = matmul_desc,
                 im2colstrategy](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    kerns(bundle, bundle_thread, param, matmul_param,
                          matmul_algo, matmul_desc, strategyparam, ncb_index,
                          ohw_tile_size, im2colstrategy);
                };

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        size_t BATCH = param.n;
        size_t OC = param.filter_meta.ocpg;
        size_t IC = param.filter_meta.icpg;
        size_t PH = param.filter_meta.padding[0];
        size_t PW = param.filter_meta.padding[1];
        size_t GROUP = param.filter_meta.group;
        size_t packa_parallel_times =
                div_ceil<size_t>(OC, matmul_desc.innerblocksize.m);
        size_t ohw_parallel_times = div_ceil(OH * OW, ohw_tile_size);
        size_t oc_parallel_times = div_ceil<size_t>(OC, oc_tile_size);
        SmallVector<ConvBiasImpl::NCBKern> ret_kern;

        if (!is_enable_filter_preprocess(param)) {
            ret_kern.push_back({kern_packA, {GROUP, packa_parallel_times}});
        }
        if (PH != 0 || PW != 0) {
            ret_kern.push_back(
                    {kern_padding, {BATCH, GROUP, IC / pack_oc_size}});
        }
        ret_kern.push_back(
                {kern_compute_default,
                 {BATCH, GROUP, ohw_parallel_times, oc_parallel_times}});
        return ret_kern;
    }

    WorkspaceBundle get_thread_bundle(
            const fallback::ConvBiasImpl::NCBKernSizeParam& param,
            const fallback::MatrixMulImpl::KernSizeParam& im2col_kern_param,
            const MatrixMulImpl::AlgoBase* matmul_algo, size_t ohw_tile_size,
            size_t oc_tile_size) {
        size_t IC = param.filter_meta.icpg, FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];
        size_t pack_oc_size = pack_size(param.filter_meta.format);

        size_t im2col = 0, packb = 0, bias_temp = 0;
        bool default_pack = matmul_algo->packmode() == Pack_Mode::DEFAULT;
        megdnn_assert(default_pack, "only support default packa");
        size_t im2col_dst_size =
                IC * FH * FW * ohw_tile_size * sizeof(param.src_type);
        size_t matmul_dst_size = pack_oc_size * oc_tile_size * ohw_tile_size *
                                 sizeof(param.bias_type);
        //! matmul_dst and im2col_dst use the same memory
        WorkspaceBundle wb = matmul_algo->get_bundle(im2col_kern_param);
        packb = wb.get_size(1);
        im2col = std::max(im2col_dst_size, matmul_dst_size);

        if (param.bias_mode == megdnn::BiasMode::BIAS) {
            bias_temp = oc_tile_size * ohw_tile_size * sizeof(param.bias_type);
        }
        return {nullptr, {packb, im2col, bias_temp}};
    }
};
template <>
class Im2colKerns<Pack_Mode::ONLY_PACKA> {
public:
    SmallVector<ConvBiasImpl::NCBKern> get_kerns(
            const ConvBiasImpl::NCBKernSizeParam& param,
            WorkspaceBundle& bundle, WorkspaceBundle& bundle_thread,
            const StrategyParam& strategyparam,
            fallback::MatrixMulImpl::KernSizeParam& matmul_param,
            StrategyBase* im2colstrategy, MatrixMulImpl::AlgoBase* matmul_algo,
            size_t ohw_tile_size, size_t oc_tile_size, size_t pack_oc_size) {
        auto matmul_desc = matmul_algo->matmul_description();
        auto kern_padding =
                [bundle, im2colstrategy, pack_oc_size = pack_oc_size](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    im2colstrategy->copy_padding_kern(bundle, param, ncb_index,
                                                      pack_oc_size);
                };

        auto kern_packA =
                [bundle, matmul_algo, matmul_param, im2colstrategy,
                 strategyparam = strategyparam, matmul_desc = matmul_desc](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    im2colstrategy->packA_kern(bundle, param, matmul_param,
                                               matmul_algo, ncb_index,
                                               matmul_desc, strategyparam);
                };

        auto kern_compute_onlypackA =
                [bundle, bundle_thread, matmul_param, matmul_algo,
                 strategyparam, ohw_tile_size, matmul_desc, im2colstrategy](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    kerns(bundle, bundle_thread, param, matmul_param,
                          matmul_algo, matmul_desc, strategyparam, ncb_index,
                          ohw_tile_size, im2colstrategy);
                };

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        size_t BATCH = param.n;
        size_t OC = param.filter_meta.ocpg;
        size_t IC = param.filter_meta.icpg;
        size_t PH = param.filter_meta.padding[0];
        size_t PW = param.filter_meta.padding[1];
        size_t GROUP = param.filter_meta.group;
        size_t ohw_parallel_times = div_ceil(OH * OW, ohw_tile_size);
        size_t oc_parallel_times = div_ceil<size_t>(OC, oc_tile_size);
        SmallVector<ConvBiasImpl::NCBKern> ret_kern;

        if (!is_enable_filter_preprocess(param)) {
            ret_kern.push_back({kern_packA, {GROUP, oc_parallel_times}});
        }
        if (PH != 0 || PW != 0) {
            ret_kern.push_back(
                    {kern_padding, {BATCH, GROUP, IC / pack_oc_size}});
        }
        ret_kern.push_back(
                {kern_compute_onlypackA,
                 {BATCH, GROUP, ohw_parallel_times, oc_parallel_times}});
        return ret_kern;
    }

    WorkspaceBundle get_thread_bundle(
            const fallback::ConvBiasImpl::NCBKernSizeParam& param,
            const fallback::MatrixMulImpl::KernSizeParam& im2col_kern_param,
            const MatrixMulImpl::AlgoBase* matmul_algo, size_t ohw_tile_size,
            size_t oc_tile_size) {
        size_t IC = param.filter_meta.icpg, FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];

        size_t im2col = 0, packb = 0, matmul_dst = 0, bias_temp = 0;
        bool only_packA = matmul_algo->packmode() == Pack_Mode::ONLY_PACKA;
        megdnn_assert(only_packA, "onlysupport onlypackA mode");
        size_t im2col_dst_size =
                IC * FH * FW * ohw_tile_size * sizeof(param.src_type);
        size_t matmul_dst_size =
                oc_tile_size * ohw_tile_size * sizeof(param.bias_type);
        //! matmul_dst and im2col_dst use the same memory
        WorkspaceBundle wb = matmul_algo->get_bundle(im2col_kern_param);
        packb = wb.get_size(1);
        im2col = im2col_dst_size;
        matmul_dst = matmul_dst_size;

        if (param.bias_mode == megdnn::BiasMode::BIAS) {
            bias_temp = oc_tile_size * ohw_tile_size * sizeof(param.bias_type);
        }
        return {nullptr, {packb, im2col, matmul_dst, bias_temp}};
    }
};
template <>
class Im2colKerns<Pack_Mode::NO_PACK> {
public:
    SmallVector<ConvBiasImpl::NCBKern> get_kerns(
            const ConvBiasImpl::NCBKernSizeParam& param,
            WorkspaceBundle& bundle, WorkspaceBundle& bundle_thread,
            const StrategyParam& strategyparam,
            fallback::MatrixMulImpl::KernSizeParam& matmul_param,
            StrategyBase* im2colstrategy, MatrixMulImpl::AlgoBase* matmul_algo,
            size_t ohw_tile_size, size_t oc_tile_size, size_t pack_oc_size) {
        auto matmul_desc = matmul_algo->matmul_description();
        auto kern_padding =
                [bundle, im2colstrategy, pack_oc_size = pack_oc_size](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    im2colstrategy->copy_padding_kern(bundle, param, ncb_index,
                                                      pack_oc_size);
                };

        auto kern_compute_nopack =
                [bundle, bundle_thread, matmul_param, matmul_algo,
                 strategyparam, ohw_tile_size, matmul_desc, im2colstrategy](
                        const ConvBiasImpl::NCBKernParam& param,
                        const ConvBiasImpl::NCBKernIndex& ncb_index) mutable {
                    bundle.set(param.workspace_ptr);
                    kerns(bundle, bundle_thread, param, matmul_param,
                          matmul_algo, matmul_desc, strategyparam, ncb_index,
                          ohw_tile_size, im2colstrategy);
                };

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        size_t BATCH = param.n;
        size_t OC = param.filter_meta.ocpg;
        size_t IC = param.filter_meta.icpg;
        size_t PH = param.filter_meta.padding[0];
        size_t PW = param.filter_meta.padding[1];
        size_t GROUP = param.filter_meta.group;
        size_t ohw_parallel_times = div_ceil(OH * OW, ohw_tile_size);
        size_t oc_parallel_times = div_ceil<size_t>(OC, oc_tile_size);
        SmallVector<ConvBiasImpl::NCBKern> ret_kern;

        if (PH != 0 || PW != 0) {
            ret_kern.push_back(
                    {kern_padding, {BATCH, GROUP, IC / pack_oc_size}});
        }
        ret_kern.push_back(
                {kern_compute_nopack,
                 {BATCH, GROUP, ohw_parallel_times, oc_parallel_times}});
        return ret_kern;
    }

    WorkspaceBundle get_thread_bundle(
            const fallback::ConvBiasImpl::NCBKernSizeParam& param,
            const fallback::MatrixMulImpl::KernSizeParam& im2col_kern_param,
            const MatrixMulImpl::AlgoBase* matmul_algo, size_t ohw_tile_size,
            size_t oc_tile_size) {
        size_t IC = param.filter_meta.icpg, FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];
        size_t ohw = param.osz[0] * param.osz[1];

        size_t im2col = 0, matmul_dst = 0, bias_temp = 0, matmul_compute = 0;
        bool no_pack = matmul_algo->packmode() == Pack_Mode::NO_PACK;
        megdnn_assert(no_pack, "only support no pack");
        bool is_dst_8bit =
                (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                 param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                 param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
        size_t im2col_dst_size =
                IC * FH * FW * ohw_tile_size * sizeof(param.src_type);
        size_t matmul_dst_size =
                oc_tile_size * ohw_tile_size * sizeof(param.bias_type);
        im2col = im2col_dst_size;
        if (is_dst_8bit) {
            matmul_dst = matmul_dst_size;
        } else {
            matmul_dst = ohw_tile_size >= ohw ? 0 : matmul_dst_size;
        }
        matmul_compute = matmul_algo->get_workspace(im2col_kern_param);

        if (param.bias_mode == megdnn::BiasMode::BIAS) {
            bias_temp = oc_tile_size * ohw_tile_size * sizeof(param.bias_type);
        }
        return {nullptr, {im2col, matmul_dst, bias_temp, matmul_compute}};
    }
};
}  // namespace im2col
}  // namespace fallback
}  // namespace megdnn

// vim: syntax=cpp.doxygen
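For readers skimming the new header: every compute kern runs the same three-phase tile pipeline (im2col copy, pack-B plus matmul, postprocess), dispatched over an ndrange of {BATCH, GROUP, ohw tiles, oc tiles}. A minimal, self-contained sketch of that tiling arithmetic (all sizes hypothetical, not MegEngine API):

#include <algorithm>
#include <cstddef>
#include <cstdio>

// Hypothetical sizes, just to show how the kern grid is laid out and how the
// ragged last tile is clamped (mirrors output_block_size and
// output_block_oc_size in kerns()).
int main() {
    size_t OH = 28, OW = 28, OC = 96;  // assumed output shape
    size_t ohw_tile_size = 192, oc_tile_size = 32;
    size_t ohw = OH * OW;
    size_t ohw_tiles = (ohw + ohw_tile_size - 1) / ohw_tile_size;  // div_ceil
    size_t oc_tiles = (OC + oc_tile_size - 1) / oc_tile_size;
    for (size_t t = 0; t < ohw_tiles; ++t) {
        for (size_t o = 0; o < oc_tiles; ++o) {
            // clamp against the tensor edge, like std::min(...) in kerns()
            size_t block_ohw = std::min(ohw_tile_size, ohw - t * ohw_tile_size);
            size_t block_oc = std::min(oc_tile_size, OC - o * oc_tile_size);
            std::printf("tile(%zu,%zu): %zu oc x %zu ohw\n", t, o, block_oc,
                        block_ohw);
        }
    }
    return 0;
}

Note also that in the DEFAULT path the im2col and matmul destinations share one workspace slot sized std::max(im2col_dst_size, matmul_dst_size), per the comment in get_thread_bundle above.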
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp

@@ -192,12 +192,11 @@ INSTANTIAL_CLASS(dt_float32, dt_float32, dt_float32, dt_float32, dt_float32,
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, __fp16, __fp16,
                  megdnn::PostprocessMode::FLOAT)
-#else
+#endif
 #if !MEGDNN_DISABLE_FLOAT16
 INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, dt_float16, dt_float16,
                  megdnn::PostprocessMode::NO_PROCESS)
 #endif
-#endif

 #if MEGDNN_AARCH64 || MEGDNN_ARMV7
 //! x86 do not have uint8 matmul so only armv7 armv8 support uint8
dnn/src/fallback/conv_bias/im2col/strategy_default_nchw44.cpp

@@ -108,13 +108,12 @@ INSTANTIAL_CLASS(dt_float32, dt_float32, dt_float32, dt_float32, dt_float32,
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, __fp16, __fp16,
                  megdnn::PostprocessMode::FLOAT)
-#else
+#endif
 #if !MEGDNN_DISABLE_FLOAT16
 INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, dt_float16, dt_float16,
                  megdnn::PostprocessMode::NO_PROCESS)
 #endif
-#endif

 #if MEGDNN_AARCH64 || MEGDNN_ARMV7
 //! x86 do not have uint8 matmul so only armv7 armv8 support uint8
 INSTANTIAL_CLASS(dt_uint8, dt_int32, dt_uint8, dt_qint32, dt_quint8,
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp

@@ -165,13 +165,10 @@ INSTANTIAL_CLASS(dt_int8, dt_int16, dt_int16, dt_int16, dt_int16,
                  megdnn::PostprocessMode::ADD_BIAS)
 INSTANTIAL_CLASS(dt_int8, dt_int32, dt_int32, dt_int32, dt_int32,
                  megdnn::PostprocessMode::ADD_BIAS)
-#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#else
 #if !MEGDNN_DISABLE_FLOAT16
 INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, dt_float16, dt_float16,
                  megdnn::PostprocessMode::NO_PROCESS)
 #endif
-#endif

 #undef INSTANTIAL_CLASS
 }  // namespace megdnn
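The INSTANTIAL_CLASS macros in these strategy files presumably expand to explicit template instantiations, so each supported dtype combination of the strategy template is compiled exactly once; the fp16 guard fix only changes which combinations get emitted. A generic sketch of that pattern (the Strategy template and macro here are illustrative, not MegEngine's actual definitions):

// A template whose instantiations we want pinned to one translation unit.
template <typename src_t, typename dst_t>
struct Strategy {
    dst_t run(src_t x) { return static_cast<dst_t>(x) * 2; }
};

// Explicit instantiation: the definition is compiled here once per type
// combination instead of implicitly in every including translation unit.
#define INSTANTIAL_CLASS(src_t, dst_t) template struct Strategy<src_t, dst_t>;

INSTANTIAL_CLASS(float, float)
#if !defined(DISABLE_FP16_EXAMPLE)  // stand-in for !MEGDNN_DISABLE_FLOAT16
// INSTANTIAL_CLASS(half, half)     // fp16 combination, guarded as above
#endif
#undef INSTANTIAL_CLASS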