Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
c985204b
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c985204b
编写于
4月 16, 2020
作者:
M
Megvii Engine Team
提交者:
Xinran Xu
5月 06, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn): add conv1x1 algo and tests
GitOrigin-RevId: 374a62cf12efb74fd92ee5d0ec8df7cfd40addba
上级
d8d3f405
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
1071 additions
and
2 deletions
+1071
-2
dnn/src/fallback/conv_bias/conv1x1/algos.cpp
dnn/src/fallback/conv_bias/conv1x1/algos.cpp
+230
-0
dnn/src/fallback/conv_bias/conv1x1/algos.h
dnn/src/fallback/conv_bias/conv1x1/algos.h
+56
-0
dnn/src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h
dnn/src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h
+99
-0
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
+214
-0
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h
+310
-0
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+8
-1
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+1
-0
dnn/src/x86/matrix_mul/algos.cpp
dnn/src/x86/matrix_mul/algos.cpp
+0
-1
dnn/test/common/conv_bias.cpp
dnn/test/common/conv_bias.cpp
+76
-0
dnn/test/common/conv_bias.h
dnn/test/common/conv_bias.h
+4
-0
dnn/test/x86/conv_bias.cpp
dnn/test/x86/conv_bias.cpp
+73
-0
未找到文件。
dnn/src/fallback/conv_bias/conv1x1/algos.cpp
0 → 100644
浏览文件 @
c985204b
/**
* \file dnn/src/fallback/conv_bias/conv1x1/algos.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/common.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "megdnn/opr_param_defs.h"
#include "src/naive/convolution/helper.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif
#include "midout.h"
MIDOUT_DECL
(
megdnn_fallback_conv1x1
)
using
namespace
megdnn
;
using
namespace
fallback
;
#if MEGDNN_X86
using
namespace
x86
;
#endif
using
namespace
conv1x1
;
size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic(
        const NCBKernSizeParam& param) const {
    //! Pick the OC tile used to split the 1x1-conv matmul along the output
    //! channels. Large problems keep the configured block size; small ones
    //! are split evenly across the worker threads instead.
    const size_t out_h = param.osz[0];
    const size_t out_w = param.osz[1];
    const size_t out_chan = param.filter_meta.ocpg;
    const bool big_spatial = out_h * out_w >= 56 * 56;
    const bool big_channel = out_chan >= 64;
    if (big_spatial || big_channel) {
        return m_oc_block_size;
    }
    //! small problem: one roughly-equal OC slice per thread
    return div_ceil(out_chan, param.nr_threads);
}
//! Total workspace (bytes) needed by the 1x1-conv algo for \p param.
//! Delegates the layout to the Conv1x1Kerns dispatcher matching the matmul
//! algo's pack mode; MIDOUT_* markers only instrument code coverage.
size_t ConvBiasImpl::AlgoConv1x1::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    //! OC tile size chosen by the same heuristic used at dispatch time, so
    //! the bundle computed here matches the one used in dispatch_kerns
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    auto matmul_param =
            get_matmul_kern_param(param, OH * OW, compt_oc_block_size);
    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 0) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 1) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    } else {
        //! remaining case: PackMode::NO_PACK
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 2) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    }
    //! reached only when midout disabled the instrumented regions above
    return 0;
}
//! Build the kernel list executed by the dispatcher for a 1x1 convolution:
//! optional packA over {GROUP, oc_tiles}, optional packB over {1}, then the
//! compute kernel over {BATCH, GROUP, oc_tiles}. Workspace bundles are
//! captured by value into the kernels (lambdas are `mutable` so the bundles
//! can be re-pointed at the runtime workspace inside the strategy).
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
        ConvBiasImpl* opr, const NCBKernSizeParam& param) const {
    SmallVector<ConvBiasImpl::NCBKern> ret_kern;
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    size_t GROUP = param.filter_meta.group;
    size_t BATCH = param.n;
    size_t oc_blocks_per_group = div_ceil(OC, compt_oc_block_size);
    auto matmul_param =
            get_matmul_kern_param(param, OH * OW, compt_oc_block_size);
    WorkspaceBundle whole_bundle = {nullptr, {}};
    WorkspaceBundle thread_bundle = {nullptr, {}};
    WorkspaceBundle matmul_bundle = {nullptr, {}};
    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 0) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 1) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
        }
        MIDOUT_END();
    } else {
        //! NO_PACK: only the matmul compute workspace (slot 2) is needed
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 2) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = {
                    nullptr,
                    {0, 0, m_matmul_algo->get_workspace(matmul_param)}};
        }
        MIDOUT_END();
    }
    //! get thread bundle
    thread_bundle = get_thread_bundle(param, matmul_bundle.get_size(2),
                                      compt_oc_block_size);
    //! strategy instances are cached/shared; raw pointer is non-owning
    Conv1x1StrategyBase* conv1x1_strategy =
            Conv1x1Factory::make_conv1x1_strategy(param, pack_mode,
                                                  opr->param().format);
    auto kern_packA = [this, whole_bundle, matmul_bundle, param,
                       compt_oc_block_size, conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->packA(whole_bundle, matmul_bundle,
                                compt_oc_block_size, this->m_matmul_algo,
                                param, ncb_param, std::move(ncb_index));
    };
    auto kern_packB = [this, whole_bundle, matmul_bundle, param,
                       conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->packB(whole_bundle, matmul_bundle,
                                this->m_matmul_algo, param, ncb_param,
                                std::move(ncb_index));
    };
    auto kern_compt = [this, whole_bundle, matmul_bundle, thread_bundle,
                       param, compt_oc_block_size, conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->exec(whole_bundle, matmul_bundle, thread_bundle,
                               compt_oc_block_size, this->m_matmul_algo,
                               param, ncb_param, std::move(ncb_index));
    };
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT ||
        pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        //! one packA task per (group, oc tile)
        ret_kern.push_back({kern_packA, {GROUP, oc_blocks_per_group}});
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
            //! packB walks all batches/groups itself, so a single task
            ret_kern.push_back({kern_packB, {1}});
        }
    }
    ret_kern.push_back({kern_compt, {BATCH, GROUP, oc_blocks_per_group}});
    return ret_kern;
}
//! Whether this algo can handle \p param: NCHW, true 1x1 kernel (stride 1,
//! no padding, no dilation), the underlying matmul algo usable, and
//! restricted post-processing for the int8/quantized output combinations.
bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
                                       const NCBKernSizeParam& param,
                                       AlgoSelectionStrategy) const {
    MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) {
        //! only support nchw format
        if (opr->param().format != param::ConvBias::Format::NCHW)
            return false;
        size_t FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];
        size_t PH = param.filter_meta.padding[0],
               PW = param.filter_meta.padding[1];
        size_t SH = param.filter_meta.stride[0],
               SW = param.filter_meta.stride[1];
        //! reject anything that is not a plain stride-1, unpadded 1x1 conv
        if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1)
            return false;
        //! make sure 8x8x16 and 8x8x32 biasmode is nobias and nonlineMode
        //! is identity otherwise return false mean that 8x8x32 and 8x8x16
        //! not support PostProcess
        //! NOTE(review): as written this rejects only when BOTH a bias and a
        //! non-identity nonlinearity are requested (&&, not ||) — confirm
        //! this matches the intent stated in the comment above.
        if (param.src_type.enumv() == param.filter_type.enumv() &&
            (param.src_type.enumv() == DTypeEnum::Int8 &&
             (param.dst_type.enumv() == DTypeEnum::Int16 ||
              param.dst_type.enumv() == DTypeEnum::Int32)) &&
            param.bias_mode != megdnn::BiasMode::NO_BIAS &&
            param.nonlineMode != megdnn::NonlineMode::IDENTITY)
            return false;
        if (param.src_type.enumv() == param.filter_type.enumv() &&
            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
              param.src_type.enumv() == DTypeEnum::Quantized8Asymm) &&
             param.dst_type.enumv() == DTypeEnum::QuantizedS32) &&
            param.bias_mode != megdnn::BiasMode::NO_BIAS &&
            param.nonlineMode != megdnn::NonlineMode::IDENTITY)
            return false;
        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        //! finally ask the wrapped matmul algo whether it accepts the
        //! equivalent GEMM shape
        MatrixMulImpl::KernSizeParam matmul_param = get_matmul_kern_param(
                param, OH * OW, get_oc_tile_size_heuristic(param));
        bool matmulusable = m_matmul_algo->usable(matmul_param);
        return matmulusable &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT;
    }
    MIDOUT_END();
    //! reached only when midout disabled the instrumented region
    return false;
}
dnn/src/fallback/conv_bias/conv1x1/algos.h
0 → 100644
浏览文件 @
c985204b
/**
* \file dnn/src/fallback/conv_bias/conv1x1/algos.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/thin/small_vector.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h"
namespace
megdnn
{
namespace
fallback
{
//! 1x1 convolution implemented as a matrix multiplication, wrapping an
//! arbitrary fallback MatrixMul algorithm. One instance exists per
//! (matmul algo, oc block size) combination.
class ConvBiasImpl::AlgoConv1x1 final : public AlgoBase {
public:
    //! \param matmul_algo  non-owning pointer to the backing matmul algo
    //! \param oc_block_size  preferred OC tile for large problems
    AlgoConv1x1(MatrixMulImpl::AlgoBase* matmul_algo, size_t oc_block_size)
            : m_matmul_algo(matmul_algo), m_oc_block_size(oc_block_size) {}

    bool is_reproducible() const override { return true; }

    //! Name of the form "CONV1x1:<matmul name>:<oc block size>", built
    //! lazily and cached (hence the mutable m_name).
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ssprintf("CONV1x1:%s:%zu", m_matmul_algo->name(),
                              m_oc_block_size);
        }
        return m_name.c_str();
    }

    bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;
    size_t get_workspace(ConvBiasImpl*,
                         const NCBKernSizeParam& param) const override;
    SmallVector<NCBKern> dispatch_kerns(
            ConvBiasImpl* opr, const NCBKernSizeParam& param) const override;

protected:
    //! Choose the OC tile for a given problem size (see algos.cpp).
    size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const;

private:
    MatrixMulImpl::AlgoBase* m_matmul_algo;  //! non-owning
    mutable std::string m_name;              //! lazily-built cache for name()
    mutable size_t m_oc_block_size = 0;
};
}
// namespace fallback
}
// namespace megdnn
// vim: syntax=cpp.doxygen
dnn/src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h
0 → 100644
浏览文件 @
c985204b
/**
* \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
namespace
megdnn
{
namespace
fallback
{
namespace
conv1x1
{
namespace
{
//! get_thread_bundle
//! Per-thread scratch layout: slot 0 holds the matmul compute workspace,
//! slot 1 holds a temporary matmul destination, which is only required when
//! the conv output is 8-bit (the matmul accumulates in a wider type first).
WorkspaceBundle get_thread_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                                  size_t matmul_c_size,
                                  size_t oc_tile_size) {
    //! for some cases, matmul result need temp space to store
    const size_t out_h = param.osz[0];
    const size_t out_w = param.osz[1];
    const auto src_enum = param.src_type.enumv();
    const auto dst_enum = param.dst_type.enumv();
    const bool qint8_out = src_enum == DTypeEnum::QuantizedS8 &&
                           dst_enum == DTypeEnum::QuantizedS8;
    const bool quint8_out = src_enum == DTypeEnum::Quantized8Asymm &&
                            dst_enum == DTypeEnum::Quantized8Asymm;
    const bool is_dst_8bit = qint8_out || quint8_out;
    size_t matmul_dst_bytes_per_thread = 0;
    if (is_dst_8bit) {
        matmul_dst_bytes_per_thread =
                oc_tile_size * out_h * out_w * sizeof(param.bias_type);
    }
    return WorkspaceBundle{nullptr,
                           {matmul_c_size, matmul_dst_bytes_per_thread}};
}
}
// anonymous namespace
//! Workspace layout helper, specialized per matmul pack mode.
//! Primary template covers DEFAULT and ONLY_PACKA.
template <MatrixMulImpl::AlgoBase::PackMode pack_mode>
class Conv1x1Kerns {
public:
    //! get_bundle
    //! Whole-algo workspace: slot 0 = all packed-A panels, slot 1 = all
    //! packed-B panels (DEFAULT only), slot 2 = per-thread scratch.
    WorkspaceBundle get_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                               const MatrixMulImpl::KernSizeParam& matmul_param,
                               const MatrixMulImpl::AlgoBase* matmul_algo,
                               size_t oc_tile_size) {
        size_t GROUP = param.filter_meta.group;
        size_t OC = param.filter_meta.ocpg;
        size_t BATCH = param.n;
        //! bundle per thread
        //! matmul_param records a matmul with M = oc_tile_size, K = IC, N = OH
        //! * OW this does not bother packb bytes
        auto matmul_bundle = matmul_algo->get_bundle(matmul_param);
        auto thread_bundle = get_thread_bundle(param, matmul_bundle.get_size(2),
                                               oc_tile_size);
        //! size per thread
        size_t all_threads_bytes =
                thread_bundle.total_size_in_bytes() * param.nr_threads;
        //! packa size = GROUP * packa_size_each_group
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t all_packa_bytes =
                packa_bytes_per_oc_tile * oc_tiles_per_group * GROUP;
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA)
            return WorkspaceBundle{nullptr,
                                   {all_packa_bytes, 0, all_threads_bytes}};
        //! packb size = N * GROUP * packb_size_per_group
        size_t packb_bytes_per_group = matmul_bundle.get_size(1);
        size_t all_packb_bytes = packb_bytes_per_group * GROUP * BATCH;
        return WorkspaceBundle{
                nullptr, {all_packa_bytes, all_packb_bytes, all_threads_bytes}};
    }
};
//! NO_PACK specialization: no packed panels exist, so slots 0/1 are empty
//! and the workspace is just the per-thread matmul scratch.
template <>
class Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK> {
public:
    //! get_bundle
    WorkspaceBundle get_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                               const MatrixMulImpl::KernSizeParam& matmul_param,
                               const MatrixMulImpl::AlgoBase* matmul_algo,
                               size_t oc_tile_size) {
        const size_t matmul_workspace =
                matmul_algo->get_workspace(matmul_param);
        const auto per_thread =
                get_thread_bundle(param, matmul_workspace, oc_tile_size);
        //! size per thread, replicated once per worker thread
        const size_t total_thread_bytes =
                per_thread.total_size_in_bytes() * param.nr_threads;
        return WorkspaceBundle{nullptr, {0, 0, total_thread_bytes}};
    }
};
}
// namespace conv1x1
}
// namespace fallback
}
// namespace megdnn
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
0 → 100644
浏览文件 @
c985204b
/**
* \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include <unordered_map>
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
#include "midout.h"
MIDOUT_DECL
(
megdnn_fallback_conv1x1_factory_strategy
)
namespace
megdnn
{
namespace
fallback
{
namespace
conv1x1
{
namespace
{
//! Cache key for the strategy map: the kernel-size parameters plus the
//! tensor format and matmul pack mode that select a strategy instantiation.
struct StrategyHashParam {
    ConvBiasImpl::NCBKernSizeParam param;
    //! tensor layout format of the conv_bias operator
    param::ConvBias::Format format;
    //! pack mode of the backing matmul algorithm
    MatrixMulImpl::AlgoBase::PackMode packmode;
};
//! Hash functor for StrategyHashParam: XOR-combines the dtype enums,
//! format and packmode, each shifted to a distinct bit range.
struct StrategyHashParamHash {
    std::size_t operator()(const StrategyHashParam& sparam) const {
        //! avoid hashkey is zero
        constexpr size_t base = 1;
        auto shifted = [](std::size_t value, unsigned shift) {
            return (value + base) << shift;
        };
        std::size_t hash_value =
                static_cast<std::size_t>(sparam.param.src_type.enumv()) + base;
        hash_value ^= shifted(
                static_cast<std::size_t>(sparam.param.dst_type.enumv()), 3);
        hash_value ^= shifted(
                static_cast<std::size_t>(sparam.param.filter_type.enumv()), 6);
        hash_value ^= shifted(
                static_cast<std::size_t>(sparam.param.bias_type.enumv()), 9);
        hash_value ^= shifted(static_cast<std::size_t>(sparam.format), 12);
        hash_value ^= shifted(static_cast<std::size_t>(sparam.packmode), 15);
        return hash_value;
    }
};
//! Equality functor for StrategyHashParam; compares exactly the fields the
//! hash functor mixes in, as required for unordered_map consistency.
struct StrategyHashParamEqual {
    bool operator()(const StrategyHashParam& param1,
                    const StrategyHashParam& param2) const {
        return param1.param.src_type == param2.param.src_type &&
               param1.param.filter_type == param2.param.filter_type &&
               param1.param.bias_type == param2.param.bias_type &&
               param1.param.dst_type == param2.param.dst_type &&
               param1.format == param2.format &&
               param1.packmode == param2.packmode;
    }
};
//! Instantiate the concrete Conv1x1Strategy matching the dtype combination
//! and pack mode in \p param. Throws if the pack mode or dtype combination
//! is unsupported. The cb1/cb2 macros return from inside this function when
//! the dtype test matches; MIDOUT_* markers instrument code coverage.
std::unique_ptr<Conv1x1StrategyBase> create_conv1x1_strategy(
        const ConvBiasImpl::NCBKernSizeParam& param,
        MatrixMulImpl::AlgoBase::PackMode pack_mode,
        param::ConvBias::Format format) {
    MEGDNN_MARK_USED_VAR(format);
//! cb1: uniform dtype for src/filter/dst (e.g. float32 everywhere)
#define cb1(_packmode, _dt, _post_ctype, _postprocess_mode, _midout_tag) \
    MIDOUT_BEGIN(megdnn_fallback_conv1x1_factory_strategy,               \
                 midout_iv(_midout_tag)) {                               \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {       \
            return std::make_unique<                                     \
                    Conv1x1Strategy<_dt, _dt, _dt, _post_ctype, _post_ctype, \
                                    _postprocess_mode, _packmode>>();    \
        }                                                                \
    }                                                                    \
    MIDOUT_END()
//! cb2: mixed dtypes (separate src/bias/dst dtypes and ctypes)
#define cb2(_packmode, _i_src_type, _i_bias_type, _i_dst_type, _src_ctype, \
            _bias_ctype, _dst_ctype, _postprocess_mode, _midout_tag)       \
    MIDOUT_BEGIN(megdnn_fallback_conv1x1_factory_strategy,                 \
                 midout_iv(_midout_tag)) {                                 \
        if (param.filter_type.enumv() == param.src_type.enumv() &&         \
            param.src_type.enumv() == DTypeTrait<_i_src_type>::enumv &&    \
            param.dst_type.enumv() == DTypeTrait<_i_dst_type>::enumv) {    \
            return std::make_unique<                                       \
                    Conv1x1Strategy<_src_ctype, _bias_ctype, _dst_ctype,   \
                                    DTypeTrait<_i_bias_type>::ctype,       \
                                    DTypeTrait<_i_dst_type>::ctype,        \
                                    _postprocess_mode, _packmode>>();      \
        }                                                                  \
    }                                                                      \
    MIDOUT_END()
    switch (pack_mode) {
        case MatrixMulImpl::AlgoBase::PackMode::DEFAULT:
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "Default::FLOAT"_hash);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float16,
                __fp16, PostprocessMode::FLOAT, "Default::FLOAT16_FP16"_hash);
#else
#if !MEGDNN_DISABLE_FLOAT16
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float16,
                dt_float16, PostprocessMode::NO_PROCESS,
                "Default::FLOAT16_FLOAT16"_hash);
#endif
#endif
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_int8,
                dt_int32, dt_int32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS, "Default::INT8x8x32_INT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_int8,
                dt_int16, dt_int16, dt_int8, dt_int16, dt_int16,
                PostprocessMode::NO_PROCESS, "Default::INT8x8x16_INT16"_hash);
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::Quantized8Asymm, dtype::QuantizedS32,
                dtype::QuantizedS32, dt_uint8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS,
                "Default::QUINT8x8x32_QINT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::Quantized8Asymm, dtype::QuantizedS32,
                dtype::Quantized8Asymm, dt_uint8, dt_int32, dt_uint8,
                PostprocessMode::QUANTIZED,
                "Default::QUINT8x8x32_QUINT8"_hash);
#endif
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::QuantizedS8, dtype::QuantizedS32,
                dtype::QuantizedS32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS,
                "Default::QINT8x8x32_QINT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::QuantizedS8, dtype::QuantizedS32, dtype::QuantizedS8,
                dt_int8, dt_int32, dt_int8, PostprocessMode::QUANTIZED,
                "Default::QINT8x8x32_QINT8"_hash);
            break;
        case MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA:
            cb1(MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "OnlyPackA::FLOAT"_hash);
            break;
        case MatrixMulImpl::AlgoBase::PackMode::NO_PACK:
            cb1(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "NoPack::FLOAT"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_int8,
                dt_int16, dt_int16, dt_int8, dt_int16, dt_int16,
                PostprocessMode::NO_PROCESS, "NoPack::INT8x8x16_INT16"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_int8,
                dt_int32, dt_int32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS, "NoPack::INT8x8x32_INT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK,
                dtype::QuantizedS8, dtype::QuantizedS32,
                dtype::QuantizedS32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS,
                "NoPack::QINT8x8x32_QINT32"_hash);
            break;
        default:
            megdnn_throw("Invalid Pack Mode");
            break;
    }
#undef cb1
#undef cb2
    //! fell through every dtype test above
    megdnn_throw("Invalid Data Type");
    return nullptr;
}
//! Thread-safe cache of Conv1x1Strategy instances, keyed by the dtype /
//! format / packmode combination. Strategies are created lazily on first
//! request and owned by the map; callers receive a non-owning pointer.
class StrategyDelegationStorage {
public:
    //! Return the cached strategy for this combination, creating it on the
    //! first request. The returned pointer remains valid for the lifetime
    //! of the storage (unordered_map never invalidates mapped values).
    Conv1x1StrategyBase* get(const ConvBiasImpl::NCBKernSizeParam& param,
                             MatrixMulImpl::AlgoBase::PackMode pack_mode,
                             param::ConvBias::Format format) {
        MEGDNN_LOCK_GUARD(m_mtx);
        StrategyHashParam sparam;
        sparam.param = param;
        sparam.format = format;
        sparam.packmode = pack_mode;
        //! single lookup instead of find + operator[] insert + operator[]
        //! read: reuse the iterator from find/emplace
        auto it = m_map_strategies.find(sparam);
        if (it == m_map_strategies.end()) {
            auto strategy = create_conv1x1_strategy(param, pack_mode, format);
            it = m_map_strategies.emplace(sparam, std::move(strategy)).first;
        }
        return it->second.get();
    }

private:
    std::mutex m_mtx;  //! guards m_map_strategies
    std::unordered_map<StrategyHashParam,
                       std::unique_ptr<Conv1x1StrategyBase>,
                       StrategyHashParamHash, StrategyHashParamEqual>
            m_map_strategies;
};
}
// anonymous namespace
//! Entry point used by AlgoConv1x1: fetch (or lazily create) the shared
//! strategy instance for this parameter combination from a process-wide
//! cache. The returned pointer is non-owning and never freed by callers.
Conv1x1StrategyBase* Conv1x1Factory::make_conv1x1_strategy(
        const ConvBiasImpl::NCBKernSizeParam& param,
        MatrixMulImpl::AlgoBase::PackMode pack_mode,
        param::ConvBias::Format format) {
    //! function-local static: constructed once, lives until process exit
    static StrategyDelegationStorage storage;
    return storage.get(param, pack_mode, format);
}
}
// namespace conv1x1
}
// namespace fallback
}
// namespace megdnn
dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h
0 → 100644
浏览文件 @
c985204b
/**
* \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/opr_param_defs.h"
#include "src/fallback/conv_bias/opr_impl.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif
namespace
megdnn
{
namespace
fallback
{
namespace
conv1x1
{
#if MEGDNN_X86
using
namespace
x86
;
#endif
namespace
{
//! get_matmul_kern_param
//! Translate a 1x1 convolution into an equivalent GEMM shape:
//! M = OC tile (\p m), N = OH * OW (\p n), K = IC. A = filter, B = src.
MatrixMulImpl::KernSizeParam get_matmul_kern_param(
        const ConvBiasImpl::NCBKernSizeParam& param, size_t n, size_t m) {
    size_t M = m;
    size_t N = n;
    size_t K = param.filter_meta.icpg;  //! K = IC
    //! row-major, no transpose: A is M x K, B is K x N, C is M x N
    size_t LDA = K, LDB = N, LDC = N;
    //! 8-bit outputs accumulate into the (wider) bias dtype first; the
    //! post-process stage requantizes down to 8 bits afterwards
    bool is_dst_8bit = (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                        param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                       (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                        param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
    return {param.filter_type,
            param.src_type,
            is_dst_8bit ? param.bias_type : param.dst_type,
            M,
            N,
            K,
            LDA,
            LDB,
            LDC,
            false,
            false,
            param::MatrixMul::ComputeMode::DEFAULT,
            param::MatrixMul::Format::DEFAULT};
}
}
// namespace
//! Type-erased interface for the 1x1-conv execution strategy. Concrete
//! instantiations (Conv1x1Strategy below) are selected at runtime by dtype,
//! post-process mode and matmul pack mode.
class Conv1x1StrategyBase {
public:
    //! Pack one OC tile of the filter (matmul A matrix) into the workspace.
    //! Invoked once per (group, oc tile); no-op modes log an error instead.
    virtual void packA(WorkspaceBundle& whole_bundle,
                       WorkspaceBundle& matmul_bundle, size_t oc_tile_size,
                       const MatrixMulImpl::AlgoBase* matmul_algo,
                       const ConvBiasImpl::NCBKernSizeParam& param,
                       const ConvBiasImpl::NCBKernParam& ncb_param,
                       const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;
    //! Pack the src tensor (matmul B matrix) for every batch and group.
    //! Only meaningful for PackMode::DEFAULT.
    virtual void packB(WorkspaceBundle& whole_bundle,
                       WorkspaceBundle& matmul_bundle,
                       const MatrixMulImpl::AlgoBase* matmul_algo,
                       const ConvBiasImpl::NCBKernSizeParam& param,
                       const ConvBiasImpl::NCBKernParam& ncb_param,
                       const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;
    //! Run the matmul for one (batch, group, oc tile) and apply bias /
    //! nonlinearity post-processing into the conv output.
    virtual void exec(WorkspaceBundle& whole_bundle,
                      WorkspaceBundle& matmul_bundle,
                      WorkspaceBundle& thread_bundle, size_t oc_tile_size,
                      const MatrixMulImpl::AlgoBase* matmul_algo,
                      const ConvBiasImpl::NCBKernSizeParam& param,
                      const ConvBiasImpl::NCBKernParam& ncb_param,
                      const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;
    //! virtual dtor: instances are deleted through this base
    virtual ~Conv1x1StrategyBase() = default;
};
//! Concrete 1x1-conv strategy, parameterized on the C types of src/bias/dst,
//! the post-process op types, the post-process mode and the matmul pack
//! mode. All `pack_mode == ...` branches are compile-time constant.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode,
          MatrixMulImpl::AlgoBase::PackMode pack_mode>
class Conv1x1Strategy : public Conv1x1StrategyBase {
public:
    //! Pack the filter slice for one (group, oc tile) into workspace slot 0.
    void packA(WorkspaceBundle& whole_bundle, WorkspaceBundle& matmul_bundle,
               size_t oc_tile_size,
               const MatrixMulImpl::AlgoBase* matmul_algo,
               const ConvBiasImpl::NCBKernSizeParam& param,
               const ConvBiasImpl::NCBKernParam& ncb_param,
               const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
            megdnn_log_error("NoPack mode has no packA kernel");
            return;
        }
        //! re-point the bundle at this invocation's runtime workspace
        whole_bundle.set(ncb_param.workspace_ptr);

        //! packa size per group
        size_t OC = param.filter_meta.ocpg;
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t packa_bytes_per_group =
                oc_tiles_per_group * packa_bytes_per_oc_tile;

        size_t group_id = ncb_index.ndrange_id[0];
        size_t oc_tile_id_in_group = ncb_index.ndrange_id[1];
        //! clamp the last tile to OC
        size_t oc_start = oc_tile_id_in_group * oc_tile_size;
        size_t oc_end = oc_start + oc_tile_size;
        oc_end = (oc_end <= OC ? oc_end : OC);

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        size_t IC = param.filter_meta.icpg;

        MatrixMulImpl::KernParam matmul_kern_param;
        //! fill only the size part; pointers are set below
        static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                get_matmul_kern_param(param, OH * OW, oc_end - oc_start);

        size_t bytes_offset_of_a_panel =
                group_id * packa_bytes_per_group +
                oc_tile_id_in_group * packa_bytes_per_oc_tile;
        //! element (not byte) offset of this tile's rows within the filter
        size_t numbers_offset_of_filter =
                oc_tile_size * IC * oc_tile_id_in_group;

        src_ctype* a_panel = reinterpret_cast<src_ctype*>(
                reinterpret_cast<int8_t*>(whole_bundle.get(0)) +
                bytes_offset_of_a_panel);
        matmul_kern_param.A_ptr = const_cast<src_ctype*>(
                ncb_param.filter<src_ctype>(group_id) +
                numbers_offset_of_filter);
        matmul_algo->pack_A(matmul_kern_param, a_panel, 0,
                            oc_end - oc_start);
    }

    //! Pack the src tensor for every (batch, group) into workspace slot 1.
    //! Only PackMode::DEFAULT has a packed B.
    void packB(WorkspaceBundle& whole_bundle, WorkspaceBundle& matmul_bundle,
               const MatrixMulImpl::AlgoBase* matmul_algo,
               const ConvBiasImpl::NCBKernSizeParam& param,
               const ConvBiasImpl::NCBKernParam& ncb_param,
               const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
            whole_bundle.set(ncb_param.workspace_ptr);

            //! packb size per group
            size_t packb_bytes_per_group = matmul_bundle.get_size(1);

            size_t GROUP = param.filter_meta.group;
            size_t BATCH = param.n;
            size_t SH = param.filter_meta.stride[0];
            size_t SW = param.filter_meta.stride[1];
            size_t OH = param.osz[0];
            size_t OW = param.osz[1];
            size_t OC = param.filter_meta.ocpg;

            MatrixMulImpl::KernParam matmul_kern_param;
            static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                    get_matmul_kern_param(param, OH * OW, OC);

            rep(batch, BATCH) {
                rep(g, GROUP) {
                    //! defensive check; `usable` already rejects stride != 1
                    if (SH == 2 && SW == 2)
                        megdnn_throw("no support for stride = 2");
                    size_t bytes_offset_of_b_panel =
                            batch * packb_bytes_per_group * GROUP +
                            g * packb_bytes_per_group;
                    src_ctype* b_panel = reinterpret_cast<src_ctype*>(
                            reinterpret_cast<int8_t*>(whole_bundle.get(1)) +
                            bytes_offset_of_b_panel);
                    matmul_kern_param.B_ptr = const_cast<src_ctype*>(
                            ncb_param.src<src_ctype>(batch, g));
                    matmul_algo->pack_B(matmul_kern_param, b_panel, 0,
                                        OH * OW);
                }
            }
        } else {
            megdnn_log_error(
                    "OnlyPackA mode and NoPack mode has no packB kernel");
        }
    }

    //! Run one (batch, group, oc tile) matmul and apply post-processing.
    void exec(WorkspaceBundle& whole_bundle, WorkspaceBundle& matmul_bundle,
              WorkspaceBundle& thread_bundle, size_t oc_tile_size,
              const MatrixMulImpl::AlgoBase* matmul_algo,
              const ConvBiasImpl::NCBKernSizeParam& param,
              const ConvBiasImpl::NCBKernParam& ncb_param,
              const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        whole_bundle.set(ncb_param.workspace_ptr);
        size_t OC = param.filter_meta.ocpg;
        size_t IC = param.filter_meta.icpg;

        //! packa bytes per group
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t packa_bytes_per_group =
                packa_bytes_per_oc_tile * oc_tiles_per_group;

        //! packb bytes per group
        size_t packb_bytes_per_group = matmul_bundle.get_size(1);

        //! matmul bytes per thread
        size_t matmul_bytes_per_thread = thread_bundle.get_size(0);

        size_t batch_id = ncb_index.ndrange_id[0];
        size_t group_id = ncb_index.ndrange_id[1];
        size_t oc_tile_id_in_group = ncb_index.ndrange_id[2];
        size_t thread_id = ncb_index.thread_id;

        size_t GROUP = param.filter_meta.group;
        size_t OH = param.osz[0];
        size_t OW = param.osz[1];

        //! clamp the last tile to OC
        size_t oc_start = oc_tile_size * oc_tile_id_in_group;
        size_t oc_end = oc_start + oc_tile_size;
        oc_end = (oc_end <= OC ? oc_end : OC);

        MatrixMulImpl::KernParam matmul_kern_param;
        static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                get_matmul_kern_param(param, OH * OW, oc_end - oc_start);

        //! locate this tile's packed A panel (slot 0)
        size_t bytes_offset_of_a_panel =
                group_id * packa_bytes_per_group +
                oc_tile_id_in_group * packa_bytes_per_oc_tile;
        int8_t* a_panel = reinterpret_cast<int8_t*>(whole_bundle.get(0)) +
                          bytes_offset_of_a_panel;

        //! locate this (batch, group)'s packed B panel (slot 1)
        size_t bytes_offset_of_b_panel =
                batch_id * packb_bytes_per_group * GROUP +
                group_id * packb_bytes_per_group;
        int8_t* b_panel = reinterpret_cast<int8_t*>(whole_bundle.get(1)) +
                          bytes_offset_of_b_panel;

        //! per-thread scratch lives in slot 2: [matmul workspace | temp dst]
        size_t thread_offset =
                thread_bundle.total_size_in_bytes() * thread_id;
        size_t bytes_offset_of_matmul_dst_this_thread =
                thread_offset + thread_bundle.get_size(0);
        int8_t* matmul_temp_dst =
                reinterpret_cast<int8_t*>(whole_bundle.get(2)) +
                bytes_offset_of_matmul_dst_this_thread;

        //! element offset of this tile inside the conv dst / bias tensors
        size_t numbers_of_ncb_dst_offset =
                oc_tile_size * OH * OW * oc_tile_id_in_group;
        void* conv_bias_dst = static_cast<void*>(
                ncb_param.dst<dst_ctype>(batch_id, group_id) +
                numbers_of_ncb_dst_offset);

        size_t numbers_of_ncb_filter_offset =
                oc_tile_size * IC * oc_tile_id_in_group;
        matmul_kern_param.A_ptr = const_cast<src_ctype*>(
                ncb_param.filter<src_ctype>(group_id) +
                numbers_of_ncb_filter_offset);
        matmul_kern_param.B_ptr = const_cast<src_ctype*>(
                ncb_param.src<src_ctype>(batch_id, group_id));
        matmul_kern_param.workspace_ptr =
                reinterpret_cast<int8_t*>(whole_bundle.get(2)) +
                thread_offset;
        matmul_kern_param.workspace_size = matmul_bytes_per_thread;

        //! 8-bit outputs accumulate into a temp buffer first; the
        //! post-process step requantizes into the real dst
        bool is_dst_8bit =
                (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                 param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                 param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
        void* matmul_dst = is_dst_8bit ? matmul_temp_dst : conv_bias_dst;
        matmul_kern_param.C_ptr = matmul_dst;

        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
            //! unpacked kernel reads A/B straight from the param
            auto matmul_kern = matmul_algo->get_kern(matmul_kern_param);
            matmul_kern(matmul_kern_param);
        } else {
            //! "naked" kernel consumes the pre-packed panels
            auto matmul_kern_naked =
                    matmul_algo->get_kern_naked(matmul_kern_param);
            matmul_kern_naked(matmul_kern_param, a_panel, b_panel);
        }

        //! do postprocess
        void* bias_ptr = nullptr;
        //! full BIAS is indexed like dst; broadcast-channel bias by oc_start
        if (param.bias_mode == megdnn::BiasMode::BIAS)
            bias_ptr = static_cast<void*>(const_cast<bias_ctype*>(
                    ncb_param.bias<bias_ctype>(batch_id, group_id) +
                    numbers_of_ncb_dst_offset));
        else
            bias_ptr = static_cast<void*>(const_cast<bias_ctype*>(
                    ncb_param.bias<bias_ctype>(batch_id, group_id) +
                    oc_start));
        PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
                matmul_dst, bias_ptr, conv_bias_dst, param.bias_mode,
                param.nonlineMode, param.bias_type, param.dst_type, 1_z,
                oc_end - oc_start, OH, OW);
    }
};
/**
 * \brief Factory that selects a concrete Conv1x1 kernel strategy.
 *
 * The chosen strategy depends on the matmul pack mode (e.g. NO_PACK vs a
 * packing mode), the convolution tensor format and the dtype/bias/nonlinearity
 * properties carried by \p param.
 */
class Conv1x1Factory {
public:
    //! \brief Create the strategy object for one conv-bias problem.
    //! \param param      size/dtype description of the conv-bias problem
    //! \param pack_mode  pack mode of the underlying matmul algorithm
    //! \param format     tensor layout of the convolution (param::ConvBias::Format)
    //! \return pointer to the selected Conv1x1StrategyBase implementation.
    //! NOTE(review): ownership/lifetime of the returned pointer is not visible
    //! from this declaration — confirm whether the caller must free it or the
    //! factory caches the instances.
    static Conv1x1StrategyBase* make_conv1x1_strategy(
            const ConvBiasImpl::NCBKernSizeParam& param,
            MatrixMulImpl::AlgoBase::PackMode pack_mode,
            param::ConvBias::Format format);
};
}
// namespace conv1x1
}
// namespace fallback
}
// namespace megdnn
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
c985204b
...
...
@@ -15,6 +15,7 @@
#include "src/common/opr_delegate.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/algos.h"
#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/fallback/conv_bias/im2col/algos.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/naive/convolution/algorithms.h"
...
...
@@ -54,7 +55,13 @@ public:
ohw_tile_size
));
all_algos
.
emplace_back
(
refhold
.
back
().
get
());
}
#if 1
for
(
size_t
oc_tile_size
:
{
24
,
48
})
{
refhold
.
emplace_back
(
new
AlgoConv1x1
(
static_cast
<
MatrixMulImpl
::
AlgoBase
*>
(
algo
),
oc_tile_size
));
all_algos
.
emplace_back
(
refhold
.
back
().
get
());
}
#if 0
//! As these algos maybe very slow, it will make fastrun search slow, so
//! we disable it, but for the test of strategyhelper, we just keep it.
//! FIXME: I do not know a better way to do it.
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
c985204b
...
...
@@ -248,6 +248,7 @@ protected:
private:
class
AlgoNaive
;
class
AlgoIm2col
;
class
AlgoConv1x1
;
class
AlgoWinogradF32
;
class
AlgoWinogradF32_4x4
;
class
AlgoWinogradQS8
;
...
...
dnn/src/x86/matrix_mul/algos.cpp
浏览文件 @
c985204b
...
...
@@ -438,7 +438,6 @@ size_t MatrixMulImpl::AlgoInt8x8x32SSEM4N8K2::get_workspace(
m
,
n
,
k
,
trans_a
,
trans_b
,
strategy
,
cacheline
)
.
get_workspace_size
();
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL
(
AlgoInt8x8x32SSEM4N8K2
,
megdnn_x86_matmul_kern
,
9
,
x86
::
matmul
::
gemm_sse_s8s8s32_4x8x2
,
dt_int8
,
dt_int32
,
dt_int16
);
...
...
dnn/test/common/conv_bias.cpp
浏览文件 @
c985204b
...
...
@@ -875,6 +875,82 @@ std::vector<conv_bias::TestArg> get_conv_bias_args(
return
args
;
}
std
::
vector
<
megdnn
::
test
::
conv_bias
::
TestArg
>
get_conv_bias_1x1_args
(
bool
no_bias
,
bool
no_nonlinemode
,
bool
quantized_nlmod
,
bool
only_broadcast_bias
)
{
using
namespace
conv_bias
;
using
Param
=
param
::
ConvBias
;
using
NLMode
=
param
::
ConvBias
::
NonlineMode
;
using
CONVMode
=
param
::
ConvBias
::
Mode
;
std
::
vector
<
TestArg
>
args
;
auto
pack
=
[
&
](
size_t
n
,
size_t
oc
,
size_t
ic
,
size_t
w
,
size_t
h
,
size_t
stride
,
NLMode
nlmode
,
CONVMode
convmode
)
{
Param
param
;
param
.
stride_h
=
stride
;
param
.
stride_w
=
stride
;
param
.
pad_h
=
0
;
param
.
pad_w
=
0
;
param
.
mode
=
convmode
;
param
.
nonlineMode
=
nlmode
;
args
.
emplace_back
(
param
,
TensorShape
{
n
,
ic
,
h
,
w
},
TensorShape
{
oc
,
ic
,
1
,
1
},
TensorShape
{});
if
(
!
no_bias
)
{
args
.
emplace_back
(
param
,
TensorShape
{
n
,
ic
,
h
,
w
},
TensorShape
{
oc
,
ic
,
1
,
1
},
TensorShape
{
1
,
oc
,
1
,
1
});
if
(
!
only_broadcast_bias
)
{
args
.
emplace_back
(
param
,
TensorShape
{
n
,
ic
,
h
,
w
},
TensorShape
{
oc
,
ic
,
1
,
1
},
TensorShape
{
n
,
oc
,
(
h
-
1
)
/
stride
+
1
,
(
w
-
1
)
/
stride
+
1
});
}
}
param
.
sparse
=
param
::
ConvBias
::
Sparse
::
GROUP
;
args
.
emplace_back
(
param
,
TensorShape
{
n
,
2
*
ic
,
h
,
w
},
TensorShape
{
2
,
oc
,
ic
,
1
,
1
},
TensorShape
{});
if
(
!
no_bias
)
{
args
.
emplace_back
(
param
,
TensorShape
{
n
,
2
*
ic
,
h
,
w
},
TensorShape
{
2
,
oc
,
ic
,
1
,
1
},
TensorShape
{
1
,
2
*
oc
,
1
,
1
});
if
(
!
only_broadcast_bias
)
{
args
.
emplace_back
(
param
,
TensorShape
{
n
,
2
*
ic
,
h
,
w
},
TensorShape
{
2
,
oc
,
ic
,
1
,
1
},
TensorShape
{
n
,
2
*
oc
,
(
h
-
1
)
/
stride
+
1
,
(
w
-
1
)
/
stride
+
1
});
}
}
};
std
::
vector
<
NLMode
>
nonlinemode
=
{
NLMode
::
IDENTITY
};
if
(
!
no_nonlinemode
)
{
nonlinemode
.
emplace_back
(
NLMode
::
RELU
);
nonlinemode
.
emplace_back
(
NLMode
::
H_SWISH
);
if
(
!
quantized_nlmod
)
{
nonlinemode
.
emplace_back
(
NLMode
::
SIGMOID
);
}
}
std
::
vector
<
CONVMode
>
convmodes
{
param
::
ConvBias
::
Mode
::
CONVOLUTION
,
param
::
ConvBias
::
Mode
::
CROSS_CORRELATION
};
for
(
size_t
n
:
{
1
,
2
})
for
(
size_t
oc
:
{
1
,
9
,
33
})
for
(
size_t
ic
:
{
1
,
16
,
64
})
for
(
size_t
size
:
{
7
,
14
,
28
})
for
(
auto
nlmode
:
nonlinemode
)
for
(
auto
convmode
:
convmodes
)
{
pack
(
n
,
oc
,
ic
,
size
,
size
,
1
,
nlmode
,
convmode
);
}
return
args
;
}
void
check_conv_bias
(
std
::
vector
<
conv_bias
::
TestArg
>
args
,
Handle
*
handle
,
const
char
*
algo_name
)
{
using
namespace
conv_bias
;
...
...
dnn/test/common/conv_bias.h
浏览文件 @
c985204b
...
...
@@ -76,6 +76,10 @@ std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_args(
bool
no_nonlinemode
,
bool
quantized_nlmod
=
false
,
bool
only_broadcast_bias
=
false
);
std
::
vector
<
megdnn
::
test
::
conv_bias
::
TestArg
>
get_conv_bias_1x1_args
(
bool
no_bias
,
bool
no_nonlinemode
,
bool
quantized_nlmod
=
false
,
bool
only_broadcast_bias
=
false
);
void
check_conv_bias
(
std
::
vector
<
megdnn
::
test
::
conv_bias
::
TestArg
>
args
,
megdnn
::
Handle
*
handle
,
const
char
*
algo_name
);
...
...
dnn/test/x86/conv_bias.cpp
浏览文件 @
c985204b
...
...
@@ -919,6 +919,79 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) {
#undef cb
}
/**************************** Conv1x1 PackA *************************/
namespace {
//! \brief Run the ConvBias checker over \p args, restricted to one algorithm.
//!
//! \param args       test cases (src/filter/bias shapes plus ConvBias params)
//! \param handle     device handle the checker executes on
//! \param rng        optional RNG for the input tensors; pass nullptr to keep
//!                   the checker's default RNG
//! \param epsilon    maximum allowed error versus the reference implementation
//! \param type0..3   dtypes for src (0), filter (1), bias (2) and dst (4);
//!                   slot 3 (z-tensor) is left at its default
//! \param algo_name  algorithm the checker must use (enforced via callback)
void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
                       RNG* rng, float epsilon, DType type0, DType type1,
                       DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    //! force the algorithm under test instead of heuristic selection
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    //! prefer nullptr over NULL (C++ Core Guidelines ES.47)
    if (rng != nullptr) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3,
                                                                        rng);
    }
    for (auto&& arg : args) {
        //! z-tensor and dst shapes are deduced, hence the two empty shapes
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
}  // namespace
#if MEGDNN_X86_WITH_MKL
//! stride-1 fp32 1x1 convolution through the MKL PackA matmul, oc tile 24
TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_FP32_PACKA) {
    using namespace conv_bias;
    check_conv_bias(get_conv_bias_1x1_args(false, false), handle(),
                    "CONV1x1:X86_F32_MKL_PACKA:24");
}
//! stride-1 fp32 1x1 convolution through the BLAS matmul, oc tile 48
TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_FP32_BLAS) {
    using namespace conv_bias;
    check_conv_bias(get_conv_bias_1x1_args(false, false), handle(),
                    "CONV1x1:X86_F32_BLAS:48");
}
#endif
//! int8 x int8 -> int32 1x1 convolution, stride 1: exercises every available
//! x86 matmul backend (mkl-dnn / VNNI / AVX2 / SSE) on the same case list.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_INT8X8X32) {
    using namespace conv_bias;
    UniformIntRNG rng{-50, 50};
    float epsilon = 0.001;
    //! no bias, identity nonlinearity only (flags: no_bias=true,
    //! no_nonlinemode=true)
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(true, true);
#if MEGDNN_X86_WITH_MKL_DNN
    //! NOTE(review): the mkl-dnn path is gated on the VNNI SIMD check —
    //! presumably the int8 mkl-dnn kernel requires VNNI; confirm this is
    //! intentional rather than a copy of the guard below.
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_MKLDNN:24");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_VNNI:24");
    }
#endif
    //! AVX2 kernels: two different tiling shapes of the same backend
    if (x86::is_supported(x86::SIMDType::AVX2)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_AVX2_4X16X2:24");
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_AVX2_2X4X16:24");
    }
    //! SSE fallback is always run (baseline available on every x86 target)
    checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                      dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                      "CONV1x1:X86_INT8X8X32_SSE_4X8X2:48");
}
/************************* End Conv1x1 PackA ************************/
#endif
TEST_F
(
X86_MULTI_THREADS
,
CONV_BIAS_IM2COLMATMUL_QINT8
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录