Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
c4dfdbd2
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c4dfdbd2
编写于
3月 27, 2020
作者:
M
Megvii Engine Team
提交者:
Xinran Xu
5月 06, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(dnn/fallback): refactor im2col
GitOrigin-RevId: b58770211e33c68267b2ed32811872bb7ee0696c
上级
86a3445e
变更
12
展开全部
隐藏空白更改
内联
并排
Showing
12 changed file
with
2185 addition
and
710 deletion
+2185
-710
dnn/src/fallback/conv_bias/im2col/algos.cpp
dnn/src/fallback/conv_bias/im2col/algos.cpp
+349
-686
dnn/src/fallback/conv_bias/im2col/algos.h
dnn/src/fallback/conv_bias/im2col/algos.h
+1
-2
dnn/src/fallback/conv_bias/im2col/factory.h
dnn/src/fallback/conv_bias/im2col/factory.h
+473
-0
dnn/src/fallback/conv_bias/im2col/strategy_base.h
dnn/src/fallback/conv_bias/im2col/strategy_base.h
+259
-0
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
+379
-0
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
+343
-0
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
+349
-0
dnn/src/fallback/convolution/img2col_helper.h
dnn/src/fallback/convolution/img2col_helper.h
+2
-2
dnn/src/x86/elemwise_helper/kimpl/op_unary_base.h
dnn/src/x86/elemwise_helper/kimpl/op_unary_base.h
+11
-7
dnn/src/x86/matrix_mul/algos.cpp
dnn/src/x86/matrix_mul/algos.cpp
+11
-5
dnn/src/x86/matrix_mul/algos.h
dnn/src/x86/matrix_mul/algos.h
+2
-2
dnn/test/x86/conv_bias.cpp
dnn/test/x86/conv_bias.cpp
+6
-6
未找到文件。
dnn/src/fallback/conv_bias/im2col/algos.cpp
浏览文件 @
c4dfdbd2
此差异已折叠。
点击以展开。
dnn/src/fallback/conv_bias/im2col/algos.h
浏览文件 @
c4dfdbd2
...
...
@@ -67,8 +67,7 @@ public:
}
auto
&&
fm
=
param
.
filter_meta
;
auto
OC
=
fm
.
ocpg
,
IC
=
fm
.
icpg
;
return
(
fm
.
spatial
[
0
]
==
fm
.
spatial
[
1
]
&&
fm
.
spatial
[
0
]
==
1
)
||
OC
>=
32
||
IC
>=
32
;
return
OC
>=
32
||
IC
>=
32
;
}
private:
...
...
dnn/src/fallback/conv_bias/im2col/factory.h
0 → 100644
浏览文件 @
c4dfdbd2
此差异已折叠。
点击以展开。
dnn/src/fallback/conv_bias/im2col/strategy_base.h
0 → 100644
浏览文件 @
c4dfdbd2
/**
* \file dnn/src/fallback/conv_bias/im2col/strategy_base.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "src/fallback/conv_bias/opr_impl.h"
namespace
megdnn
{
using
PackMode
=
fallback
::
MatrixMulImpl
::
AlgoBase
::
PackMode
;
//! Per-call parameters shared by all im2col strategies. Filled by the caller
//! before each kernel stage; the bool flags steer where the matmul result is
//! written (see get_matmul_dst_ptr / copy_dst in the strategy .cpp files).
struct StrategyParam {
    size_t batch_id;              //!< index of the batch element being processed
    size_t group_id;              //!< index of the convolution group
    size_t oc_tile_size;          //!< output channels per oc tile (used for packA offsets)
    size_t oc_cur_index;          //!< first output channel of the current tile
    size_t oc_end_index;          //!< one-past-last output channel of the current tile
    size_t ohw_cur_index;         //!< start offset into the flattened OH*OW output plane
    size_t output_block_size;     //!< number of output positions in this block (matmul N/LDB/LDC)
    size_t output_block_oc_size;  //!< number of output channels in this block (matmul M)
    size_t ohw;                   //!< total OH*OW; row stride when copying into dst
    size_t block_m;               //!< matmul blocking size M — not used in this chunk; presumably set by the caller
    size_t block_n;               //!< matmul blocking size N — not used in this chunk
    size_t block_k;               //!< matmul blocking size K — not used in this chunk
    bool skip_copy_dst;           //!< true when matmul already wrote dst in place; copy_dst becomes a no-op
    bool is_dst_8bit;             //!< dst element is 8-bit, so matmul must write a temp buffer first
    bool is_ohw_size_bigger;      //!< when false, matmul output goes through the thread workspace
};
//! Abstract interface of one im2col execution strategy. A concrete strategy
//! (see the Strategy<...> partial specializations below) implements the five
//! stages of the im2col convolution pipeline; the algorithm driver invokes
//! them in order: copy_padding_kern -> packA_kern (pack modes only) ->
//! exec_im2col -> exec_matmul -> exec_postprocess.
class StrategyBase {
public:
    StrategyBase() = default;
    virtual ~StrategyBase() = default;

    //! Copy one input channel into the padded workspace image
    //! (BUNDLE_PADDING_INDEX), filling the border with the zero point.
    virtual void copy_padding_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    //! Pre-pack one block of the filter matrix (the matmul A operand) into
    //! the shared workspace (BUNDLE_PACKA_INDEX).
    virtual void packA_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    //! Expand the (padded) input into the im2col matrix (the matmul B
    //! operand) for the output block described by \p sparam.
    virtual void exec_im2col(
            WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo) = 0;

    //! Run the matrix multiplication for the current output block.
    virtual void exec_matmul(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam, WorkspaceBundle bundle,
            WorkspaceBundle bundle_thread,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    //! Apply bias/nonlinearity to the matmul output and copy it into dst
    //! when it was not written in place.
    virtual void exec_postprocess(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam, WorkspaceBundle bundle_thread) = 0;
};
//! Primary template, only declared: every supported combination is provided
//! by one of the PackMode partial specializations below (DEFAULT, NO_PACK,
//! ONLY_PACKA). Using an uncovered combination is a compile-time error.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode, PackMode packmode>
class Strategy;
//! Strategy for matmul algorithms with PackMode::DEFAULT: both the filter
//! (A, packed once into the shared bundle) and the im2col matrix (B, packed
//! per thread) are pre-packed before the naked matmul kernel runs.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::DEFAULT> : public StrategyBase {
public:
    //! slots in the shared workspace bundle
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;  //!< padded input image
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;    //!< packed A (filter)
    //! slots in the per-thread workspace bundle
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;   //!< packed B
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;  //!< im2col matrix / temp dst
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;    //!< staged bias rows

    Strategy();

    void copy_padding_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    void packA_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    void exec_im2col(
            WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;

    void exec_matmul(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam, WorkspaceBundle bundle,
            WorkspaceBundle bundle_thread,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    void exec_postprocess(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam,
            WorkspaceBundle bundle_thread) override;

    //! copy the post-processed block from the temp buffer into dst
    //! (no-op when sparam.skip_copy_dst is set)
    void copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
                  const void* matmul_dst, const StrategyParam& sparam);

    //! stage per-element BIAS rows for the current block into the thread
    //! bias buffer (only in BiasMode::BIAS)
    void copy_bias(const fallback::ConvBiasImpl::NCBKernParam& param,
                   WorkspaceBundle bundle_thread, const StrategyParam& sparam);

    //! thread bias buffer in BiasMode::BIAS, nullptr otherwise
    void* get_bias_temp_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                            const WorkspaceBundle& bundle_thread);

    //! where the matmul kernel should write: a temp buffer, or dst directly
    void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                             const WorkspaceBundle& bundle_thread,
                             const StrategyParam& sparam);
};
//! Strategy for matmul algorithms with PackMode::NO_PACK: neither operand is
//! pre-packed, so packA_kern must never be called (the .cpp implementation
//! throws) and the matmul reads the im2col matrix directly.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::NO_PACK> : public StrategyBase {
public:
    //! slots in the shared workspace bundle
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;  //!< padded input image
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;    //!< kept for layout parity; unused in NO_PACK
    //! slots in the per-thread workspace bundle
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 0;     //!< im2col matrix
    constexpr static size_t THREAD_BUNDLE_MATMULDST_INDEX = 1;  //!< temp matmul dst
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;       //!< staged bias rows
    constexpr static size_t THREAD_BUNDLE_MATCOMP_INDEX = 3;    //!< matmul compute scratch

    Strategy();

    void copy_padding_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    //! not supported in NO_PACK mode; the implementation throws
    void packA_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    void exec_matmul(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam, WorkspaceBundle bundle,
            WorkspaceBundle bundle_thread,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    //! where the matmul kernel should write: a temp buffer, or dst directly
    void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                             const WorkspaceBundle& bundle_thread,
                             const StrategyParam& sparam);

    //! thread bias buffer in BiasMode::BIAS, nullptr otherwise
    inline void* get_bias_temp_ptr(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const WorkspaceBundle& bundle_thread) {
        bias_ctype* bias_tmp_ptr =
                param.bias_mode == megdnn::BiasMode::BIAS
                        ? static_cast<bias_ctype*>(
                                  bundle_thread.get(THREAD_BUNDLE_BIAS_INDEX))
                        : nullptr;
        return bias_tmp_ptr;
    }

    void exec_im2col(
            WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;

    void exec_postprocess(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam,
            WorkspaceBundle bundle_thread) override;

    //! copy the post-processed block from the temp buffer into dst
    void copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
                  const void* matmul_dst, const StrategyParam& sparam);

    //! stage per-element BIAS rows for the current block (BiasMode::BIAS only)
    void copy_bias(const fallback::ConvBiasImpl::NCBKernParam& param,
                   WorkspaceBundle bundle_thread, const StrategyParam& sparam);
};
//! Strategy for matmul algorithms with PackMode::ONLY_PACKA: only the filter
//! (A operand) is pre-packed; the im2col matrix is consumed unpacked, but a
//! per-thread PACKB slot is still reserved in the bundle layout.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::ONLY_PACKA> : public StrategyBase {
public:
    //! slots in the shared workspace bundle
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;  //!< padded input image
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;    //!< packed A (filter)
    //! slots in the per-thread workspace bundle
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;      //!< B scratch
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;     //!< im2col matrix
    constexpr static size_t THREAD_BUNDLE_MATMULDST_INDEX = 2;  //!< temp matmul dst
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 3;       //!< staged bias rows

    Strategy();

    void copy_padding_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    void packA_kern(
            WorkspaceBundle bundle,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    void exec_im2col(
            WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;

    void exec_matmul(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam, WorkspaceBundle bundle,
            WorkspaceBundle bundle_thread,
            fallback::MatrixMulImpl::KernParam matmul_param,
            fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) override;

    //! where the matmul kernel should write: a temp buffer, or dst directly
    void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                             const WorkspaceBundle& bundle_thread,
                             const StrategyParam& sparam);

    //! thread bias buffer in BiasMode::BIAS, nullptr otherwise
    inline void* get_bias_temp_ptr(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const WorkspaceBundle& bundle_thread) {
        bias_ctype* bias_tmp_ptr =
                param.bias_mode == megdnn::BiasMode::BIAS
                        ? static_cast<bias_ctype*>(
                                  bundle_thread.get(THREAD_BUNDLE_BIAS_INDEX))
                        : nullptr;
        return bias_tmp_ptr;
    }

    void exec_postprocess(
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const StrategyParam& sparam,
            WorkspaceBundle bundle_thread) override;

    //! copy the post-processed block from the temp buffer into dst
    void copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
                  const void* matmul_dst, const StrategyParam& sparam);

    //! stage per-element BIAS rows for the current block (BiasMode::BIAS only)
    void copy_bias(const fallback::ConvBiasImpl::NCBKernParam& param,
                   WorkspaceBundle bundle_thread, const StrategyParam& sparam);
};
}
// namespace megdnn
dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
0 → 100644
浏览文件 @
c4dfdbd2
/**
* \file dnn/src/fallback/conv_bias/im2col/strategy_default.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/opr_param_defs.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/im2col/strategy_base.h"
#include "src/fallback/convolution/img2col_helper.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif
using
namespace
megdnn
;
#if MEGDNN_X86
using
namespace
x86
;
#endif
namespace
megdnn
{
//! DEFAULT-pack strategy constructor: the strategy is stateless, so nothing
//! needs initialising beyond the (also stateless) base class, which is
//! default-constructed implicitly.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
         postprocess_mode, PackMode::DEFAULT>::Strategy() {}
//! Copy one input channel into the padded workspace image, writing the
//! source zero point into the border. One kernel invocation handles one
//! (batch, group, channel) triple taken from ncb_index.ndrange_id.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        copy_padding_kern(WorkspaceBundle bundle,
                          const fallback::ConvBiasImpl::NCBKernParam& param,
                          const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    UNPACK_CONV_F32_NCB_KERN_SIZES(param);
    // only the input geometry and padding are needed here
    MEGDNN_MARK_USED_VAR(N);
    MEGDNN_MARK_USED_VAR(OC);
    MEGDNN_MARK_USED_VAR(OH);
    MEGDNN_MARK_USED_VAR(OW);
    MEGDNN_MARK_USED_VAR(FH);
    MEGDNN_MARK_USED_VAR(FW);
    MEGDNN_MARK_USED_VAR(SH);
    MEGDNN_MARK_USED_VAR(SW);
    // padded spatial extents
    size_t IW2 = IW + 2 * PW;
    size_t IH2 = IH + 2 * PH;
    size_t batch_id = ncb_index.ndrange_id[0];
    size_t group_id = ncb_index.ndrange_id[1];
    size_t channel_id = ncb_index.ndrange_id[2];
    // workspace layout: [batch][group][channel] padded planes, in elements
    size_t padding_group_size = IH2 * IW2 * IC;
    size_t workspace_channel_offset = IH2 * IW2 * channel_id;
    size_t workspace_group_offset = group_id * padding_group_size;
    size_t workspace_batch_offset =
            param.filter_meta.group * batch_id * padding_group_size;
    bundle.set(param.workspace_ptr);

    // padding value: the asymmetric-quantized zero point, otherwise 0
    src_ctype src_zp = static_cast<src_ctype>(0);
    if (param.src_type.enumv() == DTypeEnum::Quantized8Asymm) {
        src_zp = param.src_type.param<dtype::Quantized8Asymm>().zero_point;
    }
    src_ctype* src = const_cast<src_ctype*>(
            param.src<src_ctype>(batch_id, group_id, channel_id));
    src_ctype* src2;
    src2 = static_cast<src_ctype*>(bundle.get(BUNDLE_PADDING_INDEX)) +
           workspace_group_offset + workspace_batch_offset +
           workspace_channel_offset;
    src_ctype* src2_ptr = src2;
    const src_ctype* src_ptr = src;
    // top border rows
    // NOTE(review): memset writes src_zp as a byte pattern; this is only
    // exact for 1-byte src_ctype or src_zp == 0 — presumably the only cases
    // that reach here (zero point is set for Quantized8Asymm only).
    if (PH != 0) {
        std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH * IW2);
        src2_ptr += PH * IW2;
    }
    // interior rows: left border, payload, right border
    rep(ih, IH) {
        if (PW != 0)
            rep(pw, PW) * (src2_ptr++) = src_zp;
        std::memcpy(src2_ptr, src_ptr, sizeof(src_ctype) * IW);
        src2_ptr += IW;
        src_ptr += IW;
        if (PW != 0)
            rep(pw, PW) * (src2_ptr++) = src_zp;
    }
    // bottom border rows
    if (PH != 0) {
        std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH * IW2);
        src2_ptr += PH * IW2;
    }
}
//! Pre-pack one oc block of the filter matrix (matmul A operand) into the
//! shared workspace. ndrange_id[0] selects the group, ndrange_id[1] the oc
//! block within the group.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        packA_kern(WorkspaceBundle bundle,
                   const fallback::ConvBiasImpl::NCBKernParam& param,
                   fallback::MatrixMulImpl::KernSizeParam matmulparam,
                   fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                   const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    bundle.set(param.workspace_ptr);
    fallback::MatrixMulImpl::KernParam matmul_param;
    size_t group_id = ncb_index.ndrange_id[0];
    // KernParam extends KernSizeParam; copy the size part in via slicing
    static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
            matmulparam;
    // size of one group's packed-A region (slot 0 of the matmul bundle)
    size_t packA_group_size = matmul_algo->get_bundle(matmul_param).get_size(0);
    // bytes of packed A produced per oc block: K rounded up to the inner
    // k-block, times the inner m-block, times the packed element size
    size_t packed_per_oc_block_size =
            round_up(matmul_param.K, matmul_algo->get_inner_block_size().k) *
            matmul_algo->get_inner_block_size().m *
            matmul_algo->get_packA_type_size();
    size_t a_panel_offset = ncb_index.ndrange_id[1] * packed_per_oc_block_size;
    int8_t* a_panel = static_cast<int8_t*>(bundle.get(BUNDLE_PACKA_INDEX)) +
                      group_id * packA_group_size + a_panel_offset;
    matmul_param.A_ptr =
            const_cast<src_ctype*>(param.filter<src_ctype>(group_id));
    matmul_algo->pack_A(matmul_param, a_panel, ncb_index.ndrange_id[1],
                        matmul_algo->get_inner_block_size().m);
}
//! Expand the (padded) input into the im2col matrix for the current output
//! block, then pre-pack it as the matmul B operand. Dispatches between the
//! stride-1 fast path and the general strided path, and between convolution
//! and cross-correlation (should_flip).
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        exec_im2col(WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
                    const StrategyParam& sparam,
                    const fallback::ConvBiasImpl::NCBKernParam& param,
                    fallback::MatrixMulImpl::KernParam matmul_param,
                    fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
    size_t m_sh = param.filter_meta.stride[0];
    size_t m_sw = param.filter_meta.stride[1];
    size_t m_oc = param.filter_meta.ocpg;
    size_t m_oh = param.osz[0];
    size_t m_ow = param.osz[1];
    size_t m_ic = param.filter_meta.icpg;
    // input extents including padding (the padded workspace image layout)
    size_t m_ih = param.isz[0] + param.filter_meta.padding[0] * 2;
    size_t m_iw = param.isz[1] + param.filter_meta.padding[1] * 2;
    size_t m_fh = param.filter_meta.spatial[0];
    size_t m_fw = param.filter_meta.spatial[1];
    size_t m_is_xcorr = !param.filter_meta.should_flip;
    // byte offset of this (batch, group) plane inside the padded workspace
    size_t input_offset =
            m_ih * m_iw * m_ic *
            (sparam.group_id + param.filter_meta.group * sparam.batch_id) *
            sizeof(src_ctype);
    src_ctype* src2 = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PADDING_INDEX)) +
            input_offset);
    // with zero padding the copy_padding stage was skipped: read the
    // original input tensor directly
    bool is_phpwzero = param.filter_meta.padding[0] == 0 &&
                       param.filter_meta.padding[1] == 0;
    if (is_phpwzero) {
        src2 = const_cast<src_ctype*>(
                param.src<src_ctype>(sparam.batch_id, sparam.group_id));
    }
    src_ctype* im2col_dst = static_cast<src_ctype*>(
            bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
    if (m_sh == 1 && m_sw == 1) {
        if (m_is_xcorr) {
            img2col<true>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic, m_ih,
                          m_iw, m_fh, m_fw, sparam.ohw_cur_index,
                          sparam.output_block_size);
        } else {
            img2col<false>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic, m_ih,
                           m_iw, m_fh, m_fw, sparam.ohw_cur_index,
                           sparam.output_block_size);
        }
    } else {
        if (m_is_xcorr) {
            img2col_stride<true>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic,
                                 m_ih, m_iw, m_fh, m_fw, m_sh, m_sw,
                                 sparam.ohw_cur_index,
                                 sparam.output_block_size);
        } else {
            img2col_stride<false>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic,
                                  m_ih, m_iw, m_fh, m_fw, m_sh, m_sw,
                                  sparam.ohw_cur_index,
                                  sparam.output_block_size);
        }
    }
    // describe the current block to the matmul and pre-pack B
    matmul_param.M = sparam.output_block_oc_size;
    matmul_param.N = sparam.output_block_size;
    matmul_param.LDB = sparam.output_block_size;
    matmul_param.LDC = sparam.output_block_size;
    matmul_param.B_ptr = im2col_dst;
    src_ctype* b_panel = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(
                    bundle_thread.get(THREAD_BUNDLE_PACKB_INDEX)));
    matmul_algo->pack_B(matmul_param, b_panel, 0, matmul_param.N);
}
//! Decide where the matmul kernel writes its output for the current block.
//! When the dst element is 8-bit (accumulation needs a wider temp) or the
//! block does not cover the full ohw extent, route through the thread-local
//! im2col buffer; otherwise write straight into dst at the current oc tile.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void* Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::DEFAULT>::
        get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                           const WorkspaceBundle& bundle_thread,
                           const StrategyParam& sparam) {
    const bool need_temp_buffer =
            sparam.is_dst_8bit || !sparam.is_ohw_size_bigger;
    if (need_temp_buffer) {
        return static_cast<void*>(
                bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
    }
    bias_ctype* inplace_dst =
            param.dst<bias_ctype>(sparam.batch_id, sparam.group_id) +
            sparam.oc_cur_index * sparam.ohw;
    return static_cast<void*>(inplace_dst);
}
//! Run the pre-packed (naked) matmul kernel for the current output block:
//! C[M x N] = packed-A tile x packed-B panel.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
                    const StrategyParam& sparam, WorkspaceBundle bundle,
                    WorkspaceBundle bundle_thread,
                    fallback::MatrixMulImpl::KernParam matmul_param,
                    fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                    const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    // bytes of packed A per oc tile: K rounded up to the inner k-block,
    // times the oc tile size, times the packed element size
    size_t packA_per_oc_block_size =
            round_up(matmul_param.K, matmul_algo->get_inner_block_size().k) *
            sparam.oc_tile_size * matmul_algo->get_packA_type_size();
    size_t packA_group_size = matmul_algo->get_bundle(matmul_param).get_size(0);
    // ndrange_id[1] selects the group, ndrange_id[3] the oc tile
    size_t a_panel_offset = ncb_index.ndrange_id[1] * packA_group_size +
                            ncb_index.ndrange_id[3] * packA_per_oc_block_size;
    void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
    src_ctype* a_panel = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PACKA_INDEX)) +
            a_panel_offset);
    src_ctype* b_panel = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(
                    bundle_thread.get(THREAD_BUNDLE_PACKB_INDEX)));
    matmul_param.M = sparam.output_block_oc_size;
    matmul_param.N = sparam.output_block_size;
    matmul_param.LDB = sparam.output_block_size;
    matmul_param.LDC = sparam.output_block_size;
    matmul_param.C_ptr = matmul_dst;
    // "naked" kernel: operands are already packed, no internal packing pass
    auto matmul_kern_naked = matmul_algo->get_kern_naked(matmul_param);
    matmul_kern_naked(matmul_param, a_panel, b_panel);
}
//! Apply bias and nonlinearity to the matmul output of the current block,
//! then copy it into dst when the matmul did not write in place.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        exec_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
                         const StrategyParam& sparam,
                         WorkspaceBundle bundle_thread) {
    // in per-element BIAS mode, stage the relevant bias rows first
    copy_bias(param, bundle_thread, sparam);
    void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
    const bias_ctype* bias_ptr = static_cast<const bias_ctype*>(
            param.bias<bias_ctype>(sparam.batch_id, sparam.group_id));
    void* bias_temp_ptr = get_bias_temp_ptr(param, bundle_thread);
    // BIAS mode reads the staged copy; other modes read the bias tensor
    // directly, offset to the current oc tile
    void* bias_preprocess_ptr = const_cast<void*>(
            param.bias_mode == megdnn::BiasMode::BIAS
                    ? bias_temp_ptr
                    : static_cast<void*>(const_cast<bias_ctype*>(
                              bias_ptr + sparam.oc_cur_index)));
    // in-place post-process: src and dst are both matmul_dst; the block is
    // treated as a 1 x M x 1 x N tensor
    PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
            matmul_dst, bias_preprocess_ptr, matmul_dst, param.bias_mode,
            param.nonlineMode, param.bias_type, param.dst_type, 1_z,
            sparam.output_block_oc_size, 1_z, sparam.output_block_size);
    copy_dst(param, matmul_dst, sparam);
}
//! Copy the post-processed block from the temp buffer into the dst tensor,
//! one oc row at a time (temp rows are densely packed, dst rows are ohw
//! apart). No-op when the matmul already wrote dst in place.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
                 const void* matmul_dst, const StrategyParam& sparam) {
    if (sparam.skip_copy_dst) {
        return;
    }
    const dst_ctype* block_row = static_cast<const dst_ctype*>(matmul_dst);
    dst_ctype* out_row =
            param.dst<dst_ctype>(sparam.batch_id, sparam.group_id) +
            sparam.oc_cur_index * sparam.ohw + sparam.ohw_cur_index;
    const size_t row_bytes = sizeof(dst_ctype) * sparam.output_block_size;
    for (size_t oc = 0; oc < sparam.output_block_oc_size; ++oc) {
        std::memcpy(out_row, block_row, row_bytes);
        block_row += sparam.output_block_size;
        out_row += sparam.ohw;
    }
}
//! Return the thread-local staging buffer for per-element bias, or nullptr
//! when the bias mode does not need one (only BiasMode::BIAS stages bias).
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void* Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::DEFAULT>::
        get_bias_temp_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                          const WorkspaceBundle& bundle_thread) {
    if (param.bias_mode != megdnn::BiasMode::BIAS) {
        return nullptr;
    }
    return static_cast<bias_ctype*>(
            bundle_thread.get(THREAD_BUNDLE_BIAS_INDEX));
}
//! In per-element BIAS mode, copy the bias rows covering the current block
//! into the thread staging buffer (dense rows of output_block_size, source
//! rows are ohw apart). Other bias modes need no staging and return early.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::DEFAULT>::
        copy_bias(const fallback::ConvBiasImpl::NCBKernParam& param,
                  WorkspaceBundle bundle_thread, const StrategyParam& sparam) {
    const bias_ctype* bias_ptr = static_cast<const bias_ctype*>(
            param.bias<bias_ctype>(sparam.batch_id, sparam.group_id));
    bias_ctype* bias_temp_ptr = static_cast<bias_ctype*>(
            get_bias_temp_ptr(param, bundle_thread));
    if (param.bias_mode == megdnn::BiasMode::BIAS) {
        bias_ctype* copy_dst = bias_temp_ptr;
        const bias_ctype* copy_src = bias_ptr +
                                     sparam.oc_cur_index * sparam.ohw +
                                     sparam.ohw_cur_index;
        // one staged row per output channel in [oc_cur_index, oc_end_index)
        for (size_t oc = sparam.oc_cur_index; oc < sparam.oc_end_index;
             oc++) {
            std::memcpy(copy_dst, copy_src,
                        sizeof(bias_ctype) * sparam.output_block_size);
            copy_dst += sparam.output_block_size;
            copy_src += sparam.ohw;
        }
    }
}
//! Explicit instantiations of the DEFAULT-pack strategy for every supported
//! (src, bias, dst, op ctype, op dtype, postprocess) combination, so the
//! template definitions in this .cpp are emitted exactly once.
#define INSTANTIAL_CLASS(_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
                         _op_dtype, _postprocess_mode) \
    template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
                            _op_dtype, _postprocess_mode, PackMode::DEFAULT>;

//! float32
INSTANTIAL_CLASS(dt_float32, dt_float32, dt_float32, dt_float32, dt_float32,
                 megdnn::PostprocessMode::FLOAT)
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
//! native fp16 arithmetic available: full float postprocess
INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, __fp16, __fp16,
                 megdnn::PostprocessMode::FLOAT)
#else
#if !MEGDNN_DISABLE_FLOAT16
//! fp16 storage without native arithmetic: no postprocess
INSTANTIAL_CLASS(dt_float16, dt_float16, dt_float16, dt_float16, dt_float16,
                 megdnn::PostprocessMode::NO_PROCESS)
#endif
#endif

#if MEGDNN_AARCH64 || MEGDNN_ARMV7
//! x86 do not have uint8 matmul so only armv7 armv8 support uint8
INSTANTIAL_CLASS(dt_uint8, dt_int32, dt_uint8, dt_qint32, dt_quint8,
                 megdnn::PostprocessMode::QUANTIZED)
INSTANTIAL_CLASS(dt_uint8, dt_int32, dt_int32, dt_qint32, dt_qint32,
                 megdnn::PostprocessMode::NO_PROCESS)
#endif

//! int8 variants (quantized and raw-accumulator outputs)
INSTANTIAL_CLASS(dt_int8, dt_int32, dt_int8, dt_qint32, dt_qint8,
                 megdnn::PostprocessMode::QUANTIZED)
INSTANTIAL_CLASS(dt_int8, dt_int32, dt_int32, dt_int32, dt_int32,
                 megdnn::PostprocessMode::NO_PROCESS)
INSTANTIAL_CLASS(dt_int8, dt_int16, dt_int16, dt_int16, dt_int16,
                 megdnn::PostprocessMode::NO_PROCESS)
INSTANTIAL_CLASS(dt_int8, dt_int32, dt_int32, dt_qint32, dt_qint32,
                 megdnn::PostprocessMode::NO_PROCESS)
#undef INSTANTIAL_CLASS
}
// namespace megdnn
dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
0 → 100644
浏览文件 @
c4dfdbd2
/**
* \file dnn/src/fallback/conv_bias/im2col/strategy_nopack.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/opr_param_defs.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/im2col/strategy_base.h"
#include "src/fallback/convolution/img2col_helper.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif
using
namespace
megdnn
;
#if MEGDNN_X86
using
namespace
x86
;
#endif
namespace
megdnn
{
//! NO_PACK strategy constructor: the strategy is stateless, so nothing needs
//! initialising beyond the (also stateless) base class, which is
//! default-constructed implicitly.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
         postprocess_mode, PackMode::NO_PACK>::Strategy() {}
//! Copy one input channel into the padded workspace image, filling the
//! border with the zero point. One invocation handles one (batch, group,
//! channel) triple from ncb_index.ndrange_id.
//! NOTE(review): this body is a verbatim duplicate of the DEFAULT-pack
//! copy_padding_kern in strategy_default.cpp — a shared helper would remove
//! the duplication; kept as-is here since it spans translation units.
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        copy_padding_kern(WorkspaceBundle bundle,
                          const fallback::ConvBiasImpl::NCBKernParam& param,
                          const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    UNPACK_CONV_F32_NCB_KERN_SIZES(param);
    // only the input geometry and padding are needed here
    MEGDNN_MARK_USED_VAR(N);
    MEGDNN_MARK_USED_VAR(OC);
    MEGDNN_MARK_USED_VAR(OH);
    MEGDNN_MARK_USED_VAR(OW);
    MEGDNN_MARK_USED_VAR(FH);
    MEGDNN_MARK_USED_VAR(FW);
    MEGDNN_MARK_USED_VAR(SH);
    MEGDNN_MARK_USED_VAR(SW);
    // padded spatial extents
    size_t IW2 = IW + 2 * PW;
    size_t IH2 = IH + 2 * PH;
    size_t batch_id = ncb_index.ndrange_id[0];
    size_t group_id = ncb_index.ndrange_id[1];
    size_t channel_id = ncb_index.ndrange_id[2];
    // workspace layout: [batch][group][channel] padded planes, in elements
    size_t padding_group_size = IH2 * IW2 * IC;
    size_t workspace_channel_offset = IH2 * IW2 * channel_id;
    size_t workspace_group_offset = group_id * padding_group_size;
    size_t workspace_batch_offset =
            param.filter_meta.group * batch_id * padding_group_size;
    bundle.set(param.workspace_ptr);

    // padding value: the asymmetric-quantized zero point, otherwise 0
    src_ctype src_zp = static_cast<src_ctype>(0);
    if (param.src_type.enumv() == DTypeEnum::Quantized8Asymm) {
        src_zp = param.src_type.param<dtype::Quantized8Asymm>().zero_point;
    }
    src_ctype* src = const_cast<src_ctype*>(
            param.src<src_ctype>(batch_id, group_id, channel_id));
    src_ctype* src2;
    src2 = static_cast<src_ctype*>(bundle.get(BUNDLE_PADDING_INDEX)) +
           workspace_group_offset + workspace_batch_offset +
           workspace_channel_offset;
    src_ctype* src2_ptr = src2;
    const src_ctype* src_ptr = src;
    // top border rows
    // NOTE(review): memset writes src_zp as a byte pattern; exact only for
    // 1-byte src_ctype or src_zp == 0 — presumably the only cases here.
    if (PH != 0) {
        std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH * IW2);
        src2_ptr += PH * IW2;
    }
    // interior rows: left border, payload, right border
    rep(ih, IH) {
        if (PW != 0)
            rep(pw, PW) * (src2_ptr++) = src_zp;
        std::memcpy(src2_ptr, src_ptr, sizeof(src_ctype) * IW);
        src2_ptr += IW;
        src_ptr += IW;
        if (PW != 0)
            rep(pw, PW) * (src2_ptr++) = src_zp;
    }
    // bottom border rows
    if (PH != 0) {
        std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH * IW2);
        src2_ptr += PH * IW2;
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! NO_PACK mode performs a plain (unpacked) matmul, so there is no packed-A
//! panel to prepare.  This override exists only to satisfy the Strategy
//! interface; reaching it indicates a dispatch bug in the caller.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        packA_kern(WorkspaceBundle bundle,
                   const fallback::ConvBiasImpl::NCBKernParam& param,
                   fallback::MatrixMulImpl::KernSizeParam matmulparam,
                   fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                   const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    //! silence unused-parameter warnings: all arguments are intentionally
    //! ignored because this kernel must never run in NO_PACK mode
    MEGDNN_MARK_USED_VAR(bundle);
    MEGDNN_MARK_USED_VAR(param);
    MEGDNN_MARK_USED_VAR(matmulparam);
    MEGDNN_MARK_USED_VAR(matmul_algo);
    MEGDNN_MARK_USED_VAR(ncb_index);
    megdnn_throw("nopack mode should not call packA_kern please check your code");
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Select where the matmul result should be written.
//! \return a thread-local workspace buffer when the result cannot be written
//!         straight into the output tensor (8-bit destination needs a wider
//!         accumulator, or the ohw tile is partial), otherwise a pointer
//!         directly into the output tensor for the current (batch, group,
//!         oc-tile), avoiding one copy.
void* Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::NO_PACK>::
        get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                           const WorkspaceBundle& bundle_thread,
                           const StrategyParam& sparam) {
    if (sparam.is_dst_8bit || !sparam.is_ohw_size_bigger) {
        return static_cast<bias_ctype*>(
                bundle_thread.get(THREAD_BUNDLE_MATMULDST_INDEX));
    } else {
        //! offset to the first output channel of the current oc tile
        bias_ctype* dst =
                param.dst<bias_ctype>(sparam.batch_id, sparam.group_id) +
                sparam.oc_cur_index * sparam.ohw;
        return static_cast<void*>(dst);
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Run the unpacked matmul for one (oc-tile, ohw-tile):
//! C[M x N] = filter[M x K] * im2col[K x N], where M is the oc-tile size and
//! N is the flattened output-pixel tile size.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
                    const StrategyParam& sparam, WorkspaceBundle bundle,
                    WorkspaceBundle bundle_thread,
                    fallback::MatrixMulImpl::KernParam matmul_param,
                    fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                    const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    MEGDNN_MARK_USED_VAR(bundle);
    MEGDNN_MARK_USED_VAR(ncb_index);
    //! scratch space for the matmul algorithm itself
    matmul_param.workspace_ptr = bundle_thread.get(THREAD_BUNDLE_MATCOMP_INDEX);
    void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
    //! B operand: the im2col-transformed input tile produced by exec_im2col
    src_ctype* im2col_dst = static_cast<src_ctype*>(
            bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
    //! A operand: the filter, offset to the first channel of the oc tile
    //! (each output channel owns icpg * fh * fw weights)
    const void* filter = param.filter<src_ctype>(sparam.group_id) +
                         sparam.oc_cur_index * param.filter_meta.icpg *
                                 param.filter_meta.spatial[0] *
                                 param.filter_meta.spatial[1];
    matmul_param.M = sparam.output_block_oc_size;
    matmul_param.N = sparam.output_block_size;
    //! B and C are stored row-major with the tile width as leading dimension
    matmul_param.LDB = sparam.output_block_size;
    matmul_param.LDC = sparam.output_block_size;
    matmul_param.A_ptr = filter;
    matmul_param.B_ptr = im2col_dst;
    matmul_param.C_ptr = matmul_dst;
    auto matmul_kern = matmul_algo->get_kern(matmul_param);
    matmul_kern(matmul_param);
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Produce the im2col matrix for the current ohw tile.  Reads either the
//! zero-padded copy of the input (written by copy_padding_kern into the
//! shared bundle) or, when padding is zero, the original input tensor, and
//! writes output_block_size columns starting at ohw_cur_index.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        exec_im2col(WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
                    const StrategyParam& sparam,
                    const fallback::ConvBiasImpl::NCBKernParam& param,
                    fallback::MatrixMulImpl::KernParam matmul_param,
                    fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
    MEGDNN_MARK_USED_VAR(matmul_param);
    MEGDNN_MARK_USED_VAR(matmul_algo);
    size_t m_sh = param.filter_meta.stride[0];
    size_t m_sw = param.filter_meta.stride[1];
    size_t m_oc = param.filter_meta.ocpg;
    size_t m_oh = param.osz[0];
    size_t m_ow = param.osz[1];
    size_t m_ic = param.filter_meta.icpg;
    //! input extents AFTER padding (the padded copy is what we read)
    size_t m_ih = param.isz[0] + param.filter_meta.padding[0] * 2;
    size_t m_iw = param.isz[1] + param.filter_meta.padding[1] * 2;
    size_t m_fh = param.filter_meta.spatial[0];
    size_t m_fw = param.filter_meta.spatial[1];
    //! cross-correlation (no kernel flip) selects the <true> template path
    size_t m_is_xcorr = !param.filter_meta.should_flip;
    //! byte offset of this (batch, group) image inside the padded buffer,
    //! which is laid out as [batch][group][ic][ih][iw]
    size_t input_offset =
            m_ih * m_iw * m_ic *
            (sparam.group_id + param.filter_meta.group * sparam.batch_id) *
            sizeof(src_ctype);
    src_ctype* src2 = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PADDING_INDEX)) +
            input_offset);
    //! with zero padding the padded copy was never made; read the input
    //! tensor directly
    bool is_phpwzero = param.filter_meta.padding[0] == 0 &&
                       param.filter_meta.padding[1] == 0;
    if (is_phpwzero) {
        src2 = const_cast<src_ctype*>(
                param.src<src_ctype>(sparam.batch_id, sparam.group_id));
    }
    src_ctype* im2col_dst = static_cast<src_ctype*>(
            bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
    //! stride-1 uses the cheaper non-strided kernel
    if (m_sh == 1 && m_sw == 1) {
        if (m_is_xcorr) {
            img2col<true>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic, m_ih,
                          m_iw, m_fh, m_fw, sparam.ohw_cur_index,
                          sparam.output_block_size);
        } else {
            img2col<false>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic, m_ih,
                           m_iw, m_fh, m_fw, sparam.ohw_cur_index,
                           sparam.output_block_size);
        }
    } else {
        if (m_is_xcorr) {
            img2col_stride<true>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic,
                                 m_ih, m_iw, m_fh, m_fw, m_sh, m_sw,
                                 sparam.ohw_cur_index,
                                 sparam.output_block_size);
        } else {
            img2col_stride<false>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic,
                                  m_ih, m_iw, m_fh, m_fw, m_sh, m_sw,
                                  sparam.ohw_cur_index,
                                  sparam.output_block_size);
        }
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Apply bias + nonlinearity (and quantization, depending on
//! postprocess_mode) to the matmul result of the current tile, then copy it
//! into the output tensor if the matmul wrote to a scratch buffer.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        exec_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
                         const StrategyParam& sparam,
                         WorkspaceBundle bundle_thread) {
    //! for BiasMode::BIAS, stage the per-pixel bias tile into a temp buffer
    copy_bias(param, bundle_thread, sparam);
    void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
    const bias_ctype* bias_ptr = static_cast<const bias_ctype*>(
            param.bias<bias_ctype>(sparam.batch_id, sparam.group_id));
    bias_ctype* bias_temp_ptr = static_cast<bias_ctype*>(
            get_bias_temp_ptr(param, bundle_thread));
    //! bias source: the staged tile for per-pixel BIAS, otherwise the
    //! per-channel bias offset to the current oc tile.  Runs in place
    //! (src == dst == matmul_dst) over an M x N tile.
    PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
            matmul_dst,
            const_cast<void*>(param.bias_mode == megdnn::BiasMode::BIAS
                                      ? bias_temp_ptr
                                      : static_cast<void*>(
                                                const_cast<bias_ctype*>(
                                                        bias_ptr +
                                                        sparam.oc_cur_index))),
            matmul_dst, param.bias_mode, param.nonlineMode, param.bias_type,
            param.dst_type, 1_z, sparam.output_block_oc_size, 1_z,
            sparam.output_block_size);
    copy_dst(param, matmul_dst, sparam);
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Scatter the post-processed tile from the thread-local buffer back into
//! the output tensor, one output-channel row at a time.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
                 const void* matmul_dst, const StrategyParam& sparam) {
    //! nothing to do when the matmul already wrote into the output tensor
    if (sparam.skip_copy_dst) {
        return;
    }
    //! source rows are packed: output_block_oc_size rows of
    //! output_block_size contiguous elements
    const dst_ctype* src_row = static_cast<const dst_ctype*>(matmul_dst);
    //! destination rows live inside the full output and are strided by ohw
    dst_ctype* dst_row =
            param.dst<dst_ctype>(sparam.batch_id, sparam.group_id) +
            sparam.oc_cur_index * sparam.ohw + sparam.ohw_cur_index;
    for (size_t row = 0; row < sparam.output_block_oc_size; ++row) {
        std::memcpy(dst_row, src_row,
                    sizeof(dst_ctype) * sparam.output_block_size);
        src_row += sparam.output_block_size;
        dst_row += sparam.ohw;
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! For per-pixel bias (BiasMode::BIAS) copy the bias values covering the
//! current (oc-tile, ohw-tile) into a contiguous temp buffer so PostProcess
//! can consume them with the same layout as the matmul result.  Other bias
//! modes need no staging and this is a no-op.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::NO_PACK>::
        copy_bias(const fallback::ConvBiasImpl::NCBKernParam& param,
                  WorkspaceBundle bundle_thread, const StrategyParam& sparam) {
    const bias_ctype* bias_ptr = static_cast<const bias_ctype*>(
            param.bias<bias_ctype>(sparam.batch_id, sparam.group_id));
    bias_ctype* bias_temp_ptr = static_cast<bias_ctype*>(
            get_bias_temp_ptr(param, bundle_thread));
    if (param.bias_mode == megdnn::BiasMode::BIAS) {
        bias_ctype* copy_dst = bias_temp_ptr;
        //! bias is laid out like the output: rows of ohw per channel
        const bias_ctype* copy_src = bias_ptr +
                                     sparam.oc_cur_index * sparam.ohw +
                                     sparam.ohw_cur_index;
        for (size_t oc = sparam.oc_cur_index; oc < sparam.oc_end_index;
             oc++) {
            std::memcpy(copy_dst, copy_src,
                        sizeof(bias_ctype) * sparam.output_block_size);
            copy_dst += sparam.output_block_size;
            copy_src += sparam.ohw;
        }
    }
}
#define INSTANTIAL_CLASS(_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode) \
template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode, PackMode::NO_PACK>;
INSTANTIAL_CLASS
(
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
megdnn
::
PostprocessMode
::
FLOAT
)
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
INSTANTIAL_CLASS
(
dt_float16
,
dt_float16
,
dt_float16
,
__fp16
,
__fp16
,
megdnn
::
PostprocessMode
::
FLOAT
)
#else
#if !MEGDNN_DISABLE_FLOAT16
INSTANTIAL_CLASS
(
dt_float16
,
dt_float16
,
dt_float16
,
dt_float16
,
dt_float16
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#endif
#endif
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
//! x86 do not have uint8 matmul so only armv7 armv8 support uint8
INSTANTIAL_CLASS
(
dt_uint8
,
dt_int32
,
dt_uint8
,
dt_qint32
,
dt_quint8
,
megdnn
::
PostprocessMode
::
QUANTIZED
)
INSTANTIAL_CLASS
(
dt_uint8
,
dt_int32
,
dt_int32
,
dt_qint32
,
dt_qint32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#endif
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int8
,
dt_qint32
,
dt_qint8
,
megdnn
::
PostprocessMode
::
QUANTIZED
)
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int32
,
dt_int32
,
dt_int32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
INSTANTIAL_CLASS
(
dt_int8
,
dt_int16
,
dt_int16
,
dt_int16
,
dt_int16
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int32
,
dt_qint32
,
dt_qint32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
}
// namespace megdnn
dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
0 → 100644
浏览文件 @
c4dfdbd2
/**
* \file dnn/src/fallback/conv_bias/im2col/strategy_onlypacka.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/opr_param_defs.h"
#include "src/fallback/conv_bias/im2col/strategy_base.h"
#include "src/fallback/convolution/img2col_helper.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif
using
namespace
megdnn
;
#if MEGDNN_X86
using
namespace
x86
;
#endif
namespace
megdnn
{
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Trivial constructor: all state lives in StrategyBase.
Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
         postprocess_mode, PackMode::ONLY_PACKA>::Strategy()
        : StrategyBase() {}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Copy one (batch, group, channel) input plane into the shared padding
//! buffer, surrounding it with the padding value (the zero point for
//! Quantized8Asymm, 0 otherwise) so that im2col can read without bounds
//! checks.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::ONLY_PACKA>::
        copy_padding_kern(WorkspaceBundle bundle,
                          const fallback::ConvBiasImpl::NCBKernParam& param,
                          const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    UNPACK_CONV_F32_NCB_KERN_SIZES(param);
    MEGDNN_MARK_USED_VAR(N);
    MEGDNN_MARK_USED_VAR(OC);
    MEGDNN_MARK_USED_VAR(OH);
    MEGDNN_MARK_USED_VAR(OW);
    MEGDNN_MARK_USED_VAR(FH);
    MEGDNN_MARK_USED_VAR(FW);
    MEGDNN_MARK_USED_VAR(SH);
    MEGDNN_MARK_USED_VAR(SW);
    //! padded plane extents
    size_t IW2 = IW + 2 * PW;
    size_t IH2 = IH + 2 * PH;
    //! ndrange: [batch, group, channel]
    size_t batch_id = ncb_index.ndrange_id[0];
    size_t group_id = ncb_index.ndrange_id[1];
    size_t channel_id = ncb_index.ndrange_id[2];
    //! locate this plane inside the [batch][group][channel] padded buffer
    size_t padding_group_size = IH2 * IW2 * IC;
    size_t workspace_channel_offset = IH2 * IW2 * channel_id;
    size_t workspace_group_offset = group_id * padding_group_size;
    size_t workspace_batch_offset =
            param.filter_meta.group * batch_id * padding_group_size;
    bundle.set(param.workspace_ptr);
    //! padding value: the zero point for asymmetric quantized input, else 0
    src_ctype src_zp = static_cast<src_ctype>(0);
    if (param.src_type.enumv() == DTypeEnum::Quantized8Asymm) {
        src_zp = param.src_type.param<dtype::Quantized8Asymm>().zero_point;
    }
    src_ctype* src = const_cast<src_ctype*>(
            param.src<src_ctype>(batch_id, group_id, channel_id));
    src_ctype* src2;
    src2 = static_cast<src_ctype*>(bundle.get(BUNDLE_PADDING_INDEX)) +
           workspace_group_offset + workspace_batch_offset +
           workspace_channel_offset;
    src_ctype* src2_ptr = src2;
    const src_ctype* src_ptr = src;
    //! top padding rows
    //! NOTE(review): memset writes the low byte of src_zp; this relies on
    //! src_zp being nonzero only when src_ctype is a 1-byte type
    //! (Quantized8Asymm) -- confirm for any new dtype.
    if (PH != 0) {
        std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH * IW2);
        src2_ptr += PH * IW2;
    }
    //! each input row, framed left and right by PW padding values
    rep(ih, IH) {
        if (PW != 0)
            rep(pw, PW) * (src2_ptr++) = src_zp;
        std::memcpy(src2_ptr, src_ptr, sizeof(src_ctype) * IW);
        src2_ptr += IW;
        src_ptr += IW;
        if (PW != 0)
            rep(pw, PW) * (src2_ptr++) = src_zp;
    }
    //! bottom padding rows
    if (PH != 0) {
        std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH * IW2);
        src2_ptr += PH * IW2;
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Pre-pack one oc tile of the filter (the matmul A operand) into the shared
//! packed-A workspace.  ndrange: [group, oc_tile].
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::ONLY_PACKA>::
        packA_kern(WorkspaceBundle bundle,
                   const fallback::ConvBiasImpl::NCBKernParam& param,
                   fallback::MatrixMulImpl::KernSizeParam matmulparam,
                   fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                   const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    bundle.set(param.workspace_ptr);
    //! KernParam extends KernSizeParam; copy the size part in
    fallback::MatrixMulImpl::KernParam matmul_param;
    static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
            matmulparam;
    size_t OC = param.filter_meta.ocpg;
    //! matmul_param.M still holds the full oc tile size here
    size_t oc_tile_size = matmul_param.M;
    size_t group_id = ncb_index.ndrange_id[0];
    //! the last tile may be partial
    size_t output_block_oc_size = std::min(
            oc_tile_size, OC - ncb_index.ndrange_id[1] * oc_tile_size);
    size_t oc_cur_index = ncb_index.ndrange_id[1] * oc_tile_size;
    //! packed-A workspace is evenly split per group
    size_t packA_group_size =
            bundle.get_size(BUNDLE_PACKA_INDEX) / param.filter_meta.group;
    //! each oc tile occupies one packed-A panel of the matmul's bundle size
    size_t a_panel_offset = ncb_index.ndrange_id[1] *
                            matmul_algo->get_bundle(matmul_param).get_size(0);
    int8_t* a_panel = static_cast<int8_t*>(bundle.get(BUNDLE_PACKA_INDEX)) +
                      group_id * packA_group_size + a_panel_offset;
    //! A operand: filter weights of this oc tile (K weights per channel)
    matmul_param.A_ptr =
            const_cast<src_ctype*>(param.filter<src_ctype>(group_id)) +
            oc_cur_index * matmul_param.K;
    matmul_param.M = output_block_oc_size;
    matmul_algo->pack_A(matmul_param, a_panel, 0_z, 0_z);
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Select where the matmul result should be written: a thread-local scratch
//! buffer when writing in place is impossible (8-bit destination needs a
//! wider accumulator, or the ohw tile is partial), otherwise directly into
//! the output tensor at the current (batch, group, oc-tile).
void* Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
               postprocess_mode, PackMode::ONLY_PACKA>::
        get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                           const WorkspaceBundle& bundle_thread,
                           const StrategyParam& sparam) {
    if (sparam.is_dst_8bit || !sparam.is_ohw_size_bigger) {
        return static_cast<void*>(
                bundle_thread.get(THREAD_BUNDLE_MATMULDST_INDEX));
    } else {
        bias_ctype* dst =
                param.dst<bias_ctype>(sparam.batch_id, sparam.group_id) +
                sparam.oc_cur_index * sparam.ohw;
        return static_cast<void*>(dst);
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Run the matmul for one tile using the pre-packed A panel written by
//! packA_kern; B (the im2col matrix) is consumed unpacked via the "naked"
//! kernel entry point.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::ONLY_PACKA>::
        exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
                    const StrategyParam& sparam, WorkspaceBundle bundle,
                    WorkspaceBundle bundle_thread,
                    fallback::MatrixMulImpl::KernParam matmul_param,
                    fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                    const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
    //! locate this (group, oc-tile)'s packed-A panel; oc tile index is
    //! ndrange_id[3] in the compute kernel's ndrange
    size_t packA_group_size =
            bundle.get_size(BUNDLE_PACKA_INDEX) / param.filter_meta.group;
    size_t a_panel_offset = ncb_index.ndrange_id[3] *
                            matmul_algo->get_bundle(matmul_param).get_size(0);
    a_panel_offset = sparam.group_id * packA_group_size + a_panel_offset;
    void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
    src_ctype* a_panel = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PACKA_INDEX)) +
            a_panel_offset);
    //! B is not packed in ONLY_PACKA mode
    src_ctype* b_panel = nullptr;
    src_ctype* im2col_dst = static_cast<src_ctype*>(
            bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
    matmul_param.M = sparam.output_block_oc_size;
    matmul_param.N = sparam.output_block_size;
    matmul_param.LDB = sparam.output_block_size;
    matmul_param.LDC = sparam.output_block_size;
    matmul_param.B_ptr = im2col_dst;
    matmul_param.C_ptr = matmul_dst;
    auto matmul_kern = matmul_algo->get_kern_naked(matmul_param);
    matmul_kern(matmul_param, a_panel, b_panel);
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Produce the im2col matrix for the current ohw tile.  Reads the padded
//! input copy (or the original input when padding is zero) and writes
//! output_block_size columns starting at ohw_cur_index.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::ONLY_PACKA>::
        exec_im2col(WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
                    const StrategyParam& sparam,
                    const fallback::ConvBiasImpl::NCBKernParam& param,
                    fallback::MatrixMulImpl::KernParam matmul_param,
                    fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
    MEGDNN_MARK_USED_VAR(matmul_param);
    MEGDNN_MARK_USED_VAR(matmul_algo);
    size_t m_sh = param.filter_meta.stride[0];
    size_t m_sw = param.filter_meta.stride[1];
    size_t m_oc = param.filter_meta.ocpg;
    size_t m_oh = param.osz[0];
    size_t m_ow = param.osz[1];
    size_t m_ic = param.filter_meta.icpg;
    //! input extents AFTER padding
    size_t m_ih = param.isz[0] + param.filter_meta.padding[0] * 2;
    size_t m_iw = param.isz[1] + param.filter_meta.padding[1] * 2;
    size_t m_fh = param.filter_meta.spatial[0];
    size_t m_fw = param.filter_meta.spatial[1];
    //! cross-correlation (no kernel flip) selects the <true> template path
    size_t m_is_xcorr = !param.filter_meta.should_flip;
    //! byte offset of this (batch, group) image inside the padded buffer
    size_t input_offset =
            m_ih * m_iw * m_ic *
            (sparam.group_id + param.filter_meta.group * sparam.batch_id) *
            sizeof(src_ctype);
    src_ctype* src2 = reinterpret_cast<src_ctype*>(
            reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PADDING_INDEX)) +
            input_offset);
    //! with zero padding the padded copy was never made; read the input
    //! tensor directly
    bool is_phpwzero = param.filter_meta.padding[0] == 0 &&
                       param.filter_meta.padding[1] == 0;
    if (is_phpwzero) {
        src2 = const_cast<src_ctype*>(
                param.src<src_ctype>(sparam.batch_id, sparam.group_id));
    }
    src_ctype* im2col_dst = static_cast<src_ctype*>(
            bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
    //! stride-1 uses the cheaper non-strided kernel
    if (m_sh == 1 && m_sw == 1) {
        if (m_is_xcorr) {
            img2col<true>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic, m_ih,
                          m_iw, m_fh, m_fw, sparam.ohw_cur_index,
                          sparam.output_block_size);
        } else {
            img2col<false>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic, m_ih,
                           m_iw, m_fh, m_fw, sparam.ohw_cur_index,
                           sparam.output_block_size);
        }
    } else {
        if (m_is_xcorr) {
            img2col_stride<true>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic,
                                 m_ih, m_iw, m_fh, m_fw, m_sh, m_sw,
                                 sparam.ohw_cur_index,
                                 sparam.output_block_size);
        } else {
            img2col_stride<false>(src2, im2col_dst, m_oc, m_oh, m_ow, m_ic,
                                  m_ih, m_iw, m_fh, m_fw, m_sh, m_sw,
                                  sparam.ohw_cur_index,
                                  sparam.output_block_size);
        }
    }
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Apply bias + nonlinearity (and quantization, depending on
//! postprocess_mode) to the matmul result of the current tile, then copy it
//! into the output tensor if the matmul wrote to a scratch buffer.  Unlike
//! the NO_PACK variant, the per-pixel bias staging is done inline here.
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::ONLY_PACKA>::
        exec_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
                         const StrategyParam& sparam,
                         WorkspaceBundle bundle_thread) {
    void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
    const bias_ctype* bias_ptr = static_cast<const bias_ctype*>(
            param.bias<bias_ctype>(sparam.batch_id, sparam.group_id));
    bias_ctype* bias_temp_ptr = static_cast<bias_ctype*>(
            get_bias_temp_ptr(param, bundle_thread));
    //! for per-pixel bias, stage the tile contiguously so PostProcess can
    //! consume it with the same layout as the matmul result
    if (param.bias_mode == megdnn::BiasMode::BIAS) {
        bias_ctype* copy_dst = bias_temp_ptr;
        const bias_ctype* copy_src = bias_ptr +
                                     sparam.oc_cur_index * sparam.ohw +
                                     sparam.ohw_cur_index;
        for (size_t oc = sparam.oc_cur_index; oc < sparam.oc_end_index;
             oc++) {
            std::memcpy(copy_dst, copy_src,
                        sizeof(bias_ctype) * sparam.output_block_size);
            copy_dst += sparam.output_block_size;
            copy_src += sparam.ohw;
        }
    }
    //! bias source: staged tile for BIAS, else per-channel bias at oc tile;
    //! runs in place over an M x N tile
    PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
            matmul_dst,
            const_cast<void*>(param.bias_mode == megdnn::BiasMode::BIAS
                                      ? bias_temp_ptr
                                      : static_cast<void*>(
                                                const_cast<bias_ctype*>(
                                                        bias_ptr +
                                                        sparam.oc_cur_index))),
            matmul_dst, param.bias_mode, param.nonlineMode, param.bias_type,
            param.dst_type, 1_z, sparam.output_block_oc_size, 1_z,
            sparam.output_block_size);
    copy_dst(param, matmul_dst, sparam);
}
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
//! Scatter the post-processed tile from the thread-local buffer back into
//! the output tensor, one output-channel row at a time.  No-op when the
//! matmul already wrote to the output (skip_copy_dst).
void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
              postprocess_mode, PackMode::ONLY_PACKA>::
        copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
                 const void* matmul_dst, const StrategyParam& sparam) {
    if (!sparam.skip_copy_dst) {
        dst_ctype* dst_tmp_ptr =
                reinterpret_cast<dst_ctype*>(const_cast<void*>(matmul_dst));
        //! destination rows are strided by the full ohw extent
        dst_ctype* dst =
                param.dst<dst_ctype>(sparam.batch_id, sparam.group_id) +
                sparam.oc_cur_index * sparam.ohw + sparam.ohw_cur_index;
        for (size_t oc = 0; oc < sparam.output_block_oc_size; oc++) {
            std::memcpy(dst, dst_tmp_ptr,
                        sizeof(dst_ctype) * sparam.output_block_size);
            dst_tmp_ptr += sparam.output_block_size;
            dst += sparam.ohw;
        }
    }
}
#define INSTANTIAL_CLASS(_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
_op_dtype, _postprocess_mode) \
template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, \
_op_ctype, _op_dtype, _postprocess_mode,PackMode::ONLY_PACKA>;
INSTANTIAL_CLASS
(
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
dt_float32
,
megdnn
::
PostprocessMode
::
FLOAT
)
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
INSTANTIAL_CLASS
(
dt_float16
,
dt_float16
,
dt_float16
,
__fp16
,
__fp16
,
megdnn
::
PostprocessMode
::
FLOAT
)
#else
#if !MEGDNN_DISABLE_FLOAT16
INSTANTIAL_CLASS
(
dt_float16
,
dt_float16
,
dt_float16
,
dt_float16
,
dt_float16
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#endif
#endif
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
//! x86 do not have uint8 matmul so only armv7 armv8 support uint8
INSTANTIAL_CLASS
(
dt_uint8
,
dt_int32
,
dt_uint8
,
dt_qint32
,
dt_quint8
,
megdnn
::
PostprocessMode
::
QUANTIZED
)
INSTANTIAL_CLASS
(
dt_uint8
,
dt_int32
,
dt_int32
,
dt_qint32
,
dt_qint32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#endif
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int8
,
dt_qint32
,
dt_qint8
,
megdnn
::
PostprocessMode
::
QUANTIZED
)
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int32
,
dt_int32
,
dt_int32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
INSTANTIAL_CLASS
(
dt_int8
,
dt_int16
,
dt_int16
,
dt_int16
,
dt_int16
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
INSTANTIAL_CLASS
(
dt_int8
,
dt_int32
,
dt_int32
,
dt_qint32
,
dt_qint32
,
megdnn
::
PostprocessMode
::
NO_PROCESS
)
#undef INSTANTIAL_CLASS
}
// namespace megdnn
dnn/src/fallback/convolution/img2col_helper.h
浏览文件 @
c4dfdbd2
...
...
@@ -8,7 +8,6 @@
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include <cstddef>
#include "src/common/utils.h"
namespace
{
...
...
@@ -42,7 +41,8 @@ void img2col_stride(const dtype* __restrict src, dtype* __restrict dst,
}
}
//! add for im2col matmul multithread
//!add for im2col matmul multithread
template
<
bool
is_xcorr
,
typename
dtype
>
void
img2col_stride
(
const
dtype
*
__restrict
src
,
dtype
*
__restrict
dst
,
const
int
OC
,
const
int
OH
,
const
int
OW
,
const
int
IC
,
...
...
dnn/src/x86/elemwise_helper/kimpl/op_unary_base.h
浏览文件 @
c4dfdbd2
...
...
@@ -323,6 +323,7 @@ struct UnaryOpBase<SIMDType::NONE, dt_qint32, dt_qint8>
init
(
src_scale
,
dst_scale
);
}
};
template
<
>
struct
UnaryOpBase
<
SIMDType
::
NONE
,
dt_qint32
,
dt_quint8
>
:
OpBase
<
dt_qint32
,
dt_quint8
>
{
...
...
@@ -330,20 +331,24 @@ struct UnaryOpBase<SIMDType::NONE, dt_qint32, dt_quint8>
using
src_ctype
=
dt_qint32
;
using
dst_ctype
=
dt_quint8
;
float
scale
,
scale_src
,
scale_dst
;
void
init
(
float
src_scale
,
float
dst_scale
)
{
uint8_t
dzp
;
void
init
(
float
src_scale
,
float
dst_scale
,
uint8_t
dst_zp
)
{
scale_src
=
src_scale
;
scale_dst
=
1.
f
/
dst_scale
;
scale_dst
=
1.0
f
/
dst_scale
;
dzp
=
dst_zp
;
scale
=
src_scale
/
dst_scale
;
}
UnaryOpBase
(
DType
src_dtype
,
DType
dst_dtype
)
{
float
src_scale
=
src_dtype
.
param
<
dtype
::
QuantizedS32
>
().
scale
;
float
dst_scale
=
dst_dtype
.
param
<
dtype
::
QuantizedS8
>
().
scale
;
init
(
src_scale
,
dst_scale
);
float
dst_scale
=
dst_dtype
.
param
<
dtype
::
Quantized8Asymm
>
().
scale
;
uint8_t
dst_zp
=
dst_dtype
.
param
<
dtype
::
Quantized8Asymm
>
().
zero_point
;
init
(
src_scale
,
dst_scale
,
dst_zp
);
}
UnaryOpBase
(
float
src_scale
,
float
dst_scale
)
{
init
(
src_scale
,
dst_scale
);
UnaryOpBase
(
float
src_scale
,
float
dst_scale
,
uint8_t
dst_zp
)
{
init
(
src_scale
,
dst_scale
,
dst_zp
);
}
};
#define OP_BASE(_simd_type, _simd_target, _simd_data_type, _func_prefix) \
template <> \
struct UnaryOpBase<_simd_type, dt_float32, dt_qint8> \
...
...
@@ -828,7 +833,6 @@ template <typename Op>
struct
UnaryQuantizationOp
<
SIMDType
::
NONE
,
dt_qint32
,
dt_quint8
,
Op
>
:
UnaryOpBase
<
SIMDType
::
NONE
,
dt_qint32
,
dt_quint8
>
{
using
UnaryOpBase
<
SIMDType
::
NONE
,
dt_qint32
,
dt_quint8
>::
UnaryOpBase
;
constexpr
static
size_t
SIMD_WIDTH
=
8
;
Op
op
;
void
operator
()(
const
dt_qint32
&
src
,
dt_quint8
*
dst
)
const
{
...
...
dnn/src/x86/matrix_mul/algos.cpp
浏览文件 @
c4dfdbd2
...
...
@@ -195,10 +195,10 @@ MatrixMulImpl::kern_t MatrixMulImpl::AlgoInt8x8x32Vnni::get_kern(
return
int8x8x32_kern_vnni
;
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_
PACKA
(
AlgoInt8x8x32Vnni
,
megdnn_x86_matmul_kern
,
5
,
x86
::
matmul
::
gemm_int8_vnni_12x32x4
,
dt_int8
,
dt_int32
,
dt_uint8
);
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_
DETAIL
(
AlgoInt8x8x32Vnni
,
megdnn_x86_matmul_kern
,
5
,
x86
::
matmul
::
gemm_int8_vnni_12x32x4
,
dt_int8
,
dt_int32
,
dt_uint8
);
#endif
/* ===================== Int8 mkldnn algo ===================== */
...
...
@@ -364,7 +364,9 @@ size_t MatrixMulImpl::AlgoInt8x8x32AVX2M4N16K2::get_workspace(
m
,
n
,
k
,
trans_a
,
trans_b
,
strategy
,
cacheline
)
.
get_workspace_size
();
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL
(
AlgoInt8x8x32AVX2M4N16K2
,
megdnn_x86_matmul_kern
,
8
,
x86
::
matmul
::
gemm_avx2_s8s8s32_4x16x2
,
dt_int8
,
dt_int32
,
dt_int16
);
MatrixMulImpl
::
kern_t
MatrixMulImpl
::
AlgoInt8x8x32AVX2M2N4K16
::
get_kern
(
const
KernSizeParam
&
)
const
{
...
...
@@ -437,6 +439,10 @@ size_t MatrixMulImpl::AlgoInt8x8x32SSEM4N8K2::get_workspace(
.
get_workspace_size
();
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL
(
AlgoInt8x8x32SSEM4N8K2
,
megdnn_x86_matmul_kern
,
9
,
x86
::
matmul
::
gemm_sse_s8s8s32_4x8x2
,
dt_int8
,
dt_int32
,
dt_int16
);
/*************************AlgoF32MK8_8x8********************/
MatrixMulImpl
::
kern_t
MatrixMulImpl
::
AlgoF32MK8_8x8
::
get_kern
(
const
KernSizeParam
&
)
const
{
...
...
dnn/src/x86/matrix_mul/algos.h
浏览文件 @
c4dfdbd2
...
...
@@ -68,7 +68,7 @@ public:
size_t
get_workspace
(
const
KernSizeParam
&
)
const
override
;
kern_t
get_kern
(
const
KernSizeParam
&
)
const
override
;
void
*
type
()
const
override
{
return
sm_x86_algo_type
;
}
PackMode
packmode
()
const
override
{
return
PackMode
::
NO_PACK
;
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL
();
};
class
MatrixMulImpl
::
AlgoInt8x8x32SSEM4N8K2
:
public
AlgoBase
{
...
...
@@ -79,7 +79,7 @@ public:
size_t
get_workspace
(
const
KernSizeParam
&
)
const
override
;
kern_t
get_kern
(
const
KernSizeParam
&
)
const
override
;
void
*
type
()
const
override
{
return
sm_x86_algo_type
;
}
PackMode
packmode
()
const
override
{
return
PackMode
::
NO_PACK
;
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL
();
};
class
MatrixMulImpl
::
AlgoF32MK8_8x8
:
public
AlgoBase
{
...
...
dnn/test/x86/conv_bias.cpp
浏览文件 @
c4dfdbd2
...
...
@@ -741,7 +741,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32) {
TensorShape
{
oc
,
ic
,
kernel
,
kernel
},
TensorShape
{});
};
for
(
size_t
kernel
:
{
1
,
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
kernel
:
{
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
ic
:
{
1
,
4
,
8
,
16
})
for
(
size_t
oc
:
{
1
,
4
,
8
})
for
(
size_t
p
:
{
0
,
2
})
...
...
@@ -751,7 +751,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32) {
run
(
oc
,
ic
,
size
,
size
,
kernel
,
p
,
nonline_mode
);
}
//! test OC block
run
(
2046
,
1
,
8
,
8
,
1
,
0
,
NonlineMode
::
IDENTITY
);
run
(
2046
,
1
,
8
,
8
,
2
,
0
,
NonlineMode
::
IDENTITY
);
Checker
<
ConvBias
>
checker
(
handle
());
UniformIntRNG
rng
{
-
50
,
50
};
...
...
@@ -826,7 +826,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32) {
(
w
+
2
*
p
-
kernel
)
/
param
.
stride_w
+
1
});
};
for
(
size_t
kernel
:
{
1
,
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
kernel
:
{
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
ic
:
{
1
,
4
,
8
,
16
})
for
(
size_t
oc
:
{
1
,
4
,
8
,
16
,
300
})
for
(
size_t
p
:
{
0
,
2
})
...
...
@@ -895,7 +895,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) {
(
w
+
2
*
param
.
pad_w
-
kernel
)
/
1
+
1
});
};
for
(
size_t
kernel
:
{
1
,
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
kernel
:
{
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
ic
:
{
1
,
4
,
8
,
16
})
for
(
size_t
oc
:
{
1
,
4
,
8
,
16
})
for
(
size_t
p
:
{
0
,
1
})
...
...
@@ -945,7 +945,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) {
TensorShape
{
1
,
oc
,
1
,
1
});
};
for
(
size_t
kernel
:
{
1
,
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
kernel
:
{
2
,
3
,
4
,
5
,
6
,
7
})
for
(
size_t
ic
:
{
1
,
4
,
8
,
16
})
for
(
size_t
oc
:
{
1
,
4
,
8
})
for
(
size_t
p
:
{
0
,
2
})
...
...
@@ -2183,7 +2183,7 @@ TEST_F(X86_BENCHMARK_MULTI_THREADS, BENCHMARK_CONVBIAS_IM2COL_INT8X8X32) {
std
::
vector
<
DType
>
data_type
=
{
dtype
::
Int8
(),
dtype
::
Int8
(),
dtype
::
Int32
(),
dtype
::
Int32
()};
std
::
string
algo_name
=
"IM2COLMATMUL:X86_INT8X8X32_AVX2_4X16X2"
;
std
::
string
algo_name
=
"IM2COLMATMUL:X86_INT8X8X32_AVX2_4X16X2
:192
"
;
// std::string algo_name = "IM2COLMATMUL:X86_INT8X8X32_AVX2_2X4X16";
// printf("Benchmark IM2COLMATMUL:X86_INT8X8X32_AVX2_4X16X2 algo\n");
benchmark_impl
(
param
,
shapes_and_computation
,
algo_name
,
RUNS
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录