Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
2aba0378
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
2aba0378
编写于
7月 08, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mgb/dnn): fix group conv is_available
GitOrigin-RevId: b2799091689336cfc626315885f3d296fd13e70f
上级
4a92346b
变更
22
显示空白变更内容
内联
并排
Showing
22 changed file
with
248 addition
and
277 deletion
+248
-277
dnn/src/common/algo_chooser.h
dnn/src/common/algo_chooser.h
+15
-0
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+0
-7
dnn/src/cuda/conv_bias/conv_nchwqs8.cpp
dnn/src/cuda/conv_bias/conv_nchwqs8.cpp
+90
-81
dnn/src/cuda/conv_bias/group_conv.cpp
dnn/src/cuda/conv_bias/group_conv.cpp
+27
-19
dnn/src/cuda/conv_bias/opr_impl.cpp
dnn/src/cuda/conv_bias/opr_impl.cpp
+4
-5
dnn/src/cuda/convolution/backward_data/group_conv.cpp
dnn/src/cuda/convolution/backward_data/group_conv.cpp
+6
-14
dnn/src/cuda/convolution/backward_filter/group_conv.cpp
dnn/src/cuda/convolution/backward_filter/group_conv.cpp
+6
-14
dnn/src/cuda/convolution/opr_impl.cpp
dnn/src/cuda/convolution/opr_impl.cpp
+8
-12
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
+6
-15
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
+7
-14
dnn/src/cuda/convolution3d/forward/group_conv.cpp
dnn/src/cuda/convolution3d/forward/group_conv.cpp
+6
-16
dnn/src/cuda/convolution3d/opr_impl.cpp
dnn/src/cuda/convolution3d/opr_impl.cpp
+12
-19
dnn/src/cuda/cudnn_wrapper.cpp
dnn/src/cuda/cudnn_wrapper.cpp
+1
-1
dnn/src/cuda/relayout_format/opr_impl.cpp
dnn/src/cuda/relayout_format/opr_impl.cpp
+1
-1
dnn/src/cuda/relayout_format/relayout_format.cpp
dnn/src/cuda/relayout_format/relayout_format.cpp
+35
-2
dnn/src/cuda/relayout_format/relayout_format.cu
dnn/src/cuda/relayout_format/relayout_format.cu
+0
-22
dnn/src/cuda/relayout_format/relayout_format.cuh
dnn/src/cuda/relayout_format/relayout_format.cuh
+0
-3
dnn/src/cuda/relayout_format/relayout_format.h
dnn/src/cuda/relayout_format/relayout_format.h
+3
-1
dnn/test/common/accuracy_shake_checker.h
dnn/test/common/accuracy_shake_checker.h
+3
-3
dnn/test/cuda/accuracy_shake.cpp
dnn/test/cuda/accuracy_shake.cpp
+6
-23
dnn/test/cuda/conv_bias.cpp
dnn/test/cuda/conv_bias.cpp
+5
-5
dnn/test/cuda/relayout_format.cpp
dnn/test/cuda/relayout_format.cpp
+7
-0
未找到文件。
dnn/src/common/algo_chooser.h
浏览文件 @
2aba0378
...
...
@@ -74,6 +74,21 @@ std::vector<typename Opr::Algorithm*> get_all_algorithms(
return
ret
;
}
/*!
 * \brief tell whether at least one algorithm listed in
 *      Opr::algo_pack().all_algos reports itself available for \p args
 *
 * The algorithms are probed in the order they are stored in all_algos and the
 * scan stops at the first one whose is_available() returns true.
 *
 * \param args size arguments forwarded unchanged to each is_available() call
 * \return true if some algorithm accepted \p args; false otherwise, including
 *      when all_algos is empty
 */
template <class Opr>
bool has_available_algo(const typename Opr::AlgoBase::SizeArgs& args) {
    bool found = false;
    for (auto candidate : Opr::algo_pack().all_algos) {
        if (candidate->is_available(args)) {
            found = true;
            break;
        }
    }
    return found;
}
/*!
* \brief a helper function to get an algorithm match attribute. If require a
* algorithm with specified attribute, and the given algorithm match that
...
...
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
2aba0378
...
...
@@ -454,8 +454,6 @@ public:
return
AlgoAttribute
::
REPRODUCIBLE
;
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
dst_pg
,
TensorLayout
&
bias_pg
);
MEGDNN_DECL_ALGO_TYPE
(
CUDA_GROUP_CONV_GENERAL
)
private:
...
...
@@ -578,11 +576,6 @@ public:
const
OperatorBase
*
opr
)
const
override
;
private:
void
make_inner_layout
(
const
SizeArgs
&
args
,
TensorLayout
&
inner_src_layout
,
TensorLayout
&
inner_weight_layout
,
TensorLayout
&
inner_dst_layout
,
TensorLayout
&
inner_bias_layout
,
TensorLayout
&
inner_z_layout
)
const
;
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
};
...
...
dnn/src/cuda/conv_bias/conv_nchwqs8.cpp
浏览文件 @
2aba0378
...
...
@@ -14,6 +14,7 @@
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/relayout_format/opr_impl.h"
#include "src/cuda/relayout_format/relayout_format.h"
#include "src/cuda/utils.h"
using
namespace
megdnn
;
...
...
@@ -37,18 +38,21 @@ inline void deduce_reformat_layout(std::unique_ptr<RelayoutFormat>& relayout,
dst_layout
=
src_layout
;
}
}
}
// namespace
void
ConvBiasForwardImpl
::
AlgoFallbackNCHWQS8
::
make_inner_layout
(
const
SizeArgs
&
args
,
TensorLayout
&
inner_src_layout
,
TensorLayout
&
inner_weight_layout
,
TensorLayout
&
inner_dst_layout
,
TensorLayout
&
inner_bias_layout
,
TensorLayout
&
inner_z_layout
)
const
{
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
sub_opr_config
(
const
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
TensorLayout
inner_src_layout
;
TensorLayout
inner_filter_layout
;
TensorLayout
inner_bias_layout
;
TensorLayout
inner_z_layout
;
TensorLayout
inner_dst_layout
;
auto
relayout_src
=
args
.
handle
->
create_operator
<
RelayoutFormat
>
();
deduce_reformat_layout
(
relayout_src
,
*
args
.
src_layout
,
inner_src_layout
,
RelayoutFormat
::
Param
::
Mode
::
NCHW_NCHW4
,
0
,
args
.
filter_meta
.
group
);
deduce_reformat_layout
(
relayout_src
,
*
args
.
filter_layout
,
inner_
weight
_layout
,
inner_
filter
_layout
,
RelayoutFormat
::
Param
::
Mode
::
NCHW_NCHW4_WEIGHT
);
bool
dst_float
=
args
.
dst_layout
->
dtype
.
enumv
()
==
DTypeEnum
::
Float32
;
if
(
dst_float
)
{
...
...
@@ -67,7 +71,32 @@ void ConvBiasForwardImpl::AlgoFallbackNCHWQS8::make_inner_layout(
RelayoutFormat
::
Param
::
Mode
::
NCHW_NCHW4
,
0
,
args
.
filter_meta
.
group
);
}
};
megdnn
::
param
::
ConvBias
inner_conv_param
=
args
.
opr
->
param
();
if
(
args
.
dst_layout
->
dtype
.
enumv
()
==
DTypeEnum
::
Float32
)
{
inner_conv_param
.
format
=
megdnn
::
param
::
ConvBias
::
Format
::
NCHW4_NCHW
;
}
else
{
inner_conv_param
.
format
=
megdnn
::
param
::
ConvBias
::
Format
::
NCHW4
;
}
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
ret
;
ret
.
first
=
{
inner_src_layout
,
inner_filter_layout
,
inner_bias_layout
,
inner_z_layout
,
inner_dst_layout
};
ret
.
second
=
inner_conv_param
;
return
ret
;
}
/*!
 * \brief create the inner ConvBias operator together with the layouts it
 *      should be called with
 *
 * A new ConvBias operator is created from args.handle, set_execution_policy()
 * is invoked with args.opr and the new operator (presumably to carry the
 * outer operator's policy over -- confirm against set_execution_policy), and
 * the param produced by sub_opr_config(args) is assigned to it.
 *
 * \return pair of (layouts from sub_opr_config(args), the configured operator)
 */
std::pair<TensorLayoutArray, std::unique_ptr<ConvBiasForward>> prepare_sub_opr(
        const ConvBiasForwardImpl::AlgoBase::SizeArgs& args) {
    auto inner_opr = args.handle->create_operator<ConvBias>();
    set_execution_policy<ConvBiasForward, ConvBiasForward*>(args.opr,
                                                            inner_opr.get());

    // first: layouts for the inner operator, second: its param
    auto&& layouts_and_param = sub_opr_config(args);
    inner_opr->param() = layouts_and_param.second;

    return {layouts_and_param.first, std::move(inner_opr)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
ConvBiasForwardImpl
::
AlgoFallbackNCHWQS8
::
get_subopr_list
(
...
...
@@ -75,28 +104,12 @@ ConvBiasForwardImpl::AlgoFallbackNCHWQS8::get_subopr_list(
const
ConvBiasForwardImpl
*
o
=
static_cast
<
const
ConvBiasForwardImpl
*>
(
opr
);
SizeArgs
args
(
const_cast
<
ConvBiasForwardImpl
*>
(
o
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
],
layouts
[
3
],
layouts
[
4
],
nullptr
);
TensorLayout
inner_src_layout
;
TensorLayout
inner_weight_layout
;
TensorLayout
inner_dst_layout
;
TensorLayout
inner_bias_layout
;
TensorLayout
inner_z_layout
;
make_inner_layout
(
args
,
inner_src_layout
,
inner_weight_layout
,
inner_dst_layout
,
inner_bias_layout
,
inner_z_layout
);
Param
inner_conv_param
=
o
->
param
();
if
(
layouts
[
4
].
dtype
.
enumv
()
==
DTypeEnum
::
Float32
)
{
inner_conv_param
.
format
=
Param
::
Format
::
NCHW4_NCHW
;
}
else
{
inner_conv_param
.
format
=
Param
::
Format
::
NCHW4
;
}
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
inner_conv_param
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVBIAS_FORWARD
,
param_str
,
{
inner_src_layout
,
inner_weight_layout
,
inner_bias_layout
,
inner_z_layout
,
inner_dst_layout
}}};
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVBIAS_FORWARD
,
param_str
,
config
.
first
}};
}
bool
ConvBiasForwardImpl
::
AlgoFallbackNCHWQS8
::
is_available
(
...
...
@@ -115,39 +128,46 @@ bool ConvBiasForwardImpl::AlgoFallbackNCHWQS8::is_available(
args
.
bias_layout
->
shape
[
2
]
==
1
&&
args
.
bias_layout
->
shape
[
3
]
==
1
);
bool
is_ok
=
is_format_ok
&&
is_version_ok
&&
is_dtype_ok
&&
is_bias_ok
;
return
is_ok
;
if
(
!
is_ok
)
{
return
false
;
}
auto
config
=
prepare_sub_opr
(
args
);
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
ConvBiasForwardImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
]};
bool
is_relayout_ok
=
true
;
if
(
args
.
dst_layout
->
dtype
.
enumv
()
!=
DTypeEnum
::
Float32
)
{
is_relayout_ok
=
relayout_format
::
RelayoutFormatFast
::
usable
(
config
.
first
[
4
],
*
args
.
dst_layout
,
RelayoutFormat
::
Param
::
Mode
::
NCHW4_NCHW
);
}
return
is_relayout_ok
&&
has_available_algo
<
ConvBiasForwardImpl
>
(
sub_args
);
}
WorkspaceBundle
ConvBiasForwardImpl
::
AlgoFallbackNCHWQS8
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
TensorLayout
inner_src_layout
;
TensorLayout
inner_weight_layout
;
TensorLayout
inner_dst_layout
;
TensorLayout
inner_bias_layout
;
TensorLayout
inner_z_layout
;
make_inner_layout
(
args
,
inner_src_layout
,
inner_weight_layout
,
inner_dst_layout
,
inner_bias_layout
,
inner_z_layout
);
Param
inner_conv_param
=
args
.
opr
->
param
();
auto
config
=
prepare_sub_opr
(
args
);
size_t
ws_dst
=
0
,
ws_bias
=
0
,
ws_z
=
0
;
if
(
args
.
dst_layout
->
dtype
.
enumv
()
==
DTypeEnum
::
Float32
)
{
inner_conv_param
.
format
=
Param
::
Format
::
NCHW4_NCHW
;
}
else
{
inner_conv_param
.
format
=
Param
::
Format
::
NCHW4
;
ws_dst
=
inner_dst_layout
.
span
().
dist_byte
();
ws_bias
=
inner_bias_layout
.
span
().
dist_byte
();
ws_z
=
inner_z_layout
.
span
().
dist_byte
();
if
(
args
.
dst_layout
->
dtype
.
enumv
()
!=
DTypeEnum
::
Float32
)
{
ws_bias
=
config
.
first
[
2
].
span
().
dist_byte
();
ws_z
=
config
.
first
[
3
].
span
().
dist_byte
();
ws_dst
=
config
.
first
[
4
].
span
().
dist_byte
();
}
auto
opr
=
args
.
handle
->
create_operator
<
ConvBiasForward
>
();
opr
->
param
()
=
inner_conv_param
;
set_execution_policy
<
ConvBiasForward
,
ConvBiasForward
*>
(
args
.
opr
,
opr
.
get
());
return
WorkspaceBundle
(
ptr
,
{
inner_src_layout
.
span
().
dist_byte
(),
inner_weight_layout
.
span
().
dist_byte
(),
ws_dst
,
ws_bias
,
ws_z
,
opr
->
get_workspace_in_bytes
(
inner_src_layout
,
inner_weight_layout
,
inner_bias_layout
,
inner_z_layout
,
inner_dst_layout
,
nullptr
)});
size_t
inner_ws
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
],
nullptr
);
return
WorkspaceBundle
(
ptr
,
{
config
.
first
[
0
].
span
().
dist_byte
(),
config
.
first
[
1
].
span
().
dist_byte
(),
ws_bias
,
ws_z
,
ws_dst
,
inner_ws
});
}
size_t
ConvBiasForwardImpl
::
AlgoFallbackNCHWQS8
::
get_workspace_in_bytes
(
...
...
@@ -177,44 +197,33 @@ void ConvBiasForwardImpl::AlgoFallbackNCHWQS8::exec(
relayout_nchw4_nchw
->
param
()
=
nchw4_nchw_trans
;
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
TensorLayout
inner_src_layout
;
TensorLayout
inner_weight_layout
;
TensorLayout
inner_dst_layout
;
TensorLayout
inner_bias_layout
;
TensorLayout
inner_z_layout
;
make_inner_layout
(
args
,
inner_src_layout
,
inner_weight_layout
,
inner_dst_layout
,
inner_bias_layout
,
inner_z_layout
);
TensorND
inner_src
(
bundle
.
get
(
0
),
inner_src_layout
);
TensorND
inner_weight
(
bundle
.
get
(
1
),
inner_weight_layout
);
TensorND
inner_dst
(
bundle
.
get
(
2
),
inner_dst_layout
);
TensorND
inner_bias
(
bundle
.
get
(
3
),
inner_bias_layout
);
TensorND
inner_z
(
bundle
.
get
(
4
),
inner_z_layout
);
bool
dst_float
=
args
.
dst_layout
->
dtype
.
enumv
()
==
DTypeEnum
::
Float32
;
auto
config
=
prepare_sub_opr
(
args
);
TensorND
inner_src
(
bundle
.
get
(
0
),
config
.
first
[
0
]);
TensorND
inner_weight
(
bundle
.
get
(
1
),
config
.
first
[
1
]);
TensorND
inner_bias
(
bundle
.
get
(
2
),
config
.
first
[
2
]);
TensorND
inner_z
(
bundle
.
get
(
3
),
config
.
first
[
3
]);
TensorND
inner_dst
(
bundle
.
get
(
4
),
config
.
first
[
4
]);
Param
inner_conv_param
=
args
.
opr
->
param
();
inner_conv_param
.
format
=
dst_float
?
Param
::
Format
::
NCHW4_NCHW
:
Param
::
Format
::
NCHW4
;
auto
inner_opr
=
args
.
handle
->
create_operator
<
ConvBiasForward
>
();
inner_opr
->
param
()
=
inner_conv_param
;
set_execution_policy
<
ConvBiasForward
,
ConvBiasForward
*>
(
args
.
opr
,
inner_opr
.
get
());
bool
dst_float
=
args
.
dst_layout
->
dtype
.
enumv
()
==
DTypeEnum
::
Float32
;
relayout_nchw_nchw4
->
exec
(
*
args
.
src_tensor
,
inner_src
,
{});
relayout_weight
->
exec
(
*
args
.
filter_tensor
,
inner_weight
,
{});
if
(
dst_float
)
{
inner_opr
->
exec
(
inner_src
,
inner_weight
,
*
args
.
bias_tensor
,
*
args
.
z_tensor
,
*
args
.
dst_tensor
,
nullptr
,
config
.
second
->
exec
(
inner_src
,
inner_weight
,
*
args
.
bias_tensor
,
*
args
.
z_tensor
,
*
args
.
dst_tensor
,
nullptr
,
Workspace
((
dt_byte
*
)
bundle
.
get
(
5
),
bundle
.
get_size
(
5
)));
}
else
{
if
(
inner_bias
_
layout
.
ndim
>
0
)
{
if
(
inner_bias
.
layout
.
ndim
>
0
)
{
relayout_nchw_nchw4
->
exec
(
*
args
.
bias_tensor
,
inner_bias
,
{});
}
if
(
inner_z
_
layout
.
ndim
>
0
)
{
if
(
inner_z
.
layout
.
ndim
>
0
)
{
relayout_nchw_nchw4
->
exec
(
*
args
.
z_tensor
,
inner_z
,
{});
}
inner_opr
->
exec
(
inner_src
,
inner_weight
,
inner_bias
,
inner_z
,
inner_dst
,
config
.
second
->
exec
(
inner_src
,
inner_weight
,
inner_bias
,
inner_z
,
inner_dst
,
nullptr
,
Workspace
((
dt_byte
*
)
bundle
.
get
(
5
),
bundle
.
get_size
(
5
)));
relayout_nchw4_nchw
->
exec
(
inner_dst
,
*
args
.
dst_tensor
,
{});
...
...
dnn/src/cuda/conv_bias/group_conv.cpp
浏览文件 @
2aba0378
...
...
@@ -21,20 +21,7 @@ namespace {
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
sub_opr_config
(
const
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
TensorLayout
src_pg
=
*
args
.
src_layout
;
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
filter_pg
=
*
args
.
filter_layout
;
TensorLayout
bias_pg
=
*
args
.
bias_layout
;
TensorLayout
z_pg
=
*
args
.
z_layout
;
TensorLayout
dst_pg
=
*
args
.
dst_layout
;
...
...
@@ -50,6 +37,8 @@ std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
"invalid conv format"
);
c_pos
=
3
;
}
filter_pg
.
remove_axis_inplace
(
0
);
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
bias_pg
.
ndim
=
0
;
dst_pg
.
shape
[
c_pos
]
/=
nr_grp
;
...
...
@@ -107,10 +96,27 @@ bool ConvBiasForwardImpl::AlgoGroupConvGeneral::is_available(
param
.
format
==
param
::
ConvBias
::
Format
::
NCHW32
)
return
false
;
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
static_cast
<
ConvBiasForwardImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
]);
auto
dst_layout
=
*
args
.
dst_layout
;
if
(
dst_layout
.
dtype
.
enumv
()
!=
args
.
bias_layout
->
dtype
.
enumv
())
{
dst_layout
.
dtype
=
DType
();
args
.
opr
->
check_or_deduce_dtype_fwd
(
args
.
src_layout
->
dtype
,
args
.
filter_layout
->
dtype
,
dst_layout
.
dtype
);
}
auto
conv_args
=
args
;
conv_args
.
dst_layout
=
&
dst_layout
;
auto
config
=
prepare_sub_opr
(
conv_args
);
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
ConvBiasForwardImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
]};
bool
ret
=
has_available_algo
<
ConvBiasForwardImpl
>
(
sub_args
);
return
ret
;
}
WorkspaceBundle
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
::
get_workspace_bundle
(
...
...
@@ -125,7 +131,9 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
sizes
.
push_back
(
dst_layout
.
span
().
dist_byte
());
}
auto
config
=
prepare_sub_opr
(
args
);
auto
conv_args
=
args
;
conv_args
.
dst_layout
=
&
dst_layout
;
auto
config
=
prepare_sub_opr
(
conv_args
);
size_t
mm_ws
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
],
nullptr
);
...
...
dnn/src/cuda/conv_bias/opr_impl.cpp
浏览文件 @
2aba0378
...
...
@@ -197,11 +197,10 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
ConvBiasForwardImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
)){
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
group
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
group
;
}
if
(
sm_algo_pack
.
fallback_nchw_qs8
.
is_available_attribute
(
...
...
dnn/src/cuda/convolution/backward_data/group_conv.cpp
浏览文件 @
2aba0378
...
...
@@ -19,21 +19,11 @@ using namespace convolution;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution
::
Param
>
sub_opr_config
(
const
ConvolutionBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
diff_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
filter_pg
=
*
args
.
filter_layout
;
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
TensorLayout
grad_pg
=
*
args
.
grad_layout
;
filter_pg
.
remove_axis_inplace
(
0
);
auto
nr_grp
=
args
.
filter_meta
.
group
;
size_t
c_pos
=
1
;
diff_pg
.
shape
[
c_pos
]
/=
nr_grp
;
...
...
@@ -92,9 +82,11 @@ bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
ConvolutionBackwardDataImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]};
return
has_available_algo
<
ConvolutionBackwardDataImpl
>
(
sub_args
);
}
WorkspaceBundle
...
...
dnn/src/cuda/convolution/backward_filter/group_conv.cpp
浏览文件 @
2aba0378
...
...
@@ -18,21 +18,11 @@ using namespace convolution;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution
::
Param
>
sub_opr_config
(
const
ConvolutionBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
grad_layout
->
ndim
>
args
.
diff_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
grad_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
grad_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
grad_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
grad_layout
->
dtype
,
args
.
grad_layout
->
format
);
TensorLayout
filter_pg
=
*
args
.
grad_layout
;
TensorLayout
src_pg
=
*
args
.
src_layout
;
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
filter_pg
.
remove_axis_inplace
(
0
);
auto
nr_grp
=
args
.
grad_filter_meta
.
group
;
size_t
c_pos
=
1
;
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
...
...
@@ -88,9 +78,11 @@ bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
ConvolutionBackwardFilterImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]};
return
has_available_algo
<
ConvolutionBackwardFilterImpl
>
(
sub_args
);
}
WorkspaceBundle
...
...
dnn/src/cuda/convolution/opr_impl.cpp
浏览文件 @
2aba0378
...
...
@@ -173,12 +173,10 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
ConvolutionBackwardDataImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
group
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
group
;
}
if
(
args
.
filter_layout
->
dtype
.
enumv
()
!=
...
...
@@ -302,12 +300,10 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
grad_filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
ConvolutionBackwardFilterImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
if
(
args
.
grad_filter_meta
.
group
>
1
&&
sm_algo_pack
.
group
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
group
;
}
if
(
args
.
src_layout
->
dtype
.
enumv
()
!=
DTypeTrait
<
dtype
::
BFloat16
>::
enumv
)
{
...
...
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
浏览文件 @
2aba0378
...
...
@@ -18,22 +18,11 @@ using namespace convolution3d;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution3DBackwardDataImpl
::
Param
>
sub_opr_config
(
const
Convolution3DBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
grad_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
filter_pg
=
*
args
.
filter_layout
;
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
TensorLayout
grad_pg
=
*
args
.
grad_layout
;
filter_pg
.
remove_axis_inplace
(
0
);
auto
nr_grp
=
args
.
filter_meta
.
group
;
size_t
c_pos
=
1
;
diff_pg
.
shape
[
c_pos
]
/=
nr_grp
;
...
...
@@ -84,9 +73,11 @@ bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available(
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
Convolution3DBackwardDataImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]};
return
has_available_algo
<
Convolution3DBackwardDataImpl
>
(
sub_args
);
}
WorkspaceBundle
...
...
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
浏览文件 @
2aba0378
...
...
@@ -19,21 +19,12 @@ namespace {
std
::
pair
<
TensorLayoutArray
,
Convolution3DBackwardFilterImpl
::
Param
>
sub_opr_config
(
const
Convolution3DBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
grad_layout
->
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
grad_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
grad_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
grad_layout
->
stride
[
idx
]);
}
TensorLayout
grad_pg
(
flt_shape
,
flt_stride
,
args
.
grad_layout
->
dtype
,
args
.
grad_layout
->
format
);
TensorLayout
grad_pg
=
*
args
.
grad_layout
;
TensorLayout
src_pg
=
*
args
.
src_layout
;
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
grad_pg
.
remove_axis_inplace
(
0
);
auto
nr_grp
=
args
.
grad_filter_meta
.
group
;
size_t
c_pos
=
1
;
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
...
...
@@ -86,9 +77,11 @@ bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
Convolution3DBackwardFilterImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]};
return
has_available_algo
<
Convolution3DBackwardFilterImpl
>
(
sub_args
);
}
WorkspaceBundle
...
...
dnn/src/cuda/convolution3d/forward/group_conv.cpp
浏览文件 @
2aba0378
...
...
@@ -19,20 +19,7 @@ namespace {
std
::
pair
<
TensorLayoutArray
,
Convolution3DForwardImpl
::
Param
>
sub_opr_config
(
const
Convolution3DForwardImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
TensorLayout
src_pg
=
*
args
.
src_layout
;
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
filter_pg
=
*
args
.
filter_layout
;
TensorLayout
dst_pg
=
*
args
.
dst_layout
;
auto
nr_grp
=
args
.
filter_meta
.
group
;
...
...
@@ -45,6 +32,7 @@ std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> sub_opr_config(
"invalid conv format"
);
c_pos
=
4
;
}
filter_pg
.
remove_axis_inplace
(
0
);
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
dst_pg
.
shape
[
c_pos
]
/=
nr_grp
;
...
...
@@ -92,9 +80,11 @@ bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available(
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
AlgoBase
::
SizeArgs
sub_args
{
static_cast
<
Convolution3DForwardImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]};
return
has_available_algo
<
Convolution3DForwardImpl
>
(
sub_args
);
}
WorkspaceBundle
...
...
dnn/src/cuda/convolution3d/opr_impl.cpp
浏览文件 @
2aba0378
...
...
@@ -89,13 +89,10 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
Convolution3DForwardImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
group
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
group
;
}
return
megdnn
::
get_algo_match_attribute
<
Convolution3DForwardImpl
>
(
...
...
@@ -189,12 +186,10 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardDataImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
group
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
group
;
}
return
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardDataImpl
>
(
...
...
@@ -272,12 +267,10 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
grad_filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardFilterImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
if
(
args
.
grad_filter_meta
.
group
>
1
&&
sm_algo_pack
.
group
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
group
;
}
return
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardFilterImpl
>
(
...
...
dnn/src/cuda/cudnn_wrapper.cpp
浏览文件 @
2aba0378
...
...
@@ -467,7 +467,7 @@ CudnnAlgoPack::conv_bwd_data_algos() {
DEF_ALGO
(
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT
,
true
,
true
),
DEF_ALGO
(
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
,
true
,
true
),
#if CUDNN_MAJOR >= 5
DEF_ALGO
(
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD
,
true
,
fals
e
),
DEF_ALGO
(
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD
,
true
,
tru
e
),
#if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
DEF_ALGO
(
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED
,
true
,
false
),
#endif
...
...
dnn/src/cuda/relayout_format/opr_impl.cpp
浏览文件 @
2aba0378
...
...
@@ -94,7 +94,7 @@ void RelayoutFormatImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
param
().
mode
==
Param
::
Mode
::
NCHW_NCHW4_WEIGHT
;
if
(
is_trans_4bits
||
is_nchw_nchw4
)
{
bool
is_usable
=
relayout_format
::
RelayoutFormatFast
::
usable
(
src
.
layout
,
dst
.
layout
);
src
.
layout
,
dst
.
layout
,
param
().
mode
);
megdnn_assert
(
is_usable
,
"RelayoutFormatFast kernel is not usable for "
"transforming %s(%s) to %s(%s)."
,
...
...
dnn/src/cuda/relayout_format/relayout_format.cpp
浏览文件 @
2aba0378
...
...
@@ -12,6 +12,9 @@
#include "src/cuda/relayout_format/relayout_format.cuh"
#include "src/cuda/relayout_format/relayout_format.h"
#include "src/common/utils.h"
#include "megdnn/dtype.h"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -35,8 +38,38 @@ inline void get_scale_zeropoint(const DType& tensor_dtype, float& scale,
}
// namespace
bool
relayout_format
::
RelayoutFormatFast
::
usable
(
const
TensorLayout
&
src_layout
,
const
TensorLayout
&
dst_layout
)
{
return
relayout_format_cuda_usable
(
src_layout
,
dst_layout
);
const
TensorLayout
&
src_layout
,
const
TensorLayout
&
dst_layout
,
const
RelayoutFormat
::
Param
::
Mode
&
mode
)
{
bool
is_all_continue
=
src_layout
.
is_contiguous
()
&&
dst_layout
.
is_contiguous
();
bool
is_all_int32
=
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS32
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS32
);
bool
is_all_int8
=
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Uint8
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
||
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
)
||
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
);
bool
is_all_int4
=
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS4
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS4
)
||
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized4Asymm
&&
dst_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized4Asymm
);
bool
is_nchw4_nchw_ok
=
true
;
if
(
mode
==
RelayoutFormat
::
Param
::
Mode
::
NCHW4_NCHW
)
{
is_nchw4_nchw_ok
=
(
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
Quantized8Asymm
||
src_layout
.
dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
&&
src_layout
.
dtype
==
dst_layout
.
dtype
;
}
return
is_all_continue
&&
(
is_all_int32
||
is_all_int8
||
is_all_int4
)
&&
is_nchw4_nchw_ok
;
}
void
relayout_format
::
RelayoutFormatFast
::
exec
(
const
TensorND
&
src
,
...
...
dnn/src/cuda/relayout_format/relayout_format.cu
浏览文件 @
2aba0378
...
...
@@ -461,28 +461,6 @@ void relayout_format::relayout_format_cuda_nchw_nchwx(
}
}
// Reports whether the CUDA relayout kernels accept this pair of layouts:
// both layouts must be contiguous and the (src, dst) dtype pair must be one
// of the supported 32-bit, 8-bit or 4-bit combinations checked below.
bool relayout_format::relayout_format_cuda_usable(
        const TensorLayout& src_layout, const TensorLayout& dst_layout) {
    const bool layouts_contiguous =
            src_layout.is_contiguous() && dst_layout.is_contiguous();

    const auto src_ev = src_layout.dtype.enumv().ev;
    const auto dst_ev = dst_layout.dtype.enumv().ev;

    // QuantizedS32 -> QuantizedS32
    const bool int32_pair = src_ev == DTypeEnum::Ev::QuantizedS32 &&
                            dst_ev == DTypeEnum::Ev::QuantizedS32;

    // {Uint8, Quantized8Asymm, QuantizedS8} -> QuantizedS8
    const bool int8_pair = dst_ev == DTypeEnum::Ev::QuantizedS8 &&
                           (src_ev == DTypeEnum::Ev::Uint8 ||
                            src_ev == DTypeEnum::Ev::Quantized8Asymm ||
                            src_ev == DTypeEnum::Ev::QuantizedS8);

    // QuantizedS4 -> QuantizedS4, Quantized4Asymm -> Quantized4Asymm
    const bool int4_pair = (src_ev == DTypeEnum::Ev::QuantizedS4 &&
                            dst_ev == DTypeEnum::Ev::QuantizedS4) ||
                           (src_ev == DTypeEnum::Ev::Quantized4Asymm &&
                            dst_ev == DTypeEnum::Ev::Quantized4Asymm);

    return layouts_contiguous && (int32_pair || int8_pair || int4_pair);
}
void
relayout_format
::
relayout_format_cuda_nchwx_nchw
(
const
TensorND
&
src
,
const
TensorND
&
dst
,
const
cudaStream_t
&
stream
,
const
float
src_scale
,
const
float
dst_scale
,
...
...
dnn/src/cuda/relayout_format/relayout_format.cuh
浏览文件 @
2aba0378
...
...
@@ -25,9 +25,6 @@ void relayout_format_cuda_nchw_nchwx(
const
uint8_t
src_zero_point
=
0
,
const
uint8_t
dst_zero_point
=
0
,
const
int
group
=
1
);
// Prototype of the CUDA relayout usability check: takes the source and
// destination tensor layouts and returns a bool.
// NOTE(review): the matching definition is the one shown for
// relayout_format.cu; confirm no caller remains before dropping either.
bool
relayout_format_cuda_usable
(
const
TensorLayout
&
src_layout
,
const
TensorLayout
&
dst_layout
);
// Prototype of the NCHW4 -> NCHW relayout entry point (direction inferred
// from the name -- TODO confirm against the definition).
// Parameters: source tensor, destination tensor, a CUDA stream and a group
// count.
// NOTE(review): `stream` is presumably the stream the work is issued on;
// verify in the definition.
void
relayout_format_cuda_nchw4_nchw
(
const
TensorND
&
src
,
const
TensorND
&
dst
,
const
cudaStream_t
&
stream
,
const
int
group
);
...
...
dnn/src/cuda/relayout_format/relayout_format.h
浏览文件 @
2aba0378
...
...
@@ -22,7 +22,9 @@ namespace relayout_format {
struct
RelayoutFormatFast
{
static
bool
usable
(
const
TensorLayout
&
src_layout
,
const
TensorLayout
&
dst_layout
);
const
TensorLayout
&
dst_layout
,
const
RelayoutFormat
::
Param
::
Mode
&
mode
=
RelayoutFormat
::
Param
::
Mode
::
NCHW_NCHW4
);
static
void
exec
(
const
TensorND
&
src
,
const
TensorND
&
dst
,
cudaStream_t
stream
,
RelayoutFormat
::
Param
::
Mode
mode
,
int
group
);
...
...
dnn/test/common/accuracy_shake_checker.h
浏览文件 @
2aba0378
...
...
@@ -164,9 +164,9 @@ public:
}
std
::
vector
<
Algorithm
::
Info
::
Desc
>
ret
;
megdnn_assert
(
layouts
.
size
()
==
OprTrait
<
Opr
>::
arity
);
for
(
auto
algo_info
:
AlgoProxy
<
Opr
,
OprTrait
<
Opr
>::
arity
>::
get_all_algorithms_info
(
opr
,
layouts
)
)
{
auto
vec
=
AlgoProxy
<
Opr
,
OprTrait
<
Opr
>::
arity
>::
get_all_algorithms_info
(
opr
,
layouts
);
for
(
auto
algo_info
:
vec
)
{
if
(
!
(
algo_info
.
attribute
&
AlgoAttribute
::
ACCURACY_DEPEND_ON_BATCH
)
&&
(
algo_info
.
attribute
&
AlgoAttribute
::
REPRODUCIBLE
)
&&
...
...
dnn/test/cuda/accuracy_shake.cpp
浏览文件 @
2aba0378
...
...
@@ -40,16 +40,8 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD) {
{
64
,
64
,
30
,
30
},
{}});
ConvBias
::
Param
param
;
// group
param
.
sparse
=
ConvBias
::
Param
::
Sparse
::
GROUP
;
checker
.
set_param
(
param
);
checker
.
exec
({{
64
,
16
,
32
,
32
},
{
2
,
32
,
8
,
3
,
3
},
{},
{},
{}});
checker
.
exec
({{
64
,
16
,
32
,
32
},
{
2
,
32
,
8
,
3
,
3
},
{
1
,
64
,
1
,
1
},
{},
{}});
checker
.
exec
({{
64
,
16
,
32
,
32
},
{
2
,
32
,
8
,
3
,
3
},
{
1
,
64
,
1
,
1
},
{
64
,
64
,
30
,
30
},
{}});
// FIXME currently group conv cannot get the attribute of its subopr, so we
// just ignore group conv here.
}
TEST_F
(
CUDA
,
SHAKE_CONV_BIAS_FORWARD_QS8_NCHW
)
{
...
...
@@ -248,15 +240,10 @@ TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
.
set_dtype
(
1
,
dtype
::
Float32
())
.
set_rng
(
0
,
&
default_rng
)
.
set_rng
(
1
,
&
default_rng
);
// ConvolutionBackwardData
checker
.
exec
({{
8
,
16
,
3
,
3
},
{
64
,
8
,
5
,
5
},
{
64
,
16
,
7
,
7
}});
// group
ConvolutionBackwardData
::
Param
param
;
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
GROUP
;
checker
.
set_param
(
param
);
checker
.
exec
({{
2
,
16
,
32
,
3
,
3
},
{
2
,
32
,
5
,
5
},
{
2
,
64
,
7
,
7
}});
checker
.
exec
({{
2
,
8
,
32
,
3
,
3
},
{
64
,
16
,
19
,
19
},
{
64
,
64
,
21
,
21
}});
// FIXME currently group conv cannot get the attribute of its subopr, so we
// just ignore group conv here.
}
TEST_F
(
CUDA
,
SHAKE_CONVOLUTION_BACKWARD_FILTER
)
{
...
...
@@ -266,14 +253,10 @@ TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_FILTER) {
.
set_dtype
(
1
,
dtype
::
Float32
())
.
set_rng
(
0
,
&
default_rng
)
.
set_rng
(
1
,
&
default_rng
);
// ConvolutionBackwardFilter
checker
.
exec
({{
2
,
64
,
7
,
7
},
{
2
,
32
,
5
,
5
},
{
32
,
64
,
3
,
3
}});
// group
ConvolutionBackwardFilter
::
Param
param
;
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
GROUP
;
checker
.
set_param
(
param
);
checker
.
exec
({{
2
,
64
,
7
,
7
},
{
2
,
32
,
5
,
5
},
{
2
,
16
,
32
,
3
,
3
}});
// FIXME currently group conv cannot get the attribute of its subopr, so we
// just ignore group conv here.
}
}
// namespace test
...
...
dnn/test/cuda/conv_bias.cpp
浏览文件 @
2aba0378
...
...
@@ -226,11 +226,11 @@ TEST_F(CUDA, CONV_BIAS_NCHW_QS8) {
ConvBias
::
Param
param
;
param
.
format
=
ConvBias
::
Param
::
Format
::
NCHW
;
checker
.
set_dtype
(
0
,
dtype
::
QuantizedS8
(
2.5
f
))
.
set_dtype
(
1
,
dtype
::
QuantizedS8
(
2.5
f
))
.
set_dtype
(
2
,
dtype
::
QuantizedS32
(
6.25
f
))
.
set_dtype
(
3
,
dtype
::
QuantizedS8
(
0.25
f
))
.
set_dtype
(
4
,
dtype
::
QuantizedS8
(
0.25
f
))
checker
.
set_dtype
(
0
,
dtype
::
QuantizedS8
(
1.
f
))
.
set_dtype
(
1
,
dtype
::
QuantizedS8
(
1.
f
))
.
set_dtype
(
2
,
dtype
::
QuantizedS32
(
1.
f
))
.
set_dtype
(
3
,
dtype
::
QuantizedS8
(
1.
f
))
.
set_dtype
(
4
,
dtype
::
QuantizedS8
(
1.
f
))
.
set_rng
(
0
,
&
int_rng
)
.
set_rng
(
1
,
&
int_rng
)
.
set_rng
(
2
,
&
int_rng
)
...
...
dnn/test/cuda/relayout_format.cpp
浏览文件 @
2aba0378
...
...
@@ -37,6 +37,7 @@ TEST_F(CUDA, RELAYOUT_FORMAT) {
TEST_F
(
CUDA
,
RELAYOUT_FORMAT_NCHW4_NCHW
)
{
Checker
<
RelayoutFormat
>
checker
(
handle_cuda
());
UniformIntRNG
rng
{
-
50
,
50
};
UniformIntRNG
u8_rng
{
0
,
255
};
param
::
RelayoutFormat
param
;
param
.
mode
=
param
::
RelayoutFormat
::
Mode
::
NCHW4_NCHW
;
...
...
@@ -46,6 +47,12 @@ TEST_F(CUDA, RELAYOUT_FORMAT_NCHW4_NCHW) {
.
set_param
(
param
)
.
execs
({{
1
,
1
,
2
,
2
,
4
},
{}});
checker
.
set_dtype
(
0
,
dtype
::
Quantized8Asymm
{
1.
f
,
128
})
.
set_dtype
(
1
,
dtype
::
Quantized8Asymm
{
1.
f
,
128
})
.
set_rng
(
0
,
&
u8_rng
)
.
set_param
(
param
)
.
execs
({{
1
,
1
,
2
,
2
,
4
},
{}});
checker
.
set_dtype
(
0
,
dtype
::
QuantizedS8
{
0.1
f
})
.
set_dtype
(
1
,
dtype
::
QuantizedS8
{
0.1
f
})
.
set_rng
(
0
,
&
rng
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录