Commit 633016a9
Authored Jun 10, 2021 by Megvii Engine Team
Committed by huangxinda on Jul 19, 2021
fix(dnn/cuda): fix AlgoFallbackNCHWQS8 to support Float32 dst
GitOrigin-RevId: 06f90f5cf384bc4ddb2f97860e4f530ee9a85705
Parent: e6caa9ff
Showing 3 changed files with 139 additions and 31 deletions (+139 / -31)
dnn/src/cuda/conv_bias/conv_nchwqs8.cpp                (+53 / -29)
dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp  (+1 / -2)
dnn/test/cuda/conv_bias_int8.cpp                       (+85 / -0)
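Note: this commit teaches the NCHW int8 fallback algorithm (AlgoFallbackNCHWQS8) to handle a Float32 dst. The algorithm serves quantized convolutions requested in plain NCHW layout by relayouting src and filter into the blocked NCHW4 layout, running an inner NCHW4 convolution, and relayouting the result back to NCHW. With a Float32 dst, the inner convolution can instead run in NCHW4_NCHW format and write the caller's NCHW buffers directly, so the dst/bias/z relayouts and their scratch buffers are skipped. As background, a minimal sketch of the NCHW -> NCHW4 repacking (illustrative only, not MegDNN code; nchw_to_nchw4 is a hypothetical helper):

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch only: repack an NCHW int8 tensor into the blocked
// NCHW4 layout [N][C/4][H][W][4] that the fallback relayouts into.
// The channel count is padded up to a multiple of 4 with zeros.
std::vector<std::int8_t> nchw_to_nchw4(const std::vector<std::int8_t>& src,
                                       std::size_t n, std::size_t c,
                                       std::size_t h, std::size_t w) {
    std::size_t c4 = (c + 3) / 4;  // number of 4-channel blocks
    std::vector<std::int8_t> dst(n * c4 * 4 * h * w, 0);
    for (std::size_t in = 0; in < n; ++in)
        for (std::size_t ic = 0; ic < c; ++ic)
            for (std::size_t ih = 0; ih < h; ++ih)
                for (std::size_t iw = 0; iw < w; ++iw) {
                    std::size_t src_idx = ((in * c + ic) * h + ih) * w + iw;
                    // NCHW4 index: [n][c/4][h][w][c%4]
                    std::size_t dst_idx =
                            (((in * c4 + ic / 4) * h + ih) * w + iw) * 4 +
                            ic % 4;
                    dst[dst_idx] = src[src_idx];
                }
    return dst;
}

The per-file diffs follow.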
dnn/src/cuda/conv_bias/conv_nchwqs8.cpp
@@ -50,15 +50,23 @@ void ConvBiasForwardImpl::AlgoFallbackNCHWQS8::make_inner_layout(
     deduce_reformat_layout(relayout_src, *args.filter_layout,
                            inner_weight_layout,
                            RelayoutFormat::Param::Mode::NCHW_NCHW4_WEIGHT);
-    deduce_reformat_layout(relayout_src, *args.dst_layout, inner_dst_layout,
-                           RelayoutFormat::Param::Mode::NCHW_NCHW4, 0,
-                           args.filter_meta.group);
-    deduce_reformat_layout(relayout_src, *args.bias_layout, inner_bias_layout,
-                           RelayoutFormat::Param::Mode::NCHW_NCHW4, 0,
-                           args.filter_meta.group);
-    deduce_reformat_layout(relayout_src, *args.z_layout, inner_z_layout,
-                           RelayoutFormat::Param::Mode::NCHW_NCHW4, 0,
-                           args.filter_meta.group);
+    bool dst_float = args.dst_layout->dtype.enumv() == DTypeEnum::Float32;
+    if (dst_float) {
+        inner_dst_layout = *args.dst_layout;
+        inner_bias_layout = *args.bias_layout;
+        inner_z_layout = *args.z_layout;
+    } else {
+        deduce_reformat_layout(relayout_src, *args.dst_layout,
+                               inner_dst_layout,
+                               RelayoutFormat::Param::Mode::NCHW_NCHW4, 0,
+                               args.filter_meta.group);
+        deduce_reformat_layout(relayout_src, *args.bias_layout,
+                               inner_bias_layout,
+                               RelayoutFormat::Param::Mode::NCHW_NCHW4, 0,
+                               args.filter_meta.group);
+        deduce_reformat_layout(relayout_src, *args.z_layout, inner_z_layout,
+                               RelayoutFormat::Param::Mode::NCHW_NCHW4, 0,
+                               args.filter_meta.group);
+    }
 };
 
 bool ConvBiasForwardImpl::AlgoFallbackNCHWQS8::is_available(
@@ -70,8 +78,7 @@ bool ConvBiasForwardImpl::AlgoFallbackNCHWQS8::is_available(
     auto&& param = args.opr->param();
     bool is_format_ok = param.format == param::ConvBias::Format::NCHW;
     bool is_version_ok = CUDNN_VERSION >= 7500;
-    bool is_dtype_ok =
-            args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8;
+    bool is_dtype_ok = args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8;
     bool is_bias_ok =
             args.bias_layout->ndim == 0 ||
             (args.bias_layout->ndim == 4 && args.bias_layout->shape[0] == 1 &&
@@ -90,17 +97,23 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoFallbackNCHWQS8::get_workspace_bundle(
     TensorLayout inner_z_layout;
     make_inner_layout(args, inner_src_layout, inner_weight_layout,
                       inner_dst_layout, inner_bias_layout, inner_z_layout);
-    auto opr = args.handle->create_operator<ConvBiasForward>();
     Param inner_conv_param = args.opr->param();
-    inner_conv_param.format = Param::Format::NCHW4;
+    size_t ws_dst = 0, ws_bias = 0, ws_z = 0;
+    if (args.dst_layout->dtype.enumv() == DTypeEnum::Float32) {
+        inner_conv_param.format = Param::Format::NCHW4_NCHW;
+    } else {
+        inner_conv_param.format = Param::Format::NCHW4;
+        ws_dst = inner_dst_layout.span().dist_byte();
+        ws_bias = inner_bias_layout.span().dist_byte();
+        ws_z = inner_z_layout.span().dist_byte();
+    }
+    auto opr = args.handle->create_operator<ConvBiasForward>();
     opr->param() = inner_conv_param;
-    return WorkspaceBundle(
-            ptr, {inner_src_layout.span().dist_byte(),
-                  inner_weight_layout.span().dist_byte(),
-                  inner_dst_layout.span().dist_byte(),
-                  inner_bias_layout.span().dist_byte(),
-                  inner_z_layout.span().dist_byte(),
-                  opr->get_workspace_in_bytes(inner_src_layout,
-                                              inner_weight_layout,
+    return WorkspaceBundle(
+            ptr, {inner_src_layout.span().dist_byte(),
+                  inner_weight_layout.span().dist_byte(), ws_dst, ws_bias, ws_z,
+                  opr->get_workspace_in_bytes(inner_src_layout,
+                                              inner_weight_layout,
                                               inner_bias_layout, inner_z_layout,
                                               inner_dst_layout, nullptr)});
 }
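Note on this hunk: scratch space for dst/bias/z is now reserved only on the quantized path, where the inner convolution produces an NCHW4 result that must be relayouted back. A compilable sketch of the size bookkeeping (hypothetical helper, not the MegDNN API):

#include <cstddef>
#include <vector>

// Illustrative sketch: the entries mirror the workspace bundle built above.
// On the Float32-dst path the inner conv writes the caller's NCHW buffers
// directly, so the dst/bias/z scratch sizes collapse to zero.
std::vector<std::size_t> fallback_ws_sizes(std::size_t src_b, std::size_t filter_b,
                                           std::size_t dst_b, std::size_t bias_b,
                                           std::size_t z_b, std::size_t conv_b,
                                           bool dst_is_float32) {
    if (dst_is_float32)
        return {src_b, filter_b, 0, 0, 0, conv_b};
    return {src_b, filter_b, dst_b, bias_b, z_b, conv_b};
}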
@@ -145,22 +158,33 @@ void ConvBiasForwardImpl::AlgoFallbackNCHWQS8::exec(
     TensorND inner_bias(bundle.get(3), inner_bias_layout);
     TensorND inner_z(bundle.get(4), inner_z_layout);
+    bool dst_float = args.dst_layout->dtype.enumv() == DTypeEnum::Float32;
     Param inner_conv_param = args.opr->param();
-    inner_conv_param.format = Param::Format::NCHW4;
+    inner_conv_param.format =
+            dst_float ? Param::Format::NCHW4_NCHW : Param::Format::NCHW4;
     auto inner_opr = args.handle->create_operator<ConvBiasForward>();
     inner_opr->param() = inner_conv_param;
     relayout_nchw_nchw4->exec(*args.src_tensor, inner_src, {});
     relayout_weight->exec(*args.filter_tensor, inner_weight, {});
-    if (inner_bias_layout.ndim > 0) {
-        relayout_nchw_nchw4->exec(*args.bias_tensor, inner_bias, {});
-    }
-    if (inner_z_layout.ndim > 0) {
-        relayout_nchw_nchw4->exec(*args.z_tensor, inner_z, {});
+    if (dst_float) {
+        inner_opr->exec(inner_src, inner_weight, *args.bias_tensor,
+                        *args.z_tensor, *args.dst_tensor, nullptr,
+                        Workspace((dt_byte*)bundle.get(5), bundle.get_size(5)));
+    } else {
+        if (inner_bias_layout.ndim > 0) {
+            relayout_nchw_nchw4->exec(*args.bias_tensor, inner_bias, {});
+        }
+        if (inner_z_layout.ndim > 0) {
+            relayout_nchw_nchw4->exec(*args.z_tensor, inner_z, {});
+        }
+        inner_opr->exec(inner_src, inner_weight, inner_bias, inner_z,
+                        inner_dst, nullptr,
+                        Workspace((dt_byte*)bundle.get(5), bundle.get_size(5)));
+        relayout_nchw4_nchw->exec(inner_dst, *args.dst_tensor, {});
     }
-    inner_opr->exec(inner_src, inner_weight, inner_bias, inner_z, inner_dst,
-                    nullptr,
-                    Workspace((dt_byte*)bundle.get(5), bundle.get_size(5)));
-    relayout_nchw4_nchw->exec(inner_dst, *args.dst_tensor, {});
 }
 
 // vim: syntax=cpp.doxygen
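Taken together, exec() now has two paths. A compilable control-flow sketch under stated assumptions (stub functions standing in for the relayout and inner-conv operators; exposition only, not the MegDNN implementation):

#include <cstdio>

// Stubs standing in for the relayout and inner-conv operators.
static void relayout(const char* what) { std::printf("relayout %s\n", what); }
static void inner_conv(const char* fmt) { std::printf("conv, format %s\n", fmt); }

// Exposition-only sketch of the control flow after this commit.
void fallback_exec_sketch(bool dst_float, bool has_bias, bool has_z) {
    relayout("src: NCHW -> NCHW4");
    relayout("filter: NCHW -> NCHW4 (weight mode)");
    if (dst_float) {
        // Float32 dst: bias/z are consumed and dst is produced in the
        // caller's NCHW buffers; no relayout back is needed.
        inner_conv("NCHW4_NCHW");
    } else {
        if (has_bias) relayout("bias: NCHW -> NCHW4");
        if (has_z) relayout("z: NCHW -> NCHW4");
        inner_conv("NCHW4");
        relayout("dst: NCHW4 -> NCHW");
    }
}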
dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
@@ -192,8 +192,7 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::exec(
     dst_dtype = args.dst_layout->dtype;
     megdnn_assert((src_dtype.category() == dst_dtype.category()) ||
-                  (args.opr->param().format == param::ConvBias::Format::NCHW4_NCHW &&
-                   src_dtype.enumv() == DTypeEnum::QuantizedS8 &&
+                  (src_dtype.enumv() == DTypeEnum::QuantizedS8 &&
                    dst_dtype.enumv() == DTypeEnum::Float32));
     megdnn_assert(src_dtype.category() == filter_dtype.category());
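The assertion is relaxed to match: a QuantizedS8 src with a Float32 dst is now accepted without requiring the NCHW4_NCHW format flag. The predicate change, as a sketch with hypothetical flag names:

// Before: same_category || (format_is_nchw4_nchw && src_qs8 && dst_f32)
// After:  same_category || (src_qs8 && dst_f32)
bool dtype_check_after(bool same_category, bool src_qs8, bool dst_f32) {
    return same_category || (src_qs8 && dst_f32);
}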
dnn/test/cuda/conv_bias_int8.cpp
@@ -28,6 +28,15 @@ namespace megdnn {
 namespace test {
 namespace conv {
 
+TEST_F(CUDA, CONV_BIAS_INT8_NCHW4_CUDNN_CONVOLUTION) {
+    require_compute_capability(7, 5);
+    conv_bias::check_conv_bias(
+            dtype::QuantizedS8{1.2f}, dtype::QuantizedS8{1.3f},
+            dtype::QuantizedS32{1.2f * 1.3f}, dtype::QuantizedS8{1.3f},
+            handle_cuda(), "DEFAULT:CUDNN:ConvBiasActivation:",
+            param::ConvBias::Format::NCHW4);
+}
+
 TEST_F(CUDA, CONV_BIAS_INT8_NCHW4_1x1) {
     require_compute_capability(6, 1);
     conv_bias::check_conv_bias(
@@ -689,6 +698,82 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
 }
 
+TEST_F(CUDA, FALLBACK_CONV_QS8) {
+    require_compute_capability_eq(7, 5);
+    Checker<ConvBiasForward> checker(handle_cuda());
+    auto check = [&checker](const std::string&& algo) {
+        checker.set_before_exec_callback(
+                conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(algo.c_str()));
+        UniformIntRNG rng{-3, 3};
+        UniformIntRNG bias_rng{-50, 50};
+        checker.set_rng(0, &rng)
+                .set_rng(1, &rng)
+                .set_rng(2, &bias_rng)
+                .set_rng(3, &rng)
+                .set_dtype(0, dtype::QuantizedS8{1.2f})
+                .set_dtype(1, dtype::QuantizedS8{1.3f})
+                .set_dtype(2, dtype::QuantizedS32{1.2f * 1.3f})
+                .set_dtype(3, dtype::QuantizedS8{19.990229f})
+                .set_dtype(4, dtype::QuantizedS8{19.990228f})
+                .set_epsilon(1e-3)
+                .set_max_avg_error(1e-1)
+                .set_max_avg_biased_error(1e-3);
+        param::ConvBias param;
+        param.pad_h = param.pad_w = 1;
+        param.stride_h = param.stride_w = 2;
+        param.format = param::ConvBias::Format::NCHW;
+        checker.set_param(param).execs({{16, 15, 14, 14},
+                                        {28, 15, 3, 3},
+                                        {1, 28, 1, 1},
+                                        {16, 28, 7, 7},
+                                        {}});
+        checker.set_param(param).execs(
+                {{16, 32, 14, 14}, {32, 32, 3, 3}, {1, 32, 1, 1}, {}, {}});
+    };
+    check("FALLBACK_CONV_NCHW_QS8");
+}
+
+TEST_F(CUDA, FALLBACK_CONV_QS8_F32) {
+    require_compute_capability_eq(7, 5);
+    Checker<ConvBiasForward> checker(handle_cuda());
+    auto check = [&checker](const std::string&& algo) {
+        checker.set_before_exec_callback(
+                conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(algo.c_str()));
+        UniformIntRNG rng{-3, 3};
+        UniformFloatRNG bias_rng{-50.f, 50.f};
+        checker.set_rng(0, &rng)
+                .set_rng(1, &rng)
+                .set_rng(2, &bias_rng)
+                .set_rng(3, &rng)
+                .set_dtype(0, dtype::QuantizedS8{1.2f})
+                .set_dtype(1, dtype::QuantizedS8{1.3f})
+                .set_dtype(2, dtype::Float32{})
+                .set_dtype(3, dtype::Float32{})
+                .set_dtype(4, dtype::Float32{})
+                .set_epsilon(1e-3)
+                .set_max_avg_error(1e-1)
+                .set_max_avg_biased_error(1e-3);
+        param::ConvBias param;
+        param.pad_h = param.pad_w = 1;
+        param.stride_h = param.stride_w = 2;
+        param.format = param::ConvBias::Format::NCHW;
+        checker.set_param(param).execs({{16, 15, 14, 14},
+                                        {28, 15, 3, 3},
+                                        {1, 28, 1, 1},
+                                        {16, 28, 7, 7},
+                                        {}});
+        checker.set_param(param).execs(
+                {{16, 32, 14, 14}, {32, 32, 3, 3}, {1, 32, 1, 1}, {}, {}});
+    };
+    check("FALLBACK_CONV_NCHW_QS8");
+}
+
 TEST_F(CUDA, CUTLASS_CONV_BIAS_INT8_WEIGHT_PREPROCESS) {
     require_compute_capability(6, 1);
     Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
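A note on the test constants: in quantized arithmetic real = scale * q, so multiplying s8 values with scales 1.2 and 1.3 yields s32 accumulators whose scale is the product of the two. That is why FALLBACK_CONV_QS8 sets dtype::QuantizedS32{1.2f * 1.3f} for the bias, while FALLBACK_CONV_QS8_F32 instead feeds Float32 bias/z and expects a Float32 dst, exercising the new path end to end. The scale arithmetic, as a runnable check:

#include <cassert>

// Quantized conv scale bookkeeping used by the tests above: an s8 src with
// scale 1.2 times an s8 filter with scale 1.3 accumulates into s32 values
// carrying scale 1.2 * 1.3, the scale given to the QuantizedS32 bias dtype.
int main() {
    float src_scale = 1.2f, filter_scale = 1.3f;
    float acc_scale = src_scale * filter_scale;  // 1.56f
    assert(acc_scale == 1.2f * 1.3f);
    return 0;
}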