Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
37455714
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
37455714
编写于
7月 21, 2022
作者:
X
xiaoxiaohehe001
提交者:
GitHub
7月 21, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle inference] Add conv_fusion_fp16 (#44435)
* convfusionfp16 * convfusionfp16 * convfusionfp16
上级
0243c6ca
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
64 addition
and
3 deletion
+64
-3
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+58
-0
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+6
-3
未找到文件。
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
浏览文件 @
37455714
...
...
@@ -284,6 +284,27 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
return
;
}
// conv_weight fp16 --> fp32 (widen fp16 weights to fp32 here; they are cast back to fp16 after the fusion math)
auto
*
conv_weight_tensor
=
scope
->
FindVar
(
conv_weight
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
tensor_type
=
conv_weight_tensor
->
dtype
();
if
(
tensor_type
==
paddle
::
experimental
::
DataType
::
FLOAT16
)
{
framework
::
Tensor
weight_float_tensor
;
weight_float_tensor
.
set_type
(
paddle
::
experimental
::
DataType
::
FLOAT32
);
weight_float_tensor
.
Resize
(
conv_weight_tensor
->
dims
());
auto
*
weight_float_data
=
weight_float_tensor
.
mutable_data
<
float
>
(
platform
::
CPUPlace
());
auto
*
data
=
conv_weight_tensor
->
mutable_data
<
float16
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
conv_weight_tensor
->
numel
();
i
++
)
{
weight_float_data
[
i
]
=
static_cast
<
float
>
(
data
[
i
]);
}
conv_weight_tensor
->
clear
();
paddle
::
framework
::
TensorCopySync
(
weight_float_tensor
,
platform
::
CPUPlace
(),
conv_weight_tensor
);
}
// Get batch norm bias
auto
*
bn_bias_tensor
=
scope
->
FindVar
(
bn_bias
->
Name
())
->
GetMutable
<
LoDTensor
>
();
...
...
@@ -319,6 +340,43 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
epsilon
,
conv_type
());
if
(
tensor_type
==
paddle
::
experimental
::
DataType
::
FLOAT16
)
{
{
framework
::
Tensor
weight_float16_tensor
;
weight_float16_tensor
.
set_type
(
paddle
::
experimental
::
DataType
::
FLOAT16
);
weight_float16_tensor
.
Resize
(
conv_weight_tensor
->
dims
());
auto
*
weight_float16_data
=
weight_float16_tensor
.
mutable_data
<
float16
>
(
platform
::
CPUPlace
());
auto
*
data
=
conv_weight_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
conv_weight_tensor
->
numel
();
i
++
)
{
weight_float16_data
[
i
]
=
static_cast
<
float16
>
(
data
[
i
]);
}
conv_weight_tensor
->
clear
();
paddle
::
framework
::
TensorCopySync
(
weight_float16_tensor
,
platform
::
CPUPlace
(),
conv_weight_tensor
);
}
{
framework
::
Tensor
eltwise_y_in_float16_tensor
;
eltwise_y_in_float16_tensor
.
set_type
(
paddle
::
experimental
::
DataType
::
FLOAT16
);
eltwise_y_in_float16_tensor
.
Resize
(
eltwise_y_in_tensor
->
dims
());
auto
*
eltwise_y_in_float16_data
=
eltwise_y_in_float16_tensor
.
mutable_data
<
float16
>
(
platform
::
CPUPlace
());
auto
*
data
=
eltwise_y_in_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
eltwise_y_in_tensor
->
numel
();
i
++
)
{
eltwise_y_in_float16_data
[
i
]
=
static_cast
<
float16
>
(
data
[
i
]);
}
eltwise_y_in_tensor
->
clear
();
paddle
::
framework
::
TensorCopySync
(
eltwise_y_in_float16_tensor
,
platform
::
CPUPlace
(),
eltwise_y_in_tensor
);
}
}
// with MKL-DNN fuse conv+bn into conv with bias
// without MKL-DNN fuse conv+bn into conv+elementwise_add
if
(
fuse_option
==
FUSE_MKLDNN
)
{
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
37455714
...
...
@@ -154,9 +154,12 @@ const std::vector<std::string> kLiteSubgraphPasses({
// support fp16/bf16 precision, temporarily use low precision pass to prevent
// running errors. After fusion operator supports low precision, delete this.
const
std
::
vector
<
std
::
string
>
kGpuLowerPrecisionPasses
{
// "conv_bn_fuse_pass",
// "conv_eltwiseadd_bn_fuse_pass",
};
"conv_bn_fuse_pass"
,
"conv_eltwiseadd_bn_fuse_pass"
,
"conv_elementwise_add_act_fuse_pass"
,
"conv_elementwise_add2_act_fuse_pass"
,
"conv_elementwise_add_fuse_pass"
};
const
std
::
vector
<
std
::
string
>
kTrtLowerPrecisionPasses
{
// "conv_bn_fuse_pass",
// "conv_eltwiseadd_bn_fuse_pass",
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录