Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
28ea9aad
P
Paddle
项目概览
PaddlePaddle
/
Paddle
11 个月 前同步成功
通知
2292
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
28ea9aad
编写于
12月 14, 2022
作者:
Y
Yuanle Liu
提交者:
GitHub
12月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle Inference] rewrite convert_to_mixed_precision (#48853)
上级
b9fad5da
变更
15
展开全部
隐藏空白更改
内联
并排
Showing
15 changed file
with
324 addition
and
950 deletion
+324
-950
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-1
paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+163
-159
paddle/fluid/framework/ir/auto_mixed_precision_pass.h
paddle/fluid/framework/ir/auto_mixed_precision_pass.h
+26
-12
paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc
.../fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc
+1
-1
paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc
.../fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc
+1
-1
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-1
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+8
-7
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+8
-8
paddle/fluid/inference/analysis/passes/CMakeLists.txt
paddle/fluid/inference/analysis/passes/CMakeLists.txt
+1
-1
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
...d/inference/analysis/passes/convert_to_mixed_precision.cc
+64
-740
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h
...id/inference/analysis/passes/convert_to_mixed_precision.h
+39
-9
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+4
-4
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+4
-4
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+1
-1
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+2
-1
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
28ea9aad
...
...
@@ -103,7 +103,7 @@ pass_library(delete_c_identity_op_pass inference)
pass_library
(
preln_residual_bias_fuse_pass inference
)
pass_library
(
delete_fill_constant_op_pass inference
)
pass_library
(
constant_folding_pass inference
)
pass_library
(
float_to_half
_pass inference
)
pass_library
(
auto_mixed_precision
_pass inference
)
pass_library
(
conv2d_fusion_layout_transfer_pass inference
)
pass_library
(
simplify_with_basic_ops_pass base
)
pass_library
(
fc_elementwise_layernorm_fuse_pass base
)
...
...
paddle/fluid/framework/ir/
float_to_half
_pass.cc
→
paddle/fluid/framework/ir/
auto_mixed_precision
_pass.cc
浏览文件 @
28ea9aad
此差异已折叠。
点击以展开。
paddle/fluid/framework/ir/
float_to_half
_pass.h
→
paddle/fluid/framework/ir/
auto_mixed_precision
_pass.h
浏览文件 @
28ea9aad
...
...
@@ -27,13 +27,13 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
class
FloatToHalf
Pass
:
public
FusePassBase
{
class
AutoMixedPrecision
Pass
:
public
FusePassBase
{
public:
using
VarType
=
framework
::
proto
::
VarType
;
public:
FloatToHalf
Pass
()
=
default
;
~
FloatToHalf
Pass
()
=
default
;
AutoMixedPrecision
Pass
()
=
default
;
~
AutoMixedPrecision
Pass
()
=
default
;
protected:
void
ApplyImpl
(
Graph
*
graph
)
const
override
;
...
...
@@ -43,10 +43,6 @@ class FloatToHalfPass : public FusePassBase {
void
SetDefaultBlacklist
()
const
;
bool
OpSupportPrecision
(
const
std
::
string
&
op_type
,
phi
::
DataType
precision
,
phi
::
Backend
backend
=
phi
::
Backend
::
GPU
)
const
;
void
SetOpUniqueType
()
const
;
void
RestoreOpOriginType
()
const
;
...
...
@@ -70,9 +66,13 @@ class FloatToHalfPass : public FusePassBase {
void
ConvertWeightsData
()
const
;
private:
mutable
bool
keep_io_types_
;
mutable
bool
skip_pass_
{
false
};
mutable
bool
keep_io_types_
{
false
};
// float16 or bfloat16 now
mutable
phi
::
DataType
half_precision_
;
mutable
phi
::
DataType
low_precision_
{
phi
::
DataType
::
FLOAT16
};
mutable
phi
::
Backend
backend_
{
phi
::
Backend
::
GPU
};
mutable
std
::
unordered_set
<
std
::
string
>
black_list_
;
...
...
@@ -84,12 +84,26 @@ class FloatToHalfPass : public FusePassBase {
mutable
std
::
vector
<
std
::
vector
<
Node
*>>
all_op_nodes_
;
// op's unique type -> the op's origin type
mutable
std
::
unordered_map
<
std
::
string
,
std
::
string
>
op_original_type_
;
// op's unique type -> whether the op run at
half
precision
mutable
std
::
unordered_set
<
std
::
string
>
op_run_
half
_
;
// op's unique type -> whether the op run at
low
precision
mutable
std
::
unordered_set
<
std
::
string
>
op_run_
low_precision
_
;
mutable
std
::
unordered_set
<
std
::
string
>
vars_convert_to_
half
_
;
mutable
std
::
unordered_set
<
std
::
string
>
vars_convert_to_
low_precision
_
;
};
bool
OpSupportPrecision
(
const
std
::
string
&
op_type
,
phi
::
Backend
backend
,
phi
::
DataType
precision
,
const
std
::
unordered_set
<
std
::
string
>&
black_list
);
void
DoInsertCastOp
(
Graph
*
graph
,
Node
*
var_node
,
Node
*
op_node
,
proto
::
VarType
::
Type
from_type
,
proto
::
VarType
::
Type
to_type
,
framework
::
BlockDesc
*
block_desc
,
int
*
suffix
,
std
::
unordered_map
<
Node
*
,
Node
*>*
cache
);
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc
浏览文件 @
28ea9aad
...
...
@@ -142,7 +142,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
bool
is_fp16_precision
=
static_cast
<
phi
::
DataType
>
(
Get
<
int
>
(
"model_precision"
))
==
phi
::
DataType
::
FLOAT16
||
Get
<
bool
>
(
"enable_gpu_
half
"
);
Get
<
bool
>
(
"enable_gpu_
mixed
"
);
bool
cutlass_enable
=
false
;
#ifdef PADDLE_WITH_CUTLASS
...
...
paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc
浏览文件 @
28ea9aad
...
...
@@ -165,7 +165,7 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const {
bool
is_fp16_precision
=
static_cast
<
phi
::
DataType
>
(
Get
<
int
>
(
"model_precision"
))
==
phi
::
DataType
::
FLOAT16
||
Get
<
bool
>
(
"enable_gpu_
half
"
);
Get
<
bool
>
(
"enable_gpu_
mixed
"
);
constexpr
int
CUTLASS_NHWC_ALIGNMENT
=
8
;
if
(
is_fp16_precision
)
{
#ifdef PADDLE_WITH_CUTLASS
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
28ea9aad
...
...
@@ -365,7 +365,7 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
mixed_black_list
,
MixedBlackList
,
std
::
unordered_set
<
std
::
string
>
);
DECL_ARGUMENT_FIELD
(
enable_gpu_
half
,
EnableGPUHalf
,
bool
);
DECL_ARGUMENT_FIELD
(
enable_gpu_
mixed
,
EnableGPUMixed
,
bool
);
DECL_ARGUMENT_FIELD
(
mixed_precision_mode
,
MixedPrecisionMode
,
int
);
// cinn compiler related
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
28ea9aad
...
...
@@ -45,8 +45,10 @@ IRPassManager::IRPassManager(Argument *argument) {
void
IRPassManager
::
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
)
{
// For graph_viz_pass
std
::
string
pre_pass
;
int
pass_num
=
0
;
for
(
const
std
::
string
&
pass_name
:
passes
)
{
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_name
);
pass
->
Set
(
"use_varseqlen"
,
new
bool
(
argument
->
tensorrt_use_varseqlen
()));
...
...
@@ -87,14 +89,14 @@ void IRPassManager::CreatePasses(Argument *argument,
argument
->
tensorrt_tuned_dynamic_shape
();
pass
->
Set
(
"with_dynamic_shape"
,
new
bool
(
with_dynamic_shape
));
// mixed precision related
pass
->
Set
(
"model_precision"
,
new
int
(
argument
->
model_precision
()));
// Mixed precision related.
pass
->
Set
(
"mixed_black_list"
,
new
std
::
unordered_set
<
std
::
string
>
(
argument
->
mixed_black_list
()));
pass
->
Set
(
"enable_gpu_
half"
,
new
bool
(
argument
->
enable_gpu_half
()));
pass
->
Set
(
"enable_gpu_
mixed"
,
new
bool
(
argument
->
enable_gpu_mixed
()));
pass
->
Set
(
"mixed_precision_mode"
,
new
int
(
argument
->
mixed_precision_mode
()));
pass
->
Set
(
"model_precision"
,
new
int
(
argument
->
model_precision
()));
if
(
pass_name
==
"graph_viz_pass"
)
{
std
::
string
optim_cache_dir
=
argument
->
optim_cache_dir
();
...
...
@@ -210,6 +212,7 @@ void IRPassManager::CreatePasses(Argument *argument,
new
std
::
vector
<
std
::
string
>
(
argument
->
tensorrt_disabled_ops
()));
pass
->
Set
(
"trt_use_dla"
,
new
bool
(
argument
->
tensorrt_use_dla
()));
pass
->
Set
(
"trt_dla_core"
,
new
int
(
argument
->
tensorrt_dla_core
()));
// Setting the disable_trt_plugin_fp16 to true means that TRT plugin will
// not run fp16.
pass
->
Set
(
"disable_trt_plugin_fp16"
,
...
...
@@ -238,8 +241,7 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"root_predictor_id"
,
new
int
(
argument
->
root_predictor_id
()));
}
else
if
(
pass_name
==
"build_cinn_pass"
)
{
pass
->
Set
(
"is_inference_stage"
,
new
bool
(
argument
->
use_cinn_compiler
()));
}
if
(
pass_name
==
"lite_subgraph_pass"
)
{
}
else
if
(
pass_name
==
"lite_subgraph_pass"
)
{
bool
lite_enable_int8
=
argument
->
lite_precision_mode
()
==
AnalysisConfig
::
Precision
::
kInt8
;
pass
->
Set
(
"program"
,
...
...
@@ -287,8 +289,7 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"nnadapter_model_cache_token"
,
new
std
::
vector
<
std
::
string
>
(
argument
->
nnadapter_model_cache_token
()));
}
if
(
pass_name
==
"fc_fuse_pass"
)
{
}
else
if
(
pass_name
==
"fc_fuse_pass"
)
{
pass
->
Set
(
"use_gpu"
,
new
bool
(
argument
->
use_gpu
()));
bool
fc_mkldnn_pass
=
0
;
for
(
const
std
::
string
&
pass_n
:
passes
)
{
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
28ea9aad
...
...
@@ -83,14 +83,14 @@ void OutputProcess(framework::ir::Graph *graph,
backend
,
precision
,
blacklist
))
{
Add
CastOp
(
graph
,
var_node
,
next_op
,
framework
::
proto
::
VarType
::
FP32
,
to_type
,
&
suffix
,
block_desc
,
&
var_to_cast_op_map
);
Insert
CastOp
(
graph
,
var_node
,
next_op
,
framework
::
proto
::
VarType
::
FP32
,
to_type
,
block_desc
,
&
suffix
,
&
var_to_cast_op_map
);
var_node
->
Var
()
->
SetDataType
(
framework
::
proto
::
VarType
::
FP32
);
}
}
...
...
paddle/fluid/inference/analysis/passes/CMakeLists.txt
浏览文件 @
28ea9aad
...
...
@@ -13,7 +13,7 @@ cc_library(
cc_library
(
convert_to_mixed_precision
SRCS convert_to_mixed_precision.cc
DEPS analysis_pass ir_graph_build_pass
)
DEPS analysis_pass ir_graph_build_pass
auto_mixed_precision_pass
)
cc_library
(
ir_params_sync_among_devices_pass
SRCS ir_params_sync_among_devices_pass.cc
...
...
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
浏览文件 @
28ea9aad
此差异已折叠。
点击以展开。
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h
浏览文件 @
28ea9aad
...
...
@@ -15,14 +15,12 @@
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
...
...
@@ -30,20 +28,52 @@ namespace paddle {
namespace
inference
{
namespace
analysis
{
class
ConvertToMixedPrecisionPass
{
public:
explicit
ConvertToMixedPrecisionPass
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
const
std
::
string
&
mixed_model_file
,
const
std
::
string
&
mixed_params_file
,
phi
::
DataType
mixed_precision
,
phi
::
Backend
backend
,
bool
keep_io_types
,
const
std
::
unordered_set
<
std
::
string
>&
black_list
);
void
Run
();
private:
void
LoadModel
();
void
SaveMixedModel
();
private:
std
::
string
model_file_
;
std
::
string
params_file_
;
std
::
string
mixed_model_file_
;
std
::
string
mixed_params_file_
;
phi
::
DataType
mixed_precision_
;
phi
::
Backend
backend_
;
bool
keep_io_types_
;
std
::
unordered_set
<
std
::
string
>
black_list_
;
framework
::
Scope
scope_
;
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
main_graph_
{
nullptr
};
};
bool
OpSupportPrecision
(
const
std
::
string
&
op_type
,
phi
::
Backend
backend
,
phi
::
DataType
precision
,
const
std
::
unordered_set
<
std
::
string
>&
blacklist
);
const
std
::
unordered_set
<
std
::
string
>&
black
_
list
);
void
Add
CastOp
(
void
Insert
CastOp
(
framework
::
ir
::
Graph
*
graph
,
framework
::
ir
::
Node
*
node
,
framework
::
ir
::
Node
*
next_op
,
framework
::
ir
::
Node
*
var_
node
,
framework
::
ir
::
Node
*
op_node
,
framework
::
proto
::
VarType
::
Type
from_type
,
framework
::
proto
::
VarType
::
Type
to_type
,
int
*
suffix
,
framework
::
BlockDesc
*
block_desc
,
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>*
map
);
int
*
suffix
,
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>*
visited
);
void
ConvertToMixedPrecision
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
28ea9aad
...
...
@@ -99,7 +99,7 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
// default
}
else
if
(
precision_mode
==
Precision
::
kHalf
||
precision_mode
==
Precision
::
kBf16
)
{
enable_gpu_
half
_
=
true
;
enable_gpu_
mixed
_
=
true
;
}
else
{
LOG
(
ERROR
)
<<
"The Paddle-GPU inference currently only supports "
...
...
@@ -396,7 +396,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// Mixed precision related.
CP_MEMBER
(
mixed_black_list_
);
CP_MEMBER
(
enable_gpu_
half
_
);
CP_MEMBER
(
enable_gpu_
mixed
_
);
CP_MEMBER
(
mixed_precision_mode_
);
CP_MEMBER
(
enable_memory_optim_
);
...
...
@@ -1017,7 +1017,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
params_file_
;
ss
<<
use_gpu_
;
ss
<<
enable_gpu_
half
_
;
ss
<<
enable_gpu_
mixed
_
;
ss
<<
use_external_stream_
;
ss
<<
exec_stream_
;
ss
<<
use_fc_padding_
;
...
...
@@ -1234,7 +1234,7 @@ std::string AnalysisConfig::Summary() {
os
.
InsertRow
({
"use_gpu"
,
use_gpu_
?
"true"
:
"false"
});
if
(
use_gpu_
)
{
os
.
InsertRow
({
"gpu_device_id"
,
std
::
to_string
(
gpu_device_id_
)});
os
.
InsertRow
({
"enable_gpu_
half_"
,
std
::
to_string
(
enable_gpu_half
_
)});
os
.
InsertRow
({
"enable_gpu_
mixed_"
,
std
::
to_string
(
enable_gpu_mixed
_
)});
os
.
InsertRow
({
"memory_pool_init_size"
,
std
::
to_string
(
memory_pool_init_size_mb_
)
+
"MB"
});
os
.
InsertRow
(
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
28ea9aad
...
...
@@ -1277,10 +1277,10 @@ void AnalysisPredictor::PrepareArgument() {
if
(
!
config_
.
ir_optim
())
{
argument_
.
SetEnableIrOptim
(
false
);
if
(
config_
.
enable_gpu_
half
_
)
{
if
(
config_
.
enable_gpu_
mixed
_
)
{
argument_
.
SetEnableIrOptim
(
true
);
pass_builder
->
ClearPasses
();
pass_builder
->
AppendPass
(
"
float_to_half
_pass"
);
pass_builder
->
AppendPass
(
"
auto_mixed_precision
_pass"
);
LOG
(
INFO
)
<<
"This model run in Paddle-GPU mixed precision mode with no ir "
"optimization."
;
...
...
@@ -1291,7 +1291,7 @@ void AnalysisPredictor::PrepareArgument() {
if
(
config_
.
ir_debug_
)
{
pass_builder
->
TurnOnDebug
();
}
if
(
config_
.
enable_gpu_
half
_
)
{
if
(
config_
.
enable_gpu_
mixed
_
)
{
LOG
(
INFO
)
<<
"This model run in Paddle-GPU mixed precision mode."
;
}
}
...
...
@@ -1303,7 +1303,7 @@ void AnalysisPredictor::PrepareArgument() {
// mixed precison.
argument_
.
SetModelPrecision
(
static_cast
<
int
>
(
model_precision_
));
argument_
.
SetMixedBlackList
(
config_
.
mixed_black_list_
);
argument_
.
SetEnableGPU
Half
(
config_
.
enable_gpu_half
_
);
argument_
.
SetEnableGPU
Mixed
(
config_
.
enable_gpu_mixed
_
);
argument_
.
SetMixedPrecisionMode
(
static_cast
<
int
>
(
paddle
::
ConvertPrecision
(
config_
.
mixed_precision_mode_
)));
}
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
28ea9aad
...
...
@@ -1049,7 +1049,7 @@ struct PD_INFER_DECL AnalysisConfig {
bool
use_gpu_
{
false
};
int
gpu_device_id_
{
0
};
uint64_t
memory_pool_init_size_mb_
{
100
};
// initial size is 100MB.
bool
enable_gpu_
half
_
{
false
};
bool
enable_gpu_
mixed
_
{
false
};
bool
thread_local_stream_
{
false
};
bool
use_cudnn_
{
false
};
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
28ea9aad
...
...
@@ -245,7 +245,8 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_fuse_pass"
,
//
#endif //
"transpose_flatten_concat_fuse_pass"
,
//
"float_to_half_pass"
,
//
"constant_folding_pass"
,
//
"auto_mixed_precision_pass"
,
//
});
use_gpu_
=
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录