PaddlePaddle / Paddle

Commit c7694b82 (unverified)
Authored on Jun 29, 2022 by Wilber; committed via GitHub on Jun 29, 2022
Parent: 8fa8e17e

inference support mixed-precision model [1]. (#43814)

* inference add convert to mixed model ability.
Showing 16 changed files with 846 additions and 28 deletions.

paddle/fluid/inference/analysis/argument.h  (+4, -0)
paddle/fluid/inference/analysis/ir_pass_manager.cc  (+2, -0)
paddle/fluid/inference/analysis/passes/CMakeLists.txt  (+5, -0)
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc  (+452, -0)
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h  (+59, -0)
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc  (+62, -27)
paddle/fluid/inference/api/CMakeLists.txt  (+2, -1)
paddle/fluid/inference/api/analysis_predictor.cc  (+100, -0)
paddle/fluid/inference/api/analysis_predictor.h  (+3, -0)
paddle/fluid/inference/api/paddle_analysis_config.h  (+8, -0)
paddle/fluid/inference/api/paddle_inference_api.h  (+12, -0)
paddle/fluid/inference/api/paddle_pass_builder.cc  (+14, -0)
paddle/fluid/inference/api/paddle_pass_builder.h  (+14, -0)
paddle/fluid/inference/utils/CMakeLists.txt  (+4, -0)
paddle/fluid/inference/utils/model_utils.cc  (+74, -0)
paddle/fluid/inference/utils/model_utils.h  (+31, -0)
paddle/fluid/inference/analysis/argument.h

@@ -36,6 +36,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/platform/variant.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {

@@ -328,6 +329,9 @@ struct Argument {
   DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
   DECL_ARGUMENT_FIELD(npu_device_id, NPUDeviceId, int);

+  // mixed precision related
+  DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);
+
  private:
   std::unordered_set<std::string> valid_fields_;
 };
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -86,6 +86,8 @@ void IRPassManager::CreatePasses(Argument *argument,
         argument->tensorrt_tuned_dynamic_shape();
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
+    pass->Set("model_precision", new int(argument->model_precision()));
+
     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
       std::string dot_file_path;
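The "model_precision" value attached to every IR pass above can be read back through the standard ir::Pass attribute interface. A minimal sketch of the consumption side, assuming a hypothetical fusion pass that wants to skip its rewrite for non-fp32 models (the pass class and its skip logic are illustrative, not part of this commit; only Get<int>("model_precision") follows from the line added above):

// Hypothetical pass body; illustrative only.
void SomeFusePass::ApplyImpl(framework::ir::Graph *graph) const {
  // "model_precision" was attached in IRPassManager::CreatePasses.
  auto precision = static_cast<phi::DataType>(Get<int>("model_precision"));
  if (precision != phi::DataType::FLOAT32) {
    VLOG(3) << "skip fusion for mixed-precision model";
    return;
  }
  // ... regular fp32 fusion logic ...
}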
paddle/fluid/inference/analysis/passes/CMakeLists.txt

@@ -10,6 +10,10 @@ cc_library(
   memory_optim_pass
   SRCS memory_optimize_pass.cc
   DEPS analysis_pass zero_copy_tensor)
+cc_library(
+  convert_to_mixed_precision
+  SRCS convert_to_mixed_precision.cc
+  DEPS analysis_pass ir_graph_build_pass)
 cc_library(
   ir_params_sync_among_devices_pass
   SRCS ir_params_sync_among_devices_pass.cc

@@ -46,6 +50,7 @@ cc_library(
   ir_params_sync_among_devices_pass
   adjust_cudnn_workspace_size_pass
   memory_optim_pass
+  convert_to_mixed_precision
   inference_op_replace_pass
   ir_graph_to_program_pass
   ir_graph_clean_pass)
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"

#include <unordered_set>

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/tensor_meta.h"

using namespace paddle::framework;  // NOLINT

namespace paddle {
namespace inference {
namespace analysis {

namespace {

bool IsKernelSupportPrecision(
    const std::string& op_type,
    phi::Backend backend,
    phi::DataType data_type,
    phi::DataLayout layout = phi::DataLayout::ALL_LAYOUT) {
  auto kernels = phi::KernelFactory::Instance().kernels();
  if (kernels.find(op_type) == kernels.end()) {
    return false;
  }
  phi::KernelKey kernel_key(backend, layout, data_type);
  return phi::KernelFactory::Instance().HasKernel(op_type, kernel_key);
}

bool GpuKernelSupportPrecision(
    const std::string& op_type,
    phi::DataType data_type,
    phi::DataLayout layout = phi::DataLayout::ALL_LAYOUT) {
  bool res = IsKernelSupportPrecision(op_type, phi::Backend::GPU, data_type, layout);
  res |= IsKernelSupportPrecision(op_type, phi::Backend::GPUDNN, data_type, layout);
  return res;
}

// Just process special cases.
bool OutShouldNotConvert(ir::Node* var_node) {
  auto op_node = var_node->inputs[0];
  auto* op_desc = op_node->Op();

  // batch_norm's input and output (variance and mean) are the same.
  if (op_desc->Type() == "batch_norm") {
    auto vecs = op_desc->Output("MeanOut");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
    vecs = op_desc->Output("VarianceOut");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
    vecs = op_desc->Output("SavedMean");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
    vecs = op_desc->Output("SavedVariance");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
  }

  return false;
}

// Just process special cases for weights conversion.
bool WeightsShouldNotConvert(ir::Node* var_node) {
  auto op_nodes = var_node->outputs;
  for (auto* op_node : op_nodes) {
    auto* op_desc = op_node->Op();
    // batch_norm op's bias, mean, scale and variance just be float32, so we can
    // not convert the dtype.
    if (op_desc->Type() == "batch_norm") {
      auto vecs = op_desc->Input("Bias");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
        return true;
      }
      vecs = op_desc->Input("Mean");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
        return true;
      }
      vecs = op_desc->Input("Scale");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
        return true;
      }
      vecs = op_desc->Input("Variance");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
        return true;
      }
    }
  }

  return false;
}

void ConvertTensorDtype(framework::ir::Graph* graph,
                        const std::unordered_set<std::string>& blacklist,
                        bool keep_io_types,
                        phi::Backend backend,
                        phi::DataType tensor_dtype) {
  framework::proto::VarType::Type to_type;
  if (tensor_dtype == phi::DataType::FLOAT16) {
    to_type = framework::proto::VarType::FP16;
  } else if (tensor_dtype == phi::DataType::BFLOAT16) {
    to_type = framework::proto::VarType::BF16;
  } else {
    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "mixed_precision currently not supported dtype %d, we now only support "
        "fp16 and bf16.",
        static_cast<int>(tensor_dtype)));
  }

  int num_low_precision = 0;
  int suffix = 0;
  framework::BlockDesc* block_desc{nullptr};
  std::vector<framework::ir::Node*> output_nodes;
  std::unordered_map<framework::ir::Node*, framework::ir::Node*> cast_map;

  for (auto* op_node : framework::ir::TopologySortOperations(*graph)) {
    if (!op_node->IsOp()) continue;
    auto op_type = op_node->Op()->Type();
    auto phi_op_type = phi::TransToPhiKernelName(op_type);
    // LOG(INFO) << "process op " << op_type << ", corresponding phi type is "
    //           << phi_op_type;

    // 1. set input dtype.
    if (op_type == "feed") {
      block_desc = op_node->Op()->Block();
      auto feed_var = op_node->outputs[0]->Var();
      if (!keep_io_types &&
          feed_var->GetDataType() == framework::proto::VarType::FP32) {
        feed_var->SetDataType(to_type);
      }
    } else if (op_type == "fetch") {
      auto* fetch_var = op_node->inputs[0];
      output_nodes.push_back(fetch_var);
      continue;
    }

    // 2. if op support fp16/bf16 and not in blacklist.
    //      - cast weight to fp16/bf16.
    //      - add cast op if the input dtype is not fp16/bf16.
    //      - set output dtype.
    else if (blacklist.count(phi_op_type) == 0) {  // NOLINT
      bool support_precision =
          OpSupportPrecision(phi_op_type, backend, tensor_dtype, blacklist);
      VLOG(2) << "phi_op_type " << phi_op_type << " support low precision "
              << support_precision;
      if (support_precision) {
        ++num_low_precision;
        auto inputs = op_node->inputs;
        for (auto* in_node : inputs) {
          auto* in_var = in_node->Var();
          if (in_var->Persistable() &&
              in_var->GetDataType() == framework::proto::VarType::FP32) {
            if (WeightsShouldNotConvert(in_node)) continue;
            in_var->SetDataType(to_type);
          } else if (!in_var->Persistable() &&
                     in_var->GetDataType() != to_type) {
            AddCastOp(graph, in_node, op_node, in_var->GetDataType(), to_type,
                      &suffix, block_desc, &cast_map);
          }
        }
        for (auto* out_node : op_node->outputs) {
          auto* out_var = out_node->Var();
          if (out_var->GetDataType() == framework::proto::VarType::FP32) {
            if (OutShouldNotConvert(out_node)) continue;
            out_var->SetDataType(to_type);
          }
        }
      } else {
        auto inputs = op_node->inputs;
        for (auto* in_node : inputs) {
          auto* in_var = in_node->Var();
          if (!in_var->Persistable() &&
              in_var->GetDataType() != framework::proto::VarType::FP32) {
            AddCastOp(graph, in_node, op_node, in_var->GetDataType(),
                      framework::proto::VarType::FP32, &suffix, block_desc,
                      &cast_map);
          }
        }
      }
    }

    // 3. check op not support fp16/bf16 or in blacklist.
    //      - add cast op if the input dtype is not fp32.
    else {  // NOLINT
      // trt pass should explicitle add cast op is input is bf16/tf32, etc.
      if (op_node->Name() == "tensorrt_engine") continue;
      for (auto* in_node : op_node->inputs) {
        auto* in_var = in_node->Var();
        if (in_var->GetDataType() == to_type) {
          AddCastOp(graph, in_node, op_node, to_type,
                    framework::proto::VarType::FP32, &suffix, block_desc,
                    &cast_map);
        }
      }
    }
  }

  // 4. if output_op's dtype is not compatible to output dtype, then just insert
  // cast.
  for (auto* node : output_nodes) {
    auto var = node->Var();
    if (keep_io_types && var->GetDataType() == to_type) {
      // fp16/bf16 -> fp32.
      AddCastOp(graph, node, node->outputs[0], to_type,
                framework::proto::VarType::FP32, &suffix, block_desc,
                &cast_map);
    } else if (!keep_io_types &&
               var->GetDataType() == framework::proto::VarType::FP32) {
      // fp32 -> fp16/bf16
      AddCastOp(graph, node, node->outputs[0], framework::proto::VarType::FP32,
                to_type, &suffix, block_desc, &cast_map);
    }
  }

  if (num_low_precision)
    LOG(INFO) << "---  detected " << num_low_precision << " low precision ops";
}

}  // namespace

bool OpSupportPrecision(const std::string& phi_op_type,
                        phi::Backend backend,
                        phi::DataType precision,
                        const std::unordered_set<std::string>& blacklist) {
  bool support_precision = false;
  if (blacklist.count(phi_op_type) == 0) {
    if (backend == phi::Backend::GPU)
      support_precision = GpuKernelSupportPrecision(phi_op_type, precision);
    else
      support_precision =
          IsKernelSupportPrecision(phi_op_type, backend, precision);
  }
  return support_precision;
}

void AddCastOp(
    framework::ir::Graph* graph,
    framework::ir::Node* node,
    framework::ir::Node* next_op,
    framework::proto::VarType::Type from_type,
    framework::proto::VarType::Type to_type,
    int* suffix,
    framework::BlockDesc* block_desc,
    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* map) {
  auto update_cast_desc = [&](framework::OpDesc& desc,
                              const std::string& x_name,
                              const std::string& out_name,
                              const int in_dtype,
                              const int out_dtype) {
    desc.SetType("cast");
    desc.SetInput("X", {x_name});
    desc.SetOutput("Out", {out_name});
    desc.SetAttr("in_dtype", in_dtype);
    desc.SetAttr("out_dtype", out_dtype);
    desc.SetAttr("use_mkldnn", false);
    desc.SetAttr("with_quant_attr", false);
    desc.Flush();
  };

  if (map->count(node) == 0) {
    // insert cast op before node.
    std::string cast_input_name = node->Var()->Name();
    std::string cast_output_name =
        node->Var()->Name() + "_cast.tmp_" + std::to_string((*suffix)++);
    CHECK_NOTNULL(block_desc);
    framework::OpDesc cast_op_desc(block_desc);
    update_cast_desc(cast_op_desc, cast_input_name, cast_output_name,
                     static_cast<int>(from_type), static_cast<int>(to_type));
    auto* cast_op_node = graph->CreateOpNode(&cast_op_desc);
    auto* cast_output_vardesc = block_desc->Var(cast_output_name);
    cast_output_vardesc->SetPersistable(false);
    cast_output_vardesc->SetDataType(to_type);
    cast_output_vardesc->SetShape(node->Var()->GetShape());
    auto* cast_output_node = graph->CreateVarNode(cast_output_vardesc);
    IR_NODE_LINK_TO(cast_op_node, cast_output_node);
    (*map)[node] = cast_output_node;
  }
  next_op->Op()->RenameInput(node->Name(), map->at(node)->Name());
  IR_NODE_LINK_TO(node, map->at(node)->inputs[0]);
  IR_NODE_LINK_TO(map->at(node), next_op);
}

void ConvertToMixedPrecision(const std::string& model_file,
                             const std::string& params_file,
                             const std::string& mixed_model_file,
                             const std::string& mixed_params_file,
                             phi::DataType mixed_precision,
                             phi::Backend backend,
                             bool keep_io_types,
                             std::unordered_set<std::string> black_list) {
  paddle::CPUPlace place;
  framework::Executor executor(place);
  framework::Scope scope;
  auto program_desc =
      inference::Load(&executor, &scope, model_file, params_file);
  auto graph = std::unique_ptr<framework::ir::Graph>(
      new framework::ir::Graph(*program_desc));

  ConvertTensorDtype(graph.get(), black_list, keep_io_types, backend,
                     mixed_precision);

  framework::ProgramDesc mixed_program_desc;
  framework::ir::GraphToProgram(*graph, &mixed_program_desc);

  auto parameters = scope.LocalVarNames();
  std::sort(parameters.begin(), parameters.end());

  auto serialize_params =
      [](framework::Scope* scope,
         const std::vector<std::string>& params) -> std::string {
    std::ostringstream os;
    platform::CPUDeviceContext ctx;
    for (const auto& param : params) {
      VLOG(3) << "Serialize param: " << param;
      PADDLE_ENFORCE_NOT_NULL(
          scope->FindVar(param),
          platform::errors::NotFound(
              "Block should already have a '%s' variable", param));
      auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
      framework::SerializeToStream(os, *tensor, ctx);
    }
    return os.str();
  };

  std::unordered_set<std::string> weights_should_be_fp32;
  for (auto* node : paddle::framework::ir::TopologySortOperations(*graph)) {
    if (!node->IsOp()) continue;
    auto* op_desc = node->Op();
    if (op_desc->Type() == "feed" || op_desc->Type() == "fetch") continue;

    if (op_desc->Type() == "batch_norm") {
      auto vecs = op_desc->Input("Bias");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
      vecs = op_desc->Input("Mean");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
      vecs = op_desc->Input("Scale");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
      vecs = op_desc->Input("Variance");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
    }
  }

  for (const auto& param_name : parameters) {
    auto* var = scope.FindLocalVar(param_name);
    if (var->IsType<framework::LoDTensor>() ||
        var->IsType<framework::Tensor>()) {
      auto* t = var->GetMutable<framework::LoDTensor>();
      framework::Tensor mixed_tensor;
      mixed_tensor.Resize(t->dims());
      auto* data = t->mutable_data<float>(platform::CPUPlace());

      if (mixed_precision == phi::DataType::FLOAT16 &&
          !weights_should_be_fp32.count(param_name)) {
        mixed_tensor.set_type(paddle::experimental::DataType::FLOAT16);
        auto* mixed_data =
            mixed_tensor.mutable_data<float16>(platform::CPUPlace());
        for (int i = 0; i < t->numel(); i++) {
          mixed_data[i] = static_cast<float16>(data[i]);
        }
        t->clear();
        paddle::framework::TensorCopySync(mixed_tensor, place, t);
      } else if (mixed_precision == phi::DataType::BFLOAT16 &&
                 !weights_should_be_fp32.count(param_name)) {
        mixed_tensor.set_type(paddle::experimental::DataType::BFLOAT16);
        auto* mixed_data =
            mixed_tensor.mutable_data<bfloat16>(platform::CPUPlace());
        for (int i = 0; i < t->numel(); i++) {
          mixed_data[i] = static_cast<bfloat16>(data[i]);
        }
        t->clear();
        paddle::framework::TensorCopySync(mixed_tensor, place, t);
      }
    }
  }

  auto StrToBinary = [](const std::string& path, const std::string& str) {
    std::ofstream file(path.c_str(), std::ios::binary);
    file.write(str.c_str(), str.size());
    file.close();
  };
  StrToBinary(mixed_model_file,
              mixed_program_desc.Proto()->SerializeAsString());
  StrToBinary(mixed_params_file, serialize_params(&scope, parameters));
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
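OpSupportPrecision is the building block used above to decide whether an op stays in low precision: it consults the phi kernel registry for the requested backend and dtype and respects the user blacklist. A minimal sketch of calling it directly (the op name and blacklist contents below are illustrative examples, not taken from this commit):

// Illustrative: would the phi "conv2d" kernel run in fp16 on GPU, given a blacklist?
std::unordered_set<std::string> blacklist{"softmax"};  // example entry
bool ok = paddle::inference::analysis::OpSupportPrecision(
    "conv2d", phi::Backend::GPU, phi::DataType::FLOAT16, blacklist);
LOG(INFO) << "conv2d supports fp16 on GPU: " << ok;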
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <unordered_set>

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"

namespace paddle {
namespace inference {
namespace analysis {

bool OpSupportPrecision(const std::string& phi_op_type,
                        phi::Backend backend,
                        phi::DataType precision,
                        const std::unordered_set<std::string>& blacklist);

void AddCastOp(
    framework::ir::Graph* graph,
    framework::ir::Node* node,
    framework::ir::Node* next_op,
    framework::proto::VarType::Type from_type,
    framework::proto::VarType::Type to_type,
    int* suffix,
    framework::BlockDesc* block_desc,
    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* map);

void ConvertToMixedPrecision(const std::string& model_file,
                             const std::string& params_file,
                             const std::string& mixed_model_file,
                             const std::string& mixed_params_file,
                             phi::DataType mixed_precision,
                             phi::Backend backend,
                             bool keep_io_types = true,
                             std::unordered_set<std::string> black_list = {});

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc

@@ -14,10 +14,16 @@
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
+
+#include <unordered_set>

 #include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {

@@ -106,34 +112,63 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
   if (with_dynamic_shape) {
     reserve_cpu_weights = true;
   }

Removed (the old loop copied every persistable variable to GPU as raw float data):

  for (auto &var_name : all_vars) {
    if (std::count(repetitive_params.begin(), repetitive_params.end(),
                   var_name)) {
      if (!reserve_cpu_weights) {
        scope->EraseVars({var_name});
      }
      continue;
    }
    auto *var = scope->FindLocalVar(var_name);
    PADDLE_ENFORCE_NOT_NULL(var,
                            platform::errors::PreconditionNotMet(
                                "The var should not be nullptr"));
    if (var->IsType<framework::LoDTensor>() ||
        var->IsType<framework::Tensor>()) {
      auto *t = var->GetMutable<framework::LoDTensor>();
      platform::CPUPlace cpu_place;
      framework::LoDTensor temp_tensor;
      temp_tensor.Resize(t->dims());
      temp_tensor.mutable_data<float>(cpu_place);
      // Copy the parameter data to a tmp tensor.
      paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
      // Reallocation the space on GPU
      t->clear();
      // Copy parameter data to newly allocated GPU space.
      paddle::framework::TensorCopySync(temp_tensor, place, t);
    }
  }

Added (the new loop walks the graph's ops and converts fp16/bf16 persistable weights while copying them to GPU):

  for (auto *node : paddle::framework::ir::TopologySortOperations(graph)) {
    if (!node->IsOp()) continue;
    if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch") continue;
    for (auto *var_node : node->inputs) {
      if (!var_node->Var()->Persistable()) continue;
      auto var_name = var_node->Var()->Name();
      if (std::count(repetitive_params.begin(), repetitive_params.end(),
                     var_name)) {
        if (!reserve_cpu_weights) {
          scope->EraseVars({var_name});
        }
        continue;
      }
      auto *var = scope->FindLocalVar(var_name);
      PADDLE_ENFORCE_NOT_NULL(var,
                              platform::errors::PreconditionNotMet(
                                  "The var should not be nullptr"));
      if (var->IsType<framework::LoDTensor>() ||
          var->IsType<framework::Tensor>()) {
        auto *t = var->GetMutable<framework::LoDTensor>();
        auto var_data_type = var_node->Var()->GetDataType();
        VLOG(5) << "var_name is " << var_name << ", data type is "
                << var_data_type;
        if (var_data_type == paddle::framework::proto::VarType::FP16) {
          framework::Tensor half_tensor;
          half_tensor.set_type(paddle::experimental::DataType::FLOAT16);
          half_tensor.Resize(t->dims());
          auto *half_data =
              half_tensor.mutable_data<float16>(platform::CPUPlace());
          for (int i = 0; i < t->numel(); i++) {
            auto *data = t->mutable_data<float16>(platform::CPUPlace());
            half_data[i] = static_cast<float16>(data[i]);
          }
          t->clear();
          paddle::framework::TensorCopySync(half_tensor, place, t);
        } else if (var_data_type == paddle::framework::proto::VarType::BF16) {
          framework::Tensor bf16_tensor;
          bf16_tensor.set_type(paddle::experimental::DataType::BFLOAT16);
          bf16_tensor.Resize(t->dims());
          auto *bf16_data = bf16_tensor.mutable_data<platform::bfloat16>(
              platform::CPUPlace());
          for (int i = 0; i < t->numel(); i++) {
            auto *data = t->mutable_data<bfloat16>(platform::CPUPlace());
            bf16_data[i] = static_cast<platform::bfloat16>(data[i]);
          }
          t->clear();
          paddle::framework::TensorCopySync(bf16_tensor, place, t);
        } else {
          platform::CPUPlace cpu_place;
          framework::LoDTensor temp_tensor;
          temp_tensor.Resize(t->dims());
          paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
          t->clear();
          paddle::framework::TensorCopySync(temp_tensor, place, t);
        }
      }
    }
  }
paddle/fluid/inference/api/CMakeLists.txt

@@ -82,6 +82,7 @@ if(WITH_ONNXRUNTIME)
       ir_pass_manager
       op_compatible_info
       infer_io_utils
+      model_utils
       onnxruntime
       paddle2onnx)
 else()

@@ -90,7 +91,7 @@ else()
     SRCS analysis_predictor.cc resource_manager.cc infer_context.cc
          ${mkldnn_quantizer_src}
     DEPS ${inference_deps} zero_copy_tensor ir_pass_manager op_compatible_info
-         infer_io_utils)
+         infer_io_utils model_utils)
 endif()

 cc_test(
paddle/fluid/inference/api/analysis_predictor.cc

@@ -36,12 +36,15 @@
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/infer_context.h"
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
+#include "paddle/fluid/inference/utils/model_utils.h"
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"

@@ -50,6 +53,8 @@
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
+#include "paddle/phi/common/backend.h"
+#include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/utils/string/split.h"

@@ -102,6 +107,43 @@ bool IsPersistable(const framework::VarDesc *var) {
   }
   return false;
 }
+
+phi::DataType ConvertPrecision(AnalysisConfig::Precision precision) {
+  switch (precision) {
+    case AnalysisConfig::Precision::kFloat32:
+      return phi::DataType::FLOAT32;
+    case AnalysisConfig::Precision::kHalf:
+      return phi::DataType::FLOAT16;
+    case AnalysisConfig::Precision::kBf16:
+      return phi::DataType::BFLOAT16;
+    case AnalysisConfig::Precision::kInt8:
+      return phi::DataType::INT8;
+    default:
+      PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+          "Paddle Inference not support precision. We now only support "
+          "Float32, Half, Bfloat16 and Int8"));
+      return phi::DataType::FLOAT32;
+  }
+}
+
+phi::Backend ConvertBackend(AnalysisConfig::Backend backend) {
+  switch (backend) {
+    case AnalysisConfig::Backend::kGPU:
+      // NOTE: phi also support phi::Backend::GPUDNN.
+      return phi::Backend::GPU;
+    case AnalysisConfig::Backend::kNPU:
+      return phi::Backend::NPU;
+    case AnalysisConfig::Backend::kXPU:
+      return phi::Backend::XPU;
+    case AnalysisConfig::Backend::kCPU:
+      return phi::Backend::CPU;
+    default:
+      PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+          "Paddle Inference not support backend, we now only support GPU, XPU, "
+          "NPU and CPU."));
+      return phi::Backend::CPU;
+  }
+}
 }  // namespace

 bool PaddleTensorToLoDTensor(const PaddleTensor &pt,

@@ -476,6 +518,8 @@ bool AnalysisPredictor::PrepareProgram(
     // if enable_ir_optim_ is false,
     // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will
     // not be executed.
+    model_precision_ =
+        paddle::inference::GetModelPrecision(*inference_program_);
     OptimizeInferenceProgram();
   } else {
     // If the program is passed from external, no need to optimize it, this

@@ -1129,6 +1173,40 @@ void AnalysisPredictor::PrepareArgument() {
 #endif

   auto passes = config_.pass_builder()->AllPasses();
+  if (model_precision_ != phi::DataType::FLOAT32) {
+    LOG(INFO) << "Model is mixed precision type with " << model_precision_
+              << ", we will use a new PassStrategy. Note that only the GPU "
+                 "backend is supported for now.";
+    passes.clear();
+    if (config_.tensorrt_engine_enabled()) {
+      for (const auto &pass : kTrtLowerPrecisionPasses) {
+        passes.push_back(pass);
+      }
+    } else if (config_.use_gpu()) {
+      for (const auto &pass : kGpuLowerPrecisionPasses) {
+        passes.push_back(pass);
+      }
+    }
+    const auto &deleted_passes = config_.pass_builder()->GetAllDeletedPasses();
+    for (const auto &it : deleted_passes) {
+      auto iterator = std::find(passes.begin(), passes.end(), it);
+      if (iterator != passes.end()) {
+        passes.erase(iterator);
+      }
+    }
+    if (config_.ir_debug_) {
+      auto it = std::begin(passes);
+      while (it != std::end(passes)) {
+        if (*it != "graph_viz_pass") {
+          it = passes.insert(it + 1, "graph_viz_pass");
+        } else {
+          ++it;
+        }
+      }
+    }
+  }
   if (!config_.ir_optim()) {
     passes.clear();
     LOG(INFO) << "ir_optim is turned off, no IR pass will be executed";

@@ -1137,6 +1215,8 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetIrAnalysisPasses(passes);
   argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());
+
+  argument_.SetModelPrecision(static_cast<int>(model_precision_));
 }

 // NOTE All the members in AnalysisConfig should be copied to Argument.

@@ -2112,6 +2192,26 @@ std::string UpdateDllFlag(const char *name, const char *value) {
   return paddle::UpdateDllFlag(name, value);
 }

+void ConvertToMixedPrecision(const std::string &model_file,
+                             const std::string &params_file,
+                             const std::string &mixed_model_file,
+                             const std::string &mixed_params_file,
+                             PrecisionType mixed_precision,
+                             BackendType backend,
+                             bool keep_io_types,
+                             std::unordered_set<std::string> black_list) {
+  auto phi_backend = paddle::ConvertBackend(backend);
+  auto phi_precision = paddle::ConvertPrecision(mixed_precision);
+  paddle::inference::analysis::ConvertToMixedPrecision(model_file,
+                                                       params_file,
+                                                       mixed_model_file,
+                                                       mixed_params_file,
+                                                       phi_precision,
+                                                       phi_backend,
+                                                       keep_io_types,
+                                                       black_list);
+}
+
 }  // namespace paddle_infer

 namespace paddle_infer {
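On the load side nothing extra is required from the user: PrepareProgram() detects the weight precision via GetModelPrecision(), and PrepareArgument() swaps in kGpuLowerPrecisionPasses or kTrtLowerPrecisionPasses when the model is not fp32. A minimal loading sketch for a converted model (the file paths and GPU settings below are illustrative):

// Illustrative: load the fp16 model produced by ConvertToMixedPrecision.
paddle_infer::Config config;
config.SetModel("mixed/model.pdmodel", "mixed/params.pdiparams");  // example paths
config.EnableUseGpu(512 /*MB initial pool*/, 0 /*device id*/);
auto predictor = paddle_infer::CreatePredictor(config);
// model_precision_ is read from the weights; the low-precision pass strategy
// is selected automatically during PrepareArgument().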
paddle/fluid/inference/api/analysis_predictor.h

@@ -18,6 +18,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include "paddle/phi/common/data_type.h"
 #if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
 #include "paddle/fluid/distributed/fleet_executor/fleet_executor.h"
 #endif

@@ -478,6 +479,8 @@ class AnalysisPredictor : public PaddlePredictor {
   std::vector<framework::OpDesc *> fetches_;
   std::map<size_t, std::string> idx2fetches_;

+  phi::DataType model_precision_{phi::DataType::FLOAT32};
+
 #if PADDLE_WITH_MKLDNN
   // Helper class to perform quantization
   class MkldnnQuantizer;
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -167,6 +167,14 @@ struct PD_INFER_DECL AnalysisConfig {
     kFloat32 = 0,  ///< fp32
     kInt8,         ///< int8
     kHalf,         ///< fp16
+    kBf16,         ///< bf16
+  };
+
+  enum class Backend {
+    kCPU = 0,
+    kGPU,
+    kXPU,
+    kNPU,
   };

   ///
paddle/fluid/inference/api/paddle_inference_api.h

@@ -25,6 +25,7 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include <utility>
 #include <vector>

@@ -46,6 +47,7 @@ namespace paddle_infer {
 using PrecisionType = paddle::AnalysisConfig::Precision;
 using Config = paddle::AnalysisConfig;
 using DistConfig = paddle::DistConfig;
+using BackendType = paddle::AnalysisConfig::Backend;

 ///
 /// \class Predictor

@@ -183,6 +185,16 @@ PD_INFER_DECL std::tuple<int, int, int> GetTrtCompileVersion();
 PD_INFER_DECL std::tuple<int, int, int> GetTrtRuntimeVersion();
 PD_INFER_DECL std::string UpdateDllFlag(const char *name, const char *value);

+PD_INFER_DECL void ConvertToMixedPrecision(
+    const std::string &model_file,
+    const std::string &params_file,
+    const std::string &mixed_model_file,
+    const std::string &mixed_params_file,
+    PrecisionType mixed_precision,
+    BackendType backend,
+    bool keep_io_types = true,
+    std::unordered_set<std::string> black_list = {});
+
 namespace services {
 ///
 /// \class PredictorPool
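The public entry point declared above is meant to be called offline, before inference, to produce the mixed-precision model files. A minimal usage sketch, with file names and the blacklist chosen purely for illustration:

// Illustrative offline conversion: fp32 model -> fp16 model for the GPU backend.
paddle_infer::ConvertToMixedPrecision(
    "model/inference.pdmodel",     // example input model path
    "model/inference.pdiparams",   // example input params path
    "mixed/model.pdmodel",         // example output model path
    "mixed/params.pdiparams",      // example output params path
    paddle_infer::PrecisionType::kHalf,
    paddle_infer::BackendType::kGPU,
    true /*keep_io_types: keep feed/fetch in fp32*/,
    {"softmax"} /*black_list: example op kept in fp32*/);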
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -52,6 +52,7 @@ std::string PaddlePassBuilder::DebugString() {
 }

 void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
+  deleted_passes_.insert(pass_type);
   auto it = std::begin(passes_);
   while (it != std::end(passes_)) {
     if (*it == pass_type) {

@@ -149,6 +150,19 @@ const std::vector<std::string> kLiteSubgraphPasses({
 #endif
 });

+// TODO(inference): Most of the existing pass fusion operators do not
+// support fp16/bf16 precision, temporarily use low precision pass to prevent
+// running errors. After fusion operator supports low precision, delete this.
+const std::vector<std::string> kGpuLowerPrecisionPasses{
+    // "conv_bn_fuse_pass",
+    // "conv_eltwiseadd_bn_fuse_pass",
+};
+
+const std::vector<std::string> kTrtLowerPrecisionPasses{
+    // "conv_bn_fuse_pass",
+    // "conv_eltwiseadd_bn_fuse_pass",
+    "tensorrt_subgraph_pass",
+};
+
 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
   passes_.assign({
     //   "identity_scale_op_clean_pass",             //
paddle/fluid/inference/api/paddle_pass_builder.h

@@ -16,6 +16,7 @@
 #include <sstream>
 #include <string>
+#include <unordered_set>
 #include <vector>

 #include "paddle_infer_declare.h"  // NOLINT

@@ -106,6 +107,10 @@ class PD_INFER_DECL PaddlePassBuilder {
     return passes;
   }

+  const std::unordered_set<std::string> &GetAllDeletedPasses() const {
+    return deleted_passes_;
+  }
+
 protected:
  /// \cond Protected
  std::vector<std::string> analysis_passes_{

@@ -116,6 +121,7 @@ class PD_INFER_DECL PaddlePassBuilder {
       "adjust_cudnn_workspace_size_pass",
       "inference_op_replace_pass"}};
  std::vector<std::string> passes_;
+  std::unordered_set<std::string> deleted_passes_;
  /// \endcond
 };

@@ -177,6 +183,8 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
  bool use_ipu_{false};
  bool use_mkldnn_{false};
  bool use_custom_device_{false};
+  bool use_gpu_low_precision_{false};
+
  /// \endcond
 };

@@ -328,4 +336,10 @@ PD_INFER_DECL extern const std::vector<std::string> kDlnneSubgraphPasses;
 /// \brief List of lite subgraph passes.
 PD_INFER_DECL extern const std::vector<std::string> kLiteSubgraphPasses;

+/// \brief TODO(inference): Most of the existing pass fusion operators do not
+/// support fp16/bf16 precision, temporarily use low precision pass to prevent
+/// running errors. After fusion operator supports low precision, delete this.
+PD_INFER_DECL extern const std::vector<std::string> kGpuLowerPrecisionPasses;
+PD_INFER_DECL extern const std::vector<std::string> kTrtLowerPrecisionPasses;
+
 }  // namespace paddle
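The deleted_passes_ set and GetAllDeletedPasses() exist so that passes a user removed from the builder stay removed even after PrepareArgument() rebuilds the pass list for a mixed-precision model. A small sketch of that interaction (the pass name is an example):

// Illustrative: a user-deleted pass is remembered across the pass-list rebuild.
paddle_infer::Config config;
config.pass_builder()->DeletePass("tensorrt_subgraph_pass");  // example pass name
const auto &deleted = config.pass_builder()->GetAllDeletedPasses();
// `deleted` now contains "tensorrt_subgraph_pass"; when PrepareArgument()
// switches to kTrtLowerPrecisionPasses / kGpuLowerPrecisionPasses, it erases
// every entry of `deleted` from the rebuilt list as well.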
paddle/fluid/inference/utils/CMakeLists.txt

@@ -10,6 +10,10 @@ cc_library(
   infer_io_utils
   SRCS io_utils.cc
   DEPS paddle_inference_api lod_tensor shape_range_info_proto)
+cc_library(
+  model_utils
+  SRCS model_utils.cc
+  DEPS proto_desc enforce)

 cc_test(
   infer_io_utils_tester
   SRCS io_utils_tester.cc
paddle/fluid/inference/utils/model_utils.cc (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/utils/model_utils.h"

#include <set>

#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/phi/common/data_type.h"

namespace paddle {
namespace inference {

using paddle::framework::proto::VarType;

// Get all model's weights and return the data_type, e.g., fp16/bf16 or fp32.
phi::DataType GetModelPrecision(const framework::ProgramDesc& program) {
  std::set<VarType::Type> model_types{
      VarType::FP32,
      VarType::FP16,
      VarType::BF16,
  };

  phi::DataType ret = phi::DataType::FLOAT32;
  size_t block_size = program.Size();

  for (size_t i = 0; i < block_size; ++i) {
    const auto& block = program.Block(i);
    for (auto* var : block.AllVars()) {
      if (!(var->GetType() == VarType::LOD_TENSOR ||
            var->GetType() == VarType::LOD_TENSOR_ARRAY))
        continue;
      if (!var->Persistable()) continue;
      auto t = var->GetDataType();
      if (!model_types.count(t)) continue;

      if (t == VarType::FP16) {
        if (ret != phi::DataType::FLOAT32 && ret != phi::DataType::FLOAT16) {
          PADDLE_THROW(platform::errors::PreconditionNotMet(
              "The model's weights already has been set %s type, but also has "
              "%s type, which is an error, please check the model.",
              ret,
              phi::DataType::FLOAT16));
        }
        ret = phi::DataType::FLOAT16;
      } else if (t == VarType::BF16) {
        if (ret != phi::DataType::FLOAT32 && ret != phi::DataType::BFLOAT16) {
          PADDLE_THROW(platform::errors::PreconditionNotMet(
              "The model's weights already has been set %s type, but also has "
              "%s type, which is an error, please check the model.",
              ret,
              phi::DataType::BFLOAT16));
        }
        ret = phi::DataType::BFLOAT16;
      }
    }
  }

  return ret;
}

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/utils/model_utils.h (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>
#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/program_desc.h"
#include "paddle/phi/common/data_type.h"

namespace paddle {
namespace inference {

// Get all model's weights and return the data_type, e.g., fp16/bf16 or fp32.
phi::DataType GetModelPrecision(const framework::ProgramDesc& program);

}  // namespace inference
}  // namespace paddle
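GetModelPrecision() only needs a ProgramDesc, so besides its use in AnalysisPredictor it can be called standalone to inspect what a converted model contains. A minimal sketch, assuming the program has already been loaded (how `program` is obtained is up to the caller, e.g. via inference::Load() as in the conversion pass above):

// Illustrative: inspect the weight precision of an already-loaded program.
// `program` is a framework::ProgramDesc.
phi::DataType precision = paddle::inference::GetModelPrecision(program);
if (precision == phi::DataType::FLOAT16) {
  LOG(INFO) << "model weights are fp16";
} else if (precision == phi::DataType::BFLOAT16) {
  LOG(INFO) << "model weights are bf16";
} else {
  LOG(INFO) << "model weights are fp32";
}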