Unverified commit c7694b82
Authored by Wilber on Jun 29, 2022; committed via GitHub on Jun 29, 2022
Parent commit: 8fa8e17e

inference support mixed-precision model [1]. (#43814)

* inference add convert to mixed model ability.
Changes: 16 files changed, 846 additions and 28 deletions (+846, -28)
paddle/fluid/inference/analysis/argument.h (+4, -0)
paddle/fluid/inference/analysis/ir_pass_manager.cc (+2, -0)
paddle/fluid/inference/analysis/passes/CMakeLists.txt (+5, -0)
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc (+452, -0)
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h (+59, -0)
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc (+62, -27)
paddle/fluid/inference/api/CMakeLists.txt (+2, -1)
paddle/fluid/inference/api/analysis_predictor.cc (+100, -0)
paddle/fluid/inference/api/analysis_predictor.h (+3, -0)
paddle/fluid/inference/api/paddle_analysis_config.h (+8, -0)
paddle/fluid/inference/api/paddle_inference_api.h (+12, -0)
paddle/fluid/inference/api/paddle_pass_builder.cc (+14, -0)
paddle/fluid/inference/api/paddle_pass_builder.h (+14, -0)
paddle/fluid/inference/utils/CMakeLists.txt (+4, -0)
paddle/fluid/inference/utils/model_utils.cc (+74, -0)
paddle/fluid/inference/utils/model_utils.h (+31, -0)
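Taken together, these changes add an offline FP32 to FP16/BF16 model conversion utility and teach the inference predictor to recognize mixed-precision models. A minimal usage sketch of the new public API (declared in paddle/fluid/inference/api/paddle_inference_api.h below); the model paths and the blacklisted op name are placeholders, not part of this commit:

// Sketch only: convert an fp32 inference model to fp16 offline.
#include <unordered_set>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle_infer::ConvertToMixedPrecision(
      "resnet50/inference.pdmodel",         // placeholder fp32 model file
      "resnet50/inference.pdiparams",       // placeholder fp32 params file
      "resnet50_fp16/inference.pdmodel",    // output mixed-precision model
      "resnet50_fp16/inference.pdiparams",  // output mixed-precision params
      paddle_infer::PrecisionType::kHalf,   // target low precision (fp16)
      paddle_infer::BackendType::kGPU,      // target backend
      /*keep_io_types=*/true,
      /*black_list=*/{"fc"});               // ops kept in fp32; name is illustrative
  return 0;
}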
paddle/fluid/inference/analysis/argument.h

@@ -36,6 +36,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/platform/variant.h"
 #include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {
 ...

@@ -328,6 +329,9 @@ struct Argument {
   DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
   DECL_ARGUMENT_FIELD(npu_device_id, NPUDeviceId, int);

   // mixed precision related
   DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);

  private:
   std::unordered_set<std::string> valid_fields_;
 };
 ...
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -86,6 +86,8 @@ void IRPassManager::CreatePasses(Argument *argument,
         argument->tensorrt_tuned_dynamic_shape();
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
     pass->Set("model_precision", new int(argument->model_precision()));

     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
       std::string dot_file_path;
 ...
paddle/fluid/inference/analysis/passes/CMakeLists.txt

@@ -10,6 +10,10 @@ cc_library(
   memory_optim_pass
   SRCS memory_optimize_pass.cc
   DEPS analysis_pass zero_copy_tensor)
 cc_library(
   convert_to_mixed_precision
   SRCS convert_to_mixed_precision.cc
   DEPS analysis_pass ir_graph_build_pass)
 cc_library(
   ir_params_sync_among_devices_pass
   SRCS ir_params_sync_among_devices_pass.cc
 ...

@@ -46,6 +50,7 @@ cc_library(
   ir_params_sync_among_devices_pass
   adjust_cudnn_workspace_size_pass
   memory_optim_pass
   convert_to_mixed_precision
   inference_op_replace_pass
   ir_graph_to_program_pass
   ir_graph_clean_pass)
 ...
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"

#include <unordered_set>

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/tensor_meta.h"

using namespace paddle::framework;  // NOLINT

namespace paddle {
namespace inference {
namespace analysis {

namespace {

bool IsKernelSupportPrecision(
    const std::string& op_type,
    phi::Backend backend,
    phi::DataType data_type,
    phi::DataLayout layout = phi::DataLayout::ALL_LAYOUT) {
  auto kernels = phi::KernelFactory::Instance().kernels();
  if (kernels.find(op_type) == kernels.end()) {
    return false;
  }
  phi::KernelKey kernel_key(backend, layout, data_type);
  return phi::KernelFactory::Instance().HasKernel(op_type, kernel_key);
}

bool GpuKernelSupportPrecision(
    const std::string& op_type,
    phi::DataType data_type,
    phi::DataLayout layout = phi::DataLayout::ALL_LAYOUT) {
  bool res =
      IsKernelSupportPrecision(op_type, phi::Backend::GPU, data_type, layout);
  res |= IsKernelSupportPrecision(
      op_type, phi::Backend::GPUDNN, data_type, layout);
  return res;
}

// Just process special cases.
bool OutShouldNotConvert(ir::Node* var_node) {
  auto op_node = var_node->inputs[0];
  auto* op_desc = op_node->Op();

  // batch_norm's input and output (variance and mean) are the same.
  if (op_desc->Type() == "batch_norm") {
    auto vecs = op_desc->Output("MeanOut");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
    vecs = op_desc->Output("VarianceOut");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
    vecs = op_desc->Output("SavedMean");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
    vecs = op_desc->Output("SavedVariance");
    if (std::find(vecs.begin(), vecs.end(), var_node->Name()) != vecs.end()) {
      return true;
    }
  }

  return false;
}

// Just process special cases for weights conversion.
bool WeightsShouldNotConvert(ir::Node* var_node) {
  auto op_nodes = var_node->outputs;
  for (auto* op_node : op_nodes) {
    auto* op_desc = op_node->Op();
    // batch_norm op's bias, mean, scale and variance just be float32, so we
    // can not convert the dtype.
    if (op_desc->Type() == "batch_norm") {
      auto vecs = op_desc->Input("Bias");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) !=
          vecs.end()) {
        return true;
      }
      vecs = op_desc->Input("Mean");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) !=
          vecs.end()) {
        return true;
      }
      vecs = op_desc->Input("Scale");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) !=
          vecs.end()) {
        return true;
      }
      vecs = op_desc->Input("Variance");
      if (std::find(vecs.begin(), vecs.end(), var_node->Name()) !=
          vecs.end()) {
        return true;
      }
    }
  }

  return false;
}

void ConvertTensorDtype(framework::ir::Graph* graph,
                        const std::unordered_set<std::string>& blacklist,
                        bool keep_io_types,
                        phi::Backend backend,
                        phi::DataType tensor_dtype) {
  framework::proto::VarType::Type to_type;
  if (tensor_dtype == phi::DataType::FLOAT16) {
    to_type = framework::proto::VarType::FP16;
  } else if (tensor_dtype == phi::DataType::BFLOAT16) {
    to_type = framework::proto::VarType::BF16;
  } else {
    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "mixed_precision currently not supported dtype %d, we now only "
        "support fp16 and bf16.",
        static_cast<int>(tensor_dtype)));
  }

  int num_low_precision = 0;
  int suffix = 0;
  framework::BlockDesc* block_desc{nullptr};
  std::vector<framework::ir::Node*> output_nodes;
  std::unordered_map<framework::ir::Node*, framework::ir::Node*> cast_map;

  for (auto* op_node : framework::ir::TopologySortOperations(*graph)) {
    if (!op_node->IsOp()) continue;
    auto op_type = op_node->Op()->Type();
    auto phi_op_type = phi::TransToPhiKernelName(op_type);
    // LOG(INFO) << "process op " << op_type << ", corresponding phi type is "
    //           << phi_op_type;

    // 1. set input dtype.
    if (op_type == "feed") {
      block_desc = op_node->Op()->Block();
      auto feed_var = op_node->outputs[0]->Var();
      if (!keep_io_types &&
          feed_var->GetDataType() == framework::proto::VarType::FP32) {
        feed_var->SetDataType(to_type);
      }
    } else if (op_type == "fetch") {
      auto* fetch_var = op_node->inputs[0];
      output_nodes.push_back(fetch_var);
      continue;
    }

    // 2. if op support fp16/bf16 and not in blacklist.
    //    - cast weight to fp16/bf16.
    //    - add cast op if the input dtype is not fp16/bf16.
    //    - set output dtype.
    else if (blacklist.count(phi_op_type) == 0) {  // NOLINT
      bool support_precision =
          OpSupportPrecision(phi_op_type, backend, tensor_dtype, blacklist);
      VLOG(2) << "phi_op_type " << phi_op_type << " support low precision "
              << support_precision;
      if (support_precision) {
        ++num_low_precision;
        auto inputs = op_node->inputs;
        for (auto* in_node : inputs) {
          auto* in_var = in_node->Var();
          if (in_var->Persistable() &&
              in_var->GetDataType() == framework::proto::VarType::FP32) {
            if (WeightsShouldNotConvert(in_node)) continue;
            in_var->SetDataType(to_type);
          } else if (!in_var->Persistable() &&
                     in_var->GetDataType() != to_type) {
            AddCastOp(graph,
                      in_node,
                      op_node,
                      in_var->GetDataType(),
                      to_type,
                      &suffix,
                      block_desc,
                      &cast_map);
          }
        }
        for (auto* out_node : op_node->outputs) {
          auto* out_var = out_node->Var();
          if (out_var->GetDataType() == framework::proto::VarType::FP32) {
            if (OutShouldNotConvert(out_node)) continue;
            out_var->SetDataType(to_type);
          }
        }
      } else {
        auto inputs = op_node->inputs;
        for (auto* in_node : inputs) {
          auto* in_var = in_node->Var();
          if (!in_var->Persistable() &&
              in_var->GetDataType() != framework::proto::VarType::FP32) {
            AddCastOp(graph,
                      in_node,
                      op_node,
                      in_var->GetDataType(),
                      framework::proto::VarType::FP32,
                      &suffix,
                      block_desc,
                      &cast_map);
          }
        }
      }
    }

    // 3. check op not support fp16/bf16 or in blacklist.
    //    - add cast op if the input dtype is not fp32.
    else {  // NOLINT
      // trt pass should explicitle add cast op is input is bf16/tf32, etc.
      if (op_node->Name() == "tensorrt_engine") continue;
      for (auto* in_node : op_node->inputs) {
        auto* in_var = in_node->Var();
        if (in_var->GetDataType() == to_type) {
          AddCastOp(graph,
                    in_node,
                    op_node,
                    to_type,
                    framework::proto::VarType::FP32,
                    &suffix,
                    block_desc,
                    &cast_map);
        }
      }
    }
  }

  // 4. if output_op's dtype is not compatible to output dtype, then just
  // insert cast.
  for (auto* node : output_nodes) {
    auto var = node->Var();
    if (keep_io_types && var->GetDataType() == to_type) {
      // fp16/bf16 -> fp32.
      AddCastOp(graph,
                node,
                node->outputs[0],
                to_type,
                framework::proto::VarType::FP32,
                &suffix,
                block_desc,
                &cast_map);
    } else if (!keep_io_types &&
               var->GetDataType() == framework::proto::VarType::FP32) {
      // fp32 -> fp16/bf16
      AddCastOp(graph,
                node,
                node->outputs[0],
                framework::proto::VarType::FP32,
                to_type,
                &suffix,
                block_desc,
                &cast_map);
    }
  }

  if (num_low_precision)
    LOG(INFO) << "---  detected " << num_low_precision << " low precision ops";
}

}  // namespace

bool OpSupportPrecision(const std::string& phi_op_type,
                        phi::Backend backend,
                        phi::DataType precision,
                        const std::unordered_set<std::string>& blacklist) {
  bool support_precision = false;
  if (blacklist.count(phi_op_type) == 0) {
    if (backend == phi::Backend::GPU)
      support_precision = GpuKernelSupportPrecision(phi_op_type, precision);
    else
      support_precision =
          IsKernelSupportPrecision(phi_op_type, backend, precision);
  }
  return support_precision;
}

void AddCastOp(
    framework::ir::Graph* graph,
    framework::ir::Node* node,
    framework::ir::Node* next_op,
    framework::proto::VarType::Type from_type,
    framework::proto::VarType::Type to_type,
    int* suffix,
    framework::BlockDesc* block_desc,
    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* map) {
  auto update_cast_desc = [&](framework::OpDesc& desc,
                              const std::string& x_name,
                              const std::string& out_name,
                              const int in_dtype,
                              const int out_dtype) {
    desc.SetType("cast");
    desc.SetInput("X", {x_name});
    desc.SetOutput("Out", {out_name});
    desc.SetAttr("in_dtype", in_dtype);
    desc.SetAttr("out_dtype", out_dtype);
    desc.SetAttr("use_mkldnn", false);
    desc.SetAttr("with_quant_attr", false);
    desc.Flush();
  };

  if (map->count(node) == 0) {
    // insert cast op before node.
    std::string cast_input_name = node->Var()->Name();
    std::string cast_output_name =
        node->Var()->Name() + "_cast.tmp_" + std::to_string((*suffix)++);
    CHECK_NOTNULL(block_desc);
    framework::OpDesc cast_op_desc(block_desc);
    update_cast_desc(cast_op_desc,
                     cast_input_name,
                     cast_output_name,
                     static_cast<int>(from_type),
                     static_cast<int>(to_type));
    auto* cast_op_node = graph->CreateOpNode(&cast_op_desc);
    auto* cast_output_vardesc = block_desc->Var(cast_output_name);
    cast_output_vardesc->SetPersistable(false);
    cast_output_vardesc->SetDataType(to_type);
    cast_output_vardesc->SetShape(node->Var()->GetShape());
    auto* cast_output_node = graph->CreateVarNode(cast_output_vardesc);
    IR_NODE_LINK_TO(cast_op_node, cast_output_node);
    (*map)[node] = cast_output_node;
  }
  next_op->Op()->RenameInput(node->Name(), map->at(node)->Name());
  IR_NODE_LINK_TO(node, map->at(node)->inputs[0]);
  IR_NODE_LINK_TO(map->at(node), next_op);
}

void ConvertToMixedPrecision(const std::string& model_file,
                             const std::string& params_file,
                             const std::string& mixed_model_file,
                             const std::string& mixed_params_file,
                             phi::DataType mixed_precision,
                             phi::Backend backend,
                             bool keep_io_types,
                             std::unordered_set<std::string> black_list) {
  paddle::CPUPlace place;
  framework::Executor executor(place);
  framework::Scope scope;
  auto program_desc =
      inference::Load(&executor, &scope, model_file, params_file);
  auto graph = std::unique_ptr<framework::ir::Graph>(
      new framework::ir::Graph(*program_desc));

  ConvertTensorDtype(
      graph.get(), black_list, keep_io_types, backend, mixed_precision);

  framework::ProgramDesc mixed_program_desc;
  framework::ir::GraphToProgram(*graph, &mixed_program_desc);

  auto parameters = scope.LocalVarNames();
  std::sort(parameters.begin(), parameters.end());

  auto serialize_params =
      [](framework::Scope* scope,
         const std::vector<std::string>& params) -> std::string {
    std::ostringstream os;
    platform::CPUDeviceContext ctx;
    for (const auto& param : params) {
      VLOG(3) << "Serialize param: " << param;
      PADDLE_ENFORCE_NOT_NULL(
          scope->FindVar(param),
          platform::errors::NotFound(
              "Block should already have a '%s' variable", param));
      auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
      framework::SerializeToStream(os, *tensor, ctx);
    }
    return os.str();
  };

  std::unordered_set<std::string> weights_should_be_fp32;
  for (auto* node : paddle::framework::ir::TopologySortOperations(*graph)) {
    if (!node->IsOp()) continue;
    auto* op_desc = node->Op();
    if (op_desc->Type() == "feed" || op_desc->Type() == "fetch") continue;

    if (op_desc->Type() == "batch_norm") {
      auto vecs = op_desc->Input("Bias");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
      vecs = op_desc->Input("Mean");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
      vecs = op_desc->Input("Scale");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
      vecs = op_desc->Input("Variance");
      for (auto s : vecs) {
        weights_should_be_fp32.insert(s);
      }
    }
  }

  for (const auto& param_name : parameters) {
    auto* var = scope.FindLocalVar(param_name);
    if (var->IsType<framework::LoDTensor>() ||
        var->IsType<framework::Tensor>()) {
      auto* t = var->GetMutable<framework::LoDTensor>();
      framework::Tensor mixed_tensor;
      mixed_tensor.Resize(t->dims());
      auto* data = t->mutable_data<float>(platform::CPUPlace());

      if (mixed_precision == phi::DataType::FLOAT16 &&
          !weights_should_be_fp32.count(param_name)) {
        mixed_tensor.set_type(paddle::experimental::DataType::FLOAT16);
        auto* mixed_data =
            mixed_tensor.mutable_data<float16>(platform::CPUPlace());
        for (int i = 0; i < t->numel(); i++) {
          mixed_data[i] = static_cast<float16>(data[i]);
        }
        t->clear();
        paddle::framework::TensorCopySync(mixed_tensor, place, t);
      } else if (mixed_precision == phi::DataType::BFLOAT16 &&
                 !weights_should_be_fp32.count(param_name)) {
        mixed_tensor.set_type(paddle::experimental::DataType::BFLOAT16);
        auto* mixed_data =
            mixed_tensor.mutable_data<bfloat16>(platform::CPUPlace());
        for (int i = 0; i < t->numel(); i++) {
          mixed_data[i] = static_cast<bfloat16>(data[i]);
        }
        t->clear();
        paddle::framework::TensorCopySync(mixed_tensor, place, t);
      }
    }
  }

  auto StrToBinary = [](const std::string& path, const std::string& str) {
    std::ofstream file(path.c_str(), std::ios::binary);
    file.write(str.c_str(), str.size());
    file.close();
  };
  StrToBinary(mixed_model_file,
              mixed_program_desc.Proto()->SerializeAsString());
  StrToBinary(mixed_params_file, serialize_params(&scope, parameters));
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
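As an illustration (not part of this diff), OpSupportPrecision above can be queried to ask whether a phi kernel has a low-precision registration for a given backend; the op name "conv2d" is only an example:

// Sketch only: does the phi "conv2d" kernel have an fp16 GPU registration?
#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"

bool Conv2dSupportsFp16OnGpu() {
  return paddle::inference::analysis::OpSupportPrecision(
      "conv2d", phi::Backend::GPU, phi::DataType::FLOAT16,
      /*blacklist=*/{});
}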
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <unordered_set>

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"

namespace paddle {
namespace inference {
namespace analysis {

bool OpSupportPrecision(const std::string& phi_op_type,
                        phi::Backend backend,
                        phi::DataType precision,
                        const std::unordered_set<std::string>& blacklist);

void AddCastOp(
    framework::ir::Graph* graph,
    framework::ir::Node* node,
    framework::ir::Node* next_op,
    framework::proto::VarType::Type from_type,
    framework::proto::VarType::Type to_type,
    int* suffix,
    framework::BlockDesc* block_desc,
    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* map);

void ConvertToMixedPrecision(const std::string& model_file,
                             const std::string& params_file,
                             const std::string& mixed_model_file,
                             const std::string& mixed_params_file,
                             phi::DataType mixed_precision,
                             phi::Backend backend,
                             bool keep_io_types = true,
                             std::unordered_set<std::string> black_list = {});

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
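For callers inside the analysis library, the same conversion can be invoked directly, relying on the defaults keep_io_types = true and an empty black_list declared above. A hedged sketch with placeholder paths and BF16 as the target type:

// Sketch only: analysis-level entry point with default trailing arguments.
paddle::inference::analysis::ConvertToMixedPrecision(
    "model.pdmodel", "model.pdiparams",           // placeholder fp32 inputs
    "model_bf16.pdmodel", "model_bf16.pdiparams", // placeholder outputs
    phi::DataType::BFLOAT16, phi::Backend::GPU);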
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc

@@ -14,10 +14,16 @@
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"

 #include <unordered_set>

 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {
 ...

@@ -106,34 +112,63 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
   if (with_dynamic_shape) {
     reserve_cpu_weights = true;
   }

   for (auto &var_name : all_vars) {
     if (std::count(repetitive_params.begin(), repetitive_params.end(),
                    var_name)) {
       if (!reserve_cpu_weights) {
         scope->EraseVars({var_name});
       }
       continue;
     }
     auto *var = scope->FindLocalVar(var_name);
     PADDLE_ENFORCE_NOT_NULL(var,
                             platform::errors::PreconditionNotMet(
                                 "The var should not be nullptr"));
     if (var->IsType<framework::LoDTensor>() ||
         var->IsType<framework::Tensor>()) {
       auto *t = var->GetMutable<framework::LoDTensor>();
       platform::CPUPlace cpu_place;
       framework::LoDTensor temp_tensor;
       temp_tensor.Resize(t->dims());
       temp_tensor.mutable_data<float>(cpu_place);
       // Copy the parameter data to a tmp tensor.
       paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
       // Reallocation the space on GPU
       t->clear();
       // Copy parameter data to newly allocated GPU space.
       paddle::framework::TensorCopySync(temp_tensor, place, t);

   for (auto *node : paddle::framework::ir::TopologySortOperations(graph)) {
     if (!node->IsOp()) continue;
     if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch")
       continue;
     for (auto *var_node : node->inputs) {
       if (!var_node->Var()->Persistable()) continue;
       auto var_name = var_node->Var()->Name();
       if (std::count(repetitive_params.begin(), repetitive_params.end(),
                      var_name)) {
         if (!reserve_cpu_weights) {
           scope->EraseVars({var_name});
         }
         continue;
       }
       auto *var = scope->FindLocalVar(var_name);
       PADDLE_ENFORCE_NOT_NULL(var,
                               platform::errors::PreconditionNotMet(
                                   "The var should not be nullptr"));
       if (var->IsType<framework::LoDTensor>() ||
           var->IsType<framework::Tensor>()) {
         auto *t = var->GetMutable<framework::LoDTensor>();
         auto var_data_type = var_node->Var()->GetDataType();
         VLOG(5) << "var_name is " << var_name << ", data type is "
                 << var_data_type;
         if (var_data_type == paddle::framework::proto::VarType::FP16) {
           framework::Tensor half_tensor;
           half_tensor.set_type(paddle::experimental::DataType::FLOAT16);
           half_tensor.Resize(t->dims());
           auto *half_data =
               half_tensor.mutable_data<float16>(platform::CPUPlace());
           for (int i = 0; i < t->numel(); i++) {
             auto *data = t->mutable_data<float16>(platform::CPUPlace());
             half_data[i] = static_cast<float16>(data[i]);
           }
           t->clear();
           paddle::framework::TensorCopySync(half_tensor, place, t);
         } else if (var_data_type == paddle::framework::proto::VarType::BF16) {
           framework::Tensor bf16_tensor;
           bf16_tensor.set_type(paddle::experimental::DataType::BFLOAT16);
           bf16_tensor.Resize(t->dims());
           auto *bf16_data = bf16_tensor.mutable_data<platform::bfloat16>(
               platform::CPUPlace());
           for (int i = 0; i < t->numel(); i++) {
             auto *data = t->mutable_data<bfloat16>(platform::CPUPlace());
             bf16_data[i] = static_cast<platform::bfloat16>(data[i]);
           }
           t->clear();
           paddle::framework::TensorCopySync(bf16_tensor, place, t);
         } else {
           platform::CPUPlace cpu_place;
           framework::LoDTensor temp_tensor;
           temp_tensor.Resize(t->dims());
           paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
           t->clear();
           paddle::framework::TensorCopySync(temp_tensor, place, t);
         }
       }
     }
   }
 }
 ...
paddle/fluid/inference/api/CMakeLists.txt

@@ -82,6 +82,7 @@ if(WITH_ONNXRUNTIME)
     ir_pass_manager
     op_compatible_info
     infer_io_utils
     model_utils
     onnxruntime
     paddle2onnx)
 else()
 ...

@@ -90,7 +91,7 @@ else()
     SRCS analysis_predictor.cc resource_manager.cc infer_context.cc
          ${mkldnn_quantizer_src}
     DEPS ${inference_deps} zero_copy_tensor ir_pass_manager op_compatible_info
          infer_io_utils)
          infer_io_utils model_utils)
 endif()

 cc_test(
 ...
paddle/fluid/inference/api/analysis_predictor.cc

@@ -36,12 +36,15 @@
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/infer_context.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
 #include "paddle/fluid/inference/utils/model_utils.h"
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 ...

@@ -50,6 +53,8 @@
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
 #include "paddle/phi/common/backend.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/utils/string/split.h"
 ...

@@ -102,6 +107,43 @@ bool IsPersistable(const framework::VarDesc *var) {
   }
   return false;
 }

 phi::DataType ConvertPrecision(AnalysisConfig::Precision precision) {
   switch (precision) {
     case AnalysisConfig::Precision::kFloat32:
       return phi::DataType::FLOAT32;
     case AnalysisConfig::Precision::kHalf:
       return phi::DataType::FLOAT16;
     case AnalysisConfig::Precision::kBf16:
       return phi::DataType::BFLOAT16;
     case AnalysisConfig::Precision::kInt8:
       return phi::DataType::INT8;
     default:
       PADDLE_THROW(paddle::platform::errors::InvalidArgument(
           "Paddle Inference not support precision. We now only support "
           "Float32, Half, Bfloat16 and Int8"));
       return phi::DataType::FLOAT32;
   }
 }

 phi::Backend ConvertBackend(AnalysisConfig::Backend backend) {
   switch (backend) {
     case AnalysisConfig::Backend::kGPU:
       // NOTE: phi also support phi::Backend::GPUDNN.
       return phi::Backend::GPU;
     case AnalysisConfig::Backend::kNPU:
       return phi::Backend::NPU;
     case AnalysisConfig::Backend::kXPU:
       return phi::Backend::XPU;
     case AnalysisConfig::Backend::kCPU:
       return phi::Backend::CPU;
     default:
       PADDLE_THROW(paddle::platform::errors::InvalidArgument(
           "Paddle Inference not support backend, we now only support GPU, "
           "XPU, NPU and CPU."));
       return phi::Backend::CPU;
   }
 }
 }  // namespace

 bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
 ...

@@ -476,6 +518,8 @@ bool AnalysisPredictor::PrepareProgram(
     // if enable_ir_optim_ is false,
     // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will
     // not be executed.
     model_precision_ =
         paddle::inference::GetModelPrecision(*inference_program_);
     OptimizeInferenceProgram();
   } else {
     // If the program is passed from external, no need to optimize it, this
 ...

@@ -1129,6 +1173,40 @@ void AnalysisPredictor::PrepareArgument() {
 #endif

   auto passes = config_.pass_builder()->AllPasses();
   if (model_precision_ != phi::DataType::FLOAT32) {
     LOG(INFO) << "Model is mixed precision type with " << model_precision_
               << ", we will use a new PassStrategy. Note that only the GPU "
                  "backend is supported for now.";
     passes.clear();
     if (config_.tensorrt_engine_enabled()) {
       for (const auto &pass : kTrtLowerPrecisionPasses) {
         passes.push_back(pass);
       }
     } else if (config_.use_gpu()) {
       for (const auto &pass : kGpuLowerPrecisionPasses) {
         passes.push_back(pass);
       }
     }

     const auto &deleted_passes = config_.pass_builder()->GetAllDeletedPasses();
     for (const auto &it : deleted_passes) {
       auto iterator = std::find(passes.begin(), passes.end(), it);
       if (iterator != passes.end()) {
         passes.erase(iterator);
       }
     }

     if (config_.ir_debug_) {
       auto it = std::begin(passes);
       while (it != std::end(passes)) {
         if (*it != "graph_viz_pass") {
           it = passes.insert(it + 1, "graph_viz_pass");
         } else {
           ++it;
         }
       }
     }
   }
   if (!config_.ir_optim()) {
     passes.clear();
     LOG(INFO) << "ir_optim is turned off, no IR pass will be executed";
 ...

@@ -1137,6 +1215,8 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetIrAnalysisPasses(passes);
   argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());

   argument_.SetModelPrecision(static_cast<int>(model_precision_));
 }

 // NOTE All the members in AnalysisConfig should be copied to Argument.
 ...

@@ -2112,6 +2192,26 @@ std::string UpdateDllFlag(const char *name, const char *value) {
   return paddle::UpdateDllFlag(name, value);
 }

 void ConvertToMixedPrecision(const std::string &model_file,
                              const std::string &params_file,
                              const std::string &mixed_model_file,
                              const std::string &mixed_params_file,
                              PrecisionType mixed_precision,
                              BackendType backend,
                              bool keep_io_types,
                              std::unordered_set<std::string> black_list) {
   auto phi_backend = paddle::ConvertBackend(backend);
   auto phi_precision = paddle::ConvertPrecision(mixed_precision);
   paddle::inference::analysis::ConvertToMixedPrecision(model_file,
                                                        params_file,
                                                        mixed_model_file,
                                                        mixed_params_file,
                                                        phi_precision,
                                                        phi_backend,
                                                        keep_io_types,
                                                        black_list);
 }

 }  // namespace paddle_infer

 namespace paddle_infer {
 ...
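With these predictor changes, a model that was converted offline is picked up automatically: PrepareProgram() records the weight precision via GetModelPrecision() and PrepareArgument() swaps in the lower-precision pass list. A rough usage sketch, assuming the standard paddle_infer Config/CreatePredictor API (the paths and GPU memory size are placeholders, not part of this diff):

// Sketch only: run a previously converted fp16 model on GPU.
paddle_infer::Config config;
config.SetModel("resnet50_fp16/inference.pdmodel",
                "resnet50_fp16/inference.pdiparams");
config.EnableUseGpu(256 /* initial pool size in MB */, 0 /* device id */);
auto predictor = paddle_infer::CreatePredictor(config);
// PrepareProgram() detects the fp16 weights and the predictor applies
// kGpuLowerPrecisionPasses (or kTrtLowerPrecisionPasses when TensorRT is on).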
paddle/fluid/inference/api/analysis_predictor.h

@@ -18,6 +18,7 @@
 #include <memory>
 #include <string>
 #include <vector>
 #include "paddle/phi/common/data_type.h"
 #if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
 #include "paddle/fluid/distributed/fleet_executor/fleet_executor.h"
 #endif
 ...

@@ -478,6 +479,8 @@ class AnalysisPredictor : public PaddlePredictor {
   std::vector<framework::OpDesc *> fetches_;
   std::map<size_t, std::string> idx2fetches_;

   phi::DataType model_precision_{phi::DataType::FLOAT32};

 #if PADDLE_WITH_MKLDNN
   // Helper class to perform quantization
   class MkldnnQuantizer;
 ...
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -167,6 +167,14 @@ struct PD_INFER_DECL AnalysisConfig {
     kFloat32 = 0,  ///< fp32
     kInt8,         ///< int8
     kHalf,         ///< fp16
     kBf16,         ///< bf16
   };

   enum class Backend {
     kCPU = 0,
     kGPU,
     kXPU,
     kNPU,
   };

   ///
 ...
paddle/fluid/inference/api/paddle_inference_api.h

@@ -25,6 +25,7 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <string>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 ...

@@ -46,6 +47,7 @@ namespace paddle_infer {
 using PrecisionType = paddle::AnalysisConfig::Precision;
 using Config = paddle::AnalysisConfig;
 using DistConfig = paddle::DistConfig;
 using BackendType = paddle::AnalysisConfig::Backend;

 ///
 /// \class Predictor
 ...

@@ -183,6 +185,16 @@ PD_INFER_DECL std::tuple<int, int, int> GetTrtCompileVersion();
 PD_INFER_DECL std::tuple<int, int, int> GetTrtRuntimeVersion();
 PD_INFER_DECL std::string UpdateDllFlag(const char *name, const char *value);

 PD_INFER_DECL void ConvertToMixedPrecision(
     const std::string &model_file,
     const std::string &params_file,
     const std::string &mixed_model_file,
     const std::string &mixed_params_file,
     PrecisionType mixed_precision,
     BackendType backend,
     bool keep_io_types = true,
     std::unordered_set<std::string> black_list = {});

 namespace services {
 ///
 /// \class PredictorPool
 ...
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -52,6 +52,7 @@ std::string PaddlePassBuilder::DebugString() {
 }

 void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
   deleted_passes_.insert(pass_type);
   auto it = std::begin(passes_);
   while (it != std::end(passes_)) {
     if (*it == pass_type) {
 ...

@@ -149,6 +150,19 @@ const std::vector<std::string> kLiteSubgraphPasses({
 #endif
 });

 // TODO(inference): Most of the existing pass fusion operators do not
 // support fp16/bf16 precision, temporarily use low precision pass to prevent
 // running errors. After fusion operator supports low precision, delete this.
 const std::vector<std::string> kGpuLowerPrecisionPasses{
     // "conv_bn_fuse_pass",
     // "conv_eltwiseadd_bn_fuse_pass",
 };
 const std::vector<std::string> kTrtLowerPrecisionPasses{
     // "conv_bn_fuse_pass",
     // "conv_eltwiseadd_bn_fuse_pass",
     "tensorrt_subgraph_pass",
 };

 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
   passes_.assign({
       //   "identity_scale_op_clean_pass",             //
 ...
paddle/fluid/inference/api/paddle_pass_builder.h

@@ -16,6 +16,7 @@
 #include <sstream>
 #include <string>
 #include <unordered_set>
 #include <vector>

 #include "paddle_infer_declare.h"  // NOLINT
 ...

@@ -106,6 +107,10 @@ class PD_INFER_DECL PaddlePassBuilder {
     return passes;
   }

   const std::unordered_set<std::string> &GetAllDeletedPasses() const {
     return deleted_passes_;
   }

  protected:
   /// \cond Protected
   std::vector<std::string> analysis_passes_{
 ...

@@ -116,6 +121,7 @@ class PD_INFER_DECL PaddlePassBuilder {
        "adjust_cudnn_workspace_size_pass",
        "inference_op_replace_pass"}};
   std::vector<std::string> passes_;
   std::unordered_set<std::string> deleted_passes_;
   /// \endcond
 };
 ...

@@ -177,6 +183,8 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_ipu_{false};
   bool use_mkldnn_{false};
   bool use_custom_device_{false};

   bool use_gpu_low_precision_{false};
   /// \endcond
 };
 ...

@@ -328,4 +336,10 @@ PD_INFER_DECL extern const std::vector<std::string> kDlnneSubgraphPasses;
 /// \brief List of lite subgraph passes.
 PD_INFER_DECL extern const std::vector<std::string> kLiteSubgraphPasses;

 /// \brief TODO(inference): Most of the existing pass fusion operators do not
 /// support fp16/bf16 precision, temporarily use low precision pass to prevent
 /// running errors. After fusion operator supports low precision, delete this.
 PD_INFER_DECL extern const std::vector<std::string> kGpuLowerPrecisionPasses;
 PD_INFER_DECL extern const std::vector<std::string> kTrtLowerPrecisionPasses;

 }  // namespace paddle
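The new deleted_passes_ bookkeeping means a pass removed by the user stays removed even when the predictor rebuilds the pass list for a mixed-precision model. An illustrative sketch (the pass name is only an example):

// Sketch only: a user-deleted pass is remembered via deleted_passes_.
paddle_infer::Config config;
config.pass_builder()->DeletePass("conv_bn_fuse_pass");
// DeletePass() now also records the name, and
// AnalysisPredictor::PrepareArgument() later removes it from the
// lower-precision pass list via GetAllDeletedPasses().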
paddle/fluid/inference/utils/CMakeLists.txt

@@ -10,6 +10,10 @@ cc_library(
   infer_io_utils
   SRCS io_utils.cc
   DEPS paddle_inference_api lod_tensor shape_range_info_proto)
 cc_library(
   model_utils
   SRCS model_utils.cc
   DEPS proto_desc enforce)
 cc_test(
   infer_io_utils_tester
   SRCS io_utils_tester.cc
 ...
paddle/fluid/inference/utils/model_utils.cc (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/utils/model_utils.h"

#include <set>

#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/phi/common/data_type.h"

namespace paddle {
namespace inference {

using paddle::framework::proto::VarType;

// Get all model's weights and return the data_type, e.g., fp16/bf16 or fp32.
phi::DataType GetModelPrecision(const framework::ProgramDesc& program) {
  std::set<VarType::Type> model_types{
      VarType::FP32,
      VarType::FP16,
      VarType::BF16,
  };

  phi::DataType ret = phi::DataType::FLOAT32;
  size_t block_size = program.Size();
  for (size_t i = 0; i < block_size; ++i) {
    const auto& block = program.Block(i);
    for (auto* var : block.AllVars()) {
      if (!(var->GetType() == VarType::LOD_TENSOR ||
            var->GetType() == VarType::LOD_TENSOR_ARRAY))
        continue;
      if (!var->Persistable()) continue;
      auto t = var->GetDataType();
      if (!model_types.count(t)) continue;

      if (t == VarType::FP16) {
        if (ret != phi::DataType::FLOAT32 && ret != phi::DataType::FLOAT16) {
          PADDLE_THROW(platform::errors::PreconditionNotMet(
              "The model's weights already has been set %s type, but also "
              "has %s type, which is an error, please check the model.",
              ret,
              phi::DataType::FLOAT16));
        }
        ret = phi::DataType::FLOAT16;
      } else if (t == VarType::BF16) {
        if (ret != phi::DataType::FLOAT32 && ret != phi::DataType::BFLOAT16) {
          PADDLE_THROW(platform::errors::PreconditionNotMet(
              "The model's weights already has been set %s type, but also "
              "has %s type, which is an error, please check the model.",
              ret,
              phi::DataType::BFLOAT16));
        }
        ret = phi::DataType::BFLOAT16;
      }
    }
  }

  return ret;
}

}  // namespace inference
}  // namespace paddle
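As an illustration (not part of this diff), GetModelPrecision can be combined with inference::Load, the same way convert_to_mixed_precision.cc loads a program; the paths are placeholders:

// Sketch only: load a saved inference program and inspect its weight precision.
paddle::CPUPlace place;
paddle::framework::Executor executor(place);
paddle::framework::Scope scope;
auto program = paddle::inference::Load(
    &executor, &scope, "model.pdmodel", "model.pdiparams");
auto precision = paddle::inference::GetModelPrecision(*program);
// precision is FLOAT16 or BFLOAT16 when persistable weights use that type,
// and FLOAT32 otherwise; mixing fp16 and bf16 weights throws an error.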
paddle/fluid/inference/utils/model_utils.h (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>
#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/program_desc.h"
#include "paddle/phi/common/data_type.h"

namespace paddle {
namespace inference {

// Get all model's weights and return the data_type, e.g., fp16/bf16 or fp32.
phi::DataType GetModelPrecision(const framework::ProgramDesc& program);

}  // namespace inference
}  // namespace paddle