Commit 7985407b (unverified), authored on June 24, 2022 by Wilber, committed by GitHub on June 24, 2022.
revert 40531 (#43807)
* revert 40531
* update
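This commit reverts PR 40531, which had added an experimental GPU fp16 inference mode: the Exp_EnableUseGpuFp16 config API, the mixed_precision_configure_pass, and their supporting Argument fields. For orientation, here is a minimal sketch of how the reverted API was driven, adapted from the tests deleted below in analysis_predictor_tester.cc; the header name and model path are placeholders, not part of this diff:

    // Sketch only: exercises the API removed by this revert, following the
    // deleted TEST(Predictor, Exp_EnableUseGpuFp16). The include path and
    // "path/to/model" are assumptions.
    #include "paddle_inference_api.h"

    int main() {
      paddle_infer::Config config;
      config.SetModel("path/to/model");
      config.SwitchIrOptim();
    #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      config.EnableUseGpu(100, 0);    // 100 MB initial GPU memory pool, device 0
      config.Exp_EnableUseGpuFp16();  // the experimental fp16 mode removed here
    #else
      config.DisableGpu();
    #endif
      auto predictor = paddle_infer::CreatePredictor(config);
      return 0;
    }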
Parent: 69717717

Showing 14 changed files with 326 additions and 580 deletions (+326 −580).
paddle/fluid/framework/ir/CMakeLists.txt                                      +0   −1
paddle/fluid/framework/ir/mixed_precision_configure_pass.cc                   +0   −151
paddle/fluid/framework/ir/mixed_precision_configure_pass.h                    +0   −39
paddle/fluid/inference/analysis/argument.h                                    +95  −66
paddle/fluid/inference/analysis/ir_pass_manager.cc                            +21  −17
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc   +26  −63
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h    +1   −6
paddle/fluid/inference/api/analysis_config.cc                                 +56  −62
paddle/fluid/inference/api/analysis_predictor.cc                              +0   −5
paddle/fluid/inference/api/analysis_predictor_tester.cc                       +9   −32
paddle/fluid/inference/api/paddle_analysis_config.h                           +14  −34
paddle/fluid/inference/api/paddle_pass_builder.cc                             +0   −34
paddle/fluid/inference/api/paddle_pass_builder.h                              +5   −14
paddle/fluid/pybind/inference_api.cc                                          +99  −56
paddle/fluid/framework/ir/CMakeLists.txt

@@ -157,7 +157,6 @@ pass_library(layer_norm_fuse_pass inference)
 pass_library(add_support_int8_pass inference)
 pass_library(matmul_scale_fuse_pass inference)
 pass_library(gpu_cpu_map_matmul_to_mul_pass inference)
-pass_library(mixed_precision_configure_pass inference)
 pass_library(dense_fc_to_sparse_pass inference)
 pass_library(dense_multihead_matmul_to_sparse_pass inference)
 pass_library(generate_pass DEPS pass_desc_proto)
paddle/fluid/framework/ir/mixed_precision_configure_pass.cc (deleted, file mode 100644 → 0)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/mixed_precision_configure_pass.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_version_registry.h"

namespace paddle {
namespace framework {
namespace ir {

void MixedPrecisionConfigurePass::InsertCastOps(
    Graph* graph, const StringSet& blacklist) const {
  VLOG(3) << "Insert the cast op before and after the kernel that does not "
             "supports fp16 precision";

  auto update_cast_desc = [&](framework::OpDesc& desc,
                              const std::string& x_name,
                              const std::string& out_name,
                              const int in_dtype,
                              const int out_dtype) {
    desc.SetType("cast");
    desc.SetInput("X", {x_name});
    desc.SetOutput("Out", {out_name});
    desc.SetAttr("in_dtype", in_dtype);
    desc.SetAttr("out_dtype", out_dtype);
    desc.SetAttr("use_mkldnn", false);
    desc.SetAttr("with_quant_attr", false);
    desc.Flush();
  };

  auto cast_input = [&](Graph* graph, Node* op_node,
                        const StringSet& cast_list) {
    auto inlinks = op_node->inputs;
    for (auto* pre_node : inlinks) {
      if (pre_node->IsVar()) {
        const auto is_persistable = pre_node->Var()->Persistable();
        const auto is_float =
            pre_node->Var()->GetDataType() == proto::VarType::FP16 ||
            pre_node->Var()->GetDataType() == proto::VarType::FP32 ||
            pre_node->Var()->GetDataType() == proto::VarType::FP64;
        if (!is_persistable && is_float) {
          int suffix = 0;
          for (auto* pre_node_input : pre_node->inputs) {
            if (!pre_node_input->IsOp()) continue;
            const auto& type = pre_node_input->Op()->Type();
            if (!cast_list.count(type) && type != "cast") {
              std::string old_name = pre_node->Name();
              std::string new_name =
                  old_name + "_cast.tmp_" + std::to_string(suffix);
              suffix++;
              framework::OpDesc new_op_desc(op_node->Op()->Block());
              // 4 for fp16, 5 for fp32
              update_cast_desc(new_op_desc, old_name, new_name, 4, 5);
              auto* new_op = graph->CreateOpNode(&new_op_desc);
              VarDesc out_var(new_name);
              out_var.SetPersistable(false);
              auto* node_var = graph->CreateVarNode(&out_var);
              op_node->Op()->RenameInput(old_name, new_name);
              IR_NODE_LINK_TO(pre_node, new_op);
              IR_NODE_LINK_TO(new_op, node_var);
              IR_NODE_LINK_TO(node_var, op_node);
            }
          }
        }
      }
    }
  };

  auto cast_output = [&](Graph* graph, Node* op_node,
                         const StringSet& cast_list) {
    auto outlinks = op_node->outputs;
    for (auto* next_node : outlinks) {
      if (next_node->IsVar()) {
        const auto is_persistable = next_node->Var()->Persistable();
        const auto is_float =
            next_node->Var()->GetDataType() == proto::VarType::FP16 ||
            next_node->Var()->GetDataType() == proto::VarType::FP32 ||
            next_node->Var()->GetDataType() == proto::VarType::FP64;
        if (!is_persistable && is_float) {
          int suffix = 0;
          for (auto* next_node_output : next_node->outputs) {
            if (!next_node_output->IsOp()) continue;
            const auto& type = next_node_output->Op()->Type();
            if (!cast_list.count(type) && type != "cast") {
              std::string old_name = next_node->Name();
              std::string new_name =
                  old_name + "_cast.tmp_" + std::to_string(suffix);
              suffix++;
              framework::OpDesc new_op_desc(op_node->Op()->Block());
              // 4 for fp16, 5 for fp32
              update_cast_desc(new_op_desc, old_name, new_name, 5, 4);
              auto* new_op = graph->CreateOpNode(&new_op_desc);
              VarDesc out_var(new_name);
              out_var.SetPersistable(false);
              auto* node_var = graph->CreateVarNode(&out_var);
              next_node_output->Op()->RenameInput(old_name, new_name);
              IR_NODE_LINK_TO(next_node, new_op);
              IR_NODE_LINK_TO(new_op, node_var);
              IR_NODE_LINK_TO(node_var, next_node_output);
            }
          }
        }
      }
    }
  };

  for (auto* op_node :
       ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
    if (!op_node->IsOp() || op_node->Op()->Type() == "feed" ||
        op_node->Op()->Type() == "fetch")
      continue;
    const auto& type = op_node->Op()->Type();
    if (blacklist.count(type)) {
      cast_input(graph, op_node, blacklist);
      cast_output(graph, op_node, blacklist);
    }
  }
}

void MixedPrecisionConfigurePass::ApplyImpl(Graph* graph) const {
  const auto blacklist =
      Get<std::unordered_set<std::string>>("gpu_fp16_disabled_op_types");
  InsertCastOps(graph, blacklist);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(mixed_precision_configure_pass,
              paddle::framework::ir::MixedPrecisionConfigurePass);
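The deleted pass walks ops in topological order and, for each op whose type is in the gpu_fp16_disabled_op_types blacklist, wraps its non-persistable float inputs and outputs in cast ops; the dtype codes follow framework.proto (4 = FP16, 5 = FP32). A toy illustration of just the naming and dtype-code scheme, not Paddle code:

    // Toy sketch of the cast-insertion scheme: a new var "<name>_cast.tmp_<n>"
    // is created and a cast op bridges the old var to the new one.
    #include <iostream>
    #include <string>
    #include <vector>

    struct CastOp {
      std::string x, out;
      int in_dtype, out_dtype;  // framework.proto codes: 4 = FP16, 5 = FP32
    };

    int main() {
      std::vector<CastOp> inserted;
      std::string var = "conv2d_out";
      int suffix = 0;
      // Before a blacklisted (fp32-only) op: cast the fp16 activation up.
      std::string up = var + "_cast.tmp_" + std::to_string(suffix++);
      inserted.push_back({var, up, /*in=*/4, /*out=*/5});
      // After it: cast the fp32 result back down for the next fp16 kernel.
      std::string down = up + "_cast.tmp_" + std::to_string(suffix++);
      inserted.push_back({up, down, /*in=*/5, /*out=*/4});
      for (const auto& c : inserted)
        std::cout << "cast(" << c.x << " -> " << c.out << ", dtype "
                  << c.in_dtype << " -> " << c.out_dtype << ")\n";
    }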
paddle/fluid/framework/ir/mixed_precision_configure_pass.h (deleted, file mode 100644 → 0)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/framework/ir/fuse_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

using StringSet = std::unordered_set<std::string>;

class MixedPrecisionConfigurePass : public FusePassBase {
 public:
  MixedPrecisionConfigurePass() = default;
  virtual ~MixedPrecisionConfigurePass() {}

 protected:
  void ApplyImpl(Graph* graph) const override;

 private:
  void InsertCastOps(Graph* graph, const StringSet& blacklist) const;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/inference/analysis/argument.h

All but one of the hunks in this file are formatting-only: clang-format re-wraps the PADDLE_ENFORCE_* calls and the multi-line DECL_ARGUMENT_FIELD / DECL_ARGUMENT_UNIQUE_FIELD invocations so that each argument sits on its own line. A representative example from the DECL_ARGUMENT_FIELD macro:

@@ -80,7 +80,8 @@ struct Argument {
  public:                                                                 \
   type__& field__() {                                                    \
     PADDLE_ENFORCE_EQ(                                                   \
-        Has(#field__), true,                                             \
+        Has(#field__),                                                   \
+        true,                                                            \
         platform::errors::PreconditionNotMet("There is no such field")); \
     return field__##_;                                                   \
   }                                                                      \

The same re-wrapping is applied to DECL_ARGUMENT_UNIQUE_FIELD (@@ -97,41 +98,45), to the declarations of ir_analysis_passes, analysis_passes, mkldnn_enabled_op_types, quantize_enabled_op_types, quantize_excluded_op_ids, bfloat16_enabled_op_types (@@ -153,34 +158,40), the tensorrt_* fields (@@ -209,25 +217,33), the dlnne_* and lite_* fields (@@ -235,10 +251,12), the nnadapter_* fields (@@ -252,19 +270,26), ir_analyzed_program and cpu_math_library_num_threads (@@ -275,13 +300,15), the ipu_* fields (@@ -293,7 +320,8), and the ARGUMENT_CHECK_FIELD macro (@@ -306,7 +334,8).

The single substantive hunk reverts the GPU fp16 fields:

@@ -190,9 +201,6 @@ struct Argument {
   DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool);
   DECL_ARGUMENT_FIELD(use_fc_padding, UseFcPadding, bool);
   DECL_ARGUMENT_FIELD(gpu_device_id, GPUDeviceId, int);
-  DECL_ARGUMENT_FIELD(use_gpu_fp16, UseGPUFp16, bool);
-  DECL_ARGUMENT_FIELD(gpu_fp16_disabled_op_types, GpuFp16DisabledOpTypes,
-                      std::unordered_set<std::string>);

   // Usually use for trt dynamic shape.
   // TRT will select the best kernel according to opt shape
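The DECL_ARGUMENT_FIELD family above is an accessor-generating macro: each invocation expands to a getter guarded by Has(), a Set##Field setter that records the field name in valid_fields_, and a field__##_valid() query. A minimal self-contained sketch of the pattern, with simplified types and assert standing in for PADDLE_ENFORCE_EQ:

    // Sketch of Argument's field macro (simplified; not the real macro).
    #include <cassert>
    #include <string>
    #include <unordered_set>

    #define DECL_FIELD(field__, Field, type__)                 \
     public:                                                   \
      type__& field__() {                                      \
        assert(Has(#field__) && "There is no such field");     \
        return field__##_;                                     \
      }                                                        \
      void Set##Field(const type__& x) {                       \
        field__##_ = x;                                        \
        valid_fields_.insert(#field__);                        \
      }                                                        \
      bool field__##_valid() const { return Has(#field__); }   \
                                                               \
     private:                                                  \
      type__ field__##_{};

    struct Argument {
      bool Has(const std::string& name) const {
        return valid_fields_.count(name) > 0;
      }
      DECL_FIELD(use_gpu, UseGPU, bool)
     private:
      std::unordered_set<std::string> valid_fields_;
    };

    int main() {
      Argument arg;
      arg.SetUseGPU(true);                           // marks the field valid
      assert(arg.use_gpu_valid() && arg.use_gpu());  // guarded accessor
    }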
paddle/fluid/inference/analysis/ir_pass_manager.cc

Formatting-only hunks re-wrap the pass->Set(...) calls for max/min/optim_input_shape (@@ -68,12 +68,15), the PADDLE_ENFORCE_EQ checks on int8_valid and optim_cache_dir.empty() (@@ -143,14 +146,16), the PADDLE_ENFORCE_NE around MKDIR (@@ -161,7 +166,8), the trt_disabled_ops setter (@@ -187,8 +193,9), and the PADDLE_ENFORCE_NOT_NULL graph check in IRPassManager::Apply (@@ -272,8 +275,9).

The substantive hunk removes the wiring for the deleted pass:

@@ -200,10 +207,6 @@ void IRPassManager::CreatePasses(Argument *argument,
               new int(argument->dlnne_min_subgraph_size()));
     pass->Set("program",
               new framework::ProgramDesc *(&argument->main_program()));
-  } else if (pass_name == "mixed_precision_configure_pass") {
-    pass->Set("gpu_fp16_disabled_op_types",
-              new std::unordered_set<std::string>(
-                  argument->gpu_fp16_disabled_op_types()));
   }
   if (pass_name == "lite_subgraph_pass") {
     bool lite_enable_int8 =
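CreatePasses configures each pass through the Set(name, new T(...)) calls seen above: the pass takes ownership of a heap-allocated attribute keyed by name and reads it back later with Get<T>. A standalone sketch of that ownership pattern (not the real framework::ir::Pass interface):

    // Sketch of the Set/Get attribute pattern used by IR passes.
    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    class Pass {
     public:
      template <typename T>
      void Set(const std::string& name, T* value) {
        // shared_ptr<void> remembers T's deleter, so ownership is safe.
        attrs_[name] = std::shared_ptr<void>(value);
      }
      template <typename T>
      T& Get(const std::string& name) {
        return *static_cast<T*>(attrs_.at(name).get());
      }

     private:
      std::map<std::string, std::shared_ptr<void>> attrs_;
    };

    int main() {
      Pass pass;
      pass.Set("enable_int8", new bool(true));
      std::cout << std::boolalpha << pass.Get<bool>("enable_int8") << "\n";
    }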
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc

@@ -15,7 +15,6 @@ drops the include that the removed graph-walking helper needed:

 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/framework/data_layout.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/enforce.h"

Formatting-only hunks re-wrap the PADDLE_ENFORCE_EQ on npu_device_id_valid (@@ -37,7 +36,8), the PADDLE_ENFORCE_NOT_NULL var checks (@@ -46,8 +46,9), the PADDLE_ENFORCE_EQ on gpu_device_id_valid (@@ -100,7 +81,8), and the PADDLE_ENFORCE_EQ on scope_valid in RunImpl (@@ -180,7 +142,8).

@@ -67,26 +68,6 @@ removes the helper that mapped each persistable input to its consuming op type:

-void IrParamsSyncAmongDevicesPass::GetVarNameToOpTypeMap(
-    const framework::ir::Graph &graph,
-    std::unordered_map<std::string, std::string> *var_name_op_type_map) {
-  std::vector<framework::ir::Node *> node_list =
-      framework::ir::TopologyVarientSort(
-          graph, static_cast<framework::ir::SortKind>(0));
-  for (auto *op_node : node_list) {
-    if (!op_node->IsOp() || op_node->Op()->Type() == "feed" ||
-        op_node->Op()->Type() == "fetch")
-      continue;
-    for (auto *pre_node : op_node->inputs) {
-      if (pre_node->IsVar() && pre_node->Var()->Persistable()) {
-        var_name_op_type_map->insert(std::pair<std::string, std::string>(
-            pre_node->Var()->Name(), op_node->Op()->Type()));
-      }
-    }
-  }
-}
-
 void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
   // The parameters are on the cpu, therefore, synchronization is not necessary.
   if (!argument->use_gpu()) return;

@@ -124,54 +106,34 @@ drops the fp16 weight conversion from CopyParamsToGpu:

   if (with_dynamic_shape) {
     reserve_cpu_weights = true;
   }
-
-  bool mixed_precision_mode =
-      argument->Has("use_gpu_fp16") && argument->use_gpu_fp16();
-  std::unordered_map<std::string, std::string> var_name_op_type_map{};
-  std::unordered_set<std::string> blacklist{};
-  if (mixed_precision_mode) {
-    GetVarNameToOpTypeMap(graph, &var_name_op_type_map);
-    blacklist = argument->gpu_fp16_disabled_op_types();
-  }
   for (auto &var_name : all_vars) {
     if (std::count(
             repetitive_params.begin(), repetitive_params.end(), var_name)) {
       if (!reserve_cpu_weights) {
         scope->EraseVars({var_name});
       }
       continue;
     }
     auto *var = scope->FindLocalVar(var_name);
     PADDLE_ENFORCE_NOT_NULL(
         var,
         platform::errors::PreconditionNotMet("The var should not be nullptr"));
     if (var->IsType<framework::LoDTensor>() ||
         var->IsType<framework::Tensor>()) {
       auto *t = var->GetMutable<framework::LoDTensor>();
-      bool is_float =
-          t->dtype() == paddle::experimental::DataType::FLOAT32 ||
-          t->dtype() == paddle::experimental::DataType::FLOAT64;
-      if (mixed_precision_mode &&
-          !blacklist.count(var_name_op_type_map[var_name]) && is_float) {
-        framework::Tensor half_tensor;
-        half_tensor.set_type(paddle::experimental::DataType::FLOAT16);
-        half_tensor.Resize(t->dims());
-        auto *half_data =
-            half_tensor.mutable_data<float16>(platform::CPUPlace());
-        for (int i = 0; i < t->numel(); i++) {
-          auto *data = t->mutable_data<float>(platform::CPUPlace());
-          half_data[i] = static_cast<float16>(data[i]);
-        }
-        t->clear();
-        paddle::framework::TensorCopySync(half_tensor, place, t);
-      } else {
-        platform::CPUPlace cpu_place;
-        framework::LoDTensor temp_tensor;
-        temp_tensor.Resize(t->dims());
-        paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
-        t->clear();
-        paddle::framework::TensorCopySync(temp_tensor, place, t);
-      }
+      platform::CPUPlace cpu_place;
+      framework::LoDTensor temp_tensor;
+      temp_tensor.Resize(t->dims());
+      temp_tensor.mutable_data<float>(cpu_place);
+      // Copy the parameter data to a tmp tensor.
+      paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
+      // Reallocation the space on GPU
+      t->clear();
+      // Copy parameter data to newly allocated GPU space.
+      paddle::framework::TensorCopySync(temp_tensor, place, t);
     }
   }
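The deleted branch above narrowed eligible fp32 weights to float16 element by element before copying them to the GPU. The sketch below mimics that narrowing loop with a naive truncating float-to-half conversion; Paddle's float16 type handles rounding and special values properly, which this stand-in ignores:

    // Sketch of fp32 -> fp16 weight narrowing (truncating; illustration only).
    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    uint16_t FloatToHalfTruncate(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      uint16_t sign = (bits >> 16) & 0x8000;
      int32_t exp = ((bits >> 23) & 0xFF) - 127 + 15;  // re-bias exponent
      uint16_t mant = (bits >> 13) & 0x3FF;            // truncate mantissa
      if (exp <= 0) return sign;                       // flush small values to 0
      if (exp >= 31) return sign | 0x7C00;             // overflow to infinity
      return sign | (uint16_t(exp) << 10) | mant;
    }

    int main() {
      std::vector<float> weights = {1.0f, -0.5f, 65504.0f};
      std::vector<uint16_t> half_weights(weights.size());
      for (size_t i = 0; i < weights.size(); ++i)
        half_weights[i] = FloatToHalfTruncate(weights[i]);
      // Prints 3c00 b800 7bff, the half encodings of 1.0, -0.5 and 65504.
      for (auto h : half_weights) std::cout << std::hex << h << " ";
      std::cout << "\n";
    }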
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h

@@ -38,12 +38,7 @@ class IrParamsSyncAmongDevicesPass : public AnalysisPass {
 #ifdef PADDLE_WITH_ASCEND_CL
   void CopyParamsToNpu(Argument *argument);
 #else
-  void GetVarNameToOpTypeMap(
-      const framework::ir::Graph &graph,
-      std::unordered_map<std::string, std::string> *var_name_op_type_map);
-
   void CopyParamsToGpu(Argument *argument);
 #endif
 };
paddle/fluid/inference/api/analysis_config.cc

Formatting-only hunks re-wrap function signatures and PADDLE_ENFORCE_* calls: EnableUseGpu (@@ -84,7 +84,6), SetExecStream and GetExecStream (@@ -101,16 +100,18), SetXpuDeviceId (@@ -158,7 +148,8), EnableIpu (@@ -190,7 +181,8), SetIpuConfig (@@ -204,7 +196,8), the use_xpu_ check in the copy constructor (@@ -366,7 +357,8), std::set_difference (@@ -406,8 +398,10), EnableTensorRtEngine (@@ -516,8 +510,11), the use_gpu()/use_npu()/use_custom_device() checks in Update() (@@ -594,19 +591,22 and @@ -624,21 +624,24), the kLiteSubgraphPasses filter loop and the XPU/NPU guards (@@ -749,7 +738,8, @@ -758,7 +748,8, @@ -771,7 +762,8), EnableLiteEngine (@@ -957,7 +947,8), the Summary() rows (@@ -1057,9 +1048,9), SetModelCacheBuffers (@@ -1123,10 +1114,12), and CollectShapeRangeInfo (@@ -1165,7 +1158,8).

The substantive hunks remove the fp16 API and its state:

@@ -124,27 +125,16 @@ void AnalysisConfig::DisableGpu() {
   Update();
 }

-void AnalysisConfig::Exp_EnableUseGpuFp16(
-    std::unordered_set<std::string> op_list) {
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  use_gpu_fp16_ = true;
-  gpu_fp16_disabled_op_types_.insert(op_list.begin(), op_list.end());
-#else
-  LOG(ERROR) << "Please compile with gpu to Exp_EnableUseGpuFp16()";
-  use_gpu_fp16_ = false;
-#endif
-  Update();
-}
-
 void AnalysisConfig::DisableFCPadding() {
   use_fc_padding_ = false;
   Update();
 }

@@ -262,8 +255,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_cudnn_);
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);
-  CP_MEMBER(use_gpu_fp16_);
-  CP_MEMBER(gpu_fp16_disabled_op_types_);

   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.

@@ -677,20 +680,6 @@ void AnalysisConfig::Update() {
 #endif
   }

-  if (use_gpu_fp16_) {
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (!enable_ir_optim_) {
-      LOG(ERROR) << "Exp_EnableUseGpuFp16() only works when IR optimization is "
-                    "enabled.";
-    } else if (!use_gpu()) {
-      LOG(ERROR) << "Exp_EnableUseGpuFp16() only works when use_gpu is enabled.";
-    } else {
-      pass_builder()->Exp_EnableUseGpuFp16();
-    }
-#endif
-  }
-
   if (use_mkldnn_) {
 #ifdef PADDLE_WITH_MKLDNN
     if (!enable_ir_optim_) {

@@ -809,8 +801,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_gpu_;
   ss << use_external_stream_;
   ss << exec_stream_;
-  ss << use_gpu_fp16_;
-  for (auto &item : gpu_fp16_disabled_op_types_) ss << item;
   ss << use_fc_padding_;
   ss << gpu_device_id_;
   ss << xpu_device_id_;
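One of the re-wrapped hunks above (@@ -406,8 +398,10) is the copy constructor's pass-diff logic: passes present in the default pass list but absent from the copied config were deleted by the user, so the copy deletes them as well. A standalone sketch of that mechanism, with illustrative pass names:

    // Sketch of the std::set_difference pass-deletion logic.
    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> all_passes = {"a_pass", "b_pass", "c_pass"};
      std::vector<std::string> other_passes = {"a_pass", "c_pass"};
      // set_difference requires sorted ranges.
      std::sort(all_passes.begin(), all_passes.end());
      std::sort(other_passes.begin(), other_passes.end());
      std::vector<std::string> deleted_passes;
      std::set_difference(all_passes.begin(), all_passes.end(),
                          other_passes.begin(), other_passes.end(),
                          std::back_inserter(deleted_passes));
      // Prints b_pass: the pass the user removed, which the copy
      // would also delete via pass_builder_->DeletePass(ps).
      for (const auto& ps : deleted_passes) std::cout << ps << "\n";
    }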
paddle/fluid/inference/api/analysis_predictor.cc

@@ -1048,11 +1048,6 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetDlnneMinSubgraphSize(config_.dlnne_min_subgraph_size_);
   }

-  if (config_.gpu_fp16_enabled()) {
-    argument_.SetUseGPUFp16(true);
-    argument_.SetGpuFp16DisabledOpTypes(config_.gpu_fp16_disabled_op_types_);
-  }
-
   if (config_.lite_engine_enabled()) {
     argument_.SetCpuMathLibraryNumThreads(
         config_.cpu_math_library_num_threads());
paddle/fluid/inference/api/analysis_predictor_tester.cc

@@ -371,19 +371,6 @@ removes the config-level test:

   ASSERT_TRUE(!config.use_onnxruntime());
 }

-TEST(AnalysisPredictor, exp_enable_use_gpu_fp16) {
-  AnalysisConfig config;
-  config.SwitchIrOptim();
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  config.EnableUseGpu(100, 0);
-  config.Exp_EnableUseGpuFp16();
-  ASSERT_TRUE(config.gpu_fp16_enabled());
-#else
-  config.DisableGpu();
-#endif
-  LOG(INFO) << config.Summary();
-}
-
 }  // namespace paddle

 namespace paddle_infer {

@@ -443,19 +430,6 @@ removes the predictor-level test:

   auto predictor = CreatePredictor(config);
 }

-TEST(Predictor, Exp_EnableUseGpuFp16) {
-  Config config;
-  config.SetModel(FLAGS_dirname);
-  config.SwitchIrOptim();
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  config.EnableUseGpu(100, 0);
-  config.Exp_EnableUseGpuFp16();
-#else
-  config.DisableGpu();
-#endif
-  auto predictor = CreatePredictor(config);
-}
-
 TEST(Tensor, CpuShareExternalData) {
   Config config;
   config.SetModel(FLAGS_dirname);

The remaining hunks (@@ -476,8 +450,8, @@ -507,7 +481,9, @@ -519,9 +495,10) only re-wrap the std::accumulate, cudaMemcpy and cudaMalloc calls in TEST(Tensor, CpuShareExternalData) and TEST(Tensor, GpuShareExternalData).
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -253,19 +253,6 @@ removes the public API and its query:

   ///
   void DisableGpu();
-
-  ///
-  /// \brief Enable GPU fp16 precision computation, in experimental state.
-  ///
-  /// \param op_list The operator type list.
-  ///
-  void Exp_EnableUseGpuFp16(std::unordered_set<std::string> op_list = {});
-  ///
-  /// \brief A boolean state telling whether the GPU fp16 precision is turned
-  /// on.
-  ///
-  /// \return bool Whether the GPU fp16 precision is turned on.
-  ///
-  bool gpu_fp16_enabled() const { return use_gpu_fp16_; }

   ///
   /// \brief Turn on XPU.

Formatting-only hunks re-wrap the declarations of EnableXpu (@@ -287,8 +274,10), EnableIpu (@@ -301,7 +290,8), SetIpuConfig (@@ -315,7 +305,8), EnableTensorRtEngine (@@ -525,7 +516,8), and SetModelBuffer (@@ -821,8 +813,10).

@@ -929,20 +923,6 @@ removes the backing members. Note the missing comma after "bilinear_interp_v2" in the deleted initializer, which silently concatenated it with "yolo_box" into one string, so neither op type was actually matched:

   int gpu_device_id_{0};
   uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
   bool thread_local_stream_{false};
-  bool use_gpu_fp16_{false};
-  std::unordered_set<std::string> gpu_fp16_disabled_op_types_{
-      "conv2d_fusion",
-      "conv2d",
-      "roll",
-      "strided_slice",
-      "depthwise_conv2d",
-      "unfold",
-      "generate_proposals_v2",
-      "nearest_interp_v2",
-      "bilinear_interp_v2"
-      "yolo_box",
-      "multiclass_nms3",
-      "matrix_nms"};

   bool use_cudnn_{false};
   bool use_external_stream_{false};
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -194,40 +194,6 @@ void GpuPassStrategy::EnableCUDNN() {
   use_cudnn_ = true;
 }

-void GpuPassStrategy::Exp_EnableUseGpuFp16() {
-  passes_.assign({
-    "is_test_pass",                               //
-    "simplify_with_basic_ops_pass",               //
-    "conv_bn_fuse_pass",                          //
-    "conv_eltwiseadd_bn_fuse_pass",               //
-    "embedding_eltwise_layernorm_fuse_pass",      //
-    "multihead_matmul_fuse_pass_v2",              //
-    "gpu_cpu_squeeze2_matmul_fuse_pass",          //
-    "gpu_cpu_reshape2_matmul_fuse_pass",          //
-    "gpu_cpu_flatten2_matmul_fuse_pass",          //
-    "gpu_cpu_map_matmul_v2_to_mul_pass",          //
-    "gpu_cpu_map_matmul_v2_to_matmul_pass",       //
-    "gpu_cpu_map_matmul_to_mul_pass",             //
-    // "fc_fuse_pass",                            //
-    "fc_elementwise_layernorm_fuse_pass",         //
-#if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
-                           // guaranteed at least v7
-// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
-// disable the pass.
-#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
-    "conv_elementwise_add_act_fuse_pass",   //
-    "conv_elementwise_add2_act_fuse_pass",  //
-#endif
-    "conv_elementwise_add_fuse_pass",       //
-#endif                                      //
-    "transpose_flatten_concat_fuse_pass",   //
-    "mixed_precision_configure_pass",       //
-    "runtime_context_cache_pass"            //
-  });
-  use_gpu_fp16_ = true;
-}
-
 void GpuPassStrategy::EnableMKLDNN() {
   LOG(ERROR) << "GPU not support MKLDNN yet";
 }
paddle/fluid/inference/api/paddle_pass_builder.h

@@ -109,8 +109,11 @@ only re-wraps the analysis_passes_ initializer:

   std::vector<std::string> analysis_passes_{
       {"ir_graph_build_pass",
        "ir_graph_clean_pass",
        "ir_analysis_pass",
        "ir_params_sync_among_devices_pass",
        "adjust_cudnn_workspace_size_pass",
        "inference_op_replace_pass"}};
   std::vector<std::string> passes_;

The remaining hunks remove the fp16 hooks:

@@ -129,9 +132,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \brief Enable the use of cuDNN kernel.
   virtual void EnableCUDNN() {}

-  /// \brief Enable use gpu fp16 kernel.
-  virtual void Exp_EnableUseGpuFp16() {}
-
   /// \brief Enable the use of MKLDNN.
   /// The MKLDNN control exists in both CPU and GPU mode, because there can
   /// still be some CPU kernels running in GPU mode.

@@ -150,10 +150,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in gpu mode.
   bool use_gpu() const { return use_gpu_; }

-  /// \brief Check if we are using gpu fp16 kernel.
-  /// \return A bool variable implying whether we are in gpu fp16 mode.
-  bool use_gpu_fp16() const { return use_gpu_fp16_; }
-
   /// \brief Check if we are using xpu.
   /// \return A bool variable implying whether we are in xpu mode.
   bool use_xpu() const { return use_xpu_; }

@@ -180,7 +176,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_npu_{false};
   bool use_ipu_{false};
   bool use_mkldnn_{false};
-  bool use_gpu_fp16_{false};
   bool use_custom_device_{false};
   /// \endcond
 };

@@ -248,9 +243,6 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
   /// \brief Enable the use of cuDNN kernel.
   void EnableCUDNN() override;

-  /// \brief Enable the use of gpu fp16 kernel.
-  void Exp_EnableUseGpuFp16() override;
-
   /// \brief Not supported in GPU mode yet.
   void EnableMKLDNN() override;

@@ -269,7 +261,6 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
  protected:
   /// \cond Protected
   bool use_cudnn_{false};
-  bool use_gpu_fp16_{false};
   /// \endcond
 };
paddle/fluid/pybind/inference_api.cc

@@ -113,7 +113,8 and @@ -123,7 +124,8 only re-wrap the std::copy_n calls in PaddleBufCreate and PaddleBufReset:

 template <typename T>
 PaddleBuf PaddleBufCreate(
     py::array_t<T, py::array::c_style | py::array::forcecast> data) {
   PaddleBuf buf(data.size() * sizeof(T));
   std::copy_n(static_cast<const T *>(data.data()),
               data.size(),
               static_cast<T *>(buf.data()));
   return buf;
 }

 template <typename T>
 void PaddleBufReset(
     PaddleBuf &buf,  // NOLINT
     py::array_t<T, py::array::c_style | py::array::forcecast> data) {  // NOLINT
   buf.Resize(data.size() * sizeof(T));
   std::copy_n(static_cast<const T *>(data.data()),
               data.size(),
               static_cast<T *>(buf.data()));
 }
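The bindings in this file repeatedly use one pybind11 pattern: a C++ factory function exposed as a Python constructor via py::init, with named and defaulted keyword arguments. A minimal sketch of the pattern; the module and type names are invented for illustration:

    // Sketch of the py::init(&Factory) + py::arg defaults pattern.
    #include <pybind11/pybind11.h>
    #include <pybind11/stl.h>
    #include <string>
    #include <vector>
    namespace py = pybind11;

    struct Tensor {
      std::vector<float> data;
      std::string name;
    };

    // Factory used as the Python-visible constructor.
    Tensor TensorCreate(std::vector<float> data, const std::string& name) {
      return Tensor{std::move(data), name};
    }

    PYBIND11_MODULE(example, m) {
      py::class_<Tensor>(m, "Tensor")
          // Python: Tensor([1.0, 2.0]) or Tensor(data=[1.0], name="w")
          .def(py::init(&TensorCreate), py::arg("data"), py::arg("name") = "");
    }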
...
...
@@ -131,12 +133,14 @@ template <typename T>
PaddleTensor
PaddleTensorCreate
(
py
::
array_t
<
T
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
data
,
const
std
::
string
name
=
""
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
=
{},
bool
copy
=
true
)
{
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
=
{},
bool
copy
=
true
)
{
PaddleTensor
tensor
;
if
(
copy
)
{
PaddleBuf
buf
(
data
.
size
()
*
sizeof
(
T
));
std
::
copy_n
(
static_cast
<
const
T
*>
(
data
.
data
()),
data
.
size
(),
std
::
copy_n
(
static_cast
<
const
T
*>
(
data
.
data
()),
data
.
size
(),
static_cast
<
T
*>
(
buf
.
data
()));
tensor
.
data
=
std
::
move
(
buf
);
}
else
{
...
...
@@ -235,11 +239,13 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
}
if
(
input_tensor
.
dtype
()
==
phi
::
DataType
::
FLOAT32
)
{
tensor
.
ShareExternalData
(
static_cast
<
float
*>
(
input_tensor
.
data
()),
shape
,
static_cast
<
float
*>
(
input_tensor
.
data
()),
shape
,
ToPaddleInferPlace
(
input_tensor
.
place
().
GetType
()));
}
else
if
(
input_tensor
.
dtype
()
==
phi
::
DataType
::
FLOAT16
)
{
tensor
.
ShareExternalData
(
static_cast
<
paddle
::
platform
::
float16
*>
(
input_tensor
.
data
()),
shape
,
static_cast
<
paddle
::
platform
::
float16
*>
(
input_tensor
.
data
()),
shape
,
ToPaddleInferPlace
(
input_tensor
.
place
().
GetType
()));
}
}
...
...
@@ -379,9 +385,11 @@ void BindInferenceApi(py::module *m) {
BindMkldnnQuantizerConfig
(
m
);
#endif
m
->
def
(
"create_paddle_predictor"
,
&
paddle
::
CreatePaddlePredictor
<
AnalysisConfig
>
,
py
::
arg
(
"config"
));
&
paddle
::
CreatePaddlePredictor
<
AnalysisConfig
>
,
py
::
arg
(
"config"
));
m
->
def
(
"create_paddle_predictor"
,
&
paddle
::
CreatePaddlePredictor
<
NativeConfig
>
,
py
::
arg
(
"config"
));
&
paddle
::
CreatePaddlePredictor
<
NativeConfig
>
,
py
::
arg
(
"config"
));
m
->
def
(
"create_predictor"
,
[](
const
paddle_infer
::
Config
&
config
)
->
std
::
unique_ptr
<
paddle_infer
::
Predictor
>
{
...
...
@@ -478,15 +486,18 @@ void BindPaddleBuf(py::module *m) {
void
BindPaddleTensor
(
py
::
module
*
m
)
{
py
::
class_
<
PaddleTensor
>
(
*
m
,
"PaddleTensor"
)
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
(
&
PaddleTensorCreate
<
int32_t
>
),
py
::
arg
(
"data"
),
.
def
(
py
::
init
(
&
PaddleTensorCreate
<
int32_t
>
),
py
::
arg
(
"data"
),
py
::
arg
(
"name"
)
=
""
,
py
::
arg
(
"lod"
)
=
std
::
vector
<
std
::
vector
<
size_t
>>
(),
py
::
arg
(
"copy"
)
=
true
)
.
def
(
py
::
init
(
&
PaddleTensorCreate
<
int64_t
>
),
py
::
arg
(
"data"
),
.
def
(
py
::
init
(
&
PaddleTensorCreate
<
int64_t
>
),
py
::
arg
(
"data"
),
py
::
arg
(
"name"
)
=
""
,
py
::
arg
(
"lod"
)
=
std
::
vector
<
std
::
vector
<
size_t
>>
(),
py
::
arg
(
"copy"
)
=
true
)
.
def
(
py
::
init
(
&
PaddleTensorCreate
<
float
>
),
py
::
arg
(
"data"
),
.
def
(
py
::
init
(
&
PaddleTensorCreate
<
float
>
),
py
::
arg
(
"data"
),
py
::
arg
(
"name"
)
=
""
,
py
::
arg
(
"lod"
)
=
std
::
vector
<
std
::
vector
<
size_t
>>
(),
py
::
arg
(
"copy"
)
=
true
)
...
...
@@ -563,7 +574,8 @@ void BindNativePredictor(py::module *m) {
.
def
(
"get_output_tensor"
,
&
NativePaddlePredictor
::
GetOutputTensor
)
.
def
(
"zero_copy_run"
,
&
NativePaddlePredictor
::
ZeroCopyRun
)
.
def
(
"clone"
,
&
NativePaddlePredictor
::
Clone
)
.
def
(
"scope"
,
&
NativePaddlePredictor
::
scope
,
.
def
(
"scope"
,
&
NativePaddlePredictor
::
scope
,
py
::
return_value_policy
::
reference
);
}
...
...
@@ -581,8 +593,9 @@ void BindAnalysisConfig(py::module *m) {
.
def
(
py
::
init
<
const
std
::
string
&>
())
.
def
(
py
::
init
<
const
std
::
string
&
,
const
std
::
string
&>
())
.
def
(
"summary"
,
&
AnalysisConfig
::
Summary
)
.
def
(
"set_model"
,
(
void
(
AnalysisConfig
::*
)(
const
std
::
string
&
))
&
AnalysisConfig
::
SetModel
)
.
def
(
"set_model"
,
(
void
(
AnalysisConfig
::*
)(
const
std
::
string
&
))
&
AnalysisConfig
::
SetModel
)
.
def
(
"set_model"
,
(
void
(
AnalysisConfig
::*
)(
const
std
::
string
&
,
const
std
::
string
&
))
&
AnalysisConfig
::
SetModel
)
...
...
@@ -591,25 +604,32 @@ void BindAnalysisConfig(py::module *m) {
       .def("model_dir", &AnalysisConfig::model_dir)
       .def("prog_file", &AnalysisConfig::prog_file)
       .def("params_file", &AnalysisConfig::params_file)
-      .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
-           py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
-      .def("exp_enable_use_gpu_fp16", &AnalysisConfig::Exp_EnableUseGpuFp16,
-           py::arg("gpu_fp16_disabled_op_types") =
-               std::unordered_set<std::string>({}))
-      .def("enable_xpu", &AnalysisConfig::EnableXpu,
+      .def("enable_use_gpu",
+           &AnalysisConfig::EnableUseGpu,
+           py::arg("memory_pool_init_size_mb"),
+           py::arg("device_id") = 0)
+      .def("enable_xpu",
+           &AnalysisConfig::EnableXpu,
            py::arg("l3_workspace_size") = 16 * 1024 * 1024,
-           py::arg("locked") = false, py::arg("autotune") = true,
-           py::arg("autotune_file") = "", py::arg("precision") = "int16",
+           py::arg("locked") = false,
+           py::arg("autotune") = true,
+           py::arg("autotune_file") = "",
+           py::arg("precision") = "int16",
            py::arg("adaptive_seqlen") = false)
-      .def("set_xpu_device_id", &AnalysisConfig::SetXpuDeviceId,
+      .def("set_xpu_device_id",
+           &AnalysisConfig::SetXpuDeviceId,
            py::arg("device_id") = 0)
       .def("enable_npu", &AnalysisConfig::EnableNpu, py::arg("device_id") = 0)
-      .def("enable_ipu", &AnalysisConfig::EnableIpu,
-           py::arg("ipu_device_num") = 1, py::arg("ipu_micro_batch_size") = 1,
+      .def("enable_ipu",
+           &AnalysisConfig::EnableIpu,
+           py::arg("ipu_device_num") = 1,
+           py::arg("ipu_micro_batch_size") = 1,
            py::arg("ipu_enable_pipelining") = false,
            py::arg("ipu_batches_per_step") = 1)
-      .def("set_ipu_config", &AnalysisConfig::SetIpuConfig,
-           py::arg("ipu_enable_fp16") = false, py::arg("ipu_replica_num") = 1,
+      .def("set_ipu_config",
+           &AnalysisConfig::SetIpuConfig,
+           py::arg("ipu_enable_fp16") = false,
+           py::arg("ipu_replica_num") = 1,
            py::arg("ipu_available_memory_proportion") = 1.0,
            py::arg("ipu_enable_half_partial") = false)
       .def("disable_gpu", &AnalysisConfig::DisableGpu)
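After this revert, GPU setup is back to the plain enable_use_gpu path (the experimental exp_enable_use_gpu_fp16 switch is removed). A sketch of picking a device from Python; the numbers are illustrative:

    from paddle.inference import Config

    config = Config("./m.pdmodel", "./m.pdiparams")
    config.enable_use_gpu(100, 0)  # 100 MB initial memory pool on GPU 0
    # XPU equivalent, using the keyword defaults shown in the binding:
    # config.enable_xpu(l3_workspace_size=16 * 1024 * 1024)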
...
...
@@ -627,27 +647,34 @@ void BindAnalysisConfig(py::module *m) {
            &AnalysisConfig::memory_pool_init_size_mb)
       .def("fraction_of_gpu_memory_for_pool",
            &AnalysisConfig::fraction_of_gpu_memory_for_pool)
-      .def("switch_ir_optim", &AnalysisConfig::SwitchIrOptim,
+      .def("switch_ir_optim",
+           &AnalysisConfig::SwitchIrOptim,
            py::arg("x") = true)
       .def("ir_optim", &AnalysisConfig::ir_optim)
-      .def("enable_memory_optim", &AnalysisConfig::EnableMemoryOptim,
+      .def("enable_memory_optim",
+           &AnalysisConfig::EnableMemoryOptim,
            py::arg("x") = true)
       .def("enable_profile", &AnalysisConfig::EnableProfile)
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
-      .def("switch_use_feed_fetch_ops", &AnalysisConfig::SwitchUseFeedFetchOps,
+      .def("switch_use_feed_fetch_ops",
+           &AnalysisConfig::SwitchUseFeedFetchOps,
            py::arg("x") = true)
       .def("use_feed_fetch_ops_enabled",
            &AnalysisConfig::use_feed_fetch_ops_enabled)
       .def("switch_specify_input_names",
-           &AnalysisConfig::SwitchSpecifyInputNames, py::arg("x") = true)
+           &AnalysisConfig::SwitchSpecifyInputNames,
+           py::arg("x") = true)
       .def("specify_input_name", &AnalysisConfig::specify_input_name)
-      .def("enable_tensorrt_engine", &AnalysisConfig::EnableTensorRtEngine,
-           py::arg("workspace_size") = 1 << 20, py::arg("max_batch_size") = 1,
+      .def("enable_tensorrt_engine",
+           &AnalysisConfig::EnableTensorRtEngine,
+           py::arg("workspace_size") = 1 << 20,
+           py::arg("max_batch_size") = 1,
            py::arg("min_subgraph_size") = 3,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
-           py::arg("use_static") = false, py::arg("use_calib_mode") = true)
+           py::arg("use_static") = false,
+           py::arg("use_calib_mode") = true)
       .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
       .def("set_trt_dynamic_shape_info",
            &AnalysisConfig::SetTRTDynamicShapeInfo,
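enable_tensorrt_engine keeps keyword defaults via py::arg, so Python callers can override arguments selectively. A sketch (the workspace size and other values are illustrative):

    from paddle.inference import Config, PrecisionType

    config = Config("./m.pdmodel", "./m.pdiparams")
    config.enable_use_gpu(100, 0)
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  max_batch_size=1,
                                  min_subgraph_size=3,
                                  precision_mode=PrecisionType.Float32,
                                  use_static=False,
                                  use_calib_mode=False)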
...
...
@@ -674,7 +701,8 @@ void BindAnalysisConfig(py::module *m) {
       .def("trt_allow_build_at_runtime",
            &AnalysisConfig::trt_allow_build_at_runtime)
       .def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)
-      .def("enable_tensorrt_dla", &AnalysisConfig::EnableTensorRtDLA,
+      .def("enable_tensorrt_dla",
+           &AnalysisConfig::EnableTensorRtDLA,
            py::arg("dla_core") = 0)
       .def("tensorrt_dla_enabled", &AnalysisConfig::tensorrt_dla_enabled)
       .def("enable_tensorrt_inspector",
...
...
@@ -682,15 +710,18 @@ void BindAnalysisConfig(py::module *m) {
       .def("tensorrt_inspector_enabled",
            &AnalysisConfig::tensorrt_inspector_enabled)
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
-      .def("enable_dlnne", &AnalysisConfig::EnableDlnne,
+      .def("enable_dlnne",
+           &AnalysisConfig::EnableDlnne,
            py::arg("min_subgraph_size") = 3)
-      .def("enable_lite_engine", &AnalysisConfig::EnableLiteEngine,
+      .def("enable_lite_engine",
+           &AnalysisConfig::EnableLiteEngine,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("zero_copy") = false,
            py::arg("passes_filter") = std::vector<std::string>(),
            py::arg("ops_filter") = std::vector<std::string>())
       .def("lite_engine_enabled", &AnalysisConfig::lite_engine_enabled)
-      .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug,
+      .def("switch_ir_debug",
+           &AnalysisConfig::SwitchIrDebug,
            py::arg("x") = true)
       .def("enable_mkldnn", &AnalysisConfig::EnableMKLDNN)
       .def("mkldnn_enabled", &AnalysisConfig::mkldnn_enabled)
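A sketch of the CPU path these bindings expose; whether MKLDNN actually engages depends on the build, so treat this as illustrative:

    from paddle.inference import Config

    config = Config("./m.pdmodel", "./m.pdiparams")
    config.disable_gpu()
    config.enable_mkldnn()
    assert config.mkldnn_enabled()
    config.switch_ir_debug(True)  # dump the graph after each IR pass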
...
...
@@ -702,12 +733,15 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
       .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
 #ifdef PADDLE_WITH_MKLDNN
-      .def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config,
+      .def("quantizer_config",
+           &AnalysisConfig::mkldnn_quantizer_config,
            py::return_value_policy::reference)
-      .def("set_mkldnn_cache_capacity", &AnalysisConfig::SetMkldnnCacheCapacity,
+      .def("set_mkldnn_cache_capacity",
+           &AnalysisConfig::SetMkldnnCacheCapacity,
            py::arg("capacity") = 0)
       .def("set_bfloat16_op", &AnalysisConfig::SetBfloat16Op)
-      .def("enable_mkldnn_int8", &AnalysisConfig::EnableMkldnnInt8,
+      .def("enable_mkldnn_int8",
+           &AnalysisConfig::EnableMkldnnInt8,
            py::arg("mkldnn_int8_enabled_op_types") =
                std::unordered_set<std::string>({}))
       .def("mkldnn_int8_enabled", &AnalysisConfig::mkldnn_int8_enabled)
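The MKLDNN quantization knobs are only compiled in under PADDLE_WITH_MKLDNN. A hedged sketch of driving them from Python (the cache capacity value is illustrative; an empty op set keeps the binding's default):

    from paddle.inference import Config

    config = Config("./m.pdmodel", "./m.pdiparams")
    config.disable_gpu()
    config.enable_mkldnn()
    config.set_mkldnn_cache_capacity(10)  # cache for varying input shapes
    config.enable_mkldnn_int8()           # default empty set, per the binding above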
...
...
@@ -807,17 +841,20 @@ void BindAnalysisPredictor(py::module *m) {
       .def("prepare_argument", &AnalysisPredictor::PrepareArgument)
       .def("optimize_inference_program",
            &AnalysisPredictor::OptimizeInferenceProgram)
-      .def("analysis_argument", &AnalysisPredictor::analysis_argument,
+      .def("analysis_argument",
+           &AnalysisPredictor::analysis_argument,
            py::return_value_policy::reference)
       .def("clone", &AnalysisPredictor::Clone)
-      .def("scope", &AnalysisPredictor::scope,
+      .def("scope",
+           &AnalysisPredictor::scope,
            py::return_value_policy::reference)
-      .def("program", &AnalysisPredictor::program,
+      .def("program",
+           &AnalysisPredictor::program,
            py::return_value_policy::reference)
       .def("get_serialized_program", &AnalysisPredictor::GetSerializedProgram)
       .def("mkldnn_quantize", &AnalysisPredictor::MkldnnQuantize)
-      .def("SaveOptimModel", &AnalysisPredictor::SaveOptimModel,
-           py::arg("dir"));
+      .def("SaveOptimModel",
+           &AnalysisPredictor::SaveOptimModel,
+           py::arg("dir"));
 }
 void BindPaddleInferPredictor(py::module *m) {
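SaveOptimModel is bound with py::arg("dir"), so it can be called by keyword from Python. A hedged sketch against the fluid-era module path (that the returned predictor is downcast to an AnalysisPredictor is an assumption here):

    from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

    config = AnalysisConfig("./model_dir")        # placeholder path
    predictor = create_paddle_predictor(config)   # an AnalysisPredictor in practice
    predictor.SaveOptimModel(dir="./optimized")   # writes the program after IR passes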
...
...
@@ -842,10 +879,12 @@ void BindPaddleInferPredictor(py::module *m) {
 void BindZeroCopyTensor(py::module *m) {
   py::class_<ZeroCopyTensor>(*m, "ZeroCopyTensor")
-      .def("reshape", py::overload_cast<const std::vector<int> &>(
-                          &ZeroCopyTensor::Reshape))
-      .def("reshape", py::overload_cast<const std::size_t &>(
-                          &paddle_infer::Tensor::ReshapeStrings))
+      .def("reshape",
+           py::overload_cast<const std::vector<int> &>(
+               &ZeroCopyTensor::Reshape))
+      .def("reshape",
+           py::overload_cast<const std::size_t &>(
+               &paddle_infer::Tensor::ReshapeStrings))
       .def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
       .def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
       .def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
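copy_from_cpu is overloaded per dtype through ZeroCopyTensorCreate<T>; pybind11 picks the overload from the numpy array's dtype. A hedged sketch using the fluid-era zero-copy API (method names assumed from that era):

    import numpy as np
    from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

    config = AnalysisConfig("./model_dir")
    config.switch_use_feed_fetch_ops(False)    # zero-copy tensors need this off
    predictor = create_paddle_predictor(config)

    name = predictor.get_input_names()[0]
    tensor = predictor.get_input_tensor(name)  # a ZeroCopyTensor
    data = np.ones((1, 3, 224, 224), dtype="float32")
    tensor.reshape(data.shape)
    tensor.copy_from_cpu(data)                 # picks ZeroCopyTensorCreate<float>
    predictor.zero_copy_run()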
...
...
@@ -860,10 +899,12 @@ void BindZeroCopyTensor(py::module *m) {
 void BindPaddleInferTensor(py::module *m) {
   py::class_<paddle_infer::Tensor>(*m, "PaddleInferTensor")
-      .def("reshape", py::overload_cast<const std::vector<int> &>(
-                          &paddle_infer::Tensor::Reshape))
-      .def("reshape", py::overload_cast<const std::size_t &>(
-                          &paddle_infer::Tensor::ReshapeStrings))
+      .def("reshape",
+           py::overload_cast<const std::vector<int> &>(
+               &paddle_infer::Tensor::Reshape))
+      .def("reshape",
+           py::overload_cast<const std::size_t &>(
+               &paddle_infer::Tensor::ReshapeStrings))
       .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int32_t>)
       .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int64_t>)
       .def("copy_from_cpu_bind", &PaddleInferTensorCreate<float>)
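copy_from_cpu_bind is the raw binding; the Python-side Tensor wrapper (as of Paddle 2.x, assumed here) re-exports it as copy_from_cpu. Sketch of a full run with the modern API:

    import numpy as np
    from paddle.inference import Config, create_predictor

    config = Config("./m.pdmodel", "./m.pdiparams")
    predictor = create_predictor(config)

    h = predictor.get_input_handle(predictor.get_input_names()[0])
    x = np.zeros((1, 3, 224, 224), dtype="float32")
    h.reshape(x.shape)
    h.copy_from_cpu(x)   # float32 array -> PaddleInferTensorCreate<float> overload
    predictor.run()
    out = predictor.get_output_handle(predictor.get_output_names()[0])
    result = out.copy_to_cpu()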
...
...
@@ -881,7 +922,8 @@ void BindPaddleInferTensor(py::module *m) {
 void BindPredictorPool(py::module *m) {
   py::class_<paddle_infer::services::PredictorPool>(*m, "PredictorPool")
       .def(py::init<const paddle_infer::Config &, size_t>())
-      .def("retrive", &paddle_infer::services::PredictorPool::Retrive,
+      .def("retrive",
+           &paddle_infer::services::PredictorPool::Retrive,
            py::return_value_policy::reference);
 }
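Because of return_value_policy::reference, the pool keeps ownership of the predictor it hands out. A hedged sketch (the PredictorPool export path is assumed):

    from paddle.inference import Config, PredictorPool  # export path assumed

    config = Config("./m.pdmodel", "./m.pdiparams")
    pool = PredictorPool(config, 4)  # a pool of 4 predictors sharing one config
    p = pool.retrive(1)              # the binding keeps the "retrive" spelling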
...
...
@@ -904,7 +946,8 @@ void BindPaddlePassBuilder(py::module *m) {
       .def("append_analysis_pass", &PaddlePassBuilder::AppendAnalysisPass)
       .def("turn_on_debug", &PaddlePassBuilder::TurnOnDebug)
       .def("debug_string", &PaddlePassBuilder::DebugString)
-      .def("all_passes", &PaddlePassBuilder::AllPasses,
+      .def("all_passes",
+           &PaddlePassBuilder::AllPasses,
            py::return_value_policy::reference)
       .def("analysis_passes", &PaddlePassBuilder::AnalysisPasses);
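A hedged sketch of inspecting the pass list from Python, using only the methods bound above (the pass_builder() accessor on Config is assumed, bound elsewhere in this file):

    from paddle.inference import Config

    config = Config("./m.pdmodel", "./m.pdiparams")
    pb = config.pass_builder()  # accessor assumed
    pb.turn_on_debug()
    print(pb.debug_string())    # readable dump of the current pass list
    print(pb.all_passes())      # returned by reference, per the binding above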
...
...