Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
ea5ca555
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ea5ca555
编写于
12月 02, 2022
作者:
Y
Yuanle Liu
提交者:
GitHub
12月 02, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle-TRT] Support engine sharing memory of multiple predictors (#47631)
上级
d969c309
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
51 addition
and
15 deletion
+51
-15
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+25
-14
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+6
-1
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+15
-0
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+5
-0
未找到文件。
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
ea5ca555
...
@@ -679,24 +679,11 @@ void AnalysisConfig::EnableTensorRtEngine(
...
@@ -679,24 +679,11 @@ void AnalysisConfig::EnableTensorRtEngine(
bool
use_calib_mode
)
{
bool
use_calib_mode
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
!
use_gpu
())
{
if
(
!
use_gpu
())
{
LOG
(
ERROR
)
<<
"To use TensorRT engine, please call EnableGpu() first"
;
LOG
(
ERROR
)
<<
"To use TensorRT engine, please call Enable
Use
Gpu() first"
;
return
;
return
;
}
}
use_tensorrt_
=
true
;
use_tensorrt_
=
true
;
#ifdef PADDLE_WITH_TENSORRT
// https://forums.developer.nvidia.com/t/nvinfer1-createexecutioncontextwithoutdevicememory-returns-nullptr/111878/2
// when trt version less than 7.2,
// createExecutionContextWithoutDeviceMemory() has bug.
// so, we cannot enable engine context memory sharing.
#if IS_TRT_VERSION_GE(7200)
trt_engine_memory_sharing_
=
true
;
#else
LOG
(
WARNING
)
<<
"TensorRT engine context memory sharing needs version 7.2 and after."
;
trt_engine_memory_sharing_
=
false
;
#endif
#endif
tensorrt_workspace_size_
=
workspace_size
;
tensorrt_workspace_size_
=
workspace_size
;
tensorrt_max_batchsize_
=
max_batch_size
;
tensorrt_max_batchsize_
=
max_batch_size
;
tensorrt_min_subgraph_size_
=
min_subgraph_size
;
tensorrt_min_subgraph_size_
=
min_subgraph_size
;
...
@@ -711,6 +698,30 @@ void AnalysisConfig::EnableTensorRtEngine(
...
@@ -711,6 +698,30 @@ void AnalysisConfig::EnableTensorRtEngine(
#endif
#endif
}
}
/// Stores the TensorRT engine memory-sharing settings on this config.
///
/// The arguments are validated with PADDLE_ENFORCE_* (InvalidArgument) in
/// this order before anything is written:
///   1. use_tensorrt_ must already be true, i.e. the TensorRT engine has to be
///      enabled before this call.
///   2. sharing_identifier must be >= 0.
///   3. sharing_identifier must be exactly 0 when engine_memory_sharing is
///      false.
///
/// \param engine_memory_sharing Value stored in trt_engine_memory_sharing_.
/// \param sharing_identifier Value stored in
/// trt_engine_memory_sharing_identifier_.
void AnalysisConfig::EnableTensorRTMemoryOptim(bool engine_memory_sharing,
                                               int sharing_identifier) {
  // Memory sharing only makes sense once the TensorRT engine is switched on.
  PADDLE_ENFORCE_EQ(
      use_tensorrt_,
      true,
      platform::errors::InvalidArgument(
          "To enable TensorRT memory optim, please call "
          "EnableTensorRtEngine or enable_tensorrt_engine first."));

  // Negative identifiers are rejected outright.
  PADDLE_ENFORCE_GE(
      sharing_identifier,
      0,
      platform::errors::InvalidArgument(
          "The value of sharing_identifier must be greater "
          "than or equal to 0."));

  // A non-zero identifier is only accepted together with sharing enabled.
  if (!engine_memory_sharing) {
    PADDLE_ENFORCE_EQ(
        sharing_identifier,
        0,
        platform::errors::InvalidArgument(
            "The value of sharing_identifier must be equal to 0 "
            "when engine_memory_sharing is false."));
  }

  trt_engine_memory_sharing_ = engine_memory_sharing;
  trt_engine_memory_sharing_identifier_ = sharing_identifier;
}
void
AnalysisConfig
::
EnableDlnne
(
void
AnalysisConfig
::
EnableDlnne
(
int
min_subgraph_size
,
int
min_subgraph_size
,
int
max_batch_size
,
int
max_batch_size
,
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
ea5ca555
...
@@ -103,8 +103,13 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -103,8 +103,13 @@ class AnalysisPredictor : public PaddlePredictor {
if
(
config_
.
shape_range_info_collected
())
{
if
(
config_
.
shape_range_info_collected
())
{
config_
.
SwitchIrOptim
(
false
);
config_
.
SwitchIrOptim
(
false
);
}
}
auto
trt_identifier
=
config_
.
trt_engine_memory_sharing_identifier_
;
if
(
trt_identifier
>
0
)
{
predictor_id_
=
-
trt_identifier
;
}
else
{
predictor_id_
=
inference
::
GetUniqueId
();
predictor_id_
=
inference
::
GetUniqueId
();
}
}
}
///
///
/// \brief Destroy the Analysis Predictor object
/// \brief Destroy the Analysis Predictor object
///
///
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
ea5ca555
...
@@ -576,6 +576,20 @@ struct PD_INFER_DECL AnalysisConfig {
...
@@ -576,6 +576,20 @@ struct PD_INFER_DECL AnalysisConfig {
///
///
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
///
///
/// \brief Configure the TensorRT memory optimization (engine memory
/// sharing). The TensorRT engine must already be enabled
/// (EnableTensorRtEngine) before calling this.
///
/// \param engine_memory_sharing Whether to enable TensorRT memory
/// optimization.
/// \param sharing_identifier This parameter can be set if TensorRT memory
/// optimization is enabled. The value must be greater than or equal to 0, and
/// it must be 0 when engine_memory_sharing is false. If you have multiple
/// predictors that want to share memory, you can specify the same positive
/// value for these predictors. NOTE: The predictors specified with the
/// same value must be guaranteed to be executed serially, otherwise undefined
/// behavior will occur.
///
void
EnableTensorRTMemoryOptim
(
bool
engine_memory_sharing
=
true
,
int
sharing_identifier
=
0
);
///
/// \brief A boolean state telling whether the tensorrt engine memory sharing
/// \brief A boolean state telling whether the tensorrt engine memory sharing
/// is activated.
/// is activated.
///
///
...
@@ -1093,6 +1107,7 @@ struct PD_INFER_DECL AnalysisConfig {
...
@@ -1093,6 +1107,7 @@ struct PD_INFER_DECL AnalysisConfig {
// memory reuse related.
// memory reuse related.
bool
enable_memory_optim_
{
false
};
bool
enable_memory_optim_
{
false
};
bool
trt_engine_memory_sharing_
{
false
};
bool
trt_engine_memory_sharing_
{
false
};
int
trt_engine_memory_sharing_identifier_
{
0
};
bool
use_mkldnn_
{
false
};
bool
use_mkldnn_
{
false
};
std
::
unordered_set
<
std
::
string
>
mkldnn_enabled_op_types_
;
std
::
unordered_set
<
std
::
string
>
mkldnn_enabled_op_types_
;
...
...
paddle/fluid/pybind/inference_api.cc
浏览文件 @
ea5ca555
...
@@ -32,6 +32,7 @@
...
@@ -32,6 +32,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
...
@@ -732,6 +733,10 @@ void BindAnalysisConfig(py::module *m) {
...
@@ -732,6 +733,10 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"use_static"
)
=
false
,
py
::
arg
(
"use_static"
)
=
false
,
py
::
arg
(
"use_calib_mode"
)
=
true
)
py
::
arg
(
"use_calib_mode"
)
=
true
)
.
def
(
"enable_tensorrt_memory_optim"
,
&
AnalysisConfig
::
EnableTensorRTMemoryOptim
,
py
::
arg
(
"engine_memory_sharing"
)
=
true
,
py
::
arg
(
"sharing_identifier"
)
=
0
)
.
def
(
"tensorrt_precision_mode"
,
&
AnalysisConfig
::
tensorrt_precision_mode
)
.
def
(
"tensorrt_precision_mode"
,
&
AnalysisConfig
::
tensorrt_precision_mode
)
.
def
(
"set_trt_dynamic_shape_info"
,
.
def
(
"set_trt_dynamic_shape_info"
,
&
AnalysisConfig
::
SetTRTDynamicShapeInfo
,
&
AnalysisConfig
::
SetTRTDynamicShapeInfo
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录