BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit ea5ca555 (unverified)
Authored Dec 02, 2022 by Yuanle Liu; committed via GitHub on Dec 02, 2022
[Paddle-TRT] Support engine sharing memory of multiple predictors (#47631)
Parent: d969c309
Showing 4 changed files with 51 additions and 15 deletions (+51 / -15)
paddle/fluid/inference/api/analysis_config.cc       +25 -14
paddle/fluid/inference/api/analysis_predictor.h      +6 -1
paddle/fluid/inference/api/paddle_analysis_config.h  +15 -0
paddle/fluid/pybind/inference_api.cc                  +5 -0
paddle/fluid/inference/api/analysis_config.cc
@@ -679,24 +679,11 @@ void AnalysisConfig::EnableTensorRtEngine(
     bool use_calib_mode) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (!use_gpu()) {
-    LOG(ERROR) << "To use TensorRT engine, please call EnableGpu() first";
+    LOG(ERROR) << "To use TensorRT engine, please call EnableUseGpu() first";
     return;
   }
 
   use_tensorrt_ = true;
-#ifdef PADDLE_WITH_TENSORRT
-  // https://forums.developer.nvidia.com/t/nvinfer1-createexecutioncontextwithoutdevicememory-returns-nullptr/111878/2
-  // when trt version less than 7.2,
-  // createExecutionContextWithoutDeviceMemory() has bug.
-  // so, we cannot enable engine context memory sharing.
-#if IS_TRT_VERSION_GE(7200)
-  trt_engine_memory_sharing_ = true;
-#else
-  LOG(WARNING)
-      << "TensorRT engine context memory sharing needs version 7.2 and after.";
-  trt_engine_memory_sharing_ = false;
-#endif
-#endif
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
   tensorrt_min_subgraph_size_ = min_subgraph_size;
@@ -711,6 +698,30 @@ void AnalysisConfig::EnableTensorRtEngine(
 #endif
 }
 
+void AnalysisConfig::EnableTensorRTMemoryOptim(bool engine_memory_sharing,
+                                               int sharing_identifier) {
+  PADDLE_ENFORCE_EQ(
+      use_tensorrt_,
+      true,
+      platform::errors::InvalidArgument(
+          "To enable TensorRT memory optim, please call "
+          "EnableTensorRtEngine or enable_tensorrt_engine first."));
+  PADDLE_ENFORCE_GE(sharing_identifier,
+                    0,
+                    platform::errors::InvalidArgument(
+                        "The value of sharing_identifier must be greater "
+                        "than or equal to 0."));
+  if (!engine_memory_sharing) {
+    PADDLE_ENFORCE_EQ(sharing_identifier,
+                      0,
+                      platform::errors::InvalidArgument(
+                          "The value of sharing_identifier must be equal to 0 "
+                          "when engine_memory_sharing is false."));
+  }
+  trt_engine_memory_sharing_ = engine_memory_sharing;
+  trt_engine_memory_sharing_identifier_ = sharing_identifier;
+}
+
 void AnalysisConfig::EnableDlnne(
     int min_subgraph_size,
     int max_batch_size,
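Note: the PADDLE_ENFORCE checks above imply a required call order. The following is a minimal usage sketch, not part of this commit; the model directory and GPU memory-pool size are placeholder values, and the surrounding calls are the pre-existing C++ AnalysisConfig API.

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.EnableUseGpu(100, 0);     // GPU must be enabled before EnableTensorRtEngine
  config.EnableTensorRtEngine();   // sets use_tensorrt_, required by the first check above
  // Accepted: sharing enabled with a non-negative identifier.
  config.EnableTensorRTMemoryOptim(/*engine_memory_sharing=*/true,
                                   /*sharing_identifier=*/1);
  // Rejected by the checks above:
  //   calling EnableTensorRTMemoryOptim before EnableTensorRtEngine,
  //   a negative sharing_identifier, or
  //   engine_memory_sharing == false combined with a non-zero identifier.
  return 0;
}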
paddle/fluid/inference/api/analysis_predictor.h
@@ -103,8 +103,13 @@ class AnalysisPredictor : public PaddlePredictor {
     if (config_.shape_range_info_collected()) {
       config_.SwitchIrOptim(false);
     }
-    predictor_id_ = inference::GetUniqueId();
+    auto trt_identifier = config_.trt_engine_memory_sharing_identifier_;
+    if (trt_identifier > 0) {
+      predictor_id_ = -trt_identifier;
+    } else {
+      predictor_id_ = inference::GetUniqueId();
+    }
   }
   ///
   /// \brief Destroy the Analysis Predictor object
   ///
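The id-selection rule introduced here can be read in isolation. The helper below is only a paraphrase of the constructor hunk above (ChoosePredictorId is a hypothetical name, not Paddle source):

// A positive TensorRT sharing identifier maps to a fixed negative predictor id,
// so every predictor configured with the same identifier resolves to the same
// id and can share engine memory; otherwise a fresh unique id is assigned.
int ChoosePredictorId(int trt_sharing_identifier, int next_unique_id) {
  return trt_sharing_identifier > 0 ? -trt_sharing_identifier : next_unique_id;
}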
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -576,6 +576,20 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_engine_enabled() const { return use_tensorrt_; }
   ///
+  /// \brief Turn on the TensorRT memory optimization.
+  ///
+  /// \param engine_memory_sharing Whether to enable TensorRT memory
+  /// optimization.
+  /// \param sharing_identifier This parameter can be set if TensorRT memory
+  /// optimization is enabled, and the value must be greater than 0. If you have
+  /// multiple predictors that want to share memory, you can specify a
+  /// same value for these predictors. NOTE: The predictors specified with the
+  /// same value must be guaranteed to be executed serially, otherwise undefined
+  /// behavior will occur.
+  ///
+  void EnableTensorRTMemoryOptim(bool engine_memory_sharing = true,
+                                 int sharing_identifier = 0);
+  ///
   /// \brief A boolean state telling whether the tensorrt engine memory sharing
   /// is activated.
   ///

@@ -1093,6 +1107,7 @@ struct PD_INFER_DECL AnalysisConfig {
   // memory reuse related.
   bool enable_memory_optim_{false};
   bool trt_engine_memory_sharing_{false};
+  int trt_engine_memory_sharing_identifier_{0};
 
   bool use_mkldnn_{false};
   std::unordered_set<std::string> mkldnn_enabled_op_types_;
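A hedged usage sketch of the contract documented above (model directories and the GPU memory-pool size are placeholders; paddle_infer::Config and paddle_infer::CreatePredictor are the pre-existing inference entry points, assumed unchanged by this commit): two predictors created with the same positive sharing_identifier share TensorRT engine memory and, per the NOTE, must be executed serially.

#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Build a TensorRT-enabled config that opts into engine memory sharing under
// the given identifier.
paddle_infer::Config MakeSharedConfig(const std::string &model_dir,
                                      int sharing_identifier) {
  paddle_infer::Config config;
  config.SetModel(model_dir);
  config.EnableUseGpu(100, 0);
  config.EnableTensorRtEngine();
  config.EnableTensorRTMemoryOptim(true, sharing_identifier);
  return config;
}

int main() {
  // Same identifier: both predictors share one pool of engine device memory.
  auto pred_a = paddle_infer::CreatePredictor(MakeSharedConfig("./model_a", 7));
  auto pred_b = paddle_infer::CreatePredictor(MakeSharedConfig("./model_b", 7));
  // Run them one after another; running them concurrently from different
  // threads is undefined behavior per the documentation above.
  return 0;
}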
paddle/fluid/pybind/inference_api.cc
@@ -32,6 +32,7 @@
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/api/paddle_infer_contrib.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"

@@ -732,6 +733,10 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("use_static") = false,
            py::arg("use_calib_mode") = true)
+      .def("enable_tensorrt_memory_optim",
+           &AnalysisConfig::EnableTensorRTMemoryOptim,
+           py::arg("engine_memory_sharing") = true,
+           py::arg("sharing_identifier") = 0)
       .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
       .def("set_trt_dynamic_shape_info",
            &AnalysisConfig::SetTRTDynamicShapeInfo,