Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
02621079
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
02621079
编写于
8月 29, 2022
作者:
Y
Yuanle Liu
提交者:
GitHub
8月 29, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
TensorRT Engine context memory bind with predictor id (#45468)
上级
e10e26e7
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
69 addition
and
18 deletion
+69
-18
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+19
-0
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+6
-4
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+44
-14
未找到文件。
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
02621079
...
@@ -907,6 +907,15 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
...
@@ -907,6 +907,15 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
return
false
;
return
false
;
}
}
#ifdef PADDLE_WITH_TENSORRT
if
(
config_
.
tensorrt_engine_enabled
())
{
inference
::
tensorrt
::
TensorRTEngine
::
predictor_id_per_thread
=
predictor_id_
;
VLOG
(
3
)
<<
"thread_local var predictor_id in TensorRTEngine is set to: "
<<
inference
::
tensorrt
::
TensorRTEngine
::
predictor_id_per_thread
;
}
#endif
// Run the inference program
// Run the inference program
// if share variables, we need not create variables
// if share variables, we need not create variables
executor_
->
Run
();
executor_
->
Run
();
...
@@ -1630,6 +1639,16 @@ bool AnalysisPredictor::ZeroCopyRun() {
...
@@ -1630,6 +1639,16 @@ bool AnalysisPredictor::ZeroCopyRun() {
MkldnnPreSet
(
shape_vector
);
MkldnnPreSet
(
shape_vector
);
}
}
#endif
#endif
#ifdef PADDLE_WITH_TENSORRT
if
(
config_
.
tensorrt_engine_enabled
())
{
inference
::
tensorrt
::
TensorRTEngine
::
predictor_id_per_thread
=
predictor_id_
;
VLOG
(
3
)
<<
"thread_local var predictor_id in TensorRTEngine is set to: "
<<
inference
::
tensorrt
::
TensorRTEngine
::
predictor_id_per_thread
;
}
#endif
executor_
->
Run
();
executor_
->
Run
();
if
(
config_
.
shape_range_info_collected
())
{
if
(
config_
.
shape_range_info_collected
())
{
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
02621079
...
@@ -30,6 +30,9 @@ namespace paddle {
...
@@ -30,6 +30,9 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
tensorrt
{
namespace
tensorrt
{
int
TensorRTEngine
::
runtime_batch_
=
1
;
thread_local
int
TensorRTEngine
::
predictor_id_per_thread
=
-
1
;
void
TensorRTEngine
::
Weight
::
SetDataType
(
phi
::
DataType
type
)
{
void
TensorRTEngine
::
Weight
::
SetDataType
(
phi
::
DataType
type
)
{
nvinfer1
::
DataType
nv_type
=
nvinfer1
::
DataType
::
kFLOAT
;
nvinfer1
::
DataType
nv_type
=
nvinfer1
::
DataType
::
kFLOAT
;
switch
(
type
)
{
switch
(
type
)
{
...
@@ -59,8 +62,6 @@ void TensorRTEngine::Weight::SetDataType(phi::DataType type) {
...
@@ -59,8 +62,6 @@ void TensorRTEngine::Weight::SetDataType(phi::DataType type) {
w_
.
type
=
nv_type
;
w_
.
type
=
nv_type
;
}
}
int
TensorRTEngine
::
runtime_batch_
=
1
;
void
TensorRTEngine
::
InitNetwork
()
{
void
TensorRTEngine
::
InitNetwork
()
{
freshDeviceId
();
freshDeviceId
();
infer_builder_
.
reset
(
createInferBuilder
(
&
logger_
));
infer_builder_
.
reset
(
createInferBuilder
(
&
logger_
));
...
@@ -680,8 +681,9 @@ void TensorRTEngine::GetEngineInfo() {
...
@@ -680,8 +681,9 @@ void TensorRTEngine::GetEngineInfo() {
LOG
(
INFO
)
<<
"====== engine info ======"
;
LOG
(
INFO
)
<<
"====== engine info ======"
;
std
::
unique_ptr
<
nvinfer1
::
IEngineInspector
>
infer_inspector
(
std
::
unique_ptr
<
nvinfer1
::
IEngineInspector
>
infer_inspector
(
infer_engine_
->
createEngineInspector
());
infer_engine_
->
createEngineInspector
());
auto
infer_context
=
context
();
auto
infer_context
=
infer_ptr
<
nvinfer1
::
IExecutionContext
>
(
infer_inspector
->
setExecutionContext
(
infer_context
);
infer_engine_
->
createExecutionContextWithoutDeviceMemory
());
infer_inspector
->
setExecutionContext
(
infer_context
.
get
());
LOG
(
INFO
)
<<
infer_inspector
->
getEngineInformation
(
LOG
(
INFO
)
<<
infer_inspector
->
getEngineInformation
(
nvinfer1
::
LayerInformationFormat
::
kONELINE
);
nvinfer1
::
LayerInformationFormat
::
kONELINE
);
LOG
(
INFO
)
<<
"====== engine info end ======"
;
LOG
(
INFO
)
<<
"====== engine info end ======"
;
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
02621079
...
@@ -177,6 +177,7 @@ class TRTInt8Calibrator;
...
@@ -177,6 +177,7 @@ class TRTInt8Calibrator;
class
TensorRTEngine
{
class
TensorRTEngine
{
using
DescType
=
::
paddle
::
framework
::
proto
::
BlockDesc
;
using
DescType
=
::
paddle
::
framework
::
proto
::
BlockDesc
;
using
ShapeMapType
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
;
using
ShapeMapType
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
;
using
PredictorID
=
int
;
public:
public:
// Weight is model parameter.
// Weight is model parameter.
...
@@ -286,9 +287,17 @@ class TensorRTEngine {
...
@@ -286,9 +287,17 @@ class TensorRTEngine {
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
IExecutionContext
*
context
()
{
nvinfer1
::
IExecutionContext
*
context
()
{
#ifndef PADDLE_WITH_TESTING
PADDLE_ENFORCE_GT
(
predictor_id_per_thread
,
-
1
,
platform
::
errors
::
InvalidArgument
(
"thread local var predictor_id_per_thread must be "
"initialized to >= 0, but now predictor_id_per_thread = %d"
,
predictor_id_per_thread
));
#endif
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
const
std
::
thread
::
id
tid
=
std
::
this_thread
::
get_id
();
if
(
infer_context_
.
find
(
predictor_id_per_thread
)
==
infer_context_
.
end
())
{
if
(
infer_context_
.
find
(
tid
)
==
infer_context_
.
end
())
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
infer_engine_
,
infer_engine_
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
...
@@ -296,24 +305,34 @@ class TensorRTEngine {
...
@@ -296,24 +305,34 @@ class TensorRTEngine {
// We may see trt warning: Profile 0 has been chosen by another
// We may see trt warning: Profile 0 has been chosen by another
// IExecutionContext...
// IExecutionContext...
// It's ok. We will set it later.
// It's ok. We will set it later.
infer_context_
[
tid
].
reset
(
infer_engine_
->
createExecutionContext
());
infer_context_
[
predictor_id_per_thread
].
reset
(
infer_engine_
->
createExecutionContext
());
if
(
with_dynamic_shape_
)
{
if
(
with_dynamic_shape_
)
{
// need new profile if it's not the first
// need new profile if it's not the first
if
(
cur_profile_num_
>
0
)
{
if
(
cur_profile_num_
>
0
)
{
infer_context_
[
tid
]
->
setOptimizationProfile
(
cur_profile_num_
);
infer_context_
[
predictor_id_per_thread
]
->
setOptimizationProfile
(
cur_profile_num_
);
}
}
profile_index_
[
ti
d
]
=
cur_profile_num_
;
profile_index_
[
predictor_id_per_threa
d
]
=
cur_profile_num_
;
++
cur_profile_num_
;
++
cur_profile_num_
;
}
}
}
}
return
infer_context_
[
ti
d
].
get
();
return
infer_context_
[
predictor_id_per_threa
d
].
get
();
}
}
int
GetProfileIndex
()
{
int
GetProfileIndex
()
{
if
(
max_profile_num_
>
1
)
{
if
(
max_profile_num_
>
1
)
{
#ifndef PADDLE_WITH_TESTING
PADDLE_ENFORCE_GT
(
predictor_id_per_thread
,
-
1
,
platform
::
errors
::
InvalidArgument
(
"thread local var predictor_id_per_thread must be "
"initialized to >= 0, but now predictor_id_per_thread = %d"
,
predictor_id_per_thread
));
#endif
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
const
std
::
thread
::
id
tid
=
std
::
this_thread
::
get_id
();
return
profile_index_
[
predictor_id_per_thread
];
return
profile_index_
[
tid
];
}
else
{
}
else
{
return
0
;
return
0
;
}
}
...
@@ -326,14 +345,22 @@ class TensorRTEngine {
...
@@ -326,14 +345,22 @@ class TensorRTEngine {
int
GetNbBindings
()
{
return
binding_num_
;
}
int
GetNbBindings
()
{
return
binding_num_
;
}
void
ResetContext
()
{
void
ResetContext
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
const
std
::
thread
::
id
tid
=
std
::
this_thread
::
get_id
();
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
infer_engine_
,
infer_engine_
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"You should build engine first and then set the context."
));
"You should build engine first and then set the context."
));
infer_context_
[
tid
].
reset
(
nullptr
);
#ifndef PADDLE_WITH_TESTING
infer_context_
.
erase
(
tid
);
PADDLE_ENFORCE_GT
(
predictor_id_per_thread
,
-
1
,
platform
::
errors
::
InvalidArgument
(
"thread local var predictor_id_per_thread must be "
"initialized to >= 0, but now predictor_id_per_thread = %d"
,
predictor_id_per_thread
));
#endif
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
infer_context_
[
predictor_id_per_thread
].
reset
(
nullptr
);
infer_context_
.
erase
(
predictor_id_per_thread
);
}
}
nvinfer1
::
IHostMemory
*
Serialize
()
{
nvinfer1
::
IHostMemory
*
Serialize
()
{
...
@@ -686,7 +713,7 @@ class TensorRTEngine {
...
@@ -686,7 +713,7 @@ class TensorRTEngine {
int
device_id_
;
int
device_id_
;
int
max_profile_num_
{
1
};
int
max_profile_num_
{
1
};
int
cur_profile_num_
{
0
};
int
cur_profile_num_
{
0
};
std
::
unordered_map
<
std
::
thread
::
id
,
int
>
profile_index_
;
std
::
unordered_map
<
PredictorID
,
int
>
profile_index_
;
ShapeMapType
min_input_shape_
;
ShapeMapType
min_input_shape_
;
ShapeMapType
max_input_shape_
;
ShapeMapType
max_input_shape_
;
ShapeMapType
optim_input_shape_
;
ShapeMapType
optim_input_shape_
;
...
@@ -723,7 +750,7 @@ class TensorRTEngine {
...
@@ -723,7 +750,7 @@ class TensorRTEngine {
infer_ptr
<
nvinfer1
::
IBuilder
>
infer_builder_
;
infer_ptr
<
nvinfer1
::
IBuilder
>
infer_builder_
;
infer_ptr
<
nvinfer1
::
INetworkDefinition
>
infer_network_
;
infer_ptr
<
nvinfer1
::
INetworkDefinition
>
infer_network_
;
infer_ptr
<
nvinfer1
::
ICudaEngine
>
infer_engine_
;
infer_ptr
<
nvinfer1
::
ICudaEngine
>
infer_engine_
;
std
::
unordered_map
<
std
::
thread
::
id
,
infer_ptr
<
nvinfer1
::
IExecutionContext
>>
std
::
unordered_map
<
PredictorID
,
infer_ptr
<
nvinfer1
::
IExecutionContext
>>
infer_context_
;
infer_context_
;
infer_ptr
<
nvinfer1
::
IHostMemory
>
ihost_memory_
;
infer_ptr
<
nvinfer1
::
IHostMemory
>
ihost_memory_
;
std
::
unordered_map
<
nvinfer1
::
ITensor
*
,
float
>
quant_dynamic_range_
;
std
::
unordered_map
<
nvinfer1
::
ITensor
*
,
float
>
quant_dynamic_range_
;
...
@@ -741,6 +768,9 @@ class TensorRTEngine {
...
@@ -741,6 +768,9 @@ class TensorRTEngine {
#endif
#endif
std
::
mutex
mutex_
;
std
::
mutex
mutex_
;
bool
use_inspector_
;
bool
use_inspector_
;
public:
thread_local
static
int
predictor_id_per_thread
;
};
// class TensorRTEngine
};
// class TensorRTEngine
// Add a layer__ into engine__ with args ARGS.
// Add a layer__ into engine__ with args ARGS.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录