Commit 69793a27 (unverified)

Add TensorRT inspector into Paddle-TRT (#38362)

Authored by Leo Chen on Feb 11, 2022; committed via GitHub on Feb 11, 2022.
Parent commit: 575fa0fe

Showing 14 changed files with 143 additions and 17 deletions (+143, -17):
AUTHORS.md (+1, -0)
paddle/fluid/inference/analysis/argument.h (+1, -0)
paddle/fluid/inference/analysis/ir_pass_manager.cc (+1, -0)
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc (+2, -0)
paddle/fluid/inference/api/analysis_config.cc (+3, -0)
paddle/fluid/inference/api/analysis_predictor.cc (+1, -0)
paddle/fluid/inference/api/paddle_analysis_config.h (+4, -0)
paddle/fluid/inference/tensorrt/engine.cc (+19, -3)
paddle/fluid/inference/tensorrt/engine.h (+5, -11)
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h (+5, -1)
paddle/fluid/pybind/inference_api.cc (+4, -0)
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt (+1, -0)
python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py (+14, -2)
python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py (new file, +82, -0)
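Before the per-file diffs, a minimal usage sketch of the configuration API this commit introduces. This sketch is not part of the commit: the model path and the TensorRT engine settings are placeholder assumptions; only enable_tensorrt_inspector() and tensorrt_inspector_enabled() come from the diffs below.

    # A minimal sketch, not part of this commit. Assumes a CUDA build of Paddle
    # with TensorRT available, and a saved inference model at a hypothetical path.
    import paddle.inference as paddle_infer

    config = paddle_infer.Config("./model/inference.pdmodel",    # hypothetical
                                 "./model/inference.pdiparams")  # hypothetical
    config.enable_use_gpu(100, 0)  # 100 MB initial GPU memory pool, device 0
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=1,
        min_subgraph_size=3,
        precision_mode=paddle_infer.PrecisionType.Float32,
        use_static=False,
        use_calib_mode=False)

    # New in this commit: ask Paddle-TRT to log per-layer engine information
    # (requires TensorRT >= 8.2 at both compile and run time).
    config.enable_tensorrt_inspector()
    assert config.tensorrt_inspector_enabled()

    predictor = paddle_infer.create_predictor(config)
    # The log then contains the block delimited by
    # "====== engine info ======" and "====== engine info end ======".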
AUTHORS.md
@@ -83,3 +83,4 @@
 | jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
 | mingxu1067 | Ming Huang (NVIDIA) |
 | zlsh80826 | Reese Wang (NVIDIA) |
+| leo0519 | Leo Chen (NVIDIA) |
paddle/fluid/inference/analysis/argument.h
@@ -219,6 +219,7 @@ struct Argument {
                     bool);
 DECL_ARGUMENT_FIELD(tensorrt_allow_build_at_runtime,
                     TensorRtAllowBuildAtRuntime, bool);
+DECL_ARGUMENT_FIELD(tensorrt_use_inspector, TensorRtUseInspector, bool);
 DECL_ARGUMENT_FIELD(use_dlnne, UseDlnne, bool);
 DECL_ARGUMENT_FIELD(dlnne_min_subgraph_size, DlnneMinSubgraphSize, int);
paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -156,6 +156,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
+      pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector()));
       // tuned trt dynamic_shape
       pass->Set("trt_shape_range_info_path",
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -265,6 +265,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("parameters", params);
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
+  op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.
@@ -375,6 +376,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
+  trt_engine->SetUseInspector(Get<bool>("use_inspector"));
   trt_engine->SetWithErnie(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&
paddle/fluid/inference/api/analysis_config.cc
@@ -194,6 +194,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(trt_allow_build_at_runtime_);
   CP_MEMBER(collect_shape_range_info_);
   CP_MEMBER(shape_range_info_path_);
+  CP_MEMBER(trt_use_inspector_);
   // Dlnne related
   CP_MEMBER(use_dlnne_);
   CP_MEMBER(dlnne_min_subgraph_size_);
@@ -427,6 +428,8 @@ void AnalysisConfig::EnableTensorRtDLA(int dla_core) {
   trt_dla_core_ = dla_core;
 }
 
+void AnalysisConfig::EnableTensorRtInspector() { trt_use_inspector_ = true; }
+
 void AnalysisConfig::Exp_DisableTensorRtOPs(
     const std::vector<std::string> &ops) {
   trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
paddle/fluid/inference/api/analysis_predictor.cc
@@ -615,6 +615,7 @@ void AnalysisPredictor::PrepareArgument() {
         config_.tuned_tensorrt_dynamic_shape());
     argument_.SetTensorRtAllowBuildAtRuntime(
         config_.trt_allow_build_at_runtime());
+    argument_.SetTensorRtUseInspector(config_.trt_use_inspector_);
   }
 
   if (config_.dlnne_enabled()) {
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -521,6 +521,9 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_dla_enabled() { return trt_use_dla_; }
 
+  void EnableTensorRtInspector();
+  bool tensorrt_inspector_enabled() { return trt_use_inspector_; }
+
   void EnableDlnne(int min_subgraph_size = 3);
   bool dlnne_enabled() const { return use_dlnne_; }
@@ -807,6 +810,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_allow_build_at_runtime_{false};
   // tune to get dynamic_shape info.
   bool trt_tuned_dynamic_shape_{false};
+  bool trt_use_inspector_{false};
 
   // In CollectShapeInfo mode, we will collect the shape information of
   // all intermediate tensors in the compute graph and calculate the
paddle/fluid/inference/tensorrt/engine.cc
@@ -57,7 +57,6 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
   } else {
 #if IS_TRT_VERSION_GE(6000)
     infer_context->enqueueV2(buffers->data(), stream, nullptr);
-    GetEngineInfo();
 #endif
   }
   SetRuntimeBatch(batch_size);
@@ -244,8 +243,10 @@ void TensorRTEngine::FreezeNetwork() {
 #endif
   }
 #if IS_TRT_VERSION_GE(8200)
-  infer_builder_config_->setProfilingVerbosity(
-      nvinfer1::ProfilingVerbosity::kDETAILED);
+  if (use_inspector_) {
+    infer_builder_config_->setProfilingVerbosity(
+        nvinfer1::ProfilingVerbosity::kDETAILED);
+  }
 #endif
 
 #if IS_TRT_VERSION_LT(8000)
@@ -411,6 +412,21 @@ void TensorRTEngine::freshDeviceId() {
   platform::SetDeviceId(device_id_);
 }
 
+void TensorRTEngine::GetEngineInfo() {
+#if IS_TRT_VERSION_GE(8200)
+  LOG(INFO) << "====== engine info ======";
+  std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
+      infer_engine_->createEngineInspector());
+  auto infer_context = context();
+  infer_inspector->setExecutionContext(infer_context);
+  LOG(INFO) << infer_inspector->getEngineInformation(
+      nvinfer1::LayerInformationFormat::kONELINE);
+  LOG(INFO) << "====== engine info end ======";
+#else
+  LOG(INFO) << "Inspector needs TensorRT version 8.2 and after.";
+#endif
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
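The GetEngineInfo() implementation above drives TensorRT's IEngineInspector directly. For readers more familiar with the TensorRT Python API, here is a rough equivalent as a sketch, not part of this commit; it assumes TensorRT >= 8.2 and pre-built engine/context objects, and that the engine was built with ProfilingVerbosity.DETAILED (which is what the FreezeNetwork() hunk sets on the C++ side).

    import tensorrt as trt  # the inspector API needs TensorRT >= 8.2

    def print_engine_info(engine: "trt.ICudaEngine",
                          context: "trt.IExecutionContext") -> None:
        # Mirrors TensorRTEngine::GetEngineInfo(): create an inspector, bind
        # the execution context, and dump one-line-per-layer engine info.
        inspector = engine.create_engine_inspector()
        inspector.execution_context = context  # like setExecutionContext()
        print("====== engine info ======")
        print(inspector.get_engine_information(
            trt.LayerInformationFormat.ONELINE))
        print("====== engine info end ======")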
paddle/fluid/inference/tensorrt/engine.h
@@ -580,17 +580,10 @@ class TensorRTEngine {
   }
 
   void SetProfileNum(int num) { max_profile_num_ = num; }
-  void GetEngineInfo() {
-#if IS_TRT_VERSION_GE(8200)
-    std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
-        infer_engine_->createEngineInspector());
-    infer_inspector->setExecutionContext(context());
-    VLOG(3) << infer_inspector->getEngineInformation(
-        nvinfer1::LayerInformationFormat::kJSON);
-#else
-    VLOG(3) << "Inspector needs TensorRT version 8.2 and after.";
-#endif
-  }
+
+  void GetEngineInfo();
+
+  void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }
 
  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
@@ -664,6 +657,7 @@ class TensorRTEngine {
   std::vector<std::unique_ptr<plugin::DynamicPluginTensorRT>> owned_pluginv2_;
 #endif
   std::mutex mutex_;
+  bool use_inspector_;
 };  // class TensorRTEngine
 
 // Add a layer__ into engine__ with args ARGS.
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -140,6 +140,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool enable_int8_;
   bool enable_fp16_;
   bool use_calib_mode_;
+  bool use_inspector_;
   std::string calibration_data_;
   std::string engine_key_;
   std::string calibration_engine_key_;
@@ -175,6 +176,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     shape_range_info_path_ = Attr<std::string>("shape_range_info_path");
     allow_build_at_runtime_ = Attr<bool>("allow_build_at_runtime");
     use_static_engine_ = Attr<bool>("use_static_engine");
+    use_inspector_ = HasAttr("use_inspector") && Attr<bool>("use_inspector");
     if (use_static_engine_) {
       model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
     }
@@ -285,6 +287,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
       return;
     }
     auto *trt_engine = GetEngine(scope, dev_place);
+    if (use_inspector_) {
+      trt_engine->GetEngineInfo();
+    }
     if (trt_engine->with_dynamic_shape()) {
       // get runtime input shapes.
       std::map<std::string, std::vector<int32_t>> runtime_input_shape;
@@ -331,7 +336,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
       anc = &scope;
     }
     PrepareTRTEngine(*anc, trt_engine);
-
     // update shape_range_info_pbtxt
     if (!shape_range_info_path_.empty()) {
       inference::UpdateShapeRangeInfo(
paddle/fluid/pybind/inference_api.cc
@@ -615,6 +615,10 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_tensorrt_dla", &AnalysisConfig::EnableTensorRtDLA,
           py::arg("dla_core") = 0)
       .def("tensorrt_dla_enabled", &AnalysisConfig::tensorrt_dla_enabled)
+      .def("enable_tensorrt_inspector",
+           &AnalysisConfig::EnableTensorRtInspector)
+      .def("tensorrt_inspector_enabled",
+           &AnalysisConfig::tensorrt_inspector_enabled)
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("enable_dlnne", &AnalysisConfig::EnableDlnne,
           py::arg("min_subgraph_size") = 3)
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -75,6 +75,7 @@ set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
 #set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200)
 set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
+set_tests_properties(test_trt_inspector PROPERTIES TIMEOUT 60)
 if (WITH_NV_JETSON)
   set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 450)
   set_tests_properties(test_trt_pool3d_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 450)
python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -122,6 +122,11 @@ class InferencePassTest(unittest.TestCase):
                 self.trt_parameters.precision,
                 self.trt_parameters.use_static,
                 self.trt_parameters.use_calib_mode)
+            if self.trt_parameters.use_inspector:
+                config.enable_tensorrt_inspector()
+                self.assertTrue(
+                    config.tensorrt_inspector_enabled(),
+                    "The inspector option is not set correctly.")
 
         if self.dynamic_shape_params:
             config.set_trt_dynamic_shape_info(
@@ -244,14 +249,21 @@ class InferencePassTest(unittest.TestCase):
         Prepare TensorRT subgraph engine parameters.
         '''
 
-        def __init__(self, workspace_size, max_batch_size, min_subgraph_size,
-                     precision, use_static, use_calib_mode):
+        def __init__(self,
+                     workspace_size,
+                     max_batch_size,
+                     min_subgraph_size,
+                     precision,
+                     use_static,
+                     use_calib_mode,
+                     use_inspector=False):
             self.workspace_size = workspace_size
             self.max_batch_size = max_batch_size
            self.min_subgraph_size = min_subgraph_size
            self.precision = precision
            self.use_static = use_static
            self.use_calib_mode = use_calib_mode
+           self.use_inspector = use_inspector

     class DynamicShapeParam:
         '''
python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py (new file, mode 100644)
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import threading
+import time
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+import subprocess
+
+
+class TensorRTInspectorTest(InferencePassTest):
+    def setUp(self):
+        self.set_params()
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 16, 16], dtype="float32")
+            matmul_out = fluid.layers.matmul(
+                x=data,
+                y=data,
+                transpose_x=self.transpose_x,
+                transpose_y=self.transpose_y,
+                alpha=self.alpha)
+            out = fluid.layers.batch_norm(matmul_out, is_test=True)
+
+        self.feeds = {
+            "data": np.ones([1, 16, 16]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = InferencePassTest.TensorRTParam(
+            1 << 30, 1, 0, AnalysisConfig.Precision.Float32, False, False,
+            True)
+        self.fetch_list = [out]
+
+    def set_params(self):
+        self.transpose_x = True
+        self.transpose_y = True
+        self.alpha = 2.0
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            build_engine = subprocess.run(
+                [sys.executable, 'test_trt_inspector.py', '--build-engine'],
+                stderr=subprocess.PIPE)
+            engine_info = build_engine.stderr.decode('ascii')
+            trt_compile_version = paddle.inference.get_trt_compile_version()
+            trt_runtime_version = paddle.inference.get_trt_runtime_version()
+            valid_version = (8, 2, 0)
+            if trt_compile_version >= valid_version and trt_runtime_version >= valid_version:
+                self.assertTrue('====== engine info ======' in engine_info)
+                self.assertTrue('====== engine info end ======' in engine_info)
+                self.assertTrue('matmul' in engine_info)
+                self.assertTrue('LayerType: Scale' in engine_info)
+                self.assertTrue('batch_norm' in engine_info)
+            else:
+                self.assertTrue(
+                    'Inspector needs TensorRT version 8.2 and after.' in
+                    engine_info)
+
+
+if __name__ == "__main__":
+    if '--build-engine' in sys.argv:
+        test = TensorRTInspectorTest()
+        test.setUp()
+        use_gpu = True
+        test.check_output_with_option(use_gpu)
+    else:
+        unittest.main()
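A note on the test's design: it re-executes itself with a --build-engine flag in a subprocess, because the engine-info block that GetEngineInfo() writes goes to the glog stream (stderr) rather than to a return value, so it can only be asserted on by capturing the child's stderr. A hedged standalone invocation sketch, assuming a CUDA build of Paddle and the ir/inference test directory as the working directory:

    # Hypothetical standalone run; mirrors what test_check_output() does itself.
    import subprocess, sys
    proc = subprocess.run(
        [sys.executable, 'test_trt_inspector.py', '--build-engine'],
        stderr=subprocess.PIPE)
    # With TensorRT >= 8.2 this contains the "====== engine info ======" block.
    print(proc.stderr.decode('ascii'))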