Commit 69793a27 (unverified)
Author: Leo Chen
Committed via GitHub on Feb 11, 2022
Parent: 575fa0fe
Repository: BaiXuePrincess/Paddle (fork of PaddlePaddle/Paddle)

Add TensorRT inspector into Paddle-TRT (#38362)
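The change threads a single `use_inspector` flag from the user-facing `AnalysisConfig` down to the TensorRT engine: `EnableTensorRtInspector()` sets `trt_use_inspector_`, the predictor copies it into the analysis `Argument`, the subgraph pass records it as an op attribute and on the engine, and at build/run time the engine raises profiling verbosity and logs per-layer information through TensorRT's `IEngineInspector`. A minimal usage sketch through the Python API added in this commit (the model file paths are hypothetical):

import paddle.inference as paddle_infer

# Hypothetical model files; any TensorRT-eligible model works the same way.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
config.enable_use_gpu(1000, 0)  # 1000 MB initial pool on GPU 0
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)
config.enable_tensorrt_inspector()  # the new switch
assert config.tensorrt_inspector_enabled()
predictor = paddle_infer.create_predictor(config)
# With TensorRT >= 8.2, per-layer engine info is written to the INFO log
# between "====== engine info ======" markers when the engine op runs.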
Showing 14 changed files with 143 additions and 17 deletions.
AUTHORS.md                                                                +1  −0
paddle/fluid/inference/analysis/argument.h                                +1  −0
paddle/fluid/inference/analysis/ir_pass_manager.cc                        +1  −0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc       +2  −0
paddle/fluid/inference/api/analysis_config.cc                             +3  −0
paddle/fluid/inference/api/analysis_predictor.cc                          +1  −0
paddle/fluid/inference/api/paddle_analysis_config.h                       +4  −0
paddle/fluid/inference/tensorrt/engine.cc                                 +19 −3
paddle/fluid/inference/tensorrt/engine.h                                  +5  −11
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h                      +5  −1
paddle/fluid/pybind/inference_api.cc                                      +4  −0
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt           +1  −0
python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py   +14 −2
python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py    +82 −0  (new file)
AUTHORS.md
@@ -83,3 +83,4 @@
 | jeng1220   | Bai-Cheng(Ryan) Jeng (NVIDIA) |
 | mingxu1067 | Ming Huang (NVIDIA)           |
 | zlsh80826  | Reese Wang (NVIDIA)           |
+| leo0519    | Leo Chen (NVIDIA)             |
paddle/fluid/inference/analysis/argument.h
@@ -219,6 +219,7 @@ struct Argument {
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_allow_build_at_runtime,
                       TensorRtAllowBuildAtRuntime, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_use_inspector, TensorRtUseInspector, bool);
   DECL_ARGUMENT_FIELD(use_dlnne, UseDlnne, bool);
   DECL_ARGUMENT_FIELD(dlnne_min_subgraph_size, DlnneMinSubgraphSize, int);
paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -156,6 +156,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
+      pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector()));
       // tuned trt dynamic_shape
       pass->Set("trt_shape_range_info_path",
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -265,6 +265,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("parameters", params);
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
+  op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.
@@ -375,6 +376,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
+  trt_engine->SetUseInspector(Get<bool>("use_inspector"));
   trt_engine->SetWithErnie(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&
paddle/fluid/inference/api/analysis_config.cc
@@ -194,6 +194,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(trt_allow_build_at_runtime_);
   CP_MEMBER(collect_shape_range_info_);
   CP_MEMBER(shape_range_info_path_);
+  CP_MEMBER(trt_use_inspector_);
   // Dlnne related
   CP_MEMBER(use_dlnne_);
   CP_MEMBER(dlnne_min_subgraph_size_);
@@ -427,6 +428,8 @@ void AnalysisConfig::EnableTensorRtDLA(int dla_core) {
   trt_dla_core_ = dla_core;
 }

+void AnalysisConfig::EnableTensorRtInspector() { trt_use_inspector_ = true; }
+
 void AnalysisConfig::Exp_DisableTensorRtOPs(
     const std::vector<std::string> &ops) {
   trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
paddle/fluid/inference/api/analysis_predictor.cc
@@ -615,6 +615,7 @@ void AnalysisPredictor::PrepareArgument() {
         config_.tuned_tensorrt_dynamic_shape());
     argument_.SetTensorRtAllowBuildAtRuntime(
         config_.trt_allow_build_at_runtime());
+    argument_.SetTensorRtUseInspector(config_.trt_use_inspector_);
   }

   if (config_.dlnne_enabled()) {
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -521,6 +521,9 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_dla_enabled() { return trt_use_dla_; }

+  void EnableTensorRtInspector();
+  bool tensorrt_inspector_enabled() { return trt_use_inspector_; }
+
   void EnableDlnne(int min_subgraph_size = 3);
   bool dlnne_enabled() const { return use_dlnne_; }
@@ -807,6 +810,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_allow_build_at_runtime_{false};
   // tune to get dynamic_shape info.
   bool trt_tuned_dynamic_shape_{false};
+  bool trt_use_inspector_{false};

   // In CollectShapeInfo mode, we will collect the shape information of
   // all intermediate tensors in the compute graph and calculate the
paddle/fluid/inference/tensorrt/engine.cc
@@ -57,7 +57,6 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
   } else {
 #if IS_TRT_VERSION_GE(6000)
     infer_context->enqueueV2(buffers->data(), stream, nullptr);
-    GetEngineInfo();
 #endif
   }
   SetRuntimeBatch(batch_size);
@@ -244,8 +243,10 @@ void TensorRTEngine::FreezeNetwork() {
 #endif
   }

+#if IS_TRT_VERSION_GE(8200)
+  if (use_inspector_) {
+    infer_builder_config_->setProfilingVerbosity(
+        nvinfer1::ProfilingVerbosity::kDETAILED);
+  }
+#endif
+
 #if IS_TRT_VERSION_LT(8000)
@@ -411,6 +412,21 @@ void TensorRTEngine::freshDeviceId() {
   platform::SetDeviceId(device_id_);
 }

+void TensorRTEngine::GetEngineInfo() {
+#if IS_TRT_VERSION_GE(8200)
+  LOG(INFO) << "====== engine info ======";
+  std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
+      infer_engine_->createEngineInspector());
+  auto infer_context = context();
+  infer_inspector->setExecutionContext(infer_context);
+  LOG(INFO) << infer_inspector->getEngineInformation(
+      nvinfer1::LayerInformationFormat::kONELINE);
+  LOG(INFO) << "====== engine info end ======";
+#else
+  LOG(INFO) << "Inspector needs TensorRT version 8.2 and after.";
+#endif
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
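For comparison, the same inspector flow exists in TensorRT's own Python bindings (8.2+). A sketch, assuming `engine` is an already-built `ICudaEngine` whose builder config had its profiling verbosity raised to DETAILED, as `FreezeNetwork()` does above via `setProfilingVerbosity(kDETAILED)`; the helper name `print_engine_info` is hypothetical:

import tensorrt as trt

def print_engine_info(engine):
    # Mirrors TensorRTEngine::GetEngineInfo(): attach an execution context
    # to the inspector, then dump one line of information per layer.
    context = engine.create_execution_context()
    inspector = engine.create_engine_inspector()
    inspector.execution_context = context  # setExecutionContext() in C++
    print("====== engine info ======")
    print(inspector.get_engine_information(trt.LayerInformationFormat.ONELINE))
    print("====== engine info end ======")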
paddle/fluid/inference/tensorrt/engine.h
@@ -580,17 +580,10 @@ class TensorRTEngine {
   }

   void SetProfileNum(int num) { max_profile_num_ = num; }
-  void GetEngineInfo() {
-#if IS_TRT_VERSION_GE(8200)
-    std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
-        infer_engine_->createEngineInspector());
-    infer_inspector->setExecutionContext(context());
-    VLOG(3) << infer_inspector->getEngineInformation(
-        nvinfer1::LayerInformationFormat::kJSON);
-#else
-    VLOG(3) << "Inspector needs TensorRT version 8.2 and after.";
-#endif
-  }
+  void GetEngineInfo();
+
+  void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }

  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
@@ -664,6 +657,7 @@ class TensorRTEngine {
   std::vector<std::unique_ptr<plugin::DynamicPluginTensorRT>> owned_pluginv2_;
 #endif
   std::mutex mutex_;
+  bool use_inspector_;
 };  // class TensorRTEngine

 // Add a layer__ into engine__ with args ARGS.
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -140,6 +140,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool enable_int8_;
   bool enable_fp16_;
   bool use_calib_mode_;
+  bool use_inspector_;
   std::string calibration_data_;
   std::string engine_key_;
   std::string calibration_engine_key_;
@@ -175,6 +176,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     shape_range_info_path_ = Attr<std::string>("shape_range_info_path");
     allow_build_at_runtime_ = Attr<bool>("allow_build_at_runtime");
     use_static_engine_ = Attr<bool>("use_static_engine");
+    use_inspector_ = HasAttr("use_inspector") && Attr<bool>("use_inspector");
     if (use_static_engine_) {
       model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
     }
@@ -285,6 +287,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
       return;
     }
     auto *trt_engine = GetEngine(scope, dev_place);
+    if (use_inspector_) {
+      trt_engine->GetEngineInfo();
+    }
     if (trt_engine->with_dynamic_shape()) {
       // get runtime input shapes.
       std::map<std::string, std::vector<int32_t>> runtime_input_shape;
@@ -331,7 +336,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
       anc = &scope;
     }
     PrepareTRTEngine(*anc, trt_engine);
     // update shape_range_info_pbtxt
     if (!shape_range_info_path_.empty()) {
       inference::UpdateShapeRangeInfo(
paddle/fluid/pybind/inference_api.cc
@@ -615,6 +615,10 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_tensorrt_dla", &AnalysisConfig::EnableTensorRtDLA,
            py::arg("dla_core") = 0)
       .def("tensorrt_dla_enabled", &AnalysisConfig::tensorrt_dla_enabled)
+      .def("enable_tensorrt_inspector",
+           &AnalysisConfig::EnableTensorRtInspector)
+      .def("tensorrt_inspector_enabled",
+           &AnalysisConfig::tensorrt_inspector_enabled)
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("enable_dlnne", &AnalysisConfig::EnableDlnne,
            py::arg("min_subgraph_size") = 3)
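A quick round trip through the two new bindings (a sketch; a default-constructed config is enough to exercise the flag):

from paddle.inference import Config

config = Config()
assert not config.tensorrt_inspector_enabled()  # defaults to false
config.enable_tensorrt_inspector()
assert config.tensorrt_inspector_enabled()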
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -75,6 +75,7 @@ set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
 #set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200)
 set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
+set_tests_properties(test_trt_inspector PROPERTIES TIMEOUT 60)
 if (WITH_NV_JETSON)
   set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 450)
   set_tests_properties(test_trt_pool3d_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 450)
python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -122,6 +122,11 @@ class InferencePassTest(unittest.TestCase):
                 self.trt_parameters.precision,
                 self.trt_parameters.use_static,
                 self.trt_parameters.use_calib_mode)
+            if self.trt_parameters.use_inspector:
+                config.enable_tensorrt_inspector()
+                self.assertTrue(
+                    config.tensorrt_inspector_enabled(),
+                    "The inspector option is not set correctly.")

             if self.dynamic_shape_params:
                 config.set_trt_dynamic_shape_info(
@@ -244,14 +249,21 @@ class InferencePassTest(unittest.TestCase):
         Prepare TensorRT subgraph engine parameters.
         '''

-        def __init__(self, workspace_size, max_batch_size, min_subgraph_size,
-                     precision, use_static, use_calib_mode):
+        def __init__(self,
+                     workspace_size,
+                     max_batch_size,
+                     min_subgraph_size,
+                     precision,
+                     use_static,
+                     use_calib_mode,
+                     use_inspector=False):
             self.workspace_size = workspace_size
             self.max_batch_size = max_batch_size
             self.min_subgraph_size = min_subgraph_size
             self.precision = precision
             self.use_static = use_static
             self.use_calib_mode = use_calib_mode
+            self.use_inspector = use_inspector

     class DynamicShapeParam:
         '''
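With the extended constructor, a test can opt in per case; the new test below passes `True` positionally, which is equivalent to this keyword form (sketch):

trt_params = InferencePassTest.TensorRTParam(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=0,
    precision=AnalysisConfig.Precision.Float32,
    use_static=False,
    use_calib_mode=False,
    use_inspector=True)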
python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import os
import threading
import time
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
import subprocess


class TensorRTInspectorTest(InferencePassTest):
    def setUp(self):
        self.set_params()
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[1, 16, 16], dtype="float32")
            matmul_out = fluid.layers.matmul(
                x=data,
                y=data,
                transpose_x=self.transpose_x,
                transpose_y=self.transpose_y,
                alpha=self.alpha)
            out = fluid.layers.batch_norm(matmul_out, is_test=True)

        self.feeds = {
            "data": np.ones([1, 16, 16]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = InferencePassTest.TensorRTParam(
            1 << 30, 1, 0, AnalysisConfig.Precision.Float32, False, False,
            True)
        self.fetch_list = [out]

    def set_params(self):
        self.transpose_x = True
        self.transpose_y = True
        self.alpha = 2.0

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            build_engine = subprocess.run(
                [sys.executable, 'test_trt_inspector.py', '--build-engine'],
                stderr=subprocess.PIPE)
            engine_info = build_engine.stderr.decode('ascii')
            trt_compile_version = paddle.inference.get_trt_compile_version()
            trt_runtime_version = paddle.inference.get_trt_runtime_version()
            valid_version = (8, 2, 0)
            if trt_compile_version >= valid_version and trt_runtime_version >= valid_version:
                self.assertTrue('====== engine info ======' in engine_info)
                self.assertTrue('====== engine info end ======' in engine_info)
                self.assertTrue('matmul' in engine_info)
                self.assertTrue('LayerType: Scale' in engine_info)
                self.assertTrue('batch_norm' in engine_info)
            else:
                self.assertTrue(
                    'Inspector needs TensorRT version 8.2 and after.' in
                    engine_info)


if __name__ == "__main__":
    if '--build-engine' in sys.argv:
        test = TensorRTInspectorTest()
        test.setUp()
        use_gpu = True
        test.check_output_with_option(use_gpu)
    else:
        unittest.main()
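A note on the design of this test: the inspector writes to the glog INFO stream rather than returning a value, so the test re-executes itself in a subprocess with a `--build-engine` flag and captures stderr; the parent process then asserts that the engine-info markers and the expected layer names appear in the captured log, or that the version warning appears when TensorRT is older than 8.2.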