Commit 7c96efed (unverified)
Repository: BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle)
Author: Wilber; committed via GitHub on Sep 14, 2021

[Inference] Add tuned trt_dynamic_shape mode. (#34806)
Parent: f5e430c5
Showing 26 changed files with 929 additions and 67 deletions (+929 / -67).
paddle/fluid/inference/analysis/argument.h                                        +6   -0
paddle/fluid/inference/analysis/ir_pass_manager.cc                                +13  -5
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt                          +1   -1
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc               +29  -11
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc       +8   -1
paddle/fluid/inference/api/CMakeLists.txt                                         +1   -1
paddle/fluid/inference/api/analysis_config.cc                                     +47  -2
paddle/fluid/inference/api/analysis_predictor.cc                                  +90  -0
paddle/fluid/inference/api/analysis_predictor.h                                   +10  -0
paddle/fluid/inference/api/analysis_predictor_tester.cc                           +52  -4
paddle/fluid/inference/api/paddle_analysis_config.h                               +56  -1
paddle/fluid/inference/tensorrt/engine.cc                                         +4   -0
paddle/fluid/inference/tensorrt/engine.h                                          +61  -0
paddle/fluid/inference/tensorrt/helper.h                                          +10  -0
paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc                        +66  -0
paddle/fluid/inference/tests/api/trt_mobilenet_test.cc                            +18  -0
paddle/fluid/inference/utils/CMakeLists.txt                                       +3   -1
paddle/fluid/inference/utils/io_utils.cc                                          +105 -0
paddle/fluid/inference/utils/io_utils.h                                           +24  -0
paddle/fluid/inference/utils/io_utils_tester.cc                                   +26  -0
paddle/fluid/inference/utils/shape_range_info.proto                               +29  -0
paddle/fluid/operators/tensorrt/CMakeLists.txt                                    +1   -1
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h                              +145 -35
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc                        +24  -3
paddle/fluid/pybind/inference_api.cc                                              +12  -1
python/paddle/fluid/tests/unittests/ir/inference/test_trt_tuned_dynamic_shape.py  +88  -0
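The workflow this commit introduces is two-phase: first run the predictor in shape-collection mode so that the min/max/most-frequent ("opt") shapes of all intermediate tensors are recorded into a shape-range file, then reload the model with tuned dynamic shape enabled so the TensorRT subgraph pass deserializes those ranges and, optionally, rebuilds the engine at runtime when an input falls outside them. A minimal sketch of that flow using the AnalysisConfig APIs added in this diff (the model directory "./mobilenet" and the engine parameters are illustrative assumptions, not values taken from this commit):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void TunedDynamicShapeSketch() {
  // Phase 1: run representative inputs in shape-collection mode.
  paddle::AnalysisConfig collect_config;
  collect_config.EnableUseGpu(100, 0);
  collect_config.SetModel("./mobilenet");  // hypothetical model directory
  collect_config.CollectShapeRangeInfo("shape_range.pbtxt");
  auto collect_predictor = paddle::CreatePaddlePredictor(collect_config);
  // ... feed representative inputs and call Run()/ZeroCopyRun() here ...
  // The min/max/opt shapes are serialized when the predictor is destroyed.

  // Phase 2: reuse the recorded ranges as TRT dynamic-shape profiles.
  paddle::AnalysisConfig tuned_config;
  tuned_config.EnableUseGpu(100, 0);
  tuned_config.SetModel("./mobilenet");
  tuned_config.EnableTensorRtEngine(
      1 << 30, 1, 3, paddle::AnalysisConfig::Precision::kFloat32, false, false);
  // allow_build_at_runtime = true lets the op rebuild the engine and update
  // the shape-range file when an out-of-range input shape is encountered.
  tuned_config.EnableTunedTensorRtDynamicShape("shape_range.pbtxt", true);
  auto tuned_predictor = paddle::CreatePaddlePredictor(tuned_config);
}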
paddle/fluid/inference/analysis/argument.h
@@ -212,6 +212,12 @@ struct Argument {
                      bool);
  DECL_ARGUMENT_FIELD(tensorrt_use_calib_mode, TensorRtUseCalibMode, bool);
  DECL_ARGUMENT_FIELD(tensorrt_use_oss, TensorRtUseOSS, bool);
  DECL_ARGUMENT_FIELD(tensorrt_shape_range_info_path,
                      TensorRtShapeRangeInfoPath, std::string);
  DECL_ARGUMENT_FIELD(tensorrt_tuned_dynamic_shape, TensorRtTunedDynamicShape,
                      bool);
  DECL_ARGUMENT_FIELD(tensorrt_allow_build_at_runtime,
                      TensorRtAllowBuildAtRuntime, bool);
  DECL_ARGUMENT_FIELD(use_dlnne, UseDlnne, bool);
  DECL_ARGUMENT_FIELD(dlnne_min_subgraph_size, DlnneMinSubgraphSize, int);
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -146,6 +146,14 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
      pass->Set("use_static_engine", new bool(use_static_engine));
      pass->Set("model_from_memory", new bool(argument->model_from_memory()));

      // tuned trt dynamic_shape
      pass->Set("trt_shape_range_info_path",
                new std::string(argument->tensorrt_shape_range_info_path()));
      pass->Set("trt_tuned_dynamic_shape",
                new bool(argument->tensorrt_tuned_dynamic_shape()));
      pass->Set("trt_allow_build_at_runtime",
                new bool(argument->tensorrt_allow_build_at_runtime()));

      pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
                                       argument->max_input_shape()));
      pass->Set("min_input_shape", new std::map<std::string, std::vector<int>>(
...
@@ -153,17 +161,17 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("optim_input_shape",
                new std::map<std::string, std::vector<int>>(
                    argument->optim_input_shape()));
-     bool with_dynamic_shape = argument->max_input_shape().size() > 0 &&
-                               argument->min_input_shape().size() > 0 &&
-                               argument->optim_input_shape().size() > 0;
+     bool with_dynamic_shape = (argument->max_input_shape().size() > 0 &&
+                                argument->min_input_shape().size() > 0 &&
+                                argument->optim_input_shape().size() > 0) ||
+                               argument->tensorrt_tuned_dynamic_shape();
      pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
      pass->Set("trt_disabled_ops", new std::vector<std::string>(
                                        argument->tensorrt_disabled_ops()));
      pass->Set("trt_use_dla", new bool(argument->tensorrt_use_dla()));
      pass->Set("trt_dla_core", new int(argument->tensorrt_dla_core()));
      // Setting the disable_trt_plugin_fp16 to true means that TRT plugin will
-     // not
-     // run fp16.
+     // not run fp16.
      pass->Set("disable_trt_plugin_fp16",
                new bool(argument->disable_trt_plugin_fp16()));
    } else if (pass_name == "dlnne_subgraph_pass") {
...
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
cc_library(subgraph_util SRCS subgraph_util.cc DEPS subgraph_detector)

if (WITH_GPU AND TENSORRT_FOUND)
-  cc_library(tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass.cc DEPS subgraph_util tensorrt_op_teller)
+  cc_library(tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass.cc DEPS subgraph_util tensorrt_op_teller infer_io_utils)
  set(analysis_deps ${analysis_deps}
      subgraph_util tensorrt_subgraph_pass
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/inference/tensorrt/op_teller.h"
#include "paddle/fluid/inference/utils/io_utils.h"

namespace paddle {
namespace inference {
...
@@ -197,6 +198,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  auto opt_input_shape =
      Get<std::map<std::string, std::vector<int>>>("optim_input_shape");

  auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
  auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
  auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
  int max_batch_size = Get<int>("max_batch_size");
  if (trt_tuned_dynamic_shape) {
    VLOG(1) << "trt dynamic_shape deserialize from " << shape_range_info_path;
    inference::DeserializeShapeRangeInfo(shape_range_info_path,
                                         &min_input_shape, &max_input_shape,
                                         &opt_input_shape);
  }

  // The following procedure is used to rename all the intermediate
  // variables and the output variables of the subgraph.
  // Why we do this?
...
@@ -242,12 +254,14 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  op_desc->SetBlockAttr("sub_block", new_block);
  op_desc->SetAttr("subgraph", block_desc.Proto()->SerializeAsString());
- op_desc->SetAttr("max_batch_size", Get<int>("max_batch_size"));
+ op_desc->SetAttr("max_batch_size", max_batch_size);
  op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
  op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
  op_desc->SetAttr("output_name_mapping", output_mapping);
  op_desc->SetAttr("origin_output_dims", renamed_output_dims);
  op_desc->SetAttr("parameters", params);
  op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
  op_desc->SetAttr("shape_range_info_path", shape_range_info_path);

  // we record all inputs' shapes in attr to check if they are consistent
  // with the real inputs' shapes retrieved from scope when trt runs.
...
@@ -259,6 +273,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  }

  auto use_static_engine = Get<bool>("use_static_engine");
  op_desc->SetAttr("use_static_engine", use_static_engine);
  if (use_static_engine)
    op_desc->SetAttr("model_opt_cache_dir",
                     Get<std::string>("model_opt_cache_dir"));

  // TODO(NHZlX)
  // There are models with the same structure but the different parameters,
  // when running in the 'use_serialize' mode, there is a bug.
...
@@ -266,12 +285,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  // So we use seperate engine keys in serialization and calibration.
  auto engine_key = GenerateEngineKey(
      input_names_with_id, output_names_with_id, std::to_string(0),
-     std::to_string(Get<int>("max_batch_size")),
+     std::to_string(max_batch_size),
      std::to_string(static_cast<int>(precision_mode)), false);
  auto calibration_engine_key = GenerateEngineKey(
      input_names_with_id, output_names_with_id, std::to_string(0),
-     std::to_string(Get<int>("max_batch_size")),
+     std::to_string(max_batch_size),
      std::to_string(static_cast<int>(precision_mode)), true);
  auto predictor_id = Get<int>("predictor_id");

  // Get "" when there is no cached calibration table data.
...
@@ -345,11 +364,10 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  bool disable_trt_plugin_fp16 = Get<bool>("disable_trt_plugin_fp16");
  tensorrt::TensorRTEngine *trt_engine =
      inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-         .Create(engine_key + std::to_string(predictor_id),
-                 Get<int>("max_batch_size"), Get<int>("workspace_size"),
-                 precision_mode, calibrator.get(), Get<int>("gpu_device_id"),
-                 min_input_shape, max_input_shape, opt_input_shape,
-                 disable_trt_plugin_fp16);
+         .Create(engine_key + std::to_string(predictor_id), max_batch_size,
+                 Get<int>("workspace_size"), precision_mode, calibrator.get(),
+                 Get<int>("gpu_device_id"), min_input_shape, max_input_shape,
+                 opt_input_shape, disable_trt_plugin_fp16);
  trt_engine->SetUseOSS(Get<bool>("use_oss"));
  trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
  trt_engine->SetDLACore(Get<int>("trt_dla_core"));
...
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -55,10 +55,17 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
  // We get all the vars from local_scope instead of the ProgramDesc.
  // Because there exists the case that new parameter variables are not added to
  // the program in the analysis pass.
  bool reserve_cpu_weights = false;
  if (argument->tensorrt_allow_build_at_runtime_valid() &&
      argument->tensorrt_allow_build_at_runtime()) {
    reserve_cpu_weights = true;
  }
  for (auto &var_name : all_vars) {
    if (std::count(repetitive_params.begin(), repetitive_params.end(),
                   var_name)) {
-     scope->EraseVars({var_name});
+     if (!reserve_cpu_weights) {
+       scope->EraseVars({var_name});
+     }
      continue;
    }
    auto *var = scope->FindLocalVar(var_name);
...
paddle/fluid/inference/api/CMakeLists.txt
@@ -49,7 +49,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
endif()

cc_library(analysis_predictor SRCS analysis_predictor.cc ${mkldnn_quantizer_src} DEPS ${inference_deps}
-           zero_copy_tensor ir_pass_manager op_compatible_info)
+           zero_copy_tensor ir_pass_manager op_compatible_info infer_io_utils)
cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api)
...
paddle/fluid/inference/api/analysis_config.cc
@@ -158,6 +158,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
  CP_MEMBER(trt_use_static_engine_);
  CP_MEMBER(trt_use_calib_mode_);
  CP_MEMBER(trt_use_oss_);
  CP_MEMBER(trt_tuned_dynamic_shape_);
  CP_MEMBER(trt_allow_build_at_runtime_);
  CP_MEMBER(collect_shape_range_info_);
  CP_MEMBER(shape_range_info_path_);
  // Dlnne related
  CP_MEMBER(use_dlnne_);
  CP_MEMBER(dlnne_min_subgraph_size_);
...
@@ -653,8 +657,8 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
#endif
}

-void AnalysisConfig::EnableMemoryOptim() {
-  enable_memory_optim_ = true;
+void AnalysisConfig::EnableMemoryOptim(bool x) {
+  enable_memory_optim_ = x;
  Update();
}
...
@@ -783,6 +787,9 @@ std::string AnalysisConfig::Summary() {
    // dynamic_shape
    os.InsertRow({"tensorrt_enable_dynamic_shape",
                  min_input_shape_.empty() ? "false" : "true"});
    os.InsertRow({"tensorrt_tuned_dynamic_shape",
                  trt_tuned_dynamic_shape_ ? shape_range_info_path_ : "false"});
    os.InsertRow({"tensorrt_use_oss", trt_use_oss_ ? "true" : "false"});
    os.InsertRow({"tensorrt_use_dla", trt_use_dla_ ? "true" : "false"});
...
@@ -812,8 +819,46 @@ std::string AnalysisConfig::Summary() {
  os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
  os.InsertRow({"enable_profile", with_profile_ ? "true" : "false"});
  os.InsertRow({"enable_log", with_glog_info_ ? "true" : "false"});
  os.InsertRow({"collect_shape_range_info",
                collect_shape_range_info_ ? shape_range_info_path_ : "false"});

  return os.PrintTable();
}

void AnalysisConfig::CollectShapeRangeInfo(
    const std::string &shape_range_info_path) {
  LOG(INFO) << "In CollectShapeInfo mode, we will disable optimizations and "
               "collect the shape information of "
            << "all intermediate tensors in the compute graph and calculate "
               "the min_shape, max_shape and opt_shape.";
  collect_shape_range_info_ = true;
  PADDLE_ENFORCE_EQ(shape_range_info_path.empty(), false,
                    platform::errors::InvalidArgument(
                        "The shape_range_info_path should not be empty, please "
                        "re-check the argument."));
  shape_range_info_path_ = shape_range_info_path;
}

const std::string &AnalysisConfig::shape_range_info_path() {
  return shape_range_info_path_;
}

bool AnalysisConfig::shape_range_info_collected() {
  return collect_shape_range_info_;
}

void AnalysisConfig::EnableTunedTensorRtDynamicShape(
    const std::string &shape_range_info_path, bool allow_build_at_runtime) {
  shape_range_info_path_ = shape_range_info_path;
  trt_allow_build_at_runtime_ = allow_build_at_runtime;
  trt_tuned_dynamic_shape_ = true;
}

bool AnalysisConfig::tuned_tensorrt_dynamic_shape() {
  return trt_tuned_dynamic_shape_;
}

bool AnalysisConfig::trt_allow_build_at_runtime() {
  return trt_allow_build_at_runtime_;
}

}  // namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc
@@ -13,7 +13,9 @@
// limitations under the License.

#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <memory>
...
@@ -21,6 +23,7 @@
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/extension/include/ext_op_meta_info.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
...
@@ -34,6 +37,7 @@
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
...
@@ -570,6 +574,11 @@ void AnalysisPredictor::PrepareArgument() {
    argument_.SetMaxInputShape(config_.max_input_shape_);
    argument_.SetOptimInputShape(config_.optim_input_shape_);
    argument_.SetCloseTrtPluginFp16(config_.disable_trt_plugin_fp16_);
    argument_.SetTensorRtShapeRangeInfoPath(config_.shape_range_info_path());
    argument_.SetTensorRtTunedDynamicShape(
        config_.tuned_tensorrt_dynamic_shape());
    argument_.SetTensorRtAllowBuildAtRuntime(
        config_.trt_allow_build_at_runtime());
  }

  if (config_.dlnne_enabled()) {
...
@@ -915,6 +924,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
#endif

  executor_->Run();

  if (config_.shape_range_info_collected()) {
    CollectShapeRangeInfo();
  }

  // Fix TensorArray reuse not cleaned bug.
  tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
  tensor_array_batch_cleaner_.ResetTensorArray();
...
@@ -934,6 +948,78 @@ bool AnalysisPredictor::ZeroCopyRun() {
  return true;
}

void AnalysisPredictor::CollectShapeRangeInfo() {
  // if use gpu, sync first.
  if (config_.use_gpu()) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    auto gpu_place = BOOST_GET_CONST(paddle::platform::CUDAPlace, place_);
    auto *dev_ctx = static_cast<const paddle::platform::CUDADeviceContext *>(
        pool.Get(gpu_place));
#ifdef PADDLE_WITH_HIP
    hipStreamSynchronize(dev_ctx->stream());
#else
    cudaStreamSynchronize(dev_ctx->stream());
#endif
#endif
  }

  std::vector<std::string> var_names = sub_scope_->LocalVarNames();
  for (const auto &name : var_names) {
    auto *var = sub_scope_->GetVar(name);
    if (!var->IsType<framework::LoDTensor>()) {
      continue;
    }
    framework::DDim dim = var->Get<framework::LoDTensor>().dims();
    std::vector<int32_t> shape(dim.size());
    for (size_t i = 0; i < shape.size(); ++i) shape[i] = dim[i];
    shape_info_[name].emplace_back(shape);
  }
}

void AnalysisPredictor::StatisticShapeRangeInfo() {
  std::map<std::string, std::vector<int32_t>> min_shapes;
  std::map<std::string, std::vector<int32_t>> max_shapes;
  std::map<std::string, std::vector<int32_t>> opt_shapes;
  for (auto it : shape_info_) {
    auto name = it.first;
    auto shapes = it.second;

    std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
    std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
    std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());

    auto ShapeMaxFreq = [](const std::map<int32_t, int32_t> &m) -> int32_t {
      std::vector<std::pair<int32_t, int32_t>> counter;
      for (auto &it : m) counter.push_back(it);
      std::sort(
          counter.begin(), counter.end(),
          [](std::pair<int32_t, int32_t> &a, std::pair<int32_t, int32_t> &b) {
            return a.second > b.second;
          });
      return counter[0].first;
    };

    for (size_t d = 0; d < shapes[0].size(); ++d) {
      std::map<int32_t, int32_t> counter;
      for (size_t i = 0; i < shapes.size(); ++i) {
        counter[shapes[i][d]] += 1;
        if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
        if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
      }
      opt_shape[d] = ShapeMaxFreq(counter);
    }

    min_shapes[name] = min_shape;
    max_shapes[name] = max_shape;
    opt_shapes[name] = opt_shape;
  }

  inference::SerializeShapeRangeInfo(config_.shape_range_info_path(),
                                     min_shapes, max_shapes, opt_shapes);
}

bool AnalysisPredictor::LoadProgramDesc() {
  // Initialize the inference program
  std::string filename;
...
@@ -1140,6 +1226,10 @@ AnalysisPredictor::~AnalysisPredictor() {
  }
#endif

  if (config_.shape_range_info_collected()) {
    StatisticShapeRangeInfo();
  }

  memory::Release(place_);
}
...
paddle/fluid/inference/api/analysis_predictor.h
@@ -91,6 +91,10 @@ class AnalysisPredictor : public PaddlePredictor {
  /// \param[in] AnalysisConfig config
  ///
  explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
    if (config_.shape_range_info_collected()) {
      config_.SwitchIrOptim(false);
      config_.EnableMemoryOptim(false);
    }
    predictor_id_ = inference::GetUniqueId();
  }
  ///
...
@@ -377,6 +381,10 @@ class AnalysisPredictor : public PaddlePredictor {
  FRIEND_TEST(AnalysisPredictor, with_gpu);
#endif

 private:
  void StatisticShapeRangeInfo();
  void CollectShapeRangeInfo();

 private:
  AnalysisConfig config_;
  Argument argument_;
...
@@ -419,6 +427,8 @@ class AnalysisPredictor : public PaddlePredictor {
 private:
  // Some status here that help to determine the status inside the predictor.
  bool status_is_cloned_{false};
  std::map<std::string, std::vector<std::vector<int32_t>>> shape_info_;
};

}  // namespace paddle
paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -19,8 +19,10 @@
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/platform/cpu_info.h"

DEFINE_string(dirname, "", "dirname to tests.");
...
@@ -32,6 +34,8 @@ TEST(AnalysisPredictor, analysis_off) {
  config.SetModel(FLAGS_dirname);
  config.SwitchIrOptim(false);
  LOG(INFO) << config.Summary();
  LOG(INFO) << "Shape Info collected: " << config.shape_range_info_collected()
            << ", path: " << config.shape_range_info_path();
  auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  auto *predictor = static_cast<AnalysisPredictor *>(_predictor.get());
...
@@ -86,10 +90,6 @@ TEST(AnalysisPredictor, analysis_on) {
  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(inputs, &outputs));
- for (auto &output : outputs) {
-   LOG(INFO) << inference::DescribeTensor(output);
- }
  // compare with NativePredictor
  auto naive_predictor =
      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
...
@@ -139,6 +139,54 @@ TEST(AnalysisPredictor, ZeroCopy) {
  predictor->TryShrinkMemory();
}

TEST(AnalysisPredictor, CollectShapeRangeInfo) {
  AnalysisConfig config;
  config.SetModel(FLAGS_dirname);
  config.SwitchUseFeedFetchOps(false);
  config.EnableUseGpu(100, 0);
  config.CollectShapeRangeInfo(FLAGS_dirname + "/shape_range.pbtxt");
  LOG(INFO) << config.Summary();
  AnalysisConfig config2(config);
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(config2);

  auto w0 = predictor->GetInputTensor("firstw");
  auto w1 = predictor->GetInputTensor("secondw");
  auto w2 = predictor->GetInputTensor("thirdw");
  auto w3 = predictor->GetInputTensor("forthw");

  w0->Reshape({4, 1});
  w1->Reshape({4, 1});
  w2->Reshape({4, 1});
  w3->Reshape({4, 1});

  auto *w0_data = w0->mutable_data<int64_t>(PaddlePlace::kCPU);
  auto *w1_data = w1->mutable_data<int64_t>(PaddlePlace::kCPU);
  auto *w2_data = w2->mutable_data<int64_t>(PaddlePlace::kCPU);
  auto *w3_data = w3->mutable_data<int64_t>(PaddlePlace::kCPU);

  for (int i = 0; i < 4; i++) {
    w0_data[i] = i;
    w1_data[i] = i;
    w2_data[i] = i;
    w3_data[i] = i;
  }

  predictor->ZeroCopyRun();

  auto out = predictor->GetOutputTensor("fc_1.tmp_2");
  PaddlePlace place;
  int size = 0;
  out->data<float>(&place, &size);
  LOG(INFO) << "output size: " << size / sizeof(float);
  // TODO(wilber): check for windows
  // std::map<std::string, std::vector<int32_t>> min_shape;
  // std::map<std::string, std::vector<int32_t>> max_shape;
  // std::map<std::string, std::vector<int32_t>> opt_shape;
  // inference::DeserializeShapeRangeInfo(FLAGS_dirname + "/shape_range.pbtxt",
  //                                      &min_shape, &max_shape, &opt_shape);
  // ASSERT_EQ(min_shape.size(), 14u);
}

TEST(AnalysisPredictor, Clone) {
  AnalysisConfig config;
  config.SetModel(FLAGS_dirname);
...
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -380,6 +380,50 @@ struct PD_INFER_DECL AnalysisConfig {
  bool tensorrt_dynamic_shape_enabled() const {
    return !min_input_shape_.empty();
  }
  ///
  /// \brief Enable tuned tensorrt dynamic shape.
  ///
  /// \param shape_range_info_path the path to shape_info file got in
  /// CollectShapeInfo mode.
  /// \param allow_build_at_runtime allow build trt engine at runtime.
  ///
  void EnableTunedTensorRtDynamicShape(
      const std::string& shape_range_info_path,
      bool allow_build_at_runtime = true);

  ///
  /// \brief A boolean state telling whether to use tuned tensorrt dynamic
  /// shape.
  ///
  bool tuned_tensorrt_dynamic_shape();

  ///
  /// \brief A boolean state telling whether to allow building trt engine at
  /// runtime.
  ///
  bool trt_allow_build_at_runtime();

  ///
  /// \brief Collect shape info of all tensors in compute graph.
  ///
  /// \param shape_range_info_path the path to save shape info.
  ///
  void CollectShapeRangeInfo(const std::string& shape_range_info_path);

  ///
  /// \brief the shape info path in CollectShapeInfo mode.
  ///
  /// \return the shape info path.
  ///
  const std::string& shape_range_info_path();

  ///
  /// \brief A boolean state telling whether to collect shape info.
  ///
  /// \return bool Whether to collect shape info.
  ///
  bool shape_range_info_collected();

  ///
  /// \brief Prevent ops running in Paddle-TRT
  /// NOTE: just experimental, not an official stable API, easy to be broken.
...
@@ -573,7 +617,9 @@ struct PD_INFER_DECL AnalysisConfig {
  /// \brief Turn on memory optimize
  /// NOTE still in development.
  ///
- void EnableMemoryOptim();
+ /// \param x Whether to enable memory optimize.
+ ///
+ void EnableMemoryOptim(bool x = true);
  ///
  /// \brief A boolean state telling whether the memory optimization is
  /// activated.
...
@@ -693,6 +739,15 @@ struct PD_INFER_DECL AnalysisConfig {
  std::map<std::string, std::vector<int>> optim_input_shape_{};
  std::vector<std::string> trt_disabled_ops_{};
  bool disable_trt_plugin_fp16_{false};
  bool trt_allow_build_at_runtime_{false};
  // tune to get dynamic_shape info.
  bool trt_tuned_dynamic_shape_{false};

  // In CollectShapeInfo mode, we will collect the shape information of
  // all intermediate tensors in the compute graph and calculate the
  // min_shape, max_shape and opt_shape and save in shape_range_info_path_;
  bool collect_shape_range_info_{false};
  std::string shape_range_info_path_;

  // dlnne related.
  bool use_dlnne_{false};
...
paddle/fluid/inference/tensorrt/engine.cc
@@ -196,6 +196,10 @@ void TensorRTEngine::FreezeNetwork() {
#if IS_TRT_VERSION_GE(6000)
    LOG(INFO) << "Run Paddle-TRT Dynamic Shape mode.";
    for (auto &input : min_input_shape_) {
      VLOG(4) << "TRT dynamic_shape set " << input.first
              << " min: " << Vec2Str(input.second)
              << ", max: " << Vec2Str(max_input_shape_[input.first])
              << ", opt: " << Vec2Str(optim_input_shape_[input.first]);
      optim_profile_->setDimensions(
          input.first.c_str(), nvinfer1::OptProfileSelector::kMIN,
          Vec2TRT_Dims(input.second, input.first, true));
...
paddle/fluid/inference/tensorrt/engine.h
@@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/utils/any.h"

namespace paddle {
...
@@ -226,6 +227,7 @@ class TensorRTEngine {
                              const std::string& name);
  // Set the itensor_map_[name] as the network's output, and set its name.
  void DeclareOutput(const std::string& name);
  void ClearTensorMap() { itensor_map_.clear(); }
  void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
  // Get an ITensor called name.
...
@@ -244,6 +246,16 @@ class TensorRTEngine {
    }
    return infer_context_[tid].get();
  }
  void ResetContext() {
    std::unique_lock<std::mutex> lock(mutex_);
    const std::thread::id tid = std::this_thread::get_id();
    PADDLE_ENFORCE_NOT_NULL(
        infer_engine_,
        platform::errors::InvalidArgument(
            "You should build engine first and then set the context."));
    infer_context_[tid].reset(nullptr);
    infer_context_.erase(tid);
  }
  nvinfer1::IHostMemory* Serialize() {
    PADDLE_ENFORCE_NOT_NULL(
...
@@ -364,6 +376,55 @@ class TensorRTEngine {
  ShapeMapType min_input_shape() { return min_input_shape_; }
  ShapeMapType max_input_shape() { return max_input_shape_; }
  ShapeMapType optim_input_shape() { return optim_input_shape_; }

  bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
                               std::vector<std::string>* changed) {
    bool ret = false;
    changed->clear();
    for (const auto& it : runtime_input_shape) {
      auto name = it.first;
      auto input_shape = it.second;
      PADDLE_ENFORCE_EQ(
          min_input_shape_.count(name), true,
          platform::errors::InvalidArgument(
              "TRT dynamic_shape min_input_shape %s not found.", name));
      PADDLE_ENFORCE_EQ(min_input_shape_[name].size(), input_shape.size(),
                        platform::errors::InvalidArgument(
                            "TRT dynamic_shape min_input_shape %s size not "
                            "equal, the min_input_shape[%s].size()=%d"
                            ", but the runtime_input_shape[%s].size()=%d.",
                            name, name, min_input_shape_[name].size(), name,
                            input_shape.size()));
      auto bak_min_shape = min_input_shape_[name];
      auto bak_max_shape = max_input_shape_[name];
      bool min_change = false;
      bool max_change = false;
      for (size_t d = 0; d < input_shape.size(); ++d) {
        if (input_shape[d] < min_input_shape_[name][d]) {
          ret = true;
          min_change = true;
          min_input_shape_[name][d] = input_shape[d];
        }
        if (input_shape[d] > max_input_shape_[name][d]) {
          ret = true;
          max_change = true;
          max_input_shape_[name][d] = input_shape[d];
        }
      }

      if (min_change)
        LOG(INFO) << "refactor shape range: " << name << ", min_shape from "
                  << Vec2Str(bak_min_shape) << " to "
                  << Vec2Str(min_input_shape_[name]);
      if (max_change)
        LOG(INFO) << "refactor shape range: " << name << ", max_shape from "
                  << Vec2Str(bak_max_shape) << " to "
                  << Vec2Str(max_input_shape_[name]);
      if (min_change || max_change) changed->push_back(name);
    }
    return ret;
  }

  bool use_oss() { return use_oss_; }
  bool with_ernie() { return with_ernie_; }
  bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
...
paddle/fluid/inference/tensorrt/helper.h
@@ -154,6 +154,16 @@ inline void PrintITensorShape(nvinfer1::ITensor* X) {
  std::cout << "]\n";
}

template <typename T>
inline std::string Vec2Str(const std::vector<T>& vec) {
  std::ostringstream os;
  os << "(";
  for (size_t i = 0; i < vec.size() - 1; ++i) {
    os << vec[i] << ",";
  }
  os << vec[vec.size() - 1] << ")";
  return os.str();
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc
@@ -143,6 +143,70 @@ void TestDynamic2() {
  }
}

void TestTunedDynamic() {
  std::string model_dir =
      FLAGS_infer_model + "/complex_model_dynamic/complex_model_dynamic2";
  AnalysisConfig config_tuned;
  const std::string shape_range = "shape_range.pbtxt";
  config_tuned.EnableUseGpu(100, 0);
  config_tuned.SetModel(model_dir + "/model", model_dir + "/params");
  config_tuned.SwitchUseFeedFetchOps(false);
  config_tuned.CollectShapeRangeInfo(shape_range);

  int batch_size = 1;
  auto predictor_tuned = CreatePaddlePredictor(config_tuned);

  auto check_func = [batch_size](PaddlePredictor *predictor) {
    int channels = 3;
    int height = 5;
    int width = 5;
    int input_num = channels * height * width * 1;
    float *input = new float[input_num];
    memset(input, 0, input_num * sizeof(float));
    auto input_names = predictor->GetInputNames();
    auto input_t = predictor->GetInputTensor(input_names[0]);
    input_t->Reshape({batch_size, channels, height, width});
    input_t->copy_from_cpu(input);

    auto input_t1 = predictor->GetInputTensor(input_names[1]);
    input_t1->Reshape({batch_size, 2, 1, 1});
    std::vector<float> first;
    for (int i = 0; i < batch_size * 2; i++) first.push_back(1.0);
    input_t1->copy_from_cpu(first.data());

    auto input_t2 = predictor->GetInputTensor(input_names[2]);
    input_t2->Reshape({batch_size, 2, 1, 1});
    input_t2->copy_from_cpu(first.data());

    ASSERT_TRUE(predictor->ZeroCopyRun());

    std::vector<float> out_data;
    auto output_names = predictor->GetOutputNames();
    auto output_t = predictor->GetOutputTensor(output_names[0]);
    std::vector<int> output_shape = output_t->shape();
    int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                  std::multiplies<int>());
    out_data.resize(out_num);
    output_t->copy_to_cpu(out_data.data());
  };
  check_func(predictor_tuned.get());

  // check tuned_dynamic_shape
  AnalysisConfig config;
  config.EnableUseGpu(100, 0);
  std::string cache_dir = "tuned_cache";
  config.SetOptimCacheDir(cache_dir);
  delete_cache_files(cache_dir);
  config.SetModel(model_dir + "/model", model_dir + "/params");
  config.SwitchUseFeedFetchOps(false);
  config.EnableTunedTensorRtDynamicShape(shape_range, true);
  config.EnableTensorRtEngine(1 << 30, batch_size, 0,
                              AnalysisConfig::Precision::kFloat32, true, false);
  auto test_predictor = CreatePaddlePredictor(config);
  check_func(test_predictor.get());
}

TEST(AnalysisPredictor, trt_dynamic) { TestDynamic(true); }
TEST(AnalysisPredictor, trt_static) { TestDynamic(false); }
TEST(AnalysisPredictor, trt_memory_serialize) {
...
@@ -153,5 +217,7 @@ TEST(AnalysisPredictor, trt_memory_serialize) {
}
TEST(AnalysisPredictor, trt_dynamic2) { TestDynamic2(); }

TEST(AnalysisPredictor, trt_tuned_dynamic) { TestTunedDynamic(); }

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/trt_mobilenet_test.cc
@@ -47,6 +47,24 @@ TEST(AnalysisPredictor, use_gpu) {
  }
}

TEST(AnalysisPredictor, collect_shape_range) {
  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
  AnalysisConfig config;
  config.EnableUseGpu(100, 0);
  config.SetModel(model_dir);
  config.CollectShapeRangeInfo("shape_range.pbtxt");

  std::vector<std::vector<PaddleTensor>> inputs_all;
  auto predictor = CreatePaddlePredictor(config);
  SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");

  std::vector<PaddleTensor> outputs;
  for (auto &input : inputs_all) {
    ASSERT_TRUE(predictor->Run(input, &outputs));
    predictor->ClearIntermediateTensor();
  }
}

}  // namespace inference
}  // namespace paddle
...
paddle/fluid/inference/utils/CMakeLists.txt
cc_library(benchmark SRCS benchmark.cc DEPS enforce)
cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
-cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor)
+cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor shape_range_info_proto)
cc_test(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils)
cc_library(table_printer SRCS table_printer.cc)
cc_test(test_table_printer SRCS table_printer_tester.cc DEPS table_printer)
proto_library(shape_range_info_proto SRCS shape_range_info.proto)
paddle/fluid/inference/utils/io_utils.cc
@@ -13,7 +13,15 @@
// limitations under the License.

#include "paddle/fluid/inference/utils/io_utils.h"
#include <fcntl.h>
#include <utility>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/text_format.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/utils/shape_range_info.pb.h"

namespace paddle {
namespace inference {
...
@@ -157,5 +165,102 @@ void DeserializePDTensorsToFile(const std::string &path,
  fin.close();
}

void SerializeShapeRangeInfo(
    const std::string &path,
    const paddle::inference::proto::ShapeRangeInfos &info) {
  int out_fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
  google::protobuf::io::FileOutputStream *os =
      new google::protobuf::io::FileOutputStream(out_fd);
  google::protobuf::TextFormat::Print(info, os);
  delete os;
  close(out_fd);
}

void SerializeShapeRangeInfo(
    const std::string &path,
    const std::map<std::string, std::vector<int32_t>> &min_shape,
    const std::map<std::string, std::vector<int32_t>> &max_shape,
    const std::map<std::string, std::vector<int32_t>> &opt_shape) {
  paddle::inference::proto::ShapeRangeInfos shape_range_infos;
  for (auto it : min_shape) {
    auto *s = shape_range_infos.add_shape_range_info();
    s->set_name(it.first);
    for (size_t i = 0; i < it.second.size(); ++i) {
      s->add_min_shape(it.second[i]);
      s->add_max_shape(max_shape.at(it.first)[i]);
      s->add_opt_shape(opt_shape.at(it.first)[i]);
    }
  }
  inference::SerializeShapeRangeInfo(path, shape_range_infos);
}

void DeserializeShapeRangeInfo(
    const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) {
  int fd = open(path.c_str(), O_RDONLY);
  google::protobuf::io::FileInputStream *is =
      new google::protobuf::io::FileInputStream(fd);
  google::protobuf::TextFormat::Parse(is, info);
  delete is;
  close(fd);
}

void DeserializeShapeRangeInfo(
    const std::string &path,
    std::map<std::string, std::vector<int32_t>> *min_shape,
    std::map<std::string, std::vector<int32_t>> *max_shape,
    std::map<std::string, std::vector<int32_t>> *opt_shape) {
  paddle::inference::proto::ShapeRangeInfos shape_range_infos;
  DeserializeShapeRangeInfo(path, &shape_range_infos);
  for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
    auto info = shape_range_infos.shape_range_info(i);
    auto name = info.name();
    if (min_shape->count(name) || max_shape->count(name) ||
        opt_shape->count(name)) {
      continue;
    } else {
      std::vector<int32_t> tmp(info.min_shape_size());
      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.min_shape(k);
      min_shape->insert(std::make_pair(name, tmp));

      tmp.resize(info.max_shape_size());
      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.max_shape(k);
      max_shape->insert(std::make_pair(name, tmp));

      tmp.resize(info.opt_shape_size());
      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.opt_shape(k);
      opt_shape->insert(std::make_pair(name, tmp));
    }
  }
}

void UpdateShapeRangeInfo(
    const std::string &path,
    const std::map<std::string, std::vector<int32_t>> &min_shape,
    const std::map<std::string, std::vector<int32_t>> &max_shape,
    const std::map<std::string, std::vector<int32_t>> &opt_shape,
    const std::vector<std::string> &names) {
  paddle::inference::proto::ShapeRangeInfos shape_range_infos;
  DeserializeShapeRangeInfo(path, &shape_range_infos);

  for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
    auto *info = shape_range_infos.mutable_shape_range_info(i);
    for (const auto &name : names) {
      if (info->name() == name) {
        info->clear_min_shape();
        info->clear_max_shape();
        info->clear_opt_shape();
        for (size_t j = 0; j < min_shape.at(name).size(); ++j)
          info->add_min_shape(min_shape.at(name)[j]);
        for (size_t j = 0; j < max_shape.at(name).size(); ++j)
          info->add_max_shape(max_shape.at(name)[j]);
        for (size_t j = 0; j < opt_shape.at(name).size(); ++j)
          info->add_opt_shape(opt_shape.at(name)[j]);
        break;
      }
    }
  }

  inference::SerializeShapeRangeInfo(path, shape_range_infos);
}

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/utils/io_utils.h
@@ -19,6 +19,7 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/utils/shape_range_info.pb.h"

namespace paddle {
struct PaddleTensor;
...
@@ -41,5 +42,28 @@ void SerializePDTensorsToFile(const std::string& path,
                              const std::vector<PaddleTensor>& tensors);
void DeserializePDTensorsToFile(const std::string& path,
                                std::vector<PaddleTensor>* tensors);

void SerializeShapeRangeInfo(
    const std::string& path,
    const paddle::inference::proto::ShapeRangeInfos& info);
void SerializeShapeRangeInfo(
    const std::string& path,
    const std::map<std::string, std::vector<int32_t>>& min_shape,
    const std::map<std::string, std::vector<int32_t>>& max_shape,
    const std::map<std::string, std::vector<int32_t>>& opt_shape);
void DeserializeShapeRangeInfo(const std::string& path,
                               paddle::inference::proto::ShapeRangeInfos* info);
void DeserializeShapeRangeInfo(
    const std::string& path,
    std::map<std::string, std::vector<int32_t>>* min_shape,
    std::map<std::string, std::vector<int32_t>>* max_shape,
    std::map<std::string, std::vector<int32_t>>* opt_shape);
void UpdateShapeRangeInfo(
    const std::string& path,
    const std::map<std::string, std::vector<int32_t>>& min_shape,
    const std::map<std::string, std::vector<int32_t>>& max_shape,
    const std::map<std::string, std::vector<int32_t>>& opt_shape,
    const std::vector<std::string>& names);

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/utils/io_utils_tester.cc
@@ -15,6 +15,7 @@
#include "paddle/fluid/inference/utils/io_utils.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <utility>
#include "paddle/fluid/inference/api/helper.h"

namespace paddle {
...
@@ -93,3 +94,28 @@ TEST(infer_io_utils, tensors) {
        paddle::inference::pd_tensor_equal(tensors_in[i], tensors_out[i]));
  }
}

TEST(shape_info_io, read_and_write) {
  const std::string path = "test_shape_info_io";
  std::map<std::string, std::vector<int32_t>> min_shape, max_shape, opt_shape;
  min_shape.insert(
      std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
  max_shape.insert(
      std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
  opt_shape.insert(
      std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
  paddle::inference::SerializeShapeRangeInfo(path, min_shape, max_shape,
                                             opt_shape);
  min_shape.clear();
  max_shape.clear();
  opt_shape.clear();
  opt_shape.insert(
      std::make_pair("test2", std::vector<int32_t>{1, 3, 224, 224}));
  paddle::inference::DeserializeShapeRangeInfo(path, &min_shape, &max_shape,
                                               &opt_shape);

  min_shape.insert(std::make_pair("test1", std::vector<int32_t>{1, 3, 56, 56}));
  std::vector<std::string> names{"test1"};
  paddle::inference::UpdateShapeRangeInfo(path, min_shape, max_shape, opt_shape,
                                          names);
}
paddle/fluid/inference/utils/shape_range_info.proto (new file, mode 100644)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

syntax = "proto2";
package paddle.inference.proto;

// To support trt dynamic shape, record the runtime shape
// information of all tmp tensors in the Compution graph.
message ShapeRangeInfos {
  message ShapeRangeInfo {
    required string name = 1;
    repeated int32 min_shape = 2;
    repeated int32 max_shape = 3;
    repeated int32 opt_shape = 4;
  }

  repeated ShapeRangeInfo shape_range_info = 1;
}
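A shape-range file written by SerializeShapeRangeInfo is this message serialized in protobuf text format. As an illustration only (the tensor name and dimension values below are made up, not taken from this commit), one entry could look roughly like:

shape_range_info {
  name: "conv2d_0.tmp_0"
  min_shape: 1
  min_shape: 32
  min_shape: 56
  min_shape: 56
  max_shape: 4
  max_shape: 32
  max_shape: 224
  max_shape: 224
  opt_shape: 1
  opt_shape: 32
  opt_shape: 112
  opt_shape: 112
}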
paddle/fluid/operators/tensorrt/CMakeLists.txt
-op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter)
+op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter infer_io_utils analysis_helper)
file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(tensorrt_engine);\n")
nv_test(test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc
        DEPS tensorrt_engine_op
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -14,6 +14,7 @@
#pragma once

#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA

#include <memory>
...
@@ -30,6 +31,7 @@
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/inference/utils/io_utils.h"

namespace paddle {
namespace inference {
...
@@ -77,18 +79,19 @@ static void RuntimeStaticShapeCheck(std::vector<int64_t> runtime_input_shape,
}

static void RuntimeDynamicShapeCheck(
-    const std::string &x, const std::vector<int64_t> &runtime_input_shape,
-    const std::vector<int> &min_input_shape,
-    const std::vector<int> &max_input_shape) {
-  PADDLE_ENFORCE_EQ(runtime_input_shape.size(), min_input_shape.size(),
-                    platform::errors::InvalidArgument(
-                        "TRT engine runtime input dims size(%d) inconsistent "
-                        "with the dynamic shape size(%d)",
-                        runtime_input_shape.size(), min_input_shape.size()));
+    const std::string &x, const std::vector<int32_t> &runtime_input_shape,
+    const std::vector<int32_t> &min_input_shape,
+    const std::vector<int32_t> &max_input_shape) {
+  // PADDLE_ENFORCE_EQ(
+  //     runtime_input_shape.size(), min_input_shape.size(),
+  //     platform::errors::InvalidArgument(
+  //         "TRT engine runtime input %s dims size(%d) inconsistent "
+  //         "with the dynamic shape size(%d)",
+  //         x, runtime_input_shape.size(), min_input_shape.size()));
  auto is_input_shape_valid = [&](
-      const std::vector<int64_t> &runtime_input_shape,
-      const std::vector<int> &min_input_shape,
-      const std::vector<int> &max_input_shape) -> bool {
+      const std::vector<int32_t> &runtime_input_shape,
+      const std::vector<int32_t> &min_input_shape,
+      const std::vector<int32_t> &max_input_shape) -> bool {
    for (size_t i = 0; i < runtime_input_shape.size(); i++) {
      if (runtime_input_shape[i] <= max_input_shape[i] &&
          runtime_input_shape[i] >= min_input_shape[i]) {
...
@@ -128,6 +131,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
 private:
  std::vector<std::string> input_names_;
  std::unordered_set<std::string> param_names_;
  std::vector<std::string> runtime_input_names_;
  mutable TensorRTEngine *trt_engine_{nullptr};
  int max_batch_size_;
  int workspace_size_;
...
@@ -141,7 +145,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
  bool calibration_mode_;
  int predictor_id_;
  int device_id_;
  bool allow_build_at_runtime_{false};
  std::string shape_range_info_path_;
  std::string model_opt_cache_dir_;
  bool use_static_engine_;
  AnalysisConfig::Precision precision_mode_;
  std::map<std::string, std::vector<int>> min_input_shape_{};
  std::map<std::string, std::vector<int>> max_input_shape_{};
  std::map<std::string, std::vector<int>> opt_input_shape_{};

 public:
  TensorRTEngineOp(const std::string &type,
...
@@ -160,11 +171,57 @@ class TensorRTEngineOp : public framework::OperatorBase {
    engine_key_ = Attr<std::string>("engine_key");
    calibration_engine_key_ = Attr<std::string>("calibration_engine_key");
    predictor_id_ = Attr<int>("predictor_id");
    shape_range_info_path_ = Attr<std::string>("shape_range_info_path");
    allow_build_at_runtime_ = Attr<bool>("allow_build_at_runtime");
    use_static_engine_ = Attr<bool>("use_static_engine");
    if (use_static_engine_) {
      model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
    }

    if (HasAttr("dynamic_shape_names") && HasAttr("min_input_shape") &&
        HasAttr("max_input_shape") && HasAttr("opt_input_shape")) {
      std::vector<std::string> dynamic_shape_names;
      std::vector<std::vector<int>> min_input_shapes;
      std::vector<std::vector<int>> max_input_shapes;
      std::vector<std::vector<int>> opt_input_shapes;
      std::vector<int> dynamic_shape_lens;
      dynamic_shape_names =
          Attr<std::vector<std::string>>("dynamic_shape_names");
      std::vector<int> min_shapes = Attr<std::vector<int>>("min_input_shape");
      std::vector<int> max_shapes = Attr<std::vector<int>>("max_input_shape");
      std::vector<int> opt_shapes = Attr<std::vector<int>>("opt_input_shape");
      dynamic_shape_lens = Attr<std::vector<int>>("dynamic_shape_lens");
      int idx = 0;
      for (size_t i = 0; i < dynamic_shape_lens.size(); ++i) {
        std::vector<int> tmp1, tmp2, tmp3;
        for (int j = 0; j < dynamic_shape_lens[i]; ++j) {
          tmp1.push_back(min_shapes[idx]);
          tmp2.push_back(max_shapes[idx]);
          tmp3.push_back(opt_shapes[idx++]);
        }
        min_input_shapes.emplace_back(tmp1);
        max_input_shapes.emplace_back(tmp2);
        opt_input_shapes.emplace_back(tmp3);
      }

      for (size_t i = 0; i < dynamic_shape_names.size(); ++i) {
        min_input_shape_.insert(
            std::make_pair(dynamic_shape_names[i], min_input_shapes[i]));
        max_input_shape_.insert(
            std::make_pair(dynamic_shape_names[i], max_input_shapes[i]));
        opt_input_shape_.insert(
            std::make_pair(dynamic_shape_names[i], opt_input_shapes[i]));
      }
    }

    auto params = Attr<std::vector<std::string>>("parameters");
    for (const auto &param : params) {
      param_names_.insert(param);
    }
    for (auto &x : input_names_) {
      if (param_names_.count(x)) continue;
      runtime_input_names_.emplace_back(x);
    }
    // calibration_mode is ture represents we need to
    // generate the calibration table data.
    calibration_mode_ =
...
@@ -210,6 +267,78 @@ class TensorRTEngineOp : public framework::OperatorBase {
      return;
    }
    auto *trt_engine = GetEngine(scope, dev_place);
    if (trt_engine->with_dynamic_shape()) {
      // get runtime input shapes.
      std::map<std::string, std::vector<int32_t>> runtime_input_shape;
      for (auto name : runtime_input_names_) {
        auto &t = inference::analysis::GetFromScope<framework::LoDTensor>(
            scope, name);
        VLOG(4) << "trt engine runtime input name(" << name << "), dims("
                << t.dims() << ")";
        auto t_shape = framework::vectorize<int32_t>(t.dims());
        runtime_input_shape.insert(std::make_pair(name, t_shape));
      }

      if (!allow_build_at_runtime_) {
        std::map<std::string, std::vector<int>> min_input_shape =
            trt_engine->min_input_shape();
        std::map<std::string, std::vector<int>> max_input_shape =
            trt_engine->max_input_shape();
        for (auto &x : runtime_input_names_) {
          PADDLE_ENFORCE_EQ(
              min_input_shape.count(x), true,
              platform::errors::InvalidArgument(
                  "Input %s not found in TRT engine min_input_shape.", x));
          PADDLE_ENFORCE_EQ(
              max_input_shape.count(x), true,
              platform::errors::InvalidArgument(
                  "Input %s not found in TRT engine max_input_shape.", x));
          RuntimeDynamicShapeCheck(x, runtime_input_shape[x],
                                   min_input_shape[x], max_input_shape[x]);
        }
      } else {
        // compare runtime_input_shape and trt_engine dynamic shapes.
        std::vector<std::string> shape_changed_name;
        bool is_adjusted = trt_engine->AdjustDynamicShapeRange(
            runtime_input_shape, &shape_changed_name);
        if (is_adjusted) {
          LOG(INFO) << "Adjust dynamic shape range, rebuild trt engine!";
          trt_engine->ResetContext();
          trt_engine->ClearTensorMap();
          auto *anc = scope.parent();
          while (anc && anc->parent()) {
            anc = anc->parent();
          }
          if (anc == nullptr) {
            anc = &scope;
          }
          PrepareTRTEngine(*anc, trt_engine);
          // update shape_range_info_pbtxt
          if (!shape_range_info_path_.empty()) {
            inference::UpdateShapeRangeInfo(
                shape_range_info_path_, trt_engine->min_input_shape(),
                trt_engine->max_input_shape(),
                trt_engine->optim_input_shape(), shape_changed_name);
          }

          if (use_static_engine_) {
            nvinfer1::IHostMemory *serialized_engine_data =
                trt_engine->Serialize();
            std::string trt_engine_serialized_data =
                std::string((const char *)serialized_engine_data->data(),
                            serialized_engine_data->size());
            inference::analysis::SaveTrtEngineSerializedDataToFile(
                inference::analysis::GetTrtEngineSerializedPath(
                    model_opt_cache_dir_, engine_key_),
                trt_engine_serialized_data);
            LOG(INFO) << "Save TRT Optimized Info to "
                      << inference::analysis::GetTrtEngineSerializedPath(
                             model_opt_cache_dir_, engine_key_);
          }
        }
      }
    }
    RunTrt(scope, dev_place, trt_engine);
  }
...
@@ -273,7 +402,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
        reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
-   PADDLE_ENFORCE_EQ(input_names_.empty(), false,
+   PADDLE_ENFORCE_EQ(runtime_input_names_.empty(), false,
                      platform::errors::PreconditionNotMet(
                          "TensorRT engine needs at least one input, but no "
                          "input is found. Please check if you set the input "
                          "correctly."));
...
@@ -283,16 +412,12 @@ class TensorRTEngineOp : public framework::OperatorBase {
    int num_inputs = 0;
-   for (const auto &x : Inputs("Xs")) {
-     if (param_names_.count(x)) continue;
-     num_inputs += 1;
-   }
+   num_inputs += runtime_input_names_.size();
    const int num_bindings = num_inputs + Outputs("Ys").size();
    std::vector<void *> buffers(num_bindings);

    // Bind input tensor to TRT.
-   for (const auto &x : Inputs("Xs")) {
-     if (param_names_.count(x)) continue;
+   for (const auto &x : runtime_input_names_) {
      // convert input and copy to TRT engine's buffer
      auto &t =
          inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
...
@@ -341,22 +466,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
        }
      } else {
#if IS_TRT_VERSION_GE(6000)
-       std::map<std::string, std::vector<int>> min_input_shape =
-           engine->min_input_shape();
-       std::map<std::string, std::vector<int>> max_input_shape =
-           engine->max_input_shape();
-       PADDLE_ENFORCE_EQ(
-           min_input_shape.count(x), true,
-           platform::errors::InvalidArgument(
-               "Input %s not found in TRT engine min_input_shape.", x));
-       PADDLE_ENFORCE_EQ(
-           max_input_shape.count(x), true,
-           platform::errors::InvalidArgument(
-               "Input %s not found in TRT engine max_input_shape.", x));
-       auto x_min_input_shape = min_input_shape[x];
-       auto x_max_input_shape = max_input_shape[x];
-       RuntimeDynamicShapeCheck(x, t_shape, x_min_input_shape,
-                                x_max_input_shape);
        auto *trt_context = engine->context();
        trt_context->setBindingDimensions(
            bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
...
@@ -460,7 +569,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
              .Create(engine_key_ + std::to_string(predictor_id_),
                      max_batch_size_, workspace_size_, precision_mode_,
-                     calibrator_.get(), device_id_);
+                     calibrator_.get(), device_id_, min_input_shape_,
+                     max_input_shape_, opt_input_shape_);
      PrepareTRTEngine(scope, trt_engine_);
    }
    return trt_engine_;
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
浏览文件 @
7c96efed
...
...
@@ -56,7 +56,7 @@ void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
using inference::analysis::SetAttr;

-TEST(TensorRTEngineOp, manual) {
+void DynamicShapeTest(bool allow_build_at_runtime) {
  framework::ProgramDesc program;
  auto *block_ = program.Proto()->add_blocks();
  block_->set_idx(0);
...
...
@@ -116,6 +116,15 @@ TEST(TensorRTEngineOp, manual) {
  engine_op_desc.SetAttr("engine_serialized_data", std::string(""));
  int device_id = 0;
  engine_op_desc.SetAttr("gpu_id", device_id);
+ engine_op_desc.SetAttr("shape_range_info_path", std::string(""));
+ engine_op_desc.SetAttr("model_opt_cache_dir", std::string(""));
+ engine_op_desc.SetAttr("allow_build_at_runtime", allow_build_at_runtime);
+ engine_op_desc.SetAttr("use_static_engine", true);
+ engine_op_desc.SetAttr("dynamic_shape_names", std::vector<std::string>{"x"});
+ engine_op_desc.SetAttr("dynamic_shape_lens", std::vector<int>{4});
+ engine_op_desc.SetAttr("min_input_shape", std::vector<int>{1, 4, 1, 1});
+ engine_op_desc.SetAttr("max_input_shape", std::vector<int>{2, 4, 1, 1});
+ engine_op_desc.SetAttr("opt_input_shape", std::vector<int>{2, 4, 1, 1});

  LOG(INFO) << "create engine op";
  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
...
...
@@ -125,7 +134,10 @@ TEST(TensorRTEngineOp, manual) {
  platform::CUDAPlace place;
  platform::CUDADeviceContext ctx(place);
  // Prepare variables.
- CreateCUDATensor(&scope, "x", std::vector<int64_t>({2, 4}));
+ if (allow_build_at_runtime)
+   CreateCUDATensor(&scope, "x", std::vector<int64_t>({3, 4, 1, 1}));
+ else
+   CreateCUDATensor(&scope, "x", std::vector<int64_t>({2, 4, 1, 1}));
  CreateCUDATensor(&scope, "y", std::vector<int64_t>({4, 6}));
  CreateCUDATensor(&scope, "z", std::vector<int64_t>({2, 6}));
...
...
@@ -137,6 +149,11 @@ TEST(TensorRTEngineOp, manual) {
  engine_op->Run(scope, place);
}

+TEST(TensorRTEngineOp, manual) {
+  DynamicShapeTest(false);
+  DynamicShapeTest(true);
+}

void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
  framework::ProgramDesc program;
  framework::Scope scope;
...
...
@@ -220,6 +237,10 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
  engine_op_desc.SetAttr("engine_serialized_data", std::string(""));
  int device_id = 0;
  engine_op_desc.SetAttr("gpu_id", device_id);
+ engine_op_desc.SetAttr("shape_range_info_path", std::string(""));
+ engine_op_desc.SetAttr("model_opt_cache_dir", std::string(""));
+ engine_op_desc.SetAttr("allow_build_at_runtime", false);
+ engine_op_desc.SetAttr("use_static_engine", false);

  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
...
...
@@ -228,7 +249,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
}

// Test with a larger FC layer.
-TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
+// TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }

}  // namespace operators
}  // namespace paddle
...
...
paddle/fluid/pybind/inference_api.cc
...
...
@@ -538,7 +538,8 @@ void BindAnalysisConfig(py::module *m) {
      .def("switch_ir_optim", &AnalysisConfig::SwitchIrOptim,
           py::arg("x") = true)
      .def("ir_optim", &AnalysisConfig::ir_optim)
-     .def("enable_memory_optim", &AnalysisConfig::EnableMemoryOptim)
+     .def("enable_memory_optim", &AnalysisConfig::EnableMemoryOptim,
+          py::arg("x") = true)
      .def("enable_profile", &AnalysisConfig::EnableProfile)
      .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
      .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
...
...
@@ -569,6 +570,16 @@ void BindAnalysisConfig(py::module *m) {
           &AnalysisConfig::tensorrt_dynamic_shape_enabled)
      .def("enable_tensorrt_oss", &AnalysisConfig::EnableTensorRtOSS)
      .def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
+     .def("collect_shape_range_info", &AnalysisConfig::CollectShapeRangeInfo)
+     .def("shape_range_info_path", &AnalysisConfig::shape_range_info_path)
+     .def("shape_range_info_collected",
+          &AnalysisConfig::shape_range_info_collected)
+     .def("enable_tuned_tensorrt_dynamic_shape",
+          &AnalysisConfig::EnableTunedTensorRtDynamicShape)
+     .def("tuned_tensorrt_dynamic_shape",
+          &AnalysisConfig::tuned_tensorrt_dynamic_shape)
+     .def("trt_allow_build_at_runtime",
+          &AnalysisConfig::trt_allow_build_at_runtime)
      .def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)
      .def("enable_tensorrt_dla", &AnalysisConfig::EnableTensorRtDLA,
           py::arg("dla_core") = 0)
...
...
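The new bindings expose the tuned dynamic-shape workflow to Python. A minimal usage sketch of the intended two-pass flow (the model paths and the shape-range file name below are illustrative, not taken from this commit):

from paddle.inference import Config, PrecisionType, create_predictor

# Pass 1: run representative inputs and record min/max/opt shapes per tensor.
config = Config("./model.pdmodel", "./model.pdiparams")  # illustrative paths
config.enable_use_gpu(100, 0)
config.collect_shape_range_info("shape_range.pbtxt")
predictor = create_predictor(config)
# ... feed typical inputs and call predictor.run() here ...

# Pass 2: build the TensorRT engine with the tuned dynamic-shape ranges.
config2 = Config("./model.pdmodel", "./model.pdiparams")
config2.enable_use_gpu(100, 0)
config2.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)
# Second argument: allow rebuilding the engine at runtime for out-of-range shapes.
config2.enable_tuned_tensorrt_dynamic_shape("shape_range.pbtxt", True)
predictor2 = create_predictor(config2)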
python/paddle/fluid/tests/unittests/ir/inference/test_trt_tuned_dynamic_shape.py
0 → 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np
import paddle
paddle.enable_static()
import paddle.fluid as fluid
from paddle.inference import Config, Predictor, create_predictor


class TRTTunedDynamicShapeTest(unittest.TestCase):
    def get_model(self):
        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)

        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            data = fluid.data(
                name="data", shape=[-1, 6, 64, 64], dtype="float32")
            conv_out = fluid.layers.conv2d(
                input=data,
                num_filters=3,
                filter_size=3,
                groups=1,
                padding=0,
                bias_attr=False,
                act=None)
        exe.run(startup_program)
        serialized_program = paddle.static.serialize_program(
            data, conv_out, program=main_program)
        serialized_params = paddle.static.serialize_persistables(
            data, conv_out, executor=exe, program=main_program)
        return serialized_program, serialized_params

    def get_config(self, model, params, tuned=False):
        config = Config()
        config.set_model_buffer(model, len(model), params, len(params))
        config.enable_use_gpu(100, 0)
        config.set_optim_cache_dir('tuned_test')
        if tuned:
            config.collect_shape_range_info('shape_range.pbtxt')
        else:
            config.enable_tensorrt_engine(
                workspace_size=1024,
                max_batch_size=1,
                min_subgraph_size=0,
                precision_mode=paddle.inference.PrecisionType.Float32,
                use_static=True,
                use_calib_mode=False)
            config.enable_tuned_tensorrt_dynamic_shape('shape_range.pbtxt',
                                                       True)
        return config

    def predictor_run(self, config, in_data):
        predictor = create_predictor(config)
        in_names = predictor.get_input_names()
        in_handle = predictor.get_input_handle(in_names[0])
        in_handle.copy_from_cpu(in_data)
        predictor.run()

    def test_tuned_dynamic_shape_run(self):
        program, params = self.get_model()

        config = self.get_config(program, params, tuned=True)
        self.predictor_run(config, np.ones((1, 6, 64, 64)).astype(np.float32))

        config2 = self.get_config(program, params, tuned=False)
        self.predictor_run(config2, np.ones((1, 6, 32, 32)).astype(np.float32))


if __name__ == '__main__':
    unittest.main()
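Note on the test above: the tuned pass only collects shape_range.pbtxt while running a 1x6x64x64 input; the second pass builds a TensorRT engine from those recorded ranges and then feeds a 1x6x32x32 input, which falls outside the collected range and should therefore exercise the allow-build-at-runtime rebuild path enabled by the True argument to enable_tuned_tensorrt_dynamic_shape.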