PaddlePaddle / PaddleDetection
Commit ae576f3c (unverified)
Authored Jun 06, 2019 by Zhaolong Xing; committed via GitHub on Jun 06, 2019.
fix: high RAM usage when loading the model from memory (#17788)

test=develop
Parent: 5efe8c72

Showing 18 changed files with 149 additions and 45 deletions (+149 / -45):
paddle/fluid/inference/analysis/argument.h                                    +10   -0
paddle/fluid/inference/analysis/ir_pass_manager.cc                             +1   -0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc           +19  -14
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc    +1   -1
paddle/fluid/inference/api/analysis_config.cc                                 +12   -2
paddle/fluid/inference/api/analysis_predictor.cc                               +8   -0
paddle/fluid/inference/api/paddle_analysis_config.h                           +10   -2
paddle/fluid/inference/api/paddle_pass_builder.cc                              +1   -0
paddle/fluid/inference/tensorrt/convert/op_converter.h                         +1   -0
paddle/fluid/inference/tensorrt/engine.h                                      +39   -0
paddle/fluid/inference/tensorrt/op_teller.cc                                   +1   -1
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc                +4   -0
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc                       +4   -0
paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc                +11   -4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc                  +4   -0
paddle/fluid/inference/tests/api/tester_helper.h                               +3   -3
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h                          +18  -18
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc                     +2   -0
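For context, the code path being fixed is the one where the model program and parameters are handed to the predictor as in-memory buffers instead of file paths. A minimal sketch of that usage, assuming prog_data and param_data already hold the serialized program and weights read elsewhere (only SetModelBuffer and CreatePaddlePredictor are the calls this commit touches; everything else here is illustrative):

#include <string>
#include "paddle_inference_api.h"  // header name depends on install layout

void BuildFromMemory(const std::string &prog_data,
                     const std::string &param_data) {
  paddle::AnalysisConfig config;
  config.SetModelBuffer(prog_data.data(), prog_data.size(),
                        param_data.data(), param_data.size());
  auto predictor =
      paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  // Before this commit, both the config and the analysis Argument kept
  // full copies of these buffers alive for the predictor's lifetime.
}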
paddle/fluid/inference/analysis/argument.h

@@ -63,6 +63,16 @@ struct Argument {
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;
 
   bool Has(const std::string &key) const { return valid_fields_.count(key); }
+  void PartiallyRelease() {
+    if (Has("model_program_path")) {
+      if (Has("model_from_memory") && model_from_memory()) {
+        model_program_path().clear();
+        model_program_path().shrink_to_fit();
+        model_params_path().clear();
+        model_params_path().shrink_to_fit();
+      }
+    }
+  }
 
 #define DECL_ARGUMENT_FIELD(field__, Field, type__) \
  public:                                            \
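A note on the clear()/shrink_to_fit() pairing added above: std::string::clear() only sets the size to zero and keeps the allocated capacity, so by itself it would not return a multi-hundred-megabyte params buffer to the system. shrink_to_fit() is the (formally non-binding, in practice effective) request that actually releases the storage. A standalone illustration:

#include <iostream>
#include <string>

int main() {
  std::string buf(100 << 20, 'x');  // stand-in for a large serialized model
  buf.clear();                      // size() == 0, capacity() unchanged
  std::cout << "after clear:  " << buf.capacity() << "\n";
  buf.shrink_to_fit();              // asks the allocator to reclaim the block
  std::cout << "after shrink: " << buf.capacity() << "\n";
}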
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -87,6 +87,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       bool enable_int8 = argument->tensorrt_precision_mode() ==
                          AnalysisConfig::Precision::kInt8;
+      pass->Set("predictor_id", new int(argument->predictor_id()));
       bool use_calib_mode = argument->tensorrt_use_calib_mode();
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

@@ -199,8 +199,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "parameters", params);
 
   auto use_static_engine = Get<bool>("use_static_engine");
+  // TODO(NHZlX)
+  // There are models with the same structure but the different parameters,
+  // when runing in the 'use_serialize' mode, there is a bug.
   auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
                                       std::to_string(0));
+  auto predictor_id = Get<int>("predictor_id");
 
   // Get "" when there is no cached calibration table data.
   bool load_from_memory = Get<bool>("model_from_memory");

@@ -214,6 +218,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "use_calib_mode", use_calib_mode);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
+  SetAttr(op_desc->Proto(), "predictor_id", predictor_id);
   std::string trt_engine_serialized_data = "";
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);

@@ -233,15 +238,20 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::copy(params.begin(), params.end(),
             std::back_inserter(*repetitive_params));
+
+  tensorrt::TensorRTEngine *trt_engine =
+      inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+          .Create(engine_key + std::to_string(predictor_id),
+                  Get<int>("max_batch_size"), Get<int>("workspace_size"),
+                  enable_int8, calibrator.get(), Get<int>("gpu_device_id"));
   bool need_serialize = (use_static_engine && !load_from_memory);
   if (need_serialize) {
     trt_engine_serialized_data = GetTrtEngineSerializedData(
         Get<std::string>("model_opt_cache_dir"), engine_key);
     // we can load the engine info serialized before from the disk.
     if (!trt_engine_serialized_data.empty()) {
-      SetAttr(op_desc->Proto(), "engine_serialized_data",
-              trt_engine_serialized_data);
+      trt_engine->Deserialize(trt_engine_serialized_data);
       LOG(INFO) << "Load TRT Optimized Info from "
                 << GetTrtEngineSerializedPath(
                        Get<std::string>("model_opt_cache_dir"), engine_key);

@@ -254,10 +264,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // 2. already load serialized trt engine info.
   LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
                "kernel etc). This process may cost a lot of time.";
-  std::unique_ptr<tensorrt::TensorRTEngine> trt_engine(
-      new tensorrt::TensorRTEngine(
-          Get<int>("max_batch_size"), Get<int>("workspace_size"), enable_int8,
-          calibrator.get(), Get<int>("gpu_device_id")));
   auto *scope = param_scope();
   framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   std::unordered_set<std::string> param_set(params.begin(), params.end());

@@ -265,20 +272,18 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       .ConvertBlockToTRTEngine(
           &block_desc_temp, *scope,
           std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, trt_engine.get());
-  nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
-  trt_engine_serialized_data =
-      std::string((const char *)serialized_engine_data->data(),
-                  serialized_engine_data->size());
+          param_set, output_mapping, trt_engine);
 
   if (need_serialize) {
+    nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
+    trt_engine_serialized_data =
+        std::string((const char *)serialized_engine_data->data(),
+                    serialized_engine_data->size());
     SaveTrtEngineSerializedDataToFile(
         GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
                                    engine_key),
         trt_engine_serialized_data);
   }
-  SetAttr(op_desc->Proto(), "engine_serialized_data",
-          trt_engine_serialized_data);
 }
 
 }  // namespace analysis
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc

@@ -69,7 +69,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
       // Copy the parameter data to a tmp tensor.
       TensorCopySync(*t, cpu_place, &temp_tensor);
       // Reallocation the space on GPU
-      t->mutable_data<float>(place);
+      t->clear();
 
       // Copy parameter data to newly allocated GPU space.
       TensorCopySync(temp_tensor, place, t);
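The one-line change above trims the transient peak during parameter sync: t->clear() drops the tensor's existing allocation before the destination is allocated inside TensorCopySync, whereas the old mutable_data<float>(place) created the new buffer while the previous one was still held, so three copies of a parameter could briefly coexist. A generic illustration of that ordering, using plain vectors (hypothetical, not Paddle code):

#include <vector>

int main() {
  std::vector<float> param(1 << 20, 1.0f);  // plays the role of tensor t
  std::vector<float> temp(param);           // temp_tensor: staging copy
  // New ordering: release the old block first (the t->clear() step) so
  // the re-allocation below never overlaps with it in memory.
  std::vector<float>().swap(param);
  param.assign(temp.begin(), temp.end());   // TensorCopySync re-allocates
}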
paddle/fluid/inference/api/analysis_config.cc

@@ -87,10 +87,12 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // Model related.
   CP_MEMBER(model_dir_);
-  CP_MEMBER(prog_file_);
-  CP_MEMBER(params_file_);
   CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                   // params_file_ fields.
+
+  prog_file_ = std::move(other.prog_file_);
+  params_file_ = std::move(other.params_file_);
+
   // Gpu related.
   CP_MEMBER(use_gpu_);
   CP_MEMBER(device_id_);

@@ -439,4 +441,12 @@ void AnalysisConfig::EnableAnakinEngine(
   anakin_auto_config_layout_ = auto_config_layout;
   Update();
 }
+
+void AnalysisConfig::PartiallyRelease() {
+  prog_file_.clear();
+  prog_file_.shrink_to_fit();
+  params_file_.clear();
+  params_file_.shrink_to_fit();
+}
+
 }  // namespace paddle
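Two details make this work together with the header change further down. The copy constructor now moves the potentially huge program/params buffers out of the source config rather than duplicating them; since other is a const reference, this only becomes a real move because prog_file_ and params_file_ are declared mutable (a mutable member stays non-const even when reached through a const object). PartiallyRelease() then lets the predictor free whatever remains once setup is done. A self-contained sketch of the mutable-plus-move trick (type and field names hypothetical):

#include <string>
#include <utility>

struct Config {
  // mutable: still non-const through a const Config &, so std::move below
  // selects string's move assignment instead of silently copying.
  mutable std::string prog_buf;

  Config() = default;
  Config(const Config &other) {
    // Steals the storage; the moved-from source is left valid (typically
    // empty), which is exactly why a config becomes single-use.
    prog_buf = std::move(other.prog_buf);
  }
};

int main() {
  Config a;
  a.prog_buf.assign(100 << 20, 'x');
  Config b(a);  // transfers ~100 MB instead of duplicating it
  return b.prog_buf.size() == (100u << 20) ? 0 : 1;
}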
paddle/fluid/inference/api/analysis_predictor.cc

@@ -444,6 +444,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
   inference_program_.reset(
       new framework::ProgramDesc(argument_.ir_analyzed_program()));
+  // The config and argument take a lot of storage,
+  // when the predictor settings are complete, we release these stores.
+  argument_.PartiallyRelease();
+  config_.PartiallyRelease();
   LOG(INFO) << "== optimize end ==";
 }

@@ -451,6 +455,8 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
+  PADDLE_ENFORCE(config.is_valid(),
+                 "Note: Each config can only be used for one predictor.");
   if (config.use_gpu()) {
     // 1. GPU memory
     PADDLE_ENFORCE_GE(config.memory_pool_init_size_mb(), 0.f);

@@ -480,6 +486,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   }
 
   std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
+  // Each config can only be used for one predictor.
+  config.SetInValid();
   auto predictor_p = dynamic_cast<AnalysisPredictor *>(predictor.get());
   if (!predictor_p->Init(nullptr)) {
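The enforcement added here makes configs single-use: once a predictor is built, the config's buffers have been moved out and the config is marked invalid. Client code therefore needs one config per predictor, along the lines of this hypothetical driver (mirroring what the updated tests below do):

paddle::AnalysisConfig cfg1;
cfg1.SetModel("model_dir");  // path is illustrative
auto p1 = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(cfg1);
// Reusing cfg1 for a second predictor would now trip the PADDLE_ENFORCE:
// "Note: Each config can only be used for one predictor."
paddle::AnalysisConfig cfg2;  // build a fresh config instead
cfg2.SetModel("model_dir");
auto p2 = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(cfg2);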
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -232,6 +232,8 @@ struct AnalysisConfig {
                            bool force_update_static_cache = false);
 
   /** Tell whether the memory optimization is activated. */
   bool enable_memory_optim() const;
+  void SetInValid() const { is_valid_ = false; }
+  bool is_valid() const { return is_valid_; }
 
   friend class ::paddle::AnalysisPredictor;

@@ -239,6 +241,7 @@ struct AnalysisConfig {
    * Get a pass builder for customize the passes in IR analysis phase.
    */
   PassStrategy *pass_builder() const;
+  void PartiallyRelease();
 
  protected:
   // Update the config.

@@ -249,8 +252,8 @@ struct AnalysisConfig {
  protected:
   // Model pathes.
   std::string model_dir_;
-  std::string prog_file_;
-  std::string params_file_;
+  mutable std::string prog_file_;
+  mutable std::string params_file_;
 
   // GPU related.
   bool use_gpu_{false};

@@ -312,6 +315,11 @@ struct AnalysisConfig {
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
 
+  // If the config is already used on a predictor, it becomes invalid.
+  mutable bool is_valid_{true};
+  // Any config can only be used with one predictor.
+  // Variables held by config can take up a lot of memory in some cases.
+  // So we release the memory when the predictor is set up.
 };
 
 }  // namespace paddle
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -109,6 +109,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_affine_channel_fuse_pass",             //
         "conv_eltwiseadd_affine_channel_fuse_pass",  //
         "conv_bn_fuse_pass",                         //
+        "conv_eltwiseadd_bn_fuse_pass",              //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
         "conv_elementwise_add_act_fuse_pass",        //
paddle/fluid/inference/tensorrt/convert/op_converter.h

@@ -170,6 +170,7 @@ class OpConverter {
       engine->DeclareOutput(output);
     }
     engine->FreezeNetwork();
+    engine->ClearWeights();
   }
 
   void RreplenishLayerAndOutput(
paddle/fluid/inference/tensorrt/engine.h

@@ -149,6 +149,12 @@ class TensorRTEngine {
   std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
       weight_map;
 
+  void ClearWeights() {
+    for (auto &weight_pair : weight_map) {
+      weight_pair.second.reset(nullptr);
+    }
+  }
+
  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
   // ensure that the thread is associated with the correct device by calling

@@ -213,6 +219,39 @@ class TensorRTEngine {
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
   engine__->network()->add##layer__(ARGS);
 
+class TRTEngineManager {
+ public:
+  bool Empty() const { return engines_.size() == 0; }
+
+  bool Has(const std::string &name) const {
+    if (engines_.count(name) == 0) return false;
+    return engines_.at(name).get() != nullptr;
+  }
+
+  TensorRTEngine *Get(const std::string &name) const {
+    return engines_.at(name).get();
+  }
+
+  TensorRTEngine *Create(std::string name, int max_batch, int max_workspace,
+                         bool enable_int8 = false,
+                         TRTInt8Calibrator *calibrator = nullptr,
+                         int device_id = 0,
+                         nvinfer1::ILogger &logger = NaiveLogger::Global()) {
+    auto *p = new TensorRTEngine(max_batch, max_workspace, enable_int8,
+                                 calibrator, device_id, logger);
+    engines_[name].reset(p);
+    return p;
+  }
+
+  void DeleteAll() {
+    for (auto &item : engines_) {
+      item.second.reset(nullptr);
+    }
+  }
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
+};
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
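TRTEngineManager is what removes the duplicated serialized-engine strings: the subgraph pass Create()s an engine once under the key engine_key + predictor_id, and every TensorRTEngineOp that belongs to the same predictor later Has()/Get()s that shared instance instead of carrying and deserializing its own engine_serialized_data attribute. A hedged usage sketch mirroring the call sites elsewhere in this commit (the key and the omitted int8/calibrator arguments are illustrative):

// Inside namespace paddle::inference; compare the call sites in
// tensorrt_subgraph_pass.cc and tensorrt_engine_op.h.
auto &manager = Singleton<tensorrt::TRTEngineManager>::Global();
std::string key = engine_key + std::to_string(predictor_id);
if (!manager.Has(key)) {
  // Built once, by the subgraph pass.
  manager.Create(key, max_batch_size, workspace_size);
}
auto *engine = manager.Get(key);  // shared by every op under this predictor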
paddle/fluid/inference/tensorrt/op_teller.cc

@@ -31,7 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
   std::unordered_set<std::string> teller_set{
       {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
        "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
-       "elementwise_add", "elementwise_mul", "dropout", "split", "prelu",
+       "elementwise_add", "elementwise_mul", "dropout", "prelu",
        "conv2d_transpose", "leaky_relu", "fc"}};
 };
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc

@@ -177,11 +177,15 @@ TEST(Analyzer_Pyramid_DNN, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("cos_sim_2.tmp_0");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc

@@ -293,11 +293,15 @@ TEST(Analyzer_rnn1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("final_output.tmp_1");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc

@@ -39,10 +39,17 @@ TEST(Analyzer, save_model) {
   mkdir(optimModelPath.c_str(), 0777);
   SaveOptimModel(&cfg, optimModelPath);
 
-  cfg.pass_builder()->ClearPasses();
-  int origin_num_ops = GetNumOps(cfg);
-  cfg.SetModel(optimModelPath + "/model", optimModelPath + "/params");
-  int fused_num_ops = GetNumOps(cfg);
+  // Each config can only be applied to one predictor.
+  AnalysisConfig cfg2;
+  SetConfig(&cfg2);
+  cfg2.pass_builder()->ClearPasses();
+  cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int origin_num_ops = GetNumOps(cfg2);
+
+  AnalysisConfig cfg3;
+  SetConfig(&cfg3);
+  cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int fused_num_ops = GetNumOps(cfg3);
   CHECK_LE(fused_num_ops, origin_num_ops);
 }
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc

@@ -215,11 +215,15 @@ TEST(Analyzer_seq_pool1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back(out_var_name);
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
paddle/fluid/inference/tests/api/tester_helper.h

@@ -534,7 +534,7 @@ void CompareNativeAndAnalysis(
 }
 
 void CompareAnalysisAndZeroCopy(
-    PaddlePredictor::Config *config,
+    PaddlePredictor::Config *config, PaddlePredictor::Config *config1,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     const std::vector<std::string> &outputs_name) {
   int batch_size = FLAGS_batch_size;

@@ -544,8 +544,8 @@ void CompareAnalysisAndZeroCopy(
   predictor->Run(inputs[0], &analysis_outputs, batch_size);
   // analysis + zero_copy
   std::vector<ZeroCopyTensor> zerocopy_outputs;
-  reinterpret_cast<AnalysisConfig *>(config)->SwitchUseFeedFetchOps(false);
-  predictor = CreateTestPredictor(config, true);
+  reinterpret_cast<AnalysisConfig *>(config1)->SwitchUseFeedFetchOps(false);
+  predictor = CreateTestPredictor(config1, true);
   ConvertPaddleTensorToZeroCopyTensor(predictor.get(), inputs[0]);
   predictor->ZeroCopyRun();
   for (size_t i = 0; i < outputs_name.size(); i++) {
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h

@@ -43,7 +43,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable std::unique_ptr<TensorRTEngine> trt_engine_;
+  mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
   int workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;

@@ -51,8 +51,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool use_calib_mode_;
   std::string calibration_data_;
   std::string engine_key_;
-  std::string engine_serialized_data_;
   bool calibration_mode_;
+  int predictor_id_;
   int device_id_;
 
  public:

@@ -69,7 +69,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     use_calib_mode_ = Attr<bool>("use_calib_mode");
     calibration_data_ = Attr<std::string>("calibration_data");
     engine_key_ = Attr<std::string>("engine_key");
-    engine_serialized_data_ = Attr<std::string>("engine_serialized_data");
+    predictor_id_ = Attr<int>("predictor_id");
     auto params = Attr<std::vector<std::string>>("parameters");
     for (const auto &param : params) {

@@ -84,16 +84,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
     if (enable_int8_ && calibration_data_.size()) {
       calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
     }
-    if (!calibration_mode_ && !engine_serialized_data_.empty()) {
-      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
-          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
-          device_id_));
-      PADDLE_ENFORCE(engine_serialized_data_.size(),
-                     "TRT serialized data should not be empty here,"
-                     "there must be error when generate serialized data in TRT "
-                     "subgraph detect pass.");
-      trt_engine_->Deserialize(engine_serialized_data_);
+    bool has_engine =
+        inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+            .Has(engine_key_ + std::to_string(predictor_id_));
+
+    if (!calibration_mode_ && has_engine) {
+      trt_engine_ =
+          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+              .Get(engine_key_ + std::to_string(predictor_id_));
     }
   }

@@ -239,12 +237,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
   TensorRTEngine *GetEngine(const framework::Scope &scope,
                             const platform::Place &dev_place) const {
     if (!trt_engine_) {
-      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
-          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
-          device_id_));
-      PrepareTRTEngine(scope, trt_engine_.get());
+      trt_engine_ =
+          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+              .Create(engine_key_ + std::to_string(predictor_id_),
+                      max_batch_size_, workspace_size_, enable_int8_,
+                      calibrator_.get(), device_id_);
+      PrepareTRTEngine(scope, trt_engine_);
     }
-    return trt_engine_.get();
+    return trt_engine_;
   }
 
   void PrepareTRTEngine(const framework::Scope &scope,
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc

@@ -102,6 +102,7 @@ TEST(TensorRTEngineOp, manual) {
   engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));

@@ -201,6 +202,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));