Commit ae576f3c
Authored on Jun 06, 2019 by Zhaolong Xing; committed via GitHub on Jun 06, 2019.

fix: when use the load model from memory mode, the RAM occupy is high (#17788)
test=develop

Parent: 5efe8c72
Showing 18 changed files with 149 additions and 45 deletions (+149, −45).
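Background for the diff below: the commit targets the load-model-from-memory path of the inference API, where AnalysisConfig keeps the whole serialized program and the parameters as in-memory strings and, before this fix, those copies (plus the analysis Argument's copies) stayed resident for the predictor's entire lifetime. The following is a minimal sketch of that user-side path, assuming the SetModelBuffer / CreatePaddlePredictor API of this release; the header path, the model file names, and the ReadAll helper are illustrative only and are not part of this commit.

#include <fstream>
#include <iterator>
#include <string>

#include "paddle/include/paddle_inference_api.h"  // header path may differ per install

// Illustrative helper: slurp a file into a string.
static std::string ReadAll(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(in),
                     std::istreambuf_iterator<char>());
}

int main() {
  // Hold the serialized program and the parameters entirely in memory.
  std::string prog = ReadAll("model/__model__");    // placeholder file names
  std::string params = ReadAll("model/__params__");

  paddle::AnalysisConfig config;
  // Load-from-memory mode: the config stores copies of both buffers.
  config.SetModelBuffer(prog.data(), prog.size(), params.data(), params.size());
  config.EnableUseGpu(100 /*MB*/, 0 /*device id*/);

  // Before this commit those buffers (and the Argument's copies made during
  // analysis) were kept alive as long as the predictor existed; after it,
  // OptimizeInferenceProgram() releases them via the new PartiallyRelease() hooks.
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}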
Changed files:

paddle/fluid/inference/analysis/argument.h                                      +10   -0
paddle/fluid/inference/analysis/ir_pass_manager.cc                               +1   -0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc             +19  -14
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc      +1   -1
paddle/fluid/inference/api/analysis_config.cc                                   +12   -2
paddle/fluid/inference/api/analysis_predictor.cc                                 +8   -0
paddle/fluid/inference/api/paddle_analysis_config.h                             +10   -2
paddle/fluid/inference/api/paddle_pass_builder.cc                                +1   -0
paddle/fluid/inference/tensorrt/convert/op_converter.h                           +1   -0
paddle/fluid/inference/tensorrt/engine.h                                        +39   -0
paddle/fluid/inference/tensorrt/op_teller.cc                                     +1   -1
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc                  +4   -0
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc                         +4   -0
paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc                  +11   -4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc                    +4   -0
paddle/fluid/inference/tests/api/tester_helper.h                                 +3   -3
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h                            +18  -18
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc                       +2   -0
paddle/fluid/inference/analysis/argument.h
@@ -63,6 +63,16 @@ struct Argument {
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;
 
   bool Has(const std::string& key) const { return valid_fields_.count(key); }
 
+  void PartiallyRelease() {
+    if (Has("model_program_path")) {
+      if (Has("model_from_memory") && model_from_memory()) {
+        model_program_path().clear();
+        model_program_path().shrink_to_fit();
+        model_params_path().clear();
+        model_params_path().shrink_to_fit();
+      }
+    }
+  }
 
 #define DECL_ARGUMENT_FIELD(field__, Field, type__) \
  public:                                            \
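Aside (not part of this diff): the clear() plus shrink_to_fit() pairing used by PartiallyRelease() matters because std::string::clear() only resets the size and normally leaves the capacity untouched, while shrink_to_fit() is a non-binding request that in practice returns the allocation. A standalone sketch:

#include <iostream>
#include <string>

int main() {
  std::string buf(100 << 20, 'x');  // ~100 MB stand-in for a serialized program
  buf.clear();                      // size -> 0; capacity usually unchanged
  std::cout << "after clear():         capacity = " << buf.capacity() << '\n';
  buf.shrink_to_fit();              // asks the implementation to free the block
  std::cout << "after shrink_to_fit(): capacity = " << buf.capacity() << '\n';
  return 0;
}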
paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -87,6 +87,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       bool enable_int8 = argument->tensorrt_precision_mode() ==
                          AnalysisConfig::Precision::kInt8;
+      pass->Set("predictor_id", new int(argument->predictor_id()));
       bool use_calib_mode = argument->tensorrt_use_calib_mode();
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -199,8 +199,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "parameters", params);
 
   auto use_static_engine = Get<bool>("use_static_engine");
   // TODO(NHZlX)
   // There are models with the same structure but the different parameters,
   // when runing in the 'use_serialize' mode, there is a bug.
   auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
                                       std::to_string(0));
+  auto predictor_id = Get<int>("predictor_id");
 
   // Get "" when there is no cached calibration table data.
   bool load_from_memory = Get<bool>("model_from_memory");

@@ -214,6 +218,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "use_calib_mode", use_calib_mode);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
+  SetAttr(op_desc->Proto(), "predictor_id", predictor_id);
   std::string trt_engine_serialized_data = "";
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);

@@ -233,15 +238,20 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::copy(params.begin(), params.end(),
             std::back_inserter(*repetitive_params));
-  bool need_serialize = (use_static_engine && !load_from_memory);
+
+  tensorrt::TensorRTEngine *trt_engine =
+      inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+          .Create(engine_key + std::to_string(predictor_id),
+                  Get<int>("max_batch_size"), Get<int>("workspace_size"),
+                  enable_int8, calibrator.get(), Get<int>("gpu_device_id"));
+
+  bool need_serialize = (use_static_engine && !load_from_memory);
   if (need_serialize) {
     trt_engine_serialized_data = GetTrtEngineSerializedData(
         Get<std::string>("model_opt_cache_dir"), engine_key);
     // we can load the engine info serialized before from the disk.
     if (!trt_engine_serialized_data.empty()) {
       SetAttr(op_desc->Proto(), "engine_serialized_data",
               trt_engine_serialized_data);
+      trt_engine->Deserialize(trt_engine_serialized_data);
       LOG(INFO) << "Load TRT Optimized Info from "
                 << GetTrtEngineSerializedPath(
                        Get<std::string>("model_opt_cache_dir"), engine_key);

@@ -254,10 +264,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // 2. already load serialized trt engine info.
   LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
                "kernel etc). This process may cost a lot of time.";
-  std::unique_ptr<tensorrt::TensorRTEngine> trt_engine(
-      new tensorrt::TensorRTEngine(Get<int>("max_batch_size"),
-                                   Get<int>("workspace_size"), enable_int8,
-                                   calibrator.get(), Get<int>("gpu_device_id")));
   auto *scope = param_scope();
   framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   std::unordered_set<std::string> param_set(params.begin(), params.end());

@@ -265,20 +272,18 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       .ConvertBlockToTRTEngine(
           &block_desc_temp, *scope,
           std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, trt_engine.get());
-
-  nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
-  trt_engine_serialized_data =
-      std::string((const char *)serialized_engine_data->data(),
-                  serialized_engine_data->size());
+          param_set, output_mapping, trt_engine);
 
+  if (need_serialize) {
+    nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
+    trt_engine_serialized_data =
+        std::string((const char *)serialized_engine_data->data(),
+                    serialized_engine_data->size());
     SaveTrtEngineSerializedDataToFile(
         GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
                                    engine_key),
         trt_engine_serialized_data);
   }
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);
 }
 
 }  // namespace analysis
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -69,7 +69,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
       // Copy the parameter data to a tmp tensor.
       TensorCopySync(*t, cpu_place, &temp_tensor);
       // Reallocation the space on GPU
-      t->mutable_data<float>(place);
+      t->clear();
       // Copy parameter data to newly allocated GPU space.
       TensorCopySync(temp_tensor, place, t);
paddle/fluid/inference/api/analysis_config.cc
@@ -87,10 +87,12 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // Model related.
   CP_MEMBER(model_dir_);
-  CP_MEMBER(prog_file_);
-  CP_MEMBER(params_file_);
   CP_MEMBER(model_from_memory_);
+  // the memory model reuses prog_file_ and
+  // params_file_ fields.
+  prog_file_ = std::move(other.prog_file_);
+  params_file_ = std::move(other.params_file_);
 
   // Gpu related.
   CP_MEMBER(use_gpu_);
   CP_MEMBER(device_id_);

@@ -439,4 +441,12 @@ void AnalysisConfig::EnableAnakinEngine(
   anakin_auto_config_layout_ = auto_config_layout;
   Update();
 }
+
+void AnalysisConfig::PartiallyRelease() {
+  prog_file_.clear();
+  prog_file_.shrink_to_fit();
+  params_file_.clear();
+  params_file_.shrink_to_fit();
+}
+
 }  // namespace paddle
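Because prog_file_ and params_file_ become mutable members (see paddle_analysis_config.h later in this diff), the copy constructor above can std::move them out of a const source config: the potentially huge in-memory model buffers are transferred instead of duplicated, and the source config is left drained, consistent with the new one-config-per-predictor rule. A standalone illustration of that pattern (plain C++, not Paddle code):

#include <cassert>
#include <string>
#include <utility>

struct Config {
  // mutable lets a "copy" constructor drain a const source without a real copy.
  mutable std::string blob;
  Config() = default;
  Config(const Config& other) : blob(std::move(other.blob)) {}  // transfer, not duplicate
};

int main() {
  Config a;
  a.blob.assign(64 << 20, 'x');        // ~64 MB stand-in for an in-memory model
  Config b(a);                         // the buffer moves; nothing is duplicated
  assert(b.blob.size() == (64u << 20));
  // a.blob is left in a moved-from (in practice empty) state, mirroring how the
  // source AnalysisConfig is retired once a predictor has been constructed.
  return 0;
}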
paddle/fluid/inference/api/analysis_predictor.cc
@@ -444,6 +444,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
   inference_program_.reset(
       new framework::ProgramDesc(argument_.ir_analyzed_program()));
+  // The config and argument take a lot of storage,
+  // when the predictor settings are complete, we release these stores.
+  argument_.PartiallyRelease();
+  config_.PartiallyRelease();
   LOG(INFO) << "== optimize end ==";
 }

@@ -451,6 +455,8 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
+  PADDLE_ENFORCE(config.is_valid(),
+                 "Note: Each config can only be used for one predictor.");
   if (config.use_gpu()) {
     // 1. GPU memory
     PADDLE_ENFORCE_GE(config.memory_pool_init_size_mb(), 0.f);

@@ -480,6 +486,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   }
 
   std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
+  // Each config can only be used for one predictor.
+  config.SetInValid();
   auto predictor_p = dynamic_cast<AnalysisPredictor *>(predictor.get());
   if (!predictor_p->Init(nullptr)) {
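The new PADDLE_ENFORCE(config.is_valid(), ...) and config.SetInValid() calls above make config reuse a hard error: once a predictor has consumed a config, a second CreatePaddlePredictor on it fails the check. A hedged caller-side sketch (the header path and model path are placeholders), matching how the testers later in this diff now build one config per predictor:

#include <memory>

#include "paddle/include/paddle_inference_api.h"  // illustrative header path

int main() {
  paddle::AnalysisConfig cfg1;
  cfg1.SetModel("model_dir");                      // placeholder path
  auto p1 = paddle::CreatePaddlePredictor(cfg1);   // cfg1 is marked invalid here

  // Reusing cfg1 for a second predictor would now trip
  // PADDLE_ENFORCE(config.is_valid(), ...). Build a fresh config instead:
  paddle::AnalysisConfig cfg2;
  cfg2.SetModel("model_dir");
  auto p2 = paddle::CreatePaddlePredictor(cfg2);
  return (p1 && p2) ? 0 : 1;
}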
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -232,6 +232,8 @@ struct AnalysisConfig {
                                  bool force_update_static_cache = false);
 
   /** Tell whether the memory optimization is activated. */
   bool enable_memory_optim() const;
+  void SetInValid() const { is_valid_ = false; }
+  bool is_valid() const { return is_valid_; }
 
   friend class ::paddle::AnalysisPredictor;

@@ -239,6 +241,7 @@ struct AnalysisConfig {
    * Get a pass builder for customize the passes in IR analysis phase.
    */
   PassStrategy *pass_builder() const;
+  void PartiallyRelease();
 
  protected:
   // Update the config.

@@ -249,8 +252,8 @@ struct AnalysisConfig {
  protected:
   // Model pathes.
   std::string model_dir_;
-  std::string prog_file_;
-  std::string params_file_;
+  mutable std::string prog_file_;
+  mutable std::string params_file_;
 
   // GPU related.
   bool use_gpu_{false};

@@ -312,6 +315,11 @@ struct AnalysisConfig {
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
 
+  // If the config is already used on a predictor, it becomes invalid.
+  mutable bool is_valid_{true};
+  // Any config can only be used with one predictor.
+  // Variables held by config can take up a lot of memory in some cases.
+  // So we release the memory when the predictor is set up.
 };
 
 }  // namespace paddle
paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -109,6 +109,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
       "conv_affine_channel_fuse_pass",             //
       "conv_eltwiseadd_affine_channel_fuse_pass",  //
       "conv_bn_fuse_pass",                         //
       "conv_eltwiseadd_bn_fuse_pass",              //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
       "conv_elementwise_add_act_fuse_pass",        //
paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -170,6 +170,7 @@ class OpConverter {
       engine->DeclareOutput(output);
     }
     engine->FreezeNetwork();
+    engine->ClearWeights();
   }
 
   void RreplenishLayerAndOutput(
paddle/fluid/inference/tensorrt/engine.h
@@ -149,6 +149,12 @@ class TensorRTEngine {
   std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
       weight_map;
 
+  void ClearWeights() {
+    for (auto& weight_pair : weight_map) {
+      weight_pair.second.reset(nullptr);
+    }
+  }
+
  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
   // ensure that the thread is associated with the correct device by calling

@@ -213,6 +219,39 @@ class TensorRTEngine {
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
   engine__->network()->add##layer__(ARGS);
 
+class TRTEngineManager {
+ public:
+  bool Empty() const { return engines_.size() == 0; }
+
+  bool Has(const std::string& name) const {
+    if (engines_.count(name) == 0) return false;
+    return engines_.at(name).get() != nullptr;
+  }
+
+  TensorRTEngine* Get(const std::string& name) const {
+    return engines_.at(name).get();
+  }
+
+  TensorRTEngine* Create(std::string name, int max_batch, int max_workspace,
+                         bool enable_int8 = false,
+                         TRTInt8Calibrator* calibrator = nullptr,
+                         int device_id = 0,
+                         nvinfer1::ILogger& logger = NaiveLogger::Global()) {
+    auto* p = new TensorRTEngine(max_batch, max_workspace, enable_int8,
+                                 calibrator, device_id, logger);
+    engines_[name].reset(p);
+    return p;
+  }
+
+  void DeleteAll() {
+    for (auto& item : engines_) {
+      item.second.reset(nullptr);
+    }
+  }
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
+};
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
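The new TRTEngineManager owns every engine through a unique_ptr and hands out raw, non-owning pointers keyed by engine_key + predictor_id; DeleteAll() resets the pointers without erasing the map entries, which is why Has() also checks for nullptr. A standalone sketch of that ownership pattern (plain C++, not the Paddle types):

#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>

// Registry owns objects via unique_ptr; callers keep non-owning raw pointers.
struct Engine { int max_batch; };

class Registry {
 public:
  Engine* Create(const std::string& key, int max_batch) {
    auto* p = new Engine{max_batch};
    engines_[key].reset(p);   // registry takes ownership
    return p;                 // caller keeps a non-owning pointer
  }
  bool Has(const std::string& key) const {
    auto it = engines_.find(key);
    return it != engines_.end() && it->second != nullptr;
  }
  Engine* Get(const std::string& key) const { return engines_.at(key).get(); }
  void DeleteAll() {
    for (auto& kv : engines_) kv.second.reset(nullptr);  // frees, keeps the key
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<Engine>> engines_;
};

int main() {
  Registry reg;
  std::string key = "engine_key" + std::to_string(7 /*predictor_id*/);
  Engine* e = reg.Create(key, 32);
  assert(reg.Has(key) && reg.Get(key) == e);
  reg.DeleteAll();          // engines are freed...
  assert(!reg.Has(key));    // ...which is why Has() also checks for nullptr
  return 0;
}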
paddle/fluid/inference/tensorrt/op_teller.cc
@@ -31,7 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
   std::unordered_set<std::string> teller_set{
       {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
        "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
-       "elementwise_add", "elementwise_mul", "dropout", "split", "prelu",
+       "elementwise_add", "elementwise_mul", "dropout", "prelu",
        "conv2d_transpose", "leaky_relu", "fc"}};
 };
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
@@ -177,11 +177,15 @@ TEST(Analyzer_Pyramid_DNN, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
 
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("cos_sim_2.tmp_0");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -293,11 +293,15 @@ TEST(Analyzer_rnn1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
 
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("final_output.tmp_1");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
@@ -39,10 +39,17 @@ TEST(Analyzer, save_model) {
   mkdir(optimModelPath.c_str(), 0777);
   SaveOptimModel(&cfg, optimModelPath);
 
-  cfg.pass_builder()->ClearPasses();
-  int origin_num_ops = GetNumOps(cfg);
-  cfg.SetModel(optimModelPath + "/model", optimModelPath + "/params");
-  int fused_num_ops = GetNumOps(cfg);
+  // Each config can only be applied to one predictor.
+  AnalysisConfig cfg2;
+  SetConfig(&cfg2);
+  cfg2.pass_builder()->ClearPasses();
+  cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int origin_num_ops = GetNumOps(cfg2);
+
+  AnalysisConfig cfg3;
+  SetConfig(&cfg3);
+  cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int fused_num_ops = GetNumOps(cfg3);
+
   CHECK_LE(fused_num_ops, origin_num_ops);
 }
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -215,11 +215,15 @@ TEST(Analyzer_seq_pool1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
 
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back(out_var_name);
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
paddle/fluid/inference/tests/api/tester_helper.h
@@ -534,7 +534,7 @@ void CompareNativeAndAnalysis(
 }
 
 void CompareAnalysisAndZeroCopy(
-    PaddlePredictor::Config *config,
+    PaddlePredictor::Config *config, PaddlePredictor::Config *config1,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     const std::vector<std::string> &outputs_name) {
   int batch_size = FLAGS_batch_size;

@@ -544,8 +544,8 @@ void CompareAnalysisAndZeroCopy(
   predictor->Run(inputs[0], &analysis_outputs, batch_size);
   // analysis + zero_copy
   std::vector<ZeroCopyTensor> zerocopy_outputs;
-  reinterpret_cast<AnalysisConfig *>(config)->SwitchUseFeedFetchOps(false);
-  predictor = CreateTestPredictor(config, true);
+  reinterpret_cast<AnalysisConfig *>(config1)->SwitchUseFeedFetchOps(false);
+  predictor = CreateTestPredictor(config1, true);
   ConvertPaddleTensorToZeroCopyTensor(predictor.get(), inputs[0]);
   predictor->ZeroCopyRun();
   for (size_t i = 0; i < outputs_name.size(); i++) {
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -43,7 +43,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable std::unique_ptr<TensorRTEngine> trt_engine_;
+  mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
   int workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;

@@ -51,8 +51,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool use_calib_mode_;
   std::string calibration_data_;
   std::string engine_key_;
-  std::string engine_serialized_data_;
   bool calibration_mode_;
+  int predictor_id_;
   int device_id_;
 
  public:

@@ -69,7 +69,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     use_calib_mode_ = Attr<bool>("use_calib_mode");
     calibration_data_ = Attr<std::string>("calibration_data");
     engine_key_ = Attr<std::string>("engine_key");
-    engine_serialized_data_ = Attr<std::string>("engine_serialized_data");
+    predictor_id_ = Attr<int>("predictor_id");
     auto params = Attr<std::vector<std::string>>("parameters");
     for (const auto &param : params) {

@@ -84,16 +84,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
     if (enable_int8_ && calibration_data_.size()) {
       calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
     }
-    if (!calibration_mode_ && !engine_serialized_data_.empty()) {
-      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
-          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
-          device_id_));
-      PADDLE_ENFORCE(engine_serialized_data_.size(),
-                     "TRT serialized data should not be empty here,"
-                     "there must be error when generate serialized data in TRT "
-                     "subgraph detect pass.");
-      trt_engine_->Deserialize(engine_serialized_data_);
+    bool has_engine =
+        inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+            .Has(engine_key_ + std::to_string(predictor_id_));
+
+    if (!calibration_mode_ && has_engine) {
+      trt_engine_ =
+          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+              .Get(engine_key_ + std::to_string(predictor_id_));
     }
   }

@@ -239,12 +237,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
   TensorRTEngine *GetEngine(const framework::Scope &scope,
                             const platform::Place &dev_place) const {
     if (!trt_engine_) {
-      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
-          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
-          device_id_));
-      PrepareTRTEngine(scope, trt_engine_.get());
+      trt_engine_ =
+          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+              .Create(engine_key_ + std::to_string(predictor_id_),
+                      max_batch_size_, workspace_size_, enable_int8_,
+                      calibrator_.get(), device_id_);
+      PrepareTRTEngine(scope, trt_engine_);
     }
-    return trt_engine_.get();
+    return trt_engine_;
   }
 
   void PrepareTRTEngine(const framework::Scope &scope,
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
@@ -102,6 +102,7 @@ TEST(TensorRTEngineOp, manual) {
   engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));

@@ -201,6 +202,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));