Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ae576f3c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ae576f3c
编写于
6月 06, 2019
作者:
Z
Zhaolong Xing
提交者:
GitHub
6月 06, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix: when use the load model from memory mode, the RAM occupy is high (#17788)
test=develop
上级
5efe8c72
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
149 addition
and
45 deletion
+149
-45
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+10
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+1
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+19
-14
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
...ence/analysis/passes/ir_params_sync_among_devices_pass.cc
+1
-1
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+12
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+8
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+10
-2
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+1
-0
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+1
-0
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+39
-0
paddle/fluid/inference/tensorrt/op_teller.cc
paddle/fluid/inference/tensorrt/op_teller.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
.../fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
+4
-0
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+4
-0
paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
...e/fluid/inference/tests/api/analyzer_save_model_tester.cc
+11
-4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+4
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+3
-3
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+18
-18
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
+2
-0
未找到文件。
paddle/fluid/inference/analysis/argument.h
浏览文件 @
ae576f3c
...
...
@@ -63,6 +63,16 @@ struct Argument {
using
anakin_max_shape_t
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
;
bool
Has
(
const
std
::
string
&
key
)
const
{
return
valid_fields_
.
count
(
key
);
}
void
PartiallyRelease
()
{
if
(
Has
(
"model_program_path"
))
{
if
(
Has
(
"model_from_memory"
)
&&
model_from_memory
())
{
model_program_path
().
clear
();
model_program_path
().
shrink_to_fit
();
model_params_path
().
clear
();
model_params_path
().
shrink_to_fit
();
}
}
}
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
ae576f3c
...
...
@@ -87,6 +87,7 @@ void IRPassManager::CreatePasses(Argument *argument,
bool
enable_int8
=
argument
->
tensorrt_precision_mode
()
==
AnalysisConfig
::
Precision
::
kInt8
;
pass
->
Set
(
"predictor_id"
,
new
int
(
argument
->
predictor_id
()));
bool
use_calib_mode
=
argument
->
tensorrt_use_calib_mode
();
pass
->
Set
(
"enable_int8"
,
new
bool
(
enable_int8
));
pass
->
Set
(
"use_calib_mode"
,
new
bool
(
use_calib_mode
));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
ae576f3c
...
...
@@ -199,8 +199,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"parameters"
,
params
);
auto
use_static_engine
=
Get
<
bool
>
(
"use_static_engine"
);
// TODO(NHZlX)
// There are models with the same structure but the different parameters,
// when runing in the 'use_serialize' mode, there is a bug.
auto
engine_key
=
GenerateEngineKey
(
input_names_with_id
,
output_names_with_id
,
std
::
to_string
(
0
));
auto
predictor_id
=
Get
<
int
>
(
"predictor_id"
);
// Get "" when there is no cached calibration table data.
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
...
...
@@ -214,6 +218,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"use_calib_mode"
,
use_calib_mode
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
SetAttr
(
op_desc
->
Proto
(),
"predictor_id"
,
predictor_id
);
std
::
string
trt_engine_serialized_data
=
""
;
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
trt_engine_serialized_data
);
...
...
@@ -233,15 +238,20 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std
::
copy
(
params
.
begin
(),
params
.
end
(),
std
::
back_inserter
(
*
repetitive_params
));
bool
need_serialize
=
(
use_static_engine
&&
!
load_from_memory
);
tensorrt
::
TensorRTEngine
*
trt_engine
=
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
.
Create
(
engine_key
+
std
::
to_string
(
predictor_id
),
Get
<
int
>
(
"max_batch_size"
),
Get
<
int
>
(
"workspace_size"
),
enable_int8
,
calibrator
.
get
(),
Get
<
int
>
(
"gpu_device_id"
));
bool
need_serialize
=
(
use_static_engine
&&
!
load_from_memory
);
if
(
need_serialize
)
{
trt_engine_serialized_data
=
GetTrtEngineSerializedData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
// we can load the engine info serialized before from the disk.
if
(
!
trt_engine_serialized_data
.
empty
())
{
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
trt_engine_serialized_data
);
trt_engine
->
Deserialize
(
trt_engine_serialized_data
);
LOG
(
INFO
)
<<
"Load TRT Optimized Info from "
<<
GetTrtEngineSerializedPath
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
...
...
@@ -254,10 +264,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// 2. already load serialized trt engine info.
LOG
(
INFO
)
<<
"Prepare TRT engine (Optimize model structure, Select OP "
"kernel etc). This process may cost a lot of time."
;
std
::
unique_ptr
<
tensorrt
::
TensorRTEngine
>
trt_engine
(
new
tensorrt
::
TensorRTEngine
(
Get
<
int
>
(
"max_batch_size"
),
Get
<
int
>
(
"workspace_size"
),
enable_int8
,
calibrator
.
get
(),
Get
<
int
>
(
"gpu_device_id"
)));
auto
*
scope
=
param_scope
();
framework
::
BlockDesc
block_desc_temp
(
nullptr
,
block_desc
.
Proto
());
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
...
...
@@ -265,20 +272,18 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
.
ConvertBlockToTRTEngine
(
&
block_desc_temp
,
*
scope
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
param_set
,
output_mapping
,
trt_engine
.
get
());
nvinfer1
::
IHostMemory
*
serialized_engine_data
=
trt_engine
->
Serialize
();
trt_engine_serialized_data
=
std
::
string
((
const
char
*
)
serialized_engine_data
->
data
(),
serialized_engine_data
->
size
());
param_set
,
output_mapping
,
trt_engine
);
if
(
need_serialize
)
{
nvinfer1
::
IHostMemory
*
serialized_engine_data
=
trt_engine
->
Serialize
();
trt_engine_serialized_data
=
std
::
string
((
const
char
*
)
serialized_engine_data
->
data
(),
serialized_engine_data
->
size
());
SaveTrtEngineSerializedDataToFile
(
GetTrtEngineSerializedPath
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
),
trt_engine_serialized_data
);
}
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
trt_engine_serialized_data
);
}
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
浏览文件 @
ae576f3c
...
...
@@ -69,7 +69,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
// Copy the parameter data to a tmp tensor.
TensorCopySync
(
*
t
,
cpu_place
,
&
temp_tensor
);
// Reallocation the space on GPU
t
->
mutable_data
<
float
>
(
place
);
t
->
clear
(
);
// Copy parameter data to newly allocated GPU space.
TensorCopySync
(
temp_tensor
,
place
,
t
);
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
ae576f3c
...
...
@@ -87,10 +87,12 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// Model related.
CP_MEMBER
(
model_dir_
);
CP_MEMBER
(
prog_file_
);
CP_MEMBER
(
params_file_
);
CP_MEMBER
(
model_from_memory_
);
// the memory model reuses prog_file_ and
// params_file_ fields.
prog_file_
=
std
::
move
(
other
.
prog_file_
);
params_file_
=
std
::
move
(
other
.
params_file_
);
// Gpu related.
CP_MEMBER
(
use_gpu_
);
CP_MEMBER
(
device_id_
);
...
...
@@ -439,4 +441,12 @@ void AnalysisConfig::EnableAnakinEngine(
anakin_auto_config_layout_
=
auto_config_layout
;
Update
();
}
void
AnalysisConfig
::
PartiallyRelease
()
{
prog_file_
.
clear
();
prog_file_
.
shrink_to_fit
();
params_file_
.
clear
();
params_file_
.
shrink_to_fit
();
}
}
// namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
ae576f3c
...
...
@@ -444,6 +444,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
ARGUMENT_CHECK_FIELD
((
&
argument_
),
ir_analyzed_program
);
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
argument_
.
ir_analyzed_program
()));
// The config and argument take a lot of storage,
// when the predictor settings are complete, we release these stores.
argument_
.
PartiallyRelease
();
config_
.
PartiallyRelease
();
LOG
(
INFO
)
<<
"== optimize end =="
;
}
...
...
@@ -451,6 +455,8 @@ template <>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
AnalysisConfig
&
config
)
{
VLOG
(
3
)
<<
"create AnalysisConfig"
;
PADDLE_ENFORCE
(
config
.
is_valid
(),
"Note: Each config can only be used for one predictor."
);
if
(
config
.
use_gpu
())
{
// 1. GPU memory
PADDLE_ENFORCE_GE
(
config
.
memory_pool_init_size_mb
(),
0.
f
);
...
...
@@ -480,6 +486,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
std
::
unique_ptr
<
PaddlePredictor
>
predictor
(
new
AnalysisPredictor
(
config
));
// Each config can only be used for one predictor.
config
.
SetInValid
();
auto
predictor_p
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
if
(
!
predictor_p
->
Init
(
nullptr
))
{
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
ae576f3c
...
...
@@ -232,6 +232,8 @@ struct AnalysisConfig {
bool
force_update_static_cache
=
false
);
/** Tell whether the memory optimization is activated. */
bool
enable_memory_optim
()
const
;
void
SetInValid
()
const
{
is_valid_
=
false
;
}
bool
is_valid
()
const
{
return
is_valid_
;
}
friend
class
::
paddle
::
AnalysisPredictor
;
...
...
@@ -239,6 +241,7 @@ struct AnalysisConfig {
* Get a pass builder for customize the passes in IR analysis phase.
*/
PassStrategy
*
pass_builder
()
const
;
void
PartiallyRelease
();
protected:
// Update the config.
...
...
@@ -249,8 +252,8 @@ struct AnalysisConfig {
protected:
// Model pathes.
std
::
string
model_dir_
;
std
::
string
prog_file_
;
std
::
string
params_file_
;
mutable
std
::
string
prog_file_
;
mutable
std
::
string
params_file_
;
// GPU related.
bool
use_gpu_
{
false
};
...
...
@@ -312,6 +315,11 @@ struct AnalysisConfig {
bool
use_mkldnn_quantizer_
{
false
};
std
::
shared_ptr
<
MkldnnQuantizerConfig
>
mkldnn_quantizer_config_
;
// If the config is already used on a predictor, it becomes invalid.
mutable
bool
is_valid_
{
true
};
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
// So we release the memory when the predictor is set up.
};
}
// namespace paddle
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
ae576f3c
...
...
@@ -109,6 +109,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_affine_channel_fuse_pass"
,
//
"conv_eltwiseadd_affine_channel_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
"conv_elementwise_add_act_fuse_pass"
,
//
...
...
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
ae576f3c
...
...
@@ -170,6 +170,7 @@ class OpConverter {
engine
->
DeclareOutput
(
output
);
}
engine
->
FreezeNetwork
();
engine
->
ClearWeights
();
}
void
RreplenishLayerAndOutput
(
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
ae576f3c
...
...
@@ -149,6 +149,12 @@ class TensorRTEngine {
std
::
unordered_map
<
std
::
string
/*name*/
,
std
::
unique_ptr
<
framework
::
Tensor
>>
weight_map
;
void
ClearWeights
()
{
for
(
auto
&
weight_pair
:
weight_map
)
{
weight_pair
.
second
.
reset
(
nullptr
);
}
}
private:
// Each ICudaEngine object is bound to a specific GPU when it is instantiated,
// ensure that the thread is associated with the correct device by calling
...
...
@@ -213,6 +219,39 @@ class TensorRTEngine {
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
engine__->network()->add##layer__(ARGS);
class
TRTEngineManager
{
public:
bool
Empty
()
const
{
return
engines_
.
size
()
==
0
;
}
bool
Has
(
const
std
::
string
&
name
)
const
{
if
(
engines_
.
count
(
name
)
==
0
)
return
false
;
return
engines_
.
at
(
name
).
get
()
!=
nullptr
;
}
TensorRTEngine
*
Get
(
const
std
::
string
&
name
)
const
{
return
engines_
.
at
(
name
).
get
();
}
TensorRTEngine
*
Create
(
std
::
string
name
,
int
max_batch
,
int
max_workspace
,
bool
enable_int8
=
false
,
TRTInt8Calibrator
*
calibrator
=
nullptr
,
int
device_id
=
0
,
nvinfer1
::
ILogger
&
logger
=
NaiveLogger
::
Global
())
{
auto
*
p
=
new
TensorRTEngine
(
max_batch
,
max_workspace
,
enable_int8
,
calibrator
,
device_id
,
logger
);
engines_
[
name
].
reset
(
p
);
return
p
;
}
void
DeleteAll
()
{
for
(
auto
&
item
:
engines_
)
{
item
.
second
.
reset
(
nullptr
);
}
}
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
TensorRTEngine
>>
engines_
;
};
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/op_teller.cc
浏览文件 @
ae576f3c
...
...
@@ -31,7 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
std
::
unordered_set
<
std
::
string
>
teller_set
{
{
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"softmax"
,
"sigmoid"
,
"depthwise_conv2d"
,
"batch_norm"
,
"concat"
,
"tanh"
,
"pad"
,
"elementwise_add"
,
"elementwise_mul"
,
"dropout"
,
"
split"
,
"
prelu"
,
"elementwise_add"
,
"elementwise_mul"
,
"dropout"
,
"prelu"
,
"conv2d_transpose"
,
"leaky_relu"
,
"fc"
}};
};
...
...
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
浏览文件 @
ae576f3c
...
...
@@ -177,11 +177,15 @@ TEST(Analyzer_Pyramid_DNN, compare_zero_copy) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
AnalysisConfig
cfg1
;
SetConfig
(
&
cfg1
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
std
::
vector
<
std
::
string
>
outputs_name
;
outputs_name
.
emplace_back
(
"cos_sim_2.tmp_0"
);
CompareAnalysisAndZeroCopy
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
cfg
),
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
cfg1
),
input_slots_all
,
outputs_name
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
ae576f3c
...
...
@@ -293,11 +293,15 @@ TEST(Analyzer_rnn1, compare_zero_copy) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
AnalysisConfig
cfg1
;
SetConfig
(
&
cfg1
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
std
::
vector
<
std
::
string
>
outputs_name
;
outputs_name
.
emplace_back
(
"final_output.tmp_1"
);
CompareAnalysisAndZeroCopy
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
cfg
),
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
cfg1
),
input_slots_all
,
outputs_name
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
浏览文件 @
ae576f3c
...
...
@@ -39,10 +39,17 @@ TEST(Analyzer, save_model) {
mkdir
(
optimModelPath
.
c_str
(),
0777
);
SaveOptimModel
(
&
cfg
,
optimModelPath
);
cfg
.
pass_builder
()
->
ClearPasses
();
int
origin_num_ops
=
GetNumOps
(
cfg
);
cfg
.
SetModel
(
optimModelPath
+
"/model"
,
optimModelPath
+
"/params"
);
int
fused_num_ops
=
GetNumOps
(
cfg
);
// Each config can only be applied to one predictor.
AnalysisConfig
cfg2
;
SetConfig
(
&
cfg2
);
cfg2
.
pass_builder
()
->
ClearPasses
();
cfg2
.
SetModel
(
optimModelPath
+
"/model"
,
optimModelPath
+
"/params"
);
int
origin_num_ops
=
GetNumOps
(
cfg2
);
AnalysisConfig
cfg3
;
SetConfig
(
&
cfg3
);
cfg3
.
SetModel
(
optimModelPath
+
"/model"
,
optimModelPath
+
"/params"
);
int
fused_num_ops
=
GetNumOps
(
cfg3
);
CHECK_LE
(
fused_num_ops
,
origin_num_ops
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
浏览文件 @
ae576f3c
...
...
@@ -215,11 +215,15 @@ TEST(Analyzer_seq_pool1, compare_zero_copy) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
AnalysisConfig
cfg1
;
SetConfig
(
&
cfg1
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
std
::
vector
<
std
::
string
>
outputs_name
;
outputs_name
.
emplace_back
(
out_var_name
);
CompareAnalysisAndZeroCopy
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
cfg
),
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
cfg1
),
input_slots_all
,
outputs_name
);
}
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
ae576f3c
...
...
@@ -534,7 +534,7 @@ void CompareNativeAndAnalysis(
}
void
CompareAnalysisAndZeroCopy
(
PaddlePredictor
::
Config
*
config
,
PaddlePredictor
::
Config
*
config
,
PaddlePredictor
::
Config
*
config1
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
const
std
::
vector
<
std
::
string
>
&
outputs_name
)
{
int
batch_size
=
FLAGS_batch_size
;
...
...
@@ -544,8 +544,8 @@ void CompareAnalysisAndZeroCopy(
predictor
->
Run
(
inputs
[
0
],
&
analysis_outputs
,
batch_size
);
// analysis + zero_copy
std
::
vector
<
ZeroCopyTensor
>
zerocopy_outputs
;
reinterpret_cast
<
AnalysisConfig
*>
(
config
)
->
SwitchUseFeedFetchOps
(
false
);
predictor
=
CreateTestPredictor
(
config
,
true
);
reinterpret_cast
<
AnalysisConfig
*>
(
config
1
)
->
SwitchUseFeedFetchOps
(
false
);
predictor
=
CreateTestPredictor
(
config
1
,
true
);
ConvertPaddleTensorToZeroCopyTensor
(
predictor
.
get
(),
inputs
[
0
]);
predictor
->
ZeroCopyRun
();
for
(
size_t
i
=
0
;
i
<
outputs_name
.
size
();
i
++
)
{
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
ae576f3c
...
...
@@ -43,7 +43,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
private:
std
::
vector
<
std
::
string
>
input_names_
;
std
::
unordered_set
<
std
::
string
>
param_names_
;
mutable
std
::
unique_ptr
<
TensorRTEngine
>
trt_engine_
;
mutable
TensorRTEngine
*
trt_engine_
{
nullptr
}
;
int
max_batch_size_
;
int
workspace_size_
;
std
::
unique_ptr
<
TRTInt8Calibrator
>
calibrator_
;
...
...
@@ -51,8 +51,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
bool
use_calib_mode_
;
std
::
string
calibration_data_
;
std
::
string
engine_key_
;
std
::
string
engine_serialized_data_
;
bool
calibration_mode_
;
int
predictor_id_
;
int
device_id_
;
public:
...
...
@@ -69,7 +69,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
use_calib_mode_
=
Attr
<
bool
>
(
"use_calib_mode"
);
calibration_data_
=
Attr
<
std
::
string
>
(
"calibration_data"
);
engine_key_
=
Attr
<
std
::
string
>
(
"engine_key"
);
engine_serialized_data_
=
Attr
<
std
::
string
>
(
"engine_serialized_data
"
);
predictor_id_
=
Attr
<
int
>
(
"predictor_id
"
);
auto
params
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
for
(
const
auto
&
param
:
params
)
{
...
...
@@ -84,16 +84,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
if
(
enable_int8_
&&
calibration_data_
.
size
())
{
calibrator_
.
reset
(
new
TRTInt8Calibrator
(
calibration_data_
));
}
if
(
!
calibration_mode_
&&
!
engine_serialized_data_
.
empty
())
{
trt_engine_
.
reset
(
new
inference
::
tensorrt
::
TensorRTEngine
(
max_batch_size_
,
workspace_size_
,
enable_int8_
,
calibrator_
.
get
(),
device_id_
));
PADDLE_ENFORCE
(
engine_serialized_data_
.
size
(),
"TRT serialized data should not be empty here,"
"there must be error when generate serialized data in TRT "
"subgraph detect pass."
);
trt_engine_
->
Deserialize
(
engine_serialized_data_
);
bool
has_engine
=
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
.
Has
(
engine_key_
+
std
::
to_string
(
predictor_id_
));
if
(
!
calibration_mode_
&&
has_engine
)
{
trt_engine_
=
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
.
Get
(
engine_key_
+
std
::
to_string
(
predictor_id_
));
}
}
...
...
@@ -239,12 +237,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
TensorRTEngine
*
GetEngine
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
{
if
(
!
trt_engine_
)
{
trt_engine_
.
reset
(
new
inference
::
tensorrt
::
TensorRTEngine
(
max_batch_size_
,
workspace_size_
,
enable_int8_
,
calibrator_
.
get
(),
device_id_
));
PrepareTRTEngine
(
scope
,
trt_engine_
.
get
());
trt_engine_
=
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
.
Create
(
engine_key_
+
std
::
to_string
(
predictor_id_
),
max_batch_size_
,
workspace_size_
,
enable_int8_
,
calibrator_
.
get
(),
device_id_
);
PrepareTRTEngine
(
scope
,
trt_engine_
);
}
return
trt_engine_
.
get
()
;
return
trt_engine_
;
}
void
PrepareTRTEngine
(
const
framework
::
Scope
&
scope
,
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
浏览文件 @
ae576f3c
...
...
@@ -102,6 +102,7 @@ TEST(TensorRTEngineOp, manual) {
engine_op_desc
.
SetAttr
(
"workspace_size"
,
static_cast
<
int
>
(
1
<<
20
));
engine_op_desc
.
SetAttr
(
"parameters"
,
std
::
vector
<
std
::
string
>
({}));
engine_op_desc
.
SetAttr
(
"engine_key"
,
std
::
string
(
"a_engine"
));
engine_op_desc
.
SetAttr
(
"predictor_id"
,
1
);
engine_op_desc
.
SetAttr
(
"calibration_data"
,
std
::
string
(
""
));
engine_op_desc
.
SetAttr
(
"enable_int8"
,
static_cast
<
bool
>
(
false
));
engine_op_desc
.
SetAttr
(
"use_calib_mode"
,
static_cast
<
bool
>
(
false
));
...
...
@@ -201,6 +202,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
engine_op_desc
.
SetAttr
(
"parameters"
,
std
::
vector
<
std
::
string
>
({
"y0"
,
"y1"
,
"y2"
,
"y3"
}));
engine_op_desc
.
SetAttr
(
"engine_key"
,
std
::
string
(
"b_engine"
));
engine_op_desc
.
SetAttr
(
"predictor_id"
,
1
);
engine_op_desc
.
SetAttr
(
"calibration_data"
,
std
::
string
(
""
));
engine_op_desc
.
SetAttr
(
"enable_int8"
,
static_cast
<
bool
>
(
false
));
engine_op_desc
.
SetAttr
(
"use_calib_mode"
,
static_cast
<
bool
>
(
false
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录