Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
5863c861
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5863c861
编写于
2月 26, 2019
作者:
N
nhzlx
提交者:
ceci3
3月 08, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
6. delete useless predictor id
test=develop
上级
f3d164fa
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
29 addition
and
80 deletion
+29
-80
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+0
-4
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+0
-1
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+8
-12
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+0
-1
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+1
-4
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+0
-37
paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
+2
-1
paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
+7
-0
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+11
-20
未找到文件。
paddle/fluid/inference/analysis/argument.h
浏览文件 @
5863c861
...
@@ -99,10 +99,6 @@ struct Argument {
...
@@ -99,10 +99,6 @@ struct Argument {
private: \
private: \
unique_ptr_t field__##_;
unique_ptr_t field__##_;
// Each predictor has an unique id.
// For now, this attr will help us to get the right
// trt_engine for each trt_engine_op for each predictor when using trt.
DECL_ARGUMENT_FIELD
(
predictor_id
,
PredictorID
,
int
);
// Model path
// Model path
DECL_ARGUMENT_FIELD
(
model_dir
,
ModelDir
,
std
::
string
);
DECL_ARGUMENT_FIELD
(
model_dir
,
ModelDir
,
std
::
string
);
// Model specified with program and parameters files.
// Model specified with program and parameters files.
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
5863c861
...
@@ -81,7 +81,6 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -81,7 +81,6 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
pass
->
Set
(
"model_opt_cache_dir"
,
"model_opt_cache_dir"
,
new
std
::
string
(
GetOrCreateModelOptCacheDir
(
model_opt_cache_dir
)));
new
std
::
string
(
GetOrCreateModelOptCacheDir
(
model_opt_cache_dir
)));
pass
->
Set
(
"predictor_id"
,
new
int
(
argument
->
predictor_id
()));
pass
->
Set
(
"gpu_device_id"
,
new
int
(
argument
->
gpu_device_id
()));
pass
->
Set
(
"gpu_device_id"
,
new
int
(
argument
->
gpu_device_id
()));
}
}
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
5863c861
...
@@ -209,9 +209,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -209,9 +209,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"parameters"
,
params
);
SetAttr
(
op_desc
->
Proto
(),
"parameters"
,
params
);
auto
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
auto
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
int
predictor_id
=
Get
<
int
>
(
"predictor_id"
);
auto
engine_key
=
GenerateEngineKey
(
input_names_with_id
,
output_names_with_id
,
auto
engine_key
=
GenerateEngineKey
(
input_names_with_id
,
output_names_with_id
,
std
::
to_string
(
predictor_id
));
std
::
to_string
(
0
));
// Get "" when there is no cached calibration table data.
// Get "" when there is no cached calibration table data.
std
::
string
calibration_data
=
GetTrtCalibTableData
(
std
::
string
calibration_data
=
GetTrtCalibTableData
(
...
@@ -221,9 +220,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -221,9 +220,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
std
::
string
(
""
));
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
std
::
string
(
""
));
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data_path"
,
GetTrtEngineSerializedPath
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
));
std
::
unique_ptr
<
tensorrt
::
TRTInt8Calibrator
>
calibrator
;
std
::
unique_ptr
<
tensorrt
::
TRTInt8Calibrator
>
calibrator
;
if
(
enable_int8
&&
calibration_data
.
size
()
!=
0
)
{
if
(
enable_int8
&&
calibration_data
.
size
()
!=
0
)
{
...
@@ -239,13 +235,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -239,13 +235,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std
::
string
trt_engine_serialized_data
=
GetTrtEngineSerializedData
(
std
::
string
trt_engine_serialized_data
=
GetTrtEngineSerializedData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
tensorrt
::
TensorRTEngine
*
trt_engine
=
inference
::
Singleton
<
tensorrt
::
TRTEngineManager
>::
Global
().
Create
(
Get
<
int
>
(
"max_batch_size"
),
Get
<
int
>
(
"workspace_size"
),
enable_int8
,
calibrator
.
get
(),
engine_key
,
Get
<
int
>
(
"gpu_device_id"
));
if
(
trt_engine_serialized_data
.
size
()
==
0
)
{
if
(
trt_engine_serialized_data
.
size
()
==
0
)
{
LOG
(
INFO
)
<<
"Prepare TRT engine (Optimize model structure, Select OP "
LOG
(
INFO
)
<<
"Prepare TRT engine (Optimize model structure, Select OP "
"kernel etc). This process may cost a lot of time."
;
"kernel etc). This process may cost a lot of time."
;
std
::
unique_ptr
<
tensorrt
::
TensorRTEngine
>
trt_engine
(
new
tensorrt
::
TensorRTEngine
(
Get
<
int
>
(
"max_batch_size"
),
Get
<
int
>
(
"workspace_size"
),
enable_int8
,
calibrator
.
get
(),
Get
<
int
>
(
"gpu_device_id"
)));
auto
*
scope
=
param_scope
();
auto
*
scope
=
param_scope
();
framework
::
BlockDesc
block_desc_temp
(
nullptr
,
block_desc
.
Proto
());
framework
::
BlockDesc
block_desc_temp
(
nullptr
,
block_desc
.
Proto
());
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
...
@@ -253,7 +249,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -253,7 +249,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
.
ConvertBlockToTRTEngine
(
.
ConvertBlockToTRTEngine
(
&
block_desc_temp
,
*
scope
,
&
block_desc_temp
,
*
scope
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
param_set
,
output_mapping
,
trt_engine
);
param_set
,
output_mapping
,
trt_engine
.
get
()
);
nvinfer1
::
IHostMemory
*
serialized_engine_data
=
trt_engine
->
Serialize
();
nvinfer1
::
IHostMemory
*
serialized_engine_data
=
trt_engine
->
Serialize
();
trt_engine_serialized_data
=
trt_engine_serialized_data
=
std
::
string
((
const
char
*
)
serialized_engine_data
->
data
(),
std
::
string
((
const
char
*
)
serialized_engine_data
->
data
(),
...
@@ -263,11 +259,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -263,11 +259,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
engine_key
),
engine_key
),
trt_engine_serialized_data
);
trt_engine_serialized_data
);
}
else
{
}
else
{
LOG
(
INFO
)
<<
"Load TRT
Engine from optimized serialized data :
"
LOG
(
INFO
)
<<
"Load TRT
Optimized Info from
"
<<
GetTrtEngineSerializedPath
(
<<
GetTrtEngineSerializedPath
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
trt_engine
->
Deserialize
(
trt_engine_serialized_data
);
}
}
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
trt_engine_serialized_data
);
trt_engine_serialized_data
);
}
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
5863c861
...
@@ -345,7 +345,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -345,7 +345,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
config_
.
static_memory_optim_force_update_
);
config_
.
static_memory_optim_force_update_
);
argument_
.
SetModelFromMemory
(
config_
.
model_from_memory_
);
argument_
.
SetModelFromMemory
(
config_
.
model_from_memory_
);
// Analyze inference_program
// Analyze inference_program
argument_
.
SetPredictorID
(
predictor_id_
);
if
(
!
config_
.
model_dir
().
empty
())
{
if
(
!
config_
.
model_dir
().
empty
())
{
argument_
.
SetModelDir
(
config_
.
model_dir
());
argument_
.
SetModelDir
(
config_
.
model_dir
());
}
else
{
}
else
{
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
5863c861
...
@@ -44,9 +44,7 @@ using framework::NaiveExecutor;
...
@@ -44,9 +44,7 @@ using framework::NaiveExecutor;
*/
*/
class
AnalysisPredictor
:
public
PaddlePredictor
{
class
AnalysisPredictor
:
public
PaddlePredictor
{
public:
public:
explicit
AnalysisPredictor
(
const
AnalysisConfig
&
config
)
:
config_
(
config
)
{
explicit
AnalysisPredictor
(
const
AnalysisConfig
&
config
)
:
config_
(
config
)
{}
predictor_id_
=
inference
::
GetUniqueId
();
}
~
AnalysisPredictor
();
~
AnalysisPredictor
();
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
...
@@ -146,7 +144,6 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -146,7 +144,6 @@ class AnalysisPredictor : public PaddlePredictor {
const
size_t
max_shape_collect_count_
{
1000
};
const
size_t
max_shape_collect_count_
{
1000
};
int
need_collect_var_shapes_
{
-
1
};
// -1 for default, 0 for false, 1 for true.
int
need_collect_var_shapes_
{
-
1
};
// -1 for default, 0 for false, 1 for true.
std
::
vector
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
batch_var_shapes_
;
std
::
vector
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
batch_var_shapes_
;
int
predictor_id_
;
private:
private:
// Some status here that help to determine the status inside the predictor.
// Some status here that help to determine the status inside the predictor.
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
5863c861
...
@@ -199,43 +199,6 @@ class TensorRTEngine {
...
@@ -199,43 +199,6 @@ class TensorRTEngine {
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
engine__->network()->add##layer__(ARGS);
engine__->network()->add##layer__(ARGS);
/*
* Helper to control the TensorRT engine's creation and deletion.
*/
class
TRTEngineManager
{
public:
bool
HasEngine
(
const
std
::
string
&
name
)
const
{
if
(
engines_
.
count
(
name
)
==
0
)
return
false
;
return
engines_
.
at
(
name
).
get
()
!=
nullptr
;
}
// Get an engine called `name`.
TensorRTEngine
*
Get
(
const
std
::
string
&
name
)
const
{
return
engines_
.
at
(
name
).
get
();
}
// Create or get an engine called `name`
TensorRTEngine
*
Create
(
int
max_batch
,
int
max_workspace
,
bool
enable_int8
,
TRTInt8Calibrator
*
calibrator
,
const
std
::
string
&
engine_name
,
int
device_id
=
0
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
mut_
);
auto
*
p
=
new
TensorRTEngine
(
max_batch
,
max_workspace
,
enable_int8
,
calibrator
,
device_id
);
engines_
[
engine_name
].
reset
(
p
);
return
p
;
}
void
DeleteALL
()
{
for
(
auto
&
item
:
engines_
)
{
item
.
second
.
reset
(
nullptr
);
}
}
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
TensorRTEngine
>>
engines_
;
std
::
mutex
mut_
;
};
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
浏览文件 @
5863c861
...
@@ -31,7 +31,8 @@ namespace inference {
...
@@ -31,7 +31,8 @@ namespace inference {
namespace
tensorrt
{
namespace
tensorrt
{
namespace
plugin
{
namespace
plugin
{
class
PluginFactoryTensorRT
:
public
nvinfer1
::
IPluginFactory
{
class
PluginFactoryTensorRT
:
public
nvinfer1
::
IPluginFactory
,
public
DeleteHelper
{
public:
public:
// Deserialization method
// Deserialization method
PluginTensorRT
*
createPlugin
(
const
char
*
layer_name
,
const
void
*
serial_data
,
PluginTensorRT
*
createPlugin
(
const
char
*
layer_name
,
const
void
*
serial_data
,
...
...
paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
浏览文件 @
5863c861
...
@@ -24,6 +24,13 @@ namespace inference {
...
@@ -24,6 +24,13 @@ namespace inference {
namespace
tensorrt
{
namespace
tensorrt
{
namespace
plugin
{
namespace
plugin
{
// Some trt base classes lack of the destructor.
// We use a assisted class to fix this.
struct
DeleteHelper
{
protected:
virtual
~
DeleteHelper
()
{}
};
template
<
typename
T
>
template
<
typename
T
>
inline
void
SerializeValue
(
void
**
buffer
,
T
const
&
value
);
inline
void
SerializeValue
(
void
**
buffer
,
T
const
&
value
);
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
5863c861
...
@@ -41,7 +41,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
...
@@ -41,7 +41,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
private:
private:
std
::
vector
<
std
::
string
>
input_names_
;
std
::
vector
<
std
::
string
>
input_names_
;
std
::
unordered_set
<
std
::
string
>
param_names_
;
std
::
unordered_set
<
std
::
string
>
param_names_
;
mutable
TensorRTEngine
*
trt_engine_
;
mutable
std
::
unique_ptr
<
TensorRTEngine
>
trt_engine_
;
int
max_batch_size_
;
int
max_batch_size_
;
int
workspace_size_
;
int
workspace_size_
;
std
::
unique_ptr
<
TRTInt8Calibrator
>
calibrator_
;
std
::
unique_ptr
<
TRTInt8Calibrator
>
calibrator_
;
...
@@ -64,7 +64,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
...
@@ -64,7 +64,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
calibration_data_
=
Attr
<
std
::
string
>
(
"calibration_data"
);
calibration_data_
=
Attr
<
std
::
string
>
(
"calibration_data"
);
engine_key_
=
Attr
<
std
::
string
>
(
"engine_key"
);
engine_key_
=
Attr
<
std
::
string
>
(
"engine_key"
);
engine_serialized_data_
=
Attr
<
std
::
string
>
(
"engine_serialized_data"
);
engine_serialized_data_
=
Attr
<
std
::
string
>
(
"engine_serialized_data"
);
trt_engine_
=
nullptr
;
auto
params
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
auto
params
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
for
(
const
auto
&
param
:
params
)
{
for
(
const
auto
&
param
:
params
)
{
...
@@ -78,16 +77,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
...
@@ -78,16 +77,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
if
(
enable_int8_
&&
calibration_data_
.
size
())
{
if
(
enable_int8_
&&
calibration_data_
.
size
())
{
calibrator_
.
reset
(
new
TRTInt8Calibrator
(
calibration_data_
));
calibrator_
.
reset
(
new
TRTInt8Calibrator
(
calibration_data_
));
}
}
// we will create an engine here.
if
(
!
calibration_mode_
)
{
if
(
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
.
HasEngine
(
engine_key_
))
{
trt_engine_
=
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
.
Get
(
engine_key_
);
}
}
}
}
protected:
protected:
...
@@ -231,15 +220,17 @@ class TensorRTEngineOp : public framework::OperatorBase {
...
@@ -231,15 +220,17 @@ class TensorRTEngineOp : public framework::OperatorBase {
TensorRTEngine
*
GetEngine
(
const
framework
::
Scope
&
scope
,
TensorRTEngine
*
GetEngine
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
{
const
platform
::
Place
&
dev_place
)
const
{
if
(
trt_engine_
==
nullptr
)
{
if
(
trt_engine_
.
get
()
==
nullptr
)
{
trt_engine_
=
trt_engine_
.
reset
(
new
inference
::
tensorrt
::
TensorRTEngine
(
inference
::
Singleton
<
inference
::
tensorrt
::
TRTEngineManager
>::
Global
()
max_batch_size_
,
workspace_size_
,
enable_int8_
,
calibrator_
.
get
(),
.
Create
(
max_batch_size_
,
workspace_size_
,
enable_int8_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
dev_place
).
device
));
calibrator_
.
get
(),
engine_key_
,
if
(
engine_serialized_data_
.
size
()
>
0
)
{
boost
::
get
<
platform
::
CUDAPlace
>
(
dev_place
).
device
);
trt_engine_
->
Deserialize
(
engine_serialized_data_
);
PrepareTRTEngine
(
scope
,
trt_engine_
);
}
else
{
PrepareTRTEngine
(
scope
,
trt_engine_
.
get
());
}
}
}
return
trt_engine_
;
return
trt_engine_
.
get
()
;
}
}
void
PrepareTRTEngine
(
const
framework
::
Scope
&
scope
,
void
PrepareTRTEngine
(
const
framework
::
Scope
&
scope
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录