Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
c79f06d3
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c79f06d3
编写于
3月 20, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry-pick from feature/anakin-engine: add batch interface for pd-anakin #16178
上级
69d37f81
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
57 addition
and
29 deletion
+57
-29
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+3
-1
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+9
-7
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+8
-3
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+14
-9
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+2
-1
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+12
-6
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+5
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+2
-1
未找到文件。
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
c79f06d3
...
...
@@ -90,10 +90,12 @@ class AnakinOpConverter {
for
(
int
i
=
0
;
i
<
var_shape
.
size
();
i
++
)
{
input_shape
.
push_back
(
var_shape
[
i
]);
}
input_shape
[
0
]
=
1
;
input_shape
[
0
]
=
engine
->
GetMaxBatch
()
;
engine
->
SetInputShape
(
input
,
input_shape
);
}
// engine->Graph()->RegistAllOut();
engine
->
Optimize
();
engine
->
InitGraph
();
}
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
c79f06d3
...
...
@@ -34,10 +34,12 @@ namespace anakin {
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
AnakinEngine
(
bool
need_summary
,
int
device
)
int
device
,
int
max_batch_size
)
:
graph_
(
new
AnakinGraphT
<
TargetT
,
PrecisionType
>
()),
net_
(
new
AnakinNetT
<
TargetT
,
PrecisionType
,
RunType
>
(
need_summary
))
{
device_
=
device
;
max_batch_size_
=
max_batch_size
;
}
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
...
...
@@ -71,8 +73,8 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
for
(
const
auto
&
input
:
inputs
)
{
auto
*
tensor
=
input
.
second
;
auto
*
data
=
tensor
->
data
<
float
>
();
auto
fluid_input_shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
auto
fluid_input_shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
auto
*
anakin_input
=
net_
->
get_in
(
input
.
first
);
auto
net_shape
=
anakin_input
->
shape
();
if
(
tensor
->
numel
()
>
net_shape
.
count
())
{
...
...
@@ -84,11 +86,13 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
anakin_input
->
reshape
(
fluid_input_shape
);
net_shape
=
anakin_input
->
shape
();
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
net_shape
);
anakin_input
->
share_from
(
tmp_anakin_tensor
);
// net_shape);
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
}
cudaDeviceSynchronize
();
net_
->
prediction
();
for
(
const
auto
&
output
:
outputs
)
{
platform
::
CUDAPlace
gpu_place
(
device_
);
...
...
@@ -98,12 +102,10 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
auto
anakin_output_shape
=
anakin_output
->
valid_shape
();
tensor
->
Resize
(
framework
::
make_ddim
(
anakin_output_shape
));
auto
*
fluid_data
=
tensor
->
mutable_data
<
float
>
(
gpu_place
);
memory
::
Copy
(
gpu_place
,
static_cast
<
void
*>
(
fluid_data
),
gpu_place
,
static_cast
<
void
*>
(
anakin_data
),
tensor
->
numel
()
*
sizeof
(
float
),
stream
);
}
cudaDeviceSynchronize
();
}
...
...
paddle/fluid/inference/anakin/engine.h
浏览文件 @
c79f06d3
...
...
@@ -55,7 +55,8 @@ class AnakinEngine {
using
GraphT
=
::
anakin
::
graph
::
Graph
<
TargetT
,
PrecisionType
>
;
public:
explicit
AnakinEngine
(
bool
need_summary
=
false
,
int
device
=
0
);
explicit
AnakinEngine
(
bool
need_summary
=
false
,
int
device
=
0
,
int
max_batch_size
=
1
);
~
AnakinEngine
();
void
InitGraph
();
void
SetInputShape
(
const
std
::
string
&
name
,
std
::
vector
<
int
>
shape
);
...
...
@@ -70,10 +71,12 @@ class AnakinEngine {
"Add operation's attribution."
);
}
NetT
*
Net
()
{
return
net_
.
get
();
}
GraphT
*
Graph
()
{
return
graph_
.
get
();
}
std
::
unique_ptr
<
AnakinEngine
>
Clone
();
void
Freeze
();
void
Optimize
();
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
int
GetMaxBatch
()
{
return
max_batch_size_
;
}
// void SaveSerializedData(std::string& data) { graph_->save_to_string(data);
// }
// void LoadSerializedData(const std::string& data) {
...
...
@@ -83,6 +86,7 @@ class AnakinEngine {
cudaStream_t
stream
);
private:
int
max_batch_size_
;
int
device_
;
std
::
unique_ptr
<
GraphT
>
graph_
;
std
::
unique_ptr
<
NetT
>
net_
;
...
...
@@ -100,10 +104,11 @@ class AnakinEngineManager {
return
engines_
.
at
(
name
).
get
();
}
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
std
::
string
engine_name
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
mut_
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
need_summary
,
device
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
need_summary
,
device
,
max_batch_size
);
engines_
[
engine_name
].
reset
(
p
);
return
p
;
}
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
c79f06d3
...
...
@@ -150,6 +150,7 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
tensorrt_use_static_engine
,
TensorRtUseStaticEngine
,
bool
);
DECL_ARGUMENT_FIELD
(
anakin_max_batch_size
,
AnakinMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
use_anakin
,
UseAnakin
,
bool
);
// Memory optimized related.
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
c79f06d3
...
...
@@ -77,6 +77,7 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"engine_opt_info"
,
new
std
::
map
<
std
::
string
,
std
::
string
>
(
argument
->
engine_opt_info
()));
pass
->
Set
(
"predictor_id"
,
new
int
(
argument
->
predictor_id
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
anakin_max_batch_size
()));
}
if
(
pass_name
==
"tensorrt_subgraph_pass"
)
{
...
...
@@ -91,6 +92,10 @@ void IRPassManager::CreatePasses(Argument *argument,
AnalysisConfig
::
Precision
::
kInt8
;
pass
->
Set
(
"enable_int8"
,
new
bool
(
enable_int8
));
bool
use_static_engine
=
argument
->
tensorrt_use_static_engine
();
bool
model_from_memory
=
argument
->
model_from_memory
();
if
((
!
model_from_memory
&&
use_static_engine
))
{
std
::
string
model_opt_cache_dir
=
argument
->
Has
(
"model_dir"
)
?
argument
->
model_dir
()
...
...
@@ -98,9 +103,9 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"model_opt_cache_dir"
,
new
std
::
string
(
GetOrCreateModelOptCacheDir
(
model_opt_cache_dir
)));
}
pass
->
Set
(
"gpu_device_id"
,
new
int
(
argument
->
gpu_device_id
()));
pass
->
Set
(
"use_static_engine"
,
new
bool
(
argument
->
tensorrt_use_static_engine
()));
pass
->
Set
(
"use_static_engine"
,
new
bool
(
use_static_engine
));
pass
->
Set
(
"model_from_memory"
,
new
bool
(
argument
->
model_from_memory
()));
pass
->
Set
(
"engine_opt_info"
,
new
std
::
map
<
std
::
string
,
std
::
string
>
(
argument
->
engine_opt_info
()));
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
c79f06d3
...
...
@@ -256,10 +256,11 @@ void AnakinSubgraphPass::CreateAnakinOp(
input_names_with_id
,
output_names_with_id
,
std
::
to_string
(
predictor_id
));
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
int
max_batch_size
=
Get
<
int
>
(
"max_batch_size"
);
auto
*
anakin_engine
=
inference
::
Singleton
<
anakin
::
AnakinEngineManager
>::
Global
().
Create
(
true
,
Get
<
int
>
(
"gpu_device_id"
),
engine_key
);
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
engine_key
);
auto
*
scope
=
param_scope
();
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
c79f06d3
...
...
@@ -245,8 +245,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
trt_engine_serialized_data
.
empty
())
{
std
::
copy
(
params
.
begin
(),
params
.
end
(),
std
::
back_inserter
(
*
repetitive_params
));
if
(
use_static_engine
&&
!
load_from_memory
)
{
trt_engine_serialized_data
=
GetTrtEngineSerializedData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
);
}
if
(
trt_engine_serialized_data
.
empty
())
{
LOG
(
INFO
)
<<
"Prepare TRT engine (Optimize model structure, Select OP "
...
...
@@ -267,10 +270,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
trt_engine_serialized_data
=
std
::
string
((
const
char
*
)
serialized_engine_data
->
data
(),
serialized_engine_data
->
size
());
if
(
use_static_engine
&&
!
load_from_memory
)
{
SaveTrtEngineSerializedDataToFile
(
GetTrtEngineSerializedPath
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
),
trt_engine_serialized_data
);
}
}
else
{
LOG
(
INFO
)
<<
"Load TRT Optimized Info from "
<<
GetTrtEngineSerializedPath
(
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
c79f06d3
...
...
@@ -109,6 +109,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_mkldnn_
);
CP_MEMBER
(
mkldnn_enabled_op_types_
);
CP_MEMBER
(
use_anakin_
);
CP_MEMBER
(
anakin_max_batchsize_
);
// Ir related.
CP_MEMBER
(
enable_ir_optim_
);
CP_MEMBER
(
use_feed_fetch_ops_
);
...
...
@@ -352,7 +355,8 @@ void AnalysisConfig::SwitchIrDebug(int x) {
ir_debug_
=
x
;
Update
();
}
void
AnalysisConfig
::
EnableAnakinEngine
()
{
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
)
{
anakin_max_batchsize_
=
max_batch_size
;
use_anakin_
=
true
;
Update
();
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
c79f06d3
...
...
@@ -379,6 +379,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
if
(
config_
.
use_gpu
()
&&
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
c79f06d3
...
...
@@ -145,7 +145,7 @@ struct AnalysisConfig {
/**
* \brief Turn on the usage of Anakin sub-graph engine.
*/
void
EnableAnakinEngine
();
void
EnableAnakinEngine
(
int
max_batch_size
=
1
);
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
...
...
@@ -270,6 +270,7 @@ struct AnalysisConfig {
mutable
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
bool
use_anakin_
{
false
};
int
anakin_max_batchsize_
;
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info_
;
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录