Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
94a57f1d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
94a57f1d
编写于
9月 19, 2018
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add trt config to arguments
上级
68fb818a
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
49 addition
and
19 deletion
+49
-19
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
...fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+6
-2
paddle/fluid/inference/analysis/subgraph_splitter.cc
paddle/fluid/inference/analysis/subgraph_splitter.cc
+2
-1
paddle/fluid/inference/analysis/subgraph_splitter.h
paddle/fluid/inference/analysis/subgraph_splitter.h
+7
-2
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+1
-1
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
+5
-1
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+10
-2
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+9
-0
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+1
-1
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+2
-2
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+6
-7
未找到文件。
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
浏览文件 @
94a57f1d
...
...
@@ -97,8 +97,9 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) {
}
}
void
CreateTrtEngineOp
(
Node
*
node
,
const
DataFlowGraph
&
graph
,
void
CreateTrtEngineOp
(
Node
*
node
,
Argument
*
argument
,
framework
::
proto
::
BlockDesc
*
block
)
{
const
DataFlowGraph
&
graph
=
*
(
argument
->
main_dfg
);
static
int
counter
{
0
};
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
framework
::
OpDesc
desc
;
...
...
@@ -204,7 +205,10 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
PADDLE_ENFORCE
(
!
block
->
vars
().
empty
(),
"the block has no var-desc"
);
// Set attrs
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
->
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"max_batch_size"
,
argument
->
Get
<
int
>
(
"max_batch_size"
));
SetAttr
(
desc
.
Proto
(),
"workspace_size"
,
argument
->
Get
<
int
>
(
"workspace_size"
));
SetAttr
(
desc
.
Proto
(),
"engine_uniq_key"
,
"trt-"
+
std
::
to_string
(
counter
++
));
SetAttr
(
desc
.
Proto
(),
"parameters"
,
ExtractParameters
(
graph
.
nodes
.
nodes
()));
SetAttr
(
desc
.
Proto
(),
"output_name_mapping"
,
output_mapping
);
...
...
@@ -248,7 +252,7 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
*
block_desc
.
Proto
()
->
mutable_vars
()
=
argument_
->
origin_program_desc
->
blocks
(
0
).
vars
();
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
());
CreateTrtEngineOp
(
node
,
*
argument_
->
main_dfg
,
block_desc
.
Proto
());
CreateTrtEngineOp
(
node
,
argument_
,
block_desc
.
Proto
());
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
op
=
main_block
->
add_ops
();
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"failed to set desc for block"
);
...
...
paddle/fluid/inference/analysis/subgraph_splitter.cc
浏览文件 @
94a57f1d
...
...
@@ -309,7 +309,8 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); }
void
SubGraphFuse
::
ReplaceNodesWithSubGraphs
()
{
auto
subgraphs
=
SubGraphSplitter
(
graph_
,
node_inside_subgraph_teller_
)();
for
(
auto
&
subgraph
:
subgraphs
)
{
if
(
subgraph
.
size
()
<=
3
)
continue
;
if
(
subgraph
.
size
()
<=
argument_
->
Get
<
int
>
(
"minimun_subgraph_size"
))
continue
;
std
::
unordered_set
<
Node
*>
subgraph_uniq
(
subgraph
.
begin
(),
subgraph
.
end
());
// replace this sub-graph with the first node. Two steps: 1. Create a Block
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
...
...
paddle/fluid/inference/analysis/subgraph_splitter.h
浏览文件 @
94a57f1d
...
...
@@ -20,6 +20,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/node.h"
...
...
@@ -63,8 +64,11 @@ class SubGraphFuse {
public:
using
NodeInsideSubgraphTeller
=
SubGraphSplitter
::
NodeInsideSubgraphTeller
;
SubGraphFuse
(
DataFlowGraph
*
graph
,
const
NodeInsideSubgraphTeller
&
teller
)
:
graph_
(
graph
),
node_inside_subgraph_teller_
(
teller
)
{}
SubGraphFuse
(
DataFlowGraph
*
graph
,
const
NodeInsideSubgraphTeller
&
teller
,
Argument
*
argument
)
:
graph_
(
graph
),
node_inside_subgraph_teller_
(
teller
),
argument_
(
argument
)
{}
// The main method which run all the logic.
void
operator
()();
...
...
@@ -76,6 +80,7 @@ class SubGraphFuse {
private:
DataFlowGraph
*
graph_
;
NodeInsideSubgraphTeller
node_inside_subgraph_teller_
;
Argument
*
argument_
;
};
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
浏览文件 @
94a57f1d
...
...
@@ -24,7 +24,7 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
:
node_inside_subgraph_teller_
(
teller
)
{}
void
TensorRTSubGraphPass
::
Run
(
DataFlowGraph
*
graph
)
{
SubGraphFuse
(
graph
,
node_inside_subgraph_teller_
)();
SubGraphFuse
(
graph
,
node_inside_subgraph_teller_
,
argument_
)();
VLOG
(
4
)
<<
"debug info "
<<
graph
->
HumanReadableInfo
(
false
/*show_values*/
,
true
/*show_functions*/
);
...
...
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
浏览文件 @
94a57f1d
...
...
@@ -33,7 +33,10 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
explicit
TensorRTSubGraphPass
(
const
NodeInsideSubgraphTeller
&
teller
);
bool
Initialize
(
Argument
*
argument
)
override
{
return
true
;
}
bool
Initialize
(
Argument
*
argument
)
override
{
argument_
=
argument
;
return
true
;
}
// This class get a sub-graph as input and determine whether to transform this
// sub-graph into TensorRT.
...
...
@@ -46,6 +49,7 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
private:
NodeInsideSubgraphTeller
node_inside_subgraph_teller_
;
Argument
*
argument_
;
};
}
// namespace analysis
...
...
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
浏览文件 @
94a57f1d
...
...
@@ -34,8 +34,6 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
FLAGS_IA_enable_tensorrt_subgraph_engine
=
true
;
VLOG
(
3
)
<<
"Predictor::init()"
;
FLAGS_tensorrt_max_batch_size
=
config_
.
max_batch_size
;
FLAGS_tensorrt_workspace_size
=
config_
.
workspace_size
;
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
{
...
...
@@ -91,6 +89,16 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
void
OptimizeInferenceProgram
()
{
// Analyze inference_program
Argument
argument
;
int
*
minimum_subgraph_size
=
new
int
(
config_
.
minimun_subgraph_size
);
int
*
max_batch_size
=
new
int
(
config_
.
max_batch_size
);
int
*
workspace_size
=
new
int
(
config_
.
workspace_size
);
std
::
string
*
precision_mode
=
new
std
::
string
(
config_
.
precision_mode
);
argument
.
Set
<
int
>
(
"minimun_subgraph_size"
,
minimum_subgraph_size
);
argument
.
Set
<
int
>
(
"max_batch_size"
,
max_batch_size
);
argument
.
Set
<
int
>
(
"workspace_size"
,
workspace_size
);
argument
.
Set
<
std
::
string
>
(
"precision_mode"
,
precision_mode
);
if
(
!
config_
.
model_dir
.
empty
())
{
argument
.
fluid_model_dir
.
reset
(
new
std
::
string
(
config_
.
model_dir
));
}
else
{
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
94a57f1d
...
...
@@ -150,6 +150,15 @@ struct TensorRTConfig : public NativeConfig {
// For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int
workspace_size
{
1
<<
30
};
// We transform the Ops that can be converted into TRT layer in the model,
// and aggregate these Ops into subgraphs for TRT execution.
// We set this variable to control the minimum number of nodes in the
// subgraph, 3 as
// default value.
int
minimun_subgraph_size
=
3
;
// Reserved configuration
// We just support "FP32" now, "FP16" and "INT8" will be supported.
std
::
string
precision_mode
=
"FP32"
;
};
// NOTE WIP, not stable yet.
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
94a57f1d
...
...
@@ -99,7 +99,7 @@ TEST(trt_models_test, main) {
std
::
vector
<
std
::
string
>
infer_models
=
{
"mobilenet"
,
"resnet50"
,
"resnext50"
};
for
(
auto
&
model_dir
:
infer_models
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/"
+
model_dir
);
CompareTensorRTWithFluid
(
5
,
FLAGS_dirname
+
"/"
+
model_dir
);
}
}
}
// namespace paddle
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
94a57f1d
...
...
@@ -22,8 +22,6 @@
namespace
paddle
{
DEFINE_int32
(
tensorrt_engine_batch_size
,
1
,
"the batch_size of TensorRT"
);
DEFINE_int32
(
tensorrt_max_batch_size
,
1
,
"TensorRT maximum batch size"
);
DEFINE_int32
(
tensorrt_workspace_size
,
16
<<
20
,
"TensorRT workspace size"
);
namespace
operators
{
...
...
@@ -34,6 +32,8 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Ys"
,
"A list of outputs"
).
AsDuplicable
();
AddAttr
<
std
::
string
>
(
"subgraph"
,
"the subgraph."
);
AddAttr
<
std
::
string
>
(
"engine_uniq_key"
,
"unique key for the TRT engine."
);
AddAttr
<
int
>
(
"max_batch_size"
,
"the maximum batch size."
);
AddAttr
<
int
>
(
"workspace_size"
,
"the maximum batch size."
);
AddComment
(
"TensorRT engine operator."
);
}
};
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
94a57f1d
...
...
@@ -28,8 +28,6 @@
namespace
paddle
{
DECLARE_int32
(
tensorrt_engine_batch_size
);
DECLARE_int32
(
tensorrt_max_batch_size
);
DECLARE_int32
(
tensorrt_workspace_size
);
namespace
operators
{
...
...
@@ -92,14 +90,14 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
engine_name
=
context
.
Attr
<
std
::
string
>
(
"engine_uniq_key"
);
int
max_batch_size
=
context
.
Attr
<
int
>
(
"max_batch_size"
);
if
(
!
Singleton
<
TRT_EngineManager
>::
Global
().
HasEngine
(
engine_name
))
{
Prepare
(
context
);
}
auto
*
engine
=
Singleton
<
TRT_EngineManager
>::
Global
().
Get
(
engine_name
);
auto
input_names
=
context
.
op
().
Inputs
(
"Xs"
);
PADDLE_ENFORCE
(
!
input_names
.
empty
(),
"should pass more than one inputs"
);
PADDLE_ENFORCE_LE
(
FLAGS_tensorrt_engine_batch_size
,
FLAGS_tensorrt_max_batch_size
);
PADDLE_ENFORCE_LE
(
FLAGS_tensorrt_engine_batch_size
,
max_batch_size
);
std
::
vector
<
std
::
string
>
output_maps
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
...
...
@@ -173,8 +171,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Get the ProgramDesc and pass to convert.
framework
::
proto
::
BlockDesc
block_desc
;
block_desc
.
ParseFromString
(
context
.
Attr
<
std
::
string
>
(
"subgraph"
));
int
max_batch
=
FLAGS_tensorrt_max_batch_size
;
auto
max_workspace
=
FLAGS_tensorrt_workspace_size
;
int
max_batch_size
=
context
.
Attr
<
int
>
(
"max_batch_size"
);
int
workspace_size
=
context
.
Attr
<
int
>
(
"workspace_size"
);
auto
params
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
std
::
unordered_set
<
std
::
string
>
parameters
;
for
(
const
auto
&
param
:
params
)
{
...
...
@@ -186,7 +185,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// TODO(Superjomn) replace this with a different stream
auto
*
engine
=
Singleton
<
TRT_EngineManager
>::
Global
().
Create
(
max_batch
,
max_workspac
e
,
nullptr
/*engine hold its own stream*/
,
max_batch
_size
,
workspace_siz
e
,
nullptr
/*engine hold its own stream*/
,
context
.
Attr
<
std
::
string
>
(
"engine_uniq_key"
),
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
()).
device
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录