Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
7a019cd6
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7a019cd6
编写于
8月 08, 2018
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
差异文件
merge develop
上级
e823ce68
46fe9ba0
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
137 addition
and
32 deletion
+137
-32
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+12
-3
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
...fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+93
-12
paddle/fluid/inference/analysis/subgraph_splitter.cc
paddle/fluid/inference/analysis/subgraph_splitter.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+1
-0
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+2
-1
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+7
-13
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+14
-2
paddle/fluid/operators/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt_engine_op_test.cc
+7
-0
未找到文件。
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
7a019cd6
...
...
@@ -24,7 +24,7 @@
namespace
paddle
{
DEFINE_bool
(
inference_analysis_enable_tensorrt_subgraph_engine
,
fals
e
,
DEFINE_bool
(
inference_analysis_enable_tensorrt_subgraph_engine
,
tru
e
,
"Enable subgraph to TensorRT engine for acceleration"
);
DEFINE_string
(
inference_analysis_graphviz_log_root
,
"./"
,
...
...
@@ -42,10 +42,19 @@ class DfgPassManagerImpl final : public DfgPassManager {
// TODO(Superjomn) set the key with pass reprs.
AddPass
(
"fluid-to-data-flow-graph"
,
new
FluidToDataFlowGraphPass
);
if
(
FLAGS_inference_analysis_enable_tensorrt_subgraph_engine
)
{
auto
trt_teller
=
[](
const
Node
*
node
)
{
auto
trt_teller
=
[
&
](
const
Node
*
node
)
{
std
::
unordered_set
<
std
::
string
>
teller_set
(
{
"elementwise_add"
,
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
});
if
(
!
node
->
IsFunction
())
return
false
;
return
static_cast
<
const
Function
*>
(
node
)
->
func_type
()
==
"mul"
;
const
auto
*
func
=
static_cast
<
const
Function
*>
(
node
);
if
(
teller_set
.
count
(
func
->
func_type
()))
return
true
;
else
{
return
false
;
}
};
AddPass
(
"tensorrt-subgraph-marker"
,
new
TensorRTSubgraphNodeMarkPass
(
trt_teller
));
AddPass
(
"tensorrt-subgraph"
,
new
TensorRTSubGraphPass
(
trt_teller
));
...
...
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
浏览文件 @
7a019cd6
...
...
@@ -23,7 +23,7 @@
namespace
paddle
{
namespace
inference
{
DEFINE_int32
(
tensorrt_max_batchsize
,
3
00
,
"TensorRT maximum batch size"
);
DEFINE_int32
(
tensorrt_max_batchsize
,
3
,
"TensorRT maximum batch size"
);
DEFINE_int32
(
tensorrt_workspace_size
,
2048
,
"TensorRT workspace size"
);
namespace
analysis
{
...
...
@@ -88,34 +88,113 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) {
}
void
CreateTrtEngineOp
(
Node
*
node
,
const
DataFlowGraph
&
graph
,
const
framework
::
proto
::
BlockDesc
&
block
)
{
framework
::
proto
::
BlockDesc
*
block
)
{
static
int
counter
{
0
};
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
framework
::
OpDesc
desc
;
auto
*
func
=
static_cast
<
FunctionBlock
*>
(
node
);
// collect inputs
std
::
vector
<
std
::
string
>
io
;
std
::
unordered_set
<
std
::
string
>
input_names
;
for
(
auto
*
x
:
func
->
inlinks
)
{
i
o
.
push_back
(
x
->
name
());
i
nput_names
.
insert
(
x
->
name
());
}
desc
.
SetInput
(
"Xs"
,
io
);
desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()));
// collect outputs
io
.
clear
();
std
::
unordered_set
<
std
::
string
>
output_names
;
for
(
auto
*
x
:
func
->
outlinks
)
{
io
.
push_back
(
x
->
name
());
output_names
.
insert
(
x
->
name
());
}
desc
.
SetOutput
(
"Ys"
,
io
);
std
::
vector
<
std
::
string
>
output_temp
(
output_names
.
begin
(),
output_names
.
end
());
desc
.
SetOutput
(
"Ys"
,
output_temp
);
desc
.
SetType
(
"tensorrt_engine"
);
PADDLE_ENFORCE
(
!
block
.
vars
().
empty
(),
"the block has no var-desc"
);
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
// The following procedure is used to rename all the intermediate
// variables and the output variables of the subgraph.
// Why we do this?
// During the transition from fluid OP to tensorrt OP, we map
// the input and output Tensor(fluid data structure) of fluid OP
// to the correspondin ITensor (trt data structure) through the
// Tensor name. When we set up ITensor for an variable, we must
// ensure that it has not been set before.
// If there is variable in the fluid graph, which is not only the
// input of a OP, but also the output of a Op, there will be problems.
// So we have to rename the variable in the subgraph to make sure
// it is either an OP's input or an OP's output.
auto
subgraph_nodes
=
func
->
subgraph
;
for
(
int
index
=
0
;
index
<
block
->
ops_size
();
index
++
)
{
framework
::
proto
::
OpDesc
*
op
=
block
->
mutable_ops
(
index
);
auto
correspond_node
=
subgraph_nodes
[
index
];
PADDLE_ENFORCE_EQ
(
correspond_node
->
name
(),
op
->
type
());
std
::
unordered_map
<
std
::
string
,
size_t
>
var2id
;
for
(
auto
*
in_var
:
correspond_node
->
inlinks
)
{
var2id
[
in_var
->
name
()]
=
in_var
->
id
();
}
// rename for the input variables of op inside subgraph
for
(
int
i
=
0
;
i
<
op
->
inputs_size
();
i
++
)
{
framework
::
proto
::
OpDesc_Var
*
in_var
=
op
->
mutable_inputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
in_var
->
arguments_size
();
k
++
)
{
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
if
(
input_names
.
count
(
arg_value
))
{
replaced_names
.
push_back
(
arg_value
);
}
else
{
replaced_names
.
push_back
(
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]));
}
}
in_var
->
clear_arguments
();
for
(
size_t
k
=
0
;
k
<
replaced_names
.
size
();
k
++
)
{
in_var
->
add_arguments
(
replaced_names
[
k
]);
}
}
var2id
.
clear
();
for
(
auto
out_var
:
correspond_node
->
outlinks
)
{
var2id
[
out_var
->
name
()]
=
out_var
->
id
();
}
// rename for the output variables of op inside subgraph
for
(
int
i
=
0
;
i
<
op
->
outputs_size
();
i
++
)
{
framework
::
proto
::
OpDesc_Var
*
out_var
=
op
->
mutable_outputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
out_var
->
arguments_size
();
k
++
)
{
std
::
string
arg_value
=
out_var
->
arguments
(
k
);
if
(
output_names
.
count
(
arg_value
))
{
output_name_map
[
arg_value
]
=
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
}
replaced_names
.
push_back
(
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]));
}
out_var
->
clear_arguments
();
for
(
size_t
k
=
0
;
k
<
replaced_names
.
size
();
k
++
)
{
out_var
->
add_arguments
(
replaced_names
[
k
]);
}
}
}
// When tensorrt engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
// to Tensor.
std
::
vector
<
std
::
string
>
output_mapping
;
for
(
auto
name
:
output_names
)
{
PADDLE_ENFORCE
(
output_name_map
.
count
(
name
)
!=
0
);
output_mapping
.
push_back
(
output_name_map
[
name
]);
}
PADDLE_ENFORCE
(
!
block
->
vars
().
empty
(),
"the block has no var-desc"
);
// Set attrs
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
.
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
->
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"engine_uniq_key"
,
"trt-"
+
std
::
to_string
(
counter
++
));
SetAttr
(
desc
.
Proto
(),
"max_batch"
,
FLAGS_tensorrt_max_batchsize
);
SetAttr
(
desc
.
Proto
(),
"max_workspace"
,
FLAGS_tensorrt_workspace_size
);
SetAttr
(
desc
.
Proto
(),
"parameters"
,
ExtractParameters
(
graph
.
nodes
.
nodes
()));
SetAttr
(
desc
.
Proto
(),
"output_name_mapping"
,
output_mapping
);
node
->
SetPbMsg
(
desc
.
Proto
()
->
SerializeAsString
());
}
...
...
@@ -147,15 +226,17 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
LOG
(
INFO
)
<<
"transformed variable size: "
<<
block_desc
.
Proto
()
->
vars
().
size
();
// copy ops.
for
(
auto
*
node
:
block_node
->
subgraph
)
{
auto
*
op
=
block_desc
.
AppendOp
();
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
());
op
->
Proto
()
->
ParseFromString
(
node
->
pb_msg
());
}
*
block_desc
.
Proto
()
->
mutable_vars
()
=
argument_
->
origin_program_desc
->
blocks
(
0
).
vars
();
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
());
CreateTrtEngineOp
(
node
,
*
argument_
->
main_dfg
,
*
block_desc
.
Proto
());
CreateTrtEngineOp
(
node
,
*
argument_
->
main_dfg
,
block_desc
.
Proto
());
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
op
=
main_block
->
add_ops
();
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"failed to set desc for block"
);
...
...
paddle/fluid/inference/analysis/subgraph_splitter.cc
浏览文件 @
7a019cd6
...
...
@@ -76,7 +76,7 @@ void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) {
std
::
vector
<
std
::
vector
<
Node
*>>
SubGraphSplitter
::
ExtractSubGraphs
()
{
std
::
vector
<
Node
*>
marked_nodes
;
for
(
auto
&
node
:
GraphTraits
<
DataFlowGraph
>
(
graph_
).
nodes
())
{
for
(
auto
&
node
:
GraphTraits
<
DataFlowGraph
>
(
graph_
).
nodes
_in_TS
())
{
if
(
node
.
attr
(
kMarkerAttrName
).
Bool
())
{
marked_nodes
.
push_back
(
&
node
);
}
...
...
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
7a019cd6
# Add TRT tests
nv_library
(
tensorrt_converter
SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
activation_op.cc
DEPS tensorrt_engine operator scope framework_proto op_registry
)
nv_test
(
test_op_converter SRCS test_op_converter.cc DEPS
...
...
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
7a019cd6
...
...
@@ -55,7 +55,6 @@ class OpConverter {
it
=
Registry
<
OpConverter
>::
Lookup
(
"fc"
);
}
}
if
(
op_desc
.
Type
().
find
(
"elementwise"
)
!=
std
::
string
::
npos
)
{
static
std
::
unordered_set
<
std
::
string
>
add_tensor_op_set
{
"add"
,
"mul"
,
"sub"
,
"div"
,
"max"
,
"min"
,
"pow"
};
...
...
@@ -72,6 +71,8 @@ class OpConverter {
"Unsupported elementwise type"
+
op_type
);
it
=
Registry
<
OpConverter
>::
Lookup
(
"elementwise_"
+
op_type
+
"_weight"
);
PADDLE_ENFORCE_NOT_NULL
(
it
,
"no OpConverter for optype [%s]"
,
op_desc
.
Type
());
}
else
{
PADDLE_ENFORCE
(
add_tensor_op_set
.
count
(
op_type
)
>
0
,
"Unsupported elementwise type"
+
op_type
);
...
...
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
7a019cd6
...
...
@@ -55,18 +55,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
"TensorRT' tensor input requires at least 2 dimensions"
);
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
"TensorRT' tensor input requires at most 4 dimensions"
);
switch
(
shape
.
size
())
{
case
2
:
return
nvinfer1
::
Dims2
(
1
,
shape
[
1
]);
case
3
:
return
nvinfer1
::
Dims3
(
1
,
shape
[
1
],
shape
[
2
]);
case
4
:
return
nvinfer1
::
Dims4
(
1
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
default:
return
nvinfer1
::
Dims
();
}
return
nvinfer1
::
Dims
();
PADDLE_ENFORCE_EQ
(
shape
.
size
(),
4UL
);
return
nvinfer1
::
DimsCHW
(
shape
[
1
],
shape
[
2
],
shape
[
3
]);
}
}
// namespace
...
...
@@ -86,6 +76,9 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
parameters
.
insert
(
param
);
}
std
::
vector
<
std
::
string
>
output_maps
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
// TODO(Superjomn) replace this with a different stream
auto
*
engine
=
Singleton
<
TRT_EngineManager
>::
Global
().
Create
(
max_batch
,
max_workspace
,
nullptr
/*engine hold its own stream*/
,
...
...
@@ -97,6 +90,7 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
// Add inputs
VLOG
(
4
)
<<
"declare inputs"
;
for
(
auto
&
input
:
context
.
Inputs
(
"Xs"
))
{
if
(
parameters
.
count
(
input
))
continue
;
VLOG
(
4
)
<<
"declare input "
<<
input
;
auto
*
var
=
block
.
FindVar
(
input
);
// TensorRT engine need to create parameters. The parameter's description
...
...
@@ -122,7 +116,7 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
block_desc
,
parameters
,
context
.
scope
(),
engine
);
// Add outputs
for
(
auto
&
output
:
context
.
Outputs
(
"Ys"
)
)
{
for
(
auto
&
output
:
output_maps
)
{
engine
->
DeclareOutput
(
output
);
}
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
7a019cd6
...
...
@@ -66,8 +66,17 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_LE
(
FLAGS_tensorrt_engine_batch_size
,
context
.
Attr
<
int
>
(
"max_batch"
));
std
::
vector
<
std
::
string
>
output_maps
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
auto
params
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
std
::
unordered_set
<
std
::
string
>
parameters
;
for
(
const
auto
&
param
:
params
)
{
parameters
.
insert
(
param
);
}
// Convert input tensor from fluid to engine.
for
(
const
auto
&
x
:
context
.
Inputs
(
"Xs"
))
{
if
(
parameters
.
count
(
x
))
continue
;
// convert input and copy to TRT engine's buffer
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
context
.
scope
(),
x
);
...
...
@@ -82,10 +91,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Execute the engine.
PADDLE_ENFORCE_GT
(
FLAGS_tensorrt_engine_batch_size
,
0
);
engine
->
Execute
(
FLAGS_tensorrt_engine_batch_size
);
// Convert output tensor from engine to fluid
int
output_index
=
0
;
for
(
const
auto
&
y
:
context
.
Outputs
(
"Ys"
))
{
// convert output and copy to fluid.
nvinfer1
::
ITensor
*
trt_t
=
engine
->
GetITensor
(
y
);
nvinfer1
::
ITensor
*
trt_t
=
engine
->
GetITensor
(
output_maps
[
output_index
]
);
auto
dims
=
trt_t
->
getDimensions
();
// Use the output ITensor's dims to reshape the Fluid Tensor.
std
::
vector
<
int
>
ddim
(
dims
.
d
,
dims
.
d
+
dims
.
nbDims
);
...
...
@@ -102,7 +113,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// TODO(Superjomn) change this float to dtype size.
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
FLAGS_tensorrt_engine_batch_size
;
engine
->
GetOutputInCPU
(
y
,
engine
->
GetOutputInCPU
(
output_maps
[
output_index
]
,
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
size
*
sizeof
(
float
));
//} else {
...
...
@@ -110,6 +121,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
// size * sizeof(float));
//}
output_index
+=
1
;
}
cudaStreamSynchronize
(
*
engine
->
stream
());
...
...
paddle/fluid/operators/tensorrt_engine_op_test.cc
浏览文件 @
7a019cd6
...
...
@@ -103,6 +103,9 @@ TEST(TensorRTEngineOp, manual) {
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"engine_uniq_key"
,
"a_engine"
);
SetAttr
<
std
::
vector
<
std
::
string
>>
(
engine_op_desc
.
Proto
(),
"parameters"
,
std
::
vector
<
std
::
string
>
({}));
SetAttr
<
std
::
vector
<
std
::
string
>>
(
engine_op_desc
.
Proto
(),
"output_name_mapping"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
LOG
(
INFO
)
<<
"create engine op"
;
auto
engine_op
=
framework
::
OpRegistry
::
CreateOp
(
*
engine_op_desc
.
Proto
());
...
...
@@ -196,6 +199,10 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
std
::
vector
<
std
::
string
>
({
"y0"
,
"y1"
,
"y2"
,
"y3"
}));
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"engine_uniq_key"
,
"b_engine"
);
SetAttr
<
std
::
vector
<
std
::
string
>>
(
engine_op_desc
.
Proto
(),
"output_name_mapping"
,
std
::
vector
<
std
::
string
>
({
"z3"
}));
auto
engine_op
=
framework
::
OpRegistry
::
CreateOp
(
*
engine_op_desc
.
Proto
());
// Execute them.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录