机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit b42ced8e (unverified)
Authored by Yan Chunwei on Jul 20, 2018; committed via GitHub on Jul 20, 2018.
Parent commit: c5c17a14

bugfix/tensorrt analysis fix subgraph trigger (#12266)

Showing 27 changed files with 342 additions and 188 deletions (+342, -188)
Files changed (27):

paddle/fluid/inference/analysis/analyzer.cc  (+3, -2)
paddle/fluid/inference/analysis/analyzer.h  (+3, -2)
paddle/fluid/inference/analysis/analyzer_tester.cc  (+9, -1)
paddle/fluid/inference/analysis/data_flow_graph.cc  (+45, -0)
paddle/fluid/inference/analysis/data_flow_graph.h  (+3, -31)
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc  (+52, -38)
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h  (+4, -0)
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc  (+1, -1)
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc  (+14, -2)
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc  (+4, -4)
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc  (+3, -0)
paddle/fluid/inference/api/CMakeLists.txt  (+1, -1)
paddle/fluid/inference/api/api_anakin_engine.cc  (+1, -1)
paddle/fluid/inference/api/api_anakin_engine.h  (+2, -1)
paddle/fluid/inference/api/api_impl.cc  (+2, -1)
paddle/fluid/inference/api/api_impl.h  (+2, -1)
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc  (+25, -10)
paddle/fluid/inference/api/paddle_inference_api.h  (+2, -1)
paddle/fluid/inference/api/test_api.cc  (+2, -1)
paddle/fluid/inference/api/test_api_tensorrt_subgraph_engine.cc  (+52, -23)
paddle/fluid/inference/tensorrt/convert/op_converter.h  (+4, -4)
paddle/fluid/inference/tensorrt/engine.cc  (+55, -37)
paddle/fluid/inference/tensorrt/engine.h  (+6, -1)
paddle/fluid/inference/tensorrt/test_engine.cc  (+4, -0)
paddle/fluid/operators/CMakeLists.txt  (+4, -2)
paddle/fluid/operators/tensorrt_engine_op.cc  (+18, -4)
paddle/fluid/operators/tensorrt_engine_op.h  (+21, -19)
paddle/fluid/inference/analysis/analyzer.cc

@@ -22,8 +22,6 @@
 #include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
 
 namespace paddle {
-namespace inference {
-namespace analysis {
 
 DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
             "Enable subgraph to TensorRT engine for acceleration");
@@ -31,6 +29,9 @@ DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
 DEFINE_string(inference_analysis_graphviz_log_root, "./",
               "Graphviz debuger for data flow graphs.");
 
+namespace inference {
+namespace analysis {
+
 class DfgPassManagerImpl final : public DfgPassManager {
  public:
   DfgPassManagerImpl() {
paddle/fluid/inference/analysis/analyzer.h

@@ -45,14 +45,15 @@ limitations under the License. */
 #include "paddle/fluid/inference/analysis/pass_manager.h"
 
 namespace paddle {
-namespace inference {
-namespace analysis {
 
 // TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
 // flag if not available.
 DECLARE_bool(inference_analysis_enable_tensorrt_subgraph_engine);
 DECLARE_string(inference_analysis_graphviz_log_root);
 
+namespace inference {
+namespace analysis {
+
 class Analyzer : public OrderedRegistry<PassManager> {
  public:
   // Register all the pass-managers.
paddle/fluid/inference/analysis/analyzer_tester.cc

@@ -13,13 +13,21 @@
 // limitations under the License.
 
 #include "paddle/fluid/inference/analysis/analyzer.h"
+#include <google/protobuf/text_format.h>
 #include "paddle/fluid/inference/analysis/ut_helper.h"
 
 namespace paddle {
 namespace inference {
 namespace analysis {
 
-TEST_F(DFG_Tester, main) {
+TEST_F(DFG_Tester, analysis_without_tensorrt) {
+  FLAGS_inference_analysis_enable_tensorrt_subgraph_engine = false;
+  Analyzer analyser;
+  analyser.Run(&argument);
+}
+
+TEST_F(DFG_Tester, analysis_with_tensorrt) {
+  FLAGS_inference_analysis_enable_tensorrt_subgraph_engine = true;
   Analyzer analyser;
   analyser.Run(&argument);
 }
paddle/fluid/inference/analysis/data_flow_graph.cc

@@ -222,10 +222,19 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
   return stack_.top();
 }
 
+inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
+  return node.inlinks.size() == n;
+}
+
 GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
     const std::vector<Node *> &source) {
   PADDLE_ENFORCE(!source.empty(),
                  "Start points of topological sorting should not be empty!");
+  // CHECK all the inputs' in-degree is 0
+  for (auto *node : source) {
+    PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0));
+  }
+
   std::unordered_set<Node *> visited;
   std::unordered_set<Node *> to_visit{source.begin(), source.end()};
@@ -233,6 +242,11 @@ GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
   while (!to_visit.empty()) {
     std::vector<Node *> queue(to_visit.begin(), to_visit.end());
     for (auto *p : queue) {
+      if (p->deleted()) {
+        visited.insert(p);
+        to_visit.erase(p);
+        continue;
+      }
       inlink_visited.clear();
       std::copy_if(p->inlinks.begin(), p->inlinks.end(),
@@ -292,6 +306,37 @@ Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
   return sorted_[cursor_];
 }
 
+std::pair<std::vector<Node *>, std::vector<Node *>>
+ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
+  std::unordered_set<Node *> nodes(graph.begin(), graph.end());
+  std::unordered_set<Node *> inputs;
+  std::unordered_set<Node *> outputs;
+  // Input a Value, check whether its inlink is in the subgraph.
+  auto inlink_in_subgraph = [&](Node *n) {
+    for (auto *in : n->inlinks) {
+      if (nodes.count(in)) return true;
+    }
+    return false;
+  };
+  for (auto &node : graph) {
+    for (auto *in : node->inlinks) {
+      // The Value that is written by nodes inside a sub-graph shouldn't be the
+      // input of the sub-graph.
+      if (!nodes.count(in) && in->type() == Node::Type::kValue &&
+          !inlink_in_subgraph(in)) {
+        inputs.insert(in);
+      }
+    }
+    for (auto *out : node->outlinks) {
+      if (!nodes.count(out) && out->type() == Node::Type::kValue) {
+        outputs.insert(out);
+      }
+    }
+  }
+  return std::make_pair(std::vector<Node *>(inputs.begin(), inputs.end()),
+                        std::vector<Node *>(outputs.begin(), outputs.end()));
+}
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
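The NodesTSIterator extended above gives analysis passes a topological-order view of the data-flow graph that tolerates nodes deleted by earlier passes. For orientation, here is a minimal sketch of how such a traversal is consumed, using only the interface visible in this diff (GraphTraits<DataFlowGraph>::nodes_in_TS(), Node::deleted(), Node::type()); the function name VisitInTopologicalOrder is invented for illustration, and the loop mirrors the one data_flow_graph_to_fluid_pass.cc switches to later in this commit:

    // Sketch only: walk a DataFlowGraph in topological order and skip nodes
    // that earlier passes marked as deleted.
    #include "paddle/fluid/inference/analysis/data_flow_graph.h"

    namespace paddle {
    namespace inference {
    namespace analysis {

    void VisitInTopologicalOrder(DataFlowGraph *graph) {
      for (auto &node : GraphTraits<DataFlowGraph>(graph).nodes_in_TS()) {
        if (node.deleted()) continue;  // skip nodes fused away by other passes
        switch (node.type()) {
          case Node::Type::kFunction:
            // a plain operator node
            break;
          case Node::Type::kFunctionBlock:
            // a fused TensorRT sub-graph node
            break;
          default:
            break;
        }
      }
    }

    }  // namespace analysis
    }  // namespace inference
    }  // namespace paddle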
paddle/fluid/inference/analysis/data_flow_graph.h

@@ -133,7 +133,7 @@ struct GraphTraits<DataFlowGraph> {
 
    private:
     std::vector<Node *> sorted_;
-    int cursor_{0};
+    size_t cursor_{0};
   };
 
   explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {}
@@ -173,36 +173,8 @@ struct GraphTraits<DataFlowGraph> {
 // Extract the inputs and outputs of a graph. The inputs and outputs of a
 // sub-graph is the inputs nodes and output nodes that doesn't inside the
 // sub-graph.
-static std::pair<std::vector<Node *>, std::vector<Node *>>
-ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
-  std::unordered_set<Node *> nodes(graph.begin(), graph.end());
-  std::unordered_set<Node *> inputs;
-  std::unordered_set<Node *> outputs;
-  // Input a Value, check whether its inlink is in the subgraph.
-  auto inlink_in_subgraph = [&](Node *n) {
-    for (auto *in : n->inlinks) {
-      if (nodes.count(in)) return true;
-    }
-    return false;
-  };
-  for (auto &node : graph) {
-    for (auto *in : node->inlinks) {
-      // The Value that is written by nodes inside a sub-graph shouldn't be the
-      // input of the sub-graph.
-      if (!nodes.count(in) && in->type() == Node::Type::kValue &&
-          !inlink_in_subgraph(in)) {
-        inputs.insert(in);
-      }
-    }
-    for (auto *out : node->outlinks) {
-      if (!nodes.count(out) && out->type() == Node::Type::kValue) {
-        outputs.insert(out);
-      }
-    }
-  }
-  return std::make_pair(std::vector<Node *>(inputs.begin(), inputs.end()),
-                        std::vector<Node *>(outputs.begin(), outputs.end()));
-}
+std::pair<std::vector<Node *>, std::vector<Node *>>
+ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);
 
 }  // namespace analysis
 }  // namespace inference
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc

@@ -22,14 +22,18 @@
 namespace paddle {
 namespace inference {
+
+DEFINE_int32(tensorrt_max_batchsize, 300, "TensorRT maximum batch size");
+DEFINE_int32(tensorrt_workspace_size, 2048, "TensorRT workspace size");
+
 namespace analysis {
 
 using framework::proto::ProgramDesc;
 
 std::vector<std::string> ExtractParameters(
     const std::vector<std::unique_ptr<Node>> &nodes);
 
 bool DataFlowGraphToFluidPass::Initialize(Argument *argument) {
   ANALYSIS_ARGUMENT_CHECK_FIELD(argument)
   ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc)
   PADDLE_ENFORCE(!argument->transformed_program_desc);
@@ -47,32 +51,34 @@ bool DataFlowGraphToFluidPass::Initialize(Argument* argument) {
 
 bool DataFlowGraphToFluidPass::Finalize() { return true; }
 
 void DataFlowGraphToFluidPass::Run(DataFlowGraph *graph) {
-  auto traits = GraphTraits<DataFlowGraph>(graph);
-  for (auto it = traits.nodes().begin(); it != traits.nodes().end(); ++it) {
-    if (it->deleted()) continue;
-
-    switch (it->type()) {
+  LOG(INFO) << "graph.inputs " << graph->inputs.size();
+  for (auto &node : GraphTraits<DataFlowGraph>(graph).nodes_in_TS()) {
+    if (node.deleted()) continue;
+
+    switch (node.type()) {
       case Node::Type::kFunction: {
-        LOG(INFO) << "add function " << it->repr();
-        AddFluidOp(&(*it));
+        LOG(INFO) << "add function " << node.repr();
+        AddFluidOp(&node);
       } break;
       case Node::Type::kFunctionBlock: {
-        LOG(INFO) << "add engine op " << it->repr() << " , "
-                  << static_cast<FunctionBlock *>(&(*it))->subgraph.size();
-        AddEngineOp(&(*it));
+        LOG(INFO) << "add engine op " << node.repr() << " , "
+                  << static_cast<FunctionBlock *>(&node)->subgraph.size();
+        AddEngineOp(&node);
       } break;
       default:
         continue;
     }
   }
+
+  PADDLE_ENFORCE(argument_->transformed_program_desc.get());
 }
 
 void DataFlowGraphToFluidPass::AddFluidOp(Node *node) {
   auto *ori_op = static_cast<framework::proto::OpDesc *>(node->pb_desc());
   // currently only the main block is analyzed.
   auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
   auto *op = main_block->add_ops();
   *op = *ori_op;  // copy the attributes, by default, these will not be changed
   // by analysis phrase.
   // The inputs and outputs of the existing ops are not changed by tensorrt
@@ -80,43 +86,42 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node* node) {
   // NOTE It might be changed by other passes in the long run.
 }
 
 void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
                        const framework::proto::BlockDesc &block) {
   static int counter{0};
   PADDLE_ENFORCE(node->IsFunctionBlock());
   framework::OpDesc desc;
   auto *func = static_cast<FunctionBlock *>(node);
 
   // collect inputs
   std::vector<std::string> io;
   for (auto *x : func->inlinks) {
     io.push_back(x->name());
   }
   desc.SetInput("Xs", io);
 
   // collect outputs
   io.clear();
   for (auto *x : func->outlinks) {
     io.push_back(x->name());
   }
   desc.SetOutput("Ys", io);
 
   desc.SetType("tensorrt_engine");
+  PADDLE_ENFORCE(!block.vars().empty(), "the block has no var-desc");
   // Set attrs
   SetAttr(desc.Proto(), "subgraph", block.SerializeAsString());
-  SetAttr(desc.Proto(), "engine_unique_key",
-          "trt-" + std::to_string(counter++));
-  SetAttr(desc.Proto(), "max_batch", 100);  // TODO(Superjomn) add config latter
-  SetAttr(desc.Proto(), "max_workspace",
-          1024);  // TODO(Superjomn) add config latter
+  SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++));
+  SetAttr(desc.Proto(), "max_batch", FLAGS_tensorrt_max_batchsize);
+  SetAttr(desc.Proto(), "max_workspace", FLAGS_tensorrt_workspace_size);
   SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes()));
   node->SetPbMsg(desc.Proto()->SerializeAsString());
 }
 
 std::vector<std::string> ExtractParameters(
     const std::vector<std::unique_ptr<Node>> &nodes) {
   std::vector<std::string> parameters;
   for (const auto &node : nodes) {
     if (!node->IsValue()) continue;
     PADDLE_ENFORCE(!node->pb_msg().empty(), "pb_msg should be set first");
     framework::proto::VarDesc var;
@@ -128,21 +133,30 @@ std::vector<std::string> ExtractParameters(
   return parameters;
 }
 
 void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
   // TODO(Superjomn) Here need to expose some arguments for default setting.
   PADDLE_ENFORCE(node->IsFunctionBlock());
   auto *block_node = static_cast<FunctionBlock *>(node);
   framework::proto::BlockDesc proto;
   framework::BlockDesc block_desc(nullptr, &proto);
   block_desc.Proto()->set_parent_idx(-1);
   block_desc.Proto()->set_idx(0);
+  LOG(INFO) << "origin variable size: "
+            << argument_->origin_program_desc->blocks(0).vars().size();
+  LOG(INFO) << "transformed variable size: "
+            << block_desc.Proto()->vars().size();
   // copy ops.
   for (auto *node : block_node->subgraph) {
     auto *op = block_desc.AppendOp();
     PADDLE_ENFORCE(!node->pb_msg().empty());
     op->Proto()->ParseFromString(node->pb_msg());
   }
+
+  *block_desc.Proto()->mutable_vars() =
+      argument_->origin_program_desc->blocks(0).vars();
+  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty());
   CreateTrtEngineOp(node, *argument_->main_dfg, *block_desc.Proto());
   auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
   auto *op = main_block->add_ops();
   PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block");
   op->ParseFromString(node->pb_msg());
 }
@@ -151,7 +165,7 @@ namespace {
 class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
  public:
   using Config = DFG_GraphvizDrawPass::Config;
   explicit DFG_DebuggerPass(const Config &config)
       : DFG_GraphvizDrawPass(config) {}
 
   std::string repr() const override { return "dfg-to-fluid-debuger-pass"; }
@@ -160,7 +174,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
 };
 }  // namespace
 
 Pass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const {
   return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config(
       FLAGS_inference_analysis_graphviz_log_root,
       "data_flow_graph_to_fluid_graphviz_debugger"));
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h

@@ -26,6 +26,10 @@
 namespace paddle {
 namespace inference {
+
+DECLARE_int32(tensorrt_max_batchsize);
+DECLARE_int32(tensorrt_workspace_size);
+
 namespace analysis {
 
 class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
  public:
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc

@@ -40,7 +40,7 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
     no++;
   }
   // DFG is sensitive to ProgramDesc, be careful to change the existing models.
-  ASSERT_EQ(no, 82);
+  ASSERT_EQ(no, 83);
 }
 
 }  // namespace analysis
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc

@@ -28,7 +28,6 @@ bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
   ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc);
   PADDLE_ENFORCE(argument);
   if (!argument->main_dfg) {
-    LOG(INFO) << "Init DFG";
     argument->main_dfg.reset(new DataFlowGraph);
   }
   desc_ = argument->origin_program_desc.get();
@@ -51,6 +50,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
     v->SetPbMsg(var.SerializeAsString());
     var2id[var.name()] = v->id();
   }
+
   for (int i = 0; i < main_block.ops_size(); i++) {
     const auto &op = main_block.ops(i);
     auto *o = graph->nodes.Create(Node::Type::kFunction);
@@ -62,19 +62,31 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
     o->SetPbMsg(op.SerializeAsString());
 
     // set inputs and outputs
     // TODO(Superjomn) make sure the InputNames is the real variable name.
+    std::unordered_set<Node *> inlinks;
     for (int j = 0; j < op.inputs_size(); j++) {
       auto &in_var = op.inputs(j);
       for (int k = 0; k < in_var.arguments_size(); k++) {
         auto *in = graph->nodes.GetMutable(var2id.at(in_var.arguments(k)));
         in->outlinks.push_back(o);
         o->inlinks.push_back(in);
+        inlinks.insert(in);
       }
     }
     for (int j = 0; j < op.outputs_size(); j++) {
       auto &out_var = op.outputs(j);
       for (int k = 0; k < out_var.arguments_size(); k++) {
         auto *out = graph->nodes.GetMutable(var2id[out_var.arguments(k)]);
+        if (inlinks.count(out)) {
+          // Loop found, for example, a = op(a), use SSA, change to a1 = op(a).
+          auto *out_alias = graph->nodes.Create(Node::Type::kValue);
+          out_alias->SetName(out->name());
+          out_alias->SetPbDesc(out->pb_desc());
+          out_alias->SetPbMsg(out->pb_msg());
+          var2id[out_alias->name()] = out_alias->id();  // update a -> a0
+          LOG(INFO) << "loop found in graph, create SSA alias node ["
+                    << out_alias->repr() << "] for [" << out->repr() << "]";
+          out = out_alias;
+        }
         out->inlinks.push_back(o);
         o->outlinks.push_back(out);
       }
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc

@@ -24,12 +24,12 @@ namespace analysis {
 TEST_F(DFG_Tester, Init) {
   FluidToDataFlowGraphPass pass;
   pass.Initialize(&argument);
-  DataFlowGraph graph;
-  pass.Run(&graph);
+  pass.Run(argument.main_dfg.get());
   // Analysis is sensitive to ProgramDesc, careful to change the original model.
-  ASSERT_EQ(graph.nodes.size(), 37UL);
+  ASSERT_EQ(argument.main_dfg->nodes.size(), 38UL);
   pass.Finalize();
-  LOG(INFO) << '\n' << graph.DotString();
+  ASSERT_FALSE(argument.main_dfg->DotString().empty());
+  EXPECT_FALSE(argument.main_dfg->inputs.empty());
 }
 
 }  // namespace analysis
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc

@@ -25,6 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
 
 void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
   SubGraphFuse(graph, node_inside_subgraph_teller_)();
+  VLOG(4) << "debug info "
+          << graph->HumanReadableInfo(false /*show_values*/,
+                                      true /*show_functions*/);
 }
 
 }  // namespace analysis
paddle/fluid/inference/api/CMakeLists.txt

@@ -82,7 +82,7 @@ inference_api_test(test_api_impl
 if(WITH_GPU AND TENSORRT_FOUND)
   cc_library(paddle_inference_tensorrt_subgraph_engine
     SRCS api_tensorrt_subgraph_engine.cc
-    DEPS paddle_inference_api analysis tensorrt_engine paddle_fluid_api)
+    DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter)
 
   inference_api_test(test_api_tensorrt_subgraph_engine ARGS test_word2vec)
 endif()
paddle/fluid/inference/api/api_anakin_engine.cc

@@ -39,7 +39,7 @@ bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
 
 bool PaddleInferenceAnakinPredictor::Run(
     const std::vector<PaddleTensor> &inputs,
-    std::vector<PaddleTensor> *output_data) {
+    std::vector<PaddleTensor> *output_data, int batch_size) {
   for (const auto &input : inputs) {
     if (input.dtype != PaddleDType::FLOAT32) {
       LOG(ERROR) << "Only support float type inputs. " << input.name
paddle/fluid/inference/api/api_anakin_engine.h

@@ -37,7 +37,8 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   // NOTE Unlike the native engine, the buffers of anakin engine's output_data
   // should be allocated first.
   bool Run(const std::vector<PaddleTensor>& inputs,
-           std::vector<PaddleTensor>* output_data) override;
+           std::vector<PaddleTensor>* output_data,
+           int batch_size = -1) override;
 
   std::unique_ptr<PaddlePredictor> Clone() override;
paddle/fluid/inference/api/api_impl.cc

@@ -108,7 +108,8 @@ NativePaddlePredictor::~NativePaddlePredictor() {
 }
 
 bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
-                                std::vector<PaddleTensor> *output_data) {
+                                std::vector<PaddleTensor> *output_data,
+                                int batch_size) {
   VLOG(3) << "Predictor::predict";
   Timer timer;
   timer.tic();
paddle/fluid/inference/api/api_impl.h

@@ -38,7 +38,8 @@ class NativePaddlePredictor : public PaddlePredictor {
   bool Init(std::shared_ptr<framework::Scope> parent_scope);
 
   bool Run(const std::vector<PaddleTensor> &inputs,
-           std::vector<PaddleTensor> *output_data) override;
+           std::vector<PaddleTensor> *output_data,
+           int batch_size = -1) override;
 
   std::unique_ptr<PaddlePredictor> Clone() override;
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc

@@ -16,6 +16,7 @@
 #include "paddle/fluid/inference/api/api_impl.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/operators/tensorrt_engine_op.h"
 
 namespace paddle {
@@ -64,16 +65,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
       return false;
     }
 
-    // Analyze inference_program
-    Argument argument;
-    argument.origin_program_desc.reset(
-        new ProgramDesc(*inference_program_->Proto()));
-    Singleton<Analyzer>::Global().Run(&argument);
-    CHECK(argument.transformed_program_desc);
-    VLOG(5) << "transformed program:\n"
-            << argument.transformed_program_desc->SerializeAsString();
-    VLOG(5) << "to prepare executor";
-    *inference_program_->Proto() = *argument.transformed_program_desc;
+    OptimizeInferenceProgram();
     ctx_ = executor_->Prepare(*inference_program_, 0);
 
     VLOG(5) << "to create variables";
@@ -86,6 +78,29 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
     return true;
   }
 
+  bool Run(const std::vector<PaddleTensor>& inputs,
+           std::vector<PaddleTensor>* output_data,
+           int batch_size = -1) override {
+    PADDLE_ENFORCE_GT(batch_size, 0,
+                      "TensorRT engine needs the argument batch_size set");
+    FLAGS_tensorrt_engine_batch_size = batch_size;
+    return NativePaddlePredictor::Run(inputs, output_data, batch_size);
+  }
+
+  void OptimizeInferenceProgram() {
+    // Analyze inference_program
+    Argument argument;
+    argument.origin_program_desc.reset(
+        new ProgramDesc(*inference_program_->Proto()));
+    Singleton<Analyzer>::Global().Run(&argument);
+    CHECK(argument.transformed_program_desc);
+    VLOG(5) << "transformed program:\n"
+            << argument.transformed_program_desc->SerializeAsString();
+    VLOG(5) << "to prepare executor";
+    inference_program_.reset(
+        new framework::ProgramDesc(*argument.transformed_program_desc));
+  }
+
  private:
   TensorRTConfig config_;
 };
paddle/fluid/inference/api/paddle_inference_api.h

@@ -98,7 +98,8 @@ class PaddlePredictor {
   // responsible for the output tensor's buffer, either allocated or passed from
   // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
-                   std::vector<PaddleTensor>* output_data) = 0;
+                   std::vector<PaddleTensor>* output_data,
+                   int batch_size = -1) = 0;
 
   // Clone a predictor that share the model weights, the Cloned predictor should
   // be thread-safe.
paddle/fluid/inference/api/test_api.cc

@@ -35,7 +35,8 @@ class DemoPredictor : public PaddlePredictor {
     LOG(INFO) << "I get other_config " << config.other_config;
   }
   bool Run(const std::vector<PaddleTensor> &inputs,
-           std::vector<PaddleTensor> *output_data) override {
+           std::vector<PaddleTensor> *output_data,
+           int batch_size = 0) override {
     LOG(INFO) << "Run";
     return false;
   }
paddle/fluid/inference/api/test_api_tensorrt_subgraph_engine.cc

@@ -15,50 +15,79 @@
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
+#include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 
 namespace paddle {
 
 DEFINE_string(dirname, "", "Directory of the inference model.");
 
-void Main(bool use_gpu) {
+void CompareTensorRTWithFluid(bool enable_tensorrt) {
+  FLAGS_inference_analysis_enable_tensorrt_subgraph_engine = enable_tensorrt;
+
   //# 1. Create PaddlePredictor with a config.
-  TensorRTConfig config;
-  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
-  config.use_gpu = use_gpu;
-  config.fraction_of_gpu_memory = 0.15;
-  config.device = 0;
-  auto predictor =
-      CreatePaddlePredictor<TensorRTConfig,
-                            PaddleEngineKind::kAutoMixedTensorRT>(config);
+  NativeConfig config0;
+  config0.model_dir = FLAGS_dirname + "word2vec.inference.model";
+  config0.use_gpu = true;
+  config0.fraction_of_gpu_memory = 0.3;
+  config0.device = 0;
+
+  TensorRTConfig config1;
+  config1.model_dir = FLAGS_dirname + "word2vec.inference.model";
+  config1.use_gpu = true;
+  config1.fraction_of_gpu_memory = 0.3;
+  config1.device = 0;
+
+  auto predictor0 =
+      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
+  auto predictor1 =
+      CreatePaddlePredictor<TensorRTConfig,
                            PaddleEngineKind::kAutoMixedTensorRT>(config1);
 
-  for (int batch_id = 0; batch_id < 3; batch_id++) {
+  for (int batch_id = 0; batch_id < 1; batch_id++) {
     //# 2. Prepare input.
-    int64_t data[4] = {1, 2, 3, 4};
-    PaddleTensor tensor{.name = "",
-                        .shape = std::vector<int>({4, 1}),
-                        .data = PaddleBuf(data, sizeof(data)),
-                        .dtype = PaddleDType::INT64};
+    std::vector<int64_t> data(20);
+    for (int i = 0; i < 20; i++) data[i] = i;
+
+    PaddleTensor tensor{
+        .name = "",
+        .shape = std::vector<int>({10, 1}),
+        .data = PaddleBuf(data.data(), data.size() * sizeof(int64_t)),
+        .dtype = PaddleDType::INT64};
 
     // For simplicity, we set all the slots with the same data.
     std::vector<PaddleTensor> slots(4, tensor);
 
     //# 3. Run
-    std::vector<PaddleTensor> outputs;
-    CHECK(predictor->Run(slots, &outputs));
+    std::vector<PaddleTensor> outputs0;
+    std::vector<PaddleTensor> outputs1;
+    CHECK(predictor0->Run(slots, &outputs0));
+    CHECK(predictor1->Run(slots, &outputs1, 10));
 
     //# 4. Get output.
-    ASSERT_EQ(outputs.size(), 1UL);
-    LOG(INFO) << "output buffer size: " << outputs.front().data.length();
-    const size_t num_elements = outputs.front().data.length() / sizeof(float);
-    // The outputs' buffers are in CPU memory.
-    for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-      LOG(INFO) << static_cast<float *>(outputs.front().data.data())[i];
-    }
+    ASSERT_EQ(outputs0.size(), 1UL);
+    ASSERT_EQ(outputs1.size(), 1UL);
+
+    const size_t num_elements = outputs0.front().data.length() / sizeof(float);
+    const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
+    EXPECT_EQ(num_elements, num_elements1);
+
+    auto *data0 = static_cast<float *>(outputs0.front().data.data());
+    auto *data1 = static_cast<float *>(outputs1.front().data.data());
+
+    ASSERT_GT(num_elements, 0UL);
+    for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
+      EXPECT_NEAR(data0[i], data1[i], 1e-3);
+    }
   }
 }
 
-TEST(paddle_inference_api_tensorrt_subgraph_engine, main) { Main(true); }
+TEST(paddle_inference_api_tensorrt_subgraph_engine, without_tensorrt) {
+  CompareTensorRTWithFluid(false);
+}
+
+TEST(paddle_inference_api_tensorrt_subgraph_engine, with_tensorrt) {
+  CompareTensorRTWithFluid(true);
+}
 
 }  // namespace paddle
paddle/fluid/inference/tensorrt/convert/op_converter.h

@@ -93,6 +93,10 @@ class OpConverter {
   framework::Scope* scope_{nullptr};
 };
 
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
 #define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \
   struct trt_##op_type__##_converter : public ::paddle::framework::Registrar { \
     trt_##op_type__##_converter() { \
@@ -111,7 +115,3 @@ class OpConverter {
   extern int TouchConverterRegister_##op_type__(); \
   static int use_op_converter_trt_##op_type__ __attribute__((unused)) = \
       TouchConverterRegister_##op_type__();
-
-}  // namespace tensorrt
-}  // namespace inference
-}  // namespace paddle
paddle/fluid/inference/tensorrt/engine.cc

@@ -26,18 +26,20 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
 void TensorRTEngine::Build(const DescType &paddle_model) {
   PADDLE_ENFORCE(false, "not implemented");
 }
 
 void TensorRTEngine::Execute(int batch_size) {
+  batch_size_ = batch_size;
   std::vector<void *> buffers;
   for (auto &buf : buffers_) {
     PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated");
     PADDLE_ENFORCE_GT(buf.max_size, 0);
     PADDLE_ENFORCE(buf.device == DeviceType::GPU);
     buffers.push_back(buf.buffer);
   }
   PADDLE_ENFORCE_NOT_NULL(stream_);
   infer_context_->enqueue(batch_size, buffers.data(), *stream_, nullptr);
   cudaStreamSynchronize(*stream_);
 }
@@ -45,7 +47,7 @@ void TensorRTEngine::Execute(int batch_size) {
 
 TensorRTEngine::~TensorRTEngine() {
   cudaStreamSynchronize(*stream_);
   // clean buffer
   for (auto &buf : buffers_) {
     if (buf.device == DeviceType::GPU && buf.buffer != nullptr) {
       PADDLE_ENFORCE_EQ(0, cudaFree(buf.buffer));
       buf.buffer = nullptr;
@@ -70,32 +72,37 @@ void TensorRTEngine::FreezeNetwork() {
   // allocate GPU buffers.
   buffers_.resize(buffer_sizes_.size());
   for (auto &item : buffer_sizes_) {
     // The output buffers are not set in the network building phrase, need to
     // infer from the TesorRT network.
     if (item.second == 0) {
       auto slot_offset = infer_engine_->getBindingIndex(item.first.c_str());
       auto dims = infer_engine_->getBindingDimensions(slot_offset);
       item.second = kDataTypeSize[static_cast<int>(
                         infer_engine_->getBindingDataType(slot_offset))] *
                     analysis::AccuDims(dims.d, dims.nbDims);
+      PADDLE_ENFORCE_GT(item.second, 0);
     }
     auto &buf = buffer(item.first);
+    buf.max_size = item.second * max_batch_;
     CHECK(buf.buffer == nullptr);  // buffer should be allocated only once.
-    PADDLE_ENFORCE_EQ(0, cudaMalloc(&buf.buffer, item.second));
-    VLOG(4) << "buffer malloc " << item.first << " " << item.second << " "
-            << buf.buffer;
-    buf.size = buf.max_size = item.second;
+    PADDLE_ENFORCE_EQ(0, cudaMalloc(&buf.buffer, buf.max_size));
+    PADDLE_ENFORCE_LE(buf.max_size, 1 << 30);  // 10G
+    // buf.size will changed in the runtime.
+    buf.size = 0;
     buf.device = DeviceType::GPU;
   }
 }
 
 nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
                                                 nvinfer1::DataType dtype,
                                                 const nvinfer1::Dims &dims) {
   PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate input name %s",
                     name);
 
   PADDLE_ENFORCE(infer_network_ != nullptr, "should initnetwork first");
   auto *input = infer_network_->addInput(name.c_str(), dtype, dims);
   PADDLE_ENFORCE(input, "infer network add input %s failed", name);
   buffer_sizes_[name] = kDataTypeSize[static_cast<int>(dtype)] *
                         analysis::AccuDims(dims.d, dims.nbDims);
@@ -104,12 +111,12 @@ nvinfer1::ITensor* TensorRTEngine::DeclareInput(const std::string& name,
   return input;
 }
 
 void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
                                    const std::string &name) {
   PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
                     name);
 
   auto *output = layer->getOutput(offset);
   SetITensor(name, output);
   PADDLE_ENFORCE(output != nullptr);
   output->setName(name.c_str());
@@ -121,11 +128,11 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer* layer, int offset,
   buffer_sizes_[name] = 0;
 }
 
 void TensorRTEngine::DeclareOutput(const std::string &name) {
   PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
                     name);
 
   auto *output = TensorRTEngine::GetITensor(name);
   PADDLE_ENFORCE(output != nullptr);
   output->setName(name.c_str());
   PADDLE_ENFORCE(!output->isNetworkInput());
@@ -135,38 +142,45 @@ void TensorRTEngine::DeclareOutput(const std::string& name) {
   buffer_sizes_[name] = 0;
 }
 
 void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
   return buffer(name).buffer;
 }
 
 void TensorRTEngine::GetOutputInGPU(const std::string &name, void *dst,
                                     size_t max_size) {
   // determine data size
   auto it = buffer_sizes_.find(name);
   PADDLE_ENFORCE(it != buffer_sizes_.end());
   PADDLE_ENFORCE_GT(it->second, 0);
   PADDLE_ENFORCE_GE(max_size, it->second);
   auto &buf = buffer(name);
   PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated before");
   PADDLE_ENFORCE_EQ(cudaMemcpyAsync(dst, buf.buffer, it->second,
                                     cudaMemcpyDeviceToDevice, *stream_),
                     0);
 }
 
 void TensorRTEngine::GetOutputInCPU(const std::string &name, void *dst,
                                     size_t max_size) {
-  // determine data size
-  auto it = buffer_sizes_.find(name);
-  PADDLE_ENFORCE(it != buffer_sizes_.end());
-  PADDLE_ENFORCE_GT(it->second, 0);
-  PADDLE_ENFORCE_GE(max_size, it->second);
-  auto &buf = buffer(name);
-  PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated before");
-  PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(dst, buf.buffer, it->second,
-                                       cudaMemcpyDeviceToHost, *stream_));
+  VLOG(4) << "get output in cpu";
+  auto &buf = buffer(name);
+
+  // Update needed buffer size.
+  auto slot_offset = infer_engine_->getBindingIndex(name.c_str());
+  auto dims = infer_engine_->getBindingDimensions(slot_offset);
+  buf.size = kDataTypeSize[static_cast<int>(
+                 infer_engine_->getBindingDataType(slot_offset))] *
+             analysis::AccuDims(dims.d, dims.nbDims);
+  PADDLE_ENFORCE_LE(buf.size, buf.max_size);
+  PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated before");
+  // DEBUG
+  memset(dst, 0, buf.size);
+  PADDLE_ENFORCE_EQ(
+      0, cudaMemcpy(dst, buf.buffer, buf.size, cudaMemcpyDeviceToHost));
 }
 
 Buffer &TensorRTEngine::buffer(const std::string &name) {
   PADDLE_ENFORCE(infer_engine_ != nullptr, "call FreezeNetwork first.");
   auto it = buffer_sizes_.find(name);
   PADDLE_ENFORCE(it != buffer_sizes_.end());
@@ -174,19 +188,23 @@ Buffer& TensorRTEngine::buffer(const std::string& name) {
   return buffers_[slot_offset];
 }
 
 void TensorRTEngine::SetInputFromCPU(const std::string &name, const void *data,
                                      size_t size) {
   auto &buf = buffer(name);
   PADDLE_ENFORCE_NOT_NULL(buf.buffer);
+  PADDLE_ENFORCE_NOT_NULL(data);
+  PADDLE_ENFORCE_NOT_NULL(stream_);
   PADDLE_ENFORCE_LE(size, buf.max_size, "buffer is too small");
   PADDLE_ENFORCE(buf.device == DeviceType::GPU);
+  buf.size = size;
   PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(buf.buffer, data, size,
                                        cudaMemcpyHostToDevice, *stream_));
 }
 
 void TensorRTEngine::SetInputFromGPU(const std::string &name, const void *data,
                                      size_t size) {
   auto &buf = buffer(name);
+  buf.size = size;
   PADDLE_ENFORCE_NOT_NULL(buf.buffer);
   PADDLE_ENFORCE_LE(size, buf.max_size, "buffer is too small");
   PADDLE_ENFORCE(buf.device == DeviceType::GPU);
@@ -194,15 +212,15 @@ void TensorRTEngine::SetInputFromGPU(const std::string& name, const void* data,
                                        cudaMemcpyDeviceToDevice, *stream_));
 }
 
 void TensorRTEngine::SetITensor(const std::string &name,
                                 nvinfer1::ITensor *tensor) {
   PADDLE_ENFORCE(tensor != nullptr);
   PADDLE_ENFORCE_EQ(0, itensor_map_.count(name), "duplicate ITensor name %s",
                     name);
   itensor_map_[name] = tensor;
 }
 
 nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
   PADDLE_ENFORCE(itensor_map_.count(name), "no ITensor %s", name);
   return itensor_map_[name];
 }
paddle/fluid/inference/tensorrt/engine.h

@@ -57,7 +57,9 @@ class TensorRTEngine : public EngineBase {
       : max_batch_(max_batch),
         max_workspace_(max_workspace),
         stream_(stream ? stream : &default_stream_),
-        logger_(logger) {}
+        logger_(logger) {
+    cudaStreamCreate(&default_stream_);
+  }
 
   virtual ~TensorRTEngine();
@@ -121,6 +123,9 @@ class TensorRTEngine : public EngineBase {
   int max_batch_;
   // the max memory size the engine uses
   int max_workspace_;
+  // batch size of the current data, will be updated each Executation.
+  int batch_size_{-1};
+
   cudaStream_t* stream_;
   // If stream_ is not set from outside, hold its own stream.
   cudaStream_t default_stream_;
paddle/fluid/inference/tensorrt/test_engine.cc

@@ -103,6 +103,10 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
   LOG(INFO) << "to get output";
   float y_cpu[2] = {-1., -1.};
+
+  auto dims = engine_->GetITensor("y")->getDimensions();
+  ASSERT_EQ(dims.nbDims, 3);
+  ASSERT_EQ(dims.d[0], 2);
+  ASSERT_EQ(dims.d[1], 1);
   engine_->GetOutputInCPU("y", &y_cpu[0], sizeof(float) * 2);
 
   ASSERT_EQ(y_cpu[0], 4.5);
   ASSERT_EQ(y_cpu[1], 14.5);
paddle/fluid/operators/CMakeLists.txt

@@ -168,6 +168,8 @@ function(op_library TARGET)
     file(APPEND ${pybind_file} "USE_OP(relu);\n")
   elseif(${TARGET} STREQUAL "fake_dequantize")
     file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
+  elseif(${TARGET} STREQUAL "tensorrt_engine_op")
+    message(STATUS "Pybind skips [tensorrt_engine_op], for this OP is only used in inference")
   else()
     file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
   endif()
@@ -237,9 +239,9 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
 op_library(softmax_op DEPS softmax)
 op_library(sequence_softmax_op DEPS softmax)
 if (WITH_GPU AND TENSORRT_FOUND)
-    op_library(tensorrt_engine_op DEPS tensorrt_engine)
+    op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter)
     nv_test(test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc
-      DEPS tensorrt_engine_op tensorrt_engine tensorrt_converter
+      DEPS tensorrt_engine_op
       analysis)
 else()
     set(DEPS_OPS ${DEPS_OPS} tensorrt_engine_op)
paddle/fluid/operators/tensorrt_engine_op.cc

@@ -24,6 +24,9 @@
 #include "paddle/fluid/operators/tensorrt_engine_op.h"
 
 namespace paddle {
+
+DEFINE_int32(tensorrt_engine_batch_size, 1, "the batch_size of TensorRT");
+
 namespace operators {
 
 using inference::Singleton;
@@ -52,7 +55,6 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
                     "TensorRT' tensor input requires at least 2 dimensions");
   PADDLE_ENFORCE_LE(shape.size(), 4UL,
                     "TensorRT' tensor input requires at most 4 dimensions");
-
   switch (shape.size()) {
     case 2:
       return nvinfer1::Dims2(shape[0], shape[1]);
@@ -90,27 +92,36 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
   engine->InitNetwork();
 
   framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
+  VLOG(4) << "parsed var size " << block.AllVars().size();
   // Add inputs
   VLOG(4) << "declare inputs";
   for (auto &input : context.Inputs("Xs")) {
     VLOG(4) << "declare input " << input;
     auto *var = block.FindVar(input);
     // TensorRT engine need to create parameters. The parameter's description
     // should be set in
     PADDLE_ENFORCE(var, "no variable called %s", input);
     PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
                       "TensorRT engine only takes LoDTensor as input");
+    auto shape = var->GetShape();
+    // For the special batch_size placeholder -1, drop it and pass the real
+    // shape of data.
+    // TODO(Superjomn) fix this with batch broadcast, or it can't handle
+    // variational batch size.
+    if (shape[0] == -1) {
+      shape[0] = FLAGS_tensorrt_engine_batch_size;
+    }
     engine->DeclareInput(
         input, FluidDataType2TRT(
                    var->Proto()->type().lod_tensor().tensor().data_type()),
-        Vec2TRT_Dims(var->GetShape()));
+        Vec2TRT_Dims(shape));
   }
 
   inference::Singleton<inference::tensorrt::OpConverter>::Global().ConvertBlock(
       block_desc, parameters, context.scope(), engine);
 
   // Add outputs
   VLOG(4) << "declare outputs";
   for (auto &output : context.Outputs("Ys")) {
     VLOG(4) << "declare output " << output;
     engine->DeclareOutput(output);
   }
@@ -151,4 +162,7 @@ REGISTER_OP_CPU_KERNEL(
     ops::TensorRTEngineKernel<paddle::platform::CPUDeviceContext, int>,
     ops::TensorRTEngineKernel<paddle::platform::CPUDeviceContext, int64_t>);
 
+// A trick to compile with the needed TensorRT op converter.
+USE_TRT_CONVERTER(mul)
+
 #endif  // PADDLE_WITH_CUDA
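With this change the engine op no longer re-derives the batch size from its first input tensor; it reads the gflag defined above, which the TensorRT sub-graph predictor sets in Run() before delegating to the native predictor (see api_tensorrt_subgraph_engine.cc earlier in this diff). A minimal sketch of that hand-off, with the surrounding predictor details elided and the helper name invented for illustration:

    // Sketch only: hand the user-supplied batch size to the tensorrt_engine op
    // through the flag defined in tensorrt_engine_op.cc.
    #include "paddle/fluid/operators/tensorrt_engine_op.h"  // DECLARE_int32(tensorrt_engine_batch_size)

    namespace paddle {

    void SetTensorRTBatchSize(int batch_size) {
      // Assumed to be positive; the sub-graph predictor checks batch_size > 0
      // before doing this. Every tensorrt_engine op executed afterwards reads
      // the flag when declaring inputs and when calling engine->Execute().
      FLAGS_tensorrt_engine_batch_size = batch_size;
    }

    }  // namespace paddle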
paddle/fluid/operators/tensorrt_engine_op.h

@@ -24,6 +24,9 @@
 #include "paddle/fluid/inference/tensorrt/engine.h"
 
 namespace paddle {
+
+DECLARE_int32(tensorrt_engine_batch_size);
+
 namespace operators {
 
 using inference::Singleton;
@@ -53,7 +56,6 @@ template <typename DeviceContext, typename T>
 class TensorRTEngineKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    VLOG(4) << "TensorRTEngineKernel executing";
     auto engine_name = context.Attr<std::string>("engine_uniq_key");
     if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) {
       Prepare(context);
@@ -61,11 +63,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
     auto *engine = Singleton<TRT_EngineManager>::Global().Get(engine_name);
     auto input_names = context.op().Inputs("Xs");
     PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs");
-    // Try to determine a batch_size
-    auto &tensor0 = inference::analysis::GetFromScope<framework::LoDTensor>(
-        context.scope(), input_names.front());
-    int batch_size = tensor0.dims()[0];
-    PADDLE_ENFORCE_LE(batch_size, context.Attr<int>("max_batch"));
+    PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size,
+                      context.Attr<int>("max_batch"));
 
     // Convert input tensor from fluid to engine.
     for (const auto &x : context.Inputs("Xs")) {
@@ -81,8 +80,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
       }
     }
     // Execute the engine.
-    PADDLE_ENFORCE_GT(batch_size, 0);
-    engine->Execute(batch_size);
+    PADDLE_ENFORCE_GT(FLAGS_tensorrt_engine_batch_size, 0);
+    engine->Execute(FLAGS_tensorrt_engine_batch_size);
     // Convert output tensor from engine to fluid
     for (const auto &y : context.Outputs("Ys")) {
       // convert output and copy to fluid.
@@ -94,18 +93,21 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
       auto *fluid_v = context.scope().FindVar(y);
       PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
       auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
-      fluid_t->Resize(framework::make_ddim(ddim));
       auto size = inference::analysis::AccuDims(dims.d, dims.nbDims);
-      if (platform::is_cpu_place(fluid_t->place())) {
-        engine->GetOutputInCPU(
-            y, fluid_t->mutable_data<float>(platform::CPUPlace()),
-            size * sizeof(float));
-      } else {
-        engine->GetOutputInGPU(
-            y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
-            size * sizeof(float));
-      }
+      fluid_t->Resize(framework::make_ddim(ddim));
+      // TODO(Superjomn) find some way to determine which device to output the
+      // tensor.
+      // if (platform::is_cpu_place(fluid_t->place())) {
+      // TODO(Superjomn) change this float to dtype size.
+      engine->GetOutputInCPU(y,
+                             fluid_t->mutable_data<float>(platform::CPUPlace()),
+                             size * sizeof(float));
+      //} else {
+      //  engine->GetOutputInGPU(
+      //      y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
+      //      size * sizeof(float));
+      //}
     }
 
     cudaStreamSynchronize(*engine->stream());