Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
b42ced8e
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b42ced8e
编写于
7月 20, 2018
作者:
Y
Yan Chunwei
提交者:
GitHub
7月 20, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
bugfix/tensorrt analysis fix subgraph trigger (#12266)
上级
c5c17a14
变更
27
隐藏空白更改
内联
并排
Showing
27 changed file
with
342 addition
and
188 deletion
+342
-188
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+3
-2
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+3
-2
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+9
-1
paddle/fluid/inference/analysis/data_flow_graph.cc
paddle/fluid/inference/analysis/data_flow_graph.cc
+45
-0
paddle/fluid/inference/analysis/data_flow_graph.h
paddle/fluid/inference/analysis/data_flow_graph.h
+3
-31
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
...fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+52
-38
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
.../fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
+4
-0
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
...fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
+1
-1
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
...fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
+14
-2
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc
...nference/analysis/fluid_to_data_flow_graph_pass_tester.cc
+4
-4
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+3
-0
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+1
-1
paddle/fluid/inference/api/api_anakin_engine.cc
paddle/fluid/inference/api/api_anakin_engine.cc
+1
-1
paddle/fluid/inference/api/api_anakin_engine.h
paddle/fluid/inference/api/api_anakin_engine.h
+2
-1
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+2
-1
paddle/fluid/inference/api/api_impl.h
paddle/fluid/inference/api/api_impl.h
+2
-1
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+25
-10
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+2
-1
paddle/fluid/inference/api/test_api.cc
paddle/fluid/inference/api/test_api.cc
+2
-1
paddle/fluid/inference/api/test_api_tensorrt_subgraph_engine.cc
.../fluid/inference/api/test_api_tensorrt_subgraph_engine.cc
+52
-23
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+4
-4
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+55
-37
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+6
-1
paddle/fluid/inference/tensorrt/test_engine.cc
paddle/fluid/inference/tensorrt/test_engine.cc
+4
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+4
-2
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+18
-4
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+21
-19
未找到文件。
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
b42ced8e
...
@@ -22,8 +22,6 @@
...
@@ -22,8 +22,6 @@
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
DEFINE_bool
(
inference_analysis_enable_tensorrt_subgraph_engine
,
false
,
DEFINE_bool
(
inference_analysis_enable_tensorrt_subgraph_engine
,
false
,
"Enable subgraph to TensorRT engine for acceleration"
);
"Enable subgraph to TensorRT engine for acceleration"
);
...
@@ -31,6 +29,9 @@ DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
...
@@ -31,6 +29,9 @@ DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
DEFINE_string
(
inference_analysis_graphviz_log_root
,
"./"
,
DEFINE_string
(
inference_analysis_graphviz_log_root
,
"./"
,
"Graphviz debuger for data flow graphs."
);
"Graphviz debuger for data flow graphs."
);
namespace
inference
{
namespace
analysis
{
class
DfgPassManagerImpl
final
:
public
DfgPassManager
{
class
DfgPassManagerImpl
final
:
public
DfgPassManager
{
public:
public:
DfgPassManagerImpl
()
{
DfgPassManagerImpl
()
{
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
b42ced8e
...
@@ -45,14 +45,15 @@ limitations under the License. */
...
@@ -45,14 +45,15 @@ limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
// flag if not available.
// flag if not available.
DECLARE_bool
(
inference_analysis_enable_tensorrt_subgraph_engine
);
DECLARE_bool
(
inference_analysis_enable_tensorrt_subgraph_engine
);
DECLARE_string
(
inference_analysis_graphviz_log_root
);
DECLARE_string
(
inference_analysis_graphviz_log_root
);
namespace
inference
{
namespace
analysis
{
class
Analyzer
:
public
OrderedRegistry
<
PassManager
>
{
class
Analyzer
:
public
OrderedRegistry
<
PassManager
>
{
public:
public:
// Register all the pass-managers.
// Register all the pass-managers.
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
b42ced8e
...
@@ -13,13 +13,21 @@
...
@@ -13,13 +13,21 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
TEST_F
(
DFG_Tester
,
main
)
{
TEST_F
(
DFG_Tester
,
analysis_without_tensorrt
)
{
FLAGS_inference_analysis_enable_tensorrt_subgraph_engine
=
false
;
Analyzer
analyser
;
analyser
.
Run
(
&
argument
);
}
TEST_F
(
DFG_Tester
,
analysis_with_tensorrt
)
{
FLAGS_inference_analysis_enable_tensorrt_subgraph_engine
=
true
;
Analyzer
analyser
;
Analyzer
analyser
;
analyser
.
Run
(
&
argument
);
analyser
.
Run
(
&
argument
);
}
}
...
...
paddle/fluid/inference/analysis/data_flow_graph.cc
浏览文件 @
b42ced8e
...
@@ -222,10 +222,19 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
...
@@ -222,10 +222,19 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return
stack_
.
top
();
return
stack_
.
top
();
}
}
inline
bool
CheckNodeIndegreeEquals
(
const
Node
&
node
,
size_t
n
)
{
return
node
.
inlinks
.
size
()
==
n
;
}
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
NodesTSIterator
(
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
const
std
::
vector
<
Node
*>
&
source
)
{
PADDLE_ENFORCE
(
!
source
.
empty
(),
PADDLE_ENFORCE
(
!
source
.
empty
(),
"Start points of topological sorting should not be empty!"
);
"Start points of topological sorting should not be empty!"
);
// CHECK all the inputs' in-degree is 0
for
(
auto
*
node
:
source
)
{
PADDLE_ENFORCE
(
CheckNodeIndegreeEquals
(
*
node
,
0
));
}
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
...
@@ -233,6 +242,11 @@ GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
...
@@ -233,6 +242,11 @@ GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
while
(
!
to_visit
.
empty
())
{
while
(
!
to_visit
.
empty
())
{
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
for
(
auto
*
p
:
queue
)
{
for
(
auto
*
p
:
queue
)
{
if
(
p
->
deleted
())
{
visited
.
insert
(
p
);
to_visit
.
erase
(
p
);
continue
;
}
inlink_visited
.
clear
();
inlink_visited
.
clear
();
std
::
copy_if
(
p
->
inlinks
.
begin
(),
p
->
inlinks
.
end
(),
std
::
copy_if
(
p
->
inlinks
.
begin
(),
p
->
inlinks
.
end
(),
...
@@ -292,6 +306,37 @@ Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
...
@@ -292,6 +306,37 @@ Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
return
sorted_
[
cursor_
];
return
sorted_
[
cursor_
];
}
}
std
::
pair
<
std
::
vector
<
Node
*>
,
std
::
vector
<
Node
*>>
ExtractInputAndOutputOfSubGraph
(
std
::
vector
<
Node
*>
&
graph
)
{
// NOLINT
std
::
unordered_set
<
Node
*>
nodes
(
graph
.
begin
(),
graph
.
end
());
std
::
unordered_set
<
Node
*>
inputs
;
std
::
unordered_set
<
Node
*>
outputs
;
// Input a Value, check whether its inlink is in the subgraph.
auto
inlink_in_subgraph
=
[
&
](
Node
*
n
)
{
for
(
auto
*
in
:
n
->
inlinks
)
{
if
(
nodes
.
count
(
in
))
return
true
;
}
return
false
;
};
for
(
auto
&
node
:
graph
)
{
for
(
auto
*
in
:
node
->
inlinks
)
{
// The Value that is written by nodes inside a sub-graph shouldn't be the
// input of the sub-graph.
if
(
!
nodes
.
count
(
in
)
&&
in
->
type
()
==
Node
::
Type
::
kValue
&&
!
inlink_in_subgraph
(
in
))
{
inputs
.
insert
(
in
);
}
}
for
(
auto
*
out
:
node
->
outlinks
)
{
if
(
!
nodes
.
count
(
out
)
&&
out
->
type
()
==
Node
::
Type
::
kValue
)
{
outputs
.
insert
(
out
);
}
}
}
return
std
::
make_pair
(
std
::
vector
<
Node
*>
(
inputs
.
begin
(),
inputs
.
end
()),
std
::
vector
<
Node
*>
(
outputs
.
begin
(),
outputs
.
end
()));
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph.h
浏览文件 @
b42ced8e
...
@@ -133,7 +133,7 @@ struct GraphTraits<DataFlowGraph> {
...
@@ -133,7 +133,7 @@ struct GraphTraits<DataFlowGraph> {
private:
private:
std
::
vector
<
Node
*>
sorted_
;
std
::
vector
<
Node
*>
sorted_
;
in
t
cursor_
{
0
};
size_
t
cursor_
{
0
};
};
};
explicit
GraphTraits
(
DataFlowGraph
*
graph
)
:
graph_
(
graph
)
{}
explicit
GraphTraits
(
DataFlowGraph
*
graph
)
:
graph_
(
graph
)
{}
...
@@ -173,36 +173,8 @@ struct GraphTraits<DataFlowGraph> {
...
@@ -173,36 +173,8 @@ struct GraphTraits<DataFlowGraph> {
// Extract the inputs and outputs of a graph. The inputs and outputs of a
// Extract the inputs and outputs of a graph. The inputs and outputs of a
// sub-graph is the inputs nodes and output nodes that doesn't inside the
// sub-graph is the inputs nodes and output nodes that doesn't inside the
// sub-graph.
// sub-graph.
static
std
::
pair
<
std
::
vector
<
Node
*>
,
std
::
vector
<
Node
*>>
std
::
pair
<
std
::
vector
<
Node
*>
,
std
::
vector
<
Node
*>>
ExtractInputAndOutputOfSubGraph
(
std
::
vector
<
Node
*>
&
graph
)
{
// NOLINT
ExtractInputAndOutputOfSubGraph
(
std
::
vector
<
Node
*>
&
graph
);
std
::
unordered_set
<
Node
*>
nodes
(
graph
.
begin
(),
graph
.
end
());
std
::
unordered_set
<
Node
*>
inputs
;
std
::
unordered_set
<
Node
*>
outputs
;
// Input a Value, check whether its inlink is in the subgraph.
auto
inlink_in_subgraph
=
[
&
](
Node
*
n
)
{
for
(
auto
*
in
:
n
->
inlinks
)
{
if
(
nodes
.
count
(
in
))
return
true
;
}
return
false
;
};
for
(
auto
&
node
:
graph
)
{
for
(
auto
*
in
:
node
->
inlinks
)
{
// The Value that is written by nodes inside a sub-graph shouldn't be the
// input of the sub-graph.
if
(
!
nodes
.
count
(
in
)
&&
in
->
type
()
==
Node
::
Type
::
kValue
&&
!
inlink_in_subgraph
(
in
))
{
inputs
.
insert
(
in
);
}
}
for
(
auto
*
out
:
node
->
outlinks
)
{
if
(
!
nodes
.
count
(
out
)
&&
out
->
type
()
==
Node
::
Type
::
kValue
)
{
outputs
.
insert
(
out
);
}
}
}
return
std
::
make_pair
(
std
::
vector
<
Node
*>
(
inputs
.
begin
(),
inputs
.
end
()),
std
::
vector
<
Node
*>
(
outputs
.
begin
(),
outputs
.
end
()));
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
浏览文件 @
b42ced8e
...
@@ -22,14 +22,18 @@
...
@@ -22,14 +22,18 @@
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
DEFINE_int32
(
tensorrt_max_batchsize
,
300
,
"TensorRT maximum batch size"
);
DEFINE_int32
(
tensorrt_workspace_size
,
2048
,
"TensorRT workspace size"
);
namespace
analysis
{
namespace
analysis
{
using
framework
::
proto
::
ProgramDesc
;
using
framework
::
proto
::
ProgramDesc
;
std
::
vector
<
std
::
string
>
ExtractParameters
(
std
::
vector
<
std
::
string
>
ExtractParameters
(
const
std
::
vector
<
std
::
unique_ptr
<
Node
>>
&
nodes
);
const
std
::
vector
<
std
::
unique_ptr
<
Node
>>
&
nodes
);
bool
DataFlowGraphToFluidPass
::
Initialize
(
Argument
*
argument
)
{
bool
DataFlowGraphToFluidPass
::
Initialize
(
Argument
*
argument
)
{
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
)
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
)
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
->
origin_program_desc
)
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
->
origin_program_desc
)
PADDLE_ENFORCE
(
!
argument
->
transformed_program_desc
);
PADDLE_ENFORCE
(
!
argument
->
transformed_program_desc
);
...
@@ -47,76 +51,77 @@ bool DataFlowGraphToFluidPass::Initialize(Argument* argument) {
...
@@ -47,76 +51,77 @@ bool DataFlowGraphToFluidPass::Initialize(Argument* argument) {
bool
DataFlowGraphToFluidPass
::
Finalize
()
{
return
true
;
}
bool
DataFlowGraphToFluidPass
::
Finalize
()
{
return
true
;
}
void
DataFlowGraphToFluidPass
::
Run
(
DataFlowGraph
*
graph
)
{
void
DataFlowGraphToFluidPass
::
Run
(
DataFlowGraph
*
graph
)
{
auto
traits
=
GraphTraits
<
DataFlowGraph
>
(
graph
);
LOG
(
INFO
)
<<
"graph.inputs "
<<
graph
->
inputs
.
size
(
);
for
(
auto
it
=
traits
.
nodes
().
begin
();
it
!=
traits
.
nodes
().
end
();
++
it
)
{
for
(
auto
&
node
:
GraphTraits
<
DataFlowGraph
>
(
graph
).
nodes_in_TS
()
)
{
if
(
it
->
deleted
())
continue
;
if
(
node
.
deleted
())
continue
;
switch
(
it
->
type
())
{
switch
(
node
.
type
())
{
case
Node
::
Type
::
kFunction
:
{
case
Node
::
Type
::
kFunction
:
{
LOG
(
INFO
)
<<
"add function "
<<
it
->
repr
();
LOG
(
INFO
)
<<
"add function "
<<
node
.
repr
();
AddFluidOp
(
&
(
*
it
)
);
AddFluidOp
(
&
node
);
}
break
;
}
break
;
case
Node
::
Type
::
kFunctionBlock
:
{
case
Node
::
Type
::
kFunctionBlock
:
{
LOG
(
INFO
)
<<
"add engine op "
<<
it
->
repr
()
<<
" , "
LOG
(
INFO
)
<<
"add engine op "
<<
node
.
repr
()
<<
" , "
<<
static_cast
<
FunctionBlock
*>
(
&
(
*
it
)
)
->
subgraph
.
size
();
<<
static_cast
<
FunctionBlock
*>
(
&
node
)
->
subgraph
.
size
();
AddEngineOp
(
&
(
*
it
)
);
AddEngineOp
(
&
node
);
}
break
;
}
break
;
default:
default:
continue
;
continue
;
}
}
}
}
PADDLE_ENFORCE
(
argument_
->
transformed_program_desc
.
get
());
}
}
void
DataFlowGraphToFluidPass
::
AddFluidOp
(
Node
*
node
)
{
void
DataFlowGraphToFluidPass
::
AddFluidOp
(
Node
*
node
)
{
auto
*
ori_op
=
static_cast
<
framework
::
proto
::
OpDesc
*>
(
node
->
pb_desc
());
auto
*
ori_op
=
static_cast
<
framework
::
proto
::
OpDesc
*>
(
node
->
pb_desc
());
// currently only the main block is analyzed.
// currently only the main block is analyzed.
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
op
=
main_block
->
add_ops
();
auto
*
op
=
main_block
->
add_ops
();
*
op
=
*
ori_op
;
// copy the attributes, by default, these will not be changed
*
op
=
*
ori_op
;
// copy the attributes, by default, these will not be changed
// by analysis phrase.
// by analysis phrase.
// The inputs and outputs of the existing ops are not changed by tensorrt
// The inputs and outputs of the existing ops are not changed by tensorrt
// subgraph pass.
// subgraph pass.
// NOTE It might be changed by other passes in the long run.
// NOTE It might be changed by other passes in the long run.
}
}
void
CreateTrtEngineOp
(
Node
*
node
,
const
DataFlowGraph
&
graph
,
void
CreateTrtEngineOp
(
Node
*
node
,
const
DataFlowGraph
&
graph
,
const
framework
::
proto
::
BlockDesc
&
block
)
{
const
framework
::
proto
::
BlockDesc
&
block
)
{
static
int
counter
{
0
};
static
int
counter
{
0
};
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
auto
*
func
=
static_cast
<
FunctionBlock
*>
(
node
);
auto
*
func
=
static_cast
<
FunctionBlock
*>
(
node
);
// collect inputs
// collect inputs
std
::
vector
<
std
::
string
>
io
;
std
::
vector
<
std
::
string
>
io
;
for
(
auto
*
x
:
func
->
inlinks
)
{
for
(
auto
*
x
:
func
->
inlinks
)
{
io
.
push_back
(
x
->
name
());
io
.
push_back
(
x
->
name
());
}
}
desc
.
SetInput
(
"Xs"
,
io
);
desc
.
SetInput
(
"Xs"
,
io
);
// collect outputs
// collect outputs
io
.
clear
();
io
.
clear
();
for
(
auto
*
x
:
func
->
outlinks
)
{
for
(
auto
*
x
:
func
->
outlinks
)
{
io
.
push_back
(
x
->
name
());
io
.
push_back
(
x
->
name
());
}
}
desc
.
SetOutput
(
"Ys"
,
io
);
desc
.
SetOutput
(
"Ys"
,
io
);
desc
.
SetType
(
"tensorrt_engine"
);
desc
.
SetType
(
"tensorrt_engine"
);
PADDLE_ENFORCE
(
!
block
.
vars
().
empty
(),
"the block has no var-desc"
);
// Set attrs
// Set attrs
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
.
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
.
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"engine_unique_key"
,
SetAttr
(
desc
.
Proto
(),
"engine_uniq_key"
,
"trt-"
+
std
::
to_string
(
counter
++
));
"trt-"
+
std
::
to_string
(
counter
++
));
SetAttr
(
desc
.
Proto
(),
"max_batch"
,
FLAGS_tensorrt_max_batchsize
);
SetAttr
(
desc
.
Proto
(),
"max_batch"
,
100
);
// TODO(Superjomn) add config latter
SetAttr
(
desc
.
Proto
(),
"max_workspace"
,
FLAGS_tensorrt_workspace_size
);
SetAttr
(
desc
.
Proto
(),
"max_workspace"
,
1024
);
// TODO(Superjomn) add config latter
SetAttr
(
desc
.
Proto
(),
"parameters"
,
ExtractParameters
(
graph
.
nodes
.
nodes
()));
SetAttr
(
desc
.
Proto
(),
"parameters"
,
ExtractParameters
(
graph
.
nodes
.
nodes
()));
node
->
SetPbMsg
(
desc
.
Proto
()
->
SerializeAsString
());
node
->
SetPbMsg
(
desc
.
Proto
()
->
SerializeAsString
());
}
}
std
::
vector
<
std
::
string
>
ExtractParameters
(
std
::
vector
<
std
::
string
>
ExtractParameters
(
const
std
::
vector
<
std
::
unique_ptr
<
Node
>>
&
nodes
)
{
const
std
::
vector
<
std
::
unique_ptr
<
Node
>>
&
nodes
)
{
std
::
vector
<
std
::
string
>
parameters
;
std
::
vector
<
std
::
string
>
parameters
;
for
(
const
auto
&
node
:
nodes
)
{
for
(
const
auto
&
node
:
nodes
)
{
if
(
!
node
->
IsValue
())
continue
;
if
(
!
node
->
IsValue
())
continue
;
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"pb_msg should be set first"
);
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"pb_msg should be set first"
);
framework
::
proto
::
VarDesc
var
;
framework
::
proto
::
VarDesc
var
;
...
@@ -128,21 +133,30 @@ std::vector<std::string> ExtractParameters(
...
@@ -128,21 +133,30 @@ std::vector<std::string> ExtractParameters(
return
parameters
;
return
parameters
;
}
}
void
DataFlowGraphToFluidPass
::
AddEngineOp
(
Node
*
node
)
{
void
DataFlowGraphToFluidPass
::
AddEngineOp
(
Node
*
node
)
{
// TODO(Superjomn) Here need to expose some arguments for default setting.
// TODO(Superjomn) Here need to expose some arguments for default setting.
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
auto
*
block_node
=
static_cast
<
FunctionBlock
*>
(
node
);
auto
*
block_node
=
static_cast
<
FunctionBlock
*>
(
node
);
framework
::
proto
::
BlockDesc
proto
;
framework
::
proto
::
BlockDesc
proto
;
framework
::
BlockDesc
block_desc
(
nullptr
,
&
proto
);
framework
::
BlockDesc
block_desc
(
nullptr
,
&
proto
);
block_desc
.
Proto
()
->
set_parent_idx
(
-
1
);
block_desc
.
Proto
()
->
set_idx
(
0
);
LOG
(
INFO
)
<<
"origin variable size: "
<<
argument_
->
origin_program_desc
->
blocks
(
0
).
vars
().
size
();
LOG
(
INFO
)
<<
"transformed variable size: "
<<
block_desc
.
Proto
()
->
vars
().
size
();
// copy ops.
// copy ops.
for
(
auto
*
node
:
block_node
->
subgraph
)
{
for
(
auto
*
node
:
block_node
->
subgraph
)
{
auto
*
op
=
block_desc
.
AppendOp
();
auto
*
op
=
block_desc
.
AppendOp
();
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
());
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
());
op
->
Proto
()
->
ParseFromString
(
node
->
pb_msg
());
op
->
Proto
()
->
ParseFromString
(
node
->
pb_msg
());
}
}
*
block_desc
.
Proto
()
->
mutable_vars
()
=
argument_
->
origin_program_desc
->
blocks
(
0
).
vars
();
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
());
CreateTrtEngineOp
(
node
,
*
argument_
->
main_dfg
,
*
block_desc
.
Proto
());
CreateTrtEngineOp
(
node
,
*
argument_
->
main_dfg
,
*
block_desc
.
Proto
());
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
op
=
main_block
->
add_ops
();
auto
*
op
=
main_block
->
add_ops
();
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"failed to set desc for block"
);
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"failed to set desc for block"
);
op
->
ParseFromString
(
node
->
pb_msg
());
op
->
ParseFromString
(
node
->
pb_msg
());
}
}
...
@@ -151,7 +165,7 @@ namespace {
...
@@ -151,7 +165,7 @@ namespace {
class
DFG_DebuggerPass
:
public
DFG_GraphvizDrawPass
{
class
DFG_DebuggerPass
:
public
DFG_GraphvizDrawPass
{
public:
public:
using
Config
=
DFG_GraphvizDrawPass
::
Config
;
using
Config
=
DFG_GraphvizDrawPass
::
Config
;
explicit
DFG_DebuggerPass
(
const
Config
&
config
)
explicit
DFG_DebuggerPass
(
const
Config
&
config
)
:
DFG_GraphvizDrawPass
(
config
)
{}
:
DFG_GraphvizDrawPass
(
config
)
{}
std
::
string
repr
()
const
override
{
return
"dfg-to-fluid-debuger-pass"
;
}
std
::
string
repr
()
const
override
{
return
"dfg-to-fluid-debuger-pass"
;
}
...
@@ -160,7 +174,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
...
@@ -160,7 +174,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
};
};
}
// namespace
}
// namespace
Pass
*
DataFlowGraphToFluidPass
::
CreateGraphvizDebugerPass
()
const
{
Pass
*
DataFlowGraphToFluidPass
::
CreateGraphvizDebugerPass
()
const
{
return
new
DFG_DebuggerPass
(
DFG_GraphvizDrawPass
::
Config
(
return
new
DFG_DebuggerPass
(
DFG_GraphvizDrawPass
::
Config
(
FLAGS_inference_analysis_graphviz_log_root
,
FLAGS_inference_analysis_graphviz_log_root
,
"data_flow_graph_to_fluid_graphviz_debugger"
));
"data_flow_graph_to_fluid_graphviz_debugger"
));
...
...
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
浏览文件 @
b42ced8e
...
@@ -26,6 +26,10 @@
...
@@ -26,6 +26,10 @@
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
DECLARE_int32
(
tensorrt_max_batchsize
);
DECLARE_int32
(
tensorrt_workspace_size
);
namespace
analysis
{
namespace
analysis
{
class
DataFlowGraphToFluidPass
final
:
public
DataFlowGraphPass
{
class
DataFlowGraphToFluidPass
final
:
public
DataFlowGraphPass
{
public:
public:
...
...
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
浏览文件 @
b42ced8e
...
@@ -40,7 +40,7 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
...
@@ -40,7 +40,7 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
no
++
;
no
++
;
}
}
// DFG is sensitive to ProgramDesc, be careful to change the existing models.
// DFG is sensitive to ProgramDesc, be careful to change the existing models.
ASSERT_EQ
(
no
,
8
2
);
ASSERT_EQ
(
no
,
8
3
);
}
}
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
浏览文件 @
b42ced8e
...
@@ -28,7 +28,6 @@ bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
...
@@ -28,7 +28,6 @@ bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
->
origin_program_desc
);
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
->
origin_program_desc
);
PADDLE_ENFORCE
(
argument
);
PADDLE_ENFORCE
(
argument
);
if
(
!
argument
->
main_dfg
)
{
if
(
!
argument
->
main_dfg
)
{
LOG
(
INFO
)
<<
"Init DFG"
;
argument
->
main_dfg
.
reset
(
new
DataFlowGraph
);
argument
->
main_dfg
.
reset
(
new
DataFlowGraph
);
}
}
desc_
=
argument
->
origin_program_desc
.
get
();
desc_
=
argument
->
origin_program_desc
.
get
();
...
@@ -51,6 +50,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
...
@@ -51,6 +50,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
v
->
SetPbMsg
(
var
.
SerializeAsString
());
v
->
SetPbMsg
(
var
.
SerializeAsString
());
var2id
[
var
.
name
()]
=
v
->
id
();
var2id
[
var
.
name
()]
=
v
->
id
();
}
}
for
(
int
i
=
0
;
i
<
main_block
.
ops_size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
main_block
.
ops_size
();
i
++
)
{
const
auto
&
op
=
main_block
.
ops
(
i
);
const
auto
&
op
=
main_block
.
ops
(
i
);
auto
*
o
=
graph
->
nodes
.
Create
(
Node
::
Type
::
kFunction
);
auto
*
o
=
graph
->
nodes
.
Create
(
Node
::
Type
::
kFunction
);
...
@@ -62,19 +62,31 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
...
@@ -62,19 +62,31 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
o
->
SetPbMsg
(
op
.
SerializeAsString
());
o
->
SetPbMsg
(
op
.
SerializeAsString
());
// set inputs and outputs
// set inputs and outputs
// TODO(Superjomn) make sure the InputNames is the real variable name.
std
::
unordered_set
<
Node
*>
inlinks
;
for
(
int
j
=
0
;
j
<
op
.
inputs_size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
op
.
inputs_size
();
j
++
)
{
auto
&
in_var
=
op
.
inputs
(
j
);
auto
&
in_var
=
op
.
inputs
(
j
);
for
(
int
k
=
0
;
k
<
in_var
.
arguments_size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
in_var
.
arguments_size
();
k
++
)
{
auto
*
in
=
graph
->
nodes
.
GetMutable
(
var2id
.
at
(
in_var
.
arguments
(
k
)));
auto
*
in
=
graph
->
nodes
.
GetMutable
(
var2id
.
at
(
in_var
.
arguments
(
k
)));
in
->
outlinks
.
push_back
(
o
);
in
->
outlinks
.
push_back
(
o
);
o
->
inlinks
.
push_back
(
in
);
o
->
inlinks
.
push_back
(
in
);
inlinks
.
insert
(
in
);
}
}
}
}
for
(
int
j
=
0
;
j
<
op
.
outputs_size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
op
.
outputs_size
();
j
++
)
{
auto
&
out_var
=
op
.
outputs
(
j
);
auto
&
out_var
=
op
.
outputs
(
j
);
for
(
int
k
=
0
;
k
<
out_var
.
arguments_size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
out_var
.
arguments_size
();
k
++
)
{
auto
*
out
=
graph
->
nodes
.
GetMutable
(
var2id
[
out_var
.
arguments
(
k
)]);
auto
*
out
=
graph
->
nodes
.
GetMutable
(
var2id
[
out_var
.
arguments
(
k
)]);
if
(
inlinks
.
count
(
out
))
{
// Loop found, for example, a = op(a), use SSA, change to a1 = op(a).
auto
*
out_alias
=
graph
->
nodes
.
Create
(
Node
::
Type
::
kValue
);
out_alias
->
SetName
(
out
->
name
());
out_alias
->
SetPbDesc
(
out
->
pb_desc
());
out_alias
->
SetPbMsg
(
out
->
pb_msg
());
var2id
[
out_alias
->
name
()]
=
out_alias
->
id
();
// update a -> a0
LOG
(
INFO
)
<<
"loop found in graph, create SSA alias node ["
<<
out_alias
->
repr
()
<<
"] for ["
<<
out
->
repr
()
<<
"]"
;
out
=
out_alias
;
}
out
->
inlinks
.
push_back
(
o
);
out
->
inlinks
.
push_back
(
o
);
o
->
outlinks
.
push_back
(
out
);
o
->
outlinks
.
push_back
(
out
);
}
}
...
...
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc
浏览文件 @
b42ced8e
...
@@ -24,12 +24,12 @@ namespace analysis {
...
@@ -24,12 +24,12 @@ namespace analysis {
TEST_F
(
DFG_Tester
,
Init
)
{
TEST_F
(
DFG_Tester
,
Init
)
{
FluidToDataFlowGraphPass
pass
;
FluidToDataFlowGraphPass
pass
;
pass
.
Initialize
(
&
argument
);
pass
.
Initialize
(
&
argument
);
DataFlowGraph
graph
;
pass
.
Run
(
argument
.
main_dfg
.
get
());
pass
.
Run
(
&
graph
);
// Analysis is sensitive to ProgramDesc, careful to change the original model.
// Analysis is sensitive to ProgramDesc, careful to change the original model.
ASSERT_EQ
(
graph
.
nodes
.
size
(),
37
UL
);
ASSERT_EQ
(
argument
.
main_dfg
->
nodes
.
size
(),
38
UL
);
pass
.
Finalize
();
pass
.
Finalize
();
LOG
(
INFO
)
<<
'\n'
<<
graph
.
DotString
();
ASSERT_FALSE
(
argument
.
main_dfg
->
DotString
().
empty
());
EXPECT_FALSE
(
argument
.
main_dfg
->
inputs
.
empty
());
}
}
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
浏览文件 @
b42ced8e
...
@@ -25,6 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
...
@@ -25,6 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
void
TensorRTSubGraphPass
::
Run
(
DataFlowGraph
*
graph
)
{
void
TensorRTSubGraphPass
::
Run
(
DataFlowGraph
*
graph
)
{
SubGraphFuse
(
graph
,
node_inside_subgraph_teller_
)();
SubGraphFuse
(
graph
,
node_inside_subgraph_teller_
)();
VLOG
(
4
)
<<
"debug info "
<<
graph
->
HumanReadableInfo
(
false
/*show_values*/
,
true
/*show_functions*/
);
}
}
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
b42ced8e
...
@@ -82,7 +82,7 @@ inference_api_test(test_api_impl
...
@@ -82,7 +82,7 @@ inference_api_test(test_api_impl
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
cc_library
(
paddle_inference_tensorrt_subgraph_engine
cc_library
(
paddle_inference_tensorrt_subgraph_engine
SRCS api_tensorrt_subgraph_engine.cc
SRCS api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_
fluid_api
)
DEPS paddle_inference_api analysis tensorrt_engine paddle_
inference_api paddle_fluid_api tensorrt_converter
)
inference_api_test
(
test_api_tensorrt_subgraph_engine ARGS test_word2vec
)
inference_api_test
(
test_api_tensorrt_subgraph_engine ARGS test_word2vec
)
endif
()
endif
()
...
...
paddle/fluid/inference/api/api_anakin_engine.cc
浏览文件 @
b42ced8e
...
@@ -39,7 +39,7 @@ bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
...
@@ -39,7 +39,7 @@ bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
bool
PaddleInferenceAnakinPredictor
::
Run
(
bool
PaddleInferenceAnakinPredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
)
{
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
for
(
const
auto
&
input
:
inputs
)
{
for
(
const
auto
&
input
:
inputs
)
{
if
(
input
.
dtype
!=
PaddleDType
::
FLOAT32
)
{
if
(
input
.
dtype
!=
PaddleDType
::
FLOAT32
)
{
LOG
(
ERROR
)
<<
"Only support float type inputs. "
<<
input
.
name
LOG
(
ERROR
)
<<
"Only support float type inputs. "
<<
input
.
name
...
...
paddle/fluid/inference/api/api_anakin_engine.h
浏览文件 @
b42ced8e
...
@@ -37,7 +37,8 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
...
@@ -37,7 +37,8 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
// NOTE Unlike the native engine, the buffers of anakin engine's output_data
// NOTE Unlike the native engine, the buffers of anakin engine's output_data
// should be allocated first.
// should be allocated first.
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
)
override
;
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
override
;
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
b42ced8e
...
@@ -108,7 +108,8 @@ NativePaddlePredictor::~NativePaddlePredictor() {
...
@@ -108,7 +108,8 @@ NativePaddlePredictor::~NativePaddlePredictor() {
}
}
bool
NativePaddlePredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
bool
NativePaddlePredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
)
{
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
VLOG
(
3
)
<<
"Predictor::predict"
;
VLOG
(
3
)
<<
"Predictor::predict"
;
Timer
timer
;
Timer
timer
;
timer
.
tic
();
timer
.
tic
();
...
...
paddle/fluid/inference/api/api_impl.h
浏览文件 @
b42ced8e
...
@@ -38,7 +38,8 @@ class NativePaddlePredictor : public PaddlePredictor {
...
@@ -38,7 +38,8 @@ class NativePaddlePredictor : public PaddlePredictor {
bool
Init
(
std
::
shared_ptr
<
framework
::
Scope
>
parent_scope
);
bool
Init
(
std
::
shared_ptr
<
framework
::
Scope
>
parent_scope
);
bool
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
bool
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
)
override
;
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
=
-
1
)
override
;
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
...
...
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
浏览文件 @
b42ced8e
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/tensorrt_engine_op.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -64,16 +65,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
...
@@ -64,16 +65,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
return
false
;
return
false
;
}
}
// Analyze inference_program
OptimizeInferenceProgram
();
Argument
argument
;
argument
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
Singleton
<
Analyzer
>::
Global
().
Run
(
&
argument
);
CHECK
(
argument
.
transformed_program_desc
);
VLOG
(
5
)
<<
"transformed program:
\n
"
<<
argument
.
transformed_program_desc
->
SerializeAsString
();
VLOG
(
5
)
<<
"to prepare executor"
;
*
inference_program_
->
Proto
()
=
*
argument
.
transformed_program_desc
;
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
VLOG
(
5
)
<<
"to create variables"
;
VLOG
(
5
)
<<
"to create variables"
;
...
@@ -86,6 +78,29 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
...
@@ -86,6 +78,29 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
return
true
;
return
true
;
}
}
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
override
{
PADDLE_ENFORCE_GT
(
batch_size
,
0
,
"TensorRT engine needs the argument batch_size set"
);
FLAGS_tensorrt_engine_batch_size
=
batch_size
;
return
NativePaddlePredictor
::
Run
(
inputs
,
output_data
,
batch_size
);
}
void
OptimizeInferenceProgram
()
{
// Analyze inference_program
Argument
argument
;
argument
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
Singleton
<
Analyzer
>::
Global
().
Run
(
&
argument
);
CHECK
(
argument
.
transformed_program_desc
);
VLOG
(
5
)
<<
"transformed program:
\n
"
<<
argument
.
transformed_program_desc
->
SerializeAsString
();
VLOG
(
5
)
<<
"to prepare executor"
;
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
*
argument
.
transformed_program_desc
));
}
private:
private:
TensorRTConfig
config_
;
TensorRTConfig
config_
;
};
};
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
b42ced8e
...
@@ -98,7 +98,8 @@ class PaddlePredictor {
...
@@ -98,7 +98,8 @@ class PaddlePredictor {
// responsible for the output tensor's buffer, either allocated or passed from
// responsible for the output tensor's buffer, either allocated or passed from
// outside.
// outside.
virtual
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
virtual
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
)
=
0
;
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
=
0
;
// Clone a predictor that share the model weights, the Cloned predictor should
// Clone a predictor that share the model weights, the Cloned predictor should
// be thread-safe.
// be thread-safe.
...
...
paddle/fluid/inference/api/test_api.cc
浏览文件 @
b42ced8e
...
@@ -35,7 +35,8 @@ class DemoPredictor : public PaddlePredictor {
...
@@ -35,7 +35,8 @@ class DemoPredictor : public PaddlePredictor {
LOG
(
INFO
)
<<
"I get other_config "
<<
config
.
other_config
;
LOG
(
INFO
)
<<
"I get other_config "
<<
config
.
other_config
;
}
}
bool
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
bool
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
)
override
{
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
=
0
)
override
{
LOG
(
INFO
)
<<
"Run"
;
LOG
(
INFO
)
<<
"Run"
;
return
false
;
return
false
;
}
}
...
...
paddle/fluid/inference/api/test_api_tensorrt_subgraph_engine.cc
浏览文件 @
b42ced8e
...
@@ -15,50 +15,79 @@
...
@@ -15,50 +15,79 @@
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
namespace
paddle
{
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
void
Main
(
bool
use_gpu
)
{
void
CompareTensorRTWithFluid
(
bool
enable_tensorrt
)
{
FLAGS_inference_analysis_enable_tensorrt_subgraph_engine
=
enable_tensorrt
;
//# 1. Create PaddlePredictor with a config.
//# 1. Create PaddlePredictor with a config.
TensorRTConfig
config
;
NativeConfig
config0
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config0
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config
.
use_gpu
=
use_gpu
;
config0
.
use_gpu
=
true
;
config
.
fraction_of_gpu_memory
=
0.15
;
config0
.
fraction_of_gpu_memory
=
0.3
;
config
.
device
=
0
;
config0
.
device
=
0
;
auto
predictor
=
TensorRTConfig
config1
;
config1
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config1
.
use_gpu
=
true
;
config1
.
fraction_of_gpu_memory
=
0.3
;
config1
.
device
=
0
;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config0
);
auto
predictor1
=
CreatePaddlePredictor
<
TensorRTConfig
,
CreatePaddlePredictor
<
TensorRTConfig
,
PaddleEngineKind
::
kAutoMixedTensorRT
>
(
config
);
PaddleEngineKind
::
kAutoMixedTensorRT
>
(
config
1
);
for
(
int
batch_id
=
0
;
batch_id
<
3
;
batch_id
++
)
{
for
(
int
batch_id
=
0
;
batch_id
<
1
;
batch_id
++
)
{
//# 2. Prepare input.
//# 2. Prepare input.
int64_t
data
[
4
]
=
{
1
,
2
,
3
,
4
};
std
::
vector
<
int64_t
>
data
(
20
);
for
(
int
i
=
0
;
i
<
20
;
i
++
)
data
[
i
]
=
i
;
PaddleTensor
tensor
{.
name
=
""
,
PaddleTensor
tensor
{
.
shape
=
std
::
vector
<
int
>
({
4
,
1
}),
.
name
=
""
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
)),
.
shape
=
std
::
vector
<
int
>
({
10
,
1
}),
.
dtype
=
PaddleDType
::
INT64
};
.
data
=
PaddleBuf
(
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
)),
.
dtype
=
PaddleDType
::
INT64
};
// For simplicity, we set all the slots with the same data.
// For simplicity, we set all the slots with the same data.
std
::
vector
<
PaddleTensor
>
slots
(
4
,
tensor
);
std
::
vector
<
PaddleTensor
>
slots
(
4
,
tensor
);
//# 3. Run
//# 3. Run
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
outputs0
;
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
std
::
vector
<
PaddleTensor
>
outputs1
;
CHECK
(
predictor0
->
Run
(
slots
,
&
outputs0
));
CHECK
(
predictor1
->
Run
(
slots
,
&
outputs1
,
10
));
//# 4. Get output.
//# 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
ASSERT_EQ
(
outputs0
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
ASSERT_EQ
(
outputs1
.
size
(),
1UL
);
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
const
size_t
num_elements
=
outputs0
.
front
().
data
.
length
()
/
sizeof
(
float
);
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
const
size_t
num_elements1
=
outputs1
.
front
().
data
.
length
()
/
sizeof
(
float
);
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
EXPECT_EQ
(
num_elements
,
num_elements1
);
auto
*
data0
=
static_cast
<
float
*>
(
outputs0
.
front
().
data
.
data
());
auto
*
data1
=
static_cast
<
float
*>
(
outputs1
.
front
().
data
.
data
());
ASSERT_GT
(
num_elements
,
0UL
);
for
(
size_t
i
=
0
;
i
<
std
::
min
(
num_elements
,
num_elements1
);
i
++
)
{
EXPECT_NEAR
(
data0
[
i
],
data1
[
i
],
1e-3
);
}
}
}
}
}
}
TEST
(
paddle_inference_api_tensorrt_subgraph_engine
,
main
)
{
Main
(
true
);
}
TEST
(
paddle_inference_api_tensorrt_subgraph_engine
,
without_tensorrt
)
{
CompareTensorRTWithFluid
(
false
);
}
TEST
(
paddle_inference_api_tensorrt_subgraph_engine
,
with_tensorrt
)
{
CompareTensorRTWithFluid
(
true
);
}
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
b42ced8e
...
@@ -93,6 +93,10 @@ class OpConverter {
...
@@ -93,6 +93,10 @@ class OpConverter {
framework
::
Scope
*
scope_
{
nullptr
};
framework
::
Scope
*
scope_
{
nullptr
};
};
};
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
#define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \
#define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \
struct trt_##op_type__##_converter : public ::paddle::framework::Registrar { \
struct trt_##op_type__##_converter : public ::paddle::framework::Registrar { \
trt_##op_type__##_converter() { \
trt_##op_type__##_converter() { \
...
@@ -111,7 +115,3 @@ class OpConverter {
...
@@ -111,7 +115,3 @@ class OpConverter {
extern int TouchConverterRegister_##op_type__(); \
extern int TouchConverterRegister_##op_type__(); \
static int use_op_converter_trt_##op_type__ __attribute__((unused)) = \
static int use_op_converter_trt_##op_type__ __attribute__((unused)) = \
TouchConverterRegister_##op_type__();
TouchConverterRegister_##op_type__();
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
b42ced8e
...
@@ -26,18 +26,20 @@ namespace paddle {
...
@@ -26,18 +26,20 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
tensorrt
{
namespace
tensorrt
{
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
PADDLE_ENFORCE
(
false
,
"not implemented"
);
PADDLE_ENFORCE
(
false
,
"not implemented"
);
}
}
void
TensorRTEngine
::
Execute
(
int
batch_size
)
{
void
TensorRTEngine
::
Execute
(
int
batch_size
)
{
std
::
vector
<
void
*>
buffers
;
batch_size_
=
batch_size
;
for
(
auto
&
buf
:
buffers_
)
{
std
::
vector
<
void
*>
buffers
;
for
(
auto
&
buf
:
buffers_
)
{
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated"
);
PADDLE_ENFORCE_GT
(
buf
.
max_size
,
0
);
PADDLE_ENFORCE_GT
(
buf
.
max_size
,
0
);
PADDLE_ENFORCE
(
buf
.
device
==
DeviceType
::
GPU
);
PADDLE_ENFORCE
(
buf
.
device
==
DeviceType
::
GPU
);
buffers
.
push_back
(
buf
.
buffer
);
buffers
.
push_back
(
buf
.
buffer
);
}
}
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
cudaStreamSynchronize
(
*
stream_
);
cudaStreamSynchronize
(
*
stream_
);
}
}
...
@@ -45,7 +47,7 @@ void TensorRTEngine::Execute(int batch_size) {
...
@@ -45,7 +47,7 @@ void TensorRTEngine::Execute(int batch_size) {
TensorRTEngine
::~
TensorRTEngine
()
{
TensorRTEngine
::~
TensorRTEngine
()
{
cudaStreamSynchronize
(
*
stream_
);
cudaStreamSynchronize
(
*
stream_
);
// clean buffer
// clean buffer
for
(
auto
&
buf
:
buffers_
)
{
for
(
auto
&
buf
:
buffers_
)
{
if
(
buf
.
device
==
DeviceType
::
GPU
&&
buf
.
buffer
!=
nullptr
)
{
if
(
buf
.
device
==
DeviceType
::
GPU
&&
buf
.
buffer
!=
nullptr
)
{
PADDLE_ENFORCE_EQ
(
0
,
cudaFree
(
buf
.
buffer
));
PADDLE_ENFORCE_EQ
(
0
,
cudaFree
(
buf
.
buffer
));
buf
.
buffer
=
nullptr
;
buf
.
buffer
=
nullptr
;
...
@@ -70,32 +72,37 @@ void TensorRTEngine::FreezeNetwork() {
...
@@ -70,32 +72,37 @@ void TensorRTEngine::FreezeNetwork() {
// allocate GPU buffers.
// allocate GPU buffers.
buffers_
.
resize
(
buffer_sizes_
.
size
());
buffers_
.
resize
(
buffer_sizes_
.
size
());
for
(
auto
&
item
:
buffer_sizes_
)
{
for
(
auto
&
item
:
buffer_sizes_
)
{
// The output buffers are not set in the network building phrase, need to
// infer from the TesorRT network.
if
(
item
.
second
==
0
)
{
if
(
item
.
second
==
0
)
{
auto
slot_offset
=
infer_engine_
->
getBindingIndex
(
item
.
first
.
c_str
());
auto
slot_offset
=
infer_engine_
->
getBindingIndex
(
item
.
first
.
c_str
());
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
PADDLE_ENFORCE_GT
(
item
.
second
,
0
);
}
}
auto
&
buf
=
buffer
(
item
.
first
);
auto
&
buf
=
buffer
(
item
.
first
);
buf
.
max_size
=
item
.
second
*
max_batch_
;
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
item
.
second
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
buf
.
max_size
));
VLOG
(
4
)
<<
"buffer malloc "
<<
item
.
first
<<
" "
<<
item
.
second
<<
" "
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 10G
<<
buf
.
buffer
;
// buf.size will changed in the runtime.
buf
.
size
=
buf
.
max_size
=
item
.
second
;
buf
.
size
=
0
;
buf
.
device
=
DeviceType
::
GPU
;
buf
.
device
=
DeviceType
::
GPU
;
}
}
}
}
nvinfer1
::
ITensor
*
TensorRTEngine
::
DeclareInput
(
const
std
::
string
&
name
,
nvinfer1
::
ITensor
*
TensorRTEngine
::
DeclareInput
(
const
std
::
string
&
name
,
nvinfer1
::
DataType
dtype
,
nvinfer1
::
DataType
dtype
,
const
nvinfer1
::
Dims
&
dims
)
{
const
nvinfer1
::
Dims
&
dims
)
{
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate input name %s"
,
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate input name %s"
,
name
);
name
);
PADDLE_ENFORCE
(
infer_network_
!=
nullptr
,
"should initnetwork first"
);
PADDLE_ENFORCE
(
infer_network_
!=
nullptr
,
"should initnetwork first"
);
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
...
@@ -104,12 +111,12 @@ nvinfer1::ITensor* TensorRTEngine::DeclareInput(const std::string& name,
...
@@ -104,12 +111,12 @@ nvinfer1::ITensor* TensorRTEngine::DeclareInput(const std::string& name,
return
input
;
return
input
;
}
}
void
TensorRTEngine
::
DeclareOutput
(
const
nvinfer1
::
ILayer
*
layer
,
int
offset
,
void
TensorRTEngine
::
DeclareOutput
(
const
nvinfer1
::
ILayer
*
layer
,
int
offset
,
const
std
::
string
&
name
)
{
const
std
::
string
&
name
)
{
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate output name %s"
,
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate output name %s"
,
name
);
name
);
auto
*
output
=
layer
->
getOutput
(
offset
);
auto
*
output
=
layer
->
getOutput
(
offset
);
SetITensor
(
name
,
output
);
SetITensor
(
name
,
output
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
output
->
setName
(
name
.
c_str
());
output
->
setName
(
name
.
c_str
());
...
@@ -121,11 +128,11 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer* layer, int offset,
...
@@ -121,11 +128,11 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer* layer, int offset,
buffer_sizes_
[
name
]
=
0
;
buffer_sizes_
[
name
]
=
0
;
}
}
void
TensorRTEngine
::
DeclareOutput
(
const
std
::
string
&
name
)
{
void
TensorRTEngine
::
DeclareOutput
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate output name %s"
,
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate output name %s"
,
name
);
name
);
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
output
->
setName
(
name
.
c_str
());
output
->
setName
(
name
.
c_str
());
PADDLE_ENFORCE
(
!
output
->
isNetworkInput
());
PADDLE_ENFORCE
(
!
output
->
isNetworkInput
());
...
@@ -135,38 +142,45 @@ void TensorRTEngine::DeclareOutput(const std::string& name) {
...
@@ -135,38 +142,45 @@ void TensorRTEngine::DeclareOutput(const std::string& name) {
buffer_sizes_
[
name
]
=
0
;
buffer_sizes_
[
name
]
=
0
;
}
}
void
*
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
)
{
void
*
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
)
{
return
buffer
(
name
).
buffer
;
return
buffer
(
name
).
buffer
;
}
}
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
size_t
max_size
)
{
// determine data size
// determine data size
auto
it
=
buffer_sizes_
.
find
(
name
);
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
auto
&
buf
=
buffer
(
name
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
it
->
second
,
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
it
->
second
,
cudaMemcpyDeviceToDevice
,
*
stream_
),
cudaMemcpyDeviceToDevice
,
*
stream_
),
0
);
0
);
}
}
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
size_t
max_size
)
{
VLOG
(
4
)
<<
"get output in cpu"
;
auto
&
buf
=
buffer
(
name
);
// Update needed buffer size.
auto
slot_offset
=
infer_engine_
->
getBindingIndex
(
name
.
c_str
());
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
buf
.
size
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
PADDLE_ENFORCE_LE
(
buf
.
size
,
buf
.
max_size
);
// determine data size
// determine data size
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
it
->
second
,
// DEBUG
cudaMemcpyDeviceToHost
,
*
stream_
));
memset
(
dst
,
0
,
buf
.
size
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpy
(
dst
,
buf
.
buffer
,
buf
.
size
,
cudaMemcpyDeviceToHost
));
}
}
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
infer_engine_
!=
nullptr
,
"call FreezeNetwork first."
);
PADDLE_ENFORCE
(
infer_engine_
!=
nullptr
,
"call FreezeNetwork first."
);
auto
it
=
buffer_sizes_
.
find
(
name
);
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
...
@@ -174,19 +188,23 @@ Buffer& TensorRTEngine::buffer(const std::string& name) {
...
@@ -174,19 +188,23 @@ Buffer& TensorRTEngine::buffer(const std::string& name) {
return
buffers_
[
slot_offset
];
return
buffers_
[
slot_offset
];
}
}
void
TensorRTEngine
::
SetInputFromCPU
(
const
std
::
string
&
name
,
const
void
*
data
,
void
TensorRTEngine
::
SetInputFromCPU
(
const
std
::
string
&
name
,
const
void
*
data
,
size_t
size
)
{
size_t
size
)
{
auto
&
buf
=
buffer
(
name
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
);
PADDLE_ENFORCE_NOT_NULL
(
data
);
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
PADDLE_ENFORCE_LE
(
size
,
buf
.
max_size
,
"buffer is too small"
);
PADDLE_ENFORCE_LE
(
size
,
buf
.
max_size
,
"buffer is too small"
);
PADDLE_ENFORCE
(
buf
.
device
==
DeviceType
::
GPU
);
PADDLE_ENFORCE
(
buf
.
device
==
DeviceType
::
GPU
);
buf
.
size
=
size
;
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
buf
.
buffer
,
data
,
size
,
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
buf
.
buffer
,
data
,
size
,
cudaMemcpyHostToDevice
,
*
stream_
));
cudaMemcpyHostToDevice
,
*
stream_
));
}
}
void
TensorRTEngine
::
SetInputFromGPU
(
const
std
::
string
&
name
,
const
void
*
data
,
void
TensorRTEngine
::
SetInputFromGPU
(
const
std
::
string
&
name
,
const
void
*
data
,
size_t
size
)
{
size_t
size
)
{
auto
&
buf
=
buffer
(
name
);
auto
&
buf
=
buffer
(
name
);
buf
.
size
=
size
;
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
);
PADDLE_ENFORCE_LE
(
size
,
buf
.
max_size
,
"buffer is too small"
);
PADDLE_ENFORCE_LE
(
size
,
buf
.
max_size
,
"buffer is too small"
);
PADDLE_ENFORCE
(
buf
.
device
==
DeviceType
::
GPU
);
PADDLE_ENFORCE
(
buf
.
device
==
DeviceType
::
GPU
);
...
@@ -194,15 +212,15 @@ void TensorRTEngine::SetInputFromGPU(const std::string& name, const void* data,
...
@@ -194,15 +212,15 @@ void TensorRTEngine::SetInputFromGPU(const std::string& name, const void* data,
cudaMemcpyDeviceToDevice
,
*
stream_
));
cudaMemcpyDeviceToDevice
,
*
stream_
));
}
}
void
TensorRTEngine
::
SetITensor
(
const
std
::
string
&
name
,
void
TensorRTEngine
::
SetITensor
(
const
std
::
string
&
name
,
nvinfer1
::
ITensor
*
tensor
)
{
nvinfer1
::
ITensor
*
tensor
)
{
PADDLE_ENFORCE
(
tensor
!=
nullptr
);
PADDLE_ENFORCE
(
tensor
!=
nullptr
);
PADDLE_ENFORCE_EQ
(
0
,
itensor_map_
.
count
(
name
),
"duplicate ITensor name %s"
,
PADDLE_ENFORCE_EQ
(
0
,
itensor_map_
.
count
(
name
),
"duplicate ITensor name %s"
,
name
);
name
);
itensor_map_
[
name
]
=
tensor
;
itensor_map_
[
name
]
=
tensor
;
}
}
nvinfer1
::
ITensor
*
TensorRTEngine
::
GetITensor
(
const
std
::
string
&
name
)
{
nvinfer1
::
ITensor
*
TensorRTEngine
::
GetITensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
itensor_map_
.
count
(
name
),
"no ITensor %s"
,
name
);
PADDLE_ENFORCE
(
itensor_map_
.
count
(
name
),
"no ITensor %s"
,
name
);
return
itensor_map_
[
name
];
return
itensor_map_
[
name
];
}
}
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
b42ced8e
...
@@ -57,7 +57,9 @@ class TensorRTEngine : public EngineBase {
...
@@ -57,7 +57,9 @@ class TensorRTEngine : public EngineBase {
:
max_batch_
(
max_batch
),
:
max_batch_
(
max_batch
),
max_workspace_
(
max_workspace
),
max_workspace_
(
max_workspace
),
stream_
(
stream
?
stream
:
&
default_stream_
),
stream_
(
stream
?
stream
:
&
default_stream_
),
logger_
(
logger
)
{}
logger_
(
logger
)
{
cudaStreamCreate
(
&
default_stream_
);
}
virtual
~
TensorRTEngine
();
virtual
~
TensorRTEngine
();
...
@@ -121,6 +123,9 @@ class TensorRTEngine : public EngineBase {
...
@@ -121,6 +123,9 @@ class TensorRTEngine : public EngineBase {
int
max_batch_
;
int
max_batch_
;
// the max memory size the engine uses
// the max memory size the engine uses
int
max_workspace_
;
int
max_workspace_
;
// batch size of the current data, will be updated each Executation.
int
batch_size_
{
-
1
};
cudaStream_t
*
stream_
;
cudaStream_t
*
stream_
;
// If stream_ is not set from outside, hold its own stream.
// If stream_ is not set from outside, hold its own stream.
cudaStream_t
default_stream_
;
cudaStream_t
default_stream_
;
...
...
paddle/fluid/inference/tensorrt/test_engine.cc
浏览文件 @
b42ced8e
...
@@ -103,6 +103,10 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
...
@@ -103,6 +103,10 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
LOG
(
INFO
)
<<
"to get output"
;
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
auto
dims
=
engine_
->
GetITensor
(
"y"
)
->
getDimensions
();
ASSERT_EQ
(
dims
.
nbDims
,
3
);
ASSERT_EQ
(
dims
.
d
[
0
],
2
);
ASSERT_EQ
(
dims
.
d
[
1
],
1
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
sizeof
(
float
)
*
2
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
sizeof
(
float
)
*
2
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
b42ced8e
...
@@ -168,6 +168,8 @@ function(op_library TARGET)
...
@@ -168,6 +168,8 @@ function(op_library TARGET)
file
(
APPEND
${
pybind_file
}
"USE_OP(relu);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(relu);
\n
"
)
elseif
(
${
TARGET
}
STREQUAL
"fake_dequantize"
)
elseif
(
${
TARGET
}
STREQUAL
"fake_dequantize"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(fake_dequantize_max_abs);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(fake_dequantize_max_abs);
\n
"
)
elseif
(
${
TARGET
}
STREQUAL
"tensorrt_engine_op"
)
message
(
STATUS
"Pybind skips [tensorrt_engine_op], for this OP is only used in inference"
)
else
()
else
()
file
(
APPEND
${
pybind_file
}
"USE_OP(
${
TARGET
}
);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(
${
TARGET
}
);
\n
"
)
endif
()
endif
()
...
@@ -237,9 +239,9 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
...
@@ -237,9 +239,9 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library
(
softmax_op DEPS softmax
)
op_library
(
softmax_op DEPS softmax
)
op_library
(
sequence_softmax_op DEPS softmax
)
op_library
(
sequence_softmax_op DEPS softmax
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
op_library
(
tensorrt_engine_op DEPS tensorrt_engine
)
op_library
(
tensorrt_engine_op DEPS tensorrt_engine
tensorrt_converter
)
nv_test
(
test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc
nv_test
(
test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc
DEPS tensorrt_engine_op
tensorrt_engine tensorrt_converter
DEPS tensorrt_engine_op
analysis
)
analysis
)
else
()
else
()
set
(
DEPS_OPS
${
DEPS_OPS
}
tensorrt_engine_op
)
set
(
DEPS_OPS
${
DEPS_OPS
}
tensorrt_engine_op
)
...
...
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
b42ced8e
...
@@ -24,6 +24,9 @@
...
@@ -24,6 +24,9 @@
#include "paddle/fluid/operators/tensorrt_engine_op.h"
#include "paddle/fluid/operators/tensorrt_engine_op.h"
namespace
paddle
{
namespace
paddle
{
DEFINE_int32
(
tensorrt_engine_batch_size
,
1
,
"the batch_size of TensorRT"
);
namespace
operators
{
namespace
operators
{
using
inference
::
Singleton
;
using
inference
::
Singleton
;
...
@@ -52,7 +55,6 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
...
@@ -52,7 +55,6 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
"TensorRT' tensor input requires at least 2 dimensions"
);
"TensorRT' tensor input requires at least 2 dimensions"
);
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
"TensorRT' tensor input requires at most 4 dimensions"
);
"TensorRT' tensor input requires at most 4 dimensions"
);
switch
(
shape
.
size
())
{
switch
(
shape
.
size
())
{
case
2
:
case
2
:
return
nvinfer1
::
Dims2
(
shape
[
0
],
shape
[
1
]);
return
nvinfer1
::
Dims2
(
shape
[
0
],
shape
[
1
]);
...
@@ -90,27 +92,36 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
...
@@ -90,27 +92,36 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
engine
->
InitNetwork
();
engine
->
InitNetwork
();
framework
::
BlockDesc
block
(
nullptr
/*programdesc*/
,
&
block_desc
);
framework
::
BlockDesc
block
(
nullptr
/*programdesc*/
,
&
block_desc
);
VLOG
(
4
)
<<
"parsed var size "
<<
block
.
AllVars
().
size
();
// Add inputs
// Add inputs
VLOG
(
4
)
<<
"declare inputs"
;
VLOG
(
4
)
<<
"declare inputs"
;
for
(
auto
&
input
:
context
.
Inputs
(
"Xs"
))
{
for
(
auto
&
input
:
context
.
Inputs
(
"Xs"
))
{
VLOG
(
4
)
<<
"declare input "
<<
input
;
VLOG
(
4
)
<<
"declare input "
<<
input
;
auto
*
var
=
block
.
FindVar
(
input
);
auto
*
var
=
block
.
FindVar
(
input
);
// TensorRT engine need to create parameters. The parameter's description
// should be set in
PADDLE_ENFORCE
(
var
,
"no variable called %s"
,
input
);
PADDLE_ENFORCE_EQ
(
var
->
GetType
(),
FluidDT
::
VarType_Type_LOD_TENSOR
,
PADDLE_ENFORCE_EQ
(
var
->
GetType
(),
FluidDT
::
VarType_Type_LOD_TENSOR
,
"TensorRT engine only takes LoDTensor as input"
);
"TensorRT engine only takes LoDTensor as input"
);
auto
shape
=
var
->
GetShape
();
auto
shape
=
var
->
GetShape
();
// For the special batch_size placeholder -1, drop it and pass the real
// shape of data.
// TODO(Superjomn) fix this with batch broadcast, or it can't handle
// variational batch size.
if
(
shape
[
0
]
==
-
1
)
{
shape
[
0
]
=
FLAGS_tensorrt_engine_batch_size
;
}
engine
->
DeclareInput
(
engine
->
DeclareInput
(
input
,
FluidDataType2TRT
(
input
,
FluidDataType2TRT
(
var
->
Proto
()
->
type
().
lod_tensor
().
tensor
().
data_type
()),
var
->
Proto
()
->
type
().
lod_tensor
().
tensor
().
data_type
()),
Vec2TRT_Dims
(
var
->
GetShape
()
));
Vec2TRT_Dims
(
shape
));
}
}
inference
::
Singleton
<
inference
::
tensorrt
::
OpConverter
>::
Global
().
ConvertBlock
(
inference
::
Singleton
<
inference
::
tensorrt
::
OpConverter
>::
Global
().
ConvertBlock
(
block_desc
,
parameters
,
context
.
scope
(),
engine
);
block_desc
,
parameters
,
context
.
scope
(),
engine
);
// Add outputs
// Add outputs
VLOG
(
4
)
<<
"declare outputs"
;
for
(
auto
&
output
:
context
.
Outputs
(
"Ys"
))
{
for
(
auto
&
output
:
context
.
Outputs
(
"Ys"
))
{
VLOG
(
4
)
<<
"declare output "
<<
output
;
engine
->
DeclareOutput
(
output
);
engine
->
DeclareOutput
(
output
);
}
}
...
@@ -151,4 +162,7 @@ REGISTER_OP_CPU_KERNEL(
...
@@ -151,4 +162,7 @@ REGISTER_OP_CPU_KERNEL(
ops
::
TensorRTEngineKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
ops
::
TensorRTEngineKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
ops
::
TensorRTEngineKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
);
ops
::
TensorRTEngineKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
);
// A trick to compile with the needed TensorRT op converter.
USE_TRT_CONVERTER
(
mul
)
#endif // PADDLE_WITH_CUDA
#endif // PADDLE_WITH_CUDA
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
b42ced8e
...
@@ -24,6 +24,9 @@
...
@@ -24,6 +24,9 @@
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
namespace
paddle
{
namespace
paddle
{
DECLARE_int32
(
tensorrt_engine_batch_size
);
namespace
operators
{
namespace
operators
{
using
inference
::
Singleton
;
using
inference
::
Singleton
;
...
@@ -53,7 +56,6 @@ template <typename DeviceContext, typename T>
...
@@ -53,7 +56,6 @@ template <typename DeviceContext, typename T>
class
TensorRTEngineKernel
:
public
framework
::
OpKernel
<
T
>
{
class
TensorRTEngineKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
VLOG
(
4
)
<<
"TensorRTEngineKernel executing"
;
auto
engine_name
=
context
.
Attr
<
std
::
string
>
(
"engine_uniq_key"
);
auto
engine_name
=
context
.
Attr
<
std
::
string
>
(
"engine_uniq_key"
);
if
(
!
Singleton
<
TRT_EngineManager
>::
Global
().
HasEngine
(
engine_name
))
{
if
(
!
Singleton
<
TRT_EngineManager
>::
Global
().
HasEngine
(
engine_name
))
{
Prepare
(
context
);
Prepare
(
context
);
...
@@ -61,11 +63,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
...
@@ -61,11 +63,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto
*
engine
=
Singleton
<
TRT_EngineManager
>::
Global
().
Get
(
engine_name
);
auto
*
engine
=
Singleton
<
TRT_EngineManager
>::
Global
().
Get
(
engine_name
);
auto
input_names
=
context
.
op
().
Inputs
(
"Xs"
);
auto
input_names
=
context
.
op
().
Inputs
(
"Xs"
);
PADDLE_ENFORCE
(
!
input_names
.
empty
(),
"should pass more than one inputs"
);
PADDLE_ENFORCE
(
!
input_names
.
empty
(),
"should pass more than one inputs"
);
// Try to determine a batch_size
PADDLE_ENFORCE_LE
(
FLAGS_tensorrt_engine_batch_size
,
auto
&
tensor0
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
context
.
Attr
<
int
>
(
"max_batch"
));
context
.
scope
(),
input_names
.
front
());
int
batch_size
=
tensor0
.
dims
()[
0
];
PADDLE_ENFORCE_LE
(
batch_size
,
context
.
Attr
<
int
>
(
"max_batch"
));
// Convert input tensor from fluid to engine.
// Convert input tensor from fluid to engine.
for
(
const
auto
&
x
:
context
.
Inputs
(
"Xs"
))
{
for
(
const
auto
&
x
:
context
.
Inputs
(
"Xs"
))
{
...
@@ -81,8 +80,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
...
@@ -81,8 +80,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
}
}
}
}
// Execute the engine.
// Execute the engine.
PADDLE_ENFORCE_GT
(
batch_size
,
0
);
PADDLE_ENFORCE_GT
(
FLAGS_tensorrt_engine_
batch_size
,
0
);
engine
->
Execute
(
batch_size
);
engine
->
Execute
(
FLAGS_tensorrt_engine_
batch_size
);
// Convert output tensor from engine to fluid
// Convert output tensor from engine to fluid
for
(
const
auto
&
y
:
context
.
Outputs
(
"Ys"
))
{
for
(
const
auto
&
y
:
context
.
Outputs
(
"Ys"
))
{
// convert output and copy to fluid.
// convert output and copy to fluid.
...
@@ -94,18 +93,21 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
...
@@ -94,18 +93,21 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
if
(
platform
::
is_cpu_place
(
fluid_t
->
place
()))
{
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
// TODO(Superjomn) change this float to dtype size.
engine
->
GetOutputInCPU
(
// TODO(Superjomn) find some way to determine which device to output the
y
,
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
// tensor.
size
*
sizeof
(
float
));
// if (platform::is_cpu_place(fluid_t->place())) {
}
else
{
// TODO(Superjomn) change this float to dtype size.
engine
->
GetOutputInGPU
(
engine
->
GetOutputInCPU
(
y
,
y
,
fluid_t
->
mutable_data
<
float
>
(
platform
::
CUDAPlace
()),
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
size
*
sizeof
(
float
));
size
*
sizeof
(
float
));
}
//} else {
// engine->GetOutputInGPU(
// y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
// size * sizeof(float));
//}
}
}
cudaStreamSynchronize
(
*
engine
->
stream
());
cudaStreamSynchronize
(
*
engine
->
stream
());
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录