Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
21ba32b0
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
21ba32b0
编写于
8月 21, 2018
作者:
Z
Zhaolong Xing
提交者:
GitHub
8月 21, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #12843 from NHZlX/fix_ssa_bug_for_trt
fix ssa bug with batch_norm and refine the trt
上级
cd32ddac
c999895e
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
45 addition
and
21 deletion
+45
-21
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+2
-1
paddle/fluid/inference/analysis/data_flow_graph.cc
paddle/fluid/inference/analysis/data_flow_graph.cc
+1
-1
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
...fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+0
-5
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
.../fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
+0
-3
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+12
-1
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+8
-0
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+2
-2
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+15
-6
paddle/fluid/operators/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt_engine_op_test.cc
+5
-2
未找到文件。
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
21ba32b0
...
...
@@ -72,7 +72,8 @@ class DfgPassManagerImpl final : public DfgPassManager {
if
(
FLAGS_IA_enable_tensorrt_subgraph_engine
)
{
auto
trt_teller
=
[
&
](
const
Node
*
node
)
{
std
::
unordered_set
<
std
::
string
>
teller_set
(
{
"elementwise_add"
,
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"softmax"
});
{
"elementwise_add"
,
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"softmax"
,
"depthwise_conv2d"
,
"batch_norm"
});
if
(
!
node
->
IsFunction
())
return
false
;
const
auto
*
func
=
static_cast
<
const
Function
*>
(
node
);
...
...
paddle/fluid/inference/analysis/data_flow_graph.cc
浏览文件 @
21ba32b0
...
...
@@ -97,6 +97,7 @@ void DataFlowGraph::Build(const framework::proto::ProgramDesc &prog) {
auto
*
in
=
nodes
.
GetMutable
(
var2id
.
at
(
in_var
.
arguments
(
k
)));
in
->
outlinks
.
push_back
(
o
);
o
->
inlinks
.
push_back
(
in
);
unique_written_vars
.
insert
(
in
);
}
}
for
(
int
j
=
0
;
j
<
op
.
outputs_size
();
j
++
)
{
...
...
@@ -117,7 +118,6 @@ void DataFlowGraph::Build(const framework::proto::ProgramDesc &prog) {
}
out
->
inlinks
.
push_back
(
o
);
o
->
outlinks
.
push_back
(
out
);
unique_written_vars
.
insert
(
out
);
}
}
}
...
...
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
浏览文件 @
21ba32b0
...
...
@@ -23,9 +23,6 @@
namespace
paddle
{
namespace
inference
{
DEFINE_int32
(
tensorrt_max_batchsize
,
1
,
"TensorRT maximum batch size"
);
DEFINE_int32
(
tensorrt_workspace_size
,
2048
,
"TensorRT workspace size"
);
namespace
analysis
{
using
framework
::
proto
::
ProgramDesc
;
...
...
@@ -199,8 +196,6 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
// Set attrs
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
->
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"engine_uniq_key"
,
"trt-"
+
std
::
to_string
(
counter
++
));
SetAttr
(
desc
.
Proto
(),
"max_batch"
,
FLAGS_tensorrt_max_batchsize
);
SetAttr
(
desc
.
Proto
(),
"max_workspace"
,
FLAGS_tensorrt_workspace_size
);
SetAttr
(
desc
.
Proto
(),
"parameters"
,
ExtractParameters
(
graph
.
nodes
.
nodes
()));
SetAttr
(
desc
.
Proto
(),
"output_name_mapping"
,
output_mapping
);
node
->
SetPbMsg
(
desc
.
Proto
()
->
SerializeAsString
());
...
...
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
浏览文件 @
21ba32b0
...
...
@@ -27,9 +27,6 @@
namespace
paddle
{
namespace
inference
{
DECLARE_int32
(
tensorrt_max_batchsize
);
DECLARE_int32
(
tensorrt_workspace_size
);
namespace
analysis
{
class
DataFlowGraphToFluidPass
final
:
public
DataFlowGraphPass
{
public:
...
...
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
浏览文件 @
21ba32b0
...
...
@@ -15,6 +15,7 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/tensorrt_engine_op.h"
...
...
@@ -32,7 +33,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
FLAGS_tensorrt_max_batch_size
=
config_
.
max_batch_size
;
FLAGS_tensorrt_workspace_size
=
config_
.
workspace_size
;
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
{
...
...
@@ -150,3 +152,12 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
}
}
// namespace paddle
USE_TRT_CONVERTER
(
elementwise_add_weight
);
USE_TRT_CONVERTER
(
mul
);
USE_TRT_CONVERTER
(
conv2d
);
USE_TRT_CONVERTER
(
relu
);
USE_TRT_CONVERTER
(
fc
);
USE_TRT_CONVERTER
(
pool2d
);
USE_TRT_CONVERTER
(
softmax
);
USE_TRT_CONVERTER
(
batch_norm
);
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
21ba32b0
...
...
@@ -137,6 +137,14 @@ struct AnakinConfig : public PaddlePredictor::Config {
struct
TensorRTConfig
:
public
NativeConfig
{
// Determine whether a subgraph will be executed by TRT.
int
min_subgraph_size
{
1
};
// While TensorRT allows an engine optimized for a given max batch size
// to run at any smaller size, the performance for those smaller
// sizes may not be as well-optimized. Therefore, Max batch is best
// equivalent to the runtime batch size.
int
max_batch_size
{
1
};
// For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int
workspace_size
{
1
<<
30
};
};
// A factory to help create different predictors.
...
...
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
21ba32b0
...
...
@@ -22,6 +22,8 @@
namespace
paddle
{
DEFINE_int32
(
tensorrt_engine_batch_size
,
1
,
"the batch_size of TensorRT"
);
DEFINE_int32
(
tensorrt_max_batch_size
,
1
,
"TensorRT maximum batch size"
);
DEFINE_int32
(
tensorrt_workspace_size
,
16
<<
20
,
"TensorRT workspace size"
);
namespace
operators
{
...
...
@@ -32,8 +34,6 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Ys"
,
"A list of outputs"
).
AsDuplicable
();
AddAttr
<
std
::
string
>
(
"subgraph"
,
"the subgraph."
);
AddAttr
<
std
::
string
>
(
"engine_uniq_key"
,
"unique key for the TRT engine."
);
AddAttr
<
int
>
(
"max_batch"
,
"the maximum batch size."
);
AddAttr
<
int
>
(
"max_workspace"
,
"the maximum batch size."
);
AddComment
(
"TensorRT engine operator."
);
}
};
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
21ba32b0
...
...
@@ -28,6 +28,8 @@
namespace
paddle
{
DECLARE_int32
(
tensorrt_engine_batch_size
);
DECLARE_int32
(
tensorrt_max_batch_size
);
DECLARE_int32
(
tensorrt_workspace_size
);
namespace
operators
{
...
...
@@ -54,8 +56,10 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t>& shape) {
"TensorRT' tensor input requires at least 2 dimensions"
);
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
"TensorRT' tensor input requires at most 4 dimensions"
);
PADDLE_ENFORCE_EQ
(
shape
.
size
(),
4UL
);
PADDLE_ENFORCE
(
shape
.
size
()
==
4UL
||
shape
.
size
()
==
2UL
);
if
(
shape
.
size
()
==
4UL
)
return
nvinfer1
::
DimsCHW
(
shape
[
1
],
shape
[
2
],
shape
[
3
]);
return
nvinfer1
::
DimsCHW
(
shape
[
1
],
1
,
1
);
}
}
// namespace
...
...
@@ -95,7 +99,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto
input_names
=
context
.
op
().
Inputs
(
"Xs"
);
PADDLE_ENFORCE
(
!
input_names
.
empty
(),
"should pass more than one inputs"
);
PADDLE_ENFORCE_LE
(
FLAGS_tensorrt_engine_batch_size
,
context
.
Attr
<
int
>
(
"max_batch"
)
);
FLAGS_tensorrt_max_batch_size
);
std
::
vector
<
std
::
string
>
output_maps
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
...
...
@@ -132,7 +136,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
nvinfer1
::
ITensor
*
trt_t
=
engine
->
GetITensor
(
output_maps
[
output_index
]);
auto
dims
=
trt_t
->
getDimensions
();
// Use the output ITensor's dims to reshape the Fluid Tensor.
std
::
vector
<
int
>
ddim
(
dims
.
d
,
dims
.
d
+
dims
.
nbDims
);
// The ITensor doesn't contain the batch size dim.
std
::
vector
<
int
>
ddim
;
ddim
.
push_back
(
FLAGS_tensorrt_engine_batch_size
);
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
i
++
)
{
ddim
.
push_back
(
dims
.
d
[
i
]);
}
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
...
...
@@ -168,8 +177,8 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Get the ProgramDesc and pass to convert.
framework
::
proto
::
BlockDesc
block_desc
;
block_desc
.
ParseFromString
(
context
.
Attr
<
std
::
string
>
(
"subgraph"
));
int
max_batch
=
context
.
Attr
<
int
>
(
"max_batch"
)
;
auto
max_workspace
=
context
.
Attr
<
int
>
(
"max_workspace"
)
;
int
max_batch
=
FLAGS_tensorrt_max_batch_size
;
auto
max_workspace
=
FLAGS_tensorrt_workspace_size
;
auto
params
=
context
.
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
std
::
unordered_set
<
std
::
string
>
parameters
;
for
(
const
auto
&
param
:
params
)
{
...
...
paddle/fluid/operators/tensorrt_engine_op_test.cc
浏览文件 @
21ba32b0
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/tensorrt_engine_op.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -57,6 +58,8 @@ void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
using
inference
::
analysis
::
SetAttr
;
TEST
(
TensorRTEngineOp
,
manual
)
{
FLAGS_tensorrt_engine_batch_size
=
2
;
FLAGS_tensorrt_max_batch_size
=
2
;
framework
::
ProgramDesc
program
;
auto
*
block_
=
program
.
Proto
()
->
add_blocks
();
block_
->
set_idx
(
0
);
...
...
@@ -98,8 +101,6 @@ TEST(TensorRTEngineOp, manual) {
engine_op_desc
.
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"subgraph"
,
block_
->
SerializeAsString
());
SetAttr
<
int
>
(
engine_op_desc
.
Proto
(),
"max_batch"
,
100
);
SetAttr
<
int
>
(
engine_op_desc
.
Proto
(),
"max_workspace"
,
1
<<
10
);
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"engine_uniq_key"
,
"a_engine"
);
SetAttr
<
std
::
vector
<
std
::
string
>>
(
engine_op_desc
.
Proto
(),
"parameters"
,
std
::
vector
<
std
::
string
>
({}));
...
...
@@ -128,6 +129,8 @@ TEST(TensorRTEngineOp, manual) {
}
void
Execute
(
int
batch_size
,
int
input_dim
,
int
output_dim
,
int
nlayers
=
1
)
{
FLAGS_tensorrt_engine_batch_size
=
batch_size
;
FLAGS_tensorrt_max_batch_size
=
batch_size
;
framework
::
ProgramDesc
program
;
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录