Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
65b61db1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
65b61db1
编写于
11月 07, 2018
作者:
Z
Zhaolong Xing
提交者:
GitHub
11月 07, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13927 from NHZlX/fix_googlenet_bug_with_rule
Fix googlenet bug with rule
上级
ea8984c9
5700fafd
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
50 addition
and
8 deletion
+50
-8
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+20
-1
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+4
-0
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+12
-0
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+11
-6
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+3
-1
未找到文件。
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
浏览文件 @
65b61db1
...
...
@@ -18,6 +18,21 @@ namespace paddle {
namespace
inference
{
namespace
tensorrt
{
bool
to_skip_merging_optimize
(
TensorRTEngine
*
engine_
,
const
std
::
vector
<
int
>&
filters
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
std
::
string
input_name
)
{
if
(
engine_
->
itensor_quote_num
[
input_name
]
>
0
)
{
return
true
;
}
if
(
filters
[
0
]
==
1
&&
filters
[
1
]
==
1
&&
strides
[
0
]
==
1
&&
strides
[
1
]
==
1
&&
paddings
[
0
]
==
0
&&
paddings
[
1
]
==
0
)
engine_
->
itensor_quote_num
[
input_name
]
+=
1
;
return
false
;
}
class
Conv2dOpConverter
:
public
OpConverter
{
public:
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
...
...
@@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter {
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Output"
).
size
(),
1
);
auto
*
X
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"Input"
).
front
());
// Declare weights
auto
*
Y_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Filter"
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
Y_v
);
...
...
@@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter {
std
::
move
(
weight_tensor
);
layer
->
getOutput
(
0
)
->
setName
(
output_name
.
c_str
());
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
if
(
test_mode
)
{
if
(
test_mode
||
to_skip_merging_optimize
(
engine_
,
{
filter_h
,
filter_w
},
strides
,
paddings
,
op_desc
.
Input
(
"Input"
).
front
()))
{
engine_
->
DeclareOutput
(
output_name
);
}
}
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
65b61db1
...
...
@@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
buffer_sizes_
[
name
]
=
0
;
}
bool
TensorRTEngine
::
HasDeclared
(
const
std
::
string
&
name
)
{
return
buffer_sizes_
.
count
(
name
)
>
0
;
}
void
TensorRTEngine
::
DeclareOutput
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate output name %s"
,
name
);
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
65b61db1
...
...
@@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase {
const
std
::
string
&
name
);
// Set the itensor_map_[name] as the network's output, and set its name.
void
DeclareOutput
(
const
std
::
string
&
name
);
// Check if the ITensor has been declared
bool
HasDeclared
(
const
std
::
string
&
name
);
// GPU memory address for an ITensor with specific name. One can operate on
// these memory directly for acceleration, for example, output the converted
...
...
@@ -132,6 +134,16 @@ class TensorRTEngine : public EngineBase {
std
::
unordered_map
<
std
::
string
/*name*/
,
std
::
unique_ptr
<
framework
::
Tensor
>>
weight_map
;
// TODO: (NHZLX)
// In the normal case, the paddle-trt exists bug when runing the googlenet.
// When there are more than two convolutions of 1 * 1 with the same input, the
// paddle-tensorrt will do the merging optimization, which fuse those conv
// into
// one conv, and then trigger bug. So, We should use strategy to avoid this
// optimization for the time being. This bug will be fixed in the future.
std
::
unordered_map
<
std
::
string
/*name*/
,
int
/*ITensor_quote_num*/
>
itensor_quote_num
;
private:
// the max batch size
int
max_batch_
;
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
65b61db1
...
...
@@ -93,11 +93,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
}
}
TEST
(
trt_models_test
,
m
ain
)
{
std
::
vector
<
std
::
string
>
infer_models
=
{
"mobilenet"
,
"resnet50"
,
"resnext50"
};
for
(
auto
&
model_dir
:
infer_models
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/"
+
model_dir
);
}
TEST
(
trt_models_test
,
m
obilenet
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/mobilenet"
);
}
TEST
(
trt_models_test
,
resnet50
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/resnet50"
);
}
TEST
(
trt_models_test
,
resnext50
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/resnext50"
);
}
}
// namespace paddle
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
65b61db1
...
...
@@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Add outputs
for
(
auto
&
output
:
output_maps
)
{
engine
->
DeclareOutput
(
output
);
if
(
!
engine
->
HasDeclared
(
output
))
{
engine
->
DeclareOutput
(
output
);
}
}
engine
->
FreezeNetwork
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录