Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1e21e8b5
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
1e21e8b5
编写于
4月 04, 2019
作者:
石
石晓伟
提交者:
GitHub
4月 04, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16611 from Shixiaowei02/release/1.4
Cherry-pick from 16498 : Deal with softmax layer in anakin subgraph
上级
af07056c
3f0b97df
变更
54
隐藏空白更改
内联
并排
Showing
54 changed file
with
137 addition
and
82 deletion
+137
-82
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+1
-1
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+1
-2
paddle/fluid/inference/anakin/convert/activation.cc
paddle/fluid/inference/anakin/convert/activation.cc
+1
-0
paddle/fluid/inference/anakin/convert/activation.h
paddle/fluid/inference/anakin/convert/activation.h
+1
-0
paddle/fluid/inference/anakin/convert/batch_norm.cc
paddle/fluid/inference/anakin/convert/batch_norm.cc
+1
-0
paddle/fluid/inference/anakin/convert/batch_norm.h
paddle/fluid/inference/anakin/convert/batch_norm.h
+1
-0
paddle/fluid/inference/anakin/convert/concat.cc
paddle/fluid/inference/anakin/convert/concat.cc
+1
-0
paddle/fluid/inference/anakin/convert/concat.h
paddle/fluid/inference/anakin/convert/concat.h
+1
-0
paddle/fluid/inference/anakin/convert/conv2d.cc
paddle/fluid/inference/anakin/convert/conv2d.cc
+1
-0
paddle/fluid/inference/anakin/convert/conv2d.h
paddle/fluid/inference/anakin/convert/conv2d.h
+1
-0
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+1
-0
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+1
-0
paddle/fluid/inference/anakin/convert/density_prior_box.cc
paddle/fluid/inference/anakin/convert/density_prior_box.cc
+3
-3
paddle/fluid/inference/anakin/convert/density_prior_box.h
paddle/fluid/inference/anakin/convert/density_prior_box.h
+1
-0
paddle/fluid/inference/anakin/convert/detection_out.cc
paddle/fluid/inference/anakin/convert/detection_out.cc
+1
-0
paddle/fluid/inference/anakin/convert/detection_out.h
paddle/fluid/inference/anakin/convert/detection_out.h
+1
-0
paddle/fluid/inference/anakin/convert/dropout.cc
paddle/fluid/inference/anakin/convert/dropout.cc
+1
-0
paddle/fluid/inference/anakin/convert/dropout.h
paddle/fluid/inference/anakin/convert/dropout.h
+1
-0
paddle/fluid/inference/anakin/convert/elementwise.cc
paddle/fluid/inference/anakin/convert/elementwise.cc
+6
-6
paddle/fluid/inference/anakin/convert/elementwise.h
paddle/fluid/inference/anakin/convert/elementwise.h
+2
-0
paddle/fluid/inference/anakin/convert/fc.cc
paddle/fluid/inference/anakin/convert/fc.cc
+1
-0
paddle/fluid/inference/anakin/convert/fc.h
paddle/fluid/inference/anakin/convert/fc.h
+1
-0
paddle/fluid/inference/anakin/convert/flatten.cc
paddle/fluid/inference/anakin/convert/flatten.cc
+1
-0
paddle/fluid/inference/anakin/convert/flatten.h
paddle/fluid/inference/anakin/convert/flatten.h
+1
-0
paddle/fluid/inference/anakin/convert/im2sequence.cc
paddle/fluid/inference/anakin/convert/im2sequence.cc
+1
-0
paddle/fluid/inference/anakin/convert/im2sequence.h
paddle/fluid/inference/anakin/convert/im2sequence.h
+1
-0
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+9
-8
paddle/fluid/inference/anakin/convert/pool2d.cc
paddle/fluid/inference/anakin/convert/pool2d.cc
+1
-0
paddle/fluid/inference/anakin/convert/pool2d.h
paddle/fluid/inference/anakin/convert/pool2d.h
+1
-0
paddle/fluid/inference/anakin/convert/relu.cc
paddle/fluid/inference/anakin/convert/relu.cc
+1
-0
paddle/fluid/inference/anakin/convert/relu.h
paddle/fluid/inference/anakin/convert/relu.h
+1
-0
paddle/fluid/inference/anakin/convert/reshape.cc
paddle/fluid/inference/anakin/convert/reshape.cc
+1
-0
paddle/fluid/inference/anakin/convert/reshape.h
paddle/fluid/inference/anakin/convert/reshape.h
+1
-0
paddle/fluid/inference/anakin/convert/scale.cc
paddle/fluid/inference/anakin/convert/scale.cc
+1
-0
paddle/fluid/inference/anakin/convert/scale.h
paddle/fluid/inference/anakin/convert/scale.h
+1
-0
paddle/fluid/inference/anakin/convert/softmax.cc
paddle/fluid/inference/anakin/convert/softmax.cc
+10
-1
paddle/fluid/inference/anakin/convert/softmax.h
paddle/fluid/inference/anakin/convert/softmax.h
+1
-0
paddle/fluid/inference/anakin/convert/split.cc
paddle/fluid/inference/anakin/convert/split.cc
+1
-0
paddle/fluid/inference/anakin/convert/split.h
paddle/fluid/inference/anakin/convert/split.h
+1
-0
paddle/fluid/inference/anakin/convert/sum.cc
paddle/fluid/inference/anakin/convert/sum.cc
+1
-0
paddle/fluid/inference/anakin/convert/sum.h
paddle/fluid/inference/anakin/convert/sum.h
+1
-0
paddle/fluid/inference/anakin/convert/transpose.cc
paddle/fluid/inference/anakin/convert/transpose.cc
+1
-0
paddle/fluid/inference/anakin/convert/transpose.h
paddle/fluid/inference/anakin/convert/transpose.h
+1
-0
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+16
-1
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+0
-1
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+8
-8
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
+25
-6
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
+1
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+9
-10
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+5
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+3
-1
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+0
-32
未找到文件。
cmake/external/protobuf.cmake
浏览文件 @
1e21e8b5
...
...
@@ -201,7 +201,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
"-DCMAKE_GENERATOR_PLATFORM=x64"
)
ENDIF
()
SET
(
PROTOBUF_REPO
"https://github.com/
google
/protobuf.git"
)
SET
(
PROTOBUF_REPO
"https://github.com/
protocolbuffers
/protobuf.git"
)
SET
(
PROTOBUF_TAG
"9f75c5aa851cd877fb0d93ccc31b8567a6706546"
)
ExternalProject_Add
(
...
...
paddle/fluid/inference/anakin/convert/CMakeLists.txt
浏览文件 @
1e21e8b5
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL
)
...
...
paddle/fluid/inference/anakin/convert/activation.cc
浏览文件 @
1e21e8b5
...
...
@@ -34,6 +34,7 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
}
void
ActivationOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/activation.h
浏览文件 @
1e21e8b5
...
...
@@ -27,6 +27,7 @@ class ActivationOpConverter : public AnakinOpConverter {
explicit
ActivationOpConverter
(
const
std
::
string
&
op_type
);
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ActivationOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/batch_norm.cc
浏览文件 @
1e21e8b5
...
...
@@ -29,6 +29,7 @@ namespace inference {
namespace
anakin
{
void
BatchNormOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/batch_norm.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class BatchNormOpConverter : public AnakinOpConverter {
BatchNormOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
BatchNormOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/concat.cc
浏览文件 @
1e21e8b5
...
...
@@ -29,6 +29,7 @@ namespace inference {
namespace
anakin
{
void
ConcatOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/concat.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class ConcatOpConverter : public AnakinOpConverter {
ConcatOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ConcatOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/conv2d.cc
浏览文件 @
1e21e8b5
...
...
@@ -28,6 +28,7 @@ namespace inference {
namespace
anakin
{
void
Conv2dOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/conv2d.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class Conv2dOpConverter : public AnakinOpConverter {
Conv2dOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Conv2dOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
浏览文件 @
1e21e8b5
...
...
@@ -28,6 +28,7 @@ namespace inference {
namespace
anakin
{
void
Conv2dFusionOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class Conv2dFusionOpConverter : public AnakinOpConverter {
Conv2dFusionOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Conv2dFusionOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/density_prior_box.cc
浏览文件 @
1e21e8b5
...
...
@@ -27,9 +27,9 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
void
DensityPriorBoxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
void
DensityPriorBoxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
auto
input_name
=
op_desc
.
Input
(
"Input"
).
front
();
auto
image_name
=
op_desc
.
Input
(
"Image"
).
front
();
...
...
paddle/fluid/inference/anakin/convert/density_prior_box.h
浏览文件 @
1e21e8b5
...
...
@@ -27,6 +27,7 @@ class DensityPriorBoxOpConverter : public AnakinOpConverter {
DensityPriorBoxOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DensityPriorBoxOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/detection_out.cc
浏览文件 @
1e21e8b5
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
DetectionOutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/detection_out.h
浏览文件 @
1e21e8b5
...
...
@@ -27,6 +27,7 @@ class DetectionOutOpConverter : public AnakinOpConverter {
DetectionOutOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DetectionOutOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/dropout.cc
浏览文件 @
1e21e8b5
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
DropoutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/dropout.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class DropoutOpConverter : public AnakinOpConverter {
DropoutOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DropoutOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/elementwise.cc
浏览文件 @
1e21e8b5
...
...
@@ -30,9 +30,9 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
void
ElementwiseAddOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
void
ElementwiseAddOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
...
...
@@ -50,9 +50,9 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
}
void
ElementwiseMulOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
void
ElementwiseMulOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
...
...
paddle/fluid/inference/anakin/convert/elementwise.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
ElementwiseAddOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ElementwiseAddOpConverter
()
{}
...
...
@@ -37,6 +38,7 @@ class ElementwiseMulOpConverter : public AnakinOpConverter {
ElementwiseMulOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ElementwiseMulOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/fc.cc
浏览文件 @
1e21e8b5
...
...
@@ -27,6 +27,7 @@ namespace inference {
namespace
anakin
{
void
FcBaseOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/fc.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class FcBaseOpConverter : public AnakinOpConverter {
FcBaseOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
FcBaseOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/flatten.cc
浏览文件 @
1e21e8b5
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
FlattenOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/flatten.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class FlattenOpConverter : public AnakinOpConverter {
FlattenOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
FlattenOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/im2sequence.cc
浏览文件 @
1e21e8b5
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
Im2SequenceConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/im2sequence.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class Im2SequenceConverter : public AnakinOpConverter {
Im2SequenceConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Im2SequenceConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
1e21e8b5
...
...
@@ -40,8 +40,10 @@ class AnakinOpConverter {
AnakinOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{}
void
ConvertOp
(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
framework
::
Scope
&
scope
,
AnakinNvEngine
*
engine
,
bool
test_mode
=
false
)
{
...
...
@@ -58,16 +60,17 @@ class AnakinOpConverter {
}
PADDLE_ENFORCE_NOT_NULL
(
it
,
"no OpConverter for optype [%s]"
,
op_type
);
it
->
SetEngine
(
engine
);
(
*
it
)(
op
,
scope
,
test_mode
);
(
*
it
)(
op
,
block_desc
,
scope
,
test_mode
);
}
void
ConvertBlock
(
const
framework
::
proto
::
BlockDesc
&
block
,
void
ConvertBlock
(
framework
::
BlockDesc
*
block_desc
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
framework
::
Scope
&
scope
,
AnakinNvEngine
*
engine
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
for
(
auto
i
=
0
;
i
<
block
.
ops_size
();
i
++
)
{
auto
&
op
=
block
.
ops
(
i
);
ConvertOp
(
op
,
parameters
,
scope
,
engine
);
framework
::
proto
::
BlockDesc
*
block
=
block_desc
->
Proto
();
for
(
auto
i
=
0
;
i
<
block
->
ops_size
();
i
++
)
{
auto
&
op
=
block
->
ops
(
i
);
ConvertOp
(
op
,
*
block_desc
,
parameters
,
scope
,
engine
);
}
}
...
...
@@ -77,9 +80,7 @@ class AnakinOpConverter {
const
std
::
vector
<
std
::
string
>
&
inputs
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
std
::
vector
<
std
::
string
>
&
outputs
,
AnakinNvEngine
*
engine
)
{
framework
::
proto
::
BlockDesc
*
block_proto
=
block_desc
->
Proto
();
ConvertBlock
(
*
block_proto
,
parameters
,
*
scope
,
engine
);
ConvertBlock
(
block_desc
,
parameters
,
*
scope
,
engine
);
engine
->
Freeze
();
// if the max_batch size
int
max_batch_size
=
engine
->
GetMaxBatchSize
();
...
...
paddle/fluid/inference/anakin/convert/pool2d.cc
浏览文件 @
1e21e8b5
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
Pool2dOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/pool2d.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class Pool2dOpConverter : public AnakinOpConverter {
Pool2dOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Pool2dOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/relu.cc
浏览文件 @
1e21e8b5
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
ReluOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/relu.h
浏览文件 @
1e21e8b5
...
...
@@ -27,6 +27,7 @@ class ReluOpConverter : public AnakinOpConverter {
ReluOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ReluOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/reshape.cc
浏览文件 @
1e21e8b5
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
ReshapeOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/reshape.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class ReshapeOpConverter : public AnakinOpConverter {
ReshapeOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ReshapeOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/scale.cc
浏览文件 @
1e21e8b5
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
ScaleOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/scale.h
浏览文件 @
1e21e8b5
...
...
@@ -27,6 +27,7 @@ class ScaleOpConverter : public AnakinOpConverter {
ScaleOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ScaleOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/softmax.cc
浏览文件 @
1e21e8b5
...
...
@@ -24,6 +24,7 @@ namespace inference {
namespace
anakin
{
void
SoftMaxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -32,8 +33,16 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input
=
op_desc
.
Input
(
"X"
).
front
();
auto
output
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
auto
input_var_desc
=
block_desc
.
FindVar
(
input
);
PADDLE_ENFORCE
(
input_var_desc
,
"Cant find %s variable When runing Anakin Softmax converter."
,
input
);
auto
input_shape_in_fluid
=
input_var_desc
->
GetShape
();
size_t
input_dims
=
input_shape_in_fluid
.
size
();
engine_
->
AddOp
(
op_name
,
"Softmax"
,
{
input
},
{
output
});
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
2
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
static_cast
<
int
>
(
input_dims
-
1
)
);
}
}
// namespace anakin
...
...
paddle/fluid/inference/anakin/convert/softmax.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class SoftMaxOpConverter : public AnakinOpConverter {
SoftMaxOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SoftMaxOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/split.cc
浏览文件 @
1e21e8b5
...
...
@@ -30,6 +30,7 @@ namespace inference {
namespace
anakin
{
void
SplitOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/split.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class SplitOpConverter : public AnakinOpConverter {
SplitOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SplitOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/sum.cc
浏览文件 @
1e21e8b5
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
SumOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
2
);
...
...
paddle/fluid/inference/anakin/convert/sum.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class SumOpConverter : public AnakinOpConverter {
SumOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SumOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/transpose.cc
浏览文件 @
1e21e8b5
...
...
@@ -28,6 +28,7 @@ namespace inference {
namespace
anakin
{
void
TransposeOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/transpose.h
浏览文件 @
1e21e8b5
...
...
@@ -25,6 +25,7 @@ class TransposeOpConverter : public AnakinOpConverter {
TransposeOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
TransposeOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
1e21e8b5
...
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
...
...
@@ -112,6 +113,17 @@ class AnakinConvertValidation {
auto
*
x_tensor
=
x
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
RandomizeTensor
(
x_tensor
,
place_
,
ctx
);
std
::
vector
<
int64_t
>
dim_vec_int64
;
for
(
auto
&
ele
:
dim_vec
)
{
dim_vec_int64
.
push_back
(
static_cast
<
int64_t
>
(
ele
));
}
// Add var_desc to block_desc
auto
*
block_desc
=
program_desc_
.
MutableBlock
(
framework
::
kRootBlockIndex
);
auto
*
var_desc
=
block_desc
->
Var
(
name
);
var_desc
->
SetShape
(
dim_vec_int64
);
}
void
SetOp
(
const
framework
::
proto
::
OpDesc
&
desc
)
{
...
...
@@ -119,8 +131,10 @@ class AnakinConvertValidation {
op_desc_
.
reset
(
new
framework
::
OpDesc
(
desc
,
nullptr
));
// should init anakin engine here.
auto
&
block_desc
=
program_desc_
.
Block
(
framework
::
kRootBlockIndex
);
Singleton
<
AnakinOpConverter
>::
Global
().
ConvertOp
(
desc
,
parameters_
,
*
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
desc
,
block_desc
,
parameters_
,
*
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
engine_
->
Freeze
();
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
temp_max_input_shape
;
...
...
@@ -194,6 +208,7 @@ class AnakinConvertValidation {
cudaStream_t
stream_
;
std
::
unique_ptr
<
framework
::
OperatorBase
>
op_
;
std
::
unique_ptr
<
framework
::
OpDesc
>
op_desc_
;
framework
::
ProgramDesc
program_desc_
;
const
std
::
unordered_set
<
std
::
string
>&
parameters_
;
framework
::
Scope
*
scope_
;
platform
::
CUDAPlace
place_
;
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
1e21e8b5
...
...
@@ -91,7 +91,6 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
" or equal to the real input shape, Please set the max "
"input shape using EnableAnakinEngine"
);
anakin_input
->
reshape
(
fluid_input_shape
);
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
1e21e8b5
...
...
@@ -168,6 +168,7 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
anakin_max_input_shape
,
AnakinMaxInputShape
,
anakin_max_shape_t
);
DECL_ARGUMENT_FIELD
(
anakin_max_batch_size
,
AnakinMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
anakin_min_subgraph_size
,
AnakinMinSubgraphSize
,
int
);
DECL_ARGUMENT_FIELD
(
use_anakin
,
UseAnakin
,
bool
);
// Memory optimized related.
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
1e21e8b5
...
...
@@ -151,13 +151,20 @@ void AnakinSubgraphPass::CreateAnakinOp(
op_desc
->
SetType
(
"anakin_engine"
);
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
graph_var_map
;
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
graph_var_map
[
node
->
Name
()]
=
node
;
}
}
auto
&
subgraph_nodes
=
*
Agent
(
node
).
subgraph
();
// The following procedure is used to rename all the intermediate
// variables and the output variables of the subgraph.
RenameAndGetOutputs
(
subgraph_nodes
,
&
block_desc
,
input_names_with_id
,
&
output_names_with_id
,
&
output_names
,
&
output_name_map
,
false
);
graph_var_map
,
false
);
// When anakin engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
...
...
@@ -168,13 +175,6 @@ void AnakinSubgraphPass::CreateAnakinOp(
output_mapping
.
push_back
(
output_name_map
[
name
]);
}
auto
*
vars
=
block_desc
.
Proto
()
->
mutable_vars
();
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
*
vars
->
Add
()
=
*
node
->
Var
()
->
Proto
();
}
}
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
(),
"the block has no var-desc"
);
PADDLE_ENFORCE
(
!
output_mapping
.
empty
());
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
浏览文件 @
1e21e8b5
...
...
@@ -60,6 +60,7 @@ void RenameAndGetOutputs(
std
::
set
<
std
::
string
>
*
output_names_with_id
,
std
::
set
<
std
::
string
>
*
output_names
,
std
::
unordered_map
<
std
::
string
,
std
::
string
>
*
output_name_map
,
const
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
&
graph_var_map
,
bool
is_trt
)
{
// In the normal case, paddle-trt has a bug when running GoogLeNet.
// When there are more than two convolutions of 1 * 1 with the same input, the
...
...
@@ -69,6 +70,15 @@ void RenameAndGetOutputs(
std
::
unordered_map
<
std
::
string
/*name*/
,
int
/*ITensor_quote_num*/
>
same_hierarchy_conv2d_num_map
;
auto
add_block_var
=
[
&
](
const
std
::
string
&
graph_arg
,
const
std
::
string
&
block_arg
)
{
auto
arg_var_node
=
graph_var_map
.
find
(
graph_arg
);
PADDLE_ENFORCE
(
arg_var_node
!=
graph_var_map
.
end
());
auto
*
var_t
=
block_desc
->
Var
(
block_arg
);
var_t
->
SetShape
(
arg_var_node
->
second
->
Var
()
->
GetShape
());
var_t
->
SetDataType
(
arg_var_node
->
second
->
Var
()
->
GetDataType
());
};
for
(
size_t
index
=
0
;
index
<
block_desc
->
OpSize
();
++
index
)
{
framework
::
proto
::
OpDesc
*
op
=
block_desc
->
Op
(
index
)
->
Proto
();
framework
::
OpDesc
op_desc
(
*
op
,
nullptr
);
...
...
@@ -87,13 +97,20 @@ void RenameAndGetOutputs(
auto
*
in_var
=
op
->
mutable_inputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
in_var
->
arguments_size
();
k
++
)
{
// all the arguments
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
std
::
string
arg_value_with_id
=
const
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
const
std
::
string
arg_value_with_id
=
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
if
(
input_names_with_id
.
count
(
arg_value_with_id
))
{
replaced_names
.
push_back
(
arg_value
);
if
(
graph_var_map
.
count
(
arg_value
))
{
add_block_var
(
arg_value
,
arg_value
);
}
}
else
{
replaced_names
.
push_back
(
arg_value_with_id
);
if
(
graph_var_map
.
count
(
arg_value
))
{
add_block_var
(
arg_value
,
arg_value_with_id
);
}
}
}
in_var
->
clear_arguments
();
...
...
@@ -105,7 +122,6 @@ void RenameAndGetOutputs(
for
(
auto
out_var
:
correspond_node
->
outputs
)
{
var2id
[
out_var
->
Name
()]
=
out_var
->
id
();
}
if
(
op_desc
.
Type
()
==
"conv2d"
&&
is_trt
)
{
auto
input_var_name
=
op_desc
.
Input
(
"Input"
).
front
();
auto
filter_var_name
=
op_desc
.
Input
(
"Filter"
).
front
();
...
...
@@ -125,15 +141,18 @@ void RenameAndGetOutputs(
same_hierarchy_conv2d_num_map
[
input_var_name
]
+=
1
;
}
}
// rename for the output variables of op inside subgraph
for
(
int
i
=
0
;
i
<
op
->
outputs_size
();
i
++
)
{
framework
::
proto
::
OpDesc_Var
*
out_var
=
op
->
mutable_outputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
out_var
->
arguments_size
();
k
++
)
{
std
::
string
arg_value
=
out_var
->
arguments
(
k
);
std
::
string
arg_value_with_id
=
const
std
::
string
arg_value
=
out_var
->
arguments
(
k
);
const
std
::
string
arg_value_with_id
=
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
if
(
graph_var_map
.
count
(
arg_value
))
{
add_block_var
(
arg_value
,
arg_value_with_id
);
}
if
(
output_names_with_id
->
count
(
arg_value_with_id
))
{
(
*
output_name_map
)[
arg_value
]
=
arg_value_with_id
;
}
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
浏览文件 @
1e21e8b5
...
...
@@ -42,6 +42,7 @@ void RenameAndGetOutputs(
std
::
set
<
std
::
string
>
*
output_names_with_id
,
std
::
set
<
std
::
string
>
*
output_names
,
std
::
unordered_map
<
std
::
string
,
std
::
string
>
*
output_name_map
,
const
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
&
graph_var_map
,
bool
is_trt
=
true
);
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
1e21e8b5
...
...
@@ -142,6 +142,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
}
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
graph_var_map
;
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
graph_var_map
[
node
->
Name
()]
=
node
;
}
}
auto
&
subgraph_nodes
=
*
Agent
(
node
).
subgraph
();
// The following procedure is used to rename all the intermediate
...
...
@@ -157,7 +164,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// So we have to rename the variable in the subgraph to make sure
// it is either an OP's input or an OP's output.
RenameAndGetOutputs
(
subgraph_nodes
,
&
block_desc
,
input_names_with_id
,
&
output_names_with_id
,
&
output_names
,
&
output_name_map
);
&
output_names_with_id
,
&
output_names
,
&
output_name_map
,
graph_var_map
);
// When tensorrt engine runs at the end of the operation,
// output_mapping helps us copy the data from the renamed ITensor
...
...
@@ -168,14 +176,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
output_mapping
.
push_back
(
output_name_map
[
name
]);
}
PADDLE_ENFORCE
(
!
output_mapping
.
empty
());
auto
*
vars
=
block_desc
.
Proto
()
->
mutable_vars
();
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
*
vars
->
Add
()
=
*
node
->
Var
()
->
Proto
();
}
}
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
(),
"the block has no var-desc"
);
...
...
@@ -213,7 +213,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
std
::
string
trt_engine_serialized_data
=
""
;
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
trt_engine_serialized_data
);
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
1e21e8b5
...
...
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_anakin_
);
CP_MEMBER
(
anakin_max_batchsize_
);
CP_MEMBER
(
anakin_max_input_shape_
);
CP_MEMBER
(
anakin_min_subgraph_size_
);
// Ir related.
CP_MEMBER
(
enable_ir_optim_
);
...
...
@@ -315,6 +316,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
specify_input_name_
;
ss
<<
cpu_math_library_num_threads_
;
ss
<<
use_anakin_
;
ss
<<
anakin_min_subgraph_size_
;
return
ss
.
str
();
}
...
...
@@ -386,10 +388,11 @@ void AnalysisConfig::SwitchIrDebug(int x) {
Update
();
}
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shap
e
)
{
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
int
min_subgraph_siz
e
)
{
anakin_max_batchsize_
=
max_batch_size
;
anakin_max_input_shape_
=
max_input_shape
;
anakin_min_subgraph_size_
=
min_subgraph_size
;
use_anakin_
=
true
;
Update
();
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
1e21e8b5
...
...
@@ -385,6 +385,7 @@ void AnalysisPredictor::PrepareArgument() {
if
(
config_
.
use_gpu
()
&&
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
argument_
.
SetAnakinMinSubgraphSize
(
config_
.
anakin_min_subgraph_size_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
1e21e8b5
...
...
@@ -151,7 +151,8 @@ struct AnalysisConfig {
*/
void
EnableAnakinEngine
(
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{});
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{},
int
min_subgraph_size
=
6
);
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
...
...
@@ -288,6 +289,7 @@ struct AnalysisConfig {
bool
use_anakin_
{
false
};
int
anakin_max_batchsize_
;
int
anakin_min_subgraph_size_
{
6
};
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
anakin_max_input_shape_
;
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info_
;
...
...
paddle/fluid/operators/anakin/anakin_engine_op.h
浏览文件 @
1e21e8b5
...
...
@@ -120,40 +120,8 @@ class AnakinEngineOp : public framework::OperatorBase {
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
>::
Global
()
.
Get
(
engine_key_
);
}
return
anakin_engine_
;
}
// Builds the Anakin engine for this op.
//
// Steps, in order: parse the "subgraph" attribute into a BlockDesc, hand it
// to AnakinOpConverter::ConvertBlock, call engine->Freeze(), register the
// shape of every "Xs" input that is not listed in param_names_, and finally
// call engine->Optimize() and engine->InitGraph().
//
// Note: dev_place is not referenced in this body.
void Prepare(const framework::Scope &scope, const platform::Place &dev_place,
             AnakinNvEngineT *engine) const {
  LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP "
               "kernel etc). This process may cost a lot of time.";

  // Deserialize the sub-block carried by the "subgraph" attribute.
  framework::proto::BlockDesc subgraph_block;
  subgraph_block.ParseFromString(Attr<std::string>("subgraph"));

  // Fetched but not used further down; the lookup itself is kept as-is.
  std::vector<std::string> output_maps =
      Attr<std::vector<std::string>>("output_name_mapping");

  inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
      .ConvertBlock(subgraph_block, param_names_, scope, engine);
  engine->Freeze();

  for (const auto &input_name : Inputs("Xs")) {
    // Parameters are skipped; only the remaining inputs get a shape set.
    const bool is_param = param_names_.count(input_name) != 0;
    if (is_param) {
      continue;
    }

    auto &tensor =
        inference::analysis::GetFromScope<framework::LoDTensor>(scope,
                                                                input_name);
    auto shape = framework::vectorize2int(tensor.dims());

    // Every input shape should have 4 dims: pad 2-D shapes with two 1s.
    if (shape.size() == 2) {
      shape.push_back(1);
      shape.push_back(1);
    }
    engine->SetInputShape(input_name, shape);
  }

  engine->Optimize();
  engine->InitGraph();
}
};
}
// namespace operators
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录