Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5dea0bdd
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5dea0bdd
编写于
4月 01, 2019
作者:
石
石晓伟
提交者:
GitHub
4月 01, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16498 from Shixiaowei02/feature/anakin-engine
merge feature/anakin-engine to develop
上级
76b49f02
7b9fc710
变更
54
隐藏空白更改
内联
并排
Showing
54 changed file
with
137 addition
and
82 deletion
+137
-82
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+1
-1
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+1
-2
paddle/fluid/inference/anakin/convert/activation.cc
paddle/fluid/inference/anakin/convert/activation.cc
+1
-0
paddle/fluid/inference/anakin/convert/activation.h
paddle/fluid/inference/anakin/convert/activation.h
+1
-0
paddle/fluid/inference/anakin/convert/batch_norm.cc
paddle/fluid/inference/anakin/convert/batch_norm.cc
+1
-0
paddle/fluid/inference/anakin/convert/batch_norm.h
paddle/fluid/inference/anakin/convert/batch_norm.h
+1
-0
paddle/fluid/inference/anakin/convert/concat.cc
paddle/fluid/inference/anakin/convert/concat.cc
+1
-0
paddle/fluid/inference/anakin/convert/concat.h
paddle/fluid/inference/anakin/convert/concat.h
+1
-0
paddle/fluid/inference/anakin/convert/conv2d.cc
paddle/fluid/inference/anakin/convert/conv2d.cc
+1
-0
paddle/fluid/inference/anakin/convert/conv2d.h
paddle/fluid/inference/anakin/convert/conv2d.h
+1
-0
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+1
-0
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+1
-0
paddle/fluid/inference/anakin/convert/density_prior_box.cc
paddle/fluid/inference/anakin/convert/density_prior_box.cc
+3
-3
paddle/fluid/inference/anakin/convert/density_prior_box.h
paddle/fluid/inference/anakin/convert/density_prior_box.h
+1
-0
paddle/fluid/inference/anakin/convert/detection_out.cc
paddle/fluid/inference/anakin/convert/detection_out.cc
+1
-0
paddle/fluid/inference/anakin/convert/detection_out.h
paddle/fluid/inference/anakin/convert/detection_out.h
+1
-0
paddle/fluid/inference/anakin/convert/dropout.cc
paddle/fluid/inference/anakin/convert/dropout.cc
+1
-0
paddle/fluid/inference/anakin/convert/dropout.h
paddle/fluid/inference/anakin/convert/dropout.h
+1
-0
paddle/fluid/inference/anakin/convert/elementwise.cc
paddle/fluid/inference/anakin/convert/elementwise.cc
+6
-6
paddle/fluid/inference/anakin/convert/elementwise.h
paddle/fluid/inference/anakin/convert/elementwise.h
+2
-0
paddle/fluid/inference/anakin/convert/fc.cc
paddle/fluid/inference/anakin/convert/fc.cc
+1
-0
paddle/fluid/inference/anakin/convert/fc.h
paddle/fluid/inference/anakin/convert/fc.h
+1
-0
paddle/fluid/inference/anakin/convert/flatten.cc
paddle/fluid/inference/anakin/convert/flatten.cc
+1
-0
paddle/fluid/inference/anakin/convert/flatten.h
paddle/fluid/inference/anakin/convert/flatten.h
+1
-0
paddle/fluid/inference/anakin/convert/im2sequence.cc
paddle/fluid/inference/anakin/convert/im2sequence.cc
+1
-0
paddle/fluid/inference/anakin/convert/im2sequence.h
paddle/fluid/inference/anakin/convert/im2sequence.h
+1
-0
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+9
-8
paddle/fluid/inference/anakin/convert/pool2d.cc
paddle/fluid/inference/anakin/convert/pool2d.cc
+1
-0
paddle/fluid/inference/anakin/convert/pool2d.h
paddle/fluid/inference/anakin/convert/pool2d.h
+1
-0
paddle/fluid/inference/anakin/convert/relu.cc
paddle/fluid/inference/anakin/convert/relu.cc
+1
-0
paddle/fluid/inference/anakin/convert/relu.h
paddle/fluid/inference/anakin/convert/relu.h
+1
-0
paddle/fluid/inference/anakin/convert/reshape.cc
paddle/fluid/inference/anakin/convert/reshape.cc
+1
-0
paddle/fluid/inference/anakin/convert/reshape.h
paddle/fluid/inference/anakin/convert/reshape.h
+1
-0
paddle/fluid/inference/anakin/convert/scale.cc
paddle/fluid/inference/anakin/convert/scale.cc
+1
-0
paddle/fluid/inference/anakin/convert/scale.h
paddle/fluid/inference/anakin/convert/scale.h
+1
-0
paddle/fluid/inference/anakin/convert/softmax.cc
paddle/fluid/inference/anakin/convert/softmax.cc
+10
-1
paddle/fluid/inference/anakin/convert/softmax.h
paddle/fluid/inference/anakin/convert/softmax.h
+1
-0
paddle/fluid/inference/anakin/convert/split.cc
paddle/fluid/inference/anakin/convert/split.cc
+1
-0
paddle/fluid/inference/anakin/convert/split.h
paddle/fluid/inference/anakin/convert/split.h
+1
-0
paddle/fluid/inference/anakin/convert/sum.cc
paddle/fluid/inference/anakin/convert/sum.cc
+1
-0
paddle/fluid/inference/anakin/convert/sum.h
paddle/fluid/inference/anakin/convert/sum.h
+1
-0
paddle/fluid/inference/anakin/convert/transpose.cc
paddle/fluid/inference/anakin/convert/transpose.cc
+1
-0
paddle/fluid/inference/anakin/convert/transpose.h
paddle/fluid/inference/anakin/convert/transpose.h
+1
-0
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+16
-1
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+0
-1
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+8
-8
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
+25
-6
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
+1
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+9
-10
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+5
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+3
-1
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+0
-32
未找到文件。
cmake/external/protobuf.cmake
浏览文件 @
5dea0bdd
...
...
@@ -201,7 +201,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
"-DCMAKE_GENERATOR_PLATFORM=x64"
)
ENDIF
()
SET
(
PROTOBUF_REPO
"https://github.com/
google
/protobuf.git"
)
SET
(
PROTOBUF_REPO
"https://github.com/
protocolbuffers
/protobuf.git"
)
SET
(
PROTOBUF_TAG
"9f75c5aa851cd877fb0d93ccc31b8567a6706546"
)
ExternalProject_Add
(
...
...
paddle/fluid/inference/anakin/convert/CMakeLists.txt
浏览文件 @
5dea0bdd
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL
)
...
...
paddle/fluid/inference/anakin/convert/activation.cc
浏览文件 @
5dea0bdd
...
...
@@ -34,6 +34,7 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
}
void
ActivationOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/activation.h
浏览文件 @
5dea0bdd
...
...
@@ -27,6 +27,7 @@ class ActivationOpConverter : public AnakinOpConverter {
explicit
ActivationOpConverter
(
const
std
::
string
&
op_type
);
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ActivationOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/batch_norm.cc
浏览文件 @
5dea0bdd
...
...
@@ -29,6 +29,7 @@ namespace inference {
namespace
anakin
{
void
BatchNormOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/batch_norm.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class BatchNormOpConverter : public AnakinOpConverter {
BatchNormOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
BatchNormOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/concat.cc
浏览文件 @
5dea0bdd
...
...
@@ -29,6 +29,7 @@ namespace inference {
namespace
anakin
{
void
ConcatOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/concat.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class ConcatOpConverter : public AnakinOpConverter {
ConcatOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ConcatOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/conv2d.cc
浏览文件 @
5dea0bdd
...
...
@@ -28,6 +28,7 @@ namespace inference {
namespace
anakin
{
void
Conv2dOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/conv2d.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class Conv2dOpConverter : public AnakinOpConverter {
Conv2dOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Conv2dOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
浏览文件 @
5dea0bdd
...
...
@@ -28,6 +28,7 @@ namespace inference {
namespace
anakin
{
void
Conv2dFusionOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class Conv2dFusionOpConverter : public AnakinOpConverter {
Conv2dFusionOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Conv2dFusionOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/density_prior_box.cc
浏览文件 @
5dea0bdd
...
...
@@ -27,9 +27,9 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
void
DensityPriorBoxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
void
DensityPriorBoxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
auto
input_name
=
op_desc
.
Input
(
"Input"
).
front
();
auto
image_name
=
op_desc
.
Input
(
"Image"
).
front
();
...
...
paddle/fluid/inference/anakin/convert/density_prior_box.h
浏览文件 @
5dea0bdd
...
...
@@ -27,6 +27,7 @@ class DensityPriorBoxOpConverter : public AnakinOpConverter {
DensityPriorBoxOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DensityPriorBoxOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/detection_out.cc
浏览文件 @
5dea0bdd
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
DetectionOutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/detection_out.h
浏览文件 @
5dea0bdd
...
...
@@ -27,6 +27,7 @@ class DetectionOutOpConverter : public AnakinOpConverter {
DetectionOutOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DetectionOutOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/dropout.cc
浏览文件 @
5dea0bdd
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
DropoutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/dropout.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class DropoutOpConverter : public AnakinOpConverter {
DropoutOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DropoutOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/elementwise.cc
浏览文件 @
5dea0bdd
...
...
@@ -30,9 +30,9 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
void
ElementwiseAddOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
void
ElementwiseAddOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
...
...
@@ -50,9 +50,9 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
}
void
ElementwiseMulOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
void
ElementwiseMulOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
...
...
paddle/fluid/inference/anakin/convert/elementwise.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
ElementwiseAddOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ElementwiseAddOpConverter
()
{}
...
...
@@ -37,6 +38,7 @@ class ElementwiseMulOpConverter : public AnakinOpConverter {
ElementwiseMulOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ElementwiseMulOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/fc.cc
浏览文件 @
5dea0bdd
...
...
@@ -27,6 +27,7 @@ namespace inference {
namespace
anakin
{
void
FcBaseOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/fc.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class FcBaseOpConverter : public AnakinOpConverter {
FcBaseOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
FcBaseOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/flatten.cc
浏览文件 @
5dea0bdd
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
FlattenOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/flatten.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class FlattenOpConverter : public AnakinOpConverter {
FlattenOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
FlattenOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/im2sequence.cc
浏览文件 @
5dea0bdd
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
Im2SequenceConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/im2sequence.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class Im2SequenceConverter : public AnakinOpConverter {
Im2SequenceConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Im2SequenceConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
5dea0bdd
...
...
@@ -40,8 +40,10 @@ class AnakinOpConverter {
AnakinOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{}
void
ConvertOp
(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
framework
::
Scope
&
scope
,
AnakinNvEngine
*
engine
,
bool
test_mode
=
false
)
{
...
...
@@ -58,16 +60,17 @@ class AnakinOpConverter {
}
PADDLE_ENFORCE_NOT_NULL
(
it
,
"no OpConverter for optype [%s]"
,
op_type
);
it
->
SetEngine
(
engine
);
(
*
it
)(
op
,
scope
,
test_mode
);
(
*
it
)(
op
,
block_desc
,
scope
,
test_mode
);
}
void
ConvertBlock
(
const
framework
::
proto
::
BlockDesc
&
block
,
void
ConvertBlock
(
framework
::
BlockDesc
*
block_desc
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
framework
::
Scope
&
scope
,
AnakinNvEngine
*
engine
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
for
(
auto
i
=
0
;
i
<
block
.
ops_size
();
i
++
)
{
auto
&
op
=
block
.
ops
(
i
);
ConvertOp
(
op
,
parameters
,
scope
,
engine
);
framework
::
proto
::
BlockDesc
*
block
=
block_desc
->
Proto
();
for
(
auto
i
=
0
;
i
<
block
->
ops_size
();
i
++
)
{
auto
&
op
=
block
->
ops
(
i
);
ConvertOp
(
op
,
*
block_desc
,
parameters
,
scope
,
engine
);
}
}
...
...
@@ -77,9 +80,7 @@ class AnakinOpConverter {
const
std
::
vector
<
std
::
string
>
&
inputs
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
std
::
vector
<
std
::
string
>
&
outputs
,
AnakinNvEngine
*
engine
)
{
framework
::
proto
::
BlockDesc
*
block_proto
=
block_desc
->
Proto
();
ConvertBlock
(
*
block_proto
,
parameters
,
*
scope
,
engine
);
ConvertBlock
(
block_desc
,
parameters
,
*
scope
,
engine
);
engine
->
Freeze
();
// if the max_batch size
int
max_batch_size
=
engine
->
GetMaxBatchSize
();
...
...
paddle/fluid/inference/anakin/convert/pool2d.cc
浏览文件 @
5dea0bdd
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
Pool2dOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/pool2d.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class Pool2dOpConverter : public AnakinOpConverter {
Pool2dOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Pool2dOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/relu.cc
浏览文件 @
5dea0bdd
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
ReluOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/relu.h
浏览文件 @
5dea0bdd
...
...
@@ -27,6 +27,7 @@ class ReluOpConverter : public AnakinOpConverter {
ReluOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ReluOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/reshape.cc
浏览文件 @
5dea0bdd
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
ReshapeOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/reshape.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class ReshapeOpConverter : public AnakinOpConverter {
ReshapeOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ReshapeOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/scale.cc
浏览文件 @
5dea0bdd
...
...
@@ -26,6 +26,7 @@ namespace inference {
namespace
anakin
{
void
ScaleOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/scale.h
浏览文件 @
5dea0bdd
...
...
@@ -27,6 +27,7 @@ class ScaleOpConverter : public AnakinOpConverter {
ScaleOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ScaleOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/softmax.cc
浏览文件 @
5dea0bdd
...
...
@@ -24,6 +24,7 @@ namespace inference {
namespace
anakin
{
void
SoftMaxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -32,8 +33,16 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input
=
op_desc
.
Input
(
"X"
).
front
();
auto
output
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
auto
input_var_desc
=
block_desc
.
FindVar
(
input
);
PADDLE_ENFORCE
(
input_var_desc
,
"Cant find %s variable When runing Anakin Softmax converter."
,
input
);
auto
input_shape_in_fluid
=
input_var_desc
->
GetShape
();
size_t
input_dims
=
input_shape_in_fluid
.
size
();
engine_
->
AddOp
(
op_name
,
"Softmax"
,
{
input
},
{
output
});
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
2
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
static_cast
<
int
>
(
input_dims
-
1
)
);
}
}
// namespace anakin
...
...
paddle/fluid/inference/anakin/convert/softmax.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class SoftMaxOpConverter : public AnakinOpConverter {
SoftMaxOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SoftMaxOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/split.cc
浏览文件 @
5dea0bdd
...
...
@@ -30,6 +30,7 @@ namespace inference {
namespace
anakin
{
void
SplitOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/split.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class SplitOpConverter : public AnakinOpConverter {
SplitOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SplitOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/sum.cc
浏览文件 @
5dea0bdd
...
...
@@ -31,6 +31,7 @@ namespace inference {
namespace
anakin
{
void
SumOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
2
);
...
...
paddle/fluid/inference/anakin/convert/sum.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class SumOpConverter : public AnakinOpConverter {
SumOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SumOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/transpose.cc
浏览文件 @
5dea0bdd
...
...
@@ -28,6 +28,7 @@ namespace inference {
namespace
anakin
{
void
TransposeOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
paddle/fluid/inference/anakin/convert/transpose.h
浏览文件 @
5dea0bdd
...
...
@@ -25,6 +25,7 @@ class TransposeOpConverter : public AnakinOpConverter {
TransposeOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
TransposeOpConverter
()
{}
...
...
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
5dea0bdd
...
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
...
...
@@ -112,6 +113,17 @@ class AnakinConvertValidation {
auto
*
x_tensor
=
x
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
RandomizeTensor
(
x_tensor
,
place_
,
ctx
);
std
::
vector
<
int64_t
>
dim_vec_int64
;
for
(
auto
&
ele
:
dim_vec
)
{
dim_vec_int64
.
push_back
(
static_cast
<
int64_t
>
(
ele
));
}
// Add var_desc to block_desc
auto
*
block_desc
=
program_desc_
.
MutableBlock
(
framework
::
kRootBlockIndex
);
auto
*
var_desc
=
block_desc
->
Var
(
name
);
var_desc
->
SetShape
(
dim_vec_int64
);
}
void
SetOp
(
const
framework
::
proto
::
OpDesc
&
desc
)
{
...
...
@@ -119,8 +131,10 @@ class AnakinConvertValidation {
op_desc_
.
reset
(
new
framework
::
OpDesc
(
desc
,
nullptr
));
// should init anakin engine here.
auto
&
block_desc
=
program_desc_
.
Block
(
framework
::
kRootBlockIndex
);
Singleton
<
AnakinOpConverter
>::
Global
().
ConvertOp
(
desc
,
parameters_
,
*
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
desc
,
block_desc
,
parameters_
,
*
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
engine_
->
Freeze
();
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
temp_max_input_shape
;
...
...
@@ -194,6 +208,7 @@ class AnakinConvertValidation {
cudaStream_t
stream_
;
std
::
unique_ptr
<
framework
::
OperatorBase
>
op_
;
std
::
unique_ptr
<
framework
::
OpDesc
>
op_desc_
;
framework
::
ProgramDesc
program_desc_
;
const
std
::
unordered_set
<
std
::
string
>&
parameters_
;
framework
::
Scope
*
scope_
;
platform
::
CUDAPlace
place_
;
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
5dea0bdd
...
...
@@ -91,7 +91,6 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
" or equal to the real input shape, Please set the max "
"input shape using EnableAnakinEngine"
);
anakin_input
->
reshape
(
fluid_input_shape
);
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
5dea0bdd
...
...
@@ -168,6 +168,7 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
anakin_max_input_shape
,
AnakinMaxInputShape
,
anakin_max_shape_t
);
DECL_ARGUMENT_FIELD
(
anakin_max_batch_size
,
AnakinMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
anakin_min_subgraph_size
,
AnakinMinSubgraphSize
,
int
);
DECL_ARGUMENT_FIELD
(
use_anakin
,
UseAnakin
,
bool
);
// Memory optimized related.
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
5dea0bdd
...
...
@@ -151,13 +151,20 @@ void AnakinSubgraphPass::CreateAnakinOp(
op_desc
->
SetType
(
"anakin_engine"
);
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
graph_var_map
;
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
graph_var_map
[
node
->
Name
()]
=
node
;
}
}
auto
&
subgraph_nodes
=
*
Agent
(
node
).
subgraph
();
// The following procedure is used to rename all the intermediate
// variables and the output variables of the subgraph.
RenameAndGetOutputs
(
subgraph_nodes
,
&
block_desc
,
input_names_with_id
,
&
output_names_with_id
,
&
output_names
,
&
output_name_map
,
false
);
graph_var_map
,
false
);
// When anakin engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
...
...
@@ -168,13 +175,6 @@ void AnakinSubgraphPass::CreateAnakinOp(
output_mapping
.
push_back
(
output_name_map
[
name
]);
}
auto
*
vars
=
block_desc
.
Proto
()
->
mutable_vars
();
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
*
vars
->
Add
()
=
*
node
->
Var
()
->
Proto
();
}
}
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
(),
"the block has no var-desc"
);
PADDLE_ENFORCE
(
!
output_mapping
.
empty
());
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
浏览文件 @
5dea0bdd
...
...
@@ -60,6 +60,7 @@ void RenameAndGetOutputs(
std
::
set
<
std
::
string
>
*
output_names_with_id
,
std
::
set
<
std
::
string
>
*
output_names
,
std
::
unordered_map
<
std
::
string
,
std
::
string
>
*
output_name_map
,
const
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
&
graph_var_map
,
bool
is_trt
)
{
//// In the normal case, the paddle-trt exists bug when runing the googlenet.
// When there are more than two convolutions of 1 * 1 with the same input, the
...
...
@@ -69,6 +70,15 @@ void RenameAndGetOutputs(
std
::
unordered_map
<
std
::
string
/*name*/
,
int
/*ITensor_quote_num*/
>
same_hierarchy_conv2d_num_map
;
auto
add_block_var
=
[
&
](
const
std
::
string
&
graph_arg
,
const
std
::
string
&
block_arg
)
{
auto
arg_var_node
=
graph_var_map
.
find
(
graph_arg
);
PADDLE_ENFORCE
(
arg_var_node
!=
graph_var_map
.
end
());
auto
*
var_t
=
block_desc
->
Var
(
block_arg
);
var_t
->
SetShape
(
arg_var_node
->
second
->
Var
()
->
GetShape
());
var_t
->
SetDataType
(
arg_var_node
->
second
->
Var
()
->
GetDataType
());
};
for
(
size_t
index
=
0
;
index
<
block_desc
->
OpSize
();
++
index
)
{
framework
::
proto
::
OpDesc
*
op
=
block_desc
->
Op
(
index
)
->
Proto
();
framework
::
OpDesc
op_desc
(
*
op
,
nullptr
);
...
...
@@ -87,13 +97,20 @@ void RenameAndGetOutputs(
auto
*
in_var
=
op
->
mutable_inputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
in_var
->
arguments_size
();
k
++
)
{
// all the arguments
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
std
::
string
arg_value_with_id
=
const
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
const
std
::
string
arg_value_with_id
=
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
if
(
input_names_with_id
.
count
(
arg_value_with_id
))
{
replaced_names
.
push_back
(
arg_value
);
if
(
graph_var_map
.
count
(
arg_value
))
{
add_block_var
(
arg_value
,
arg_value
);
}
}
else
{
replaced_names
.
push_back
(
arg_value_with_id
);
if
(
graph_var_map
.
count
(
arg_value
))
{
add_block_var
(
arg_value
,
arg_value_with_id
);
}
}
}
in_var
->
clear_arguments
();
...
...
@@ -105,7 +122,6 @@ void RenameAndGetOutputs(
for
(
auto
out_var
:
correspond_node
->
outputs
)
{
var2id
[
out_var
->
Name
()]
=
out_var
->
id
();
}
if
(
op_desc
.
Type
()
==
"conv2d"
&&
is_trt
)
{
auto
input_var_name
=
op_desc
.
Input
(
"Input"
).
front
();
auto
filter_var_name
=
op_desc
.
Input
(
"Filter"
).
front
();
...
...
@@ -125,15 +141,18 @@ void RenameAndGetOutputs(
same_hierarchy_conv2d_num_map
[
input_var_name
]
+=
1
;
}
}
// rename for the output variables of op inside subgraph
for
(
int
i
=
0
;
i
<
op
->
outputs_size
();
i
++
)
{
framework
::
proto
::
OpDesc_Var
*
out_var
=
op
->
mutable_outputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
out_var
->
arguments_size
();
k
++
)
{
std
::
string
arg_value
=
out_var
->
arguments
(
k
);
std
::
string
arg_value_with_id
=
const
std
::
string
arg_value
=
out_var
->
arguments
(
k
);
const
std
::
string
arg_value_with_id
=
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
if
(
graph_var_map
.
count
(
arg_value
))
{
add_block_var
(
arg_value
,
arg_value_with_id
);
}
if
(
output_names_with_id
->
count
(
arg_value_with_id
))
{
(
*
output_name_map
)[
arg_value
]
=
arg_value_with_id
;
}
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
浏览文件 @
5dea0bdd
...
...
@@ -42,6 +42,7 @@ void RenameAndGetOutputs(
std
::
set
<
std
::
string
>
*
output_names_with_id
,
std
::
set
<
std
::
string
>
*
output_names
,
std
::
unordered_map
<
std
::
string
,
std
::
string
>
*
output_name_map
,
const
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
&
graph_var_map
,
bool
is_trt
=
true
);
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
5dea0bdd
...
...
@@ -142,6 +142,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
}
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
graph_var_map
;
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
graph_var_map
[
node
->
Name
()]
=
node
;
}
}
auto
&
subgraph_nodes
=
*
Agent
(
node
).
subgraph
();
// The following procedure is used to rename all the intermediate
...
...
@@ -157,7 +164,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// So we have to rename the variable in the subgraph to make sure
// it is either an OP's input or an OP's output.
RenameAndGetOutputs
(
subgraph_nodes
,
&
block_desc
,
input_names_with_id
,
&
output_names_with_id
,
&
output_names
,
&
output_name_map
);
&
output_names_with_id
,
&
output_names
,
&
output_name_map
,
graph_var_map
);
// When tensorrt engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
...
...
@@ -168,14 +176,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
output_mapping
.
push_back
(
output_name_map
[
name
]);
}
PADDLE_ENFORCE
(
!
output_mapping
.
empty
());
auto
*
vars
=
block_desc
.
Proto
()
->
mutable_vars
();
for
(
framework
::
ir
::
Node
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
*
vars
->
Add
()
=
*
node
->
Var
()
->
Proto
();
}
}
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
(),
"the block has no var-desc"
);
...
...
@@ -213,7 +213,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
std
::
string
trt_engine_serialized_data
=
""
;
SetAttr
(
op_desc
->
Proto
(),
"engine_serialized_data"
,
trt_engine_serialized_data
);
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
5dea0bdd
...
...
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_anakin_
);
CP_MEMBER
(
anakin_max_batchsize_
);
CP_MEMBER
(
anakin_max_input_shape_
);
CP_MEMBER
(
anakin_min_subgraph_size_
);
// Ir related.
CP_MEMBER
(
enable_ir_optim_
);
...
...
@@ -322,6 +323,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
specify_input_name_
;
ss
<<
cpu_math_library_num_threads_
;
ss
<<
use_anakin_
;
ss
<<
anakin_min_subgraph_size_
;
return
ss
.
str
();
}
...
...
@@ -393,10 +395,11 @@ void AnalysisConfig::SwitchIrDebug(int x) {
Update
();
}
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shap
e
)
{
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
int
min_subgraph_siz
e
)
{
anakin_max_batchsize_
=
max_batch_size
;
anakin_max_input_shape_
=
max_input_shape
;
anakin_min_subgraph_size_
=
min_subgraph_size
;
use_anakin_
=
true
;
Update
();
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
5dea0bdd
...
...
@@ -385,6 +385,7 @@ void AnalysisPredictor::PrepareArgument() {
if
(
config_
.
use_gpu
()
&&
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
argument_
.
SetAnakinMinSubgraphSize
(
config_
.
anakin_min_subgraph_size_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
5dea0bdd
...
...
@@ -151,7 +151,8 @@ struct AnalysisConfig {
*/
void
EnableAnakinEngine
(
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{});
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{},
int
min_subgraph_size
=
6
);
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
...
...
@@ -288,6 +289,7 @@ struct AnalysisConfig {
bool
use_anakin_
{
false
};
int
anakin_max_batchsize_
;
int
anakin_min_subgraph_size_
{
6
};
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
anakin_max_input_shape_
;
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info_
;
...
...
paddle/fluid/operators/anakin/anakin_engine_op.h
浏览文件 @
5dea0bdd
...
...
@@ -120,40 +120,8 @@ class AnakinEngineOp : public framework::OperatorBase {
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
>::
Global
()
.
Get
(
engine_key_
);
}
return
anakin_engine_
;
}
void
Prepare
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
,
AnakinNvEngineT
*
engine
)
const
{
LOG
(
INFO
)
<<
"Prepare Anakin engine (Optimize model structure, Select OP "
"kernel etc). This process may cost a lot of time."
;
framework
::
proto
::
BlockDesc
block_desc
;
block_desc
.
ParseFromString
(
Attr
<
std
::
string
>
(
"subgraph"
));
std
::
vector
<
std
::
string
>
output_maps
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
inference
::
Singleton
<
inference
::
anakin
::
AnakinOpConverter
>::
Global
()
.
ConvertBlock
(
block_desc
,
param_names_
,
scope
,
engine
);
engine
->
Freeze
();
for
(
const
auto
&
x
:
Inputs
(
"Xs"
))
{
if
(
param_names_
.
count
(
x
))
continue
;
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
x
);
auto
t_shape
=
framework
::
vectorize2int
(
t
.
dims
());
// all input shape should be 4 dims
if
(
t_shape
.
size
()
==
2
)
{
t_shape
.
push_back
(
1
);
t_shape
.
push_back
(
1
);
}
engine
->
SetInputShape
(
x
,
t_shape
);
}
engine
->
Optimize
();
engine
->
InitGraph
();
}
};
}
// namespace operators
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录