Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
a25331bc
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a25331bc
编写于
3月 20, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry-pick from feature/anakin-engine: deal the changing shape when using anakin #16189
上级
c79f06d3
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
391 addition
and
41 deletion
+391
-41
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+2
-1
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
...d/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
+85
-0
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
...id/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
+35
-0
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+23
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+15
-0
paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc
...id/framework/ir/simplify_anakin_detection_pattern_pass.cc
+4
-0
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+5
-1
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+52
-12
paddle/fluid/inference/anakin/convert/scale.cc
paddle/fluid/inference/anakin/convert/scale.cc
+56
-0
paddle/fluid/inference/anakin/convert/scale.h
paddle/fluid/inference/anakin/convert/scale.h
+37
-0
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+4
-0
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+18
-6
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+19
-7
paddle/fluid/inference/anakin/op_teller.cc
paddle/fluid/inference/anakin/op_teller.cc
+1
-0
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+3
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+7
-3
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+6
-3
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
...ence/analysis/passes/ir_params_sync_among_devices_pass.cc
+0
-1
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+5
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+2
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+4
-1
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+4
-0
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+2
-5
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
a25331bc
...
...
@@ -72,6 +72,7 @@ pass_library(identity_scale_op_clean_pass base)
pass_library
(
sync_batch_norm_pass base
)
pass_library
(
runtime_context_cache_pass base
)
pass_library
(
simplify_anakin_detection_pattern_pass inference
)
pass_library
(
anakin_fillconstant_elementwisemul_fuse inference
)
# There may be many transpose-flatten structures in a model, and the output of
# these structures will be used as inputs to the concat Op. This pattern will
...
...
@@ -82,7 +83,7 @@ foreach (index RANGE 3 6)
file
(
APPEND
${
pass_file
}
"USE_PASS(transpose_flatten
${
index
}
_concat_fuse_pass);
\n
"
)
endforeach
()
foreach
(
index RANGE
3
6
)
foreach
(
index RANGE
2
6
)
file
(
APPEND
${
pass_file
}
"USE_PASS(simplify_anakin_detection_pattern_pass
${
index
}
);
\n
"
)
endforeach
()
...
...
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Helper macros used inside the fuse handler below.
// GET_IR_NODE(n) forwards to GET_IR_NODE_FROM_SUBGRAPH with `n` as both the
// first and second argument and `pattern` as the third, so an object named
// `pattern` must be visible at the expansion site. Note that the expansion
// already ends with a semicolon.
// GET_NODES applies GET_IR_NODE to the four nodes of the
// fill_constant/elementwise_mul pattern.
// NOTE(review): GET_IR_NODE_FROM_SUBGRAPH is defined elsewhere; presumably it
// declares a local named after its first argument -- confirm in
// graph_pattern_detector.h.
#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
#define GET_NODES \
GET_IR_NODE(fill_constant); \
GET_IR_NODE(fill_constant_out); \
GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out);
// Rewrites every `fill_constant -> elementwise_mul` match in `graph` into a
// single `scale` op.
//
// The matched elementwise_mul takes an arbitrary variable as input "X" and the
// output of a fill_constant op as input "Y". The replacement `scale` op reads
// the same "X" variable, writes the original elementwise_mul output variable,
// and uses the fill_constant "value" attribute as its scale factor (bias 0,
// bias_after_scale = true). The fill_constant op, its output variable and the
// elementwise_mul op are then removed from the graph.
//
// Takes ownership of `graph` and returns it after the rewrite.
std::unique_ptr<ir::Graph> AnakinFillconstantElementwisemulFuse::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse";
  FusePassBase::Init(pattern_name, graph.get());

  GraphPatternDetector gpd;

  // The "X" operand of elementwise_mul is the external input of the pattern.
  auto *mul_x_node = gpd.mutable_pattern()
                         ->NewNode("x")
                         ->assert_is_op_input("elementwise_mul", "X")
                         ->AsInput();

  // NOTE: the GET_NODES macro below refers to this object by the name
  // `pattern`, so the identifier must not be renamed.
  patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
                                                         pattern_name);
  pattern(mul_x_node);

  // Invoked once per matched subgraph. The macro GET_NODES also relies on the
  // parameter being called `subgraph`.
  auto fuse_handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                          Graph *g) {
    GET_NODES;

    PADDLE_ENFORCE(subgraph.count(mul_x_node));
    auto *mul_input_var = subgraph.at(mul_x_node);

    // The constant that was multiplied in becomes the scale factor.
    float scale_factor =
        boost::get<float>(fill_constant->Op()->GetAttr("value"));

    // Describe the replacement op: Out = scale_factor * X + 0.
    framework::OpDesc scale_desc;
    scale_desc.SetType("scale");
    scale_desc.SetInput("X", {mul_input_var->Name()});
    scale_desc.SetAttr("scale", scale_factor);
    scale_desc.SetAttr("bias", 0.0f);
    scale_desc.SetAttr("bias_after_scale", true);
    scale_desc.SetOutput("Out", {elementwise_mul_out->Name()});
    scale_desc.Flush();

    // Create a new node for the fused op and wire it between the original
    // input variable and the original output variable.
    auto *scale_node = graph->CreateOpNode(&scale_desc);
    IR_NODE_LINK_TO(mul_input_var, scale_node);        // Input
    IR_NODE_LINK_TO(scale_node, elementwise_mul_out);  // Output

    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph.get(),
                         {fill_constant, fill_constant_out, elementwise_mul});
  };

  gpd(graph.get(), fuse_handler);
  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Registers AnakinFillconstantElementwisemulFuse with the pass registry under
// the name `anakin_fillconstant_elementwisemul_fuse`.
REGISTER_PASS
(
anakin_fillconstant_elementwisemul_fuse
,
paddle
::
framework
::
ir
::
AnakinFillconstantElementwisemulFuse
);
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Fuse pass declaration. The transformation itself lives in ApplyImpl, which
// is defined out of line in the corresponding .cc file.
class AnakinFillconstantElementwisemulFuse : public FusePassBase {
 public:
  virtual ~AnakinFillconstantElementwisemulFuse() = default;

 protected:
  // Takes ownership of `graph`, applies the pass and hands the graph back.
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
a25331bc
...
...
@@ -1596,6 +1596,29 @@ PDNode *patterns::AnakinDetectionPattern::operator()(
return
multiclass_nms_out
;
}
// Adds the `fill_constant -> elementwise_mul` pattern to `pattern`.
//
// `elementwise_op_input` is the caller-supplied node feeding elementwise_mul
// alongside the fill_constant output (which is asserted to be input "Y").
// Returns the pattern node for the elementwise_mul output variable.
PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()(
    PDNode *elementwise_op_input) {
  // Op node: fill_constant.
  auto *fill_op_node =
      pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");

  // Var node: output of fill_constant, consumed as "Y" by elementwise_mul.
  // Marked intermediate.
  auto *fill_out_node = pattern->NewNode(fill_constant_out_repr())
                            ->assert_is_op_output("fill_constant")
                            ->assert_is_op_input("elementwise_mul", "Y")
                            ->AsIntermediate();

  // Op node: elementwise_mul.
  auto *mul_op_node =
      pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");

  // Var node: output of elementwise_mul, the output of the whole pattern.
  auto *mul_out_node = pattern->NewNode(elementwise_mul_out_repr())
                           ->assert_is_op_output("elementwise_mul")
                           ->AsOutput();

  // Wire the nodes together.
  fill_out_node->LinksFrom({fill_op_node});
  mul_op_node->LinksFrom({elementwise_op_input, fill_out_node});
  mul_out_node->LinksFrom({mul_op_node});

  return mul_out_node;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
a25331bc
...
...
@@ -856,6 +856,21 @@ struct AnakinDetectionPattern : public PatternBase {
}
};
// Pattern helper describing a fill_constant op whose output feeds an
// elementwise_mul op. The pattern is built by calling operator() with the
// node for the other elementwise_mul input.
struct AnakinFillConstantElementWiseMulFuse : public PatternBase {
  // Forwards `pattern` and `name_scope` to PatternBase together with the
  // fixed name "anakin_fillconstant_elementwisemul_fuse".
  AnakinFillConstantElementWiseMulFuse(PDPattern *pattern,
                                       const std::string &name_scope)
      : PatternBase(pattern, name_scope,
                    "anakin_fillconstant_elementwisemul_fuse") {}

  // Builds the pattern around `elementwise_op_input`; defined out of line.
  PDNode *operator()(PDNode *elementwise_op_input);

  // declare operator node's name
  PATTERN_DECL_NODE(fill_constant);
  PATTERN_DECL_NODE(fill_constant_out);
  PATTERN_DECL_NODE(elementwise_mul);
  PATTERN_DECL_NODE(elementwise_mul_out);
};
}
// namespace patterns
// Link two ir::Nodes from each other.
...
...
paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc
浏览文件 @
a25331bc
...
...
@@ -215,6 +215,7 @@ std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
}
template
class
SimplifyAnakinDetectionPatternPass
<
1
>;
template
class
SimplifyAnakinDetectionPatternPass
<
2
>;
template
class
SimplifyAnakinDetectionPatternPass
<
3
>;
template
class
SimplifyAnakinDetectionPatternPass
<
4
>;
template
class
SimplifyAnakinDetectionPatternPass
<
5
>;
...
...
@@ -227,6 +228,9 @@ template class SimplifyAnakinDetectionPatternPass<6>;
REGISTER_PASS
(
simplify_anakin_detection_pattern_pass
,
paddle
::
framework
::
ir
::
SimplifyAnakinDetectionPatternPass
<
1
>
);
REGISTER_PASS
(
simplify_anakin_detection_pattern_pass2
,
paddle
::
framework
::
ir
::
SimplifyAnakinDetectionPatternPass
<
2
>
);
REGISTER_PASS
(
simplify_anakin_detection_pattern_pass3
,
paddle
::
framework
::
ir
::
SimplifyAnakinDetectionPatternPass
<
3
>
);
...
...
paddle/fluid/inference/anakin/convert/CMakeLists.txt
浏览文件 @
a25331bc
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc DEPS anakin_engine framework_proto scope op_registry
)
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
detection_out.cc scale.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv
)
cc_test
(
test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter
)
...
...
@@ -13,3 +16,4 @@ cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter res
cc_test
(
test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op
)
cc_test
(
test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op
)
cc_test
(
test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op
)
cc_test
(
test_anakin_scale SRCS test_scale_op.cc DEPS anakin_op_converter scale_op math_function
)
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
a25331bc
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
...
...
@@ -72,32 +73,71 @@ class AnakinOpConverter {
// The scope here should be inited with the parameter vars.
void
ConvertBlockToAnakinEngine
(
framework
::
BlockDesc
*
block_desc
,
const
framework
::
Scope
&
scope
,
framework
::
BlockDesc
*
block_desc
,
framework
::
Scope
*
scope
,
const
std
::
vector
<
std
::
string
>
&
inputs
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
std
::
vector
<
std
::
string
>
&
outputs
,
AnakinNvEngine
*
engine
)
{
framework
::
proto
::
BlockDesc
*
block_proto
=
block_desc
->
Proto
();
ConvertBlock
(
*
block_proto
,
parameters
,
scope
,
engine
);
ConvertBlock
(
*
block_proto
,
parameters
,
*
scope
,
engine
);
engine
->
Freeze
();
// if the max_batch size
int
max_batch_size
=
engine
->
GetMaxBatchSize
();
PADDLE_ENFORCE
(
max_batch_size
>
0
,
"the max_batch_size setted from config->EnableAnakinEngine "
"must largger than 0"
);
// If the user does not specify this variable, we use the input shape from
// the block_desc.
auto
max_input_shape
=
engine
->
GetMaxInputShape
();
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
temp_max_input_shape
;
for
(
auto
&
input
:
inputs
)
{
if
(
parameters
.
count
(
input
))
continue
;
auto
*
var
=
block_desc
->
FindVar
(
input
);
PADDLE_ENFORCE
(
var
,
"no variable called %s"
,
input
);
auto
var_shape
=
var
->
GetShape
();
PADDLE_ENFORCE
(
var_shape
.
size
()
==
4
);
std
::
vector
<
int
>
input_shape
;
for
(
int
i
=
0
;
i
<
var_shape
.
size
();
i
++
)
{
input_shape
.
push_back
(
var_shape
[
i
]);
input_shape
.
resize
(
4
);
input_shape
[
0
]
=
max_batch_size
;
if
(
max_input_shape
.
count
(
input
))
{
PADDLE_ENFORCE
(
max_input_shape
[
input
].
size
()
==
4
,
"the dimensions of max_input_shape setted from "
"config->EnableAnakinEngine must be 4"
);
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
input_shape
[
i
]
=
max_input_shape
[
input
][
i
];
}
}
else
{
auto
*
var
=
block_desc
->
FindVar
(
input
);
PADDLE_ENFORCE
(
var
,
"no variable called %s"
,
input
);
auto
var_shape
=
var
->
GetShape
();
std
::
cout
<<
"input :"
<<
input
<<
std
::
endl
;
PADDLE_ENFORCE
(
var_shape
.
size
()
==
4
);
for
(
size_t
i
=
1
;
i
<
var_shape
.
size
();
i
++
)
{
input_shape
[
i
]
=
var_shape
[
i
];
}
}
input_shape
[
0
]
=
engine
->
GetMaxBatch
();
temp_max_input_shape
[
input
]
=
input_shape
;
engine
->
SetInputShape
(
input
,
input_shape
);
// engine->Graph()->RegistVar(input); // For share from data.
}
engine
->
SetMaxInputShape
(
temp_max_input_shape
);
// engine->Graph()->RegistAllOut();
engine
->
Optimize
();
engine
->
InitGraph
();
/*
for(auto& input : inputs) {
platform::CUDAPlace gpu_place(engine->GetDevice());
auto input_var = scope->Var();
auto input_tensor = input_var->GetMutable<framework::LoDTensor>();
auto input_max_shape = temp_max_input_shape[input];
input_tensor->Resize(framework::make_ddim(input_max_shape));
auto input_data = input_tensor->mutable_data<float>(gpu_place);
auto* anakin_input = engine->Net()->get_in(input);
::anakin::saber::Tensor<::anakin::saber::NV> tmp_anakin_tensor(input_data,
::anakin::saber::NV(), 0, input_max_shape);
anakin_input->share_from(tmp_anakin_tensor);
}
*/
}
void
SetEngine
(
AnakinNvEngine
*
engine
)
{
engine_
=
engine
;
}
...
...
paddle/fluid/inference/anakin/convert/scale.cc
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/scale.h"
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a fluid `scale` op into an Anakin "Power" op on `engine_`.
//
// The op must have exactly one "X" input and one "Out" output. The Anakin op
// is named "<op type>:<output name>" and receives:
//   shift = the fluid "bias" attribute
//   scale = the fluid "scale" attribute
//   power = 1.0
// Only bias_after_scale == true is supported; anything else fails the
// enforce below.
//
// `scope` and `test_mode` are not used by this converter.
void ScaleOpConverter::operator()(const framework::proto::OpDesc &op,
                                  const framework::Scope &scope,
                                  bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  auto input_name = op_desc.Input("X").front();
  auto output_name = op_desc.Output("Out").front();
  float scale = boost::get<float>(op_desc.GetAttr("scale"));
  float bias = boost::get<float>(op_desc.GetAttr("bias"));
  // The attribute is read as a bool, so keep it a bool instead of silently
  // converting it to float as the previous code did.
  bool bias_after_scale = boost::get<bool>(op_desc.GetAttr("bias_after_scale"));
  PADDLE_ENFORCE(bias_after_scale,
                 "The anakin scale layer only support bias after scale now.");

  engine_->AddOp(op_name, "Power", {input_name}, {output_name});
  engine_->AddOpAttr(op_name, "shift", bias);
  engine_->AddOpAttr(op_name, "scale", scale);
  engine_->AddOpAttr(op_name, "power", static_cast<float>(1.0));
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Registers ScaleOpConverter as the Anakin converter for the `scale` op type.
REGISTER_ANAKIN_OP_CONVERTER
(
scale
,
ScaleOpConverter
);
paddle/fluid/inference/anakin/convert/scale.h
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Anakin op converter for the `scale` op. The conversion logic is implemented
// in operator(), defined out of line in the corresponding .cc file.
class ScaleOpConverter : public AnakinOpConverter {
 public:
  ScaleOpConverter() = default;
  virtual ~ScaleOpConverter() = default;

  // Converts the op described by `op`; see the definition for details.
  virtual void operator()(const framework::proto::OpDesc &op,
                          const framework::Scope &scope,
                          bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
a25331bc
...
...
@@ -122,6 +122,8 @@ class AnakinConvertValidation {
Singleton
<
AnakinOpConverter
>::
Global
().
ConvertOp
(
desc
,
parameters_
,
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
engine_
->
Freeze
();
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
temp_max_input_shape
;
for
(
const
auto
&
input
:
op_desc_
->
InputArgumentNames
())
{
if
(
parameters_
.
count
(
input
))
continue
;
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope_
,
...
...
@@ -131,7 +133,9 @@ class AnakinConvertValidation {
t_shape
.
push_back
(
1
);
}
engine_
->
SetInputShape
(
input
,
t_shape
);
temp_max_input_shape
[
input
]
=
t_shape
;
}
engine_
->
SetMaxInputShape
(
temp_max_input_shape
);
engine_
->
Optimize
();
engine_
->
InitGraph
();
}
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
a25331bc
...
...
@@ -33,13 +33,14 @@ namespace inference {
namespace
anakin
{
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
AnakinEngine
(
bool
need_summary
,
int
devic
e
,
int
max_batch_siz
e
)
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
AnakinEngine
(
bool
need_summary
,
int
device
,
int
max_batch_siz
e
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shap
e
)
:
graph_
(
new
AnakinGraphT
<
TargetT
,
PrecisionType
>
()),
net_
(
new
AnakinNetT
<
TargetT
,
PrecisionType
,
RunType
>
(
need_summary
))
{
device_
=
device
;
max_batch_size_
=
max_batch_size
;
max_input_shape_
=
max_input_shape
;
}
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
...
...
@@ -75,20 +76,31 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
auto
*
data
=
tensor
->
data
<
float
>
();
auto
fluid_input_shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
while
(
fluid_input_shape
.
size
()
<
4
)
{
fluid_input_shape
.
push_back
(
1
);
}
auto
*
anakin_input
=
net_
->
get_in
(
input
.
first
);
auto
net_shape
=
anakin_input
->
shape
();
std
::
vector
<
int
>
max_input_shape
=
max_input_shape_
[
input
.
first
];
int
max_shape_sum
=
std
::
accumulate
(
max_input_shape
.
begin
(),
max_input_shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
PADDLE_ENFORCE
(
max_shape_sum
>=
tensor
->
numel
(),
"The anakin input max shape should be greater than"
" or equal to the real input shape, Please set the max "
"input shape using EnableAnakinEngine"
);
/*
if (tensor->numel() > net_shape.count()) {
graph_->Reshape(input.first, fluid_input_shape);
net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(true));
net_->init(*graph_);
anakin_input = net_->get_in(input.first);
}
*/
anakin_input
->
reshape
(
fluid_input_shape
);
net_shape
=
anakin_input
->
shape
();
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
// net_shape);
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
}
...
...
paddle/fluid/inference/anakin/engine.h
浏览文件 @
a25331bc
...
...
@@ -15,6 +15,7 @@
#pragma once
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <string>
...
...
@@ -55,8 +56,9 @@ class AnakinEngine {
using
GraphT
=
::
anakin
::
graph
::
Graph
<
TargetT
,
PrecisionType
>
;
public:
explicit
AnakinEngine
(
bool
need_summary
=
false
,
int
device
=
0
,
int
max_batch_size
=
1
);
explicit
AnakinEngine
(
bool
need_summary
=
false
,
int
device
=
0
,
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{});
~
AnakinEngine
();
void
InitGraph
();
void
SetInputShape
(
const
std
::
string
&
name
,
std
::
vector
<
int
>
shape
);
...
...
@@ -73,10 +75,17 @@ class AnakinEngine {
NetT
*
Net
()
{
return
net_
.
get
();
}
GraphT
*
Graph
()
{
return
graph_
.
get
();
}
std
::
unique_ptr
<
AnakinEngine
>
Clone
();
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
&
GetMaxInputShape
()
{
return
max_input_shape_
;
}
void
SetMaxInputShape
(
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
shape
)
{
max_input_shape_
=
shape
;
}
int
GetMaxBatchSize
()
{
return
max_batch_size_
;
}
void
Freeze
();
void
Optimize
();
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
int
Get
MaxBatch
()
{
return
max_batch_siz
e_
;
}
int
Get
Device
()
{
return
devic
e_
;
}
// void SaveSerializedData(std::string& data) { graph_->save_to_string(data);
// }
// void LoadSerializedData(const std::string& data) {
...
...
@@ -87,6 +96,7 @@ class AnakinEngine {
private:
int
max_batch_size_
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape_
;
int
device_
;
std
::
unique_ptr
<
GraphT
>
graph_
;
std
::
unique_ptr
<
NetT
>
net_
;
...
...
@@ -104,11 +114,13 @@ class AnakinEngineManager {
return
engines_
.
at
(
name
).
get
();
}
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
std
::
string
engine_name
)
{
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
std
::
string
engine_name
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
mut_
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
need_summary
,
device
,
max_batch_siz
e
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
need_summary
,
device
,
max_batch_size
,
max_input_shap
e
);
engines_
[
engine_name
].
reset
(
p
);
return
p
;
}
...
...
paddle/fluid/inference/anakin/op_teller.cc
浏览文件 @
a25331bc
...
...
@@ -38,6 +38,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set
.
insert
(
"transpose2"
);
teller_set
.
insert
(
"density_prior_box"
);
teller_set
.
insert
(
"detection_out"
);
teller_set
.
insert
(
"scale"
);
}
bool
operator
()(
const
std
::
string
&
op_type
,
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
a25331bc
...
...
@@ -57,6 +57,7 @@ struct Argument {
using
unique_ptr_t
=
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
;
using
fusion_statis_t
=
std
::
unordered_map
<
std
::
string
,
int
>
;
using
engine_opt_info_t
=
std
::
map
<
std
::
string
,
std
::
string
>
;
using
anakin_max_shape_t
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
;
bool
Has
(
const
std
::
string
&
key
)
const
{
return
valid_fields_
.
count
(
key
);
}
...
...
@@ -150,6 +151,8 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
tensorrt_use_static_engine
,
TensorRtUseStaticEngine
,
bool
);
DECL_ARGUMENT_FIELD
(
anakin_max_input_shape
,
AnakinMaxInputShape
,
anakin_max_shape_t
);
DECL_ARGUMENT_FIELD
(
anakin_max_batch_size
,
AnakinMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
use_anakin
,
UseAnakin
,
bool
);
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
a25331bc
...
...
@@ -77,6 +77,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"engine_opt_info"
,
new
std
::
map
<
std
::
string
,
std
::
string
>
(
argument
->
engine_opt_info
()));
pass
->
Set
(
"predictor_id"
,
new
int
(
argument
->
predictor_id
()));
pass
->
Set
(
"max_input_shape"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(
argument
->
anakin_max_input_shape
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
anakin_max_batch_size
()));
}
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
a25331bc
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
...
...
@@ -256,11 +257,14 @@ void AnakinSubgraphPass::CreateAnakinOp(
input_names_with_id
,
output_names_with_id
,
std
::
to_string
(
predictor_id
));
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
int
max_batch_size
=
Get
<
int
>
(
"max_batch_size"
);
auto
max_input_shape
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"max_input_shape"
);
auto
max_batch_size
=
Get
<
int
>
(
"max_batch_size"
);
auto
*
anakin_engine
=
inference
::
Singleton
<
anakin
::
AnakinEngineManager
>::
Global
().
Create
(
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
engine_key
);
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
max_input_shape
,
engine_key
);
auto
*
scope
=
param_scope
();
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
...
...
@@ -268,7 +272,7 @@ void AnakinSubgraphPass::CreateAnakinOp(
inference
::
Singleton
<
inference
::
anakin
::
AnakinOpConverter
>::
Global
()
.
ConvertBlockToAnakinEngine
(
&
block_desc_temp
,
*
scope
,
&
block_desc_temp
,
scope
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
param_set
,
output_mapping
,
anakin_engine
);
}
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
a25331bc
...
...
@@ -214,13 +214,16 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std
::
to_string
(
0
));
// Get "" when there is no cached calibration table data.
std
::
string
calibration_data
=
GetTrtCalibTableData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
,
enable_int8
);
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
std
::
string
calibration_data
=
""
;
if
(
!
load_from_memory
)
{
calibration_data
=
GetTrtCalibTableData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
,
enable_int8
);
}
SetAttr
(
op_desc
->
Proto
(),
"calibration_data"
,
calibration_data
);
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
std
::
string
trt_engine_serialized_data
=
""
;
if
(
load_from_memory
)
{
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info
=
...
...
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
浏览文件 @
a25331bc
...
...
@@ -30,7 +30,6 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
// The parameters are on the cpu, therefore, synchronization is not necessary.
if
(
!
argument
->
use_gpu
())
return
;
return
;
auto
&
graph
=
argument
->
main_graph
();
std
::
vector
<
std
::
string
>
repetitive_params
;
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
a25331bc
...
...
@@ -111,6 +111,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_anakin_
);
CP_MEMBER
(
anakin_max_batchsize_
);
CP_MEMBER
(
anakin_max_input_shape_
);
// Ir related.
CP_MEMBER
(
enable_ir_optim_
);
...
...
@@ -355,8 +356,11 @@ void AnalysisConfig::SwitchIrDebug(int x) {
ir_debug_
=
x
;
Update
();
}
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
)
{
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
)
{
anakin_max_batchsize_
=
max_batch_size
;
anakin_max_input_shape_
=
max_input_shape
;
use_anakin_
=
true
;
Update
();
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
a25331bc
...
...
@@ -380,6 +380,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
if
(
config_
.
use_gpu
()
&&
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
...
...
@@ -835,3 +836,4 @@ USE_ANAKIN_CONVERTER(softmax);
USE_ANAKIN_CONVERTER
(
detection_out
);
USE_ANAKIN_CONVERTER
(
density_prior_box
);
USE_ANAKIN_CONVERTER
(
scale
);
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
a25331bc
...
...
@@ -145,7 +145,9 @@ struct AnalysisConfig {
/**
* \brief Turn on the usage of Anakin sub-graph engine.
*/
void
EnableAnakinEngine
(
int
max_batch_size
=
1
);
void
EnableAnakinEngine
(
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{});
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
...
...
@@ -271,6 +273,7 @@ struct AnalysisConfig {
mutable
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
bool
use_anakin_
{
false
};
int
anakin_max_batchsize_
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
anakin_max_input_shape_
;
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info_
;
};
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
a25331bc
...
...
@@ -71,7 +71,11 @@ void GpuPassStrategy::EnableMKLDNN() {
// The following passes works for Anakin sub-graph engine.
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
({
"infer_clean_graph_pass"
,
//
"simplify_anakin_detection_pattern_pass5"
,
//
"simplify_anakin_detection_pattern_pass4"
,
//
"simplify_anakin_detection_pattern_pass3"
,
//
"simplify_anakin_detection_pattern_pass2"
,
//
"anakin_fillconstant_elementwisemul_fuse"
,
//
"fc_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
...
...
paddle/fluid/operators/anakin/anakin_engine_op.h
浏览文件 @
a25331bc
...
...
@@ -97,6 +97,7 @@ class AnakinEngineOp : public framework::OperatorBase {
if
(
param_names_
.
count
(
x
))
continue
;
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
x
);
/*
auto t_shape = framework::vectorize(t.dims());
auto *anakin_input = engine->Net()->get_in(x);
auto net_shape = anakin_input->shape();
...
...
@@ -112,20 +113,16 @@ class AnakinEngineOp : public framework::OperatorBase {
t.mutable_data<float>(dev_place);
TensorCopySync(temp_t, dev_place, &t);
}
*/
inputs
.
insert
({
x
,
&
t
});
}
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
outputs
;
int
output_index
=
0
;
for
(
const
auto
&
y
:
Outputs
(
"Ys"
))
{
// std::vector<int> ddim =
// engine->Net()->get_out(output_maps[output_index])->valid_shape();
// we need get the output anakin output shape.
auto
*
fluid_v
=
scope
.
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
// fluid_t->Resize(framework::make_ddim(ddim));
// fluid_t->mutable_data<float>(boost::get<platform::CUDAPlace>(dev_place));
outputs
.
insert
({
output_maps
[
output_index
],
fluid_t
});
output_index
+=
1
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录