Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
a25331bc
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a25331bc
编写于
3月 20, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry-pick from feature/anakin-engine: deal the changing shape when using anakin #16189
上级
c79f06d3
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
391 addition
and
41 deletion
+391
-41
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+2
-1
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
...d/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
+85
-0
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
...id/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
+35
-0
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+23
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+15
-0
paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc
...id/framework/ir/simplify_anakin_detection_pattern_pass.cc
+4
-0
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+5
-1
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+52
-12
paddle/fluid/inference/anakin/convert/scale.cc
paddle/fluid/inference/anakin/convert/scale.cc
+56
-0
paddle/fluid/inference/anakin/convert/scale.h
paddle/fluid/inference/anakin/convert/scale.h
+37
-0
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+4
-0
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+18
-6
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+19
-7
paddle/fluid/inference/anakin/op_teller.cc
paddle/fluid/inference/anakin/op_teller.cc
+1
-0
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+3
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+7
-3
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+6
-3
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
...ence/analysis/passes/ir_params_sync_among_devices_pass.cc
+0
-1
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+5
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+2
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+4
-1
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+4
-0
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+2
-5
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
a25331bc
...
...
@@ -72,6 +72,7 @@ pass_library(identity_scale_op_clean_pass base)
pass_library
(
sync_batch_norm_pass base
)
pass_library
(
runtime_context_cache_pass base
)
pass_library
(
simplify_anakin_detection_pattern_pass inference
)
pass_library
(
anakin_fillconstant_elementwisemul_fuse inference
)
# There may be many transpose-flatten structures in a model, and the output of
# these structures will be used as inputs to the concat Op. This pattern will
...
...
@@ -82,7 +83,7 @@ foreach (index RANGE 3 6)
file
(
APPEND
${
pass_file
}
"USE_PASS(transpose_flatten
${
index
}
_concat_fuse_pass);
\n
"
)
endforeach
()
foreach
(
index RANGE
3
6
)
foreach
(
index RANGE
2
6
)
file
(
APPEND
${
pass_file
}
"USE_PASS(simplify_anakin_detection_pattern_pass
${
index
}
);
\n
"
)
endforeach
()
...
...
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Helper macros for the fuse handler below.
// GET_IR_NODE(name) forwards to GET_IR_NODE_FROM_SUBGRAPH with the same
// identifier used for both the variable and the pattern member; it relies on
// a local named `pattern` being in scope at the expansion site.
// NOTE(review): presumably this declares a local `name` bound to the matched
// ir::Node - confirm against GET_IR_NODE_FROM_SUBGRAPH's definition.
// GET_NODES fetches the four nodes of the fill_constant -> elementwise_mul
// pattern. Both macros already end in ';', so `GET_NODES;` expands with
// extra empty statements.
#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
#define GET_NODES \
GET_IR_NODE(fill_constant); \
GET_IR_NODE(fill_constant_out); \
GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out);
// Rewrites every `fill_constant -> elementwise_mul(Y)` match into one `scale`
// op.
//
// The new op reads the original X input of elementwise_mul, writes the
// original elementwise_mul output variable, and carries
//   scale = fill_constant's "value" attribute, bias = 0,
//   bias_after_scale = true.
// The fill_constant op, its output variable and the elementwise_mul op are
// removed from the graph afterwards.
//
// @param graph  The graph to transform; ownership is passed in and handed
//               back to the caller.
// @return       The same graph object, after the rewrite.
std::unique_ptr<ir::Graph> AnakinFillconstantElementwisemulFuse::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse";
  FusePassBase::Init(pattern_name, graph.get());

  GraphPatternDetector gpd;
  // The X operand of elementwise_mul is the external input of the pattern.
  auto* x = gpd.mutable_pattern()
                ->NewNode("x")
                ->assert_is_op_input("elementwise_mul", "X")
                ->AsInput();

  // NOTE: the GET_NODES macro refers to this object by the name `pattern`.
  patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
                                                         pattern_name);
  pattern(x);

  // Invoked for each matched subgraph. The graph is mutated through the `g`
  // argument supplied to the handler rather than through the captured
  // unique_ptr, so the handler does not depend on the outer owner.
  // NOTE(review): assumes the detector passes the same Graph* it was invoked
  // with (graph.get() below) - confirm in GraphPatternDetector::operator().
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_NODES;

    PADDLE_ENFORCE(subgraph.count(x));
    auto* elementwise_in = subgraph.at(x);
    const float constant_value =
        boost::get<float>(fill_constant->Op()->GetAttr("value"));

    // Describe the replacement op: Out = constant_value * X + 0.
    framework::OpDesc new_op_desc;
    new_op_desc.SetType("scale");
    new_op_desc.SetInput("X", {elementwise_in->Name()});
    new_op_desc.SetAttr("scale", constant_value);
    new_op_desc.SetAttr("bias", 0.0f);
    new_op_desc.SetAttr("bias_after_scale", true);
    new_op_desc.SetOutput("Out", {elementwise_mul_out->Name()});
    new_op_desc.Flush();

    // Create a new node for the fused op.
    auto* scale_op = g->CreateOpNode(&new_op_desc);

    IR_NODE_LINK_TO(elementwise_in, scale_op);       // Input
    IR_NODE_LINK_TO(scale_op, elementwise_mul_out);  // Output

    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(g,
                         {fill_constant, fill_constant_out, elementwise_mul});
  };

  gpd(graph.get(), handler);
  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Register the pass under the name "anakin_fillconstant_elementwisemul_fuse".
REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse,
              paddle::framework::ir::AnakinFillconstantElementwisemulFuse);
paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Fuse pass declaration: the transformation itself lives in ApplyImpl, which
// takes ownership of a graph and returns it after rewriting.
class AnakinFillconstantElementwisemulFuse : public FusePassBase {
 public:
  virtual ~AnakinFillconstantElementwisemulFuse() = default;

 protected:
  // Applies the pass to `graph` and hands the graph back to the caller.
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
a25331bc
...
...
@@ -1596,6 +1596,29 @@ PDNode *patterns::AnakinDetectionPattern::operator()(
return
multiclass_nms_out
;
}
// Builds the pattern
//
//   fill_constant -> fill_constant_out --(Y)--> elementwise_mul -> out
//   elementwise_op_input ----------------------> elementwise_mul
//
// @param elementwise_op_input  Pattern node for the other operand feeding
//                              elementwise_mul; supplied by the caller.
// @return                      The pattern node for elementwise_mul's output.
PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()(
    PDNode *elementwise_op_input) {
  // Producer of the constant operand.
  auto *const_op =
      pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");

  // The constant itself: output of fill_constant, consumed as "Y".
  auto *const_out = pattern->NewNode(fill_constant_out_repr())
                        ->assert_is_op_output("fill_constant")
                        ->assert_is_op_input("elementwise_mul", "Y")
                        ->AsIntermediate();

  auto *mul_op =
      pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");

  auto *mul_out = pattern->NewNode(elementwise_mul_out_repr())
                      ->assert_is_op_output("elementwise_mul")
                      ->AsOutput();

  // Wire the nodes together.
  const_out->LinksFrom({const_op});
  mul_op->LinksFrom({elementwise_op_input, const_out});
  mul_out->LinksFrom({mul_op});

  return mul_out;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
a25331bc
...
...
@@ -856,6 +856,21 @@ struct AnakinDetectionPattern : public PatternBase {
}
};
// Pattern descriptor for `fill_constant -> elementwise_mul`.
// Calling the object with the pattern node of elementwise_mul's other input
// builds the pattern and returns a PDNode*.
struct AnakinFillConstantElementWiseMulFuse : PatternBase {
  AnakinFillConstantElementWiseMulFuse(PDPattern* pattern,
                                       const std::string& name_scope)
      : PatternBase(pattern, name_scope,
                    "anakin_fillconstant_elementwisemul_fuse") {}

  PDNode* operator()(PDNode* elementwise_op_input);

  // Named nodes of the pattern.
  PATTERN_DECL_NODE(fill_constant);
  PATTERN_DECL_NODE(fill_constant_out);
  PATTERN_DECL_NODE(elementwise_mul);
  PATTERN_DECL_NODE(elementwise_mul_out);
};
}
// namespace patterns
// Link two ir::Nodes from each other.
...
...
paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc
浏览文件 @
a25331bc
...
...
@@ -215,6 +215,7 @@ std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
}
template
class
SimplifyAnakinDetectionPatternPass
<
1
>;
template
class
SimplifyAnakinDetectionPatternPass
<
2
>;
template
class
SimplifyAnakinDetectionPatternPass
<
3
>;
template
class
SimplifyAnakinDetectionPatternPass
<
4
>;
template
class
SimplifyAnakinDetectionPatternPass
<
5
>;
...
...
@@ -227,6 +228,9 @@ template class SimplifyAnakinDetectionPatternPass<6>;
REGISTER_PASS
(
simplify_anakin_detection_pattern_pass
,
paddle
::
framework
::
ir
::
SimplifyAnakinDetectionPatternPass
<
1
>
);
REGISTER_PASS
(
simplify_anakin_detection_pattern_pass2
,
paddle
::
framework
::
ir
::
SimplifyAnakinDetectionPatternPass
<
2
>
);
REGISTER_PASS
(
simplify_anakin_detection_pattern_pass3
,
paddle
::
framework
::
ir
::
SimplifyAnakinDetectionPatternPass
<
3
>
);
...
...
paddle/fluid/inference/anakin/convert/CMakeLists.txt
浏览文件 @
a25331bc
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc DEPS anakin_engine framework_proto scope op_registry
)
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
detection_out.cc scale.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv
)
cc_test
(
test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter
)
...
...
@@ -13,3 +16,4 @@ cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter res
cc_test
(
test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op
)
cc_test
(
test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op
)
cc_test
(
test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op
)
cc_test
(
test_anakin_scale SRCS test_scale_op.cc DEPS anakin_op_converter scale_op math_function
)
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
a25331bc
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
...
...
@@ -72,32 +73,71 @@ class AnakinOpConverter {
// The scope here should be inited with the parameter vars.
void
ConvertBlockToAnakinEngine
(
framework
::
BlockDesc
*
block_desc
,
const
framework
::
Scope
&
scope
,
framework
::
BlockDesc
*
block_desc
,
framework
::
Scope
*
scope
,
const
std
::
vector
<
std
::
string
>
&
inputs
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
std
::
vector
<
std
::
string
>
&
outputs
,
AnakinNvEngine
*
engine
)
{
framework
::
proto
::
BlockDesc
*
block_proto
=
block_desc
->
Proto
();
ConvertBlock
(
*
block_proto
,
parameters
,
scope
,
engine
);
ConvertBlock
(
*
block_proto
,
parameters
,
*
scope
,
engine
);
engine
->
Freeze
();
// if the max_batch size
int
max_batch_size
=
engine
->
GetMaxBatchSize
();
PADDLE_ENFORCE
(
max_batch_size
>
0
,
"the max_batch_size setted from config->EnableAnakinEngine "
"must largger than 0"
);
// If the user does not specify this variable, we use the input shape from
// the block_desc.
auto
max_input_shape
=
engine
->
GetMaxInputShape
();
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
temp_max_input_shape
;
for
(
auto
&
input
:
inputs
)
{
if
(
parameters
.
count
(
input
))
continue
;
auto
*
var
=
block_desc
->
FindVar
(
input
);
PADDLE_ENFORCE
(
var
,
"no variable called %s"
,
input
);
auto
var_shape
=
var
->
GetShape
();
PADDLE_ENFORCE
(
var_shape
.
size
()
==
4
);
std
::
vector
<
int
>
input_shape
;
for
(
int
i
=
0
;
i
<
var_shape
.
size
();
i
++
)
{
input_shape
.
push_back
(
var_shape
[
i
]);
input_shape
.
resize
(
4
);
input_shape
[
0
]
=
max_batch_size
;
if
(
max_input_shape
.
count
(
input
))
{
PADDLE_ENFORCE
(
max_input_shape
[
input
].
size
()
==
4
,
"the dimensions of max_input_shape setted from "
"config->EnableAnakinEngine must be 4"
);
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
input_shape
[
i
]
=
max_input_shape
[
input
][
i
];
}
}
else
{
auto
*
var
=
block_desc
->
FindVar
(
input
);
PADDLE_ENFORCE
(
var
,
"no variable called %s"
,
input
);
auto
var_shape
=
var
->
GetShape
();
std
::
cout
<<
"input :"
<<
input
<<
std
::
endl
;
PADDLE_ENFORCE
(
var_shape
.
size
()
==
4
);
for
(
size_t
i
=
1
;
i
<
var_shape
.
size
();
i
++
)
{
input_shape
[
i
]
=
var_shape
[
i
];
}
}
input_shape
[
0
]
=
engine
->
GetMaxBatch
();
temp_max_input_shape
[
input
]
=
input_shape
;
engine
->
SetInputShape
(
input
,
input_shape
);
// engine->Graph()->RegistVar(input); // For share from data.
}
engine
->
SetMaxInputShape
(
temp_max_input_shape
);
// engine->Graph()->RegistAllOut();
engine
->
Optimize
();
engine
->
InitGraph
();
/*
for(auto& input : inputs) {
platform::CUDAPlace gpu_place(engine->GetDevice());
auto input_var = scope->Var();
auto input_tensor = input_var->GetMutable<framework::LoDTensor>();
auto input_max_shape = temp_max_input_shape[input];
input_tensor->Resize(framework::make_ddim(input_max_shape));
auto input_data = input_tensor->mutable_data<float>(gpu_place);
auto* anakin_input = engine->Net()->get_in(input);
::anakin::saber::Tensor<::anakin::saber::NV> tmp_anakin_tensor(input_data,
::anakin::saber::NV(), 0, input_max_shape);
anakin_input->share_from(tmp_anakin_tensor);
}
*/
}
void
SetEngine
(
AnakinNvEngine
*
engine
)
{
engine_
=
engine
;
}
...
...
paddle/fluid/inference/anakin/convert/scale.cc
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/scale.h"
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a fluid `scale` op into an Anakin "Power" op.
//
// The op must have exactly one "X" input and one "Out" output. Its `bias`
// becomes the Power op's "shift", its `scale` becomes "scale", and "power" is
// fixed to 1. Only bias_after_scale == true is accepted.
// NOTE(review): presumably Power computes (shift + scale * x) ^ power, as in
// Caffe's layer of the same name - confirm against the Anakin docs.
//
// @param op         Proto description of the scale op to convert.
// @param scope      Not used by this converter.
// @param test_mode  Not used by this converter.
void ScaleOpConverter::operator()(const framework::proto::OpDesc &op,
                                  const framework::Scope &scope,
                                  bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  // The Anakin op is named "<type>:<output variable>".
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  auto input_name = op_desc.Input("X").front();
  auto output_name = op_desc.Output("Out").front();
  const float scale = boost::get<float>(op_desc.GetAttr("scale"));
  const float bias = boost::get<float>(op_desc.GetAttr("bias"));
  // The attribute is a bool; keep it as one instead of converting to float.
  const bool bias_after_scale =
      boost::get<bool>(op_desc.GetAttr("bias_after_scale"));
  PADDLE_ENFORCE(bias_after_scale,
                 "The anakin scale layer only support bias after scale now.");

  engine_->AddOp(op_name, "Power", {input_name}, {output_name});
  engine_->AddOpAttr(op_name, "shift", bias);
  engine_->AddOpAttr(op_name, "scale", scale);
  engine_->AddOpAttr(op_name, "power", 1.0f);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Register ScaleOpConverter as the Anakin converter for the "scale" op type.
REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter);
paddle/fluid/inference/anakin/convert/scale.h
0 → 100644
浏览文件 @
a25331bc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Anakin converter for the `scale` op; the conversion is implemented in
// operator().
class ScaleOpConverter : public AnakinOpConverter {
 public:
  ScaleOpConverter() = default;

  // Converts the op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope,
                  bool test_mode) override;

  virtual ~ScaleOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
a25331bc
...
...
@@ -122,6 +122,8 @@ class AnakinConvertValidation {
Singleton
<
AnakinOpConverter
>::
Global
().
ConvertOp
(
desc
,
parameters_
,
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
engine_
->
Freeze
();
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
temp_max_input_shape
;
for
(
const
auto
&
input
:
op_desc_
->
InputArgumentNames
())
{
if
(
parameters_
.
count
(
input
))
continue
;
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope_
,
...
...
@@ -131,7 +133,9 @@ class AnakinConvertValidation {
t_shape
.
push_back
(
1
);
}
engine_
->
SetInputShape
(
input
,
t_shape
);
temp_max_input_shape
[
input
]
=
t_shape
;
}
engine_
->
SetMaxInputShape
(
temp_max_input_shape
);
engine_
->
Optimize
();
engine_
->
InitGraph
();
}
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
a25331bc
...
...
@@ -33,13 +33,14 @@ namespace inference {
namespace
anakin
{
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
AnakinEngine
(
bool
need_summary
,
int
devic
e
,
int
max_batch_siz
e
)
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
AnakinEngine
(
bool
need_summary
,
int
device
,
int
max_batch_siz
e
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shap
e
)
:
graph_
(
new
AnakinGraphT
<
TargetT
,
PrecisionType
>
()),
net_
(
new
AnakinNetT
<
TargetT
,
PrecisionType
,
RunType
>
(
need_summary
))
{
device_
=
device
;
max_batch_size_
=
max_batch_size
;
max_input_shape_
=
max_input_shape
;
}
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
...
...
@@ -75,20 +76,31 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
auto
*
data
=
tensor
->
data
<
float
>
();
auto
fluid_input_shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
while
(
fluid_input_shape
.
size
()
<
4
)
{
fluid_input_shape
.
push_back
(
1
);
}
auto
*
anakin_input
=
net_
->
get_in
(
input
.
first
);
auto
net_shape
=
anakin_input
->
shape
();
std
::
vector
<
int
>
max_input_shape
=
max_input_shape_
[
input
.
first
];
int
max_shape_sum
=
std
::
accumulate
(
max_input_shape
.
begin
(),
max_input_shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
PADDLE_ENFORCE
(
max_shape_sum
>=
tensor
->
numel
(),
"The anakin input max shape should be greater than"
" or equal to the real input shape, Please set the max "
"input shape using EnableAnakinEngine"
);
/*
if (tensor->numel() > net_shape.count()) {
graph_->Reshape(input.first, fluid_input_shape);
net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(true));
net_->init(*graph_);
anakin_input = net_->get_in(input.first);
}
*/
anakin_input
->
reshape
(
fluid_input_shape
);
net_shape
=
anakin_input
->
shape
();
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
// net_shape);
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
}
...
...
paddle/fluid/inference/anakin/engine.h
浏览文件 @
a25331bc
...
...
@@ -15,6 +15,7 @@
#pragma once
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <string>
...
...
@@ -55,8 +56,9 @@ class AnakinEngine {
using
GraphT
=
::
anakin
::
graph
::
Graph
<
TargetT
,
PrecisionType
>
;
public:
explicit
AnakinEngine
(
bool
need_summary
=
false
,
int
device
=
0
,
int
max_batch_size
=
1
);
explicit
AnakinEngine
(
bool
need_summary
=
false
,
int
device
=
0
,
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{});
~
AnakinEngine
();
void
InitGraph
();
void
SetInputShape
(
const
std
::
string
&
name
,
std
::
vector
<
int
>
shape
);
...
...
@@ -73,10 +75,17 @@ class AnakinEngine {
NetT
*
Net
()
{
return
net_
.
get
();
}
GraphT
*
Graph
()
{
return
graph_
.
get
();
}
std
::
unique_ptr
<
AnakinEngine
>
Clone
();
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
&
GetMaxInputShape
()
{
return
max_input_shape_
;
}
void
SetMaxInputShape
(
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
shape
)
{
max_input_shape_
=
shape
;
}
int
GetMaxBatchSize
()
{
return
max_batch_size_
;
}
void
Freeze
();
void
Optimize
();
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
int
Get
MaxBatch
()
{
return
max_batch_siz
e_
;
}
int
Get
Device
()
{
return
devic
e_
;
}
// void SaveSerializedData(std::string& data) { graph_->save_to_string(data);
// }
// void LoadSerializedData(const std::string& data) {
...
...
@@ -87,6 +96,7 @@ class AnakinEngine {
private:
int
max_batch_size_
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape_
;
int
device_
;
std
::
unique_ptr
<
GraphT
>
graph_
;
std
::
unique_ptr
<
NetT
>
net_
;
...
...
@@ -104,11 +114,13 @@ class AnakinEngineManager {
return
engines_
.
at
(
name
).
get
();
}
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
std
::
string
engine_name
)
{
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
std
::
string
engine_name
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
mut_
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
need_summary
,
device
,
max_batch_siz
e
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
need_summary
,
device
,
max_batch_size
,
max_input_shap
e
);
engines_
[
engine_name
].
reset
(
p
);
return
p
;
}
...
...
paddle/fluid/inference/anakin/op_teller.cc
浏览文件 @
a25331bc
...
...
@@ -38,6 +38,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set
.
insert
(
"transpose2"
);
teller_set
.
insert
(
"density_prior_box"
);
teller_set
.
insert
(
"detection_out"
);
teller_set
.
insert
(
"scale"
);
}
bool
operator
()(
const
std
::
string
&
op_type
,
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
a25331bc
...
...
@@ -57,6 +57,7 @@ struct Argument {
using
unique_ptr_t
=
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
;
using
fusion_statis_t
=
std
::
unordered_map
<
std
::
string
,
int
>
;
using
engine_opt_info_t
=
std
::
map
<
std
::
string
,
std
::
string
>
;
using
anakin_max_shape_t
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
;
bool
Has
(
const
std
::
string
&
key
)
const
{
return
valid_fields_
.
count
(
key
);
}
...
...
@@ -150,6 +151,8 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
tensorrt_use_static_engine
,
TensorRtUseStaticEngine
,
bool
);
DECL_ARGUMENT_FIELD
(
anakin_max_input_shape
,
AnakinMaxInputShape
,
anakin_max_shape_t
);
DECL_ARGUMENT_FIELD
(
anakin_max_batch_size
,
AnakinMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
use_anakin
,
UseAnakin
,
bool
);
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
a25331bc
...
...
@@ -77,6 +77,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"engine_opt_info"
,
new
std
::
map
<
std
::
string
,
std
::
string
>
(
argument
->
engine_opt_info
()));
pass
->
Set
(
"predictor_id"
,
new
int
(
argument
->
predictor_id
()));
pass
->
Set
(
"max_input_shape"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(
argument
->
anakin_max_input_shape
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
anakin_max_batch_size
()));
}
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
a25331bc
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
...
...
@@ -256,11 +257,14 @@ void AnakinSubgraphPass::CreateAnakinOp(
input_names_with_id
,
output_names_with_id
,
std
::
to_string
(
predictor_id
));
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
int
max_batch_size
=
Get
<
int
>
(
"max_batch_size"
);
auto
max_input_shape
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"max_input_shape"
);
auto
max_batch_size
=
Get
<
int
>
(
"max_batch_size"
);
auto
*
anakin_engine
=
inference
::
Singleton
<
anakin
::
AnakinEngineManager
>::
Global
().
Create
(
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
engine_key
);
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
max_input_shape
,
engine_key
);
auto
*
scope
=
param_scope
();
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
...
...
@@ -268,7 +272,7 @@ void AnakinSubgraphPass::CreateAnakinOp(
inference
::
Singleton
<
inference
::
anakin
::
AnakinOpConverter
>::
Global
()
.
ConvertBlockToAnakinEngine
(
&
block_desc_temp
,
*
scope
,
&
block_desc_temp
,
scope
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
param_set
,
output_mapping
,
anakin_engine
);
}
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
a25331bc
...
...
@@ -214,13 +214,16 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std
::
to_string
(
0
));
// Get "" when there is no cached calibration table data.
std
::
string
calibration_data
=
GetTrtCalibTableData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
,
enable_int8
);
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
std
::
string
calibration_data
=
""
;
if
(
!
load_from_memory
)
{
calibration_data
=
GetTrtCalibTableData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
,
enable_int8
);
}
SetAttr
(
op_desc
->
Proto
(),
"calibration_data"
,
calibration_data
);
SetAttr
(
op_desc
->
Proto
(),
"enable_int8"
,
enable_int8
);
SetAttr
(
op_desc
->
Proto
(),
"engine_key"
,
engine_key
);
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
std
::
string
trt_engine_serialized_data
=
""
;
if
(
load_from_memory
)
{
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info
=
...
...
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
浏览文件 @
a25331bc
...
...
@@ -30,7 +30,6 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
// The parameters are on the cpu, therefore, synchronization is not necessary.
if
(
!
argument
->
use_gpu
())
return
;
return
;
auto
&
graph
=
argument
->
main_graph
();
std
::
vector
<
std
::
string
>
repetitive_params
;
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
a25331bc
...
...
@@ -111,6 +111,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_anakin_
);
CP_MEMBER
(
anakin_max_batchsize_
);
CP_MEMBER
(
anakin_max_input_shape_
);
// Ir related.
CP_MEMBER
(
enable_ir_optim_
);
...
...
@@ -355,8 +356,11 @@ void AnalysisConfig::SwitchIrDebug(int x) {
ir_debug_
=
x
;
Update
();
}
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
)
{
void
AnalysisConfig
::
EnableAnakinEngine
(
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
)
{
anakin_max_batchsize_
=
max_batch_size
;
anakin_max_input_shape_
=
max_input_shape
;
use_anakin_
=
true
;
Update
();
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
a25331bc
...
...
@@ -380,6 +380,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
if
(
config_
.
use_gpu
()
&&
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
...
...
@@ -835,3 +836,4 @@ USE_ANAKIN_CONVERTER(softmax);
USE_ANAKIN_CONVERTER
(
detection_out
);
USE_ANAKIN_CONVERTER
(
density_prior_box
);
USE_ANAKIN_CONVERTER
(
scale
);
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
a25331bc
...
...
@@ -145,7 +145,9 @@ struct AnalysisConfig {
/**
* \brief Turn on the usage of Anakin sub-graph engine.
*/
void
EnableAnakinEngine
(
int
max_batch_size
=
1
);
void
EnableAnakinEngine
(
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{});
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
...
...
@@ -271,6 +273,7 @@ struct AnalysisConfig {
mutable
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
bool
use_anakin_
{
false
};
int
anakin_max_batchsize_
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
anakin_max_input_shape_
;
std
::
map
<
std
::
string
,
std
::
string
>
engine_opt_info_
;
};
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
a25331bc
...
...
@@ -71,7 +71,11 @@ void GpuPassStrategy::EnableMKLDNN() {
// The following passes works for Anakin sub-graph engine.
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
({
"infer_clean_graph_pass"
,
//
"simplify_anakin_detection_pattern_pass5"
,
//
"simplify_anakin_detection_pattern_pass4"
,
//
"simplify_anakin_detection_pattern_pass3"
,
//
"simplify_anakin_detection_pattern_pass2"
,
//
"anakin_fillconstant_elementwisemul_fuse"
,
//
"fc_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
...
...
paddle/fluid/operators/anakin/anakin_engine_op.h
浏览文件 @
a25331bc
...
...
@@ -97,6 +97,7 @@ class AnakinEngineOp : public framework::OperatorBase {
if
(
param_names_
.
count
(
x
))
continue
;
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
x
);
/*
auto t_shape = framework::vectorize(t.dims());
auto *anakin_input = engine->Net()->get_in(x);
auto net_shape = anakin_input->shape();
...
...
@@ -112,20 +113,16 @@ class AnakinEngineOp : public framework::OperatorBase {
t.mutable_data<float>(dev_place);
TensorCopySync(temp_t, dev_place, &t);
}
*/
inputs
.
insert
({
x
,
&
t
});
}
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
outputs
;
int
output_index
=
0
;
for
(
const
auto
&
y
:
Outputs
(
"Ys"
))
{
// std::vector<int> ddim =
// engine->Net()->get_out(output_maps[output_index])->valid_shape();
// we need get the output anakin output shape.
auto
*
fluid_v
=
scope
.
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
// fluid_t->Resize(framework::make_ddim(ddim));
// fluid_t->mutable_data<float>(boost::get<platform::CUDAPlace>(dev_place));
outputs
.
insert
({
output_maps
[
output_index
],
fluid_t
});
output_index
+=
1
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录