Commit e14ab180
Authored on Apr 11, 2019 by nhzlx
Parent commit: 7ad182e1

Cherry-pick from 1662, 16797.. : add anakin int8 support

Showing 81 changed files with 1,103 additions and 589 deletions (+1103, -589).
paddle/fluid/framework/ir/fc_fuse_pass.cc  +2 -1
paddle/fluid/framework/ir/graph_pattern_detector.cc  +12 -13
paddle/fluid/framework/ir/graph_pattern_detector.h  +2 -1
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc  +19 -9
paddle/fluid/inference/anakin/convert/CMakeLists.txt  +6 -1
paddle/fluid/inference/anakin/convert/activation.cc  +39 -10
paddle/fluid/inference/anakin/convert/activation.h  +9 -8
paddle/fluid/inference/anakin/convert/affine_channel.cc  +24 -55
paddle/fluid/inference/anakin/convert/affine_channel.h  +2 -2
paddle/fluid/inference/anakin/convert/batch_norm.cc  +35 -71
paddle/fluid/inference/anakin/convert/batch_norm.h  +2 -2
paddle/fluid/inference/anakin/convert/concat.cc  +19 -6
paddle/fluid/inference/anakin/convert/concat.h  +2 -2
paddle/fluid/inference/anakin/convert/conv2d.cc  +55 -24
paddle/fluid/inference/anakin/convert/conv2d.h  +2 -2
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc  +59 -52
paddle/fluid/inference/anakin/convert/conv2d_fusion.h  +2 -2
paddle/fluid/inference/anakin/convert/density_prior_box.cc  +21 -10
paddle/fluid/inference/anakin/convert/density_prior_box.h  +3 -2
paddle/fluid/inference/anakin/convert/detection_out.cc  +19 -6
paddle/fluid/inference/anakin/convert/detection_out.h  +2 -2
paddle/fluid/inference/anakin/convert/dropout.cc  +21 -16
paddle/fluid/inference/anakin/convert/dropout.h  +2 -2
paddle/fluid/inference/anakin/convert/elementwise.cc  +31 -15
paddle/fluid/inference/anakin/convert/elementwise.h  +6 -4
paddle/fluid/inference/anakin/convert/fc.cc  +83 -57
paddle/fluid/inference/anakin/convert/fc.h  +6 -6
paddle/fluid/inference/anakin/convert/flatten.cc  +19 -6
paddle/fluid/inference/anakin/convert/flatten.h  +2 -2
paddle/fluid/inference/anakin/convert/helper.cc  +32 -0
paddle/fluid/inference/anakin/convert/helper.h  +88 -0
paddle/fluid/inference/anakin/convert/im2sequence.cc  +17 -4
paddle/fluid/inference/anakin/convert/im2sequence.h  +2 -2
paddle/fluid/inference/anakin/convert/op_converter.h  +57 -24
paddle/fluid/inference/anakin/convert/pool2d.cc  +19 -6
paddle/fluid/inference/anakin/convert/pool2d.h  +2 -2
paddle/fluid/inference/anakin/convert/relu.cc  +35 -10
paddle/fluid/inference/anakin/convert/relu.h  +4 -4
paddle/fluid/inference/anakin/convert/reshape.cc  +18 -6
paddle/fluid/inference/anakin/convert/reshape.h  +2 -2
paddle/fluid/inference/anakin/convert/roi_align.cc  +19 -11
paddle/fluid/inference/anakin/convert/roi_align.h  +2 -2
paddle/fluid/inference/anakin/convert/scale.cc  +21 -3
paddle/fluid/inference/anakin/convert/scale.h  +2 -2
paddle/fluid/inference/anakin/convert/softmax.cc  +19 -6
paddle/fluid/inference/anakin/convert/softmax.h  +2 -2
paddle/fluid/inference/anakin/convert/split.cc  +19 -4
paddle/fluid/inference/anakin/convert/split.h  +2 -2
paddle/fluid/inference/anakin/convert/sum.cc  +21 -7
paddle/fluid/inference/anakin/convert/sum.h  +2 -2
paddle/fluid/inference/anakin/convert/test_activation_op.cc  +4 -2
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_concat_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_dropout_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_fc_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_flatten_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_relu_op.cc  +2 -16
paddle/fluid/inference/anakin/convert/test_reshape_op.cc  +4 -4
paddle/fluid/inference/anakin/convert/test_softmax_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_split_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_sum_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_transpose_op.cc  +4 -4
paddle/fluid/inference/anakin/convert/transpose.cc  +14 -6
paddle/fluid/inference/anakin/convert/transpose.h  +2 -2
paddle/fluid/inference/anakin/convert/ut_helper.h  +14 -7
paddle/fluid/inference/anakin/engine.cc  +11 -2
paddle/fluid/inference/anakin/engine.h  +10 -3
paddle/fluid/inference/analysis/argument.h  +6 -0
paddle/fluid/inference/analysis/ir_pass_manager.cc  +5 -0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc  +42 -12
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h  +8 -0
paddle/fluid/inference/api/analysis_config.cc  +13 -2
paddle/fluid/inference/api/analysis_predictor.cc  +3 -0
paddle/fluid/inference/api/paddle_analysis_config.h  +6 -1
paddle/fluid/inference/api/paddle_pass_builder.cc  +11 -5
paddle/fluid/operators/anakin/anakin_engine_op.h  +21 -7
paddle/fluid/pybind/inference_api.cc  +8 -2
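Background note (not part of the commit): the converters below assume symmetric INT8 quantization with a range of 127, which is why the diffs divide the recorded scales by `int8_range = 127.`. A minimal, self-contained C++ sketch of that mapping, with hypothetical names, for orientation:

// Illustrative only -- not code from this commit. Symmetric INT8 mapping:
// q = round(x / scale), scale = max_abs / 127.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

std::vector<int8_t> QuantizeSymmetric(const std::vector<float>& x,
                                      float* scale_out) {
  float max_abs = 0.f;
  for (float v : x) max_abs = std::max(max_abs, std::fabs(v));
  const float scale = max_abs > 0.f ? max_abs / 127.f : 1.f;  // avoid div by 0
  std::vector<int8_t> q;
  q.reserve(x.size());
  for (float v : x) {
    float r = std::round(v / scale);
    r = std::max(-127.f, std::min(127.f, r));  // clamp to the int8 range
    q.push_back(static_cast<int8_t>(r));
  }
  *scale_out = scale;
  return q;
}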
paddle/fluid/framework/ir/fc_fuse_pass.cc

@@ -48,8 +48,9 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
+    auto base_op_desc = *mul->Op()->Proto();
     // Create an FC Node.
-    OpDesc desc;
+    OpDesc desc(base_op_desc, nullptr);
     std::string fc_x_in = subgraph.at(x)->Name();
     std::string fc_Y_in = w->Name();
     std::string fc_bias_in = fc_bias->Name();
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -1640,7 +1640,8 @@ PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
 void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
                                               const std::string &op_type,
                                               const std::string &weight_name,
-                                              int times) {
+                                              int times,
+                                              const std::string &quant_type) {
   const int kNumFields = 5;
   const int kQuantizedWeightOffset = 0;
   const int kQuantizedOpOffset = 1;

@@ -1648,24 +1649,22 @@ void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
   const int kDequantOpOffset = 3;
   const int kDequantOpOutOffset = 4;
   // the quant op always be one.
   auto quant_op_in_scale =
       pattern->NewNode(GetNodeName("quant_op_in_scale"))
-          ->assert_is_op_input("fake_quantize_range_abs_max", "InScale")
+          ->assert_is_op_input(quant_type, "InScale")
           ->AsInput();
   auto quant_op =
-      pattern->NewNode(GetNodeName("quant_op"))
-          ->assert_is_op("fake_quantize_range_abs_max");
+      pattern->NewNode(GetNodeName("quant_op"))->assert_is_op(quant_type);
   auto quant_op_out_scale =
       pattern->NewNode(GetNodeName("quant_op_out_scale"))
-          ->assert_is_op_output("fake_quantize_range_abs_max", "OutScale")
+          ->assert_is_op_output(quant_type, "OutScale")
           ->assert_is_op_input("fake_dequantize_max_abs", "Scale")
           ->AsIntermediate();
   auto quant_op_out =
       pattern->NewNode(GetNodeName("quant_op_out"))
-          ->assert_is_op_output("fake_quantize_range_abs_max", "Out")
+          ->assert_is_op_output(quant_type, "Out")
           ->assert_is_op_input(op_type)
           ->AsIntermediate();
   // there are 'times' quantized and dequant op
   std::vector<PDNode *> nodes;
paddle/fluid/framework/ir/graph_pattern_detector.h

@@ -880,7 +880,8 @@ struct QuantDequantOpFuse : public PatternBase {
       : PatternBase(pattern, name_scope, "quant_dequant_fuse") {}

   void operator()(PDNode* quant_op_input, const std::string& op_name,
-                  const std::string& weight_name, int times = 1);
+                  const std::string& weight_name, int times,
+                  const std::string& quant_type);

   std::string GetNodeName(const std::string& op_type) {
     return PDNodeName(name_scope_, repr_, id_, op_type);
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc

@@ -25,7 +25,8 @@ namespace framework {
 namespace ir {

 void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
-                     std::string op_type) {
+                     const std::string& op_type,
+                     const std::string& quant_type) {
   const std::string pattern_name = "quant_dequant_fuse";
   // FusePassBase::Init(pattern_name, graph);
   const int kNumFields = 5;

@@ -38,7 +39,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   GraphPatternDetector gpd;
   auto* x = gpd.mutable_pattern()
                ->NewNode("x")
-               ->assert_is_op_input("fake_quantize_range_abs_max", "X")
+               ->assert_is_op_input(quant_type, "X")
                ->AsInput();
   std::string quantized_op_type = "";

@@ -46,6 +47,9 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   if (op_type == "conv2d") {
     quantized_op_type = "conv2d";
     weight_name = "Filter";
+  } else if (op_type == "depthwise_conv2d") {
+    quantized_op_type = "depthwise_conv2d";
+    weight_name = "Filter";
   } else if (op_type == "conv2d_fusion") {
     quantized_op_type = "conv2d_fusion";
     weight_name = "Filter";

@@ -62,7 +66,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   }

   patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name);
-  pattern(x, quantized_op_type, weight_name, times);
+  pattern(x, quantized_op_type, weight_name, times, quant_type);

   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {

@@ -103,7 +107,6 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
     std::unordered_set<const Node*> delete_nodes;

     for (int i = 0; i < times; i++) {
-      // max_range = (range * range) / weight_scale
       float max_range = boost::get<float>(
           nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range"));
       float weight_scale = (range * range) / max_range;

@@ -118,7 +121,8 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
       new_op_desc.SetType(quantized_op_type);

       if (quantized_op_type == "conv2d" ||
-          quantized_op_type == "conv2d_fusion") {
+          quantized_op_type == "conv2d_fusion" ||
+          quantized_op_type == "depthwise_conv2d") {
         new_op_desc.SetInput("Input", {new_input});
         new_op_desc.SetOutput("Output", {new_output});
       } else if (quantized_op_type == "fc") {

@@ -156,11 +160,17 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
   const std::string pattern_name = "quant_dequant_fuse";
   FusePassBase::Init(pattern_name, graph);

-  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul"};
+  std::unordered_set<std::string> quant_types = {
+      "fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
+  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul",
+                                                        "depthwise_conv2d"};
   auto* scope = param_scope();
-  for (auto& op_type : quantized_op_types) {
-    for (int i = 1; i <= 6; i++) {
-      RunQuantDequant(graph, scope, i, op_type);
+  for (auto& quant_type : quant_types) {
+    for (auto& op_type : quantized_op_types) {
+      for (int i = 6; i >= 1; i--) {
+        RunQuantDequant(graph, scope, i, op_type, quant_type);
+      }
     }
   }
 }
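The unchanged line `float weight_scale = (range * range) / max_range;` in the hunk above is the inverse of how the fake dequantize op encodes its attribute: it stores max_range = range^2 / weight_scale, so the fuse pass recovers the per-layer weight scale from that attribute. A tiny self-contained check of the arithmetic (the concrete scale value is hypothetical):

// Illustrative sketch only, assuming range = 127 as in the fake quant ops.
#include <cstdio>

int main() {
  const float range = 127.f;
  const float weight_scale = 0.02f;                       // hypothetical scale
  const float max_range = range * range / weight_scale;   // stored on the op
  const float recovered = (range * range) / max_range;    // as in the pass
  std::printf("recovered weight_scale = %f\n", recovered);  // prints 0.020000
  return 0;
}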
paddle/fluid/inference/anakin/convert/CMakeLists.txt

-cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc roi_align.cc DEPS anakin_engine framework_proto scope op_registry)
+cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
+  elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
+  batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
+  detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
+  roi_align.cc helper.cc DEPS anakin_engine framework_proto scope op_registry
+  gtest)
 cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
 cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
paddle/fluid/inference/anakin/convert/activation.cc

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-ActivationOpConverter<TargetT>::ActivationOpConverter(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
     const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);

@@ -30,8 +30,8 @@ ActivationOpConverter<TargetT>::ActivationOpConverter(
   anakin_op_type_ = it->second;
 }

-template <typename TargetT>
-void ActivationOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ActivationOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -50,11 +50,40 @@ void ActivationOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid,
-                                  SigmoidOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+using sigmoid_nv_fp32 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV,
+                                                    ::anakin::Precision::FP32>;
+using sigmoid_nv_int8 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV,
+                                                    ::anakin::Precision::INT8>;
+using tanh_nv_fp32 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV,
+                                                 ::anakin::Precision::FP32>;
+using tanh_nv_int8 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV,
+                                                 ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8);
 #endif

-REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid,
-                                 SigmoidOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
+using sigmoid_cpu_fp32 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86,
+                                                    ::anakin::Precision::FP32>;
+using sigmoid_cpu_int8 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86,
+                                                    ::anakin::Precision::INT8>;
+using tanh_cpu_fp32 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86,
+                                                 ::anakin::Precision::FP32>;
+using tanh_cpu_int8 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86,
+                                                 ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8);
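Aside (not from the commit): the `using` aliases introduced above are not just cosmetic. A template-id such as SigmoidOpConverter<NV, FP32> contains a comma, which the preprocessor treats as an argument separator, so it cannot be passed directly to a two-argument registration macro. A minimal, generic C++ sketch of the problem and the alias workaround, with hypothetical names:

// Hypothetical macro and types, for demonstration only.
#define REGISTER_CONVERTER(name, type) static type name##_instance

template <int Target, int Precision>
struct DummyConverter {};

// REGISTER_CONVERTER(sigmoid, DummyConverter<0, 1>);  // fails: the macro
//                                                     // receives 3 arguments
using sigmoid_fp32 = DummyConverter<0, 1>;             // alias hides the comma
REGISTER_CONVERTER(sigmoid, sigmoid_fp32);             // expands cleanly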
paddle/fluid/inference/anakin/convert/activation.h

@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ActivationOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   explicit ActivationOpConverter(const std::string &op_type);

@@ -40,16 +40,17 @@ class ActivationOpConverter : public AnakinOpConverter<TargetT> {
       {"sigmoid", "Sigmoid"}};
 };

-template <typename TargetT>
-class TanhOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
 };

-template <typename TargetT>
-class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
+  SigmoidOpConverter()
+      : ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
 };
 }  // namespace anakin
 }  // namespace inference
paddle/fluid/inference/anakin/convert/affine_channel.cc

@@ -16,18 +16,14 @@
 #include <algorithm>
 #include <string>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
-using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void AffineChannelOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -35,60 +31,20 @@ void AffineChannelOpConverter<TargetT>::operator()(
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});

   // Copy the Scale to CPUPlace and get the pointer.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto *scale_t = scale_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> scale_tensor(
-      new framework::LoDTensor());
-  scale_tensor->Resize(scale_t->dims());
-  TensorCopySync((*scale_t), platform::CPUPlace(), scale_tensor.get());
+  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

   // Copy the Bias to CPUPlace and get the pointer.
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto *bias_t = bias_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> bias_tensor(new framework::LoDTensor());
-  bias_tensor->Resize(bias_t->dims());
-  TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
-
-  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
-
-  // Generate the Scale parameter of Anakin.
-  auto scale_shape = framework::vectorize2int(scale_t->dims());
-  while (scale_shape.size() < 4) {
-    scale_shape.insert(scale_shape.begin(), 1);
-  }
-  Shape anakin_scale_shape(scale_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-      anakin_scale_shape);
-  float *scale_cpu_data =
-      static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(),
-              scale_cpu_data);
-  weight1->d_tensor().set_shape(anakin_scale_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  // Generate the Bias parameter of Anakin.
-  auto bias_shape = framework::vectorize2int(bias_t->dims());
-  while (bias_shape.size() < 4) {
-    bias_shape.insert(bias_shape.begin(), 1);
-  }
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
-  float *bias_cpu_data =
-      static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
+  auto weight2 = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }

@@ -97,8 +53,21 @@ void AffineChannelOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(
-    affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
+using affine_channel_nv_fp32 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using affine_channel_nv_int8 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8);
 #endif

-REGISTER_CPU_ANAKIN_OP_CONVERTER(
-    affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);
+using affine_channel_cpu_fp32 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using affine_channel_cpu_int8 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8);
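The new helper.h / helper.cc introduced by this commit are not shown in this section, so the exact implementation of pblock_from_var and friends is unknown here. Based on the repeated code it replaces above (copy the variable's LoDTensor to CPU, left-pad the shape to 4-D, fill an Anakin weight block), a hypothetical sketch of such a helper could look roughly like this; it reuses only calls that appear in the deleted code and depends on the same Paddle/Anakin headers the converters already include:

// Hypothetical sketch, NOT the actual helper.h added by this commit.
template <typename TargetT>
auto pblock_from_lod_tensor_sketch(const framework::LoDTensor &src) {
  framework::LoDTensor cpu_tensor;
  cpu_tensor.Resize(src.dims());
  TensorCopySync(src, platform::CPUPlace(), &cpu_tensor);   // bring data to CPU

  auto shape = framework::vectorize2int(src.dims());
  while (shape.size() < 4) shape.insert(shape.begin(), 1);  // pad to 4-D
  ::anakin::saber::Shape anakin_shape(shape);

  auto *block = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
                    .template new_block<::anakin::AK_FLOAT>(anakin_shape);
  float *dst = static_cast<float *>(block->h_tensor().mutable_data());
  std::copy_n(cpu_tensor.data<float>(), cpu_tensor.numel(), dst);
  block->d_tensor().set_shape(anakin_shape);
  block->d_tensor().copy_from(block->h_tensor());            // sync to device
  return block;
}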
paddle/fluid/inference/anakin/convert/affine_channel.h

@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   AffineChannelOpConverter() = default;
paddle/fluid/inference/anakin/convert/batch_norm.cc

@@ -18,17 +18,14 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void BatchNormOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -36,87 +33,46 @@ void BatchNormOpConverter<TargetT>::operator()(
   std::map<std::string, std::string> inputs;
   for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
     PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
-    auto v = op_desc.Input(k).front();
-    inputs.insert({k, v});
   }

+  auto input = op_desc.Input("X").front();
   auto output = op_desc.Output("Y").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
   auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
-  // auto momentum = boost::get<float>(op_desc.GetAttr("momentum"));

   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
-  this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
   this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
   this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));

   auto scale_op_name = op_name + ":scale";
-  auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
-                                                 framework::LoDTensor *tensor) {
-    auto *v = scope.FindVar(var_name);
-    PADDLE_ENFORCE_NOT_NULL(v);
-    auto *t = v->GetMutable<framework::LoDTensor>();
-    tensor->Resize(t->dims());
-    TensorCopySync(*t, platform::CPUPlace(), tensor);
-  };
-
-  framework::LoDTensor bias_t;
-  framework::LoDTensor mean_t;
-  framework::LoDTensor scale_t;
-  framework::LoDTensor variance_t;
-  get_lod_tensor(inputs["Bias"], &bias_t);
-  get_lod_tensor(inputs["Mean"], &mean_t);
-  get_lod_tensor(inputs["Scale"], &scale_t);
-  get_lod_tensor(inputs["Variance"], &variance_t);
-
-  auto fill_shape = [](size_t n, std::vector<int> shape) {
-    shape.insert(shape.begin(), 1);
-    if (shape.size() < n) {
-      shape.insert(shape.end(), n - shape.size(), 1);
-    }
-    return shape;
-  };
-  Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
-  Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
+  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
+  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+
+  auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
+  PADDLE_ENFORCE_NOT_NULL(mean_v);
+  auto weight1 = pblock_from_var<TargetT>(*mean_v);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);

-  auto *weight2 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
-  auto *variance_data =
-      static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
+  auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
+  PADDLE_ENFORCE_NOT_NULL(variance_v);
+  auto weight2 = pblock_from_var<TargetT>(*variance_v);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);

-  Shape shape3(std::vector<int>({1, 1, 1, 1}));
-  auto *weight3 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
-  auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
-  float weight3_data[] = {1};
-  std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
+  auto *weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);

-  Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
-  auto *scale = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-      scale_shape);
-  auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
-  std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
-
-  Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
-  auto *bias = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-      bias_shape);
-  auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
-  std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
-
-  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
-  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
-  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
-  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+  auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
+  PADDLE_ENFORCE_NOT_NULL(scale_v);
+  auto scale = pblock_from_var<TargetT>(*scale_v);
   this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
+
+  auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
+  PADDLE_ENFORCE_NOT_NULL(bias_v);
+  auto bias = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }

@@ -125,9 +81,17 @@ void BatchNormOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm,
-                                  BatchNormOpConverter<::anakin::saber::NV>);
+using bn_nv_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using bn_nv_int8 = ::paddle::inference::anakin::BatchNormOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_int8);
 #endif

-REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm,
-                                 BatchNormOpConverter<::anakin::saber::X86>);
+using bn_cpu_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using bn_cpu_int8 = ::paddle::inference::anakin::BatchNormOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_int8);
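For orientation: this converter maps one Paddle batch_norm op onto an Anakin "BatchNorm" op (mean/variance normalization) followed by a "Scale" op (per-channel scale and bias), which together reproduce the inference-time formula y = scale * (x - mean) / sqrt(var + eps) + bias. A tiny self-contained reference computation of that decomposition (scalar, hypothetical values, illustrative only):

#include <cmath>
#include <cstdio>

int main() {
  const float x = 2.0f, mean = 1.0f, var = 4.0f, eps = 1e-5f;
  const float scale = 0.5f, bias = 0.1f;                      // hypothetical
  const float normalized = (x - mean) / std::sqrt(var + eps); // "BatchNorm" op
  const float y = scale * normalized + bias;                  // "Scale" op
  std::printf("y = %f\n", y);  // approximately 0.35
  return 0;
}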
paddle/fluid/inference/anakin/convert/batch_norm.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   BatchNormOpConverter() = default;
paddle/fluid/inference/anakin/convert/concat.cc

@@ -19,8 +19,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ConcatOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ConcatOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -39,8 +39,21 @@ void ConcatOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat,
-                                  ConcatOpConverter<::anakin::saber::NV>);
+using concat_nv_fp32 =
+    ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV,
+                                                   ::anakin::Precision::FP32>;
+using concat_nv_int8 =
+    ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV,
+                                                   ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, concat_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(concat, concat_nv_int8);
 #endif

-REGISTER_CPU_ANAKIN_OP_CONVERTER(concat,
-                                 ConcatOpConverter<::anakin::saber::X86>);
+using concat_cpu_fp32 =
+    ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86,
+                                                   ::anakin::Precision::FP32>;
+using concat_cpu_int8 =
+    ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86,
+                                                   ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, concat_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(concat, concat_cpu_int8);
paddle/fluid/inference/anakin/convert/concat.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ConcatOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   ConcatOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d.cc

@@ -16,18 +16,16 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
 using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Conv2dOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -42,11 +40,8 @@ void Conv2dOpConverter<TargetT>::operator()(
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
-  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
-
-  std::unique_ptr<framework::LoDTensor> weight_tensor(
-      new framework::LoDTensor());
-  weight_tensor->Resize(filter_t->dims());
-  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
+  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

@@ -69,25 +64,61 @@ void Conv2dOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", false);

-  auto weight_shape = framework::vectorize2int(filter_t->dims());
-  Shape anakin_shape(weight_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-      anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
-  weight1->d_tensor().set_shape(anakin_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  ::anakin::saber::Shape anakin_shape(weight_shape);
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+
+  if (enable_int8) {
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    float *weight_data = weight_tensor->data<float>();
+    std::vector<char> weight_int8;
+    int weight_num = weight_tensor->numel();
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      bool is_valid_int8 =
+          ((weight_data[i] >= -128) && (weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of conv "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name,
+                                            {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d,
-                                 Conv2dOpConverter<::anakin::saber::X86>);
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d,
-                                  Conv2dOpConverter<::anakin::saber::NV>);
+using conv2d_nv_fp32 =
+    ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV,
+                                                   ::anakin::Precision::FP32>;
+using conv2d_nv_int8 =
+    ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV,
+                                                   ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_int8);
 #endif

+using conv2d_cpu_fp32 =
+    ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86,
+                                                   ::anakin::Precision::FP32>;
+using conv2d_cpu_int8 =
+    ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86,
+                                                   ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_int8);
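Reading the int8 branch above: by the time this converter runs, the quant/dequant fuse pass has already left the conv filter values as integers in [-128, 127], so the converter only range-checks them, narrows them to int8, and then registers weight_scale / 127 and input_scale / 127 with the engine. A standalone mock of just the check-and-narrow step (illustrative, not the converter itself):

// Illustrative sketch only; mirrors the range check and cast in the diff.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int8_t> NarrowWeightsToInt8(const std::vector<float>& w) {
  std::vector<int8_t> out;
  out.reserve(w.size());
  for (float v : w) {
    // int8 mode expects pre-quantized weights already inside [-128, 127]
    assert(v >= -128.f && v <= 127.f);
    out.push_back(static_cast<int8_t>(v));
  }
  return out;
}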
paddle/fluid/inference/anakin/convert/conv2d.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Conv2dOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   Conv2dOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
浏览文件 @
e14ab180
...
@@ -16,18 +16,16 @@
...
@@ -16,18 +16,16 @@
#include <algorithm>
#include <algorithm>
#include <memory>
#include <memory>
#include <vector>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
PTuple
;
using
anakin
::
PTuple
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
template
<
typename
TargetT
>
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
void
Conv2dFusionOpConverter
<
TargetT
>::
operator
()(
void
Conv2dFusionOpConverter
<
TargetT
,
PrecisionT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
@@ -43,24 +41,16 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
...
@@ -43,24 +41,16 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
auto
*
filter_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Filter"
).
front
());
auto
*
filter_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Filter"
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
filter_v
);
PADDLE_ENFORCE_NOT_NULL
(
filter_v
);
auto
*
filter_t
=
filter_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
weight_tensor
=
tensor_from_var
(
*
filter_v
,
platform
::
CPUPlace
());
auto
weight_shape
=
framework
::
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());
   auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(b_v);
-  auto *b_t = b_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> weight_tensor(new framework::LoDTensor());
-  weight_tensor->Resize(filter_t->dims());
-  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
   // const int n_output = weight_tensor->dims()[0];
   // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
   this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size", ...

@@ -77,37 +67,42 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", true);
-  auto weight_shape = framework::vectorize2int(filter_t->dims());
-  Shape anakin_shape(weight_shape);
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
-  weight1->d_tensor().set_shape(anakin_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-  auto bias_shape = framework::vectorize2int(b_t->dims());
-  framework::LoDTensor bias_tensor;
-  bias_tensor.Resize(b_t->dims());
-  TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
-  auto *bias_data = bias_tensor.data<float>();
-  bias_shape.insert(bias_shape.begin(), 1);
-  bias_shape.insert(bias_shape.begin(), 1);
-  bias_shape.insert(bias_shape.begin(), 1);
-  // bias_shape.push_back(1);
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
-  float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  ::anakin::saber::Shape anakin_shape(weight_shape);
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    float *weight_data = weight_tensor->data<float>();
+    std::vector<char> weight_int8;
+    int weight_num = weight_tensor->numel();
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      bool is_valid_int8 =
+          ((weight_data[i] >= -128) && (weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of conv "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name,
+                                            {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+    auto weight_shape = framework::vectorize2int(weight_tensor->dims());
+    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  }
 }
 }  // namespace anakin

@@ -115,9 +110,21 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion,
-                                  Conv2dFusionOpConverter<::anakin::saber::NV>);
+using conv2d_fusion_nv_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using conv2d_fusion_nv_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion,
-                                 Conv2dFusionOpConverter<::anakin::saber::X86>);
+using conv2d_fusion_cpu_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using conv2d_fusion_cpu_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_int8);
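The int8 branch above does no quantization of its own: it assumes the Paddle quantize/dequantize fuse passes have already rounded the conv weights into [-128, 127] and attached the input_scale and weight_scale attributes, so the converter only re-packs the floats as chars and hands Anakin the scales divided by 127. A minimal standalone sketch of that packing step, in plain C++ with a hypothetical pack_int8_weights helper (no Anakin or Paddle types):

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // Mirrors the loop in Conv2dFusionOpConverter: `weights` are assumed to be
    // pre-quantized floats already in [-128, 127]; `weight_scale` is the
    // attribute recorded by the quantization pass.
    std::vector<int8_t> pack_int8_weights(const std::vector<float>& weights,
                                          float weight_scale,
                                          float* anakin_scale_out) {
      const float int8_range = 127.f;
      std::vector<int8_t> packed;
      packed.reserve(weights.size());
      for (float w : weights) {
        if (w < -128.f || w > 127.f)
          throw std::runtime_error("int8 weight out of range [-128, 127]");
        packed.push_back(static_cast<int8_t>(w));  // same narrowing cast as above
      }
      // Anakin expects the per-op scale already normalized by the int8 range.
      *anakin_scale_out = weight_scale / int8_range;
      return packed;
    }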
paddle/fluid/inference/anakin/convert/conv2d_fusion.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   Conv2dFusionOpConverter() = default;
paddle/fluid/inference/anakin/convert/density_prior_box.cc @ e14ab180

@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DensityPriorBoxOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DensityPriorBoxOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -109,13 +109,24 @@ void DensityPriorBoxOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box,
-                                  DensityPriorBoxOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box,
-                                  DensityPriorBoxOpConverter<::anakin::saber::NV>);
+using ds_pr_nv_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using ds_pr_nv_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box,
-                                 DensityPriorBoxOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box,
-                                 DensityPriorBoxOpConverter<::anakin::saber::X86>);
+using ds_pr_cpu_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using ds_pr_cpu_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_int8);
paddle/fluid/inference/anakin/convert/density_prior_box.h @ e14ab180

@@ -22,8 +22,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DensityPriorBoxOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DensityPriorBoxOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   DensityPriorBoxOpConverter() = default;
paddle/fluid/inference/anakin/convert/detection_out.cc @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DetectionOutOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DetectionOutOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -67,8 +67,21 @@ void DetectionOutOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out,
-                                  DetectionOutOpConverter<::anakin::saber::NV>);
+using detection_out_nv_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using detection_out_nv_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out,
-                                 DetectionOutOpConverter<::anakin::saber::X86>);
+using detection_out_cpu_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using detection_out_cpu_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_int8);
paddle/fluid/inference/anakin/convert/detection_out.h @ e14ab180

@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DetectionOutOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DetectionOutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   DetectionOutOpConverter() = default;
paddle/fluid/inference/anakin/convert/dropout.cc @ e14ab180

@@ -16,17 +16,14 @@
 #include <algorithm>
 #include <string>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DropoutOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DropoutOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -42,12 +39,7 @@ void DropoutOpConverter<TargetT>::operator()(
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
-  Shape shape1(std::vector<int>({1, 1, 1, 1}));
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *factor_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  float weight1_data[] = {factor};
-  std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data);
+  auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));

   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   this->engine_->AddOpAttr(op_name, "axis", 0);

@@ -60,8 +52,21 @@ void DropoutOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout,
-                                  DropoutOpConverter<::anakin::saber::NV>);
+using dropout_nv_fp32 = ::paddle::inference::anakin::DropoutOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using dropout_nv_int8 = ::paddle::inference::anakin::DropoutOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, dropout_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout,
-                                 DropoutOpConverter<::anakin::saber::X86>);
+using dropout_cpu_fp32 = ::paddle::inference::anakin::DropoutOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using dropout_cpu_int8 = ::paddle::inference::anakin::DropoutOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_int8);
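At inference time dropout reduces to a pure scale by factor = 1 - dropout_prob, which is why the converter only needs a single-element weight block (now built through pblock_from_vector). A tiny standalone check of that identity, plain C++ with hypothetical names, independent of Anakin/Paddle:

    #include <cassert>
    #include <cmath>
    #include <vector>

    int main() {
      float dropout_prob = 0.3f;
      float factor = 1.f - dropout_prob;  // same formula as the converter
      std::vector<float> input = {1.f, 2.f, 4.f};
      std::vector<float> output;
      for (float x : input) output.push_back(x * factor);  // Anakin "Scale" op
      assert(std::fabs(output[2] - 4.f * 0.7f) < 1e-6f);
      return 0;
    }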
paddle/fluid/inference/anakin/convert/dropout.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DropoutOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DropoutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   DropoutOpConverter() = default;
paddle/fluid/inference/anakin/convert/elementwise.cc @ e14ab180

@@ -17,17 +17,14 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ElementwiseAddOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ElementwiseAddOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -48,8 +45,8 @@ void ElementwiseAddOpConverter<TargetT>::operator()(
   this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }

-template <typename TargetT>
-void ElementwiseMulOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -75,12 +72,31 @@ void ElementwiseMulOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add,
-                                  ElementwiseAddOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul,
-                                  ElementwiseMulOpConverter<::anakin::saber::NV>);
+using elet_nv_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using elet_nv_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using eletmul_nv_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using eletmul_nv_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add,
-                                 ElementwiseAddOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul,
-                                 ElementwiseMulOpConverter<::anakin::saber::X86>);
+using elet_cpu_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using elet_cpu_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using eletmul_cpu_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using eletmul_cpu_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_int8);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_int8);
paddle/fluid/inference/anakin/convert/elementwise.h @ e14ab180

@@ -20,8 +20,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ElementwiseAddOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   ElementwiseAddOpConverter() = default;

@@ -34,8 +35,9 @@ class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
  private:
 };

-template <typename TargetT>
-class ElementwiseMulOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ElementwiseMulOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   ElementwiseMulOpConverter() = default;
paddle/fluid/inference/anakin/convert/fc.cc @ e14ab180

@@ -16,22 +16,19 @@
 #include <algorithm>
 #include <string>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void FcBaseOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_names = op_desc.InputNames();
-  bool with_bias = input_names.size() == 3;
+  bool with_bias = input_names.size() >= 3;
   std::string w_name = "Y";
   std::string i_name = "X";

@@ -45,7 +42,12 @@ void FcBaseOpConverter<TargetT>::operator()(
   // get weights
   auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
   PADDLE_ENFORCE_NOT_NULL(y_v);
-  auto *y_t = y_v->GetMutable<framework::LoDTensor>();
+  auto weight_tensor = tensor_from_var(*y_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());
+  int out_dim = weight_shape[1];
+  const int w_m = weight_shape[0];
+  const int w_k = weight_shape[1];

   auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();

@@ -53,64 +55,58 @@ void FcBaseOpConverter<TargetT>::operator()(
   this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
   this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
   this->engine_->AddOpAttr(op_name, "axis", 1);
-  auto weight_shape = framework::vectorize2int(y_t->dims());
-  int out_dim = weight_shape[1];
   this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
-  const int w_m = weight_shape[0];
-  const int w_k = weight_shape[1];

-  if (weight_shape.size() < 4UL) {
-    weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
-  }
-  Shape anakin_shape(weight_shape);
-  framework::LoDTensor weight_tensor;
-  weight_tensor.Resize(y_t->dims());
-  TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
-  auto *weight_data = weight_tensor.data<float>();
-  PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel());
-  std::vector<float> trans_weight_data(weight_tensor.numel());
+  auto *weight_data = weight_tensor->data<float>();
+  PADDLE_ENFORCE(w_m * w_k == weight_tensor->numel());
+  std::vector<float> trans_weight_data(weight_tensor->numel());
   for (int i = 0; i < w_m; i++) {
     for (int j = 0; j < w_k; j++) {
       trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
     }
   }
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
-  weight1->d_tensor().set_shape(anakin_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  int weight_num = weight_tensor->numel();
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    if (weight_shape.size() < 4UL) {
+      weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
+    }
+    ::anakin::saber::Shape anakin_shape(weight_shape);
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    std::vector<char> weight_int8;
+    for (int i = 0; i < weight_num; i++) {
+      bool is_valid_int8 =
+          ((trans_weight_data[i] >= -128) && (trans_weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of fc "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(trans_weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name,
+                                            {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  }

   // get bias
   if (with_bias) {
     auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
     PADDLE_ENFORCE_NOT_NULL(b_v);
-    auto *b_t = b_v->GetMutable<framework::LoDTensor>();
-    auto bias_shape = framework::vectorize2int(b_t->dims());
-    framework::LoDTensor bias_tensor;
-    bias_tensor.Resize(b_t->dims());
-    TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
-    auto *bias_data = bias_tensor.data<float>();
-    bias_shape.insert(bias_shape.begin(), 1);
-    bias_shape.insert(bias_shape.begin(), 1);
-    bias_shape.insert(bias_shape.begin(), 1);
-    // bias_shape.push_back(1);
-    Shape anakin_bias_shape(bias_shape);
-    auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-        anakin_bias_shape);
-    float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
-    std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
-    weight2->d_tensor().set_shape(anakin_bias_shape);
-    weight2->d_tensor().copy_from(weight2->h_tensor());
+    auto weight2 = pblock_from_var<TargetT>(*b_v);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }

@@ -120,9 +116,39 @@ void FcBaseOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::NV>);
+using mul_nv_fp32 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using fc_nv_fp32 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using mul_nv_int8 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using fc_nv_int8 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, mul_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, fc_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(mul, mul_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(fc, fc_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::X86>);
+using mul_cpu_fp32 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using fc_cpu_fp32 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using mul_cpu_int8 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using fc_cpu_int8 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, mul_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, fc_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(mul, mul_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(fc, fc_cpu_int8);
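Before handing the fc weight to Anakin's Dense op, the converter re-lays the row-major [w_m, w_k] Paddle weight in transposed (column-major) order, in both the fp32 and int8 branches. A minimal standalone sketch of that re-layout, with a hypothetical transpose_row_major helper that uses the same indexing as the loop in FcBaseOpConverter:

    #include <vector>

    // Re-layout a row-major (rows x cols) matrix so element (i, j) moves to
    // index i + j * rows -- the indexing used by trans_weight_data above.
    std::vector<float> transpose_row_major(const std::vector<float>& w,
                                           int rows, int cols) {
      std::vector<float> t(w.size());
      for (int i = 0; i < rows; ++i)
        for (int j = 0; j < cols; ++j)
          t[i + j * rows] = w[i * cols + j];
      return t;
    }

    // Example: a 2x3 weight {1,2,3, 4,5,6} becomes {1,4, 2,5, 3,6}.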
paddle/fluid/inference/anakin/convert/fc.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FcBaseOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   FcBaseOpConverter() = default;

@@ -33,15 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
 };

 // with bias
-template <typename TargetT>
-class FcOpConverter : public FcBaseOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FcOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
  public:
   FcOpConverter() = default;
 };

 // without bias
-template <typename TargetT>
-class MulOpConverter : public FcBaseOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class MulOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
  public:
   MulOpConverter() = default;
 };
paddle/fluid/inference/anakin/convert/flatten.cc @ e14ab180

@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void FlattenOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void FlattenOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -46,8 +46,21 @@ void FlattenOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten,
-                                  FlattenOpConverter<::anakin::saber::NV>);
+using flatten_nv_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using flatten_nv_int8 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten, flatten_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten,
-                                 FlattenOpConverter<::anakin::saber::X86>);
+using flatten_cpu_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using flatten_cpu_int8 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_int8);
paddle/fluid/inference/anakin/convert/flatten.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class FlattenOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FlattenOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   FlattenOpConverter() = default;
paddle/fluid/inference/anakin/convert/helper.cc (new file, 100644) @ e14ab180

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/anakin/convert/helper.h"

namespace paddle {
namespace inference {
namespace anakin {

std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable& var, const platform::Place& place) {
  auto& src = var.Get<framework::LoDTensor>();
  std::unique_ptr<framework::LoDTensor> dst(new framework::LoDTensor());
  dst->Resize(src.dims());
  TensorCopySync((src), place, dst.get());
  return dst;
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/helper.h (new file, 100644) @ e14ab180

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <map>
#include <memory>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h"

#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"

using anakin::saber::Shape;
using anakin::AK_FLOAT;
using anakin::PBlock;
using anakin::graph::GraphGlobalMem;

namespace paddle {
namespace inference {
namespace anakin {

std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable& var, const platform::Place& place);

template <typename T>
PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
                              std::vector<int> shape) {
  while (shape.size() < 4) {
    shape.insert(shape.begin(), 1);
  }
  Shape anakin_shape(shape);
  auto* weight =
      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
  float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
  std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
  weight->d_tensor().set_shape(anakin_shape);
  weight->d_tensor().copy_from(weight->h_tensor());
  return weight;
}

template <typename T>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
                              std::vector<int> shape_vec) {
  while (shape_vec.size() < 4) {
    shape_vec.insert(shape_vec.begin(), 1);
  }
  Shape shape(shape_vec);
  auto* weight =
      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
  auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
  std::copy(std::begin(vec), std::end(vec), weight_data);
  weight->d_tensor().set_shape(shape);
  weight->d_tensor().copy_from(weight->h_tensor());
  return weight;
}

template <typename T>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
  int size = vec.size();
  return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
}

template <typename T>
PBlock<T>* pblock_from_var(const framework::Variable& var) {
  auto tensor = tensor_from_var(var, platform::CPUPlace());
  auto shape = framework::vectorize2int(tensor->dims());
  return pblock_from_tensor<T>(*tensor, shape);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
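The new helpers collect the host-to-device copy boilerplate the converters used to repeat: pblock_from_tensor and pblock_from_vector both left-pad the shape with leading 1s until it has four dimensions before building the Anakin Shape, and the single-argument pblock_from_vector defaults the shape to {1, 1, 1, size}, so a bias of length n is stored as [1, 1, 1, n]. A small standalone sketch of just that padding rule, in plain C++ without Anakin types:

    #include <cassert>
    #include <vector>

    // Same rule as pblock_from_tensor / pblock_from_vector: prepend 1s to rank 4.
    std::vector<int> pad_to_4d(std::vector<int> shape) {
      while (shape.size() < 4) shape.insert(shape.begin(), 1);
      return shape;
    }

    int main() {
      assert((pad_to_4d({128}) == std::vector<int>{1, 1, 1, 128}));
      assert((pad_to_4d({64, 3, 3, 3}) == std::vector<int>{64, 3, 3, 3}));
      return 0;
    }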
paddle/fluid/inference/anakin/convert/im2sequence.cc @ e14ab180

@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Im2SequenceConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Im2SequenceConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -55,5 +55,18 @@ void Im2SequenceConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence,
-                                  Im2SequenceConverter<::anakin::saber::NV>);
+#ifdef PADDLE_WITH_CUDA
+using im2sequence_nv_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using im2sequence_nv_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_int8);
+#endif
+
+using im2sequence_cpu_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using im2sequence_cpu_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_int8);
paddle/fluid/inference/anakin/convert/im2sequence.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Im2SequenceConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Im2SequenceConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   Im2SequenceConverter() = default;
paddle/fluid/inference/anakin/convert/op_converter.h @ e14ab180

@@ -32,9 +32,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionT>
 class AnakinOpConverter {
-  using AnakinEngineT = AnakinEngine<TargetT, ::anakin::Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, PrecisionT>;

  public:
   AnakinOpConverter() = default;

@@ -96,6 +96,13 @@ class AnakinOpConverter {
       engine->Graph()->RegistVar(output);
     }
     engine->Freeze();
+    // Add scale for tensor in int8 mode.
+    auto tensor_scales = engine->GetTensorScales();
+    for (auto &item : tensor_scales) {
+      engine->Graph()->SetVarScale(item.first, item.second);
+    }
+
     for (auto &input : inputs) {
       if (parameters.count(input)) continue;
       std::vector<int> input_shape;

@@ -136,52 +143,78 @@ class AnakinOpConverter {
   AnakinEngineT *engine_{nullptr};

  private:
-  std::unordered_map<std::string, AnakinOpConverter<TargetT> *> converters_;
+  std::unordered_map<std::string, AnakinOpConverter<TargetT, PrecisionT> *>
+      converters_;
   framework::Scope *scope_{nullptr};
   std::mutex mutex_;
 };

-template class AnakinOpConverter<::anakin::saber::NV>;
-template class AnakinOpConverter<::anakin::saber::X86>;
+template class AnakinOpConverter<::anakin::saber::NV,
+                                 ::anakin::Precision::FP32>;
+template class AnakinOpConverter<::anakin::saber::NV,
+                                 ::anakin::Precision::INT8>;
+template class AnakinOpConverter<::anakin::saber::X86,
+                                 ::anakin::Precision::FP32>;
+template class AnakinOpConverter<::anakin::saber::X86,
+                                 ::anakin::Precision::INT8>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__,              \
-                                          place_type__, place_class__)         \
-  struct anakin_##op_type__##_##place_type__##_converter                       \
-      : public ::paddle::framework::Registrar {                                \
-    anakin_##op_type__##_##place_type__##_converter() {                        \
-      LOG(INFO) << "register convert " << #op_type__ << " ";                   \
-      ::paddle::inference::Registry<                                           \
-          ::paddle::inference::anakin::AnakinOpConverter<place_class__>>::     \
-          Global()                                                              \
-              .Register<::paddle::inference::anakin::Converter__>(#op_type__); \
-    }                                                                           \
-  };                                                                            \
-  anakin_##op_type__##_##place_type__##_converter                              \
-      anakin_##op_type__##_##place_type__##_converter__;                       \
-  int TouchConverterRegister_anakin_##op_type__##_##place_type__() {           \
-    anakin_##op_type__##_##place_type__##_converter__.Touch();                 \
-    return 0;                                                                   \
-  }
+#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__,              \
+                                          place_type__, place_class__,         \
+                                          precision_type__, precision_class__) \
+  struct anakin_##op_type__##_##place_type__##_##precision_type__##_converter  \
+      : public ::paddle::framework::Registrar {                                \
+    anakin_##op_type__##_##place_type__##_##precision_type__##_converter() {   \
+      LOG(INFO) << "register convert " << #op_type__ << " ";                   \
+      ::paddle::inference::Registry<                                           \
+          ::paddle::inference::anakin::AnakinOpConverter<                      \
+              place_class__, precision_class__>>::Global()                     \
+          .Register<Converter__>(#op_type__);                                  \
+    }                                                                           \
+  };                                                                            \
+  anakin_##op_type__##_##place_type__##_##precision_type__##_converter         \
+      anakin_##op_type__##_##place_type__##_##precision_type__##_converter__;  \
+  int Touch_anakin_##op_type__##_##place_type__##_##precision_type__() {       \
+    anakin_##op_type__##_##place_type__##_##precision_type__##_converter__     \
+        .Touch();                                                              \
+    return 0;                                                                   \
+  }

-#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
-  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
-                                    ::anakin::saber::NV)
+#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
+                                    ::anakin::saber::NV, FP32,    \
+                                    ::anakin::Precision::FP32)
+
+#define REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA,      \
+                                    ::anakin::saber::NV, INT8,         \
+                                    ::anakin::Precision::INT8)

-#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
-  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \
-                                    ::anakin::saber::X86)
+#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \
+                                    ::anakin::saber::X86, FP32,  \
+                                    ::anakin::Precision::FP32)
+
+#define REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU,      \
+                                    ::anakin::saber::X86, INT8,       \
+                                    ::anakin::Precision::INT8)

-#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__)                    \
-  extern int TouchConverterRegister_anakin_##op_type__##_##place_type__();    \
-  int use_op_converter_anakin_##op_type__##_##place_type__                    \
-      __attribute__((unused)) =                                               \
-          TouchConverterRegister_anakin_##op_type__##_##place_type__();
+#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__)   \
+  extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
+  int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__   \
+      __attribute__((unused)) =                                                \
+          Touch_anakin_##op_type__##_##place_type__##_##precision_type__();

-#define USE_ANAKIN_CONVERTER(op_type__) \
-  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA)
-#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
-  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU)
+#define USE_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
+#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)
+#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
+#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
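With the precision parameter added, each converter is registered once per (place, precision) pair, and translation units that rely on the static registrars pull them in through the USE_* macros. A hedged sketch of the client side: the op names follow the registrations in the .cc files above, but exactly which source files in the build carry these USE_ statements is not shown in this diff.

    // Force-link the CUDA registrations for conv2d_fusion in both precisions.
    USE_ANAKIN_CONVERTER(conv2d_fusion);       // expands to (..., CUDA, FP32)
    USE_INT8_ANAKIN_CONVERTER(conv2d_fusion);  // expands to (..., CUDA, INT8)

    // CPU counterparts for the fc converter.
    USE_CPU_ANAKIN_CONVERTER(fc);
    USE_CPU_INT8_ANAKIN_CONVERTER(fc);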
paddle/fluid/inference/anakin/convert/pool2d.cc @ e14ab180

@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Pool2dOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Pool2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -72,8 +72,21 @@ void Pool2dOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d,
-                                  Pool2dOpConverter<::anakin::saber::NV>);
+using pool2d_nv_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using pool2d_nv_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_float32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d,
-                                 Pool2dOpConverter<::anakin::saber::X86>);
+using pool2d_cpu_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using pool2d_cpu_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_float32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_int8);
paddle/fluid/inference/anakin/convert/pool2d.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Pool2dOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Pool2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   Pool2dOpConverter() = default;
paddle/fluid/inference/anakin/convert/relu.cc @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ReluOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ReluOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -36,8 +36,8 @@ void ReluOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "alpha", 0);
 }

-template <typename TargetT>
-void LeakyReluOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void LeakyReluOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -58,10 +58,35 @@ void LeakyReluOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu,
-                                  LeakyReluOpConverter<::anakin::saber::NV>);
+using relu_nv_fp32 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using leaky_nv_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using relu_nv_int8 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using leaky_nv_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, relu_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(relu, relu_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu,
-                                 LeakyReluOpConverter<::anakin::saber::X86>);
+using relu_cpu_fp32 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using leaky_cpu_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using relu_cpu_int8 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using leaky_cpu_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, relu_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(relu, relu_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_int8);
paddle/fluid/inference/anakin/convert/relu.h @ e14ab180

@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ReluOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   ReluOpConverter() = default;

@@ -34,8 +34,8 @@ class ReluOpConverter : public AnakinOpConverter<TargetT> {
   virtual ~ReluOpConverter() {}
 };

-template <typename TargetT>
-class LeakyReluOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class LeakyReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   LeakyReluOpConverter() = default;
paddle/fluid/inference/anakin/convert/reshape.cc @ e14ab180

@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ReshapeOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ReshapeOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -47,9 +47,21 @@ void ReshapeOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape,
-                                  ReshapeOpConverter<::anakin::saber::NV>);
+using reshape_nv_fp32 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using reshape_nv_int8 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape, reshape_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape,
-                                 ReshapeOpConverter<::anakin::saber::X86>);
+using reshape_cpu_fp32 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using reshape_cpu_int8 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_int8);
paddle/fluid/inference/anakin/convert/reshape.h @ e14ab180

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ReshapeOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ReshapeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   ReshapeOpConverter() = default;
paddle/fluid/inference/anakin/convert/roi_align.cc
浏览文件 @
e14ab180
...
@@ -16,17 +16,12 @@
...
@@ -16,17 +16,12 @@
#include <algorithm>
#include <algorithm>
#include <map>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
template
<
typename
TargetT
>
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
void
RoiAlignOpConverter
<
TargetT
>::
operator
()(
void
RoiAlignOpConverter
<
TargetT
,
PrecisionT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
@@ -57,8 +52,21 @@ void RoiAlignOpConverter<TargetT>::operator()(
...
@@ -57,8 +52,21 @@ void RoiAlignOpConverter<TargetT>::operator()(
}
// namespace paddle
}
// namespace paddle
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
roi_align
,
using
roi_align_nv_fp32
=
RoiAlignOpConverter
<::
anakin
::
saber
::
NV
>
);
::
paddle
::
inference
::
anakin
::
RoiAlignOpConverter
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>
;
using
roi_align_nv_int8
=
::
paddle
::
inference
::
anakin
::
RoiAlignOpConverter
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
INT8
>
;
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
roi_align
,
roi_align_nv_fp32
);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER
(
roi_align
,
roi_align_nv_int8
);
#endif
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
roi_align
,
RoiAlignOpConverter
<::
anakin
::
saber
::
X86
>
);
using
roi_align_cpu_fp32
=
::
paddle
::
inference
::
anakin
::
RoiAlignOpConverter
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
FP32
>
;
using
roi_align_cpu_int8
=
::
paddle
::
inference
::
anakin
::
RoiAlignOpConverter
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
INT8
>
;
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
roi_align
,
roi_align_cpu_fp32
);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER
(
roi_align
,
roi_align_cpu_int8
);
paddle/fluid/inference/anakin/convert/roi_align.h
浏览文件 @
e14ab180
...
@@ -22,8 +22,8 @@ namespace paddle {
...
@@ -22,8 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
template
<
typename
TargetT
>
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
class
RoiAlignOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
class
RoiAlignOpConverter
:
public
AnakinOpConverter
<
TargetT
,
PrecisionT
>
{
public:
public:
RoiAlignOpConverter
()
=
default
;
RoiAlignOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/scale.cc
浏览文件 @
e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
...
@@ -20,8 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
template
<
typename
TargetT
>
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
void
ScaleOpConverter
<
TargetT
>::
operator
()(
void
ScaleOpConverter
<
TargetT
,
PrecisionT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
@@ -49,4 +49,22 @@ void ScaleOpConverter<TargetT>::operator()(
...
@@ -49,4 +49,22 @@ void ScaleOpConverter<TargetT>::operator()(
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
scale
,
ScaleOpConverter
<::
anakin
::
saber
::
NV
>
);
#ifdef PADDLE_WITH_CUDA
using
scale_nv_fp32
=
::
paddle
::
inference
::
anakin
::
ScaleOpConverter
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>
;
using
scale_nv_int8
=
::
paddle
::
inference
::
anakin
::
ScaleOpConverter
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
INT8
>
;
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
scale
,
scale_nv_fp32
);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER
(
scale
,
scale_nv_int8
);
#endif
using
scale_cpu_fp32
=
::
paddle
::
inference
::
anakin
::
ScaleOpConverter
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
FP32
>
;
using
scale_cpu_int8
=
::
paddle
::
inference
::
anakin
::
ScaleOpConverter
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
INT8
>
;
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
scale
,
scale_cpu_fp32
);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER
(
scale
,
scale_cpu_int8
);
paddle/fluid/inference/anakin/convert/scale.h
浏览文件 @
e14ab180
...
@@ -22,8 +22,8 @@ namespace paddle {
...
@@ -22,8 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
template
<
typename
TargetT
>
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
class
ScaleOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
class
ScaleOpConverter
:
public
AnakinOpConverter
<
TargetT
,
PrecisionT
>
{
public:
public:
ScaleOpConverter
()
=
default
;
ScaleOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/softmax.cc (view file @ e14ab180)
@@ -18,8 +18,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-void SoftMaxOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SoftMaxOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -45,9 +45,22 @@ void SoftMaxOpConverter<TargetT>::operator()(
 }  // namespace paddle
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter<::anakin::saber::NV>);
+using sm_nv_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sm_nv_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, sm_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(softmax, sm_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter<::anakin::saber::X86>);
+using sm_cpu_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sm_cpu_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, sm_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(softmax, sm_cpu_int8);

paddle/fluid/inference/anakin/convert/softmax.h (view file @ e14ab180)
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-class SoftMaxOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SoftMaxOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   SoftMaxOpConverter() = default;

paddle/fluid/inference/anakin/convert/split.cc (view file @ e14ab180)
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-void SplitOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SplitOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -56,7 +56,22 @@ void SplitOpConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::NV>);
+using split_nv_fp32 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using split_nv_int8 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, split_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(split, split_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::X86>);
+using split_cpu_fp32 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using split_cpu_int8 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(split, split_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(split, split_cpu_int8);

paddle/fluid/inference/anakin/convert/split.h (view file @ e14ab180)
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-class SplitOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SplitOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   SplitOpConverter() = default;

paddle/fluid/inference/anakin/convert/sum.cc (view file @ e14ab180)
@@ -23,11 +23,10 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-void SumOpConverter<TargetT>::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SumOpConverter<TargetT, PrecisionT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -49,6 +48,21 @@ void SumOpConverter<TargetT>::operator()(const framework::proto::OpDesc &op,
 }  // namespace paddle
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::NV>);
+using sum_nv_fp32 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sum_nv_int8 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, sum_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sum, sum_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::X86>);
+using sum_cpu_fp32 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sum_cpu_int8 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, sum_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sum, sum_cpu_int8);

paddle/fluid/inference/anakin/convert/sum.h (view file @ e14ab180)
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-class SumOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SumOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   SumOpConverter() = default;
paddle/fluid/inference/anakin/convert/test_activation_op.cc (view file @ e14ab180)
@@ -27,8 +27,8 @@ static void test_activation_op(const std::string& op_type,
                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
@@ -57,6 +57,7 @@ TEST(tanh_op, gpu) {
 }
 #endif
+/*
 TEST(sigm_op, cpu) {
   platform::CPUPlace cpu_place;
   platform::CPUDeviceContext ctx(cpu_place);
@@ -68,6 +69,7 @@ TEST(tanh_op, cpu) {
   platform::CPUDeviceContext ctx(cpu_place);
   test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
 }
+*/
 }  // namespace anakin
 }  // namespace inference
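Note that the unit tests keep validating at FP32 only: the precision is now an explicit template argument of the validator, and the CPU sigmoid/tanh cases are commented out by this commit. A self-contained sketch of a test helper parameterized the same way (ValidatorSketch is hypothetical, not the real AnakinConvertValidation):

#include <cassert>
#include <string>
#include <unordered_set>
#include <vector>

enum class Precision { FP32, INT8 };
struct NV {};
struct X86 {};

template <typename TargetT, Precision PrecisionT>
class ValidatorSketch {
 public:
  explicit ValidatorSketch(bool use_gpu) : use_gpu_(use_gpu) {}
  void DeclInputVar(const std::string& name, const std::vector<int>& shape) {
    inputs_.insert(name);
    (void)shape;  // a real validator would allocate and randomize the tensor
  }
  bool Execute() const { return !inputs_.empty(); }

 private:
  bool use_gpu_;
  std::unordered_set<std::string> inputs_;
};

template <typename TargetT>
void test_activation_sketch(bool use_gpu) {
  // Precision is fixed to FP32 here, mirroring the tests above.
  ValidatorSketch<TargetT, Precision::FP32> validator(use_gpu);
  validator.DeclInputVar("act-X", {10, 6, 1, 1});
  assert(validator.Execute());
}

int main() { test_activation_sketch<NV>(true); }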
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc (view file @ e14ab180)
@@ -28,8 +28,8 @@ void test_affine_channel_op(const platform::DeviceContext& context,
   std::unordered_set<std::string> parameters({"scale", "bias"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 3, 5, 2});
   validator.DeclOutputVar("out", {1, 3, 5, 2});
   validator.DeclParamVar("scale", {3});

paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc (view file @ e14ab180)
@@ -25,8 +25,8 @@ void test_batchnorm_op(const platform::DeviceContext& context, bool use_gpu) {
       {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
        "batch_norm_variance"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   std::vector<int> param_shape{2};
   validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});

paddle/fluid/inference/anakin/convert/test_concat_op.cc (view file @ e14ab180)
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_concat_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
   validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
   validator.DeclInputVar("concat_x3", {1, 1, 1, 1});

paddle/fluid/inference/anakin/convert/test_conv2d_op.cc (view file @ e14ab180)
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_conv2d_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"conv2d-Y"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
   validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
   validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});

paddle/fluid/inference/anakin/convert/test_dropout_op.cc (view file @ e14ab180)
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_dropout_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
   validator.DeclOutputVar("mask", {1, 1, 2, 2});

paddle/fluid/inference/anakin/convert/test_elementwise_op.cc (view file @ e14ab180)
@@ -27,8 +27,8 @@ static void test_elementwise_op(const std::string& op_type,
                                 bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclInputVar("y", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});

paddle/fluid/inference/anakin/convert/test_fc_op.cc (view file @ e14ab180)
@@ -25,8 +25,8 @@ void test_mul_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"mul_y"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("mul_x", {1, 1, 2, 2});
   validator.DeclParamVar("mul_y", {4, 2});
   validator.DeclOutputVar("mul_out", {1, 2});

paddle/fluid/inference/anakin/convert/test_flatten_op.cc (view file @ e14ab180)
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_flatten_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
   validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
   framework::OpDesc desc;

paddle/fluid/inference/anakin/convert/test_pool2d_op.cc (view file @ e14ab180)
@@ -25,8 +25,8 @@ void test_pool2d(const platform::DeviceContext& context, bool use_gpu,
                  std::string pool_type = "max") {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   // The ITensor's Dims should not contain the batch size.
   // So, the ITensor's Dims of input and output should be C * H * W.
paddle/fluid/inference/anakin/convert/test_relu_op.cc (view file @ e14ab180)
@@ -27,8 +27,8 @@ static void test_activation_op(const std::string& op_type,
                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
@@ -60,20 +60,6 @@ TEST(leaky_relu_op, gpu) {
 }
 #endif
-/* seems bug here
-TEST(relu_op, cpu) {
-  platform::CPUPlace cpu_place;
-  platform::CPUDeviceContext ctx(cpu_place);
-  test_activation_op<::anakin::saber::X86>("relu", ctx, false);
-}
-TEST(leaky_relu_op, cpu) {
-  platform::CPUPlace cpu_place;
-  platform::CPUDeviceContext ctx(cpu_place);
-  test_activation_op<::anakin::saber::X86>("leaky_relu", ctx, false);
-}
-*/
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

paddle/fluid/inference/anakin/convert/test_reshape_op.cc (view file @ e14ab180)
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_reshape1_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   // validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
   // validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
@@ -49,8 +49,8 @@ template <typename TargetT>
 void test_reshape2_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("reshape-X", {1, 2, 4});
   validator.DeclOutputVar("reshape-Out", {1, 4, 2});

paddle/fluid/inference/anakin/convert/test_softmax_op.cc (view file @ e14ab180)
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_softmax_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("softmax-X", {1, 10, 2});
   validator.DeclOutputVar("softmax-Out", {1, 10, 2});

paddle/fluid/inference/anakin/convert/test_split_op.cc (view file @ e14ab180)
@@ -27,8 +27,8 @@ void AnakinSliceTest(const platform::DeviceContext &context, bool use_gpu,
                      const std::vector<int> &sections) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("split_input", in_shape);
   std::vector<std::string> output_vars;

paddle/fluid/inference/anakin/convert/test_sum_op.cc (view file @ e14ab180)
@@ -26,8 +26,8 @@ template <typename TargetT>
 static void test_sum_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
   validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
   validator.DeclOutputVar("sum_out", {1, 2, 1, 2});

paddle/fluid/inference/anakin/convert/test_transpose_op.cc (view file @ e14ab180)
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_transpose1_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
@@ -47,8 +47,8 @@ template <typename TargetT>
 void test_transpose2_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {3, 5, 4});
paddle/fluid/inference/anakin/convert/transpose.cc (view file @ e14ab180)
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-void TransposeOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void TransposeOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -50,9 +50,17 @@ void TransposeOpConverter<TargetT>::operator()(
 }  // namespace paddle
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter<::anakin::saber::NV>);
+using transpose_nv_fp32 = ::paddle::inference::anakin::TransposeOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using transpose_nv_int8 = ::paddle::inference::anakin::TransposeOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, transpose_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter<::anakin::saber::X86>);
+using transpose_cpu_fp32 = ::paddle::inference::anakin::TransposeOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using transpose_cpu_int8 = ::paddle::inference::anakin::TransposeOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_int8);

paddle/fluid/inference/anakin/convert/transpose.h (view file @ e14ab180)
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-class TransposeOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TransposeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   TransposeOpConverter() = default;
paddle/fluid/inference/anakin/convert/ut_helper.h (view file @ e14ab180)
@@ -61,7 +61,7 @@ void RandomizeTensor(framework::LoDTensor* tensor,
   auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
   for (size_t i = 0; i < num_elements; i++) {
-    *(temp_data + i) = random(-128., 128.);
+    *(temp_data + i) = random(0., 1.);
   }
   TensorCopySync(temp_tensor, place, tensor);
@@ -72,9 +72,9 @@ void RandomizeTensor(framework::LoDTensor* tensor,
  * anakin
  * layer.
  */
-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionT>
 class AnakinConvertValidation {
-  using AnakinNvEngineT = AnakinEngine<TargetT, Precision::FP32>;
+  using AnakinNvEngineT = AnakinEngine<TargetT, PrecisionT>;
  public:
   AnakinConvertValidation() = delete;
@@ -84,7 +84,7 @@ class AnakinConvertValidation {
                           const platform::DeviceContext& ctx,
                           bool use_gpu = true)
       : parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) {
-    engine_.reset(new AnakinEngine<TargetT, Precision::FP32>(true));
+    engine_.reset(new AnakinEngine<TargetT, PrecisionT>(true));
   }
   // Declare a Variable as input with random initialization.
@@ -127,7 +127,7 @@ class AnakinConvertValidation {
     // should init anakin engine here.
     auto& block_desc = program_desc_.Block(framework::kRootBlockIndex);
-    Singleton<AnakinOpConverter<TargetT>>::Global().ConvertOp(
+    Singleton<AnakinOpConverter<TargetT, PrecisionT>>::Global().ConvertOp(
         desc, block_desc, parameters_, *scope_, engine_.get(),
        true /*test_mode*/);
     engine_->Freeze();
@@ -213,8 +213,15 @@ class AnakinConvertValidation {
   bool use_gpu_{true};
 };
-template class AnakinConvertValidation<::anakin::saber::NV>;
-template class AnakinConvertValidation<::anakin::saber::X86>;
+template class AnakinConvertValidation<::anakin::saber::NV,
+                                       ::anakin::Precision::FP32>;
+template class AnakinConvertValidation<::anakin::saber::X86,
+                                       ::anakin::Precision::FP32>;
+template class AnakinConvertValidation<::anakin::saber::NV,
+                                       ::anakin::Precision::INT8>;
+template class AnakinConvertValidation<::anakin::saber::X86,
+                                       ::anakin::Precision::INT8>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
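Both ut_helper.h above and engine.cc below end with explicit template instantiations, one per target × precision combination, so the out-of-line template definitions get compiled for the INT8 variants as well. A minimal, self-contained sketch of that mechanism (all names are invented for illustration):

#include <cstdio>

enum class Precision { FP32, INT8 };
struct NV {};
struct X86 {};

template <typename TargetT, Precision PrecisionT>
class EngineSketch {
 public:
  void Run();  // defined out of line below, as in a .h/.cc split
};

template <typename TargetT, Precision PrecisionT>
void EngineSketch<TargetT, PrecisionT>::Run() {
  std::printf("run %s\n", PrecisionT == Precision::INT8 ? "int8" : "fp32");
}

// Explicit instantiation: forces code generation for exactly these
// combinations in this translation unit, mirroring the four
// AnakinConvertValidation / AnakinEngine instantiations in the diff.
template class EngineSketch<NV, Precision::FP32>;
template class EngineSketch<NV, Precision::INT8>;
template class EngineSketch<X86, Precision::FP32>;
template class EngineSketch<X86, Precision::INT8>;

int main() {
  EngineSketch<NV, Precision::INT8> engine;
  engine.Run();
  return 0;
}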
paddle/fluid/inference/anakin/engine.cc (view file @ e14ab180)
@@ -172,11 +172,20 @@ AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
 #ifdef PADDLE_WITH_CUDA
 template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
-template class AnakinEngineManager<::anakin::saber::NV>;
+template class AnakinEngineManager<::anakin::saber::NV,
+                                   ::anakin::Precision::FP32>;
+template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::INT8>;
+template class AnakinEngineManager<::anakin::saber::NV,
+                                   ::anakin::Precision::INT8>;
 #endif
 template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
-template class AnakinEngineManager<::anakin::saber::X86>;
+template class AnakinEngineManager<::anakin::saber::X86,
+                                   ::anakin::Precision::FP32>;
+template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::INT8>;
+template class AnakinEngineManager<::anakin::saber::X86,
+                                   ::anakin::Precision::INT8>;
 // template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
 }  // namespace anakin
paddle/fluid/inference/anakin/engine.h (view file @ e14ab180)
@@ -93,6 +93,12 @@ class AnakinEngine {
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
+  void AddTensorScale(const std::string &tensor_name, float scale) {
+    tensor_scales_[tensor_name] = scale;
+  }
+  std::unordered_map<std::string, float> GetTensorScales() {
+    return tensor_scales_;
+  }
   void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
                const std::map<std::string, framework::LoDTensor *> &outputs);
 #ifdef PADDLE_WITH_CUDA
@@ -112,11 +118,12 @@ class AnakinEngine {
   std::unique_ptr<GraphT> graph_;
   std::unique_ptr<NetT> net_;
   std::vector<std::string> program_inputs_;
+  std::unordered_map<std::string, float> tensor_scales_;
 };

-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionType>
 class AnakinEngineManager {
-  using AnakinEngineT = AnakinEngine<TargetT, Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;
  public:
   bool HasEngine(const std::string &name) const {
@@ -132,7 +139,7 @@ class AnakinEngineManager {
                          std::vector<std::string> program_inputs,
                          std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
-    auto *p = new AnakinEngine<TargetT, Precision::FP32>(
+    auto *p = new AnakinEngine<TargetT, PrecisionType>(
         need_summary, device, max_batch_size, max_input_shape, program_inputs);
     engines_[engine_name].reset(p);
     return p;
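The new AddTensorScale/GetTensorScales pair simply keeps a map from tensor name to calibration scale that the INT8 graph builder can consult later. Below is a small sketch of recording such scales and applying one of them; the symmetric 127.0 quantization factor is an assumption made for illustration, not something stated in the diff.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

class ScaleTable {
 public:
  void AddTensorScale(const std::string& name, float scale) {
    tensor_scales_[name] = scale;
  }
  const std::unordered_map<std::string, float>& GetTensorScales() const {
    return tensor_scales_;
  }

 private:
  std::unordered_map<std::string, float> tensor_scales_;
};

// Illustrative symmetric quantization of one value using a recorded scale.
int8_t QuantizeSketch(float value, float scale) {
  float q = std::round(value / scale * 127.0f);
  return static_cast<int8_t>(std::max(-127.0f, std::min(127.0f, q)));
}

int main() {
  ScaleTable table;
  table.AddTensorScale("conv1_out", 0.05f);
  float s = table.GetTensorScales().at("conv1_out");
  std::cout << static_cast<int>(QuantizeSketch(0.6f, s)) << "\n";
  return 0;
}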
paddle/fluid/inference/analysis/argument.h (view file @ e14ab180)
@@ -169,7 +169,13 @@ struct Argument {
                       anakin_max_shape_t);
   DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int);
   DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
+  DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode,
+                      AnalysisConfig::Precision);
   DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
+  DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter,
+                      std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter,
+                      std::vector<std::string>);

   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
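DECL_ARGUMENT_FIELD is Paddle's field-plus-accessors macro; the diff merely declares three more fields for the Anakin precision mode and the two filter lists. As a rough sketch of what such a macro generally expands to (a simplified stand-in, not the exact Paddle definition):

#include <string>
#include <vector>

using StrVec = std::vector<std::string>;

// Simplified sketch: declares a member plus getter/setter, similar in
// spirit to DECL_ARGUMENT_FIELD(field__, Func__, type__) in argument.h.
#define SKETCH_ARGUMENT_FIELD(field__, Func__, type__)   \
 public:                                                  \
  const type__& Func__() const { return field__##_; }     \
  void Set##Func__(const type__& v) { field__##_ = v; }   \
                                                          \
 private:                                                 \
  type__ field__##_;

struct ArgumentSketch {
  SKETCH_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter, StrVec)
};

int main() {
  ArgumentSketch arg;
  arg.SetAnakinOpsFilter({"softmax"});
  return arg.AnakinOpsFilter().size() == 1 ? 0 : 1;
}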
paddle/fluid/inference/analysis/ir_pass_manager.cc (view file @ e14ab180)
@@ -123,6 +123,11 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
                                        argument->anakin_max_input_shape()));
       pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));
+      bool enable_int8 = argument->anakin_precision_mode() ==
+                         AnalysisConfig::Precision::kInt8;
+      pass->Set("enable_int8", new bool(enable_int8));
+      pass->Set("anakin_ops_filter",
+                new std::vector<std::string>(argument->anakin_ops_filter()));
     }
     pre_pass = pass_name;
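The pass manager reduces the configured precision to a plain flag before handing it over: enable_int8 is true only when the Anakin precision mode is kInt8, and the op filter list travels with it as a pass attribute. A sketch of the same wiring against a hypothetical attribute bag standing in for framework::ir::Pass::Set:

#include <cassert>
#include <map>
#include <string>
#include <vector>

enum class Precision { kFloat32, kInt8 };

// Hypothetical attribute bag; the real pass stores heap-allocated attributes.
struct PassAttrs {
  std::map<std::string, bool> bools;
  std::map<std::string, std::vector<std::string>> string_lists;
};

void ConfigureAnakinPass(PassAttrs* pass, Precision precision,
                         const std::vector<std::string>& ops_filter) {
  bool enable_int8 = precision == Precision::kInt8;
  pass->bools["enable_int8"] = enable_int8;
  pass->string_lists["anakin_ops_filter"] = ops_filter;
}

int main() {
  PassAttrs attrs;
  ConfigureAnakinPass(&attrs, Precision::kInt8, {"softmax"});
  assert(attrs.bools["enable_int8"]);
  return 0;
}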
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc (view file @ e14ab180)
@@ -39,8 +39,14 @@ void analysis::AnakinSubgraphPass::ApplyImpl(
     framework::ir::Graph *graph) const {
   framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);
-  auto teller = [](const framework::ir::Node *node) {
-    if (!node->IsOp() || !node->Op()) return false;
+  auto &anakin_ops_filter = Get<std::vector<std::string>>("anakin_ops_filter");
+  auto teller = [&anakin_ops_filter](const framework::ir::Node *node) {
+    if (!node->IsOp() || !node->Op()) return false;
+    else if (std::find(anakin_ops_filter.begin(), anakin_ops_filter.end(),
+                       node->Op()->Type()) != anakin_ops_filter.end())
+      return false;
     return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
   };
@@ -191,47 +197,71 @@ void AnakinSubgraphPass::CreateAnakinOp(
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
   auto max_input_shape =
       Get<std::map<std::string, std::vector<int>>>("max_input_shape");
-  auto max_batch_size = Get<int>("max_batch_size");
   auto program_inputs = program_desc->GetFeedTargetNames();
   bool use_gpu = Get<bool>("use_gpu");
   SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
+  bool enable_int8 = Get<bool>("enable_int8");
+  SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
+  if (enable_int8) {
+    CreateAnakinEngine<::anakin::Precision::INT8>(&block_desc, params,
+                                                  input_names, output_mapping,
+                                                  program_inputs, engine_key);
+  } else {
+    CreateAnakinEngine<::anakin::Precision::FP32>(&block_desc, params,
+                                                  input_names, output_mapping,
+                                                  program_inputs, engine_key);
+  }
+}
+
+template <::anakin::Precision PrecisionT>
+void AnakinSubgraphPass::CreateAnakinEngine(
+    framework::BlockDesc *block_desc, const std::vector<std::string> &params,
+    const std::set<std::string> &input_names,
+    const std::vector<std::string> &output_mapping,
+    const std::vector<std::string> &program_inputs,
+    const std::string &engine_key) const {
+  framework::BlockDesc block_desc_temp(nullptr, block_desc->Proto());
+  bool use_gpu = Get<bool>("use_gpu");
+  auto max_batch_size = Get<int>("max_batch_size");
+  auto max_input_shape =
+      Get<std::map<std::string, std::vector<int>>>("max_input_shape");
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
     inference::Singleton<
-        anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+        anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
                 max_input_shape, program_inputs, engine_key);
 #endif
   } else {
     inference::Singleton<
-        anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+        anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
        .Create(true, Get<int>("gpu_device_id"), max_batch_size,
                 max_input_shape, program_inputs, engine_key);
   }
   auto *scope = param_scope();
   std::unordered_set<std::string> param_set(params.begin(), params.end());
-  framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   if (use_gpu) {
+#ifdef PADDLE_WITH_CUDA
     auto *anakin_engine =
         inference::Singleton<inference::anakin::AnakinEngineManager<
-            ::anakin::saber::NV>>::Global()
+            ::anakin::saber::NV, PrecisionT>>::Global()
             .Get(engine_key);
     inference::Singleton<
-        inference::anakin::AnakinOpConverter<::anakin::saber::NV>>::Global()
+        inference::anakin::AnakinOpConverter<::anakin::saber::NV,
+                                             PrecisionT>>::Global()
         .ConvertBlockToAnakinEngine(
             &block_desc_temp, scope,
             std::vector<std::string>(input_names.begin(), input_names.end()),
            param_set, output_mapping, anakin_engine);
+#endif
   } else {
     auto *anakin_engine =
         inference::Singleton<inference::anakin::AnakinEngineManager<
-            ::anakin::saber::X86>>::Global()
+            ::anakin::saber::X86, PrecisionT>>::Global()
            .Get(engine_key);
     inference::Singleton<
-        inference::anakin::AnakinOpConverter<::anakin::saber::X86>>::Global()
+        inference::anakin::AnakinOpConverter<::anakin::saber::X86,
+                                             PrecisionT>>::Global()
        .ConvertBlockToAnakinEngine(
            &block_desc_temp, scope,
            std::vector<std::string>(input_names.begin(), input_names.end()),
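CreateAnakinOp now only chooses the precision; the engine construction itself moves into CreateAnakinEngine, a method templated on ::anakin::Precision that is called with INT8 or FP32 depending on the enable_int8 attribute. A reduced sketch of that dispatch shape (BuildEngineSketch is hypothetical, not the real pass code):

#include <iostream>
#include <string>

enum class Precision { FP32, INT8 };

template <Precision PrecisionT>
void BuildEngineSketch(const std::string& engine_key) {
  // In the real pass this would create an AnakinEngineManager<Target, PrecisionT>
  // entry and convert the subgraph block into that engine.
  std::cout << engine_key << " built as "
            << (PrecisionT == Precision::INT8 ? "int8" : "fp32") << "\n";
}

void CreateAnakinOpSketch(bool enable_int8, const std::string& engine_key) {
  if (enable_int8) {
    BuildEngineSketch<Precision::INT8>(engine_key);
  } else {
    BuildEngineSketch<Precision::FP32>(engine_key);
  }
}

int main() {
  CreateAnakinOpSketch(true, "anakin_engine_0");
  return 0;
}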
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h (view file @ e14ab180)
@@ -15,6 +15,7 @@
 #pragma once
 #include <paddle/fluid/framework/ir/fuse_pass_base.h>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/ir/pass.h"
@@ -36,6 +37,13 @@ class AnakinSubgraphPass : public framework::ir::FusePassBase {
                       const std::vector<std::string> &graph_params,
                       std::vector<std::string> *repetitive_params) const;
   void CleanIntermediateOutputs(framework::ir::Node *node);
+  template <::anakin::Precision PrecisionT>
+  void CreateAnakinEngine(framework::BlockDesc *block_desc,
+                          const std::vector<std::string> &params,
+                          const std::set<std::string> &input_names,
+                          const std::vector<std::string> &output_mapping,
+                          const std::vector<std::string> &program_inputs,
+                          const std::string &engine_key) const;
 };

 }  // namespace analysis
paddle/fluid/inference/api/analysis_config.cc (view file @ e14ab180)
@@ -116,6 +116,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(anakin_max_batchsize_);
   CP_MEMBER(anakin_max_input_shape_);
   CP_MEMBER(anakin_min_subgraph_size_);
+  CP_MEMBER(anakin_precision_mode_);
+  CP_MEMBER(anakin_passes_filter_);
+  CP_MEMBER(anakin_ops_filter_);

   // Ir related.
   CP_MEMBER(enable_ir_optim_);
@@ -276,7 +279,10 @@ void AnalysisConfig::Update() {
     pass_builder()->ClearPasses();
     for (const auto &pass : kAnakinSubgraphPasses) {
-      pass_builder()->AppendPass(pass);
+      if (std::find(anakin_passes_filter_.begin(), anakin_passes_filter_.end(),
+                    pass) == anakin_passes_filter_.end()) {
+        pass_builder()->AppendPass(pass);
+      }
     }
   }
@@ -391,11 +397,16 @@ void AnalysisConfig::SwitchIrDebug(int x) {
 }
 void AnalysisConfig::EnableAnakinEngine(
     int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
-    int min_subgraph_size) {
+    int min_subgraph_size, AnalysisConfig::Precision precision_mode,
+    std::vector<std::string> passes_filter,
+    std::vector<std::string> ops_filter) {
   anakin_max_batchsize_ = max_batch_size;
   anakin_max_input_shape_ = max_input_shape;
   anakin_min_subgraph_size_ = min_subgraph_size;
+  anakin_passes_filter_ = passes_filter;
+  anakin_ops_filter_ = ops_filter;
   use_anakin_ = true;
+  anakin_precision_mode_ = precision_mode;
   Update();
 }
 }  // namespace paddle
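With the widened signature a caller opts into INT8 and trims passes or ops from the Anakin path in one call. A usage sketch based on the declaration above; the model path and input-shape map are placeholders, linking against the Paddle inference library is assumed, and the surrounding GPU/feed setup is omitted:

#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_model");  // placeholder model directory

  // Maximum shape per input name, as expected by the Anakin engine.
  std::map<std::string, std::vector<int>> max_input_shape{
      {"image", {1, 3, 224, 224}}};

  config.EnableAnakinEngine(
      /*max_batch_size=*/1, max_input_shape,
      /*min_subgraph_size=*/6,
      /*precision=*/paddle::AnalysisConfig::Precision::kInt8,
      /*passes_filter=*/{},
      /*ops_filter=*/{});
  return 0;
}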
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
e14ab180
...
@@ -386,6 +386,9 @@ void AnalysisPredictor::PrepareArgument() {
...
@@ -386,6 +386,9 @@ void AnalysisPredictor::PrepareArgument() {
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
argument_
.
SetAnakinMinSubgraphSize
(
config_
.
anakin_min_subgraph_size_
);
argument_
.
SetAnakinMinSubgraphSize
(
config_
.
anakin_min_subgraph_size_
);
argument_
.
SetAnakinPrecisionMode
(
config_
.
anakin_precision_mode_
);
argument_
.
SetAnakinPassesFilter
(
config_
.
anakin_passes_filter_
);
argument_
.
SetAnakinOpsFilter
(
config_
.
anakin_ops_filter_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
}
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
e14ab180
...
@@ -152,7 +152,9 @@ struct AnalysisConfig {
   void EnableAnakinEngine(
       int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
-      int min_subgraph_size = 6);
+      int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
+      std::vector<std::string> passes_filter = {},
+      std::vector<std::string> ops_filter = {});
   /** A boolean state indicating whether the Anakin sub-graph engine is used.
    */
...
@@ -291,6 +293,9 @@ struct AnalysisConfig {
   int anakin_max_batchsize_;
   int anakin_min_subgraph_size_{6};
   std::map<std::string, std::vector<int>> anakin_max_input_shape_;
+  Precision anakin_precision_mode_;
+  std::vector<std::string> anakin_passes_filter_;
+  std::vector<std::string> anakin_ops_filter_;
   std::map<std::string, std::string> engine_opt_info_;
   bool use_mkldnn_quantizer_{false};
...
paddle/fluid/inference/api/paddle_pass_builder.cc
...
@@ -73,15 +73,21 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
 // The following passes works for Anakin sub-graph engine.
 const std::vector<std::string> kAnakinSubgraphPasses({
     "infer_clean_graph_pass",                       //
+    "graph_viz_pass",                               //
+    "quant_conv2d_dequant_fuse_pass",               //
+    "graph_viz_pass",                               //
     "simplify_anakin_priorbox_detection_out_pass",  //
     "fillconstant_elementwisemul_fuse",             //
     "fc_fuse_pass",                                 //
     "conv_elementwise_add_fuse_pass",               //
-    "conv_bn_fuse_pass",                            //
-    "conv_elementwise_add_fuse_pass",               //
+    // "conv_bn_fuse_pass",              //
+    // "conv_elementwise_add_fuse_pass", //
     "fc_gru_fuse_pass",                             //
-    "quant_conv2d_dequant_fuse_pass",               //
-    "anakin_subgraph_pass",
+    "graph_viz_pass",                               //
+    "anakin_subgraph_pass",                         //
+    "graph_viz_pass",                               //
+    "fc_gru_fuse_pass",                             //
+    "graph_viz_pass",                               //
 });

 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
...
paddle/fluid/operators/anakin/anakin_engine_op.h
...
@@ -44,6 +44,7 @@ class AnakinEngineOp : public framework::OperatorBase {
   std::string engine_key_;
   std::string engine_serialized_data_;
   bool use_gpu_;
+  bool enable_int8_;

  public:
   AnakinEngineOp(const std::string &type,
...
@@ -55,6 +56,7 @@ class AnakinEngineOp : public framework::OperatorBase {
     engine_key_ = Attr<std::string>("engine_key");
     auto params = Attr<std::vector<std::string>>("parameters");
     use_gpu_ = Attr<bool>("use_gpu");
+    enable_int8_ = Attr<bool>("enable_int8");
     for (const auto &param : params) {
       param_names_.insert(param);
     }
...
@@ -68,11 +70,6 @@ class AnakinEngineOp : public framework::OperatorBase {
   void RunAnakin(const framework::Scope &scope,
                  const platform::Place &dev_place) const {
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    auto &dev_ctx = *pool.Get(dev_place);
-    auto stream =
-        reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
     PADDLE_ENFORCE(!input_names_.empty(), "should pass more than one inputs");

     std::vector<std::string> output_maps =
...
@@ -96,18 +93,35 @@ class AnakinEngineOp : public framework::OperatorBase {
       outputs.insert({output_maps[output_index], fluid_t});
       output_index += 1;
     }
+    if (enable_int8_) {
+      Execute<::anakin::Precision::INT8>(inputs, outputs, dev_place);
+    } else {
+      Execute<::anakin::Precision::FP32>(inputs, outputs, dev_place);
+    }
+  }
+
+  template <::anakin::Precision PrecisionT>
+  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
+               const std::map<std::string, framework::LoDTensor *> &outputs,
+               const platform::Place &dev_place) const {
     if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto &dev_ctx = *pool.Get(dev_place);
+      auto stream =
+          reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx)
+              .stream();
       auto *engine =
           inference::Singleton<inference::anakin::AnakinEngineManager<
-              ::anakin::saber::NV>>::Global()
+              ::anakin::saber::NV, PrecisionT>>::Global()
               .Get(engine_key_);
       engine->Execute(inputs, outputs, stream);
 #endif
     } else {
       auto *engine =
           inference::Singleton<inference::anakin::AnakinEngineManager<
-              ::anakin::saber::X86>>::Global()
+              ::anakin::saber::X86, PrecisionT>>::Global()
               .Get(engine_key_);
       engine->Execute(inputs, outputs);
     }
...
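The new Execute<PrecisionT> template lets the runtime flag enable_int8_ select a compile-time Anakin precision for the engine lookup and execution. Below is a simplified, self-contained sketch of the same dispatch pattern, using a generic enum and stream output instead of the Anakin types.

#include <iostream>

enum class Precision { FP32, INT8 };

// Compile-time precision parameter, analogous to Execute<PrecisionT> above.
template <Precision P>
void Execute() {
  if (P == Precision::INT8) {
    std::cout << "running INT8 engine\n";
  } else {
    std::cout << "running FP32 engine\n";
  }
}

// Runtime flag picks the instantiation, mirroring the enable_int8_ branch.
void Run(bool enable_int8) {
  if (enable_int8) {
    Execute<Precision::INT8>();
  } else {
    Execute<Precision::FP32>();
  }
}

int main() {
  Run(true);   // prints "running INT8 engine"
  Run(false);  // prints "running FP32 engine"
  return 0;
}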
paddle/fluid/pybind/inference_api.cc
...
@@ -16,6 +16,7 @@
 #include <pybind11/stl.h>
 #include <cstring>
 #include <iostream>
+#include <map>
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
...
@@ -230,8 +231,13 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("use_static") = true)
       .def("enable_anakin_engine", &AnalysisConfig::EnableAnakinEngine,
-           py::arg("max_batch_size") = 1, py::arg("max_input_shape") = {},
-           py::arg("min_subgraph_size") = 6)
+           py::arg("max_batch_size") = 1,
+           py::arg("max_input_shape") =
+               std::map<std::string, std::vector<int>>(),
+           py::arg("min_subgraph_size") = 6,
+           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
+           py::arg("passes_filter") = std::vector<std::string>(),
+           py::arg("ops_filter") = std::vector<std::string>())
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug,
            py::arg("x") = true)
...
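A minimal pybind11 sketch, not part of this commit, of the binding pattern used above: keyword-argument defaults for STL containers are written as explicit constructions. The module name and free function are hypothetical stand-ins for the AnalysisConfig method.

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <map>
#include <string>
#include <vector>

namespace py = pybind11;

// Hypothetical stand-in for AnalysisConfig::EnableAnakinEngine.
void EnableEngine(int max_batch_size,
                  std::map<std::string, std::vector<int>> max_input_shape,
                  std::vector<std::string> passes_filter) {}

PYBIND11_MODULE(example, m) {
  // Defaults are concrete objects, matching the py::arg defaults shown above.
  m.def("enable_engine", &EnableEngine, py::arg("max_batch_size") = 1,
        py::arg("max_input_shape") = std::map<std::string, std::vector<int>>(),
        py::arg("passes_filter") = std::vector<std::string>());
}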