PaddlePaddle/Paddle, commit e14ab180
Authored on Apr 11, 2019 by nhzlx
Cherry-pick from 1662, 16797.. : add anakin int8 support
Parent: 7ad182e1

Showing 81 changed files with 1,103 additions and 589 deletions (+1103 / -589)
paddle/fluid/framework/ir/fc_fuse_pass.cc (+2, -1)
paddle/fluid/framework/ir/graph_pattern_detector.cc (+12, -13)
paddle/fluid/framework/ir/graph_pattern_detector.h (+2, -1)
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc (+19, -9)
paddle/fluid/inference/anakin/convert/CMakeLists.txt (+6, -1)
paddle/fluid/inference/anakin/convert/activation.cc (+39, -10)
paddle/fluid/inference/anakin/convert/activation.h (+9, -8)
paddle/fluid/inference/anakin/convert/affine_channel.cc (+24, -55)
paddle/fluid/inference/anakin/convert/affine_channel.h (+2, -2)
paddle/fluid/inference/anakin/convert/batch_norm.cc (+35, -71)
paddle/fluid/inference/anakin/convert/batch_norm.h (+2, -2)
paddle/fluid/inference/anakin/convert/concat.cc (+19, -6)
paddle/fluid/inference/anakin/convert/concat.h (+2, -2)
paddle/fluid/inference/anakin/convert/conv2d.cc (+55, -24)
paddle/fluid/inference/anakin/convert/conv2d.h (+2, -2)
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc (+59, -52)
paddle/fluid/inference/anakin/convert/conv2d_fusion.h (+2, -2)
paddle/fluid/inference/anakin/convert/density_prior_box.cc (+21, -10)
paddle/fluid/inference/anakin/convert/density_prior_box.h (+3, -2)
paddle/fluid/inference/anakin/convert/detection_out.cc (+19, -6)
paddle/fluid/inference/anakin/convert/detection_out.h (+2, -2)
paddle/fluid/inference/anakin/convert/dropout.cc (+21, -16)
paddle/fluid/inference/anakin/convert/dropout.h (+2, -2)
paddle/fluid/inference/anakin/convert/elementwise.cc (+31, -15)
paddle/fluid/inference/anakin/convert/elementwise.h (+6, -4)
paddle/fluid/inference/anakin/convert/fc.cc (+83, -57)
paddle/fluid/inference/anakin/convert/fc.h (+6, -6)
paddle/fluid/inference/anakin/convert/flatten.cc (+19, -6)
paddle/fluid/inference/anakin/convert/flatten.h (+2, -2)
paddle/fluid/inference/anakin/convert/helper.cc (+32, -0)
paddle/fluid/inference/anakin/convert/helper.h (+88, -0)
paddle/fluid/inference/anakin/convert/im2sequence.cc (+17, -4)
paddle/fluid/inference/anakin/convert/im2sequence.h (+2, -2)
paddle/fluid/inference/anakin/convert/op_converter.h (+57, -24)
paddle/fluid/inference/anakin/convert/pool2d.cc (+19, -6)
paddle/fluid/inference/anakin/convert/pool2d.h (+2, -2)
paddle/fluid/inference/anakin/convert/relu.cc (+35, -10)
paddle/fluid/inference/anakin/convert/relu.h (+4, -4)
paddle/fluid/inference/anakin/convert/reshape.cc (+18, -6)
paddle/fluid/inference/anakin/convert/reshape.h (+2, -2)
paddle/fluid/inference/anakin/convert/roi_align.cc (+19, -11)
paddle/fluid/inference/anakin/convert/roi_align.h (+2, -2)
paddle/fluid/inference/anakin/convert/scale.cc (+21, -3)
paddle/fluid/inference/anakin/convert/scale.h (+2, -2)
paddle/fluid/inference/anakin/convert/softmax.cc (+19, -6)
paddle/fluid/inference/anakin/convert/softmax.h (+2, -2)
paddle/fluid/inference/anakin/convert/split.cc (+19, -4)
paddle/fluid/inference/anakin/convert/split.h (+2, -2)
paddle/fluid/inference/anakin/convert/sum.cc (+21, -7)
paddle/fluid/inference/anakin/convert/sum.h (+2, -2)
paddle/fluid/inference/anakin/convert/test_activation_op.cc (+4, -2)
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_concat_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_dropout_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_fc_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_flatten_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_relu_op.cc (+2, -16)
paddle/fluid/inference/anakin/convert/test_reshape_op.cc (+4, -4)
paddle/fluid/inference/anakin/convert/test_softmax_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_split_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_sum_op.cc (+2, -2)
paddle/fluid/inference/anakin/convert/test_transpose_op.cc (+4, -4)
paddle/fluid/inference/anakin/convert/transpose.cc (+14, -6)
paddle/fluid/inference/anakin/convert/transpose.h (+2, -2)
paddle/fluid/inference/anakin/convert/ut_helper.h (+14, -7)
paddle/fluid/inference/anakin/engine.cc (+11, -2)
paddle/fluid/inference/anakin/engine.h (+10, -3)
paddle/fluid/inference/analysis/argument.h (+6, -0)
paddle/fluid/inference/analysis/ir_pass_manager.cc (+5, -0)
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc (+42, -12)
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h (+8, -0)
paddle/fluid/inference/api/analysis_config.cc (+13, -2)
paddle/fluid/inference/api/analysis_predictor.cc (+3, -0)
paddle/fluid/inference/api/paddle_analysis_config.h (+6, -1)
paddle/fluid/inference/api/paddle_pass_builder.cc (+11, -5)
paddle/fluid/operators/anakin/anakin_engine_op.h (+21, -7)
paddle/fluid/pybind/inference_api.cc (+8, -2)
paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -48,8 +48,9 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
   GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
   GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
+  auto base_op_desc = *mul->Op()->Proto();
   // Create an FC Node.
-  OpDesc desc;
+  OpDesc desc(base_op_desc, nullptr);
   std::string fc_x_in = subgraph.at(x)->Name();
   std::string fc_Y_in = w->Name();
   std::string fc_bias_in = fc_bias->Name();
paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1640,7 +1640,8 @@ PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
 void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
                                               const std::string &op_type,
                                               const std::string &weight_name,
-                                              int times) {
+                                              int times,
+                                              const std::string &quant_type) {
   const int kNumFields = 5;
   const int kQuantizedWeightOffset = 0;
   const int kQuantizedOpOffset = 1;
@@ -1648,24 +1649,22 @@ void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
   const int kDequantOpOffset = 3;
   const int kDequantOpOutOffset = 4;
   // the quant op always be one.
-  auto quant_op_in_scale = pattern->NewNode(GetNodeName("quant_op_in_scale"))
-                               ->assert_is_op_input("fake_quantize_range_abs_max", "InScale")
-                               ->AsInput();
-  auto quant_op = pattern->NewNode(GetNodeName("quant_op"))
-                      ->assert_is_op("fake_quantize_range_abs_max");
+  auto quant_op_in_scale = pattern->NewNode(GetNodeName("quant_op_in_scale"))
+                               ->assert_is_op_input(quant_type, "InScale")
+                               ->AsInput();
+  auto quant_op =
+      pattern->NewNode(GetNodeName("quant_op"))->assert_is_op(quant_type);
   auto quant_op_out_scale =
       pattern->NewNode(GetNodeName("quant_op_out_scale"))
-          ->assert_is_op_output("fake_quantize_range_abs_max", "OutScale")
+          ->assert_is_op_output(quant_type, "OutScale")
           ->assert_is_op_input("fake_dequantize_max_abs", "Scale")
           ->AsIntermediate();
-  auto quant_op_out = pattern->NewNode(GetNodeName("quant_op_out"))
-                          ->assert_is_op_output("fake_quantize_range_abs_max", "Out")
-                          ->assert_is_op_input(op_type)
-                          ->AsIntermediate();
+  auto quant_op_out = pattern->NewNode(GetNodeName("quant_op_out"))
+                          ->assert_is_op_output(quant_type, "Out")
+                          ->assert_is_op_input(op_type)
+                          ->AsIntermediate();
   // there are 'times' quantized and dequant op
   std::vector<PDNode *> nodes;
paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -880,7 +880,8 @@ struct QuantDequantOpFuse : public PatternBase {
       : PatternBase(pattern, name_scope, "quant_dequant_fuse") {}

   void operator()(PDNode *quant_op_input, const std::string &op_name,
-                  const std::string &weight_name, int times = 1);
+                  const std::string &weight_name, int times,
+                  const std::string &quant_type);

   std::string GetNodeName(const std::string &op_type) {
     return PDNodeName(name_scope_, repr_, id_, op_type);
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
@@ -25,7 +25,8 @@ namespace framework {
 namespace ir {

 void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
-                     std::string op_type) {
+                     const std::string& op_type,
+                     const std::string& quant_type) {
   const std::string pattern_name = "quant_dequant_fuse";
   // FusePassBase::Init(pattern_name, graph);
   const int kNumFields = 5;
@@ -38,7 +39,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   GraphPatternDetector gpd;
   auto* x = gpd.mutable_pattern()
                 ->NewNode("x")
-                ->assert_is_op_input("fake_quantize_range_abs_max", "X")
+                ->assert_is_op_input(quant_type, "X")
                 ->AsInput();

   std::string quantized_op_type = "";
@@ -46,6 +47,9 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   if (op_type == "conv2d") {
     quantized_op_type = "conv2d";
     weight_name = "Filter";
+  } else if (op_type == "depthwise_conv2d") {
+    quantized_op_type = "depthwise_conv2d";
+    weight_name = "Filter";
   } else if (op_type == "conv2d_fusion") {
     quantized_op_type = "conv2d_fusion";
     weight_name = "Filter";
@@ -62,7 +66,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   }

   patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name);
-  pattern(x, quantized_op_type, weight_name, times);
+  pattern(x, quantized_op_type, weight_name, times, quant_type);

   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
@@ -103,7 +107,6 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
     std::unordered_set<const Node*> delete_nodes;

     for (int i = 0; i < times; i++) {
-      // max_range = (range * range) / weight_scale
       float max_range = boost::get<float>(
           nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range"));
       float weight_scale = (range * range) / max_range;
@@ -118,7 +121,8 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
       new_op_desc.SetType(quantized_op_type);

       if (quantized_op_type == "conv2d" ||
-          quantized_op_type == "conv2d_fusion") {
+          quantized_op_type == "conv2d_fusion" ||
+          quantized_op_type == "depthwise_conv2d") {
         new_op_desc.SetInput("Input", {new_input});
         new_op_desc.SetOutput("Output", {new_output});
       } else if (quantized_op_type == "fc") {
@@ -156,11 +160,17 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
   const std::string pattern_name = "quant_dequant_fuse";
   FusePassBase::Init(pattern_name, graph);

-  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul"};
+  std::unordered_set<std::string> quant_types = {
+      "fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
+  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul",
+                                                        "depthwise_conv2d"};
   auto* scope = param_scope();
-  for (auto& op_type : quantized_op_types) {
-    for (int i = 1; i <= 6; i++) {
-      RunQuantDequant(graph, scope, i, op_type);
+  for (auto& quant_type : quant_types) {
+    for (auto& op_type : quantized_op_types) {
+      for (int i = 6; i >= 1; i--) {
+        RunQuantDequant(graph, scope, i, op_type, quant_type);
+      }
     }
   }
 }
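The deleted comment and the line kept below it state the same relation from two directions: the fake_dequantize op records max_range = (range * range) / weight_scale, so the fuse pass recovers weight_scale = (range * range) / max_range. A minimal numeric round trip, assuming range is 127 as in typical 8-bit fake quantization (the actual `range` value is set earlier in the handler and is not expanded on this page):

#include <cassert>

int main() {
  // Illustrative values only; range and weight_scale are assumptions.
  const float range = 127.0f;       // 8-bit symmetric quantization range
  const float weight_scale = 0.5f;  // scale used when the weights were fake-quantized
  // What fake_dequantize_max_abs stores in its "max_range" attribute.
  const float max_range = (range * range) / weight_scale;     // 32258
  // What RunQuantDequant recovers from that attribute.
  const float recovered_scale = (range * range) / max_range;  // 0.5
  assert(recovered_scale == weight_scale);
  return 0;
}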
paddle/fluid/inference/anakin/convert/CMakeLists.txt
-cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc roi_align.cc DEPS anakin_engine framework_proto scope op_registry)
+cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
+  elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
+  batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
+  detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
+  roi_align.cc helper.cc DEPS anakin_engine framework_proto scope op_registry
+  gtest)
 cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
 cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
paddle/fluid/inference/anakin/convert/activation.cc
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-ActivationOpConverter<TargetT>::ActivationOpConverter(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
     const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);
@@ -30,8 +30,8 @@ ActivationOpConverter<TargetT>::ActivationOpConverter(
   anakin_op_type_ = it->second;
 }

-template <typename TargetT>
-void ActivationOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ActivationOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -50,11 +50,40 @@ void ActivationOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+using sigmoid_nv_fp32 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sigmoid_nv_int8 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+using tanh_nv_fp32 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using tanh_nv_int8 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
+using sigmoid_cpu_fp32 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sigmoid_cpu_int8 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+using tanh_cpu_fp32 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using tanh_cpu_int8 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8);
paddle/fluid/inference/anakin/convert/activation.h
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ActivationOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   explicit ActivationOpConverter(const std::string &op_type);
@@ -40,16 +40,17 @@ class ActivationOpConverter : public AnakinOpConverter<TargetT> {
       {"sigmoid", "Sigmoid"}};
 };

-template <typename TargetT>
-class TanhOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
 };

-template <typename TargetT>
-class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
+  SigmoidOpConverter()
+      : ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
 };
 }  // namespace anakin
 }  // namespace inference
paddle/fluid/inference/anakin/convert/affine_channel.cc
@@ -16,18 +16,14 @@
 #include <algorithm>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void AffineChannelOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -35,60 +31,20 @@ void AffineChannelOpConverter<TargetT>::operator()(
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});

   // Copy the Scale to CPUPlace and get the pointer.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto *scale_t = scale_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> scale_tensor(new framework::LoDTensor());
-  scale_tensor->Resize(scale_t->dims());
-  TensorCopySync((*scale_t), platform::CPUPlace(), scale_tensor.get());
+  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

   // Copy the Bias to CPUPlace and get the pointer.
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto *bias_t = bias_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> bias_tensor(new framework::LoDTensor());
-  bias_tensor->Resize(bias_t->dims());
-  TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
-
-  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
-
-  // Generate the Scale parameter of Anakin.
-  auto scale_shape = framework::vectorize2int(scale_t->dims());
-  while (scale_shape.size() < 4) {
-    scale_shape.insert(scale_shape.begin(), 1);
-  }
-  Shape anakin_scale_shape(scale_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_scale_shape);
-  float *scale_cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(), scale_cpu_data);
-  weight1->d_tensor().set_shape(anakin_scale_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  // Generate the Bias parameter of Anakin.
-  auto bias_shape = framework::vectorize2int(bias_t->dims());
-  while (bias_shape.size() < 4) {
-    bias_shape.insert(bias_shape.begin(), 1);
-  }
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
-  float *bias_cpu_data = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
+  auto weight2 = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
@@ -97,8 +53,21 @@ void AffineChannelOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
+using affine_channel_nv_fp32 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using affine_channel_nv_int8 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);
+using affine_channel_cpu_fp32 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using affine_channel_cpu_int8 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8);
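Several converters in this commit (affine_channel, batch_norm, conv2d, dropout, and others) replace their inlined GraphGlobalMem/TensorCopySync code with helpers declared in the new convert/helper.h (tensor_from_var, pblock_from_var, pblock_from_tensor, pblock_from_vector). That header is added by this commit (+88 lines) but is not expanded on this page. Below is a rough sketch of what pblock_from_vector plausibly does, inferred only from the inlined code it replaces in dropout.cc and batch_norm.cc; the name, signature, and body are assumptions, not the actual helper.

// Hypothetical sketch, not the real helper.h added by this commit.
template <typename TargetT>
auto *pblock_from_vector(const std::vector<float> &vec) {
  // Anakin weights are 4-D blocks; pad the shape the way the old inlined code did.
  ::anakin::saber::Shape shape(std::vector<int>({1, 1, 1, static_cast<int>(vec.size())}));
  auto *block = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
                    .template new_block<::anakin::AK_FLOAT>(shape);
  // Fill the host tensor, then mirror it into the device tensor.
  float *host_data = static_cast<float *>(block->h_tensor().mutable_data());
  std::copy(vec.begin(), vec.end(), host_data);
  block->d_tensor().set_shape(shape);
  block->d_tensor().copy_from(block->h_tensor());
  return block;
}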
paddle/fluid/inference/anakin/convert/affine_channel.h
@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   AffineChannelOpConverter() = default;
paddle/fluid/inference/anakin/convert/batch_norm.cc
@@ -18,17 +18,14 @@
 #include <map>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void BatchNormOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -36,87 +33,46 @@ void BatchNormOpConverter<TargetT>::operator()(
-  std::map<std::string, std::string> inputs;
-  for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
-    PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
-    auto v = op_desc.Input(k).front();
-    inputs.insert({k, v});
-  }
-
+  auto input = op_desc.Input("X").front();
   auto output = op_desc.Output("Y").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
   auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
-  // auto momentum = boost::get<float>(op_desc.GetAttr("momentum"));

   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
-  this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
   this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
   this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));

   auto scale_op_name = op_name + ":scale";
-  auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
-                                                 framework::LoDTensor *tensor) {
-    auto *v = scope.FindVar(var_name);
-    PADDLE_ENFORCE_NOT_NULL(v);
-    auto *t = v->GetMutable<framework::LoDTensor>();
-    tensor->Resize(t->dims());
-    TensorCopySync(*t, platform::CPUPlace(), tensor);
-  };
-  framework::LoDTensor bias_t;
-  framework::LoDTensor mean_t;
-  framework::LoDTensor scale_t;
-  framework::LoDTensor variance_t;
-  get_lod_tensor(inputs["Bias"], &bias_t);
-  get_lod_tensor(inputs["Mean"], &mean_t);
-  get_lod_tensor(inputs["Scale"], &scale_t);
-  get_lod_tensor(inputs["Variance"], &variance_t);
+  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
+  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);

-  auto fill_shape = [](size_t n, std::vector<int> shape) {
-    shape.insert(shape.begin(), 1);
-    if (shape.size() < n) {
-      shape.insert(shape.end(), n - shape.size(), 1);
-    }
-    return shape;
-  };
-  Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
-  Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
+  auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
+  PADDLE_ENFORCE_NOT_NULL(mean_v);
+  auto weight1 = pblock_from_var<TargetT>(*mean_v);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);

-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
-  auto *variance_data = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
+  auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
+  PADDLE_ENFORCE_NOT_NULL(variance_v);
+  auto weight2 = pblock_from_var<TargetT>(*variance_v);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);

-  Shape shape3(std::vector<int>({1, 1, 1, 1}));
-  auto *weight3 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
-  auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
-  float weight3_data[] = {1};
-  std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
+  auto *weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);

-  Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
-  auto *scale = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(scale_shape);
-  auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
-  std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
-
-  Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
-  auto *bias = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(bias_shape);
-  auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
-  std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
-
-  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
-  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
-  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
-  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+  auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
+  PADDLE_ENFORCE_NOT_NULL(scale_v);
+  auto scale = pblock_from_var<TargetT>(*scale_v);
   this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);

+  auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
+  PADDLE_ENFORCE_NOT_NULL(bias_v);
+  auto bias = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
@@ -125,9 +81,17 @@ void BatchNormOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter<::anakin::saber::NV>);
+using bn_nv_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using bn_nv_int8 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter<::anakin::saber::X86>);
+using bn_cpu_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using bn_cpu_int8 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_int8);
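For reference, the BatchNorm + Scale pair emitted above follows the usual inference-time batch-norm algebra: the BatchNorm op consumes the running mean and variance (weights 1 and 2, with a constant 1.0 as weight 3), and the chained Scale op applies the learned scale and bias. A small standalone sketch of that two-stage computation for a single value, with made-up constants:

#include <cmath>
#include <cstdio>

int main() {
  // Illustrative per-channel statistics and parameters (assumptions).
  const float mean = 0.2f, variance = 4.0f, epsilon = 1e-5f;
  const float scale = 1.5f, bias = -0.1f;
  const float x = 1.0f;
  // Stage 1: the "BatchNorm" op normalizes with mean/variance.
  const float normalized = (x - mean) / std::sqrt(variance + epsilon);
  // Stage 2: the "Scale" op applies the learned scale and bias.
  const float y = scale * normalized + bias;
  std::printf("normalized = %f, y = %f\n", normalized, y);
  return 0;
}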
paddle/fluid/inference/anakin/convert/batch_norm.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   BatchNormOpConverter() = default;
paddle/fluid/inference/anakin/convert/concat.cc
@@ -19,8 +19,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ConcatOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ConcatOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -39,8 +39,21 @@ void ConcatOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter<::anakin::saber::NV>);
+using concat_nv_fp32 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using concat_nv_int8 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, concat_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(concat, concat_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter<::anakin::saber::X86>);
+using concat_cpu_fp32 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using concat_cpu_int8 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, concat_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(concat, concat_cpu_int8);
paddle/fluid/inference/anakin/convert/concat.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ConcatOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   ConcatOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d.cc
@@ -16,18 +16,16 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
 using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Conv2dOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -42,11 +40,8 @@ void Conv2dOpConverter<TargetT>::operator()(
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
-  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> weight_tensor(new framework::LoDTensor());
-  weight_tensor->Resize(filter_t->dims());
-  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
+  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
@@ -69,25 +64,61 @@ void Conv2dOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", false);

-  auto weight_shape = framework::vectorize2int(filter_t->dims());
-  Shape anakin_shape(weight_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
-  weight1->d_tensor().set_shape(anakin_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  ::anakin::saber::Shape anakin_shape(weight_shape);
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    float *weight_data = weight_tensor->data<float>();
+    std::vector<char> weight_int8;
+    int weight_num = weight_tensor->numel();
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      bool is_valid_int8 =
+          ((weight_data[i] >= -128) && (weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of conv "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name, {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter<::anakin::saber::X86>);
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter<::anakin::saber::NV>);
+using conv2d_nv_fp32 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using conv2d_nv_int8 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_int8);
 #endif
+using conv2d_cpu_fp32 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using conv2d_cpu_int8 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_int8);
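In the int8 branch above, both scales are divided by int8_range = 127 before being handed to Anakin: the filter values are already integers in [-128, 127], so weight_scale / 127 (and input_scale / 127 for the activation tensor) is the step size that maps an int8 value back to its float magnitude. A small numeric sketch of that mapping, with made-up scale values:

#include <cstdio>

int main() {
  const float int8_range = 127.0f;
  // Assumed values recorded by the quantization pass.
  const float input_scale = 6.35f;    // max absolute value seen on the input tensor
  const float weight_scale = 2.54f;   // max absolute value of the original float weights
  // Step sizes passed to Anakin via AddTensorScale / SetWeightsScale.
  const float input_step = input_scale / int8_range;    // 0.05
  const float weight_step = weight_scale / int8_range;  // 0.02
  // An int8 weight of 100 then stands for roughly 100 * 0.02 = 2.0 in float.
  const signed char q_weight = 100;
  std::printf("input step %f, dequantized weight ~= %f\n", input_step,
              q_weight * weight_step);
  return 0;
}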
paddle/fluid/inference/anakin/convert/conv2d.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Conv2dOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   Conv2dOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
@@ -16,18 +16,16 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
 using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Conv2dFusionOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -43,24 +41,16 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
-  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
+  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());

   auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(b_v);
-  auto *b_t = b_v->GetMutable<framework::LoDTensor>();
-
-  std::unique_ptr<framework::LoDTensor> weight_tensor(new framework::LoDTensor());
-  weight_tensor->Resize(filter_t->dims());
-  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
   // const int n_output = weight_tensor->dims()[0];
   // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
   this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
@@ -77,37 +67,42 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", true);

-  auto weight_shape = framework::vectorize2int(filter_t->dims());
-  Shape anakin_shape(weight_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
-  weight1->d_tensor().set_shape(anakin_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  auto bias_shape = framework::vectorize2int(b_t->dims());
-  framework::LoDTensor bias_tensor;
-  bias_tensor.Resize(b_t->dims());
-  TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
-  auto *bias_data = bias_tensor.data<float>();
-  bias_shape.insert(bias_shape.begin(), 1);
-  bias_shape.insert(bias_shape.begin(), 1);
-  bias_shape.insert(bias_shape.begin(), 1);
-  // bias_shape.push_back(1);
-  // bias_shape.push_back(1);
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
-  float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  ::anakin::saber::Shape anakin_shape(weight_shape);
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    float *weight_data = weight_tensor->data<float>();
+    std::vector<char> weight_int8;
+    int weight_num = weight_tensor->numel();
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      bool is_valid_int8 =
+          ((weight_data[i] >= -128) && (weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of conv "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name, {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+    auto weight_shape = framework::vectorize2int(weight_tensor->dims());
+    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  }
 }
 }  // namespace anakin
@@ -115,9 +110,21 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter<::anakin::saber::NV>);
+using conv2d_fusion_nv_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using conv2d_fusion_nv_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter<::anakin::saber::X86>);
+using conv2d_fusion_cpu_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using conv2d_fusion_cpu_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_int8);
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   Conv2dFusionOpConverter() = default;
paddle/fluid/inference/anakin/convert/density_prior_box.cc
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DensityPriorBoxOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DensityPriorBoxOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -109,13 +109,24 @@ void DensityPriorBoxOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
+using ds_pr_nv_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using ds_pr_nv_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
+using ds_pr_cpu_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using ds_pr_cpu_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_int8);
paddle/fluid/inference/anakin/convert/density_prior_box.h
@@ -22,8 +22,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DensityPriorBoxOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DensityPriorBoxOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   DensityPriorBoxOpConverter() = default;
paddle/fluid/inference/anakin/convert/detection_out.cc
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DetectionOutOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DetectionOutOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -67,8 +67,21 @@ void DetectionOutOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter<::anakin::saber::NV>);
+using detection_out_nv_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using detection_out_nv_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter<::anakin::saber::X86>);
+using detection_out_cpu_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using detection_out_cpu_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_int8);
paddle/fluid/inference/anakin/convert/detection_out.h
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DetectionOutOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DetectionOutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   DetectionOutOpConverter() = default;
paddle/fluid/inference/anakin/convert/dropout.cc
@@ -16,17 +16,14 @@
 #include <algorithm>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DropoutOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DropoutOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -42,12 +39,7 @@ void DropoutOpConverter<TargetT>::operator()(
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
-  Shape shape1(std::vector<int>({1, 1, 1, 1}));
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *factor_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  float weight1_data[] = {factor};
-  std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data);
+  auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));

   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   this->engine_->AddOpAttr(op_name, "axis", 0);
@@ -60,8 +52,21 @@ void DropoutOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter<::anakin::saber::NV>);
+using dropout_nv_fp32 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using dropout_nv_int8 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, dropout_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter<::anakin::saber::X86>);
+using dropout_cpu_fp32 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using dropout_cpu_int8 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_int8);
paddle/fluid/inference/anakin/convert/dropout.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DropoutOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DropoutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  DropoutOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/elementwise.cc
浏览文件 @ e14ab180
...
@@ -17,17 +17,14 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ElementwiseAddOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ElementwiseAddOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -48,8 +45,8 @@ void ElementwiseAddOpConverter<TargetT>::operator()(
   this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }

-template <typename TargetT>
-void ElementwiseMulOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -75,12 +72,31 @@ void ElementwiseMulOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(
-    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(
-    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::NV>);
+using elet_nv_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using elet_nv_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using eletmul_nv_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using eletmul_nv_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(
-    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(
-    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::X86>);
+using elet_cpu_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using elet_cpu_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using eletmul_cpu_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using eletmul_cpu_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_int8);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_int8);
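The "coeff" attribute set on the Anakin Eltwise op above configures a weighted element-wise sum. A standalone sketch of that semantic (assuming the usual Caffe/Anakin Eltwise SUM behaviour; eltwise_sum is an illustrative name, not part of the converter):

#include <cassert>
#include <cstdio>
#include <vector>

// out[i] = coeff[0] * x[i] + coeff[1] * y[i]
std::vector<float> eltwise_sum(const std::vector<float> &x,
                               const std::vector<float> &y,
                               const std::vector<float> &coeff) {
  assert(x.size() == y.size() && coeff.size() == 2);
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = coeff[0] * x[i] + coeff[1] * y[i];
  return out;
}

int main() {
  for (float v : eltwise_sum({1.f, 2.f}, {3.f, 4.f}, {1.f, 1.f})) std::printf("%.1f ", v);
  // prints: 4.0 6.0
  return 0;
}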
paddle/fluid/inference/anakin/convert/elementwise.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ElementwiseAddOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ElementwiseAddOpConverter() = default;
...
@@ -34,8 +35,9 @@ class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
  private:
 };

-template <typename TargetT>
-class ElementwiseMulOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ElementwiseMulOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ElementwiseMulOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/fc.cc
浏览文件 @ e14ab180
...
@@ -16,22 +16,19 @@
 #include <algorithm>
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void FcBaseOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_names = op_desc.InputNames();
-  bool with_bias = input_names.size() == 3;
+  bool with_bias = input_names.size() >= 3;

   std::string w_name = "Y";
   std::string i_name = "X";
...
@@ -45,7 +42,12 @@ void FcBaseOpConverter<TargetT>::operator()(
   // get weights
   auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
   PADDLE_ENFORCE_NOT_NULL(y_v);
-  auto *y_t = y_v->GetMutable<framework::LoDTensor>();
+  auto weight_tensor = tensor_from_var(*y_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());
+
+  int out_dim = weight_shape[1];
+  const int w_m = weight_shape[0];
+  const int w_k = weight_shape[1];

   auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();
...
@@ -53,64 +55,58 @@ void FcBaseOpConverter<TargetT>::operator()(
   this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
   this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
   this->engine_->AddOpAttr(op_name, "axis", 1);
-  auto weight_shape = framework::vectorize2int(y_t->dims());
-  int out_dim = weight_shape[1];
   this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
-  const int w_m = weight_shape[0];
-  const int w_k = weight_shape[1];

-  if (weight_shape.size() < 4UL) {
-    weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
-  }
-  Shape anakin_shape(weight_shape);
-  framework::LoDTensor weight_tensor;
-  weight_tensor.Resize(y_t->dims());
-  TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
-  auto *weight_data = weight_tensor.data<float>();
-  PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel());
+  auto *weight_data = weight_tensor->data<float>();
+  PADDLE_ENFORCE(w_m * w_k == weight_tensor->numel());

-  std::vector<float> trans_weight_data(weight_tensor.numel());
+  std::vector<float> trans_weight_data(weight_tensor->numel());
   for (int i = 0; i < w_m; i++) {
     for (int j = 0; j < w_k; j++) {
       trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
     }
   }
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-          anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
-  weight1->d_tensor().set_shape(anakin_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+
+  int weight_num = weight_tensor->numel();
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    if (weight_shape.size() < 4UL) {
+      weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
+    }
+    ::anakin::saber::Shape anakin_shape(weight_shape);
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 =
+        ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+            .template new_block<::anakin::AK_INT8>(anakin_shape);
+    std::vector<char> weight_int8;
+    for (int i = 0; i < weight_num; i++) {
+      bool is_valid_int8 =
+          ((trans_weight_data[i] >= -128) && (trans_weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of fc "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(trans_weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name,
+                                            {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  }

   // get bias
   if (with_bias) {
     auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
     PADDLE_ENFORCE_NOT_NULL(b_v);
-    auto *b_t = b_v->GetMutable<framework::LoDTensor>();
-
-    auto bias_shape = framework::vectorize2int(b_t->dims());
-    framework::LoDTensor bias_tensor;
-    bias_tensor.Resize(b_t->dims());
-    TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
-    auto *bias_data = bias_tensor.data<float>();
-    bias_shape.insert(bias_shape.begin(), 1);
-    bias_shape.insert(bias_shape.begin(), 1);
-    bias_shape.insert(bias_shape.begin(), 1);
-    // bias_shape.push_back(1);
-    // bias_shape.push_back(1);
-    Shape anakin_bias_shape(bias_shape);
-    auto *weight2 =
-        GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-            anakin_bias_shape);
-    float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
-    std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
-    weight2->d_tensor().set_shape(anakin_bias_shape);
-    weight2->d_tensor().copy_from(weight2->h_tensor());
+    auto weight2 = pblock_from_var<TargetT>(*b_v);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
...
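A self-contained sketch of the int8 weight path added above (the struct and function names are illustrative, not Paddle or Anakin API): the already-quantized float weights must lie in [-128, 127], they are narrowed to 8-bit storage, and the scales handed to the engine are divided by the int8 range of 127.

#include <cassert>
#include <cstdint>
#include <vector>

struct QuantizedFcWeight {      // illustrative container, not an Anakin type
  std::vector<int8_t> data;     // narrowed weights, what "weight_1" stores
  float weight_scale;           // value passed to SetWeightsScale(...)
  float input_scale;            // value passed to AddTensorScale(...)
};

QuantizedFcWeight quantize_fc_weight(const std::vector<float> &trans_weight,
                                     float weight_scale, float in_scale) {
  const float int8_range = 127.f;
  QuantizedFcWeight q;
  q.data.reserve(trans_weight.size());
  for (float v : trans_weight) {
    assert(v >= -128.f && v <= 127.f);  // mirrors the PADDLE_ENFORCE above
    q.data.push_back(static_cast<int8_t>(v));
  }
  q.weight_scale = weight_scale / int8_range;
  q.input_scale = in_scale / int8_range;
  return q;
}

int main() {
  auto q = quantize_fc_weight({-128.f, 0.f, 127.f}, /*weight_scale=*/0.5f,
                              /*in_scale=*/2.0f);
  return static_cast<int>(q.data.size()) - 3;  // exits 0 when all weights kept
}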
@@ -120,9 +116,39 @@ void FcBaseOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::NV>);
+using mul_nv_fp32 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using fc_nv_fp32 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using mul_nv_int8 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using fc_nv_int8 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, mul_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, fc_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(mul, mul_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(fc, fc_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::X86>);
+using mul_cpu_fp32 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using fc_cpu_fp32 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using mul_cpu_int8 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using fc_cpu_int8 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, mul_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, fc_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(mul, mul_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(fc, fc_cpu_int8);
paddle/fluid/inference/anakin/convert/fc.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FcBaseOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  FcBaseOpConverter() = default;
...
@@ -33,15 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
 };

 // with bias
-template <typename TargetT>
-class FcOpConverter : public FcBaseOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FcOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
  public:
  FcOpConverter() = default;
 };

 // without bias
-template <typename TargetT>
-class MulOpConverter : public FcBaseOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class MulOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
  public:
  MulOpConverter() = default;
 };
...
paddle/fluid/inference/anakin/convert/flatten.cc
浏览文件 @ e14ab180
...
@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void FlattenOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void FlattenOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -46,8 +46,21 @@ void FlattenOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten,
-                                  FlattenOpConverter<::anakin::saber::NV>);
+using flatten_nv_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using flatten_nv_int8 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten, flatten_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten,
-                                 FlattenOpConverter<::anakin::saber::X86>);
+using flatten_cpu_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using flatten_cpu_int8 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_int8);
paddle/fluid/inference/anakin/convert/flatten.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class FlattenOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FlattenOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  FlattenOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/helper.cc
0 → 100644
浏览文件 @ e14ab180
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/anakin/convert/helper.h"

namespace paddle {
namespace inference {
namespace anakin {

std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable& var, const platform::Place& place) {
  auto& src = var.Get<framework::LoDTensor>();
  std::unique_ptr<framework::LoDTensor> dst(new framework::LoDTensor());
  dst->Resize(src.dims());
  TensorCopySync((src), place, dst.get());
  return dst;
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/helper.h
0 → 100644
浏览文件 @ e14ab180
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <map>
#include <memory>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h"

#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"

using anakin::saber::Shape;
using anakin::AK_FLOAT;
using anakin::PBlock;
using anakin::graph::GraphGlobalMem;

namespace paddle {
namespace inference {
namespace anakin {

std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable& var, const platform::Place& place);

template <typename T>
PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
                              std::vector<int> shape) {
  while (shape.size() < 4) {
    shape.insert(shape.begin(), 1);
  }
  Shape anakin_shape(shape);
  auto* weight =
      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
  float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
  std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
  weight->d_tensor().set_shape(anakin_shape);
  weight->d_tensor().copy_from(weight->h_tensor());
  return weight;
}

template <typename T>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
                              std::vector<int> shape_vec) {
  while (shape_vec.size() < 4) {
    shape_vec.insert(shape_vec.begin(), 1);
  }
  Shape shape(shape_vec);
  auto* weight =
      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
  auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
  std::copy(std::begin(vec), std::end(vec), weight_data);
  weight->d_tensor().set_shape(shape);
  weight->d_tensor().copy_from(weight->h_tensor());
  return weight;
}

template <typename T>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
  int size = vec.size();
  return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
}

template <typename T>
PBlock<T>* pblock_from_var(const framework::Variable& var) {
  auto tensor = tensor_from_var(var, platform::CPUPlace());
  auto shape = framework::vectorize2int(tensor->dims());
  return pblock_from_tensor<T>(*tensor, shape);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
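A tiny standalone sketch of the shape handling these helpers share: any weight shape shorter than four dimensions is left-padded with 1s before the Anakin Shape is built. The sketch below reproduces only that padding step, nothing Anakin-specific:

#include <cstdio>
#include <vector>

// Same padding loop used by pblock_from_tensor / pblock_from_vector.
std::vector<int> pad_to_4d(std::vector<int> shape) {
  while (shape.size() < 4) shape.insert(shape.begin(), 1);
  return shape;
}

int main() {
  for (int d : pad_to_4d({64})) std::printf("%d ", d);       // 1 1 1 64
  std::printf("\n");
  for (int d : pad_to_4d({256, 64})) std::printf("%d ", d);  // 1 1 256 64
  return 0;
}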
paddle/fluid/inference/anakin/convert/im2sequence.cc
浏览文件 @ e14ab180
...
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Im2SequenceConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Im2SequenceConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -55,5 +55,18 @@ void Im2SequenceConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence,
-                                  Im2SequenceConverter<::anakin::saber::NV>);
+#ifdef PADDLE_WITH_CUDA
+using im2sequence_nv_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using im2sequence_nv_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_int8);
+#endif
+
+using im2sequence_cpu_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using im2sequence_cpu_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_int8);
paddle/fluid/inference/anakin/convert/im2sequence.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Im2SequenceConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Im2SequenceConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  Im2SequenceConverter() = default;
...
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @ e14ab180
...
@@ -32,9 +32,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionT>
 class AnakinOpConverter {
-  using AnakinEngineT = AnakinEngine<TargetT, ::anakin::Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, PrecisionT>;

  public:
  AnakinOpConverter() = default;
...
@@ -96,6 +96,13 @@ class AnakinOpConverter {
       engine->Graph()->RegistVar(output);
     }
     engine->Freeze();
+
+    // Add scale for tensor in int8 mode.
+    auto tensor_scales = engine->GetTensorScales();
+    for (auto &item : tensor_scales) {
+      engine->Graph()->SetVarScale(item.first, item.second);
+    }
+
     for (auto &input : inputs) {
       if (parameters.count(input)) continue;
       std::vector<int> input_shape;
...
@@ -136,52 +143,78 @@ class AnakinOpConverter {
   AnakinEngineT *engine_{nullptr};

  private:
-  std::unordered_map<std::string, AnakinOpConverter<TargetT> *> converters_;
+  std::unordered_map<std::string, AnakinOpConverter<TargetT, PrecisionT> *>
+      converters_;
   framework::Scope *scope_{nullptr};
   std::mutex mutex_;
 };

-template class AnakinOpConverter<::anakin::saber::NV>;
-template class AnakinOpConverter<::anakin::saber::X86>;
+template class AnakinOpConverter<::anakin::saber::NV,
+                                 ::anakin::Precision::FP32>;
+template class AnakinOpConverter<::anakin::saber::NV,
+                                 ::anakin::Precision::INT8>;
+template class AnakinOpConverter<::anakin::saber::X86,
+                                 ::anakin::Precision::FP32>;
+template class AnakinOpConverter<::anakin::saber::X86,
+                                 ::anakin::Precision::INT8>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 #define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__,             \
-                                          place_type__, place_class__)        \
-  struct anakin_##op_type__##_##place_type__##_converter                      \
+                                          place_type__, place_class__,        \
+                                          precision_type__, precision_class__) \
+  struct anakin_##op_type__##_##place_type__##_##precision_type__##_converter \
       : public ::paddle::framework::Registrar {                               \
-    anakin_##op_type__##_##place_type__##_converter() {                       \
+    anakin_##op_type__##_##place_type__##_##precision_type__##_converter() {  \
       LOG(INFO) << "register convert " << #op_type__ << " ";                  \
       ::paddle::inference::Registry<                                          \
-          ::paddle::inference::anakin::AnakinOpConverter<place_class__>>::    \
-          Global()                                                            \
-              .Register<::paddle::inference::anakin::Converter__>(#op_type__); \
+          ::paddle::inference::anakin::AnakinOpConverter<                     \
+              place_class__, precision_class__>>::Global()                    \
+          .Register<Converter__>(#op_type__);                                 \
     }                                                                          \
   };                                                                           \
-  anakin_##op_type__##_##place_type__##_converter                             \
-      anakin_##op_type__##_##place_type__##_converter__;                      \
-  int TouchConverterRegister_anakin_##op_type__##_##place_type__() {          \
-    anakin_##op_type__##_##place_type__##_converter__.Touch();                \
+  anakin_##op_type__##_##place_type__##_##precision_type__##_converter        \
+      anakin_##op_type__##_##place_type__##_##precision_type__##_converter__; \
+  int Touch_anakin_##op_type__##_##place_type__##_##precision_type__() {      \
+    anakin_##op_type__##_##place_type__##_##precision_type__##_converter__    \
+        .Touch();                                                             \
     return 0;                                                                  \
   }

 #define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
   REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
-                                    ::anakin::saber::NV)
+                                    ::anakin::saber::NV, FP32,    \
+                                    ::anakin::Precision::FP32)
+
+#define REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA,      \
+                                    ::anakin::saber::NV, INT8,         \
+                                    ::anakin::Precision::INT8)

 #define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
   REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \
-                                    ::anakin::saber::X86)
+                                    ::anakin::saber::X86, FP32,  \
+                                    ::anakin::Precision::FP32)
+
+#define REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU,      \
+                                    ::anakin::saber::X86, INT8,       \
+                                    ::anakin::Precision::INT8)

-#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__)                    \
-  extern int TouchConverterRegister_anakin_##op_type__##_##place_type__();    \
-  int use_op_converter_anakin_##op_type__##_##place_type__                    \
-      __attribute__((unused)) =                                               \
-          TouchConverterRegister_anakin_##op_type__##_##place_type__();
+#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__)   \
+  extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
+  int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__   \
+      __attribute__((unused)) =                                                \
+          Touch_anakin_##op_type__##_##place_type__##_##precision_type__();

 #define USE_ANAKIN_CONVERTER(op_type__) \
-  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA)
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
+#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)
 #define USE_CPU_ANAKIN_CONVERTER(op_type__) \
-  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU)
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
+#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
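A simplified, standalone illustration of the Touch/use linkage these macros expand to, shown for a relu/CUDA/INT8 instantiation following the naming pattern above (the real Touch function also registers the converter through the Registrar; this sketch keeps only the linker-anchoring mechanics):

#include <cstdio>

// What the REGISTER_*_ANAKIN_OP_CONVERTER side provides to other
// translation units: a Touch_* entry point with external linkage.
int Touch_anakin_relu_CUDA_INT8() {   // simplified body, registration omitted
  std::printf("relu INT8 converter linked in\n");
  return 0;
}

// What USE_INT8_ANAKIN_CONVERTER(relu) expands to in the consuming TU: an
// unused global whose initializer calls that entry point, which forces the
// linker to keep the converter's object file in the final binary.
extern int Touch_anakin_relu_CUDA_INT8();
int use_converter_anakin_relu_CUDA_INT8 __attribute__((unused)) =
    Touch_anakin_relu_CUDA_INT8();

int main() { return use_converter_anakin_relu_CUDA_INT8; }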
paddle/fluid/inference/anakin/convert/pool2d.cc
浏览文件 @ e14ab180
...
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Pool2dOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Pool2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -72,8 +72,21 @@ void Pool2dOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d,
-                                  Pool2dOpConverter<::anakin::saber::NV>);
+using pool2d_nv_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using pool2d_nv_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_float32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d,
-                                 Pool2dOpConverter<::anakin::saber::X86>);
+using pool2d_cpu_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using pool2d_cpu_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_float32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_int8);
paddle/fluid/inference/anakin/convert/pool2d.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Pool2dOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Pool2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  Pool2dOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/relu.cc
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ReluOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ReluOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -36,8 +36,8 @@ void ReluOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "alpha", 0);
 }

-template <typename TargetT>
-void LeakyReluOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void LeakyReluOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -58,10 +58,35 @@ void LeakyReluOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu,
-                                  LeakyReluOpConverter<::anakin::saber::NV>);
+using relu_nv_fp32 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using leaky_nv_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using relu_nv_int8 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using leaky_nv_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, relu_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(relu, relu_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu,
-                                 LeakyReluOpConverter<::anakin::saber::X86>);
+using relu_cpu_fp32 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using leaky_cpu_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using relu_cpu_int8 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using leaky_cpu_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, relu_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(relu, relu_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_int8);
paddle/fluid/inference/anakin/convert/relu.h
浏览文件 @ e14ab180
...
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ReluOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ReluOpConverter() = default;
...
@@ -34,8 +34,8 @@ class ReluOpConverter : public AnakinOpConverter<TargetT> {
   virtual ~ReluOpConverter() {}
 };

-template <typename TargetT>
-class LeakyReluOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class LeakyReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  LeakyReluOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/reshape.cc
浏览文件 @ e14ab180
...
@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ReshapeOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ReshapeOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -47,9 +47,21 @@ void ReshapeOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape,
-                                  ReshapeOpConverter<::anakin::saber::NV>);
+using reshape_nv_fp32 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using reshape_nv_int8 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape, reshape_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape,
-                                 ReshapeOpConverter<::anakin::saber::X86>);
+using reshape_cpu_fp32 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using reshape_cpu_int8 = ::paddle::inference::anakin::ReshapeOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_int8);
paddle/fluid/inference/anakin/convert/reshape.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ReshapeOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ReshapeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ReshapeOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/roi_align.cc
浏览文件 @ e14ab180
...
@@ -16,17 +16,12 @@
 #include <algorithm>
 #include <map>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void RoiAlignOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void RoiAlignOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -57,8 +52,21 @@ void RoiAlignOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align,
-                                  RoiAlignOpConverter<::anakin::saber::NV>);
+using roi_align_nv_fp32 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using roi_align_nv_int8 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align,
-                                 RoiAlignOpConverter<::anakin::saber::X86>);
+using roi_align_cpu_fp32 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using roi_align_cpu_int8 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_int8);
paddle/fluid/inference/anakin/convert/roi_align.h
浏览文件 @ e14ab180
...
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class RoiAlignOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class RoiAlignOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  RoiAlignOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/scale.cc
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ScaleOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ScaleOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -49,4 +49,22 @@ void ScaleOpConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter<::anakin::saber::NV>);
+#ifdef PADDLE_WITH_CUDA
+using scale_nv_fp32 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using scale_nv_int8 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, scale_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(scale, scale_nv_int8);
+#endif
+
+using scale_cpu_fp32 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using scale_cpu_int8 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(scale, scale_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(scale, scale_cpu_int8);
paddle/fluid/inference/anakin/convert/scale.h
浏览文件 @ e14ab180
...
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ScaleOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ScaleOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ScaleOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/softmax.cc
浏览文件 @ e14ab180
...
@@ -18,8 +18,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void SoftMaxOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SoftMaxOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -45,9 +45,22 @@ void SoftMaxOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax,
-                                  SoftMaxOpConverter<::anakin::saber::NV>);
+using sm_nv_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sm_nv_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, sm_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(softmax, sm_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax,
-                                 SoftMaxOpConverter<::anakin::saber::X86>);
+using sm_cpu_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sm_cpu_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, sm_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(softmax, sm_cpu_int8);
paddle/fluid/inference/anakin/convert/softmax.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class SoftMaxOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SoftMaxOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  SoftMaxOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/split.cc
浏览文件 @ e14ab180
...
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void SplitOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SplitOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -56,7 +56,22 @@ void SplitOpConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::NV>);
+using split_nv_fp32 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using split_nv_int8 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, split_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(split, split_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::X86>);
+using split_cpu_fp32 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using split_cpu_int8 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(split, split_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(split, split_cpu_int8);
paddle/fluid/inference/anakin/convert/split.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class SplitOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SplitOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  SplitOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/sum.cc
浏览文件 @ e14ab180
...
@@ -23,11 +23,10 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void SumOpConverter<TargetT>::operator()(
-    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
-    const framework::Scope &scope, bool test_mode) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SumOpConverter<TargetT, PrecisionT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -49,6 +48,21 @@ void SumOpConverter<TargetT>::operator()(const framework::proto::OpDesc &op,
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::NV>);
+using sum_nv_fp32 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sum_nv_int8 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, sum_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sum, sum_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::X86>);
+using sum_cpu_fp32 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sum_cpu_int8 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, sum_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sum, sum_cpu_int8);
paddle/fluid/inference/anakin/convert/sum.h
浏览文件 @ e14ab180
...
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class SumOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SumOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  SumOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/test_activation_op.cc
浏览文件 @ e14ab180
...
@@ -27,8 +27,8 @@ static void test_activation_op(const std::string& op_type,
                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
...
@@ -57,6 +57,7 @@ TEST(tanh_op, gpu) {
 }
 #endif

+/*
 TEST(sigm_op, cpu) {
   platform::CPUPlace cpu_place;
   platform::CPUDeviceContext ctx(cpu_place);
...
@@ -68,6 +69,7 @@ TEST(tanh_op, cpu) {
   platform::CPUDeviceContext ctx(cpu_place);
   test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
 }
+*/

 }  // namespace anakin
 }  // namespace inference
...
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
浏览文件 @ e14ab180
...
@@ -28,8 +28,8 @@ void test_affine_channel_op(const platform::DeviceContext& context,
   std::unordered_set<std::string> parameters({"scale", "bias"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 3, 5, 2});
   validator.DeclOutputVar("out", {1, 3, 5, 2});
   validator.DeclParamVar("scale", {3});
...
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
浏览文件 @ e14ab180
...
@@ -25,8 +25,8 @@ void test_batchnorm_op(const platform::DeviceContext& context, bool use_gpu) {
       {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
        "batch_norm_variance"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   std::vector<int> param_shape{2};

   validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});
...
paddle/fluid/inference/anakin/convert/test_concat_op.cc
浏览文件 @ e14ab180
...
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_concat_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
   validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
   validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
...
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
浏览文件 @ e14ab180
...
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_conv2d_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"conv2d-Y"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
   validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
   validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});
...
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
浏览文件 @ e14ab180
...
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_dropout_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
   validator.DeclOutputVar("mask", {1, 1, 2, 2});
...
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
浏览文件 @ e14ab180
...
@@ -27,8 +27,8 @@ static void test_elementwise_op(const std::string& op_type,
                                 bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclInputVar("y", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
...
paddle/fluid/inference/anakin/convert/test_fc_op.cc
浏览文件 @ e14ab180
...
@@ -25,8 +25,8 @@ void test_mul_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"mul_y"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("mul_x", {1, 1, 2, 2});
   validator.DeclParamVar("mul_y", {4, 2});
   validator.DeclOutputVar("mul_out", {1, 2});
...
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
浏览文件 @ e14ab180
...
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_flatten_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
   validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
   framework::OpDesc desc;
...
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
浏览文件 @ e14ab180
...
@@ -25,8 +25,8 @@ void test_pool2d(const platform::DeviceContext& context, bool use_gpu,
                  std::string pool_type = "max") {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);

   // The ITensor's Dims should not contain the batch size.
   // So, the ITensor's Dims of input and output should be C * H * W.
...
paddle/fluid/inference/anakin/convert/test_relu_op.cc
浏览文件 @ e14ab180
...
@@ -27,8 +27,8 @@ static void test_activation_op(const std::string& op_type,
                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
...
@@ -60,20 +60,6 @@ TEST(leaky_relu_op, gpu) {
 }
 #endif

-/* seems bug here
-TEST(relu_op, cpu) {
-  platform::CPUPlace cpu_place;
-  platform::CPUDeviceContext ctx(cpu_place);
-  test_activation_op<::anakin::saber::X86>("relu", ctx, false);
-}
-
-TEST(leaky_relu_op, cpu) {
-  platform::CPUPlace cpu_place;
-  platform::CPUDeviceContext ctx(cpu_place);
-  test_activation_op<::anakin::saber::X86>("leaky_relu", ctx, false);
-}
-*/
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
...
...
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
浏览文件 @
e14ab180
...
...
@@ -24,8 +24,8 @@ template <typename TargetT>
void
test_reshape1_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
AnakinConvertValidation
<
TargetT
,
::
anakin
::
Precision
::
FP32
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
// validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
// validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
...
...
@@ -49,8 +49,8 @@ template <typename TargetT>
void
test_reshape2_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
AnakinConvertValidation
<
TargetT
,
::
anakin
::
Precision
::
FP32
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"reshape-X"
,
{
1
,
2
,
4
});
validator
.
DeclOutputVar
(
"reshape-Out"
,
{
1
,
4
,
2
});
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_softmax_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("softmax-X", {1, 10, 2});
   validator.DeclOutputVar("softmax-Out", {1, 10, 2});
paddle/fluid/inference/anakin/convert/test_split_op.cc
@@ -27,8 +27,8 @@ void AnakinSliceTest(const platform::DeviceContext &context, bool use_gpu,
                      const std::vector<int> &sections) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("split_input", in_shape);
   std::vector<std::string> output_vars;
paddle/fluid/inference/anakin/convert/test_sum_op.cc
@@ -26,8 +26,8 @@ template <typename TargetT>
 static void test_sum_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
   validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
   validator.DeclOutputVar("sum_out", {1, 2, 1, 2});
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_transpose1_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
@@ -47,8 +47,8 @@ template <typename TargetT>
 void test_transpose2_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {3, 5, 4});
paddle/fluid/inference/anakin/convert/transpose.cc
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void TransposeOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void TransposeOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -50,9 +50,17 @@ void TransposeOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter<::anakin::saber::NV>);
+using transpose_nv_fp32 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using transpose_nv_int8 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, transpose_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter<::anakin::saber::X86>);
+using transpose_cpu_fp32 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using transpose_cpu_int8 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_int8);
paddle/fluid/inference/anakin/convert/transpose.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class TransposeOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TransposeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  TransposeOpConverter() = default;
paddle/fluid/inference/anakin/convert/ut_helper.h
@@ -61,7 +61,7 @@ void RandomizeTensor(framework::LoDTensor* tensor,
   auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
   for (size_t i = 0; i < num_elements; i++) {
-    *(temp_data + i) = random(-128., 128.);
+    *(temp_data + i) = random(0., 1.);
   }
   TensorCopySync(temp_tensor, place, tensor);
@@ -72,9 +72,9 @@ void RandomizeTensor(framework::LoDTensor* tensor,
 * anakin
 * layer.
 */
-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionT>
 class AnakinConvertValidation {
-  using AnakinNvEngineT = AnakinEngine<TargetT, Precision::FP32>;
+  using AnakinNvEngineT = AnakinEngine<TargetT, PrecisionT>;

  public:
  AnakinConvertValidation() = delete;
@@ -84,7 +84,7 @@ class AnakinConvertValidation {
                          const platform::DeviceContext& ctx,
                          bool use_gpu = true)
       : parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) {
-    engine_.reset(new AnakinEngine<TargetT, Precision::FP32>(true));
+    engine_.reset(new AnakinEngine<TargetT, PrecisionT>(true));
   }
   // Declare a Variable as input with random initialization.
@@ -127,7 +127,7 @@ class AnakinConvertValidation {
     // should init anakin engine here.
     auto& block_desc = program_desc_.Block(framework::kRootBlockIndex);
-    Singleton<AnakinOpConverter<TargetT>>::Global().ConvertOp(
+    Singleton<AnakinOpConverter<TargetT, PrecisionT>>::Global().ConvertOp(
         desc, block_desc, parameters_, *scope_, engine_.get(), true /*test_mode*/);
     engine_->Freeze();
@@ -213,8 +213,15 @@ class AnakinConvertValidation {
   bool use_gpu_{true};
 };

-template class AnakinConvertValidation<::anakin::saber::NV>;
-template class AnakinConvertValidation<::anakin::saber::X86>;
+template class AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::FP32>;
+template class AnakinConvertValidation<::anakin::saber::X86, ::anakin::Precision::FP32>;
+template class AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::INT8>;
+template class AnakinConvertValidation<::anakin::saber::X86, ::anakin::Precision::INT8>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
paddle/fluid/inference/anakin/engine.cc
@@ -172,11 +172,20 @@ AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
 #ifdef PADDLE_WITH_CUDA
 template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
-template class AnakinEngineManager<::anakin::saber::NV>;
+template class AnakinEngineManager<::anakin::saber::NV, ::anakin::Precision::FP32>;
+template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::INT8>;
+template class AnakinEngineManager<::anakin::saber::NV, ::anakin::Precision::INT8>;
 #endif
 template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
-template class AnakinEngineManager<::anakin::saber::X86>;
+template class AnakinEngineManager<::anakin::saber::X86, ::anakin::Precision::FP32>;
+template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::INT8>;
+template class AnakinEngineManager<::anakin::saber::X86, ::anakin::Precision::INT8>;
 // template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
 }  // namespace anakin
paddle/fluid/inference/anakin/engine.h
@@ -93,6 +93,12 @@ class AnakinEngine {
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
+  void AddTensorScale(const std::string& tensor_name, float scale) {
+    tensor_scales_[tensor_name] = scale;
+  }
+  std::unordered_map<std::string, float> GetTensorScales() {
+    return tensor_scales_;
+  }
   void Execute(const std::map<std::string, framework::LoDTensor*>& inputs,
                const std::map<std::string, framework::LoDTensor*>& outputs);
 #ifdef PADDLE_WITH_CUDA
@@ -112,11 +118,12 @@ class AnakinEngine {
   std::unique_ptr<GraphT> graph_;
   std::unique_ptr<NetT> net_;
   std::vector<std::string> program_inputs_;
+  std::unordered_map<std::string, float> tensor_scales_;
 };

-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionType>
 class AnakinEngineManager {
-  using AnakinEngineT = AnakinEngine<TargetT, Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;

  public:
  bool HasEngine(const std::string& name) const {
@@ -132,7 +139,7 @@ class AnakinEngineManager {
                              std::vector<std::string> program_inputs,
                              std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
-    auto* p = new AnakinEngine<TargetT, Precision::FP32>(
+    auto* p = new AnakinEngine<TargetT, PrecisionType>(
         need_summary, device, max_batch_size, max_input_shape, program_inputs);
     engines_[engine_name].reset(p);
     return p;
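The AddTensorScale / GetTensorScales pair added above is plain per-tensor bookkeeping: a caller records a calibration scale under a tensor name, and the whole map can be read back when the INT8 net is built. A standalone sketch of that same pattern follows; it is an illustrative stand-in, not the real AnakinEngine (which needs the Anakin library to construct), and the tensor name and scale value are made up.

#include <iostream>
#include <string>
#include <unordered_map>

// Illustrative stand-in for the scale bookkeeping added to AnakinEngine.
class ScaleBook {
 public:
  // Mirrors AnakinEngine::AddTensorScale(tensor_name, scale).
  void AddTensorScale(const std::string& tensor_name, float scale) {
    tensor_scales_[tensor_name] = scale;
  }
  // Mirrors AnakinEngine::GetTensorScales().
  std::unordered_map<std::string, float> GetTensorScales() const {
    return tensor_scales_;
  }

 private:
  std::unordered_map<std::string, float> tensor_scales_;
};

int main() {
  ScaleBook book;
  book.AddTensorScale("conv1_out", 0.027f);  // hypothetical calibration scale
  for (const auto& kv : book.GetTensorScales()) {
    std::cout << kv.first << " -> " << kv.second << "\n";
  }
  return 0;
}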
paddle/fluid/inference/analysis/argument.h
@@ -169,7 +169,13 @@ struct Argument {
                       anakin_max_shape_t);
   DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int);
   DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
+  DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode,
+                      AnalysisConfig::Precision);
   DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
+  DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter,
+                      std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter,
+                      std::vector<std::string>);

   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -123,6 +123,11 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("max_input_shape",
                 new std::map<std::string, std::vector<int>>(
                     argument->anakin_max_input_shape()));
       pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));
+      bool enable_int8 =
+          argument->anakin_precision_mode() == AnalysisConfig::Precision::kInt8;
+      pass->Set("enable_int8", new bool(enable_int8));
+      pass->Set("anakin_ops_filter",
+                new std::vector<std::string>(argument->anakin_ops_filter()));
     }

     pre_pass = pass_name;
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
@@ -39,8 +39,14 @@ void analysis::AnakinSubgraphPass::ApplyImpl(
     framework::ir::Graph *graph) const {
   framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);

-  auto teller = [](const framework::ir::Node *node) {
-    if (!node->IsOp() || !node->Op()) return false;
+  auto &anakin_ops_filter = Get<std::vector<std::string>>("anakin_ops_filter");
+  auto teller = [&anakin_ops_filter](const framework::ir::Node *node) {
+    if (!node->IsOp() || !node->Op())
+      return false;
+    else if (std::find(anakin_ops_filter.begin(), anakin_ops_filter.end(),
+                       node->Op()->Type()) != anakin_ops_filter.end())
+      return false;
     return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
   };
@@ -191,47 +197,71 @@ void AnakinSubgraphPass::CreateAnakinOp(
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
-  auto max_input_shape =
-      Get<std::map<std::string, std::vector<int>>>("max_input_shape");
-  auto max_batch_size = Get<int>("max_batch_size");
   auto program_inputs = program_desc->GetFeedTargetNames();

   bool use_gpu = Get<bool>("use_gpu");
   SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
+  bool enable_int8 = Get<bool>("enable_int8");
+  SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
+  if (enable_int8) {
+    CreateAnakinEngine<::anakin::Precision::INT8>(&block_desc, params, input_names,
+                                                  output_mapping, program_inputs,
+                                                  engine_key);
+  } else {
+    CreateAnakinEngine<::anakin::Precision::FP32>(&block_desc, params, input_names,
+                                                  output_mapping, program_inputs,
+                                                  engine_key);
+  }
+}

+template <::anakin::Precision PrecisionT>
+void AnakinSubgraphPass::CreateAnakinEngine(
+    framework::BlockDesc *block_desc, const std::vector<std::string> &params,
+    const std::set<std::string> &input_names,
+    const std::vector<std::string> &output_mapping,
+    const std::vector<std::string> &program_inputs,
+    const std::string &engine_key) const {
+  framework::BlockDesc block_desc_temp(nullptr, block_desc->Proto());
+  bool use_gpu = Get<bool>("use_gpu");
+  auto max_batch_size = Get<int>("max_batch_size");
+  auto max_input_shape =
+      Get<std::map<std::string, std::vector<int>>>("max_input_shape");
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
     inference::Singleton<
-        anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+        anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
                 max_input_shape, program_inputs, engine_key);
 #endif
   } else {
     inference::Singleton<
-        anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+        anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
                 max_input_shape, program_inputs, engine_key);
   }

   auto *scope = param_scope();
   std::unordered_set<std::string> param_set(params.begin(), params.end());
-  framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
     auto *anakin_engine =
         inference::Singleton<inference::anakin::AnakinEngineManager<
-            ::anakin::saber::NV>>::Global()
+            ::anakin::saber::NV, PrecisionT>>::Global()
             .Get(engine_key);
     inference::Singleton<inference::anakin::AnakinOpConverter<
-        ::anakin::saber::NV>>::Global()
+        ::anakin::saber::NV, PrecisionT>>::Global()
         .ConvertBlockToAnakinEngine(
             &block_desc_temp, scope,
             std::vector<std::string>(input_names.begin(), input_names.end()),
             param_set, output_mapping, anakin_engine);
 #endif
   } else {
     auto *anakin_engine =
         inference::Singleton<inference::anakin::AnakinEngineManager<
-            ::anakin::saber::X86>>::Global()
+            ::anakin::saber::X86, PrecisionT>>::Global()
             .Get(engine_key);
     inference::Singleton<inference::anakin::AnakinOpConverter<
-        ::anakin::saber::X86>>::Global()
+        ::anakin::saber::X86, PrecisionT>>::Global()
         .ConvertBlockToAnakinEngine(
             &block_desc_temp, scope,
             std::vector<std::string>(input_names.begin(), input_names.end()),
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
@@ -15,6 +15,7 @@
 #pragma once
 #include <paddle/fluid/framework/ir/fuse_pass_base.h>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/ir/pass.h"
@@ -36,6 +37,13 @@ class AnakinSubgraphPass : public framework::ir::FusePassBase {
                       const std::vector<std::string> &graph_params,
                       std::vector<std::string> *repetitive_params) const;
   void CleanIntermediateOutputs(framework::ir::Node *node);
+  template <::anakin::Precision PrecisionT>
+  void CreateAnakinEngine(framework::BlockDesc *block_desc,
+                          const std::vector<std::string> &params,
+                          const std::set<std::string> &input_names,
+                          const std::vector<std::string> &output_mapping,
+                          const std::vector<std::string> &program_inputs,
+                          const std::string &engine_key) const;
 };

 }  // namespace analysis
paddle/fluid/inference/api/analysis_config.cc
@@ -116,6 +116,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(anakin_max_batchsize_);
   CP_MEMBER(anakin_max_input_shape_);
   CP_MEMBER(anakin_min_subgraph_size_);
+  CP_MEMBER(anakin_precision_mode_);
+  CP_MEMBER(anakin_passes_filter_);
+  CP_MEMBER(anakin_ops_filter_);

   // Ir related.
   CP_MEMBER(enable_ir_optim_);
@@ -276,7 +279,10 @@ void AnalysisConfig::Update() {
     pass_builder()->ClearPasses();
     for (const auto &pass : kAnakinSubgraphPasses) {
-      pass_builder()->AppendPass(pass);
+      if (std::find(anakin_passes_filter_.begin(), anakin_passes_filter_.end(),
+                    pass) == anakin_passes_filter_.end()) {
+        pass_builder()->AppendPass(pass);
+      }
     }
   }
@@ -391,11 +397,16 @@ void AnalysisConfig::SwitchIrDebug(int x) {
 }
 void AnalysisConfig::EnableAnakinEngine(
     int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
-    int min_subgraph_size) {
+    int min_subgraph_size, AnalysisConfig::Precision precision_mode,
+    std::vector<std::string> passes_filter, std::vector<std::string> ops_filter) {
   anakin_max_batchsize_ = max_batch_size;
   anakin_max_input_shape_ = max_input_shape;
   anakin_min_subgraph_size_ = min_subgraph_size;
+  anakin_passes_filter_ = passes_filter;
+  anakin_ops_filter_ = ops_filter;
   use_anakin_ = true;
+  anakin_precision_mode_ = precision_mode;
   Update();
 }
 }  // namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc
@@ -386,6 +386,9 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
     argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
     argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
+    argument_.SetAnakinPrecisionMode(config_.anakin_precision_mode_);
+    argument_.SetAnakinPassesFilter(config_.anakin_passes_filter_);
+    argument_.SetAnakinOpsFilter(config_.anakin_ops_filter_);
     LOG(INFO) << "Anakin subgraph engine is enabled";
   }
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -152,7 +152,9 @@ struct AnalysisConfig {
   void EnableAnakinEngine(
       int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
-      int min_subgraph_size = 6);
+      int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
+      std::vector<std::string> passes_filter = {},
+      std::vector<std::string> ops_filter = {});

   /** A boolean state indicating whether the Anakin sub-graph engine is used.
    */
@@ -291,6 +293,9 @@ struct AnalysisConfig {
   int anakin_max_batchsize_;
   int anakin_min_subgraph_size_{6};
   std::map<std::string, std::vector<int>> anakin_max_input_shape_;
+  Precision anakin_precision_mode_;
+  std::vector<std::string> anakin_passes_filter_;
+  std::vector<std::string> anakin_ops_filter_;
   std::map<std::string, std::string> engine_opt_info_;
   bool use_mkldnn_quantizer_{false};
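With the extended EnableAnakinEngine signature above, the Anakin sub-graph precision is chosen at configuration time. A minimal usage sketch, assuming an Anakin-enabled Paddle build; the model directory and input name/shape are placeholders:

#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void ConfigureAnakinInt8() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");  // placeholder model directory
  // Shape bound per feed name; "image" here is only an example input name.
  std::map<std::string, std::vector<int>> max_input_shape{{"image", {1, 3, 224, 224}}};
  // Request the Anakin sub-graph engine in INT8 mode, following the new
  // parameter list: batch size, shapes, subgraph size, precision, filters.
  config.EnableAnakinEngine(/*max_batch_size=*/1, max_input_shape,
                            /*min_subgraph_size=*/6,
                            paddle::AnalysisConfig::Precision::kInt8,
                            /*passes_filter=*/{}, /*ops_filter=*/{});
}

Passing Precision::kFloat32 instead keeps the previous FP32 behaviour; the two filter lists let a caller exclude individual optimization passes or operator types from the Anakin sub-graph.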
paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -73,15 +73,21 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
 // The following passes works for Anakin sub-graph engine.
 const std::vector<std::string> kAnakinSubgraphPasses({
     "infer_clean_graph_pass",                      //
+    "graph_viz_pass",                              //
+    "quant_conv2d_dequant_fuse_pass",              //
+    "graph_viz_pass",                              //
     "simplify_anakin_priorbox_detection_out_pass", //
     "fillconstant_elementwisemul_fuse",            //
     "fc_fuse_pass",                                //
     "conv_elementwise_add_fuse_pass",              //
-    "conv_bn_fuse_pass",                           //
-    "conv_elementwise_add_fuse_pass",              //
-    "fc_gru_fuse_pass",                            //
-    "quant_conv2d_dequant_fuse_pass",              //
-    "anakin_subgraph_pass",
+    // "conv_bn_fuse_pass",                        //
+    // "conv_elementwise_add_fuse_pass",           //
+    "fc_gru_fuse_pass",                            //
+    "graph_viz_pass",                              //
+    "anakin_subgraph_pass",                        //
+    "graph_viz_pass",                              //
+    "fc_gru_fuse_pass",                            //
+    "graph_viz_pass",                              //
 });

 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
paddle/fluid/operators/anakin/anakin_engine_op.h
@@ -44,6 +44,7 @@ class AnakinEngineOp : public framework::OperatorBase {
  std::string engine_key_;
  std::string engine_serialized_data_;
  bool use_gpu_;
+  bool enable_int8_;

 public:
  AnakinEngineOp(const std::string& type,
@@ -55,6 +56,7 @@ class AnakinEngineOp : public framework::OperatorBase {
     engine_key_ = Attr<std::string>("engine_key");
     auto params = Attr<std::vector<std::string>>("parameters");
     use_gpu_ = Attr<bool>("use_gpu");
+    enable_int8_ = Attr<bool>("enable_int8");
     for (const auto& param : params) {
       param_names_.insert(param);
@@ -68,11 +70,6 @@ class AnakinEngineOp : public framework::OperatorBase {
   void RunAnakin(const framework::Scope& scope,
                  const platform::Place& dev_place) const {
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    auto& dev_ctx = *pool.Get(dev_place);
-    auto stream =
-        reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx).stream();
     PADDLE_ENFORCE(!input_names_.empty(), "should pass more than one inputs");
     std::vector<std::string> output_maps =
@@ -96,18 +93,35 @@ class AnakinEngineOp : public framework::OperatorBase {
       outputs.insert({output_maps[output_index], fluid_t});
       output_index += 1;
     }
+    if (enable_int8_) {
+      Execute<::anakin::Precision::INT8>(inputs, outputs, dev_place);
+    } else {
+      Execute<::anakin::Precision::FP32>(inputs, outputs, dev_place);
+    }
+  }

+  template <::anakin::Precision PrecisionT>
+  void Execute(const std::map<std::string, framework::LoDTensor*>& inputs,
+               const std::map<std::string, framework::LoDTensor*>& outputs,
+               const platform::Place& dev_place) const {
     if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
       platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
       auto& dev_ctx = *pool.Get(dev_place);
       auto stream =
           reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx).stream();
       auto* engine =
           inference::Singleton<inference::anakin::AnakinEngineManager<
-              ::anakin::saber::NV>>::Global()
+              ::anakin::saber::NV, PrecisionT>>::Global()
               .Get(engine_key_);
       engine->Execute(inputs, outputs, stream);
 #endif
     } else {
       auto* engine =
           inference::Singleton<inference::anakin::AnakinEngineManager<
-              ::anakin::saber::X86>>::Global()
+              ::anakin::saber::X86, PrecisionT>>::Global()
               .Get(engine_key_);
       engine->Execute(inputs, outputs);
     }
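RunAnakin above (like CreateAnakinOp in the subgraph pass) turns the runtime enable_int8 flag into a compile-time ::anakin::Precision template argument through a plain if/else. A self-contained sketch of that dispatch idiom, using a stand-in enum and a free function instead of the real Anakin types:

#include <iostream>

// Stand-in for ::anakin::Precision.
enum class Precision { FP32, INT8 };

// Stand-in for the templated Execute<PrecisionT>() member.
template <Precision P>
void Execute() {
  std::cout << "running with "
            << (P == Precision::INT8 ? "INT8" : "FP32") << " kernels\n";
}

// Runtime flag -> compile-time template argument, as in RunAnakin().
void Run(bool enable_int8) {
  if (enable_int8) {
    Execute<Precision::INT8>();
  } else {
    Execute<Precision::FP32>();
  }
}

int main() {
  Run(true);   // INT8 path
  Run(false);  // FP32 path
  return 0;
}

The branch is the only place the runtime flag is consulted; everything below it is instantiated separately per precision, which is why the engine, engine manager, and converters all gained an extra precision template parameter in this commit.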
paddle/fluid/pybind/inference_api.cc
@@ -16,6 +16,7 @@
 #include <pybind11/stl.h>
 #include <cstring>
 #include <iostream>
+#include <map>
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
@@ -230,8 +231,13 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("use_static") = true)
       .def("enable_anakin_engine", &AnalysisConfig::EnableAnakinEngine,
-           py::arg("max_batch_size") = 1, py::arg("max_input_shape") = {},
-           py::arg("min_subgraph_size") = 6)
+           py::arg("max_batch_size") = 1,
+           py::arg("max_input_shape") = std::map<std::string, std::vector<int>>(),
+           py::arg("min_subgraph_size") = 6,
+           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
+           py::arg("passes_filter") = std::vector<std::string>(),
+           py::arg("ops_filter") = std::vector<std::string>())
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug, py::arg("x") = true)