BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit e14ab180
Author: nhzlx
Date:   Apr 11, 2019
Parent: 7ad182e1

    Cherry-pick from 1662, 16797.. : add anakin int8 support

Showing 81 changed files with 1103 additions and 589 deletions (+1103, -589).
Changed files:

paddle/fluid/framework/ir/fc_fuse_pass.cc  +2 -1
paddle/fluid/framework/ir/graph_pattern_detector.cc  +12 -13
paddle/fluid/framework/ir/graph_pattern_detector.h  +2 -1
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc  +19 -9
paddle/fluid/inference/anakin/convert/CMakeLists.txt  +6 -1
paddle/fluid/inference/anakin/convert/activation.cc  +39 -10
paddle/fluid/inference/anakin/convert/activation.h  +9 -8
paddle/fluid/inference/anakin/convert/affine_channel.cc  +24 -55
paddle/fluid/inference/anakin/convert/affine_channel.h  +2 -2
paddle/fluid/inference/anakin/convert/batch_norm.cc  +35 -71
paddle/fluid/inference/anakin/convert/batch_norm.h  +2 -2
paddle/fluid/inference/anakin/convert/concat.cc  +19 -6
paddle/fluid/inference/anakin/convert/concat.h  +2 -2
paddle/fluid/inference/anakin/convert/conv2d.cc  +55 -24
paddle/fluid/inference/anakin/convert/conv2d.h  +2 -2
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc  +59 -52
paddle/fluid/inference/anakin/convert/conv2d_fusion.h  +2 -2
paddle/fluid/inference/anakin/convert/density_prior_box.cc  +21 -10
paddle/fluid/inference/anakin/convert/density_prior_box.h  +3 -2
paddle/fluid/inference/anakin/convert/detection_out.cc  +19 -6
paddle/fluid/inference/anakin/convert/detection_out.h  +2 -2
paddle/fluid/inference/anakin/convert/dropout.cc  +21 -16
paddle/fluid/inference/anakin/convert/dropout.h  +2 -2
paddle/fluid/inference/anakin/convert/elementwise.cc  +31 -15
paddle/fluid/inference/anakin/convert/elementwise.h  +6 -4
paddle/fluid/inference/anakin/convert/fc.cc  +83 -57
paddle/fluid/inference/anakin/convert/fc.h  +6 -6
paddle/fluid/inference/anakin/convert/flatten.cc  +19 -6
paddle/fluid/inference/anakin/convert/flatten.h  +2 -2
paddle/fluid/inference/anakin/convert/helper.cc  +32 -0
paddle/fluid/inference/anakin/convert/helper.h  +88 -0
paddle/fluid/inference/anakin/convert/im2sequence.cc  +17 -4
paddle/fluid/inference/anakin/convert/im2sequence.h  +2 -2
paddle/fluid/inference/anakin/convert/op_converter.h  +57 -24
paddle/fluid/inference/anakin/convert/pool2d.cc  +19 -6
paddle/fluid/inference/anakin/convert/pool2d.h  +2 -2
paddle/fluid/inference/anakin/convert/relu.cc  +35 -10
paddle/fluid/inference/anakin/convert/relu.h  +4 -4
paddle/fluid/inference/anakin/convert/reshape.cc  +18 -6
paddle/fluid/inference/anakin/convert/reshape.h  +2 -2
paddle/fluid/inference/anakin/convert/roi_align.cc  +19 -11
paddle/fluid/inference/anakin/convert/roi_align.h  +2 -2
paddle/fluid/inference/anakin/convert/scale.cc  +21 -3
paddle/fluid/inference/anakin/convert/scale.h  +2 -2
paddle/fluid/inference/anakin/convert/softmax.cc  +19 -6
paddle/fluid/inference/anakin/convert/softmax.h  +2 -2
paddle/fluid/inference/anakin/convert/split.cc  +19 -4
paddle/fluid/inference/anakin/convert/split.h  +2 -2
paddle/fluid/inference/anakin/convert/sum.cc  +21 -7
paddle/fluid/inference/anakin/convert/sum.h  +2 -2
paddle/fluid/inference/anakin/convert/test_activation_op.cc  +4 -2
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_concat_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_dropout_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_fc_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_flatten_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_relu_op.cc  +2 -16
paddle/fluid/inference/anakin/convert/test_reshape_op.cc  +4 -4
paddle/fluid/inference/anakin/convert/test_softmax_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_split_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_sum_op.cc  +2 -2
paddle/fluid/inference/anakin/convert/test_transpose_op.cc  +4 -4
paddle/fluid/inference/anakin/convert/transpose.cc  +14 -6
paddle/fluid/inference/anakin/convert/transpose.h  +2 -2
paddle/fluid/inference/anakin/convert/ut_helper.h  +14 -7
paddle/fluid/inference/anakin/engine.cc  +11 -2
paddle/fluid/inference/anakin/engine.h  +10 -3
paddle/fluid/inference/analysis/argument.h  +6 -0
paddle/fluid/inference/analysis/ir_pass_manager.cc  +5 -0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc  +42 -12
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h  +8 -0
paddle/fluid/inference/api/analysis_config.cc  +13 -2
paddle/fluid/inference/api/analysis_predictor.cc  +3 -0
paddle/fluid/inference/api/paddle_analysis_config.h  +6 -1
paddle/fluid/inference/api/paddle_pass_builder.cc  +11 -5
paddle/fluid/operators/anakin/anakin_engine_op.h  +21 -7
paddle/fluid/pybind/inference_api.cc  +8 -2
paddle/fluid/framework/ir/fc_fuse_pass.cc

@@ -48,8 +48,9 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
   GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
   GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);

+  auto base_op_desc = *mul->Op()->Proto();
   // Create an FC Node.
-  OpDesc desc;
+  OpDesc desc(base_op_desc, nullptr);
   std::string fc_x_in = subgraph.at(x)->Name();
   std::string fc_Y_in = w->Name();
   std::string fc_bias_in = fc_bias->Name();
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -1640,7 +1640,8 @@ PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
 void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
                                               const std::string &op_type,
                                               const std::string &weight_name,
-                                              int times) {
+                                              int times,
+                                              const std::string &quant_type) {
   const int kNumFields = 5;
   const int kQuantizedWeightOffset = 0;
   const int kQuantizedOpOffset = 1;

@@ -1648,22 +1649,20 @@ void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
   const int kDequantOpOffset = 3;
   const int kDequantOpOutOffset = 4;
   // the quant op always be one.
   auto quant_op_in_scale =
       pattern->NewNode(GetNodeName("quant_op_in_scale"))
-          ->assert_is_op_input("fake_quantize_range_abs_max", "InScale")
+          ->assert_is_op_input(quant_type, "InScale")
           ->AsInput();
-  auto quant_op = pattern->NewNode(GetNodeName("quant_op"))
-                      ->assert_is_op("fake_quantize_range_abs_max");
+  auto quant_op =
+      pattern->NewNode(GetNodeName("quant_op"))->assert_is_op(quant_type);

   auto quant_op_out_scale =
       pattern->NewNode(GetNodeName("quant_op_out_scale"))
-          ->assert_is_op_output("fake_quantize_range_abs_max", "OutScale")
+          ->assert_is_op_output(quant_type, "OutScale")
          ->assert_is_op_input("fake_dequantize_max_abs", "Scale")
          ->AsIntermediate();

-  auto quant_op_out = pattern->NewNode(GetNodeName("quant_op_out"))
-                          ->assert_is_op_output("fake_quantize_range_abs_max", "Out")
+  auto quant_op_out = pattern->NewNode(GetNodeName("quant_op_out"))
+                          ->assert_is_op_output(quant_type, "Out")
                           ->assert_is_op_input(op_type)
                           ->AsIntermediate();
paddle/fluid/framework/ir/graph_pattern_detector.h

@@ -880,7 +880,8 @@ struct QuantDequantOpFuse : public PatternBase {
       : PatternBase(pattern, name_scope, "quant_dequant_fuse") {}

   void operator()(PDNode *quant_op_input, const std::string &op_name,
-                  const std::string &weight_name, int times = 1);
+                  const std::string &weight_name, int times,
+                  const std::string &quant_type);

   std::string GetNodeName(const std::string &op_type) {
     return PDNodeName(name_scope_, repr_, id_, op_type);
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc

@@ -25,7 +25,8 @@ namespace framework {
 namespace ir {

 void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
-                     std::string op_type) {
+                     const std::string& op_type,
+                     const std::string& quant_type) {
   const std::string pattern_name = "quant_dequant_fuse";
   // FusePassBase::Init(pattern_name, graph);
   const int kNumFields = 5;

@@ -38,7 +39,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   GraphPatternDetector gpd;
   auto* x = gpd.mutable_pattern()
                 ->NewNode("x")
-                ->assert_is_op_input("fake_quantize_range_abs_max", "X")
+                ->assert_is_op_input(quant_type, "X")
                 ->AsInput();

   std::string quantized_op_type = "";

@@ -46,6 +47,9 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   if (op_type == "conv2d") {
     quantized_op_type = "conv2d";
     weight_name = "Filter";
+  } else if (op_type == "depthwise_conv2d") {
+    quantized_op_type = "depthwise_conv2d";
+    weight_name = "Filter";
   } else if (op_type == "conv2d_fusion") {
     quantized_op_type = "conv2d_fusion";
     weight_name = "Filter";

@@ -62,7 +66,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   }

   patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name);
-  pattern(x, quantized_op_type, weight_name, times);
+  pattern(x, quantized_op_type, weight_name, times, quant_type);

   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {

@@ -103,7 +107,6 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   std::unordered_set<const Node*> delete_nodes;

   for (int i = 0; i < times; i++) {
     // max_range = (range * range) / weight_scale
     float max_range = boost::get<float>(
         nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range"));
     float weight_scale = (range * range) / max_range;

@@ -118,7 +121,8 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
     new_op_desc.SetType(quantized_op_type);

     if (quantized_op_type == "conv2d" ||
-        quantized_op_type == "conv2d_fusion") {
+        quantized_op_type == "conv2d_fusion" ||
+        quantized_op_type == "depthwise_conv2d") {
       new_op_desc.SetInput("Input", {new_input});
       new_op_desc.SetOutput("Output", {new_output});
     } else if (quantized_op_type == "fc") {

@@ -156,11 +160,17 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
   const std::string pattern_name = "quant_dequant_fuse";
   FusePassBase::Init(pattern_name, graph);

-  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul"};
+  std::unordered_set<std::string> quant_types = {
+      "fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
+  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul",
+                                                        "depthwise_conv2d"};
   auto* scope = param_scope();
-  for (auto& op_type : quantized_op_types) {
-    for (int i = 1; i <= 6; i++) {
-      RunQuantDequant(graph, scope, i, op_type);
+  for (auto& quant_type : quant_types) {
+    for (auto& op_type : quantized_op_types) {
+      for (int i = 6; i >= 1; i--) {
+        RunQuantDequant(graph, scope, i, op_type, quant_type);
+      }
     }
   }
 }
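Note on the scale recovery kept by this hunk: the handler reads the dequantize op's max_range attribute and inverts the relation in the retained comment, max_range = (range * range) / weight_scale. A minimal, self-contained sketch of that arithmetic follows; the range value of 127 and the sample max_range are assumptions for an 8-bit example, not values taken from the pass.

// Sketch only: recover weight_scale from a dequantize "max_range" attribute.
#include <cstdio>

int main() {
  const float range = 127.0f;        // assumed int8 range
  const float max_range = 16129.0f;  // hypothetical attribute value (127 * 127 / 1.0)
  const float weight_scale = (range * range) / max_range;  // recovers 1.0
  std::printf("weight_scale = %f\n", weight_scale);
  return 0;
}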
paddle/fluid/inference/anakin/convert/CMakeLists.txt

-cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc roi_align.cc DEPS anakin_engine framework_proto scope op_registry)
+cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
+elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
+batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
+detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
+roi_align.cc helper.cc DEPS anakin_engine framework_proto scope op_registry
+gtest)

 cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
 cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
paddle/fluid/inference/anakin/convert/activation.cc

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-ActivationOpConverter<TargetT>::ActivationOpConverter(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
     const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);

@@ -30,8 +30,8 @@ ActivationOpConverter<TargetT>::ActivationOpConverter(
   anakin_op_type_ = it->second;
 }

-template <typename TargetT>
-void ActivationOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ActivationOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -50,11 +50,40 @@ void ActivationOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+using sigmoid_nv_fp32 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sigmoid_nv_int8 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+using tanh_nv_fp32 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using tanh_nv_int8 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
+using sigmoid_cpu_fp32 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sigmoid_cpu_int8 = ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+using tanh_cpu_fp32 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using tanh_cpu_int8 = ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8);
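The hunk above shows the pattern repeated across every converter in this commit: the class gains a second template parameter for precision, and one alias per (target, precision) pair is then passed to the FP32 and INT8 registration macros. The following is a distilled, self-contained illustration of that templating idea with stand-in types only; it is not Paddle or Anakin code.

// Stand-in sketch of "template the converter on a precision enum".
#include <iostream>
#include <string>
#include <utility>

enum class Precision { FP32, INT8 };  // stand-in for ::anakin::Precision
struct NV {};                         // stand-in for ::anakin::saber::NV

template <typename TargetT, Precision PrecisionT>
struct ActivationConverterSketch {
  explicit ActivationConverterSketch(std::string op_type)
      : op_type_(std::move(op_type)) {}
  void Describe() const {
    // One instantiation per precision, mirroring sigmoid_nv_fp32 / sigmoid_nv_int8.
    std::cout << op_type_ << " converter, int8="
              << (PrecisionT == Precision::INT8) << "\n";
  }
  std::string op_type_;
};

using sigmoid_nv_fp32_like = ActivationConverterSketch<NV, Precision::FP32>;
using sigmoid_nv_int8_like = ActivationConverterSketch<NV, Precision::INT8>;

int main() {
  sigmoid_nv_fp32_like{"sigmoid"}.Describe();
  sigmoid_nv_int8_like{"sigmoid"}.Describe();
  return 0;
}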
paddle/fluid/inference/anakin/convert/activation.h

@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ActivationOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  explicit ActivationOpConverter(const std::string &op_type);

@@ -40,16 +40,17 @@ class ActivationOpConverter : public AnakinOpConverter<TargetT> {
       {"sigmoid", "Sigmoid"}};
 };

-template <typename TargetT>
-class TanhOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
 };

-template <typename TargetT>
-class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
+  SigmoidOpConverter()
+      : ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
 };
 }  // namespace anakin
 }  // namespace inference
paddle/fluid/inference/anakin/convert/affine_channel.cc

@@ -16,18 +16,14 @@
 #include <algorithm>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void AffineChannelOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -35,60 +31,20 @@ void AffineChannelOpConverter<TargetT>::operator()(
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});

   // Copy the Scale to CPUPlace and get the pointer.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto *scale_t = scale_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> scale_tensor(new framework::LoDTensor());
-  scale_tensor->Resize(scale_t->dims());
-  TensorCopySync((*scale_t), platform::CPUPlace(), scale_tensor.get());
+  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

   // Copy the Bias to CPUPlace and get the pointer.
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto *bias_t = bias_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> bias_tensor(new framework::LoDTensor());
-  bias_tensor->Resize(bias_t->dims());
-  TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
-
-  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
-
-  // Generate the Scale parameter of Anakin.
-  auto scale_shape = framework::vectorize2int(scale_t->dims());
-  while (scale_shape.size() < 4) {
-    scale_shape.insert(scale_shape.begin(), 1);
-  }
-  Shape anakin_scale_shape(scale_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_scale_shape);
-  float *scale_cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(), scale_cpu_data);
-  weight1->d_tensor().set_shape(anakin_scale_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  // Generate the Bias parameter of Anakin.
-  auto bias_shape = framework::vectorize2int(bias_t->dims());
-  while (bias_shape.size() < 4) {
-    bias_shape.insert(bias_shape.begin(), 1);
-  }
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
-  float *bias_cpu_data = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
+  auto weight2 = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }

@@ -97,8 +53,21 @@ void AffineChannelOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
+using affine_channel_nv_fp32 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using affine_channel_nv_int8 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);
+using affine_channel_cpu_fp32 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using affine_channel_cpu_int8 = ::paddle::inference::anakin::AffineChannelOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8);
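The weight-copy boilerplate deleted above (FindVar, copy to CPU, pad the shape to 4-D, copy into an Anakin block) is what the new helper.cc/helper.h, listed in this commit but not shown in this excerpt, centralizes behind calls such as pblock_from_var. A self-contained mock of just that padding-and-copy step follows; every type here is a stand-in, and nothing in it is the actual Paddle/Anakin API.

// Mock of the pad-to-4D plus flat-copy step the deleted code performed.
#include <algorithm>
#include <cstdio>
#include <vector>

struct HostBlock {            // stand-in for an Anakin parameter block
  std::vector<int> shape;
  std::vector<float> data;
};

HostBlock block_from_tensor(std::vector<int> shape, const std::vector<float>& src) {
  while (shape.size() < 4) {  // same leading-1 padding as the removed loop
    shape.insert(shape.begin(), 1);
  }
  HostBlock block{shape, std::vector<float>(src.size())};
  std::copy_n(src.begin(), src.size(), block.data.begin());
  return block;
}

int main() {
  auto b = block_from_tensor({64}, std::vector<float>(64, 0.5f));
  std::printf("rank: %zu, leading dim: %d\n", b.shape.size(), b.shape[0]);
  return 0;
}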
paddle/fluid/inference/anakin/convert/affine_channel.h

@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  AffineChannelOpConverter() = default;
paddle/fluid/inference/anakin/convert/batch_norm.cc

@@ -18,17 +18,14 @@
 #include <map>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void BatchNormOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -36,87 +33,46 @@ void BatchNormOpConverter<TargetT>::operator()(
-  std::map<std::string, std::string> inputs;
-  for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
-    PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
-    auto v = op_desc.Input(k).front();
-    inputs.insert({k, v});
-  }
-
+  auto input = op_desc.Input("X").front();
   auto output = op_desc.Output("Y").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
   auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
   // auto momentum = boost::get<float>(op_desc.GetAttr("momentum"));

   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
-  this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
   this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
   this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));

   auto scale_op_name = op_name + ":scale";
-  auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
-                                                 framework::LoDTensor *tensor) {
-    auto *v = scope.FindVar(var_name);
-    PADDLE_ENFORCE_NOT_NULL(v);
-    auto *t = v->GetMutable<framework::LoDTensor>();
-    tensor->Resize(t->dims());
-    TensorCopySync(*t, platform::CPUPlace(), tensor);
-  };
-
-  framework::LoDTensor bias_t;
-  framework::LoDTensor mean_t;
-  framework::LoDTensor scale_t;
-  framework::LoDTensor variance_t;
-  get_lod_tensor(inputs["Bias"], &bias_t);
-  get_lod_tensor(inputs["Mean"], &mean_t);
-  get_lod_tensor(inputs["Scale"], &scale_t);
-  get_lod_tensor(inputs["Variance"], &variance_t);
-
-  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
-  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
-  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
-  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
-
-  auto fill_shape = [](size_t n, std::vector<int> shape) {
-    shape.insert(shape.begin(), 1);
-    if (shape.size() < n) {
-      shape.insert(shape.end(), n - shape.size(), 1);
-    }
-    return shape;
-  };
-  Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
-  Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
+  auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
+  PADDLE_ENFORCE_NOT_NULL(mean_v);
+  auto weight1 = pblock_from_var<TargetT>(*mean_v);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);

-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
-  auto *variance_data = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
+  auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
+  PADDLE_ENFORCE_NOT_NULL(variance_v);
+  auto weight2 = pblock_from_var<TargetT>(*variance_v);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);

-  Shape shape3(std::vector<int>({1, 1, 1, 1}));
-  auto *weight3 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
-  auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
-  float weight3_data[] = {1};
-  std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
+  auto *weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);

-  Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
-  auto *scale = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(scale_shape);
-  auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
-  std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
-
-  Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
-  auto *bias = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(bias_shape);
-  auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
-  std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
+  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
+  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+
+  auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
+  PADDLE_ENFORCE_NOT_NULL(scale_v);
+  auto scale = pblock_from_var<TargetT>(*scale_v);
+  this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);

+  auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
+  PADDLE_ENFORCE_NOT_NULL(bias_v);
+  auto bias = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }

@@ -125,9 +81,17 @@ void BatchNormOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter<::anakin::saber::NV>);
+using bn_nv_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using bn_nv_int8 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter<::anakin::saber::X86>);
+using bn_cpu_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using bn_cpu_int8 = ::paddle::inference::anakin::BatchNormOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_int8);
paddle/fluid/inference/anakin/convert/batch_norm.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  BatchNormOpConverter() = default;
paddle/fluid/inference/anakin/convert/concat.cc

@@ -19,8 +19,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ConcatOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ConcatOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -39,8 +39,21 @@ void ConcatOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter<::anakin::saber::NV>);
+using concat_nv_fp32 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using concat_nv_int8 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, concat_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(concat, concat_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter<::anakin::saber::X86>);
+using concat_cpu_fp32 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using concat_cpu_int8 = ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, concat_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(concat, concat_cpu_int8);
paddle/fluid/inference/anakin/convert/concat.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ConcatOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ConcatOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d.cc

@@ -16,18 +16,16 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
 using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Conv2dOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -42,11 +40,8 @@ void Conv2dOpConverter<TargetT>::operator()(
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
-  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> weight_tensor(new framework::LoDTensor());
-  weight_tensor->Resize(filter_t->dims());
-  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
+  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

@@ -69,25 +64,61 @@ void Conv2dOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", false);

-  auto weight_shape = framework::vectorize2int(filter_t->dims());
-  Shape anakin_shape(weight_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
+  ::anakin::saber::Shape anakin_shape(weight_shape);
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    float *weight_data = weight_tensor->data<float>();
+    std::vector<char> weight_int8;
+    int weight_num = weight_tensor->numel();
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      bool is_valid_int8 = ((weight_data[i] >= -128) && (weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of conv "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name, {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter<::anakin::saber::X86>);
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter<::anakin::saber::NV>);
+using conv2d_nv_fp32 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using conv2d_nv_int8 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_int8);
 #endif
+using conv2d_cpu_fp32 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using conv2d_cpu_int8 = ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_int8);
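In the enable_int8 branch above, conv weights are expected to arrive already quantized into [-128, 127]; they are narrowed to bytes, and the input and weight scales are registered after dividing by the int8 range of 127. The following self-contained C++ sketch illustrates only that check-and-cast step plus the scale division; it is an illustration in the spirit of the hunk, not the Paddle code, and the weight_scale value used is hypothetical.

// Illustration of the int8 range check, narrowing cast, and scale division.
#include <cstdio>
#include <stdexcept>
#include <vector>

std::vector<char> to_int8_or_throw(const std::vector<float>& weights) {
  std::vector<char> out;
  out.reserve(weights.size());
  for (float w : weights) {
    if (w < -128.f || w > 127.f) {
      throw std::runtime_error("int8 weight out of range [-128, 127]");
    }
    out.push_back(static_cast<char>(w));  // same narrowing the converter does
  }
  return out;
}

int main() {
  const float int8_range = 127.f;
  const float weight_scale = 0.5f;  // hypothetical op attribute value
  auto q = to_int8_or_throw({-128.f, 0.f, 127.f});
  std::printf("quantized %zu weights, registered scale = %f\n",
              q.size(), weight_scale / int8_range);
  return 0;
}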
paddle/fluid/inference/anakin/convert/conv2d.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Conv2dOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  Conv2dOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc

@@ -16,18 +16,16 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "paddle/fluid/inference/anakin/convert/helper.h"

-using anakin::graph::GraphGlobalMem;
 using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Conv2dFusionOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -43,24 +41,16 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
-  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
+  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());

   auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(b_v);
-  auto *b_t = b_v->GetMutable<framework::LoDTensor>();
-
-  std::unique_ptr<framework::LoDTensor> weight_tensor(new framework::LoDTensor());
-  weight_tensor->Resize(filter_t->dims());
-  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

   // const int n_output = weight_tensor->dims()[0];
   // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
   this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",

@@ -77,37 +67,42 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", true);

-  auto weight_shape = framework::vectorize2int(filter_t->dims());
-  Shape anakin_shape(weight_shape);
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
-
-  auto bias_shape = framework::vectorize2int(b_t->dims());
-  framework::LoDTensor bias_tensor;
-  bias_tensor.Resize(b_t->dims());
-  TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
-  auto *bias_data = bias_tensor.data<float>();
-  bias_shape.insert(bias_shape.begin(), 1);
-  bias_shape.insert(bias_shape.begin(), 1);
-  bias_shape.insert(bias_shape.begin(), 1);
-  // bias_shape.push_back(1);
-  // bias_shape.push_back(1);
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
-  float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
+  ::anakin::saber::Shape anakin_shape(weight_shape);
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    float *weight_data = weight_tensor->data<float>();
+    std::vector<char> weight_int8;
+    int weight_num = weight_tensor->numel();
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      bool is_valid_int8 = ((weight_data[i] >= -128) && (weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of conv "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(op_name, {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
+    auto weight_shape = framework::vectorize2int(weight_tensor->dims());
+    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  }
 }

@@ -115,9 +110,21 @@ void Conv2dFusionOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter<::anakin::saber::NV>);
+using conv2d_fusion_nv_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using conv2d_fusion_nv_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter<::anakin::saber::X86>);
+using conv2d_fusion_cpu_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using conv2d_fusion_cpu_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_int8);
paddle/fluid/inference/anakin/convert/conv2d_fusion.h

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  Conv2dFusionOpConverter() = default;
paddle/fluid/inference/anakin/convert/density_prior_box.cc

@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DensityPriorBoxOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DensityPriorBoxOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -109,13 +109,24 @@ void DensityPriorBoxOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
+using ds_pr_nv_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using ds_pr_nv_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
+using ds_pr_cpu_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using ds_pr_cpu_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_int8);
paddle/fluid/inference/anakin/convert/density_prior_box.h

@@ -22,8 +22,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DensityPriorBoxOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DensityPriorBoxOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  DensityPriorBoxOpConverter() = default;
paddle/fluid/inference/anakin/convert/detection_out.cc

@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DetectionOutOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DetectionOutOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -67,8 +67,21 @@ void DetectionOutOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter<::anakin::saber::NV>);
+using detection_out_nv_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using detection_out_nv_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter<::anakin::saber::X86>);
+using detection_out_cpu_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using detection_out_cpu_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_int8);
paddle/fluid/inference/anakin/convert/detection_out.h

@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DetectionOutOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DetectionOutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  DetectionOutOpConverter() = default;
paddle/fluid/inference/anakin/convert/dropout.cc

@@ -16,17 +16,14 @@
 #include <algorithm>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void DropoutOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void DropoutOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);

@@ -42,12 +39,7 @@ void DropoutOpConverter<TargetT>::operator()(
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
-  Shape shape1(std::vector<int>({1, 1, 1, 1}));
-  auto *weight1 = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *factor_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  float weight1_data[] = {factor};
-  std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data);
+  auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));

   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   this->engine_->AddOpAttr(op_name, "axis", 0);

@@ -60,8 +52,21 @@ void DropoutOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter<::anakin::saber::NV>);
+using dropout_nv_fp32 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using dropout_nv_int8 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, dropout_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter<::anakin::saber::X86>);
+using dropout_cpu_fp32 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using dropout_cpu_int8 = ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_int8);
paddle/fluid/inference/anakin/convert/dropout.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class DropoutOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class DropoutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  DropoutOpConverter() = default;
paddle/fluid/inference/anakin/convert/elementwise.cc
@@ -17,17 +17,14 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ElementwiseAddOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ElementwiseAddOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -48,8 +45,8 @@ void ElementwiseAddOpConverter<TargetT>::operator()(
   this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }

-template <typename TargetT>
-void ElementwiseMulOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -75,12 +72,31 @@ void ElementwiseMulOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(
-    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(
-    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::NV>);
+using elet_nv_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using elet_nv_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using eletmul_nv_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using eletmul_nv_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(
-    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(
-    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::X86>);
+using elet_cpu_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using elet_cpu_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using eletmul_cpu_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using eletmul_cpu_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_int8);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_int8);
paddle/fluid/inference/anakin/convert/elementwise.h
@@ -20,8 +20,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ElementwiseAddOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ElementwiseAddOpConverter() = default;
@@ -34,8 +35,9 @@ class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
  private:
 };

-template <typename TargetT>
-class ElementwiseMulOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ElementwiseMulOpConverter
+    : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ElementwiseMulOpConverter() = default;
paddle/fluid/inference/anakin/convert/fc.cc
@@ -16,22 +16,19 @@
 #include <algorithm>
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void FcBaseOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_names = op_desc.InputNames();
-  bool with_bias = input_names.size() == 3;
+  bool with_bias = input_names.size() >= 3;

   std::string w_name = "Y";
   std::string i_name = "X";
@@ -45,7 +42,12 @@ void FcBaseOpConverter<TargetT>::operator()(
   // get weights
   auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
   PADDLE_ENFORCE_NOT_NULL(y_v);
-  auto *y_t = y_v->GetMutable<framework::LoDTensor>();
+  auto weight_tensor = tensor_from_var(*y_v, platform::CPUPlace());
+  auto weight_shape = framework::vectorize2int(weight_tensor->dims());
+
+  int out_dim = weight_shape[1];
+  const int w_m = weight_shape[0];
+  const int w_k = weight_shape[1];

   auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();
@@ -53,64 +55,58 @@ void FcBaseOpConverter<TargetT>::operator()(
   this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
   this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
   this->engine_->AddOpAttr(op_name, "axis", 1);
-
-  auto weight_shape = framework::vectorize2int(y_t->dims());
-  int out_dim = weight_shape[1];
   this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
-  const int w_m = weight_shape[0];
-  const int w_k = weight_shape[1];

-  if (weight_shape.size() < 4UL) {
-    weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
-  }
-  Shape anakin_shape(weight_shape);
-
-  framework::LoDTensor weight_tensor;
-  weight_tensor.Resize(y_t->dims());
-  TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
-  auto *weight_data = weight_tensor.data<float>();
-  PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel());
+  auto *weight_data = weight_tensor->data<float>();
+  PADDLE_ENFORCE(w_m * w_k == weight_tensor->numel());

-  std::vector<float> trans_weight_data(weight_tensor.numel());
+  std::vector<float> trans_weight_data(weight_tensor->numel());
   for (int i = 0; i < w_m; i++) {
     for (int j = 0; j < w_k; j++) {
       trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
     }
   }
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-          anakin_shape);
-  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
+
+  int weight_num = weight_tensor->numel();
+  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
+  if (enable_int8) {
+    if (weight_shape.size() < 4UL) {
+      weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
+    }
+    ::anakin::saber::Shape anakin_shape(weight_shape);
+    const float int8_range = 127.;
+    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
+    float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
+    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
+                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    std::vector<char> weight_int8;
+    for (int i = 0; i < weight_num; i++) {
+      bool is_valid_int8 =
+          ((trans_weight_data[i] >= -128) && (trans_weight_data[i] <= 127));
+      PADDLE_ENFORCE(is_valid_int8,
+                     "We are in anakin subgraph int8 mode, the weight of fc "
+                     "should be in range [-128, 127]");
+      weight_int8.push_back(static_cast<char>(trans_weight_data[i]));
+    }
+    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
+           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
+    weight1->d_tensor().set_shape(anakin_shape);
+    weight1->d_tensor().copy_from(weight1->h_tensor());
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
+    this->engine_->Graph()->SetWeightsScale(
+        op_name, {weight_scale / int8_range}, false);
+    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
+  } else {
+    auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data);
+    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  }

   // get bias
   if (with_bias) {
     auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
     PADDLE_ENFORCE_NOT_NULL(b_v);
-    auto *b_t = b_v->GetMutable<framework::LoDTensor>();
-
-    auto bias_shape = framework::vectorize2int(b_t->dims());
-    framework::LoDTensor bias_tensor;
-    bias_tensor.Resize(b_t->dims());
-    TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
-    auto *bias_data = bias_tensor.data<float>();
-    bias_shape.insert(bias_shape.begin(), 1);
-    bias_shape.insert(bias_shape.begin(), 1);
-    bias_shape.insert(bias_shape.begin(), 1);
-    // bias_shape.push_back(1);
-    // bias_shape.push_back(1);
-    Shape anakin_bias_shape(bias_shape);
-
-    auto *weight2 =
-        GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-            anakin_bias_shape);
-    float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
-    std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
-    weight2->d_tensor().set_shape(anakin_bias_shape);
-    weight2->d_tensor().copy_from(weight2->h_tensor());
+    auto weight2 = pblock_from_var<TargetT>(*b_v);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
@@ -120,9 +116,39 @@ void FcBaseOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::NV>);
+using mul_nv_fp32 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using fc_nv_fp32 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using mul_nv_int8 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using fc_nv_int8 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, mul_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, fc_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(mul, mul_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(fc, fc_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::X86>);
+using mul_cpu_fp32 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using fc_cpu_fp32 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using mul_cpu_int8 = ::paddle::inference::anakin::MulOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using fc_cpu_int8 = ::paddle::inference::anakin::FcOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, mul_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, fc_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(mul, mul_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(fc, fc_cpu_int8);
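Note on the int8 branch above: the converter quantizes the transposed FC weights to char and hands Anakin the per-tensor scales divided by the int8 range of 127, i.e. weight_scale / int8_range through SetWeightsScale and in_scale / int8_range through AddTensorScale. A minimal sketch of that scale arithmetic follows; the two input values are hypothetical and stand in for the "input_scale" / "weight_scale" attributes written by the quant/dequant fuse pass, they are not taken from the commit.

#include <cstdio>

int main() {
  const float int8_range = 127.f;
  float in_scale = 12.7f;      // assumed activation scale from "input_scale"
  float weight_scale = 63.5f;  // assumed weight scale from "weight_scale"
  // These are the values the converter registers with the Anakin graph.
  std::printf("tensor scale  = %f\n", in_scale / int8_range);      // 0.1
  std::printf("weights scale = %f\n", weight_scale / int8_range);  // 0.5
  return 0;
}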
paddle/fluid/inference/anakin/convert/fc.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FcBaseOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  FcBaseOpConverter() = default;
@@ -33,15 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
 };

 // with bias
-template <typename TargetT>
-class FcOpConverter : public FcBaseOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FcOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
  public:
  FcOpConverter() = default;
 };

 // without bias
-template <typename TargetT>
-class MulOpConverter : public FcBaseOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class MulOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
  public:
  MulOpConverter() = default;
 };
paddle/fluid/inference/anakin/convert/flatten.cc
@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void FlattenOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void FlattenOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -46,8 +46,21 @@ void FlattenOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten,
-                                  FlattenOpConverter<::anakin::saber::NV>);
+using flatten_nv_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using flatten_nv_int8 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten, flatten_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten,
-                                 FlattenOpConverter<::anakin::saber::X86>);
+using flatten_cpu_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using flatten_cpu_int8 = ::paddle::inference::anakin::FlattenOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_int8);
paddle/fluid/inference/anakin/convert/flatten.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class FlattenOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class FlattenOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  FlattenOpConverter() = default;
paddle/fluid/inference/anakin/convert/helper.cc
0 → 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {

std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable& var, const platform::Place& place) {
  auto& src = var.Get<framework::LoDTensor>();
  std::unique_ptr<framework::LoDTensor> dst(new framework::LoDTensor());
  dst->Resize(src.dims());
  TensorCopySync((src), place, dst.get());
  return dst;
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/helper.h
0 → 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"
using anakin::saber::Shape;
using anakin::AK_FLOAT;
using anakin::PBlock;
using anakin::graph::GraphGlobalMem;

namespace paddle {
namespace inference {
namespace anakin {

std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable& var, const platform::Place& place);

template <typename T>
PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
                              std::vector<int> shape) {
  while (shape.size() < 4) {
    shape.insert(shape.begin(), 1);
  }
  Shape anakin_shape(shape);
  auto* weight =
      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
  float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
  std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
  weight->d_tensor().set_shape(anakin_shape);
  weight->d_tensor().copy_from(weight->h_tensor());
  return weight;
}

template <typename T>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
                              std::vector<int> shape_vec) {
  while (shape_vec.size() < 4) {
    shape_vec.insert(shape_vec.begin(), 1);
  }
  Shape shape(shape_vec);
  auto* weight =
      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
  auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
  std::copy(std::begin(vec), std::end(vec), weight_data);
  weight->d_tensor().set_shape(shape);
  weight->d_tensor().copy_from(weight->h_tensor());
  return weight;
}

template <typename T>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
  int size = vec.size();
  return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
}

template <typename T>
PBlock<T>* pblock_from_var(const framework::Variable& var) {
  auto tensor = tensor_from_var(var, platform::CPUPlace());
  auto shape = framework::vectorize2int(tensor->dims());
  return pblock_from_tensor<T>(*tensor, shape);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
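These helpers are what the rewritten converters above call instead of building GraphGlobalMem blocks by hand: dropout.cc wraps its scale factor with pblock_from_vector<TargetT>, and fc.cc loads its bias with pblock_from_var<TargetT>. A rough sketch of how a converter body uses the two entry points is below; it only compiles inside the Paddle/Anakin source tree, and the function name ExampleUsage is illustrative, not part of the commit.

// Sketch only: mirrors the pattern used by the converters in this commit.
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"

namespace pia = paddle::inference::anakin;

template <typename TargetT>
void ExampleUsage(const paddle::framework::Variable& bias_var, float factor) {
  // A 1x1x1x1 block holding a single scalar, as in the dropout converter.
  auto* factor_block =
      pia::pblock_from_vector<TargetT>(std::vector<float>({factor}));
  // A block built straight from a persistable variable, as for the FC bias.
  auto* bias_block = pia::pblock_from_var<TargetT>(bias_var);
  (void)factor_block;
  (void)bias_block;
}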
paddle/fluid/inference/anakin/convert/im2sequence.cc
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Im2SequenceConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Im2SequenceConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -55,5 +55,18 @@ void Im2SequenceConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence,
-                                  Im2SequenceConverter<::anakin::saber::NV>);
+#ifdef PADDLE_WITH_CUDA
+using im2sequence_nv_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using im2sequence_nv_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_int8);
+#endif
+
+using im2sequence_cpu_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using im2sequence_cpu_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_int8);
paddle/fluid/inference/anakin/convert/im2sequence.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Im2SequenceConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Im2SequenceConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  Im2SequenceConverter() = default;
paddle/fluid/inference/anakin/convert/op_converter.h
@@ -32,9 +32,9 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionT>
 class AnakinOpConverter {
-  using AnakinEngineT = AnakinEngine<TargetT, ::anakin::Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, PrecisionT>;

  public:
  AnakinOpConverter() = default;
@@ -96,6 +96,13 @@ class AnakinOpConverter {
       engine->Graph()->RegistVar(output);
     }
     engine->Freeze();
+
+    // Add scale for tensor in int8 mode.
+    auto tensor_scales = engine->GetTensorScales();
+    for (auto &item : tensor_scales) {
+      engine->Graph()->SetVarScale(item.first, item.second);
+    }
+
     for (auto &input : inputs) {
       if (parameters.count(input)) continue;
       std::vector<int> input_shape;
@@ -136,52 +143,78 @@ class AnakinOpConverter {
   AnakinEngineT *engine_{nullptr};

  private:
-  std::unordered_map<std::string, AnakinOpConverter<TargetT> *> converters_;
+  std::unordered_map<std::string, AnakinOpConverter<TargetT, PrecisionT> *>
+      converters_;
   framework::Scope *scope_{nullptr};
   std::mutex mutex_;
 };

-template class AnakinOpConverter<::anakin::saber::NV>;
-template class AnakinOpConverter<::anakin::saber::X86>;
+template class AnakinOpConverter<::anakin::saber::NV,
+                                 ::anakin::Precision::FP32>;
+template class AnakinOpConverter<::anakin::saber::NV,
+                                 ::anakin::Precision::INT8>;
+template class AnakinOpConverter<::anakin::saber::X86,
+                                 ::anakin::Precision::FP32>;
+template class AnakinOpConverter<::anakin::saber::X86,
+                                 ::anakin::Precision::INT8>;

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 #define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__,             \
-                                          place_type__, place_class__)        \
-  struct anakin_##op_type__##_##place_type__##_converter                      \
+                                          place_type__, place_class__,        \
+                                          precision_type__,                   \
+                                          precision_class__)                  \
+  struct anakin_##op_type__##_##place_type__##_##precision_type__##_converter \
       : public ::paddle::framework::Registrar {                               \
-    anakin_##op_type__##_##place_type__##_converter() {                       \
+    anakin_##op_type__##_##place_type__##_##precision_type__##_converter() {  \
       LOG(INFO) << "register convert " << #op_type__ << " ";                  \
       ::paddle::inference::Registry<                                          \
-          ::paddle::inference::anakin::AnakinOpConverter<place_class__>>::    \
-          Global()                                                            \
-              .Register<::paddle::inference::anakin::Converter__>(#op_type__);\
+          ::paddle::inference::anakin::AnakinOpConverter<                     \
+              place_class__, precision_class__>>::Global()                    \
+          .Register<Converter__>(#op_type__);                                 \
     }                                                                         \
   };                                                                          \
-  anakin_##op_type__##_##place_type__##_converter                             \
-      anakin_##op_type__##_##place_type__##_converter__;                      \
-  int TouchConverterRegister_anakin_##op_type__##_##place_type__() {          \
-    anakin_##op_type__##_##place_type__##_converter__.Touch();                \
+  anakin_##op_type__##_##place_type__##_##precision_type__##_converter        \
+      anakin_##op_type__##_##place_type__##_##precision_type__##_converter__; \
+  int Touch_anakin_##op_type__##_##place_type__##_##precision_type__() {      \
+    anakin_##op_type__##_##place_type__##_##precision_type__##_converter__    \
+        .Touch();                                                             \
     return 0;                                                                 \
   }

 #define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
   REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
-                                    ::anakin::saber::NV)
+                                    ::anakin::saber::NV, FP32,    \
+                                    ::anakin::Precision::FP32)
+
+#define REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA,      \
+                                    ::anakin::saber::NV, INT8,         \
+                                    ::anakin::Precision::INT8)

 #define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__)  \
   REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU,  \
-                                    ::anakin::saber::X86)
+                                    ::anakin::saber::X86, FP32,   \
+                                    ::anakin::Precision::FP32)
+
+#define REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU,      \
+                                    ::anakin::saber::X86, INT8,       \
+                                    ::anakin::Precision::INT8)

-#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__)                     \
-  extern int TouchConverterRegister_anakin_##op_type__##_##place_type__();     \
-  int use_op_converter_anakin_##op_type__##_##place_type__                     \
+#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__)   \
+  extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
+  int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__   \
       __attribute__((unused)) =                                                \
-          TouchConverterRegister_anakin_##op_type__##_##place_type__();
+          Touch_anakin_##op_type__##_##place_type__##_##precision_type__();

 #define USE_ANAKIN_CONVERTER(op_type__) \
-  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA)
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
+#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)
+
 #define USE_CPU_ANAKIN_CONVERTER(op_type__) \
-  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU)
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
+#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
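With the extra precision dimension, each converter is registered once per (place, precision) pair, and a consumer pulls in a specific instantiation with the matching USE_* macro. A hedged sketch of how the pieces fit together follows; MyOpConverter and my_op are hypothetical placeholders, while the macro names and their expansions come from the definitions above.

// In a converter .cc file (hypothetical op "my_op"):
using my_op_nv_fp32 = ::paddle::inference::anakin::MyOpConverter<
    ::anakin::saber::NV, ::anakin::Precision::FP32>;
using my_op_nv_int8 = ::paddle::inference::anakin::MyOpConverter<
    ::anakin::saber::NV, ::anakin::Precision::INT8>;
REGISTER_CUDA_ANAKIN_OP_CONVERTER(my_op, my_op_nv_fp32);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(my_op, my_op_nv_int8);

// In the translation unit that needs the converters linked in:
USE_ANAKIN_CONVERTER(my_op);       // USE_ANAKIN_CONVERTER_BASE(my_op, CUDA, FP32)
USE_INT8_ANAKIN_CONVERTER(my_op);  // USE_ANAKIN_CONVERTER_BASE(my_op, CUDA, INT8)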
paddle/fluid/inference/anakin/convert/pool2d.cc
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void Pool2dOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void Pool2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -72,8 +72,21 @@ void Pool2dOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d,
-                                  Pool2dOpConverter<::anakin::saber::NV>);
+using pool2d_nv_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using pool2d_nv_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_float32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d,
-                                 Pool2dOpConverter<::anakin::saber::X86>);
+using pool2d_cpu_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using pool2d_cpu_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_float32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_int8);
paddle/fluid/inference/anakin/convert/pool2d.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class Pool2dOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Pool2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  Pool2dOpConverter() = default;
paddle/fluid/inference/anakin/convert/relu.cc
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ReluOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ReluOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -36,8 +36,8 @@ void ReluOpConverter<TargetT>::operator()(
   this->engine_->AddOpAttr(op_name, "alpha", 0);
 }

-template <typename TargetT>
-void LeakyReluOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void LeakyReluOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -58,10 +58,35 @@ void LeakyReluOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu,
-                                  LeakyReluOpConverter<::anakin::saber::NV>);
+using relu_nv_fp32 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using leaky_nv_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using relu_nv_int8 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+using leaky_nv_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, relu_nv_fp32);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(relu, relu_nv_int8);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu,
-                                 LeakyReluOpConverter<::anakin::saber::X86>);
+using relu_cpu_fp32 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using leaky_cpu_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using relu_cpu_int8 = ::paddle::inference::anakin::ReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+using leaky_cpu_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, relu_cpu_fp32);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(relu, relu_cpu_int8);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_int8);
paddle/fluid/inference/anakin/convert/relu.h
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ReluOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ReluOpConverter() = default;
@@ -34,8 +34,8 @@ class ReluOpConverter : public AnakinOpConverter<TargetT> {
  virtual ~ReluOpConverter() {}
 };

-template <typename TargetT>
-class LeakyReluOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class LeakyReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  LeakyReluOpConverter() = default;
浏览文件 @
e14ab180
...
...
@@ -21,8 +21,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
template
<
typename
TargetT
>
void
ReshapeOpConverter
<
TargetT
>::
operator
()(
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
void
ReshapeOpConverter
<
TargetT
,
PrecisionT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -47,9 +47,21 @@ void ReshapeOpConverter<TargetT>::operator()(
}
// namespace paddle
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
reshape
,
ReshapeOpConverter
<::
anakin
::
saber
::
NV
>
);
using
reshape_nv_fp32
=
::
paddle
::
inference
::
anakin
::
ReshapeOpConverter
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>
;
using
reshape_nv_int8
=
::
paddle
::
inference
::
anakin
::
ReshapeOpConverter
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
INT8
>
;
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
reshape
,
reshape_nv_fp32
);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER
(
reshape
,
reshape_nv_int8
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
reshape
,
ReshapeOpConverter
<::
anakin
::
saber
::
X86
>
);
using
reshape_cpu_fp32
=
::
paddle
::
inference
::
anakin
::
ReshapeOpConverter
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
FP32
>
;
using
reshape_cpu_int8
=
::
paddle
::
inference
::
anakin
::
ReshapeOpConverter
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
INT8
>
;
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
reshape
,
reshape_cpu_fp32
);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER
(
reshape
,
reshape_cpu_int8
);
paddle/fluid/inference/anakin/convert/reshape.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ReshapeOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ReshapeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ReshapeOpConverter() = default;
paddle/fluid/inference/anakin/convert/roi_align.cc
@@ -16,17 +16,12 @@
 #include <algorithm>
 #include <map>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void RoiAlignOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void RoiAlignOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -57,8 +52,21 @@ void RoiAlignOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align,
-                                  RoiAlignOpConverter<::anakin::saber::NV>);
+using roi_align_nv_fp32 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using roi_align_nv_int8 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align,
-                                 RoiAlignOpConverter<::anakin::saber::X86>);
+using roi_align_cpu_fp32 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using roi_align_cpu_int8 = ::paddle::inference::anakin::RoiAlignOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_int8);
paddle/fluid/inference/anakin/convert/roi_align.h
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class RoiAlignOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class RoiAlignOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  RoiAlignOpConverter() = default;
paddle/fluid/inference/anakin/convert/scale.cc
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void ScaleOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ScaleOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -49,4 +49,22 @@ void ScaleOpConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter<::anakin::saber::NV>);
+#ifdef PADDLE_WITH_CUDA
+using scale_nv_fp32 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using scale_nv_int8 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, scale_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(scale, scale_nv_int8);
+#endif
+
+using scale_cpu_fp32 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using scale_cpu_int8 = ::paddle::inference::anakin::ScaleOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(scale, scale_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(scale, scale_cpu_int8);
paddle/fluid/inference/anakin/convert/scale.h
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class ScaleOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ScaleOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  ScaleOpConverter() = default;
paddle/fluid/inference/anakin/convert/softmax.cc
@@ -18,8 +18,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void SoftMaxOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SoftMaxOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -45,9 +45,22 @@ void SoftMaxOpConverter<TargetT>::operator()(
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax,
-                                  SoftMaxOpConverter<::anakin::saber::NV>);
+using sm_nv_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sm_nv_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, sm_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(softmax, sm_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax,
-                                 SoftMaxOpConverter<::anakin::saber::X86>);
+using sm_cpu_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sm_cpu_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, sm_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(softmax, sm_cpu_int8);
paddle/fluid/inference/anakin/convert/softmax.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class SoftMaxOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SoftMaxOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  SoftMaxOpConverter() = default;
paddle/fluid/inference/anakin/convert/split.cc
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void SplitOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SplitOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -56,7 +56,22 @@ void SplitOpConverter<TargetT>::operator()(
 }  // namespace inference
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::NV>);
+using split_nv_fp32 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using split_nv_int8 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, split_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(split, split_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::X86>);
+using split_cpu_fp32 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using split_cpu_int8 = ::paddle::inference::anakin::SplitOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(split, split_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(split, split_cpu_int8);
paddle/fluid/inference/anakin/convert/split.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class SplitOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SplitOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  SplitOpConverter() = default;
paddle/fluid/inference/anakin/convert/sum.cc
@@ -23,11 +23,10 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-void SumOpConverter<TargetT>::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void SumOpConverter<TargetT, PrecisionT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -49,6 +48,21 @@ void SumOpConverter<TargetT>::operator()(const framework::proto::OpDesc &op,
 }  // namespace paddle

 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::NV>);
+using sum_nv_fp32 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using sum_nv_int8 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, sum_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sum, sum_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::X86>);
+using sum_cpu_fp32 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using sum_cpu_int8 = ::paddle::inference::anakin::SumOpConverter<
+    ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, sum_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sum, sum_cpu_int8);
paddle/fluid/inference/anakin/convert/sum.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <typename TargetT>
-class SumOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SumOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
  SumOpConverter() = default;
paddle/fluid/inference/anakin/convert/test_activation_op.cc
@@ -27,8 +27,8 @@ static void test_activation_op(const std::string& op_type,
                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
@@ -57,6 +57,7 @@ TEST(tanh_op, gpu) {
 }
 #endif

+/*
 TEST(sigm_op, cpu) {
   platform::CPUPlace cpu_place;
   platform::CPUDeviceContext ctx(cpu_place);
@@ -68,6 +69,7 @@ TEST(tanh_op, cpu) {
   platform::CPUDeviceContext ctx(cpu_place);
   test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
 }
+*/

 }  // namespace anakin
 }  // namespace inference
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
@@ -28,8 +28,8 @@ void test_affine_channel_op(const platform::DeviceContext& context,
   std::unordered_set<std::string> parameters({"scale", "bias"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 3, 5, 2});
   validator.DeclOutputVar("out", {1, 3, 5, 2});
   validator.DeclParamVar("scale", {3});
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
@@ -25,8 +25,8 @@ void test_batchnorm_op(const platform::DeviceContext& context, bool use_gpu) {
       {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
        "batch_norm_variance"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   std::vector<int> param_shape{2};

   validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});
paddle/fluid/inference/anakin/convert/test_concat_op.cc
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_concat_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
   validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
   validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_conv2d_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"conv2d-Y"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
   validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
   validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
@@ -25,8 +25,8 @@ template <typename TargetT>
 void test_dropout_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
   validator.DeclOutputVar("mask", {1, 1, 2, 2});
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
@@ -27,8 +27,8 @@ static void test_elementwise_op(const std::string& op_type,
                                 bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclInputVar("y", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
paddle/fluid/inference/anakin/convert/test_fc_op.cc
@@ -25,8 +25,8 @@ void test_mul_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"mul_y"});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("mul_x", {1, 1, 2, 2});
   validator.DeclParamVar("mul_y", {4, 2});
   validator.DeclOutputVar("mul_out", {1, 2});
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_flatten_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
   validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
   framework::OpDesc desc;
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
@@ -25,8 +25,8 @@ void test_pool2d(const platform::DeviceContext& context, bool use_gpu,
                  std::string pool_type = "max") {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);

   // The ITensor's Dims should not contain the batch size.
   // So, the ITensor's Dims of input and output should be C * H * W.
paddle/fluid/inference/anakin/convert/test_relu_op.cc
@@ -27,8 +27,8 @@ static void test_activation_op(const std::string& op_type,
                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
@@ -60,20 +60,6 @@ TEST(leaky_relu_op, gpu) {
 }
 #endif

-/* seems bug here
-TEST(relu_op, cpu) {
-  platform::CPUPlace cpu_place;
-  platform::CPUDeviceContext ctx(cpu_place);
-  test_activation_op<::anakin::saber::X86>("relu", ctx, false);
-}
-
-TEST(leaky_relu_op, cpu) {
-  platform::CPUPlace cpu_place;
-  platform::CPUDeviceContext ctx(cpu_place);
-  test_activation_op<::anakin::saber::X86>("leaky_relu", ctx, false);
-}
-*/
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_reshape1_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);

   // validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
   // validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
@@ -49,8 +49,8 @@ template <typename TargetT>
 void test_reshape2_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context,
-                                             use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
+      parameters, &scope, context, use_gpu);
   validator.DeclInputVar("reshape-X", {1, 2, 4});
   validator.DeclOutputVar("reshape-Out", {1, 4, 2});
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_softmax_op(const platform::DeviceContext& context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("softmax-X", {1, 10, 2});
   validator.DeclOutputVar("softmax-Out", {1, 10, 2});

paddle/fluid/inference/anakin/convert/test_split_op.cc
@@ -27,8 +27,8 @@ void AnakinSliceTest(const platform::DeviceContext &context, bool use_gpu,
                      const std::vector<int> &sections) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("split_input", in_shape);
   std::vector<std::string> output_vars;

paddle/fluid/inference/anakin/convert/test_sum_op.cc
@@ -26,8 +26,8 @@ template <typename TargetT>
 static void test_sum_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
   validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
   validator.DeclOutputVar("sum_out", {1, 2, 1, 2});

paddle/fluid/inference/anakin/convert/test_transpose_op.cc
@@ -24,8 +24,8 @@ template <typename TargetT>
 void test_transpose1_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
@@ -47,8 +47,8 @@ template <typename TargetT>
 void test_transpose2_op(const platform::DeviceContext& context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
+  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {3, 5, 4});

paddle/fluid/inference/anakin/convert/transpose.cc
@@ -23,8 +23,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-void TransposeOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void TransposeOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -50,9 +50,17 @@ void TransposeOpConverter<TargetT>::operator()(
 }  // namespace paddle
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter<::anakin::saber::NV>);
+using transpose_nv_fp32 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::NV, ::anakin::Precision::FP32>;
+using transpose_nv_int8 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, transpose_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter<::anakin::saber::X86>);
+using transpose_cpu_fp32 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::X86, ::anakin::Precision::FP32>;
+using transpose_cpu_int8 = ::paddle::inference::anakin::TransposeOpConverter<::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_int8);

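The registration change above is the core of the INT8 support on the converter side: each converter is now templated on both the target and an ::anakin::Precision value, and every (target, precision) pair gets its own alias that is registered separately. The following is a minimal standalone C++ sketch of that dispatch mechanism; the types and names here (Precision, NV, TransposeOpConverterSketch) are stand-ins for illustration, not Paddle's actual classes or macros.

#include <iostream>

enum class Precision { FP32, INT8 };  // stand-in for ::anakin::Precision
struct NV {};                         // stand-in for ::anakin::saber::NV

template <typename TargetT, Precision PrecisionT>
struct TransposeOpConverterSketch {
  void operator()() {
    // In Paddle this body would translate the fluid op into an Anakin op;
    // here we only show which instantiation was selected.
    std::cout << "convert transpose, int8=" << (PrecisionT == Precision::INT8) << "\n";
  }
};

// Mirrors "using transpose_nv_fp32 = TransposeOpConverter<NV, FP32>;" above.
using transpose_nv_fp32_sketch = TransposeOpConverterSketch<NV, Precision::FP32>;
using transpose_nv_int8_sketch = TransposeOpConverterSketch<NV, Precision::INT8>;

int main() {
  transpose_nv_fp32_sketch{}();  // would go through REGISTER_CUDA_ANAKIN_OP_CONVERTER
  transpose_nv_int8_sketch{}();  // would go through REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER
}

With this shape, the INT8 alias can be registered without touching the existing FP32 path.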
paddle/fluid/inference/anakin/convert/transpose.h
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-template <typename TargetT>
-class TransposeOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TransposeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   TransposeOpConverter() = default;

paddle/fluid/inference/anakin/convert/ut_helper.h
@@ -61,7 +61,7 @@ void RandomizeTensor(framework::LoDTensor* tensor,
   auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
   for (size_t i = 0; i < num_elements; i++) {
-    *(temp_data + i) = random(-128., 128.);
+    *(temp_data + i) = random(0., 1.);
   }
   TensorCopySync(temp_tensor, place, tensor);
@@ -72,9 +72,9 @@ void RandomizeTensor(framework::LoDTensor* tensor,
  * anakin
  * layer.
  */
-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionT>
 class AnakinConvertValidation {
-  using AnakinNvEngineT = AnakinEngine<TargetT, Precision::FP32>;
+  using AnakinNvEngineT = AnakinEngine<TargetT, PrecisionT>;
  public:
   AnakinConvertValidation() = delete;
@@ -84,7 +84,7 @@ class AnakinConvertValidation {
                           const platform::DeviceContext& ctx, bool use_gpu = true)
       : parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) {
-    engine_.reset(new AnakinEngine<TargetT, Precision::FP32>(true));
+    engine_.reset(new AnakinEngine<TargetT, PrecisionT>(true));
   }
   // Declare a Variable as input with random initialization.
@@ -127,7 +127,7 @@ class AnakinConvertValidation {
     // should init anakin engine here.
     auto& block_desc = program_desc_.Block(framework::kRootBlockIndex);
-    Singleton<AnakinOpConverter<TargetT>>::Global().ConvertOp(
+    Singleton<AnakinOpConverter<TargetT, PrecisionT>>::Global().ConvertOp(
         desc, block_desc, parameters_, *scope_, engine_.get(), true /*test_mode*/);
     engine_->Freeze();
@@ -213,8 +213,15 @@ class AnakinConvertValidation {
   bool use_gpu_{true};
 };
-template class AnakinConvertValidation<::anakin::saber::NV>;
-template class AnakinConvertValidation<::anakin::saber::X86>;
+template class AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::FP32>;
+template class AnakinConvertValidation<::anakin::saber::X86, ::anakin::Precision::FP32>;
+template class AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::INT8>;
+template class AnakinConvertValidation<::anakin::saber::X86, ::anakin::Precision::INT8>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
paddle/fluid/inference/anakin/engine.cc
@@ -172,11 +172,20 @@ AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
 #ifdef PADDLE_WITH_CUDA
 template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
-template class AnakinEngineManager<::anakin::saber::NV>;
+template class AnakinEngineManager<::anakin::saber::NV, ::anakin::Precision::FP32>;
+template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::INT8>;
+template class AnakinEngineManager<::anakin::saber::NV, ::anakin::Precision::INT8>;
 #endif
 template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
-template class AnakinEngineManager<::anakin::saber::X86>;
+template class AnakinEngineManager<::anakin::saber::X86, ::anakin::Precision::FP32>;
+template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::INT8>;
+template class AnakinEngineManager<::anakin::saber::X86, ::anakin::Precision::INT8>;
 // template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
 }  // namespace anakin

paddle/fluid/inference/anakin/engine.h
@@ -93,6 +93,12 @@ class AnakinEngine {
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
+  void AddTensorScale(const std::string &tensor_name, float scale) {
+    tensor_scales_[tensor_name] = scale;
+  }
+  std::unordered_map<std::string, float> GetTensorScales() { return tensor_scales_; }
   void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
                const std::map<std::string, framework::LoDTensor *> &outputs);
 #ifdef PADDLE_WITH_CUDA
@@ -112,11 +118,12 @@ class AnakinEngine {
   std::unique_ptr<GraphT> graph_;
   std::unique_ptr<NetT> net_;
   std::vector<std::string> program_inputs_;
+  std::unordered_map<std::string, float> tensor_scales_;
 };
-template <typename TargetT>
+template <typename TargetT, ::anakin::Precision PrecisionType>
 class AnakinEngineManager {
-  using AnakinEngineT = AnakinEngine<TargetT, Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;
  public:
   bool HasEngine(const std::string &name) const {
@@ -132,7 +139,7 @@ class AnakinEngineManager {
       std::vector<std::string> program_inputs, std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
-    auto *p = new AnakinEngine<TargetT, Precision::FP32>(
+    auto *p = new AnakinEngine<TargetT, PrecisionType>(
        need_summary, device, max_batch_size, max_input_shape, program_inputs);
     engines_[engine_name].reset(p);
     return p;

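Besides parameterizing AnakinEngineManager on the precision, the engine itself gains a per-tensor scale table through AddTensorScale()/GetTensorScales(), which the INT8 path can consult at build time. Below is a standalone sketch of that bookkeeping only; the class name, tensor name, and scale value are hypothetical and not part of the Paddle API.

#include <iostream>
#include <string>
#include <unordered_map>

class EngineScaleTableSketch {
 public:
  // Record the quantization scale observed for one tensor.
  void AddTensorScale(const std::string &tensor_name, float scale) {
    tensor_scales_[tensor_name] = scale;
  }
  // Hand the whole table back to whoever builds the INT8 net.
  std::unordered_map<std::string, float> GetTensorScales() const {
    return tensor_scales_;
  }

 private:
  std::unordered_map<std::string, float> tensor_scales_;
};

int main() {
  EngineScaleTableSketch engine;
  engine.AddTensorScale("conv1_out", 0.017f);  // hypothetical tensor name and scale
  for (const auto &kv : engine.GetTensorScales())
    std::cout << kv.first << " -> " << kv.second << "\n";
}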
paddle/fluid/inference/analysis/argument.h
@@ -169,7 +169,13 @@ struct Argument {
                       anakin_max_shape_t);
   DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int);
   DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
+  DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode, AnalysisConfig::Precision);
   DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
+  DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter, std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter, std::vector<std::string>);

   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);

paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -123,6 +123,11 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("max_input_shape",
                 new std::map<std::string, std::vector<int>>(argument->anakin_max_input_shape()));
       pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));
+      bool enable_int8 = argument->anakin_precision_mode() == AnalysisConfig::Precision::kInt8;
+      pass->Set("enable_int8", new bool(enable_int8));
+      pass->Set("anakin_ops_filter",
+                new std::vector<std::string>(argument->anakin_ops_filter()));
     }
     pre_pass = pass_name;

paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
@@ -39,8 +39,14 @@ void analysis::AnakinSubgraphPass::ApplyImpl(
     framework::ir::Graph *graph) const {
   framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);
-  auto teller = [](const framework::ir::Node *node) {
-    if (!node->IsOp() || !node->Op()) return false;
+  auto &anakin_ops_filter = Get<std::vector<std::string>>("anakin_ops_filter");
+  auto teller = [&anakin_ops_filter](const framework::ir::Node *node) {
+    if (!node->IsOp() || !node->Op())
+      return false;
+    else if (std::find(anakin_ops_filter.begin(), anakin_ops_filter.end(),
+                       node->Op()->Type()) != anakin_ops_filter.end())
+      return false;
     return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
   };
@@ -191,47 +197,71 @@ void AnakinSubgraphPass::CreateAnakinOp(
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
-  auto max_input_shape = Get<std::map<std::string, std::vector<int>>>("max_input_shape");
-  auto max_batch_size = Get<int>("max_batch_size");
   auto program_inputs = program_desc->GetFeedTargetNames();
   bool use_gpu = Get<bool>("use_gpu");
   SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
+  bool enable_int8 = Get<bool>("enable_int8");
+  SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
+  if (enable_int8) {
+    CreateAnakinEngine<::anakin::Precision::INT8>(&block_desc, params, input_names,
+                                                  output_mapping, program_inputs, engine_key);
+  } else {
+    CreateAnakinEngine<::anakin::Precision::FP32>(&block_desc, params, input_names,
+                                                  output_mapping, program_inputs, engine_key);
+  }
+}
+
+template <::anakin::Precision PrecisionT>
+void AnakinSubgraphPass::CreateAnakinEngine(
+    framework::BlockDesc *block_desc, const std::vector<std::string> &params,
+    const std::set<std::string> &input_names,
+    const std::vector<std::string> &output_mapping,
+    const std::vector<std::string> &program_inputs,
+    const std::string &engine_key) const {
+  framework::BlockDesc block_desc_temp(nullptr, block_desc->Proto());
+  bool use_gpu = Get<bool>("use_gpu");
+  auto max_batch_size = Get<int>("max_batch_size");
+  auto max_input_shape = Get<std::map<std::string, std::vector<int>>>("max_input_shape");
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
-    inference::Singleton<anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+    inference::Singleton<anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size, max_input_shape,
                 program_inputs, engine_key);
 #endif
   } else {
-    inference::Singleton<anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+    inference::Singleton<anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
        .Create(true, Get<int>("gpu_device_id"), max_batch_size, max_input_shape,
                program_inputs, engine_key);
   }
   auto *scope = param_scope();
   std::unordered_set<std::string> param_set(params.begin(), params.end());
-  framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
     auto *anakin_engine =
-        inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+        inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
            .Get(engine_key);
-    inference::Singleton<inference::anakin::AnakinOpConverter<::anakin::saber::NV>>::Global()
+    inference::Singleton<inference::anakin::AnakinOpConverter<::anakin::saber::NV, PrecisionT>>::Global()
        .ConvertBlockToAnakinEngine(
            &block_desc_temp, scope,
            std::vector<std::string>(input_names.begin(), input_names.end()),
            param_set, output_mapping, anakin_engine);
 #endif
   } else {
     auto *anakin_engine =
-        inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+        inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
            .Get(engine_key);
-    inference::Singleton<inference::anakin::AnakinOpConverter<::anakin::saber::X86>>::Global()
+    inference::Singleton<inference::anakin::AnakinOpConverter<::anakin::saber::X86, PrecisionT>>::Global()
        .ConvertBlockToAnakinEngine(
            &block_desc_temp, scope,
            std::vector<std::string>(input_names.begin(), input_names.end()),

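Two things change in this pass: the teller now checks an anakin_ops_filter list before asking OpTeller, and engine creation is dispatched to CreateAnakinEngine<INT8> or CreateAnakinEngine<FP32> depending on enable_int8. Here is a standalone sketch of the filter logic only; the op names and the fallback decision are hypothetical stand-ins for OpTeller::Tell.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> anakin_ops_filter = {"softmax"};  // hypothetical filter entry
  auto teller = [&anakin_ops_filter](const std::string &op_type) {
    // Explicitly excluded op types never go to the Anakin subgraph.
    if (std::find(anakin_ops_filter.begin(), anakin_ops_filter.end(), op_type) !=
        anakin_ops_filter.end())
      return false;
    // Stand-in for anakin::OpTeller::Global().Tell(...).
    return op_type == "conv2d" || op_type == "relu";
  };
  for (const auto &op : {"conv2d", "softmax", "relu"})
    std::cout << op << " -> " << (teller(op) ? "anakin" : "fluid") << "\n";
}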
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
@@ -15,6 +15,7 @@
 #pragma once
 #include <paddle/fluid/framework/ir/fuse_pass_base.h>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/ir/pass.h"
@@ -36,6 +37,13 @@ class AnakinSubgraphPass : public framework::ir::FusePassBase {
                        const std::vector<std::string> &graph_params,
                        std::vector<std::string> *repetitive_params) const;
   void CleanIntermediateOutputs(framework::ir::Node *node);
+  template <::anakin::Precision PrecisionT>
+  void CreateAnakinEngine(framework::BlockDesc *block_desc,
+                          const std::vector<std::string> &params,
+                          const std::set<std::string> &input_names,
+                          const std::vector<std::string> &output_mapping,
+                          const std::vector<std::string> &program_inputs,
+                          const std::string &engine_key) const;
 };
 }  // namespace analysis

paddle/fluid/inference/api/analysis_config.cc
@@ -116,6 +116,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(anakin_max_batchsize_);
   CP_MEMBER(anakin_max_input_shape_);
   CP_MEMBER(anakin_min_subgraph_size_);
+  CP_MEMBER(anakin_precision_mode_);
+  CP_MEMBER(anakin_passes_filter_);
+  CP_MEMBER(anakin_ops_filter_);
   // Ir related.
   CP_MEMBER(enable_ir_optim_);
@@ -276,9 +279,12 @@ void AnalysisConfig::Update() {
     pass_builder()->ClearPasses();
     for (const auto &pass : kAnakinSubgraphPasses) {
       if (std::find(anakin_passes_filter_.begin(), anakin_passes_filter_.end(),
                     pass) == anakin_passes_filter_.end()) {
         pass_builder()->AppendPass(pass);
       }
     }
   }
   if (ir_debug_) {
     pass_builder()->TurnOnDebug();
@@ -391,11 +397,16 @@ void AnalysisConfig::SwitchIrDebug(int x) {
 }
 void AnalysisConfig::EnableAnakinEngine(
     int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
-    int min_subgraph_size) {
+    int min_subgraph_size, AnalysisConfig::Precision precision_mode,
+    std::vector<std::string> passes_filter, std::vector<std::string> ops_filter) {
   anakin_max_batchsize_ = max_batch_size;
   anakin_max_input_shape_ = max_input_shape;
   anakin_min_subgraph_size_ = min_subgraph_size;
+  anakin_passes_filter_ = passes_filter;
+  anakin_ops_filter_ = ops_filter;
   use_anakin_ = true;
+  anakin_precision_mode_ = precision_mode;
   Update();
 }
 }  // namespace paddle
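As the Update() hunk shows, the Anakin pass list is no longer appended wholesale: every pass named in anakin_passes_filter_ is skipped. A standalone sketch of that selection follows; the filter entry is hypothetical and the pass list here is only an excerpt.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> kAnakinSubgraphPasses = {
      "infer_clean_graph_pass", "fc_fuse_pass",
      "quant_conv2d_dequant_fuse_pass", "anakin_subgraph_pass"};
  std::vector<std::string> anakin_passes_filter = {"fc_fuse_pass"};  // hypothetical filter
  std::vector<std::string> selected;
  for (const auto &pass : kAnakinSubgraphPasses) {
    if (std::find(anakin_passes_filter.begin(), anakin_passes_filter.end(),
                  pass) == anakin_passes_filter.end()) {
      selected.push_back(pass);  // stands in for pass_builder()->AppendPass(pass)
    }
  }
  for (const auto &p : selected) std::cout << p << "\n";
}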
paddle/fluid/inference/api/analysis_predictor.cc
@@ -386,6 +386,9 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
     argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
     argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
+    argument_.SetAnakinPrecisionMode(config_.anakin_precision_mode_);
+    argument_.SetAnakinPassesFilter(config_.anakin_passes_filter_);
+    argument_.SetAnakinOpsFilter(config_.anakin_ops_filter_);
     LOG(INFO) << "Anakin subgraph engine is enabled";
   }

paddle/fluid/inference/api/paddle_analysis_config.h
@@ -152,7 +152,9 @@ struct AnalysisConfig {
   void EnableAnakinEngine(
       int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
-      int min_subgraph_size = 6);
+      int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
+      std::vector<std::string> passes_filter = {},
+      std::vector<std::string> ops_filter = {});
   /** A boolean state indicating whether the Anakin sub-graph engine is used.
    */
@@ -291,6 +293,9 @@ struct AnalysisConfig {
   int anakin_max_batchsize_;
   int anakin_min_subgraph_size_{6};
   std::map<std::string, std::vector<int>> anakin_max_input_shape_;
+  Precision anakin_precision_mode_;
+  std::vector<std::string> anakin_passes_filter_;
+  std::vector<std::string> anakin_ops_filter_;
   std::map<std::string, std::string> engine_opt_info_;
   bool use_mkldnn_quantizer_{false};

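Given the extended declaration above, enabling the Anakin INT8 path from C++ would look roughly like the sketch below. This is a usage sketch assuming this tree's headers and the paddle namespace; the input name and shape are hypothetical.

#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void ConfigureAnakinInt8(paddle::AnalysisConfig *config) {
  std::map<std::string, std::vector<int>> max_input_shape;
  max_input_shape["image"] = {1, 3, 224, 224};  // hypothetical input name and shape
  config->EnableAnakinEngine(
      /*max_batch_size=*/1, max_input_shape, /*min_subgraph_size=*/6,
      paddle::AnalysisConfig::Precision::kInt8,
      /*passes_filter=*/{}, /*ops_filter=*/{});
}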
paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -73,15 +73,21 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
 // The following passes works for Anakin sub-graph engine.
 const std::vector<std::string> kAnakinSubgraphPasses({
     "infer_clean_graph_pass",                       //
+    "graph_viz_pass",                               //
+    "quant_conv2d_dequant_fuse_pass",               //
+    "graph_viz_pass",                               //
     "simplify_anakin_priorbox_detection_out_pass",  //
     "fillconstant_elementwisemul_fuse",             //
     "fc_fuse_pass",                                 //
     "conv_elementwise_add_fuse_pass",               //
-    "conv_bn_fuse_pass",                            //
-    "conv_elementwise_add_fuse_pass",               //
+    // "conv_bn_fuse_pass",                         //
+    // "conv_elementwise_add_fuse_pass",            //
-    "fc_gru_fuse_pass",                             //
-    "quant_conv2d_dequant_fuse_pass",               //
-    "anakin_subgraph_pass",
+    "graph_viz_pass",                               //
+    "anakin_subgraph_pass",                         //
+    "graph_viz_pass",                               //
+    "fc_gru_fuse_pass",                             //
+    "graph_viz_pass",                               //
 });

 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {

paddle/fluid/operators/anakin/anakin_engine_op.h
@@ -44,6 +44,7 @@ class AnakinEngineOp : public framework::OperatorBase {
   std::string engine_key_;
   std::string engine_serialized_data_;
   bool use_gpu_;
+  bool enable_int8_;

  public:
   AnakinEngineOp(const std::string &type,
@@ -55,6 +56,7 @@ class AnakinEngineOp : public framework::OperatorBase {
     engine_key_ = Attr<std::string>("engine_key");
     auto params = Attr<std::vector<std::string>>("parameters");
     use_gpu_ = Attr<bool>("use_gpu");
+    enable_int8_ = Attr<bool>("enable_int8");
     for (const auto &param : params) {
       param_names_.insert(param);
     }
@@ -68,11 +70,6 @@ class AnakinEngineOp : public framework::OperatorBase {
   void RunAnakin(const framework::Scope &scope,
                  const platform::Place &dev_place) const {
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    auto &dev_ctx = *pool.Get(dev_place);
-    auto stream = reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
     PADDLE_ENFORCE(!input_names_.empty(), "should pass more than one inputs");
     std::vector<std::string> output_maps =
@@ -96,18 +93,35 @@ class AnakinEngineOp : public framework::OperatorBase {
       outputs.insert({output_maps[output_index], fluid_t});
       output_index += 1;
     }
+    if (enable_int8_) {
+      Execute<::anakin::Precision::INT8>(inputs, outputs, dev_place);
+    } else {
+      Execute<::anakin::Precision::FP32>(inputs, outputs, dev_place);
+    }
   }

+  template <::anakin::Precision PrecisionT>
+  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
+               const std::map<std::string, framework::LoDTensor *> &outputs,
+               const platform::Place &dev_place) const {
+    if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
+      platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+      auto &dev_ctx = *pool.Get(dev_place);
+      auto stream = reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
       auto *engine =
-          inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+          inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
              .Get(engine_key_);
       engine->Execute(inputs, outputs, stream);
 #endif
     } else {
       auto *engine =
-          inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+          inference::Singleton<inference::anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
              .Get(engine_key_);
       engine->Execute(inputs, outputs);
     }

paddle/fluid/pybind/inference_api.cc
@@ -16,6 +16,7 @@
 #include <pybind11/stl.h>
 #include <cstring>
 #include <iostream>
+#include <map>
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
@@ -230,8 +231,13 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("use_static") = true)
       .def("enable_anakin_engine", &AnalysisConfig::EnableAnakinEngine,
-           py::arg("max_batch_size") = 1, py::arg("max_input_shape") = {},
-           py::arg("min_subgraph_size") = 6)
+           py::arg("max_batch_size") = 1,
+           py::arg("max_input_shape") = std::map<std::string, std::vector<int>>(),
+           py::arg("min_subgraph_size") = 6,
+           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
+           py::arg("passes_filter") = std::vector<std::string>(),
+           py::arg("ops_filter") = std::vector<std::string>())
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug, py::arg("x") = true)