Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7ad182e1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7ad182e1
编写于
4月 11, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Cherry-Pick from 16662 : Anakin subgraph cpu support
上级
8643dbc2
变更
72
显示空白变更内容
内联
并排
Showing
72 changed file
with
1149 addition
and
680 deletion
+1149
-680
cmake/anakin_subgraph.cmake
cmake/anakin_subgraph.cmake
+2
-1
paddle/fluid/inference/anakin/convert/activation.cc
paddle/fluid/inference/anakin/convert/activation.cc
+18
-14
paddle/fluid/inference/anakin/convert/activation.h
paddle/fluid/inference/anakin/convert/activation.h
+8
-5
paddle/fluid/inference/anakin/convert/affine_channel.cc
paddle/fluid/inference/anakin/convert/affine_channel.cc
+18
-14
paddle/fluid/inference/anakin/convert/affine_channel.h
paddle/fluid/inference/anakin/convert/affine_channel.h
+2
-1
paddle/fluid/inference/anakin/convert/batch_norm.cc
paddle/fluid/inference/anakin/convert/batch_norm.cc
+30
-25
paddle/fluid/inference/anakin/convert/batch_norm.h
paddle/fluid/inference/anakin/convert/batch_norm.h
+2
-1
paddle/fluid/inference/anakin/convert/concat.cc
paddle/fluid/inference/anakin/convert/concat.cc
+12
-18
paddle/fluid/inference/anakin/convert/concat.h
paddle/fluid/inference/anakin/convert/concat.h
+2
-1
paddle/fluid/inference/anakin/convert/conv2d.cc
paddle/fluid/inference/anakin/convert/conv2d.cc
+26
-21
paddle/fluid/inference/anakin/convert/conv2d.h
paddle/fluid/inference/anakin/convert/conv2d.h
+2
-1
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+30
-21
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+2
-1
paddle/fluid/inference/anakin/convert/density_prior_box.cc
paddle/fluid/inference/anakin/convert/density_prior_box.cc
+37
-23
paddle/fluid/inference/anakin/convert/density_prior_box.h
paddle/fluid/inference/anakin/convert/density_prior_box.h
+2
-1
paddle/fluid/inference/anakin/convert/detection_out.cc
paddle/fluid/inference/anakin/convert/detection_out.cc
+23
-22
paddle/fluid/inference/anakin/convert/detection_out.h
paddle/fluid/inference/anakin/convert/detection_out.h
+2
-1
paddle/fluid/inference/anakin/convert/dropout.cc
paddle/fluid/inference/anakin/convert/dropout.cc
+16
-16
paddle/fluid/inference/anakin/convert/dropout.h
paddle/fluid/inference/anakin/convert/dropout.h
+2
-1
paddle/fluid/inference/anakin/convert/elementwise.cc
paddle/fluid/inference/anakin/convert/elementwise.cc
+24
-26
paddle/fluid/inference/anakin/convert/elementwise.h
paddle/fluid/inference/anakin/convert/elementwise.h
+4
-2
paddle/fluid/inference/anakin/convert/fc.cc
paddle/fluid/inference/anakin/convert/fc.cc
+22
-16
paddle/fluid/inference/anakin/convert/fc.h
paddle/fluid/inference/anakin/convert/fc.h
+6
-3
paddle/fluid/inference/anakin/convert/flatten.cc
paddle/fluid/inference/anakin/convert/flatten.cc
+12
-11
paddle/fluid/inference/anakin/convert/flatten.h
paddle/fluid/inference/anakin/convert/flatten.h
+2
-1
paddle/fluid/inference/anakin/convert/im2sequence.cc
paddle/fluid/inference/anakin/convert/im2sequence.cc
+13
-17
paddle/fluid/inference/anakin/convert/im2sequence.h
paddle/fluid/inference/anakin/convert/im2sequence.h
+2
-1
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+47
-27
paddle/fluid/inference/anakin/convert/pool2d.cc
paddle/fluid/inference/anakin/convert/pool2d.cc
+17
-19
paddle/fluid/inference/anakin/convert/pool2d.h
paddle/fluid/inference/anakin/convert/pool2d.h
+2
-1
paddle/fluid/inference/anakin/convert/relu.cc
paddle/fluid/inference/anakin/convert/relu.cc
+20
-19
paddle/fluid/inference/anakin/convert/relu.h
paddle/fluid/inference/anakin/convert/relu.h
+4
-2
paddle/fluid/inference/anakin/convert/reshape.cc
paddle/fluid/inference/anakin/convert/reshape.cc
+13
-11
paddle/fluid/inference/anakin/convert/reshape.h
paddle/fluid/inference/anakin/convert/reshape.h
+2
-1
paddle/fluid/inference/anakin/convert/roi_align.cc
paddle/fluid/inference/anakin/convert/roi_align.cc
+16
-11
paddle/fluid/inference/anakin/convert/roi_align.h
paddle/fluid/inference/anakin/convert/roi_align.h
+2
-1
paddle/fluid/inference/anakin/convert/scale.cc
paddle/fluid/inference/anakin/convert/scale.cc
+9
-14
paddle/fluid/inference/anakin/convert/scale.h
paddle/fluid/inference/anakin/convert/scale.h
+2
-1
paddle/fluid/inference/anakin/convert/softmax.cc
paddle/fluid/inference/anakin/convert/softmax.cc
+13
-12
paddle/fluid/inference/anakin/convert/softmax.h
paddle/fluid/inference/anakin/convert/softmax.h
+2
-1
paddle/fluid/inference/anakin/convert/split.cc
paddle/fluid/inference/anakin/convert/split.cc
+14
-16
paddle/fluid/inference/anakin/convert/split.h
paddle/fluid/inference/anakin/convert/split.h
+2
-1
paddle/fluid/inference/anakin/convert/sum.cc
paddle/fluid/inference/anakin/convert/sum.cc
+13
-14
paddle/fluid/inference/anakin/convert/sum.h
paddle/fluid/inference/anakin/convert/sum.h
+2
-1
paddle/fluid/inference/anakin/convert/test_activation_op.cc
paddle/fluid/inference/anakin/convert/test_activation_op.cc
+37
-6
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
.../fluid/inference/anakin/convert/test_affine_channel_op.cc
+24
-4
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
+22
-2
paddle/fluid/inference/anakin/convert/test_concat_op.cc
paddle/fluid/inference/anakin/convert/test_concat_op.cc
+19
-22
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
+22
-5
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
+21
-2
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
+36
-5
paddle/fluid/inference/anakin/convert/test_fc_op.cc
paddle/fluid/inference/anakin/convert/test_fc_op.cc
+22
-5
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
+21
-5
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
+50
-46
paddle/fluid/inference/anakin/convert/test_relu_op.cc
paddle/fluid/inference/anakin/convert/test_relu_op.cc
+39
-7
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
+38
-6
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
+22
-4
paddle/fluid/inference/anakin/convert/test_split_op.cc
paddle/fluid/inference/anakin/convert/test_split_op.cc
+43
-31
paddle/fluid/inference/anakin/convert/test_sum_op.cc
paddle/fluid/inference/anakin/convert/test_sum_op.cc
+21
-2
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
+37
-7
paddle/fluid/inference/anakin/convert/transpose.cc
paddle/fluid/inference/anakin/convert/transpose.cc
+13
-11
paddle/fluid/inference/anakin/convert/transpose.h
paddle/fluid/inference/anakin/convert/transpose.h
+2
-1
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+26
-25
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+42
-4
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+16
-9
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+14
-14
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+1
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+39
-10
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+0
-1
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+5
-3
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-1
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+15
-25
未找到文件。
cmake/anakin_subgraph.cmake
浏览文件 @
7ad182e1
...
...
@@ -25,8 +25,9 @@ endif()
if
(
ANAKIN_FOUND
)
message
(
STATUS
"Current ANAKIN header is
${
ANAKIN_INCLUDE_DIR
}
/anakin_config.h. "
)
include_directories
(
${
ANAKIN_ROOT
}
)
include_directories
(
${
ANAKIN_ROOT
}
/include
)
include_directories
(
${
ANAKIN_ROOT
}
/
include/
saber
)
include_directories
(
${
ANAKIN_ROOT
}
/saber
)
link_directories
(
${
ANAKIN_ROOT
}
)
add_definitions
(
-DPADDLE_WITH_ANAKIN
)
endif
()
paddle/fluid/inference/anakin/convert/activation.cc
浏览文件 @
7ad182e1
...
...
@@ -16,16 +16,13 @@
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
ActivationOpConverter
::
ActivationOpConverter
(
const
std
::
string
&
op_type
)
template
<
typename
TargetT
>
ActivationOpConverter
<
TargetT
>::
ActivationOpConverter
(
const
std
::
string
&
op_type
)
:
op_type_
(
op_type
)
{
auto
it
=
anakin_op_types_
.
find
(
op_type_
);
PADDLE_ENFORCE
(
it
!=
anakin_op_types_
.
end
(),
...
...
@@ -33,10 +30,10 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
anakin_op_type_
=
it
->
second
;
}
void
ActivationOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
ActivationOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -44,13 +41,20 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
auto
input_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Activation"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"type"
,
anakin_op_type_
);
this
->
engine_
->
AddOp
(
op_name
,
"Activation"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"type"
,
anakin_op_type_
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
sigmoid
,
SigmoidOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
tanh
,
TanhOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
sigmoid
,
SigmoidOpConverter
<::
anakin
::
saber
::
NV
>
);
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
tanh
,
TanhOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
sigmoid
,
SigmoidOpConverter
<::
anakin
::
saber
::
X86
>
);
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
tanh
,
TanhOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/activation.h
浏览文件 @
7ad182e1
...
...
@@ -22,7 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
ActivationOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ActivationOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
explicit
ActivationOpConverter
(
const
std
::
string
&
op_type
);
...
...
@@ -39,14 +40,16 @@ class ActivationOpConverter : public AnakinOpConverter {
{
"sigmoid"
,
"Sigmoid"
}};
};
class
TanhOpConverter
:
public
ActivationOpConverter
{
template
<
typename
TargetT
>
class
TanhOpConverter
:
public
ActivationOpConverter
<
TargetT
>
{
public:
TanhOpConverter
()
:
ActivationOpConverter
(
"tanh"
)
{}
TanhOpConverter
()
:
ActivationOpConverter
<
TargetT
>
(
"tanh"
)
{}
};
class
SigmoidOpConverter
:
public
ActivationOpConverter
{
template
<
typename
TargetT
>
class
SigmoidOpConverter
:
public
ActivationOpConverter
<
TargetT
>
{
public:
SigmoidOpConverter
()
:
ActivationOpConverter
(
"sigmoid"
)
{}
SigmoidOpConverter
()
:
ActivationOpConverter
<
TargetT
>
(
"sigmoid"
)
{}
};
}
// namespace anakin
}
// namespace inference
...
...
paddle/fluid/inference/anakin/convert/affine_channel.cc
浏览文件 @
7ad182e1
...
...
@@ -18,19 +18,16 @@
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
PTuple
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
AffineChannelOpConverter
::
operator
()(
template
<
typename
TargetT
>
void
AffineChannelOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -59,7 +56,7 @@ void AffineChannelOpConverter::operator()(
bias_tensor
->
Resize
(
bias_t
->
dims
());
TensorCopySync
((
*
bias_t
),
platform
::
CPUPlace
(),
bias_tensor
.
get
());
engine_
->
AddOp
(
op_name
,
"AffineChannel"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOp
(
op_name
,
"AffineChannel"
,
{
input_name
},
{
output_name
});
// Generate the Scale parameter of Anakin.
auto
scale_shape
=
framework
::
vectorize2int
(
scale_t
->
dims
());
...
...
@@ -67,7 +64,8 @@ void AffineChannelOpConverter::operator()(
scale_shape
.
insert
(
scale_shape
.
begin
(),
1
);
}
Shape
anakin_scale_shape
(
scale_shape
);
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
auto
*
weight1
=
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_scale_shape
);
float
*
scale_cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
...
...
@@ -75,7 +73,7 @@ void AffineChannelOpConverter::operator()(
scale_cpu_data
);
weight1
->
d_tensor
().
set_shape
(
anakin_scale_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
// Generate the Bias parameter of Anakin.
auto
bias_shape
=
framework
::
vectorize2int
(
bias_t
->
dims
());
...
...
@@ -83,18 +81,24 @@ void AffineChannelOpConverter::operator()(
bias_shape
.
insert
(
bias_shape
.
begin
(),
1
);
}
Shape
anakin_bias_shape
(
bias_shape
);
auto
*
weight2
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
auto
*
weight2
=
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_bias_shape
);
float
*
bias_cpu_data
=
static_cast
<
float
*>
(
weight2
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
bias_tensor
->
data
<
float
>
(),
bias_tensor
->
numel
(),
bias_cpu_data
);
weight2
->
d_tensor
().
set_shape
(
anakin_bias_shape
);
weight2
->
d_tensor
().
copy_from
(
weight2
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
affine_channel
,
AffineChannelOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
affine_channel
,
AffineChannelOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
affine_channel
,
AffineChannelOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/affine_channel.h
浏览文件 @
7ad182e1
...
...
@@ -21,7 +21,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
AffineChannelOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
AffineChannelOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
AffineChannelOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/batch_norm.cc
浏览文件 @
7ad182e1
...
...
@@ -21,17 +21,16 @@
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
BatchNormOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
BatchNormOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Y"
).
size
(),
1
);
std
::
map
<
std
::
string
,
std
::
string
>
inputs
;
...
...
@@ -48,9 +47,9 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
auto
bn_op_name
=
op_name
+
":bn"
;
auto
bn_output
=
bn_op_name
+
"_output"
;
engine_
->
AddOp
(
bn_op_name
,
"BatchNorm"
,
{
inputs
[
"X"
]},
{
bn_output
});
engine_
->
AddOpAttr
(
bn_op_name
,
"epsilon"
,
epsilon
);
engine_
->
AddOpAttr
(
bn_op_name
,
"momentum"
,
static_cast
<
float
>
(
1.0
));
this
->
engine_
->
AddOp
(
bn_op_name
,
"BatchNorm"
,
{
inputs
[
"X"
]},
{
bn_output
});
this
->
engine_
->
AddOpAttr
(
bn_op_name
,
"epsilon"
,
epsilon
);
this
->
engine_
->
AddOpAttr
(
bn_op_name
,
"momentum"
,
static_cast
<
float
>
(
1.0
));
auto
scale_op_name
=
op_name
+
":scale"
;
auto
get_lod_tensor
=
[
this
,
&
scope
,
&
op_name
](
const
std
::
string
&
var_name
,
...
...
@@ -81,48 +80,54 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
Shape
shape1
(
fill_shape
(
4
,
framework
::
vectorize2int
(
mean_t
.
dims
())));
Shape
shape2
(
fill_shape
(
4
,
framework
::
vectorize2int
(
variance_t
.
dims
())));
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
auto
*
mean_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
mean_t
.
data
<
float
>
(),
mean_t
.
numel
(),
mean_data
);
engine_
->
AddOpAttr
(
bn_op_name
,
"weight_1"
,
*
weight1
);
this
->
engine_
->
AddOpAttr
(
bn_op_name
,
"weight_1"
,
*
weight1
);
auto
*
weight2
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape2
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape2
);
auto
*
variance_data
=
static_cast
<
float
*>
(
weight2
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
variance_t
.
data
<
float
>
(),
variance_t
.
numel
(),
variance_data
);
engine_
->
AddOpAttr
(
bn_op_name
,
"weight_2"
,
*
weight2
);
this
->
engine_
->
AddOpAttr
(
bn_op_name
,
"weight_2"
,
*
weight2
);
Shape
shape3
(
std
::
vector
<
int
>
({
1
,
1
,
1
,
1
}));
auto
*
weight3
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape3
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape3
);
auto
*
alpha_data
=
static_cast
<
float
*>
(
weight3
->
h_tensor
().
mutable_data
());
float
weight3_data
[]
=
{
1
};
std
::
copy
(
std
::
begin
(
weight3_data
),
std
::
end
(
weight3_data
),
alpha_data
);
engine_
->
AddOpAttr
(
bn_op_name
,
"weight_3"
,
*
weight3
);
this
->
engine_
->
AddOpAttr
(
bn_op_name
,
"weight_3"
,
*
weight3
);
Shape
scale_shape
(
fill_shape
(
4
,
framework
::
vectorize2int
(
scale_t
.
dims
())));
auto
*
scale
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
scale_shape
);
auto
*
scale
=
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
scale_shape
);
auto
*
scale_data
=
static_cast
<
float
*>
(
scale
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
scale_t
.
data
<
float
>
(),
scale_t
.
numel
(),
scale_data
);
Shape
bias_shape
(
fill_shape
(
4
,
framework
::
vectorize2int
(
bias_t
.
dims
())));
auto
*
bias
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
bias_shape
);
auto
*
bias
=
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
bias_shape
);
auto
*
bias_data
=
static_cast
<
float
*>
(
bias
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
bias_t
.
data
<
float
>
(),
bias_t
.
numel
(),
bias_data
);
engine_
->
AddOp
(
scale_op_name
,
"Scale"
,
{
bn_output
},
{
output
});
engine_
->
AddOpAttr
(
scale_op_name
,
"axis"
,
1
);
engine_
->
AddOpAttr
(
scale_op_name
,
"num_axes"
,
1
);
engine_
->
AddOpAttr
(
scale_op_name
,
"bias_term"
,
true
);
engine_
->
AddOpAttr
(
scale_op_name
,
"weight_1"
,
*
scale
);
engine_
->
AddOpAttr
(
scale_op_name
,
"weight_2"
,
*
bias
);
this
->
engine_
->
AddOp
(
scale_op_name
,
"Scale"
,
{
bn_output
},
{
output
});
this
->
engine_
->
AddOpAttr
(
scale_op_name
,
"axis"
,
1
);
this
->
engine_
->
AddOpAttr
(
scale_op_name
,
"num_axes"
,
1
);
this
->
engine_
->
AddOpAttr
(
scale_op_name
,
"bias_term"
,
true
);
this
->
engine_
->
AddOpAttr
(
scale_op_name
,
"weight_1"
,
*
scale
);
this
->
engine_
->
AddOpAttr
(
scale_op_name
,
"weight_2"
,
*
bias
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
batch_norm
,
BatchNormOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
batch_norm
,
BatchNormOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
batch_norm
,
BatchNormOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/batch_norm.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
BatchNormOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
BatchNormOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
BatchNormOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/concat.cc
浏览文件 @
7ad182e1
...
...
@@ -15,38 +15,32 @@
#include "paddle/fluid/inference/anakin/convert/concat.h"
#include <algorithm>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
ConcatOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
ConcatOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
auto
input_names
=
op_desc
.
Input
(
"X"
);
// PADDLE_ENFORCE(axis > 0,
// "The axis attr of Concat op should be large than 0 for trt");
auto
y_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Concat"
,
input_names
,
{
y_name
});
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
axis
);
this
->
engine_
->
AddOp
(
op_name
,
"Concat"
,
input_names
,
{
y_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
axis
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
concat
,
ConcatOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
concat
,
ConcatOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
concat
,
ConcatOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/concat.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
ConcatOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ConcatOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
ConcatOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/conv2d.cc
浏览文件 @
7ad182e1
...
...
@@ -18,19 +18,18 @@
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
PTuple
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
Conv2dOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
Conv2dOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Input"
).
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Filter"
).
size
(),
1UL
);
...
...
@@ -39,7 +38,7 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input_name
=
op_desc
.
Input
(
"Input"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Output"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Output"
).
front
();
engine_
->
AddOp
(
op_name
,
"Convolution"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOp
(
op_name
,
"Convolution"
,
{
input_name
},
{
output_name
});
auto
*
filter_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Filter"
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
filter_v
);
...
...
@@ -51,38 +50,44 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
PADDLE_ENFORCE_EQ
(
weight_tensor
->
dims
().
size
(),
4UL
);
// const int n_output = weight_tensor->dims()[0];
// const int n_input = weight_tensor->dims()[1];
const
int
filter_h
=
weight_tensor
->
dims
()[
2
];
const
int
filter_w
=
weight_tensor
->
dims
()[
3
];
// auto filter_num = n_input * filter_h * filter_w ;
auto
filter_num
=
weight_tensor
->
dims
()[
0
];
engine_
->
AddOpAttr
<
int
>
(
op_name
,
"filter_num"
,
filter_num
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"kernel_size"
,
{
filter_h
,
filter_w
});
this
->
engine_
->
template
AddOpAttr
<
int
>(
op_name
,
"filter_num"
,
filter_num
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"kernel_size"
,
{
filter_h
,
filter_w
});
auto
strides
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"strides"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"strides"
,
strides
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"strides"
,
strides
);
auto
paddings
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"paddings"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"padding"
,
paddings
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"padding"
,
paddings
);
auto
dilations
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"dilations"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dilation_rate"
,
dilations
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"dilation_rate"
,
dilations
);
const
int
groups
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"groups"
));
engine_
->
AddOpAttr
(
op_name
,
"group"
,
groups
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"group"
,
groups
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
auto
weight_shape
=
framework
::
vectorize2int
(
filter_t
->
dims
());
Shape
anakin_shape
(
weight_shape
);
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
weight_tensor
->
data
<
float
>
(),
weight_tensor
->
numel
(),
cpu_data
);
weight1
->
d_tensor
().
set_shape
(
anakin_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
conv2d
,
Conv2dOpConverter
);
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
conv2d
,
Conv2dOpConverter
<::
anakin
::
saber
::
X86
>
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
conv2d
,
Conv2dOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
paddle/fluid/inference/anakin/convert/conv2d.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
Conv2dOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
Conv2dOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
Conv2dOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
浏览文件 @
7ad182e1
...
...
@@ -18,19 +18,18 @@
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
PTuple
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
Conv2dFusionOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
Conv2dFusionOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Input"
).
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Filter"
).
size
(),
1UL
);
...
...
@@ -40,7 +39,7 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input_name
=
op_desc
.
Input
(
"Input"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Output"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Output"
).
front
();
engine_
->
AddOp
(
op_name
,
"Convolution"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOp
(
op_name
,
"Convolution"
,
{
input_name
},
{
output_name
});
auto
*
filter_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Filter"
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
filter_v
);
...
...
@@ -63,28 +62,31 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
const
int
filter_w
=
weight_tensor
->
dims
()[
3
];
// auto filter_num = n_input * filter_h * filter_w ;
auto
filter_num
=
weight_tensor
->
dims
()[
0
];
engine_
->
AddOpAttr
<
int
>
(
op_name
,
"filter_num"
,
filter_num
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"kernel_size"
,
{
filter_h
,
filter_w
});
this
->
engine_
->
template
AddOpAttr
<
int
>(
op_name
,
"filter_num"
,
filter_num
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"kernel_size"
,
{
filter_h
,
filter_w
});
auto
strides
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"strides"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"strides"
,
strides
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"strides"
,
strides
);
auto
paddings
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"paddings"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"padding"
,
paddings
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"padding"
,
paddings
);
auto
dilations
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"dilations"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dilation_rate"
,
dilations
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"dilation_rate"
,
dilations
);
const
int
groups
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"groups"
));
engine_
->
AddOpAttr
(
op_name
,
"group"
,
groups
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
true
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"group"
,
groups
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
true
);
auto
weight_shape
=
framework
::
vectorize2int
(
filter_t
->
dims
());
Shape
anakin_shape
(
weight_shape
);
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
weight_tensor
->
data
<
float
>
(),
weight_tensor
->
numel
(),
cpu_data
);
weight1
->
d_tensor
().
set_shape
(
anakin_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
auto
bias_shape
=
framework
::
vectorize2int
(
b_t
->
dims
());
framework
::
LoDTensor
bias_tensor
;
...
...
@@ -98,17 +100,24 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
// bias_shape.push_back(1);
Shape
anakin_bias_shape
(
bias_shape
);
auto
*
weight2
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
auto
*
weight2
=
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_bias_shape
);
float
*
cpu_data2
=
static_cast
<
float
*>
(
weight2
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
bias_data
,
bias_tensor
.
numel
(),
cpu_data2
);
weight2
->
d_tensor
().
set_shape
(
anakin_bias_shape
);
weight2
->
d_tensor
().
copy_from
(
weight2
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
conv2d_fusion
,
Conv2dFusionOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
conv2d_fusion
,
Conv2dFusionOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
conv2d_fusion
,
Conv2dFusionOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
Conv2dFusionOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
Conv2dFusionOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
Conv2dFusionOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/density_prior_box.cc
浏览文件 @
7ad182e1
...
...
@@ -17,17 +17,14 @@
#include <map>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
DensityPriorBoxOpConverter
::
operator
()(
template
<
typename
TargetT
>
void
DensityPriorBoxOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -81,27 +78,44 @@ void DensityPriorBoxOpConverter::operator()(
std
::
vector
<
float
>
temp_v
=
{};
engine_
->
AddOp
(
op_name
,
"PriorBox"
,
{
input_name
,
image_name
},
{
output_name
});
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"min_size"
,
min_sizes
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"max_size"
,
max_sizes
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"aspect_ratio"
,
aspect_ratios
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"fixed_size"
,
fixed_sizes
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"fixed_ratio"
,
fixed_ratios
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"density"
,
dens
);
engine_
->
AddOpAttr
(
op_name
,
"is_flip"
,
is_flip
);
engine_
->
AddOpAttr
(
op_name
,
"is_clip"
,
is_clip
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"variance"
,
variances
);
engine_
->
AddOpAttr
(
op_name
,
"img_h"
,
static_cast
<
int
>
(
0
));
engine_
->
AddOpAttr
(
op_name
,
"img_w"
,
static_cast
<
int
>
(
0
));
engine_
->
AddOpAttr
(
op_name
,
"step_h"
,
step_h
);
engine_
->
AddOpAttr
(
op_name
,
"step_w"
,
step_w
);
engine_
->
AddOpAttr
(
op_name
,
"offset"
,
offset
);
engine_
->
AddOpAttr
<
PTuple
<
std
::
string
>>
(
op_name
,
"order"
,
t_order
);
this
->
engine_
->
AddOp
(
op_name
,
"PriorBox"
,
{
input_name
,
image_name
},
{
output_name
});
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"min_size"
,
min_sizes
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"max_size"
,
max_sizes
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"aspect_ratio"
,
aspect_ratios
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"fixed_size"
,
fixed_sizes
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"fixed_ratio"
,
fixed_ratios
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"density"
,
dens
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"is_flip"
,
is_flip
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"is_clip"
,
is_clip
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"variance"
,
variances
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"img_h"
,
static_cast
<
int
>
(
0
));
this
->
engine_
->
AddOpAttr
(
op_name
,
"img_w"
,
static_cast
<
int
>
(
0
));
this
->
engine_
->
AddOpAttr
(
op_name
,
"step_h"
,
step_h
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"step_w"
,
step_w
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"offset"
,
offset
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
std
::
string
>
>
(
op_name
,
"order"
,
t_order
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
density_prior_box
,
DensityPriorBoxOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
prior_box
,
DensityPriorBoxOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
density_prior_box
,
DensityPriorBoxOpConverter
<::
anakin
::
saber
::
NV
>
);
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
prior_box
,
DensityPriorBoxOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
density_prior_box
,
DensityPriorBoxOpConverter
<::
anakin
::
saber
::
X86
>
);
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
prior_box
,
DensityPriorBoxOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/density_prior_box.h
浏览文件 @
7ad182e1
...
...
@@ -22,7 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
DensityPriorBoxOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
DensityPriorBoxOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
DensityPriorBoxOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/detection_out.cc
浏览文件 @
7ad182e1
...
...
@@ -16,19 +16,14 @@
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
DetectionOutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
DetectionOutOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
auto
target_name
=
op_desc
.
Input
(
"TargetBox"
).
front
();
auto
prior_box_name
=
op_desc
.
Input
(
"PriorBox"
).
front
();
...
...
@@ -52,22 +47,28 @@ void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
"Not support encode_center_size code_type in DetectionOut of anakin"
);
}
engine_
->
AddOp
(
op_name
,
"DetectionOutput"
,
{
target_name
,
scores_name
,
prior_box_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"share_location"
,
true
);
engine_
->
AddOpAttr
(
op_name
,
"variance_encode_in_target"
,
false
);
engine_
->
AddOpAttr
(
op_name
,
"class_num"
,
static_cast
<
int
>
(
0
));
engine_
->
AddOpAttr
(
op_name
,
"background_id"
,
background_label
);
engine_
->
AddOpAttr
(
op_name
,
"keep_top_k"
,
keep_top_k
);
engine_
->
AddOpAttr
(
op_name
,
"code_type"
,
anakin_code_type
);
engine_
->
AddOpAttr
(
op_name
,
"conf_thresh"
,
score_threshold
);
engine_
->
AddOpAttr
(
op_name
,
"nms_top_k"
,
nms_top_k
);
engine_
->
AddOpAttr
(
op_name
,
"nms_thresh"
,
nms_threshold
);
engine_
->
AddOpAttr
(
op_name
,
"nms_eta"
,
nms_eta
);
this
->
engine_
->
AddOp
(
op_name
,
"DetectionOutput"
,
{
target_name
,
scores_name
,
prior_box_name
},
{
output_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"share_location"
,
true
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"variance_encode_in_target"
,
false
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"class_num"
,
static_cast
<
int
>
(
0
));
this
->
engine_
->
AddOpAttr
(
op_name
,
"background_id"
,
background_label
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"keep_top_k"
,
keep_top_k
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"code_type"
,
anakin_code_type
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"conf_thresh"
,
score_threshold
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"nms_top_k"
,
nms_top_k
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"nms_thresh"
,
nms_threshold
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"nms_eta"
,
nms_eta
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
detection_out
,
DetectionOutOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
detection_out
,
DetectionOutOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
detection_out
,
DetectionOutOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/detection_out.h
浏览文件 @
7ad182e1
...
...
@@ -22,7 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
DetectionOutOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
DetectionOutOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
DetectionOutOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/dropout.cc
浏览文件 @
7ad182e1
...
...
@@ -19,21 +19,16 @@
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
DropoutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
DropoutOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Mask"
).
size
(),
1
);
...
...
@@ -43,25 +38,30 @@ void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Scale"
,
{
x_name
},
{
out_name
});
this
->
engine_
->
AddOp
(
op_name
,
"Scale"
,
{
x_name
},
{
out_name
});
auto
dropout_prob
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"dropout_prob"
));
auto
factor
=
1
-
dropout_prob
;
Shape
shape1
(
std
::
vector
<
int
>
({
1
,
1
,
1
,
1
}));
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
auto
*
factor_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
float
weight1_data
[]
=
{
factor
};
std
::
copy
(
std
::
begin
(
weight1_data
),
std
::
end
(
weight1_data
),
factor_data
);
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
0
);
engine_
->
AddOpAttr
(
op_name
,
"num_axes"
,
0
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
0
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"num_axes"
,
0
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
dropout
,
DropoutOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
dropout
,
DropoutOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
dropout
,
DropoutOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/dropout.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
DropoutOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
DropoutOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
DropoutOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/elementwise.cc
浏览文件 @
7ad182e1
...
...
@@ -19,18 +19,15 @@
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
ElementwiseAddOpConverter
::
operator
()(
template
<
typename
TargetT
>
void
ElementwiseAddOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -43,14 +40,16 @@ void ElementwiseAddOpConverter::operator()(
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
{
x_name
,
y_name
},
{
out_name
});
this
->
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
{
x_name
,
y_name
},
{
out_name
});
std
::
string
elementwise_type
=
"Add"
;
engine_
->
AddOpAttr
<
std
::
string
>
(
op_name
,
"type"
,
elementwise_type
);
this
->
engine_
->
template
AddOpAttr
<
std
::
string
>(
op_name
,
"type"
,
elementwise_type
);
std
::
vector
<
float
>
coeff
=
{
1.0
,
1.0
};
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"coeff"
,
coeff
);
}
void
ElementwiseMulOpConverter
::
operator
()(
template
<
typename
TargetT
>
void
ElementwiseMulOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -63,26 +62,25 @@ void ElementwiseMulOpConverter::operator()(
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Scale"
,
{
x_name
,
y_name
},
{
out_name
});
// Fill a number to weight_1 as a placeholder.
Shape
shape1
(
std
::
vector
<
int
>
({
1
,
1
,
1
,
1
}));
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
auto
*
placeholder_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
float
weight1_data
[]
=
{
1
};
std
::
copy
(
std
::
begin
(
weight1_data
),
std
::
end
(
weight1_data
),
placeholder_data
);
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
auto
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
axis
);
engine_
->
AddOpAttr
(
op_name
,
"num_axes"
,
1
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
this
->
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
{
x_name
,
y_name
},
{
out_name
});
std
::
string
elementwise_type
=
"Prod"
;
this
->
engine_
->
template
AddOpAttr
<
std
::
string
>(
op_name
,
"type"
,
elementwise_type
);
std
::
vector
<
float
>
coeff
=
{
1.0
,
1.0
};
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"coeff"
,
coeff
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_mul
,
ElementwiseMulOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
<::
anakin
::
saber
::
NV
>
);
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
elementwise_mul
,
ElementwiseMulOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
<::
anakin
::
saber
::
X86
>
);
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
elementwise_mul
,
ElementwiseMulOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/elementwise.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
ElementwiseAddOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ElementwiseAddOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
ElementwiseAddOpConverter
()
=
default
;
...
...
@@ -33,7 +34,8 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
private:
};
class
ElementwiseMulOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ElementwiseMulOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
ElementwiseMulOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/fc.cc
浏览文件 @
7ad182e1
...
...
@@ -19,17 +19,16 @@
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
FcBaseOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
FcBaseOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
auto
input_names
=
op_desc
.
InputNames
();
bool
with_bias
=
input_names
.
size
()
==
3
;
...
...
@@ -51,13 +50,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input_name
=
op_desc
.
Input
(
i_name
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Dense"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
with_bias
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
this
->
engine_
->
AddOp
(
op_name
,
"Dense"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
with_bias
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
auto
weight_shape
=
framework
::
vectorize2int
(
y_t
->
dims
());
int
out_dim
=
weight_shape
[
1
];
engine_
->
AddOpAttr
(
op_name
,
"out_dim"
,
out_dim
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"out_dim"
,
out_dim
);
const
int
w_m
=
weight_shape
[
0
];
const
int
w_k
=
weight_shape
[
1
];
...
...
@@ -79,12 +78,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
}
}
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
trans_weight_data
.
data
(),
weight_tensor
.
numel
(),
cpu_data
);
weight1
->
d_tensor
().
set_shape
(
anakin_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
// get bias
if
(
with_bias
)
{
...
...
@@ -104,13 +104,14 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
// bias_shape.push_back(1);
Shape
anakin_bias_shape
(
bias_shape
);
auto
*
weight2
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
auto
*
weight2
=
GraphGlobalMem
<
TargetT
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_bias_shape
);
float
*
cpu_data2
=
static_cast
<
float
*>
(
weight2
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
bias_data
,
bias_tensor
.
numel
(),
cpu_data2
);
weight2
->
d_tensor
().
set_shape
(
anakin_bias_shape
);
weight2
->
d_tensor
().
copy_from
(
weight2
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
}
}
...
...
@@ -118,5 +119,10 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
mul
,
MulOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
fc
,
FcOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
mul
,
MulOpConverter
<::
anakin
::
saber
::
NV
>
);
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
fc
,
FcOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
mul
,
MulOpConverter
<::
anakin
::
saber
::
X86
>
);
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
fc
,
FcOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/fc.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
FcBaseOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
FcBaseOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
FcBaseOpConverter
()
=
default
;
...
...
@@ -32,13 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter {
};
// with bias
class
FcOpConverter
:
public
FcBaseOpConverter
{
template
<
typename
TargetT
>
class
FcOpConverter
:
public
FcBaseOpConverter
<
TargetT
>
{
public:
FcOpConverter
()
=
default
;
};
// without bias
class
MulOpConverter
:
public
FcBaseOpConverter
{
template
<
typename
TargetT
>
class
MulOpConverter
:
public
FcBaseOpConverter
<
TargetT
>
{
public:
MulOpConverter
()
=
default
;
};
...
...
paddle/fluid/inference/anakin/convert/flatten.cc
浏览文件 @
7ad182e1
...
...
@@ -15,20 +15,16 @@
#include "paddle/fluid/inference/anakin/convert/flatten.h"
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
FlattenOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
FlattenOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1UL
);
...
...
@@ -41,12 +37,17 @@ void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
std
::
vector
<
int
>
out_dims
=
{
0
,
-
1
,
1
,
1
};
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Reshape"
,
{
input
},
{
output
});
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dims"
,
out_dims
);
this
->
engine_
->
AddOp
(
op_name
,
"Reshape"
,
{
input
},
{
output
});
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"dims"
,
out_dims
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
flatten
,
FlattenOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
flatten
,
FlattenOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
flatten
,
FlattenOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/flatten.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
FlattenOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
FlattenOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
FlattenOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/im2sequence.cc
浏览文件 @
7ad182e1
...
...
@@ -17,23 +17,16 @@
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
Im2SequenceConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
Im2SequenceConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Y"
).
size
(),
0
);
...
...
@@ -43,21 +36,24 @@ void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op,
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Im2Sequence"
,
{
x_name
},
{
out_name
});
this
->
engine_
->
AddOp
(
op_name
,
"Im2Sequence"
,
{
x_name
},
{
out_name
});
std
::
vector
<
int
>
dilations
=
{
1
,
1
};
auto
paddings
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"paddings"
));
auto
strides
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"strides"
));
auto
kernels
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"kernels"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"paddings"
,
paddings
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"strides"
,
strides
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"window_size"
,
kernels
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dilations"
,
dilations
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"paddings"
,
paddings
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"strides"
,
strides
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"window_size"
,
kernels
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"dilations"
,
dilations
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
im2sequence
,
Im2SequenceConverter
);
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
im2sequence
,
Im2SequenceConverter
<::
anakin
::
saber
::
NV
>
);
paddle/fluid/inference/anakin/convert/im2sequence.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
Im2SequenceConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
Im2SequenceConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
Im2SequenceConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
7ad182e1
...
...
@@ -32,10 +32,10 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
using
AnakinNvEngine
=
AnakinEngine
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>
;
template
<
typename
TargetT
>
class
AnakinOpConverter
{
using
AnakinEngineT
=
AnakinEngine
<
TargetT
,
::
anakin
::
Precision
::
FP32
>
;
public:
AnakinOpConverter
()
=
default
;
...
...
@@ -45,7 +45,7 @@ class AnakinOpConverter {
void
ConvertOp
(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
framework
::
Scope
&
scope
,
Anakin
NvEngine
*
engine
,
const
framework
::
Scope
&
scope
,
Anakin
EngineT
*
engine
,
bool
test_mode
=
false
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
std
::
string
op_type
=
op_desc
.
Type
();
...
...
@@ -65,7 +65,7 @@ class AnakinOpConverter {
void
ConvertBlock
(
framework
::
BlockDesc
*
block_desc
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
framework
::
Scope
&
scope
,
Anakin
NvEngine
*
engine
)
{
const
framework
::
Scope
&
scope
,
Anakin
EngineT
*
engine
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
framework
::
proto
::
BlockDesc
*
block
=
block_desc
->
Proto
();
for
(
auto
i
=
0
;
i
<
block
->
ops_size
();
i
++
)
{
...
...
@@ -79,7 +79,7 @@ class AnakinOpConverter {
framework
::
BlockDesc
*
block_desc
,
framework
::
Scope
*
scope
,
const
std
::
vector
<
std
::
string
>
&
inputs
,
const
std
::
unordered_set
<
std
::
string
>
&
parameters
,
const
std
::
vector
<
std
::
string
>
&
outputs
,
Anakin
NvEngine
*
engine
)
{
const
std
::
vector
<
std
::
string
>
&
outputs
,
Anakin
EngineT
*
engine
)
{
ConvertBlock
(
block_desc
,
parameters
,
*
scope
,
engine
);
// if the max_batch size
int
max_batch_size
=
engine
->
GetMaxBatchSize
();
...
...
@@ -128,40 +128,60 @@ class AnakinOpConverter {
engine
->
InitNet
();
}
void
SetEngine
(
Anakin
NvEngine
*
engine
)
{
engine_
=
engine
;
}
void
SetEngine
(
Anakin
EngineT
*
engine
)
{
engine_
=
engine
;
}
virtual
~
AnakinOpConverter
()
{}
protected:
bool
test_mode_
;
Anakin
NvEngine
*
engine_
{
nullptr
};
Anakin
EngineT
*
engine_
{
nullptr
};
private:
std
::
unordered_map
<
std
::
string
,
AnakinOpConverter
*>
converters_
;
std
::
unordered_map
<
std
::
string
,
AnakinOpConverter
<
TargetT
>
*>
converters_
;
framework
::
Scope
*
scope_
{
nullptr
};
std
::
mutex
mutex_
;
};
template
class
AnakinOpConverter
<::
anakin
::
saber
::
NV
>;
template
class
AnakinOpConverter
<::
anakin
::
saber
::
X86
>;
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
struct anakin_##op_type__##_converter \
#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, \
place_type__, place_class__) \
struct anakin_##op_type__##_##place_type__##_converter \
: public ::paddle::framework::Registrar { \
anakin_##op_type__##_
converter() {
\
LOG(INFO) << "register convert " << #op_type__
;
\
anakin_##op_type__##_
##place_type__##_converter() {
\
LOG(INFO) << "register convert " << #op_type__
<< " ";
\
::paddle::inference::Registry< \
::paddle::inference::anakin::AnakinOpConverter>::Global() \
::paddle::inference::anakin::AnakinOpConverter<place_class__>>:: \
Global() \
.Register<::paddle::inference::anakin::Converter__>(#op_type__); \
} \
}; \
anakin_##op_type__##_converter anakin_##op_type__##_converter__; \
int TouchConverterRegister_anakin_##op_type__() { \
anakin_##op_type__##_converter__.Touch(); \
anakin_##op_type__##_##place_type__##_converter \
anakin_##op_type__##_##place_type__##_converter__; \
int TouchConverterRegister_anakin_##op_type__##_##place_type__() { \
anakin_##op_type__##_##place_type__##_converter__.Touch(); \
return 0; \
}
#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
::anakin::saber::NV)
#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \
::anakin::saber::X86)
#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__) \
extern int TouchConverterRegister_anakin_##op_type__##_##place_type__(); \
int use_op_converter_anakin_##op_type__##_##place_type__ \
__attribute__((unused)) = \
TouchConverterRegister_anakin_##op_type__##_##place_type__();
#define USE_ANAKIN_CONVERTER(op_type__) \
extern int TouchConverterRegister_anakin_##op_type__(); \
int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
TouchConverterRegister_anakin_##op_type__();
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA)
#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU)
paddle/fluid/inference/anakin/convert/pool2d.cc
浏览文件 @
7ad182e1
...
...
@@ -17,23 +17,16 @@
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
Pool2dOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
Pool2dOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -65,17 +58,22 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
PADDLE_THROW
(
"TensorRT unsupported pooling type!"
);
}
engine_
->
AddOp
(
op_name
,
"Pooling"
,
{
x_name
},
{
y_name
});
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"pool_size"
,
ksize
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"strides"
,
strides
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"padding"
,
paddings
);
engine_
->
AddOpAttr
(
op_name
,
"method"
,
anakin_pool_type
);
engine_
->
AddOpAttr
(
op_name
,
"global_pooling"
,
global_pooling
);
engine_
->
AddOpAttr
(
op_name
,
"cmp_out_shape_floor_as_conv"
,
!
ceil_mode
);
this
->
engine_
->
AddOp
(
op_name
,
"Pooling"
,
{
x_name
},
{
y_name
});
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"pool_size"
,
ksize
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"strides"
,
strides
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"padding"
,
paddings
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"method"
,
anakin_pool_type
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"global_pooling"
,
global_pooling
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"cmp_out_shape_floor_as_conv"
,
!
ceil_mode
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
pool2d
,
Pool2dOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
pool2d
,
Pool2dOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
pool2d
,
Pool2dOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/pool2d.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
Pool2dOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
Pool2dOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
Pool2dOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/relu.cc
浏览文件 @
7ad182e1
...
...
@@ -16,19 +16,14 @@
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
ReluOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
ReluOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -37,14 +32,14 @@ void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"ReLU"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"alpha"
,
0
);
this
->
engine_
->
AddOp
(
op_name
,
"ReLU"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"alpha"
,
0
);
}
void
LeakyReluOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
LeakyReluOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -54,13 +49,19 @@ void LeakyReluOpConverter::operator()(const framework::proto::OpDesc &op,
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
float
alpha
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"alpha"
));
engine_
->
AddOp
(
op_name
,
"ReLU"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"alpha"
,
alpha
);
this
->
engine_
->
AddOp
(
op_name
,
"ReLU"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"alpha"
,
alpha
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
relu
,
ReluOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
leaky_relu
,
LeakyReluOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
relu
,
ReluOpConverter
<::
anakin
::
saber
::
NV
>
);
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
leaky_relu
,
LeakyReluOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
relu
,
ReluOpConverter
<::
anakin
::
saber
::
X86
>
);
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
leaky_relu
,
LeakyReluOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/relu.h
浏览文件 @
7ad182e1
...
...
@@ -22,7 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
ReluOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ReluOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
ReluOpConverter
()
=
default
;
...
...
@@ -33,7 +34,8 @@ class ReluOpConverter : public AnakinOpConverter {
virtual
~
ReluOpConverter
()
{}
};
class
LeakyReluOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
LeakyReluOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
LeakyReluOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/reshape.cc
浏览文件 @
7ad182e1
...
...
@@ -15,20 +15,16 @@
#include "paddle/fluid/inference/anakin/convert/reshape.h"
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
ReshapeOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
ReshapeOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1UL
);
...
...
@@ -37,17 +33,23 @@ void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op,
auto
output
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Reshape"
,
{
input
},
{
output
});
this
->
engine_
->
AddOp
(
op_name
,
"Reshape"
,
{
input
},
{
output
});
auto
shape
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"shape"
));
if
(
shape
.
size
()
<
4
)
{
shape
.
insert
(
shape
.
end
(),
4
-
shape
.
size
(),
1
);
}
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dims"
,
shape
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"dims"
,
shape
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
reshape
,
ReshapeOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
reshape
,
ReshapeOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
reshape
,
ReshapeOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/reshape.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
ReshapeOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ReshapeOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
ReshapeOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/roi_align.cc
浏览文件 @
7ad182e1
...
...
@@ -25,10 +25,10 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
void
RoiAlignOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
RoiAlignOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"ROIs"
).
size
(),
1
);
...
...
@@ -44,16 +44,21 @@ void RoiAlignOpConverter::operator()(const framework::proto::OpDesc &op,
auto
pooled_width
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"pooled_width"
));
auto
sampling_ratio
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"sampling_ratio"
));
engine_
->
AddOp
(
op_name
,
"RoiAlign"
,
{
input_x_name
,
input_rois_name
},
this
->
engine_
->
AddOp
(
op_name
,
"RoiAlign"
,
{
input_x_name
,
input_rois_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"spatial_scale"
,
spatial_scale
);
engine_
->
AddOpAttr
(
op_name
,
"pooled_height"
,
pooled_height
);
engine_
->
AddOpAttr
(
op_name
,
"pooled_width"
,
pooled_width
);
engine_
->
AddOpAttr
(
op_name
,
"sampling_ratio"
,
sampling_ratio
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"spatial_scale"
,
spatial_scale
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"pooled_height"
,
pooled_height
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"pooled_width"
,
pooled_width
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"sampling_ratio"
,
sampling_ratio
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
roi_align
,
RoiAlignOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
roi_align
,
RoiAlignOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
roi_align
,
RoiAlignOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/roi_align.h
浏览文件 @
7ad182e1
...
...
@@ -22,7 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
RoiAlignOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
RoiAlignOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
RoiAlignOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/scale.cc
浏览文件 @
7ad182e1
...
...
@@ -16,19 +16,14 @@
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
ScaleOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
ScaleOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -44,14 +39,14 @@ void ScaleOpConverter::operator()(const framework::proto::OpDesc &op,
PADDLE_ENFORCE
(
bias_after_scale
,
"The anakin scale layer only support bias after scale now."
);
engine_
->
AddOp
(
op_name
,
"Power"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"shift"
,
bias
);
engine_
->
AddOpAttr
(
op_name
,
"scale"
,
scale
);
engine_
->
AddOpAttr
(
op_name
,
"power"
,
static_cast
<
float
>
(
1.0
));
this
->
engine_
->
AddOp
(
op_name
,
"Power"
,
{
input_name
},
{
output_name
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"shift"
,
bias
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"scale"
,
scale
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"power"
,
static_cast
<
float
>
(
1.0
));
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_
ANAKIN_OP_CONVERTER
(
scale
,
ScaleOpConverter
);
REGISTER_
CUDA_ANAKIN_OP_CONVERTER
(
scale
,
ScaleOpConverter
<::
anakin
::
saber
::
NV
>
);
paddle/fluid/inference/anakin/convert/scale.h
浏览文件 @
7ad182e1
...
...
@@ -22,7 +22,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
ScaleOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
ScaleOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
ScaleOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/softmax.cc
浏览文件 @
7ad182e1
...
...
@@ -14,19 +14,14 @@
#include "paddle/fluid/inference/anakin/convert/softmax.h"
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
SoftMaxOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
SoftMaxOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1UL
);
...
...
@@ -41,12 +36,18 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input_shape_in_fluid
=
input_var_desc
->
GetShape
();
size_t
input_dims
=
input_shape_in_fluid
.
size
();
engine_
->
AddOp
(
op_name
,
"Softmax"
,
{
input
},
{
output
});
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
static_cast
<
int
>
(
input_dims
-
1
));
this
->
engine_
->
AddOp
(
op_name
,
"Softmax"
,
{
input
},
{
output
});
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
static_cast
<
int
>
(
input_dims
-
1
));
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
softmax
,
SoftMaxOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
softmax
,
SoftMaxOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
softmax
,
SoftMaxOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/softmax.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
SoftMaxOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
SoftMaxOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
SoftMaxOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/split.cc
浏览文件 @
7ad182e1
...
...
@@ -16,23 +16,16 @@
#include <algorithm>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
SplitOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
SplitOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
auto
input_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
y_names
=
op_desc
.
Output
(
"Out"
);
...
...
@@ -51,14 +44,19 @@ void SplitOpConverter::operator()(const framework::proto::OpDesc &op,
num_sum
+=
output_lengths
[
i
];
slice_point
.
push_back
(
num_sum
);
}
engine_
->
AddOp
(
op_name
,
"Slice"
,
{
input_name
},
y_names
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
axis
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"slice_point"
,
slice_point
);
this
->
engine_
->
AddOp
(
op_name
,
"Slice"
,
{
input_name
},
y_names
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
axis
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"slice_point"
,
slice_point
);
// slice_dim is useless in anakin
engine_
->
AddOpAttr
(
op_name
,
"slice_dim"
,
4
);
this
->
engine_
->
AddOpAttr
(
op_name
,
"slice_dim"
,
4
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
split
,
SplitOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
split
,
SplitOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
split
,
SplitOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/split.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
SplitOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
SplitOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
SplitOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/sum.cc
浏览文件 @
7ad182e1
...
...
@@ -17,22 +17,17 @@
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
SumOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
template
<
typename
TargetT
>
void
SumOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
2
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -43,13 +38,17 @@ void SumOpConverter::operator()(const framework::proto::OpDesc &op,
std
::
vector
<
float
>
coeff
=
{
1
,
1
};
std
::
string
elementwise_type
=
"Add"
;
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
input_names
,
{
out_name
});
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
engine_
->
AddOpAttr
<
std
::
string
>
(
op_name
,
"type"
,
elementwise_type
);
this
->
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
input_names
,
{
out_name
});
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"coeff"
,
coeff
);
this
->
engine_
->
template
AddOpAttr
<
std
::
string
>(
op_name
,
"type"
,
elementwise_type
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
sum
,
SumOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
sum
,
SumOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
sum
,
SumOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/sum.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
SumOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
SumOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
SumOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/test_activation_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,12 +21,14 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
static
void
test_activation_op
(
const
std
::
string
&
op_type
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
op_type
);
PADDLE_ENFORCE
(
converter
!=
nullptr
);
template
<
typename
TargetT
>
static
void
test_activation_op
(
const
std
::
string
&
op_type
,
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"act-X"
,
{
10
,
6
,
1
,
1
});
validator
.
DeclOutputVar
(
"act-Out"
,
{
10
,
6
,
1
,
1
});
framework
::
OpDesc
desc
;
...
...
@@ -41,13 +43,42 @@ static void test_activation_op(const std::string &op_type) {
validator
.
Execute
(
5
);
}
TEST
(
sigm_op
,
test
)
{
test_activation_op
(
"sigmoid"
);
}
TEST
(
tanh_op
,
test
)
{
test_activation_op
(
"tanh"
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
sigm_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_activation_op
<::
anakin
::
saber
::
NV
>
(
"sigmoid"
,
ctx
,
true
);
}
TEST
(
tanh_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_activation_op
<::
anakin
::
saber
::
NV
>
(
"tanh"
,
ctx
,
true
);
}
#endif
TEST
(
sigm_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_activation_op
<::
anakin
::
saber
::
X86
>
(
"sigmoid"
,
ctx
,
false
);
}
TEST
(
tanh_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_activation_op
<::
anakin
::
saber
::
X86
>
(
"tanh"
,
ctx
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
sigmoid
);
USE_OP
(
tanh
);
USE_CPU_ANAKIN_CONVERTER
(
sigmoid
);
USE_CPU_ANAKIN_CONVERTER
(
tanh
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
sigmoid
);
USE_ANAKIN_CONVERTER
(
tanh
);
#endif
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,16 +21,19 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
affine_channel
,
native
)
{
template
<
typename
TargetT
>
void
test_affine_channel_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
// Declare the difference between the inputs.
std
::
unordered_set
<
std
::
string
>
parameters
({
"scale"
,
"bias"
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"x"
,
{
1
,
3
,
5
,
2
});
validator
.
DeclOutputVar
(
"out"
,
{
1
,
3
,
5
,
2
});
validator
.
DeclParamVar
(
"scale"
,
{
1
,
3
,
1
,
1
});
validator
.
DeclParamVar
(
"bias"
,
{
1
,
3
,
1
,
1
});
validator
.
DeclParamVar
(
"scale"
,
{
3
});
validator
.
DeclParamVar
(
"bias"
,
{
3
});
// Prepare Op descriptions.
framework
::
OpDesc
desc
;
...
...
@@ -47,9 +50,26 @@ TEST(affine_channel, native) {
validator
.
Execute
(
1
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
affine_channel_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_affine_channel_op
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
);
}
#endif
TEST
(
affine_channel_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_affine_channel_op
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
affine_channel
);
USE_CPU_ANAKIN_CONVERTER
(
affine_channel
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
affine_channel
);
#endif
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
浏览文件 @
7ad182e1
...
...
@@ -19,12 +19,14 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
batch_norm_op
,
test
)
{
template
<
typename
TargetT
>
void
test_batchnorm_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
(
{
"batch_norm_scale"
,
"batch_norm_bias"
,
"batch_norm_mean"
,
"batch_norm_variance"
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
std
::
vector
<
int
>
param_shape
{
2
};
validator
.
DeclInputVar
(
"batch_norm_X"
,
{
1
,
2
,
5
,
5
});
...
...
@@ -64,8 +66,26 @@ TEST(batch_norm_op, test) {
validator
.
Execute
(
1
,
neglected_output
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
batch_norm_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_batchnorm_op
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
);
}
#endif
TEST
(
batch_norm_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_batchnorm_op
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
batch_norm
);
USE_CPU_ANAKIN_CONVERTER
(
batch_norm
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
batch_norm
);
#endif
paddle/fluid/inference/anakin/convert/test_concat_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,10 +21,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
concat_op
,
test
)
{
template
<
typename
TargetT
>
void
test_concat_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"concat_x1"
,
{
1
,
2
,
1
,
1
});
validator
.
DeclInputVar
(
"concat_x2"
,
{
1
,
3
,
1
,
1
});
validator
.
DeclInputVar
(
"concat_x3"
,
{
1
,
1
,
1
,
1
});
...
...
@@ -44,31 +46,26 @@ TEST(concat_op, test) {
validator
.
Execute
(
1
);
}
TEST
(
concat_op
,
test2
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
validator
.
DeclInputVar
(
"concat_x1"
,
{
1
,
4
});
validator
.
DeclInputVar
(
"concat_x2"
,
{
3
,
4
});
validator
.
DeclInputVar
(
"concat_x3"
,
{
2
,
4
});
validator
.
DeclOutputVar
(
"concat_out"
,
{
6
,
4
});
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"concat"
);
desc
.
SetInput
(
"X"
,
{
"concat_x1"
,
"concat_x2"
,
"concat_x3"
});
desc
.
SetOutput
(
"Out"
,
{
"concat_out"
});
int
axis
=
0
;
desc
.
SetAttr
(
"axis"
,
axis
);
validator
.
SetOp
(
*
desc
.
Proto
());
#ifdef PADDLE_WITH_CUDA
TEST
(
concat_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_concat_op
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
);
}
#endif
validator
.
Execute
(
1
);
TEST
(
concat_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_concat_op
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
concat
);
USE_CPU_ANAKIN_CONVERTER
(
concat
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
concat
);
#endif
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,13 +21,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
conv2d_op
,
test
)
{
auto
*
conv2d_converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"conv2d"
);
ASSERT_TRUE
(
conv2d_converter
!=
nullptr
);
template
<
typename
TargetT
>
void
test_conv2d_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
"conv2d-Y"
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"conv2d-X"
,
{
1
,
3
,
3
,
3
});
validator
.
DeclParamVar
(
"conv2d-Y"
,
{
4
,
3
,
1
,
1
});
validator
.
DeclOutputVar
(
"conv2d-Out"
,
{
1
,
4
,
3
,
3
});
...
...
@@ -54,9 +53,27 @@ TEST(conv2d_op, test) {
validator
.
Execute
(
3
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
conv2d_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_conv2d_op
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
);
}
#endif
TEST
(
conv2d_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_conv2d_op
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
conv2d
);
USE_CPU_ANAKIN_CONVERTER
(
conv2d
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
conv2d
);
#endif
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,10 +21,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
dropout_op
,
native
)
{
template
<
typename
TargetT
>
void
test_dropout_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"out"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"mask"
,
{
1
,
1
,
2
,
2
});
...
...
@@ -45,9 +47,26 @@ TEST(dropout_op, native) {
validator
.
Execute
(
1
,
neglected_output
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
dropout_op
,
gpu
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_dropout_op
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
);
}
#endif
TEST
(
dropout_op
,
cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_dropout_op
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
dropout
);
USE_CPU_ANAKIN_CONVERTER
(
dropout
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
dropout
);
#endif
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,10 +21,14 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
static
void
test_elementwise_op
(
const
std
::
string
&
op_type
)
{
template
<
typename
TargetT
>
static
void
test_elementwise_op
(
const
std
::
string
&
op_type
,
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclInputVar
(
"y"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"out"
,
{
1
,
1
,
2
,
2
});
...
...
@@ -43,14 +47,41 @@ static void test_elementwise_op(const std::string &op_type) {
validator
.
Execute
(
1
);
}
TEST
(
elementwise_op
,
native_add
)
{
test_elementwise_op
(
"elementwise_add"
);
}
TEST
(
elementwise_op
,
native_mul
)
{
test_elementwise_op
(
"elementwise_mul"
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the elementwise_add conversion test on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(elementwise_op, native_add_gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_elementwise_op<::anakin::saber::NV>("elementwise_add", device_ctx,
                                           /*use_gpu=*/true);
}
// Runs the elementwise_mul conversion test on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(elementwise_op, native_mul_gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_elementwise_op<::anakin::saber::NV>("elementwise_mul", device_ctx,
                                           /*use_gpu=*/true);
}
#endif
// Runs the elementwise_add conversion test on the Anakin X86 (CPU) target.
TEST(elementwise_op, native_add_cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_elementwise_op<::anakin::saber::X86>("elementwise_add", host_ctx,
                                            /*use_gpu=*/false);
}
// Runs the elementwise_mul conversion test on the Anakin X86 (CPU) target.
TEST(elementwise_op, native_mul_cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_elementwise_op<::anakin::saber::X86>("elementwise_mul", host_ctx,
                                            /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_OP
(
elementwise_mul
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_mul
);
#endif
USE_CPU_ANAKIN_CONVERTER
(
elementwise_add
);
USE_CPU_ANAKIN_CONVERTER
(
elementwise_mul
);
paddle/fluid/inference/anakin/convert/test_fc_op.cc
浏览文件 @
7ad182e1
...
...
@@ -20,13 +20,13 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
fc_op
,
test
)
{
auto
*
fc_converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"fc"
);
ASSERT_TRUE
(
fc_converter
);
template
<
typename
TargetT
>
void
test_mul_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul_y"
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"mul_x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclParamVar
(
"mul_y"
,
{
4
,
2
});
validator
.
DeclOutputVar
(
"mul_out"
,
{
1
,
2
});
...
...
@@ -42,9 +42,26 @@ TEST(fc_op, test) {
validator
.
Execute
(
10
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the mul conversion test on the Anakin NV (GPU) target, CUDA device 0.
TEST(mul_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_mul_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
#endif
// Runs the mul conversion test on the Anakin X86 (CPU) target.
TEST(mul_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_mul_op<::anakin::saber::X86>(host_ctx, /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
mul
);
USE_CPU_ANAKIN_CONVERTER
(
fc
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
fc
);
#endif
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
浏览文件 @
7ad182e1
...
...
@@ -20,13 +20,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
flatten_op
,
test
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"flatten"
);
ASSERT_TRUE
(
converter
);
template
<
typename
TargetT
>
void
test_flatten_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"flatten-X"
,
{
3
,
10
,
10
,
4
});
validator
.
DeclOutputVar
(
"flatten-Out"
,
{
3
,
400
,
1
,
1
});
framework
::
OpDesc
desc
;
...
...
@@ -42,10 +41,27 @@ TEST(flatten_op, test) {
validator
.
Execute
(
5
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the flatten conversion test on the Anakin NV (GPU) target, CUDA device 0.
TEST(flatten_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_flatten_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
#endif
// Runs the flatten conversion test on the Anakin X86 (CPU) target.
TEST(flatten_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_flatten_op<::anakin::saber::X86>(host_ctx, /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
reshape
);
USE_OP_ITSELF
(
flatten
);
USE_CPU_ANAKIN_CONVERTER
(
flatten
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
flatten
);
#endif
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
浏览文件 @
7ad182e1
...
...
@@ -19,15 +19,14 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
void
test_pool2d
(
bool
global_pooling
,
bool
ceil_mode
,
template
<
typename
TargetT
>
void
test_pool2d
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
,
bool
global_pooling
,
bool
ceil_mode
,
std
::
string
pool_type
=
"max"
)
{
auto
*
pool2d_converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"pool2d"
);
ASSERT_TRUE
(
pool2d_converter
);
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
// The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W.
...
...
@@ -64,56 +63,61 @@ void test_pool2d(bool global_pooling, bool ceil_mode,
validator
.
Execute
(
1
);
}
void
test_pool2d2
(
bool
global_pooling
,
bool
ceil_mode
,
std
::
string
pool_type
=
"max"
)
{
auto
*
pool2d_converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"pool2d"
);
ASSERT_TRUE
(
pool2d_converter
);
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
// The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W.
validator
.
DeclInputVar
(
"pool2d_x"
,
{
1
,
1
,
17
,
17
});
validator
.
DeclOutputVar
(
"pool2d_out"
,
{
1
,
1
,
17
,
17
});
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"pool2d"
);
desc
.
SetInput
(
"X"
,
{
"pool2d_x"
});
desc
.
SetOutput
(
"Out"
,
{
"pool2d_out"
});
std
::
vector
<
int
>
ksize
({
3
,
3
});
std
::
vector
<
int
>
strides
({
1
,
1
});
std
::
vector
<
int
>
paddings
({
1
,
1
});
std
::
string
pooling_t
=
pool_type
;
#ifdef PADDLE_WITH_CUDA
TEST
(
Pool2dOpConverter
,
normal
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_pool2d
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
,
false
,
false
);
}
TEST
(
Pool2dOpConverter
,
test_global_pooling
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_pool2d
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
,
true
,
false
);
}
desc
.
SetAttr
(
"pooling_type"
,
pooling_t
);
desc
.
SetAttr
(
"ksize"
,
ksize
);
desc
.
SetAttr
(
"strides"
,
strides
);
desc
.
SetAttr
(
"paddings"
,
paddings
);
desc
.
SetAttr
(
"global_pooling"
,
global_pooling
);
desc
.
SetAttr
(
"ceil_mode"
,
true
);
TEST
(
Pool2dOpConverter
,
max_ceil_test
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_pool2d
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
,
false
,
true
);
}
LOG
(
INFO
)
<<
"set OP"
;
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
TEST
(
Pool2dOpConverter
,
avg_ceil_test
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
test_pool2d
<::
anakin
::
saber
::
NV
>
(
ctx
,
true
,
false
,
true
,
"avg"
);
}
#endif
validator
.
Execute
(
1
);
TEST
(
Pool2dOpConverter
,
normal_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_pool2d
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
,
false
,
false
);
}
TEST
(
Pool2dOpConverter
,
test_global_pooling_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_pool2d
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
,
true
,
false
);
}
TEST
(
Pool2dOpConverter
,
normal
)
{
test_pool2d
(
false
,
false
);
}
TEST
(
Pool2dOpConverter
,
test_global_pooling
)
{
test_pool2d
(
true
,
false
);
}
TEST
(
Pool2dOpConverter
,
max_ceil_test_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_pool2d
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
,
false
,
true
);
}
TEST
(
Pool2dOpConverter
,
max_ceil_test
)
{
test_pool2d
(
false
,
true
);
}
TEST
(
Pool2dOpConverter
,
avg_ceil_test
)
{
test_pool2d
(
false
,
true
,
"avg"
);
}
TEST
(
Pool2dOpConverter
,
avg_ceil_test2
)
{
test_pool2d2
(
false
,
true
,
"avg"
);
}
TEST
(
Pool2dOpConverter
,
avg_ceil_test_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
test_pool2d
<::
anakin
::
saber
::
X86
>
(
ctx
,
false
,
false
,
true
,
"avg"
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
pool2d
);
USE_CPU_ANAKIN_CONVERTER
(
pool2d
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
pool2d
);
#endif
paddle/fluid/inference/anakin/convert/test_relu_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,12 +21,14 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
static
void
test_relu_op
(
const
std
::
string
&
op_type
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
op_type
);
PADDLE_ENFORCE
(
converter
!=
nullptr
);
template
<
typename
TargetT
>
static
void
test_activation_op
(
const
std
::
string
&
op_type
,
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"act-X"
,
{
10
,
6
,
1
,
1
});
validator
.
DeclOutputVar
(
"act-Out"
,
{
10
,
6
,
1
,
1
});
framework
::
OpDesc
desc
;
...
...
@@ -44,14 +46,44 @@ static void test_relu_op(const std::string &op_type) {
validator
.
Execute
(
5
);
}
TEST
(
activation
,
relu
)
{
test_relu_op
(
"relu"
);
}
TEST
(
activation
,
leaky_relu
)
{
test_relu_op
(
"leaky_relu"
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the "relu" activation conversion test on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(relu_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_activation_op<::anakin::saber::NV>("relu", device_ctx,
                                          /*use_gpu=*/true);
}
// Runs the "leaky_relu" activation conversion test on the Anakin NV (GPU)
// target, CUDA device 0.
TEST(leaky_relu_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_activation_op<::anakin::saber::NV>("leaky_relu", device_ctx,
                                          /*use_gpu=*/true);
}
#endif
/* FIXME: the CPU (X86) activation tests below are disabled because they seem to hit a bug.
TEST(relu_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("relu", ctx, false);
}
TEST(leaky_relu_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("leaky_relu", ctx, false);
}
*/
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
relu
);
USE_ANAKIN_CONVERTER
(
relu
);
USE_OP
(
leaky_relu
);
USE_CPU_ANAKIN_CONVERTER
(
relu
);
USE_CPU_ANAKIN_CONVERTER
(
leaky_relu
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
relu
);
USE_ANAKIN_CONVERTER
(
leaky_relu
);
#endif
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
浏览文件 @
7ad182e1
...
...
@@ -20,12 +20,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
reshape
,
test
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"reshape"
);
ASSERT_TRUE
(
converter
);
template
<
typename
TargetT
>
void
test_reshape1_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
// validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
// validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
...
...
@@ -45,10 +45,12 @@ TEST(reshape, test) {
validator
.
Execute
(
1
);
}
TEST
(
reshape
,
test2
)
{
template
<
typename
TargetT
>
void
test_reshape2_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"reshape-X"
,
{
1
,
2
,
4
});
validator
.
DeclOutputVar
(
"reshape-Out"
,
{
1
,
4
,
2
});
...
...
@@ -66,9 +68,39 @@ TEST(reshape, test2) {
validator
.
Execute
(
1
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the first reshape conversion case on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(reshape1_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_reshape1_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
// Runs the second reshape conversion case on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(reshape2_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_reshape2_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
#endif
// CPU counterpart of TEST(reshape1_op, gpu): runs the first reshape conversion
// case on the Anakin X86 target. This test used to invoke test_reshape2_op,
// which left the first case unexercised on CPU and merely duplicated
// TEST(reshape2_op, cpu).
TEST(reshape1_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_reshape1_op<::anakin::saber::X86>(ctx, /*use_gpu=*/false);
}
// Runs the second reshape conversion case on the Anakin X86 (CPU) target.
TEST(reshape2_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_reshape2_op<::anakin::saber::X86>(host_ctx, /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
reshape
);
USE_CPU_ANAKIN_CONVERTER
(
reshape
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
reshape
);
#endif
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
浏览文件 @
7ad182e1
...
...
@@ -20,12 +20,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
softmax
,
test
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"softmax"
);
ASSERT_TRUE
(
converter
);
template
<
typename
TargetT
>
void
test_softmax_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
framework
::
Scope
scope
;
std
::
unordered_set
<
std
::
string
>
parameters
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"softmax-X"
,
{
1
,
10
,
2
});
validator
.
DeclOutputVar
(
"softmax-Out"
,
{
1
,
10
,
2
});
...
...
@@ -41,9 +41,27 @@ TEST(softmax, test) {
validator
.
Execute
(
1
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the softmax conversion test on the Anakin NV (GPU) target, CUDA device 0.
TEST(softmax_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_softmax_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
#endif
// Runs the softmax conversion test on the Anakin X86 (CPU) target.
// The suite is named `softmax_op` (it was misnamed `relu_op`, a copy-paste
// leftover) so that test filters and reports group it with the softmax tests.
TEST(softmax_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_softmax_op<::anakin::saber::X86>(ctx, /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
softmax
);
USE_CPU_ANAKIN_CONVERTER
(
softmax
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
softmax
);
#endif
paddle/fluid/inference/anakin/convert/test_split_op.cc
浏览文件 @
7ad182e1
...
...
@@ -21,12 +21,14 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
template
<
int
Axis
>
void
AnakinSliceTest
(
const
std
::
vector
<
int
>
&
in_shape
,
template
<
typename
TargetT
,
int
Axis
>
void
AnakinSliceTest
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
,
const
std
::
vector
<
int
>
&
in_shape
,
const
std
::
vector
<
int
>
&
sections
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"split_input"
,
in_shape
);
std
::
vector
<
std
::
string
>
output_vars
;
...
...
@@ -55,51 +57,58 @@ void AnakinSliceTest(const std::vector<int> &in_shape,
// batch = 0, axis = 1, same shape
TEST
(
split_op
,
test_same_shape_axis1_batch1
)
{
AnakinSliceTest
<
1
>
({
1
,
4
,
2
,
2
},
{
2
,
2
});
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
1
>
(
ctx
,
true
,
{
1
,
4
,
2
,
2
},
{
2
,
2
});
}
// batch = 0, axis = 1, different shape
TEST
(
split_op
,
test_different_shape_axis1_batch1
)
{
AnakinSliceTest
<
1
>
({
1
,
3
,
2
,
2
},
{
2
,
1
});
}
// batch = 10, axis = 1, same shape
TEST
(
split_op
,
test_same_shape_axis1_batch10
)
{
AnakinSliceTest
<
1
>
({
1
,
4
,
2
,
2
},
{
2
,
2
});
}
// batch = 10, axis = 1, different shape
TEST
(
split_op
,
test_different_shape_axis1_batch10
)
{
AnakinSliceTest
<
1
>
({
1
,
3
,
2
,
2
},
{
2
,
1
});
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
1
>
(
ctx
,
true
,
{
1
,
3
,
2
,
2
},
{
2
,
1
});
}
// batch = 0, axis = 2, same shape
TEST
(
split_op
,
test_same_shape_axis2_batch1
)
{
AnakinSliceTest
<
2
>
({
1
,
3
,
4
,
2
},
{
2
,
2
});
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
2
>
(
ctx
,
true
,
{
1
,
3
,
4
,
2
},
{
2
,
2
});
}
// batch = 0, axis = 2, different shape
TEST
(
split_op
,
test_different_shape_axis2_batch1
)
{
AnakinSliceTest
<
2
>
({
1
,
3
,
3
,
2
},
{
2
,
1
});
}
// batch = 10, axis = 2, same shape
TEST
(
split_op
,
test_same_shape_axis2_batch10
)
{
AnakinSliceTest
<
2
>
({
1
,
3
,
4
,
2
},
{
2
,
2
});
}
// batch = 10, axis = 2, different shape
TEST
(
split_op
,
test_different_shape_axis2_batch10
)
{
AnakinSliceTest
<
2
>
({
1
,
3
,
3
,
2
},
{
2
,
1
});
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
2
>
(
ctx
,
true
,
{
1
,
3
,
3
,
2
},
{
2
,
1
});
}
// batch = 0, axis = 3, same shape
TEST
(
split_op
,
test_same_shape_axis3_batch1
)
{
AnakinSliceTest
<
3
>
({
1
,
3
,
2
,
4
},
{
2
,
2
});
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
3
>
(
ctx
,
true
,
{
1
,
3
,
2
,
4
},
{
2
,
2
});
}
// batch = 0, axis = 3, different shape
TEST
(
split_op
,
test_different_shape_axis3_batch1
)
{
AnakinSliceTest
<
3
>
({
1
,
3
,
2
,
3
},
{
2
,
1
});
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
3
>
(
ctx
,
true
,
{
1
,
3
,
2
,
3
},
{
2
,
1
});
}
// batch = 10, axis = 3, same shape
TEST
(
split_op
,
test_same_shape_axis3_batch10
)
{
AnakinSliceTest
<
3
>
({
1
,
3
,
2
,
4
},
{
2
,
2
});
TEST
(
split_op
,
test_different_shape_axis1_batch1_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
X86
,
1
>
(
ctx
,
false
,
{
1
,
3
,
2
,
3
},
{
2
,
1
});
}
TEST
(
split_op
,
test_different_shape_axis2_batch1_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
X86
,
2
>
(
ctx
,
false
,
{
1
,
3
,
4
,
2
},
{
2
,
2
});
}
// batch = 10, axis = 3, different shape
TEST
(
split_op
,
test_different_shape_axis3_batch10
)
{
AnakinSliceTest
<
3
>
({
1
,
3
,
2
,
3
},
{
2
,
1
});
TEST
(
split_op
,
test_different_shape_axis3_batch1_cpu
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
X86
,
3
>
(
ctx
,
false
,
{
1
,
3
,
2
,
4
},
{
2
,
2
});
}
}
// namespace anakin
...
...
@@ -107,4 +116,7 @@ TEST(split_op, test_different_shape_axis3_batch10) {
}
// namespace paddle
USE_OP
(
split
);
USE_CPU_ANAKIN_CONVERTER
(
split
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
split
);
#endif
paddle/fluid/inference/anakin/convert/test_sum_op.cc
浏览文件 @
7ad182e1
...
...
@@ -22,10 +22,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
sum
,
native
)
{
template
<
typename
TargetT
>
static
void
test_sum_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"sum_x1"
,
{
1
,
2
,
1
,
2
});
validator
.
DeclInputVar
(
"sum_x2"
,
{
1
,
2
,
1
,
2
});
validator
.
DeclOutputVar
(
"sum_out"
,
{
1
,
2
,
1
,
2
});
...
...
@@ -40,9 +42,26 @@ TEST(sum, native) {
validator
.
Execute
(
1
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the sum conversion test on the Anakin NV (GPU) target, CUDA device 0.
TEST(sum_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_sum_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
#endif
// Runs the sum conversion test on the Anakin X86 (CPU) target.
TEST(sum_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_sum_op<::anakin::saber::X86>(host_ctx, /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
sum
);
USE_CPU_ANAKIN_CONVERTER
(
sum
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
sum
);
#endif
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
浏览文件 @
7ad182e1
...
...
@@ -20,12 +20,12 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
TEST
(
transpose_op
,
test
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"transpose"
);
ASSERT_TRUE
(
converter
!=
nullptr
);
template
<
typename
TargetT
>
void
test_transpose1_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"transpose-X"
,
{
2
,
3
,
4
,
5
});
validator
.
DeclOutputVar
(
"transpose-Out"
,
{
4
,
2
,
5
,
3
});
...
...
@@ -43,11 +43,12 @@ TEST(transpose_op, test) {
validator
.
Execute
(
3
);
}
// test input shape's dims < 4
TEST
(
transpose_op
,
test2
)
{
template
<
typename
TargetT
>
void
test_transpose2_op
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
&
scope
);
AnakinConvertValidation
<
TargetT
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"transpose-X"
,
{
3
,
4
,
5
});
validator
.
DeclOutputVar
(
"transpose-Out"
,
{
3
,
5
,
4
});
...
...
@@ -65,9 +66,38 @@ TEST(transpose_op, test2) {
validator
.
Execute
(
1
);
}
#ifdef PADDLE_WITH_CUDA
// Runs the first transpose conversion case on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(transpose1_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_transpose1_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
// Runs the second transpose conversion case on the Anakin NV (GPU) target,
// CUDA device 0.
TEST(transpose2_op, gpu) {
  platform::CUDAPlace device_place(0);
  platform::CUDADeviceContext device_ctx(device_place);
  test_transpose2_op<::anakin::saber::NV>(device_ctx, /*use_gpu=*/true);
}
#endif
// CPU counterpart of TEST(transpose1_op, gpu): runs the first transpose
// conversion case on the Anakin X86 target. This test used to invoke
// test_transpose2_op, which left the first case unexercised on CPU and merely
// duplicated TEST(transpose2_op, cpu).
TEST(transpose1_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_transpose1_op<::anakin::saber::X86>(ctx, /*use_gpu=*/false);
}
// Runs the second transpose conversion case on the Anakin X86 (CPU) target.
TEST(transpose2_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_transpose2_op<::anakin::saber::X86>(host_ctx, /*use_gpu=*/false);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
transpose
);
USE_CPU_ANAKIN_CONVERTER
(
transpose
);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER
(
transpose
);
#endif
paddle/fluid/inference/anakin/convert/transpose.cc
浏览文件 @
7ad182e1
...
...
@@ -17,20 +17,16 @@
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
TransposeOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
template
<
typename
TargetT
>
void
TransposeOpConverter
<
TargetT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
...
...
@@ -38,7 +34,7 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
auto
input
=
op_desc
.
Input
(
"X"
).
front
();
auto
output
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Permute"
,
{
input
},
{
output
});
this
->
engine_
->
AddOp
(
op_name
,
"Permute"
,
{
input
},
{
output
});
auto
axis
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"axis"
));
size_t
axis_size
=
axis
.
size
();
...
...
@@ -46,11 +42,17 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
axis
.
push_back
(
axis_size
);
axis_size
+=
1
;
}
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dims"
,
axis
);
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
int
>
>
(
op_name
,
"dims"
,
axis
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
transpose
,
TransposeOpConverter
);
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER
(
transpose
,
TransposeOpConverter
<::
anakin
::
saber
::
NV
>
);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER
(
transpose
,
TransposeOpConverter
<::
anakin
::
saber
::
X86
>
);
paddle/fluid/inference/anakin/convert/transpose.h
浏览文件 @
7ad182e1
...
...
@@ -20,7 +20,8 @@ namespace paddle {
namespace
inference
{
namespace
anakin
{
class
TransposeOpConverter
:
public
AnakinOpConverter
{
template
<
typename
TargetT
>
class
TransposeOpConverter
:
public
AnakinOpConverter
<
TargetT
>
{
public:
TransposeOpConverter
()
=
default
;
...
...
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
7ad182e1
...
...
@@ -32,14 +32,8 @@ limitations under the License. */
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
...
...
@@ -55,8 +49,8 @@ float random(float low, float high) {
return
dist
(
mt
);
}
void
RandomizeTensor
(
framework
::
LoDTensor
*
tensor
,
const
platform
::
Place
&
place
,
const
platform
::
DeviceContext
&
ctx
)
{
void
RandomizeTensor
(
framework
::
LoDTensor
*
tensor
,
const
platform
::
Place
&
place
)
{
auto
dims
=
tensor
->
dims
();
size_t
num_elements
=
analysis
::
AccuDims
(
dims
,
dims
.
size
());
PADDLE_ENFORCE_GT
(
num_elements
,
0
);
...
...
@@ -78,17 +72,19 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
* anakin
* layer.
*/
template
<
typename
TargetT
>
class
AnakinConvertValidation
{
using
AnakinNvEngineT
=
AnakinEngine
<
NV
,
Precision
::
FP32
>
;
using
AnakinNvEngineT
=
AnakinEngine
<
TargetT
,
Precision
::
FP32
>
;
public:
AnakinConvertValidation
()
=
delete
;
AnakinConvertValidation
(
const
std
::
unordered_set
<
std
::
string
>&
parameters
,
framework
::
Scope
*
scope
)
:
parameters_
(
parameters
),
scope_
(
scope
),
place_
(
0
)
{
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream_
),
0
);
engine_
.
reset
(
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
true
));
framework
::
Scope
*
scope
,
const
platform
::
DeviceContext
&
ctx
,
bool
use_gpu
=
true
)
:
parameters_
(
parameters
),
scope_
(
scope
),
ctx_
(
ctx
),
use_gpu_
(
use_gpu
)
{
engine_
.
reset
(
new
AnakinEngine
<
TargetT
,
Precision
::
FP32
>
(
true
));
}
// Declare a Variable as input with random initialization.
...
...
@@ -108,11 +104,10 @@ class AnakinConvertValidation {
}
void
DeclVar
(
const
std
::
string
&
name
,
const
std
::
vector
<
int
>
dim_vec
)
{
platform
::
CUDADeviceContext
ctx
(
place_
);
auto
*
x
=
scope_
->
Var
(
name
);
auto
*
x_tensor
=
x
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
RandomizeTensor
(
x_tensor
,
place_
,
ctx
);
RandomizeTensor
(
x_tensor
,
ctx_
.
GetPlace
()
);
std
::
vector
<
int64_t
>
dim_vec_int64
;
for
(
auto
&
ele
:
dim_vec
)
{
...
...
@@ -132,7 +127,7 @@ class AnakinConvertValidation {
// should init anakin engine here.
auto
&
block_desc
=
program_desc_
.
Block
(
framework
::
kRootBlockIndex
);
Singleton
<
AnakinOpConverter
>::
Global
().
ConvertOp
(
Singleton
<
AnakinOpConverter
<
TargetT
>
>::
Global
().
ConvertOp
(
desc
,
block_desc
,
parameters_
,
*
scope_
,
engine_
.
get
(),
true
/*test_mode*/
);
engine_
->
Freeze
();
...
...
@@ -160,11 +155,8 @@ class AnakinConvertValidation {
void
Execute
(
int
batch_size
,
std
::
unordered_set
<
std
::
string
>
neglected_output
=
{})
{
// Execute Fluid Op
platform
::
CUDADeviceContext
ctx
(
place_
);
op_
->
Run
(
*
scope_
,
place_
);
op_
->
Run
(
*
scope_
,
ctx_
.
GetPlace
());
// std::vector<framework::LoDTensor> input_vector;
// std::vector<framework::LoDTensor> output_vector;
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
inputs
;
for
(
const
auto
&
input
:
op_desc_
->
InputArgumentNames
())
{
if
(
parameters_
.
count
(
input
))
continue
;
...
...
@@ -180,20 +172,27 @@ class AnakinConvertValidation {
std
::
vector
<
float
>
fluid_out
;
auto
*
var
=
scope_
->
FindVar
(
output
);
auto
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
fluid_out
);
framework
::
TensorToVector
(
*
tensor
,
ctx
_
,
&
fluid_out
);
fluid_outputs
.
push_back
(
fluid_out
);
outputs
.
insert
({
output
,
tensor
});
}
engine_
->
Execute
(
inputs
,
outputs
,
stream_
);
if
(
!
use_gpu_
)
{
engine_
->
Execute
(
inputs
,
outputs
);
}
else
{
cudaStream_t
stream
;
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream
),
0
);
engine_
->
Execute
(
inputs
,
outputs
,
stream
);
}
int
i_output
=
0
;
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
if
(
neglected_output
.
count
(
output
))
continue
;
std
::
vector
<
float
>
anakin_out
;
auto
*
var
=
scope_
->
FindVar
(
output
);
auto
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
anakin_out
);
framework
::
TensorToVector
(
*
tensor
,
ctx
_
,
&
anakin_out
);
size_t
anakin_out_size
=
anakin_out
.
size
();
auto
fluid_out
=
fluid_outputs
[
i_output
++
];
...
...
@@ -205,15 +204,17 @@ class AnakinConvertValidation {
private:
std
::
unique_ptr
<
AnakinNvEngineT
>
engine_
{
nullptr
};
cudaStream_t
stream_
;
std
::
unique_ptr
<
framework
::
OperatorBase
>
op_
;
std
::
unique_ptr
<
framework
::
OpDesc
>
op_desc_
;
framework
::
ProgramDesc
program_desc_
;
const
std
::
unordered_set
<
std
::
string
>&
parameters_
;
framework
::
Scope
*
scope_
;
platform
::
CUDAPlace
place_
;
const
platform
::
DeviceContext
&
ctx_
;
bool
use_gpu_
{
true
};
};
template
class
AnakinConvertValidation
<::
anakin
::
saber
::
NV
>;
template
class
AnakinConvertValidation
<::
anakin
::
saber
::
X86
>;
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
7ad182e1
...
...
@@ -69,11 +69,11 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
}
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
void
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
,
cudaStream_t
stream
)
{
void
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
BindInput
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
)
{
#ifdef PADDLE_WITH_CUDA
cudaDeviceSynchronize
();
#endif
for
(
const
auto
&
input
:
inputs
)
{
auto
*
tensor
=
input
.
second
;
auto
*
data
=
tensor
->
data
<
float
>
();
...
...
@@ -105,6 +105,35 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
}
}
// CPU execution path: binds the fluid input tensors to the Anakin network,
// runs one forward pass, then copies every requested Anakin output back into
// the matching fluid LoDTensor on the host.
//
// inputs  - name -> fluid tensor, forwarded to BindInput().
// outputs - name -> fluid tensor to fill; each name is looked up with
//           net_->get_out(). Every output tensor is resized to the Anakin
//           output's valid shape before the copy.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
    const std::map<std::string, framework::LoDTensor *> &inputs,
    const std::map<std::string, framework::LoDTensor *> &outputs) {
  BindInput(inputs);
  net_->prediction();

  // Both source and destination of the copy live on the host.
  platform::CPUPlace host_place;
  for (const auto &name_and_tensor : outputs) {
    auto *dst_tensor = name_and_tensor.second;
    auto *src_output = net_->get_out(name_and_tensor.first);
    auto *src_data = src_output->data();
    auto src_shape = src_output->valid_shape();

    // Resize first so that numel() below reflects the Anakin output shape.
    dst_tensor->Resize(framework::make_ddim(src_shape));
    auto *dst_data = dst_tensor->mutable_data<float>(host_place);

    // The element type is taken to be float, as in the original code.
    const auto byte_count = dst_tensor->numel() * sizeof(float);
    memory::Copy(host_place, static_cast<void *>(dst_data), host_place,
                 static_cast<void *>(src_data), byte_count);
  }
}
#ifdef PADDLE_WITH_CUDA
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
void
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
,
cudaStream_t
stream
)
{
BindInput
(
inputs
);
net_
->
prediction
();
cudaDeviceSynchronize
();
for
(
const
auto
&
output
:
outputs
)
{
...
...
@@ -121,6 +150,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
}
cudaDeviceSynchronize
();
}
#endif
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
void
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
Freeze
()
{
...
...
@@ -140,7 +170,15 @@ AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
return
std
::
unique_ptr
<
AnakinEngine
>
(
engine
);
}
#ifdef PADDLE_WITH_CUDA
template
class
AnakinEngine
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>;
template
class
AnakinEngineManager
<::
anakin
::
saber
::
NV
>;
#endif
template
class
AnakinEngine
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
FP32
>;
template
class
AnakinEngineManager
<::
anakin
::
saber
::
X86
>;
// template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/engine.h
浏览文件 @
7ad182e1
...
...
@@ -32,7 +32,6 @@
#include "saber/saber_types.h"
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
namespace
anakin
{
...
...
@@ -94,9 +93,16 @@ class AnakinEngine {
// Forwards `path` unchanged to graph_->save(); nothing is returned.
// NOTE(review): presumably this writes the Anakin graph to `path` -- confirm
// against the Anakin graph API.
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
// Returns the current value of the initialized_ flag.
bool
IsInit
()
{
return
initialized_
;
}
// Returns the stored device_ value.
// NOTE(review): presumably the device id this engine was created with -- confirm.
int
GetDevice
()
{
return
device_
;
}
void
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
);
#ifdef PADDLE_WITH_CUDA
void
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
,
cudaStream_t
stream
);
#endif
private:
void
BindInput
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
);
private:
bool
initialized_
{
false
};
...
...
@@ -108,24 +114,25 @@ class AnakinEngine {
std
::
vector
<
std
::
string
>
program_inputs_
;
};
template
<
typename
TargetT
>
class
AnakinEngineManager
{
using
Anakin
NvEngineT
=
AnakinEngine
<
NV
,
Precision
::
FP32
>
;
using
Anakin
EngineT
=
AnakinEngine
<
TargetT
,
Precision
::
FP32
>
;
public:
// Reports whether an engine is registered under `name` and its owning
// pointer is non-null. An absent key and a null entry both yield false.
bool HasEngine(const std::string& name) const {
  const auto found = engines_.find(name);
  if (found == engines_.end()) {
    return false;
  }
  return found->second != nullptr;
}
Anakin
Nv
EngineT
*
Get
(
const
std
::
string
&
name
)
const
{
AnakinEngineT
*
Get
(
const
std
::
string
&
name
)
const
{
return
engines_
.
at
(
name
).
get
();
}
AnakinNvEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
AnakinEngineT
*
Create
(
bool
need_summary
,
int
device
,
int
max_batch_size
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
std
::
vector
<
std
::
string
>
program_inputs
,
std
::
string
engine_name
)
{
std
::
vector
<
std
::
string
>
program_inputs
,
std
::
string
engine_name
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
mut_
);
auto
*
p
=
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
auto
*
p
=
new
AnakinEngine
<
TargetT
,
Precision
::
FP32
>
(
need_summary
,
device
,
max_batch_size
,
max_input_shape
,
program_inputs
);
engines_
[
engine_name
].
reset
(
p
);
return
p
;
...
...
@@ -138,7 +145,7 @@ class AnakinEngineManager {
}
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
Anakin
Nv
EngineT
>>
engines_
;
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
AnakinEngineT
>>
engines_
;
std
::
mutex
mut_
;
};
}
// namespace anakin
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
7ad182e1
...
...
@@ -67,7 +67,7 @@ struct Argument {
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE(Has(#field__)
);
\
PADDLE_ENFORCE(Has(#field__)
, "There is no such field");
\
return field__##_; \
} \
void Set##Field(const type__& x) { \
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
7ad182e1
...
...
@@ -114,6 +114,7 @@ void IRPassManager::CreatePasses(Argument *argument,
if
(
pass_name
==
"anakin_subgraph_pass"
)
{
pass
->
Set
(
"program"
,
new
framework
::
ProgramDesc
*
(
&
argument
->
main_program
()));
pass
->
Set
(
"use_gpu"
,
new
bool
(
argument
->
use_gpu
()));
pass
->
Set
(
"gpu_device_id"
,
new
int
(
argument
->
gpu_device_id
()));
pass
->
Set
(
"model_from_memory"
,
new
bool
(
argument
->
model_from_memory
()));
pass
->
Set
(
"engine_opt_info"
,
new
std
::
map
<
std
::
string
,
std
::
string
>
(
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
7ad182e1
...
...
@@ -194,20 +194,49 @@ void AnakinSubgraphPass::CreateAnakinOp(
auto
max_batch_size
=
Get
<
int
>
(
"max_batch_size"
);
auto
program_inputs
=
program_desc
->
GetFeedTargetNames
();
auto
*
anakin_engine
=
inference
::
Singleton
<
anakin
::
AnakinEngineManager
>::
Global
().
Create
(
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
max_input_shape
,
program_inputs
,
engine_key
);
bool
use_gpu
=
Get
<
bool
>
(
"use_gpu"
);
SetAttr
(
op_desc
->
Proto
(),
"use_gpu"
,
use_gpu
);
if
(
use_gpu
)
{
#ifdef PADDLE_WITH_CUDA
inference
::
Singleton
<
anakin
::
AnakinEngineManager
<::
anakin
::
saber
::
NV
>>::
Global
()
.
Create
(
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
max_input_shape
,
program_inputs
,
engine_key
);
#endif
}
else
{
inference
::
Singleton
<
anakin
::
AnakinEngineManager
<::
anakin
::
saber
::
X86
>>::
Global
()
.
Create
(
true
,
Get
<
int
>
(
"gpu_device_id"
),
max_batch_size
,
max_input_shape
,
program_inputs
,
engine_key
);
}
auto
*
scope
=
param_scope
();
std
::
unordered_set
<
std
::
string
>
param_set
(
params
.
begin
(),
params
.
end
());
framework
::
BlockDesc
block_desc_temp
(
nullptr
,
block_desc
.
Proto
());
inference
::
Singleton
<
inference
::
anakin
::
AnakinOpConverter
>::
Global
()
if
(
use_gpu
)
{
auto
*
anakin_engine
=
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
<
::
anakin
::
saber
::
NV
>>::
Global
()
.
Get
(
engine_key
);
inference
::
Singleton
<
inference
::
anakin
::
AnakinOpConverter
<::
anakin
::
saber
::
NV
>>::
Global
()
.
ConvertBlockToAnakinEngine
(
&
block_desc_temp
,
scope
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
param_set
,
output_mapping
,
anakin_engine
);
}
else
{
auto
*
anakin_engine
=
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
<
::
anakin
::
saber
::
X86
>>::
Global
()
.
Get
(
engine_key
);
inference
::
Singleton
<
inference
::
anakin
::
AnakinOpConverter
<::
anakin
::
saber
::
X86
>>::
Global
()
.
ConvertBlockToAnakinEngine
(
&
block_desc_temp
,
scope
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()),
param_set
,
output_mapping
,
anakin_engine
);
}
}
}
// namespace analysis
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
7ad182e1
...
...
@@ -70,4 +70,3 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
anakin_target
(
inference_anakin_api
)
anakin_target
(
inference_anakin_api_shared
)
endif
()
inference_analysis_test
(
faster_rcnn_test SRCS faster_rcnn_test.cc EXTRA_DEPS paddle_fluid
)
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
7ad182e1
...
...
@@ -268,9 +268,11 @@ void AnalysisConfig::Update() {
PADDLE_ENFORCE
(
!
use_tensorrt_
,
"Anakin sub-graph and TensorRT sub-graph are not allowed to "
"run at the same time!"
);
PADDLE_ENFORCE
(
use_gpu_
,
"Anakin sub-graph engine need gpu, please use the EnableGpu API."
);
if
(
use_gpu_
)
{
LOG
(
INFO
)
<<
"Run Anakin GPU mode"
;
}
else
{
LOG
(
INFO
)
<<
"Run Anakin CPU mode"
;
}
pass_builder
()
->
ClearPasses
();
for
(
const
auto
&
pass
:
kAnakinSubgraphPasses
)
{
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
7ad182e1
...
...
@@ -382,7 +382,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_
.
SetTensorRtUseStaticEngine
(
config_
.
trt_use_static_engine_
);
}
if
(
config_
.
use_gpu
()
&&
config_
.
anakin_engine_enabled
())
{
if
(
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
argument_
.
SetAnakinMinSubgraphSize
(
config_
.
anakin_min_subgraph_size_
);
...
...
paddle/fluid/operators/anakin/anakin_engine_op.h
浏览文件 @
7ad182e1
...
...
@@ -34,28 +34,16 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
FluidDT
=
framework
::
proto
::
VarType_Type
;
using
inference
::
Singleton
;
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
using
inference
::
anakin
::
AnakinEngine
;
class
AnakinEngineOp
:
public
framework
::
OperatorBase
{
using
AnakinNvEngineT
=
AnakinEngine
<
NV
,
Precision
::
FP32
>
;
private:
std
::
vector
<
std
::
string
>
input_names_
;
std
::
unordered_set
<
std
::
string
>
param_names_
;
mutable
AnakinNvEngineT
*
anakin_engine_
;
std
::
string
engine_key_
;
std
::
string
engine_serialized_data_
;
bool
use_gpu_
;
public:
AnakinEngineOp
(
const
std
::
string
&
type
,
...
...
@@ -66,10 +54,10 @@ class AnakinEngineOp : public framework::OperatorBase {
input_names_
=
Inputs
(
"Xs"
);
engine_key_
=
Attr
<
std
::
string
>
(
"engine_key"
);
auto
params
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"parameters"
);
use_gpu_
=
Attr
<
bool
>
(
"use_gpu"
);
for
(
const
auto
&
param
:
params
)
{
param_names_
.
insert
(
param
);
}
anakin_engine_
=
nullptr
;
}
protected:
...
...
@@ -80,7 +68,6 @@ class AnakinEngineOp : public framework::OperatorBase {
void
RunAnakin
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
{
auto
*
engine
=
GetEngine
(
scope
,
dev_place
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
dev_place
);
auto
stream
=
...
...
@@ -92,7 +79,6 @@ class AnakinEngineOp : public framework::OperatorBase {
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
inputs
;
// Convert input tensor from fluid to engine.
for
(
const
auto
&
x
:
Inputs
(
"Xs"
))
{
if
(
param_names_
.
count
(
x
))
continue
;
auto
&
t
=
...
...
@@ -110,17 +96,21 @@ class AnakinEngineOp : public framework::OperatorBase {
outputs
.
insert
({
output_maps
[
output_index
],
fluid_t
});
output_index
+=
1
;
}
if
(
use_gpu_
)
{
#ifdef PADDLE_WITH_CUDA
auto
*
engine
=
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
<
::
anakin
::
saber
::
NV
>>::
Global
()
.
Get
(
engine_key_
);
engine
->
Execute
(
inputs
,
outputs
,
stream
);
}
AnakinNvEngineT
*
GetEngine
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
{
if
(
anakin_engine_
==
nullptr
)
{
anakin_engine_
=
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
>::
Global
()
#endif
}
else
{
auto
*
engine
=
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
<
::
anakin
::
saber
::
X86
>>::
Global
()
.
Get
(
engine_key_
);
engine
->
Execute
(
inputs
,
outputs
);
}
return
anakin_engine_
;
}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录