PaddlePaddle / PaddleDetection
Commit 7ad182e1
Authored April 11, 2019 by nhzlx

Cherry-Pick from 16662 : Anakin subgraph cpu support

Parent: 8643dbc2
Showing 72 changed files with 1149 additions and 680 deletions (+1149 -680)
cmake/anakin_subgraph.cmake  +2 -1
paddle/fluid/inference/anakin/convert/activation.cc  +18 -14
paddle/fluid/inference/anakin/convert/activation.h  +8 -5
paddle/fluid/inference/anakin/convert/affine_channel.cc  +18 -14
paddle/fluid/inference/anakin/convert/affine_channel.h  +2 -1
paddle/fluid/inference/anakin/convert/batch_norm.cc  +30 -25
paddle/fluid/inference/anakin/convert/batch_norm.h  +2 -1
paddle/fluid/inference/anakin/convert/concat.cc  +12 -18
paddle/fluid/inference/anakin/convert/concat.h  +2 -1
paddle/fluid/inference/anakin/convert/conv2d.cc  +26 -21
paddle/fluid/inference/anakin/convert/conv2d.h  +2 -1
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc  +30 -21
paddle/fluid/inference/anakin/convert/conv2d_fusion.h  +2 -1
paddle/fluid/inference/anakin/convert/density_prior_box.cc  +37 -23
paddle/fluid/inference/anakin/convert/density_prior_box.h  +2 -1
paddle/fluid/inference/anakin/convert/detection_out.cc  +23 -22
paddle/fluid/inference/anakin/convert/detection_out.h  +2 -1
paddle/fluid/inference/anakin/convert/dropout.cc  +16 -16
paddle/fluid/inference/anakin/convert/dropout.h  +2 -1
paddle/fluid/inference/anakin/convert/elementwise.cc  +24 -26
paddle/fluid/inference/anakin/convert/elementwise.h  +4 -2
paddle/fluid/inference/anakin/convert/fc.cc  +22 -16
paddle/fluid/inference/anakin/convert/fc.h  +6 -3
paddle/fluid/inference/anakin/convert/flatten.cc  +12 -11
paddle/fluid/inference/anakin/convert/flatten.h  +2 -1
paddle/fluid/inference/anakin/convert/im2sequence.cc  +13 -17
paddle/fluid/inference/anakin/convert/im2sequence.h  +2 -1
paddle/fluid/inference/anakin/convert/op_converter.h  +47 -27
paddle/fluid/inference/anakin/convert/pool2d.cc  +17 -19
paddle/fluid/inference/anakin/convert/pool2d.h  +2 -1
paddle/fluid/inference/anakin/convert/relu.cc  +20 -19
paddle/fluid/inference/anakin/convert/relu.h  +4 -2
paddle/fluid/inference/anakin/convert/reshape.cc  +13 -11
paddle/fluid/inference/anakin/convert/reshape.h  +2 -1
paddle/fluid/inference/anakin/convert/roi_align.cc  +16 -11
paddle/fluid/inference/anakin/convert/roi_align.h  +2 -1
paddle/fluid/inference/anakin/convert/scale.cc  +9 -14
paddle/fluid/inference/anakin/convert/scale.h  +2 -1
paddle/fluid/inference/anakin/convert/softmax.cc  +13 -12
paddle/fluid/inference/anakin/convert/softmax.h  +2 -1
paddle/fluid/inference/anakin/convert/split.cc  +14 -16
paddle/fluid/inference/anakin/convert/split.h  +2 -1
paddle/fluid/inference/anakin/convert/sum.cc  +13 -14
paddle/fluid/inference/anakin/convert/sum.h  +2 -1
paddle/fluid/inference/anakin/convert/test_activation_op.cc  +37 -6
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc  +24 -4
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc  +22 -2
paddle/fluid/inference/anakin/convert/test_concat_op.cc  +19 -22
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc  +22 -5
paddle/fluid/inference/anakin/convert/test_dropout_op.cc  +21 -2
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc  +36 -5
paddle/fluid/inference/anakin/convert/test_fc_op.cc  +22 -5
paddle/fluid/inference/anakin/convert/test_flatten_op.cc  +21 -5
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc  +50 -46
paddle/fluid/inference/anakin/convert/test_relu_op.cc  +39 -7
paddle/fluid/inference/anakin/convert/test_reshape_op.cc  +38 -6
paddle/fluid/inference/anakin/convert/test_softmax_op.cc  +22 -4
paddle/fluid/inference/anakin/convert/test_split_op.cc  +43 -31
paddle/fluid/inference/anakin/convert/test_sum_op.cc  +21 -2
paddle/fluid/inference/anakin/convert/test_transpose_op.cc  +37 -7
paddle/fluid/inference/anakin/convert/transpose.cc  +13 -11
paddle/fluid/inference/anakin/convert/transpose.h  +2 -1
paddle/fluid/inference/anakin/convert/ut_helper.h  +26 -25
paddle/fluid/inference/anakin/engine.cc  +42 -4
paddle/fluid/inference/anakin/engine.h  +16 -9
paddle/fluid/inference/analysis/argument.h  +14 -14
paddle/fluid/inference/analysis/ir_pass_manager.cc  +1 -0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc  +39 -10
paddle/fluid/inference/api/CMakeLists.txt  +0 -1
paddle/fluid/inference/api/analysis_config.cc  +5 -3
paddle/fluid/inference/api/analysis_predictor.cc  +1 -1
paddle/fluid/operators/anakin/anakin_engine_op.h  +15 -25
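Every converter touched by this commit follows the same refactor: a converter class that was previously hard-wired to Anakin's NV (GPU) target becomes a class template over a TargetT tag, inherited engine members are reached through this->, and each converter is registered once for X86 (CPU) and once, guarded by PADDLE_WITH_CUDA, for NV. The following is a minimal, self-contained sketch of that pattern, not Paddle's real API; the names X86, NV, OpConverterBase and the REGISTER_* macros below are illustrative stand-ins for ::anakin::saber::X86 / ::anakin::saber::NV, AnakinOpConverter<TargetT> and the REGISTER_CPU/CUDA_ANAKIN_OP_CONVERTER macros seen in the diffs.

// Minimal sketch of the per-target converter pattern (hypothetical names).
#include <iostream>
#include <string>

// Target tags, playing the role of ::anakin::saber::X86 / ::anakin::saber::NV.
struct X86 { static constexpr const char *name = "X86"; };
struct NV  { static constexpr const char *name = "NV";  };

// Base converter templated on the target, like AnakinOpConverter<TargetT>.
template <typename TargetT>
class OpConverterBase {
 public:
  virtual ~OpConverterBase() = default;
  virtual void operator()(const std::string &op_name) = 0;
};

// A concrete converter, like ActivationOpConverter<TargetT> after the change.
template <typename TargetT>
class ReluConverter : public OpConverterBase<TargetT> {
 public:
  void operator()(const std::string &op_name) override {
    // The real converters would call this->engine_->AddOp(...) here.
    std::cout << "convert " << op_name << " for target " << TargetT::name << "\n";
  }
};

// Per-target registration, mirroring REGISTER_CPU_ANAKIN_OP_CONVERTER and
// REGISTER_CUDA_ANAKIN_OP_CONVERTER guarded by PADDLE_WITH_CUDA.
#define REGISTER_CPU_CONVERTER(op, Converter)  Converter<X86> op##_cpu_converter;
#define REGISTER_CUDA_CONVERTER(op, Converter) Converter<NV> op##_cuda_converter;

REGISTER_CPU_CONVERTER(relu, ReluConverter)
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_CONVERTER(relu, ReluConverter)
#endif

int main() {
  relu_cpu_converter("relu");
#ifdef PADDLE_WITH_CUDA
  relu_cuda_converter("relu");
#endif
  return 0;
}

Instantiating the same converter body once per target tag is what lets the CPU build register only the X86 variants while a CUDA build registers both, which is the point of the "Anakin subgraph cpu support" change.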
cmake/anakin_subgraph.cmake

@@ -25,8 +25,9 @@ endif()
 if (ANAKIN_FOUND)
   message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
+  include_directories(${ANAKIN_ROOT})
   include_directories(${ANAKIN_ROOT}/include)
-  include_directories(${ANAKIN_ROOT}/include/saber)
+  include_directories(${ANAKIN_ROOT}/saber)
   link_directories(${ANAKIN_ROOT})
   add_definitions(-DPADDLE_WITH_ANAKIN)
 endif()
paddle/fluid/inference/anakin/convert/activation.cc

@@ -16,16 +16,13 @@
 #include <algorithm>
 #include <map>
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 namespace paddle {
 namespace inference {
 namespace anakin {
-ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
+template <typename TargetT>
+ActivationOpConverter<TargetT>::ActivationOpConverter(const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);
   PADDLE_ENFORCE(it != anakin_op_types_.end(),
@@ -33,10 +30,10 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
   anakin_op_type_ = it->second;
 }
-void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
-                                       const framework::BlockDesc &block_desc,
-                                       const framework::Scope &scope,
-                                       bool test_mode) {
+template <typename TargetT>
+void ActivationOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -44,13 +41,20 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
-  engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "type", anakin_op_type_);
+  this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/activation.h

@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class ActivationOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT> {
  public:
   explicit ActivationOpConverter(const std::string &op_type);
@@ -39,14 +40,16 @@ class ActivationOpConverter : public AnakinOpConverter {
       {"sigmoid", "Sigmoid"}};
 };
-class TanhOpConverter : public ActivationOpConverter {
+template <typename TargetT>
+class TanhOpConverter : public ActivationOpConverter<TargetT> {
  public:
-  TanhOpConverter() : ActivationOpConverter("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
 };
-class SigmoidOpConverter : public ActivationOpConverter {
+template <typename TargetT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
+  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
 };
 }  // namespace anakin
 }  // namespace inference
paddle/fluid/inference/anakin/convert/affine_channel.cc

@@ -18,19 +18,16 @@
 #include <vector>
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void AffineChannelOpConverter::operator()(
-    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
-    const framework::Scope &scope, bool test_mode) {
+template <typename TargetT>
+void AffineChannelOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -59,7 +56,7 @@ void AffineChannelOpConverter::operator()(
   bias_tensor->Resize(bias_t->dims());
   TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
-  engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
   // Generate the Scale parameter of Anakin.
   auto scale_shape = framework::vectorize2int(scale_t->dims());
@@ -67,15 +64,16 @@ void AffineChannelOpConverter::operator()(
     scale_shape.insert(scale_shape.begin(), 1);
   }
   Shape anakin_scale_shape(scale_shape);
-  auto *weight1 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_scale_shape);
+  auto *weight1 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_scale_shape);
   float *scale_cpu_data =
       static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(),
               scale_cpu_data);
   weight1->d_tensor().set_shape(anakin_scale_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   // Generate the Bias parameter of Anakin.
   auto bias_shape = framework::vectorize2int(bias_t->dims());
@@ -83,18 +81,24 @@ void AffineChannelOpConverter::operator()(
     bias_shape.insert(bias_shape.begin(), 1);
   }
   Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
+  auto *weight2 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
   float *bias_cpu_data =
       static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
   weight2->d_tensor().set_shape(anakin_bias_shape);
   weight2->d_tensor().copy_from(weight2->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel,
+                                  AffineChannelOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel,
+                                 AffineChannelOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/affine_channel.h

@@ -21,7 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class AffineChannelOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
  public:
   AffineChannelOpConverter() = default;
paddle/fluid/inference/anakin/convert/batch_norm.cc

@@ -21,17 +21,16 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void BatchNormOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
   std::map<std::string, std::string> inputs;
@@ -48,9 +47,9 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
-  engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
-  engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
-  engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
+  this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
+  this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
   auto scale_op_name = op_name + ":scale";
   auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
@@ -81,48 +80,54 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
   Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
   Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
   auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
-  engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
   auto *weight2 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape2);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
   auto *variance_data =
       static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
-  engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
   Shape shape3(std::vector<int>({1, 1, 1, 1}));
   auto *weight3 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape3);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
   auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
   float weight3_data[] = {1};
   std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
-  engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
+  this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
   Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
   auto *scale =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(scale_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(scale_shape);
   auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
   std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
   Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
   auto *bias =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(bias_shape);
   auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
   std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
-  engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
-  engine_->AddOpAttr(scale_op_name, "axis", 1);
-  engine_->AddOpAttr(scale_op_name, "num_axes", 1);
-  engine_->AddOpAttr(scale_op_name, "bias_term", true);
-  engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
-  engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
+  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
+  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+  this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
+  this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm,
+                                  BatchNormOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm,
+                                 BatchNormOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/batch_norm.h

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class BatchNormOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
  public:
   BatchNormOpConverter() = default;
paddle/fluid/inference/anakin/convert/concat.cc

@@ -15,38 +15,32 @@
 #include "paddle/fluid/inference/anakin/convert/concat.h"
 #include <algorithm>
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void ConcatOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   int axis = boost::get<int>(op_desc.GetAttr("axis"));
   auto input_names = op_desc.Input("X");
+  // PADDLE_ENFORCE(axis > 0,
+  //                "The axis attr of Concat op should be large than 0 for trt");
   auto y_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
-  engine_->AddOp(op_name, "Concat", input_names, {y_name});
-  engine_->AddOpAttr(op_name, "axis", axis);
+  this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
+  this->engine_->AddOpAttr(op_name, "axis", axis);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/concat.h

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class ConcatOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ConcatOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ConcatOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d.cc

@@ -18,19 +18,18 @@
 #include <vector>
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
-using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void Conv2dOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
@@ -39,7 +38,7 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
-  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
@@ -51,38 +50,44 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
   // const int n_output = weight_tensor->dims()[0];
   // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
-  engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
+                                                 {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
-  engine_->AddOpAttr(op_name, "group", groups);
-  engine_->AddOpAttr(op_name, "axis", 1);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOpAttr(op_name, "group", groups);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOpAttr(op_name, "bias_term", false);
   auto weight_shape = framework::vectorize2int(filter_t->dims());
   Shape anakin_shape(weight_shape);
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter<::anakin::saber::X86>);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter<::anakin::saber::NV>);
+#endif
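A detail visible in the hunks above: once the converter is a class template whose base AnakinOpConverter<TargetT> depends on TargetT, a plain engine_->AddOpAttr<...>(...) no longer compiles. engine_ lives in the dependent base, so it has to be reached as this->engine_, and because this->engine_ is a dependent expression, calling the member template AddOpAttr needs the template keyword. The following standalone sketch reproduces that rule; Engine, ConverterBase and MyConverter are hypothetical names, not Paddle's.

// Why the diff rewrites calls as this->engine_->template AddOpAttr<...>(...).
#include <iostream>
#include <string>

struct Engine {
  template <typename T>
  void AddOpAttr(const std::string &name, T value) {
    std::cout << name << " = " << value << "\n";
  }
};

template <typename TargetT>
struct ConverterBase {
  Engine *engine_ = nullptr;  // member inherited from a dependent base
};

template <typename TargetT>
struct MyConverter : ConverterBase<TargetT> {
  void Run(Engine *e) {
    // "engine_ = e;" alone would fail: unqualified lookup does not search
    // the dependent base, so the member must be reached via this->.
    this->engine_ = e;
    // "template" disambiguates AddOpAttr<int> as a member template call on
    // the dependent expression this->engine_.
    this->engine_->template AddOpAttr<int>("filter_num", 64);
  }
};

int main() {
  Engine eng;
  MyConverter<int> conv;  // any type can stand in for the target tag here
  conv.Run(&eng);
  return 0;
}

This is why only the templated AddOpAttr<...> calls gained the template keyword, while the non-template AddOp/AddOpAttr overload calls only gained the this-> prefix.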
paddle/fluid/inference/anakin/convert/conv2d.h

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class Conv2dOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Conv2dOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Conv2dOpConverter() = default;
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc

@@ -18,19 +18,18 @@
 #include <vector>
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
-using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT>
+void Conv2dFusionOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
@@ -40,7 +39,7 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
-  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
@@ -63,28 +62,31 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
-  engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
+                                                 {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
-  engine_->AddOpAttr(op_name, "group", groups);
-  engine_->AddOpAttr(op_name, "axis", 1);
-  engine_->AddOpAttr(op_name, "bias_term", true);
+  this->engine_->AddOpAttr(op_name, "group", groups);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOpAttr(op_name, "bias_term", true);
   auto weight_shape = framework::vectorize2int(filter_t->dims());
   Shape anakin_shape(weight_shape);
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   auto bias_shape = framework::vectorize2int(b_t->dims());
   framework::LoDTensor bias_tensor;
@@ -98,17 +100,24 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   // bias_shape.push_back(1);
   Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
+  auto *weight2 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
   float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
   weight2->d_tensor().set_shape(anakin_bias_shape);
   weight2->d_tensor().copy_from(weight2->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion,
                                  Conv2dFusionOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion,
                                 Conv2dFusionOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/conv2d_fusion.h

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class Conv2dFusionOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Conv2dFusionOpConverter() = default;
paddle/fluid/inference/anakin/convert/density_prior_box.cc

@@ -17,17 +17,14 @@
 #include <map>
 #include <vector>
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void DensityPriorBoxOpConverter::operator()(
-    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
-    const framework::Scope &scope, bool test_mode) {
+template <typename TargetT>
+void DensityPriorBoxOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -81,27 +78,44 @@ void DensityPriorBoxOpConverter::operator()(
   std::vector<float> temp_v = {};
-  engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
-  engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", min_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", max_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", aspect_ratios);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
-  engine_->AddOpAttr(op_name, "is_flip", is_flip);
-  engine_->AddOpAttr(op_name, "is_clip", is_clip);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
-  engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "step_h", step_h);
-  engine_->AddOpAttr(op_name, "step_w", step_w);
-  engine_->AddOpAttr(op_name, "offset", offset);
-  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
+  this->engine_->AddOp(op_name, "PriorBox", {input_name, image_name},
+                       {output_name});
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "min_size", min_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "max_size", max_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", aspect_ratios);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "density", dens);
+  this->engine_->AddOpAttr(op_name, "is_flip", is_flip);
+  this->engine_->AddOpAttr(op_name, "is_clip", is_clip);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "variance", variances);
+  this->engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "step_h", step_h);
+  this->engine_->AddOpAttr(op_name, "step_w", step_w);
+  this->engine_->AddOpAttr(op_name, "offset", offset);
+  this->engine_->template AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box,
                                  DensityPriorBoxOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box,
                                  DensityPriorBoxOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box,
                                 DensityPriorBoxOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box,
                                 DensityPriorBoxOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/density_prior_box.h

@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class DensityPriorBoxOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DensityPriorBoxOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DensityPriorBoxOpConverter() = default;
paddle/fluid/inference/anakin/convert/detection_out.cc

@@ -16,19 +16,14 @@
 #include <algorithm>
 #include <map>
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 namespace paddle {
 namespace inference {
 namespace anakin {
-void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT>
+void DetectionOutOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto target_name = op_desc.Input("TargetBox").front();
   auto prior_box_name = op_desc.Input("PriorBox").front();
@@ -52,22 +47,28 @@ void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
         "Not support encode_center_size code_type in DetectionOut of anakin");
   }
-  engine_->AddOp(op_name, "DetectionOutput",
-                 {target_name, scores_name, prior_box_name}, {output_name});
-  engine_->AddOpAttr(op_name, "share_location", true);
-  engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
-  engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "background_id", background_label);
-  engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
-  engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
-  engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
-  engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
-  engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
-  engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
+  this->engine_->AddOp(op_name, "DetectionOutput",
+                       {target_name, scores_name, prior_box_name},
+                       {output_name});
+  this->engine_->AddOpAttr(op_name, "share_location", true);
+  this->engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
+  this->engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "background_id", background_label);
+  this->engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
+  this->engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
+  this->engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
+  this->engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
+  this->engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
+  this->engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
-REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out,
                                  DetectionOutOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out,
                                 DetectionOutOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/detection_out.h

@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
-class DetectionOutOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DetectionOutOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DetectionOutOpConverter() = default;
paddle/fluid/inference/anakin/convert/dropout.cc
View file @ 7ad182e1
...
@@ -19,21 +19,16 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void DropoutOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1);
...
@@ -43,25 +38,30 @@ void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Scale", {x_name}, {out_name});
+  this->engine_->AddOp(op_name, "Scale", {x_name}, {out_name});

   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
   Shape shape1(std::vector<int>({1, 1, 1, 1}));
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
   auto *factor_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   float weight1_data[] = {factor};
   std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data);

-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
-  engine_->AddOpAttr(op_name, "axis", 0);
-  engine_->AddOpAttr(op_name, "num_axes", 0);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "axis", 0);
+  this->engine_->AddOpAttr(op_name, "num_axes", 0);
+  this->engine_->AddOpAttr(op_name, "bias_term", false);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter<::anakin::saber::X86>);
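For reference, the number this converter bakes into weight_1 is the standard inference-time dropout rescaling. A minimal standalone sketch of that arithmetic (plain C++, not Paddle or Anakin API):

#include <vector>

// At inference time dropout degenerates to an elementwise scale: every
// activation is multiplied by (1 - dropout_prob), the single float the
// converter above stores in weight_1 for the Anakin "Scale" op.
std::vector<float> dropout_infer(const std::vector<float> &x,
                                 float dropout_prob) {
  const float factor = 1.0f - dropout_prob;  // same as `factor` above
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * factor;
  return out;
}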
paddle/fluid/inference/anakin/convert/dropout.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class DropoutOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DropoutOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DropoutOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/elementwise.cc
View file @ 7ad182e1
...
@@ -19,18 +19,15 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void ElementwiseAddOpConverter::operator()(
+template <typename TargetT>
+void ElementwiseAddOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -43,14 +40,16 @@ void ElementwiseAddOpConverter::operator()(
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
+  this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
   std::string elementwise_type = "Add";
-  engine_->AddOpAttr<std::string>(op_name, "type", elementwise_type);
+  this->engine_->template AddOpAttr<std::string>(op_name, "type", elementwise_type);
   std::vector<float> coeff = {1.0, 1.0};
-  engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }

-void ElementwiseMulOpConverter::operator()(
+template <typename TargetT>
+void ElementwiseMulOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
...
@@ -63,26 +62,25 @@ void ElementwiseMulOpConverter::operator()(
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Scale", {x_name, y_name}, {out_name});
-  // Fill a number to weight_1 as a placeholder.
-  Shape shape1(std::vector<int>({1, 1, 1, 1}));
-  auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *placeholder_data =
-      static_cast<float *>(weight1->h_tensor().mutable_data());
-  float weight1_data[] = {1};
-  std::copy(std::begin(weight1_data), std::end(weight1_data), placeholder_data);
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  auto axis = boost::get<int>(op_desc.GetAttr("axis"));
-  engine_->AddOpAttr(op_name, "axis", axis);
-  engine_->AddOpAttr(op_name, "num_axes", 1);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
+  std::string elementwise_type = "Prod";
+  this->engine_->template AddOpAttr<std::string>(op_name, "type",
+                                                 elementwise_type);
+  std::vector<float> coeff = {1.0, 1.0};
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::X86>);
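As a reference for what the two mapped ops are expected to compute on same-shaped inputs, here is a minimal sketch of the Paddle-side semantics of elementwise_add and elementwise_mul (plain C++, independent of the Anakin Eltwise implementation; the "Add" case also carries the unit coeff pair {1.0, 1.0} set above):

#include <cassert>
#include <vector>

// "Add"  -> out[i] = 1.0 * x[i] + 1.0 * y[i]
// "Prod" -> out[i] = x[i] * y[i]
std::vector<float> eltwise_add(const std::vector<float> &x,
                               const std::vector<float> &y) {
  assert(x.size() == y.size());
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = 1.0f * x[i] + 1.0f * y[i];
  return out;
}

std::vector<float> eltwise_prod(const std::vector<float> &x,
                                const std::vector<float> &y) {
  assert(x.size() == y.size());
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * y[i];
  return out;
}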
paddle/fluid/inference/anakin/convert/elementwise.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class ElementwiseAddOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ElementwiseAddOpConverter() = default;
...
@@ -33,7 +34,8 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
  private:
 };

-class ElementwiseMulOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ElementwiseMulOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ElementwiseMulOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/fc.cc
View file @ 7ad182e1
...
@@ -19,17 +19,16 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void FcBaseOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_names = op_desc.InputNames();
   bool with_bias = input_names.size() == 3;
...
@@ -51,13 +50,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "bias_term", with_bias);
-  engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
+  this->engine_->AddOpAttr(op_name, "axis", 1);

   auto weight_shape = framework::vectorize2int(y_t->dims());
   int out_dim = weight_shape[1];
-  engine_->AddOpAttr(op_name, "out_dim", out_dim);
+  this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
   const int w_m = weight_shape[0];
   const int w_k = weight_shape[1];
...
@@ -79,12 +78,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
     }
   }

   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

   // get bias
   if (with_bias) {
...
@@ -104,13 +104,14 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
     // bias_shape.push_back(1);
     Shape anakin_bias_shape(bias_shape);

-    auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-        anakin_bias_shape);
+    auto *weight2 =
+        GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(anakin_bias_shape);
     float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
     std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
     weight2->d_tensor().set_shape(anakin_bias_shape);
     weight2->d_tensor().copy_from(weight2->h_tensor());
-    engine_->AddOpAttr(op_name, "weight_2", *weight2);
+    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
...
@@ -118,5 +119,10 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::X86>);
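The trans_weight_data buffer copied into weight_1 above is built outside this hunk; its name suggests the [w_m, w_k] fluid weight is transposed into the layout Anakin's Dense layer expects. A hypothetical, standalone sketch of such a row-major transpose (illustration only, not the converter's actual helper):

#include <vector>

// Hypothetical helper: row-major transpose of an (m x k) weight into (k x m).
std::vector<float> transpose_weight(const std::vector<float> &w, int m, int k) {
  std::vector<float> t(static_cast<size_t>(m) * k);
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < k; ++j)
      t[static_cast<size_t>(j) * m + i] = w[static_cast<size_t>(i) * k + j];
  return t;
}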
paddle/fluid/inference/anakin/convert/fc.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class FcBaseOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
  public:
   FcBaseOpConverter() = default;
...
@@ -32,13 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter {
 };

 // with bias
-class FcOpConverter : public FcBaseOpConverter {
+template <typename TargetT>
+class FcOpConverter : public FcBaseOpConverter<TargetT> {
  public:
   FcOpConverter() = default;
 };

 // without bias
-class MulOpConverter : public FcBaseOpConverter {
+template <typename TargetT>
+class MulOpConverter : public FcBaseOpConverter<TargetT> {
  public:
   MulOpConverter() = default;
 };
...
paddle/fluid/inference/anakin/convert/flatten.cc
View file @ 7ad182e1
...
@@ -15,20 +15,16 @@
 #include "paddle/fluid/inference/anakin/convert/flatten.h"
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void FlattenOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
...
@@ -41,12 +37,17 @@ void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
   std::vector<int> out_dims = {0, -1, 1, 1};
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Reshape", {input}, {output});
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
+  this->engine_->AddOp(op_name, "Reshape", {input}, {output});
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/flatten.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class FlattenOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class FlattenOpConverter : public AnakinOpConverter<TargetT> {
  public:
   FlattenOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/im2sequence.cc
View file @ 7ad182e1
...
@@ -17,23 +17,16 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void Im2SequenceConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 0);
...
@@ -43,21 +36,24 @@ void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op,
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name});
+  this->engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name});

   std::vector<int> dilations = {1, 1};
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
   auto kernels = boost::get<std::vector<int>>(op_desc.GetAttr("kernels"));

-  engine_->AddOpAttr<PTuple<int>>(op_name, "paddings", paddings);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "window_size", kernels);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilations", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "paddings", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "window_size",
+                                                 kernels);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilations", dilations);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter<::anakin::saber::NV>);
paddle/fluid/inference/anakin/convert/im2sequence.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class Im2SequenceConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Im2SequenceConverter : public AnakinOpConverter<TargetT> {
  public:
   Im2SequenceConverter() = default;
...
paddle/fluid/inference/anakin/convert/op_converter.h
View file @ 7ad182e1
...
@@ -32,10 +32,10 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-using AnakinNvEngine =
-    AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
-
+template <typename TargetT>
 class AnakinOpConverter {
+  using AnakinEngineT = AnakinEngine<TargetT, ::anakin::Precision::FP32>;
+
  public:
   AnakinOpConverter() = default;
...
@@ -45,7 +45,7 @@ class AnakinOpConverter {
   void ConvertOp(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const std::unordered_set<std::string> &parameters,
-                 const framework::Scope &scope, AnakinNvEngine *engine,
+                 const framework::Scope &scope, AnakinEngineT *engine,
                  bool test_mode = false) {
     framework::OpDesc op_desc(op, nullptr);
     std::string op_type = op_desc.Type();
...
@@ -65,7 +65,7 @@ class AnakinOpConverter {
   void ConvertBlock(framework::BlockDesc *block_desc,
                     const std::unordered_set<std::string> &parameters,
-                    const framework::Scope &scope, AnakinNvEngine *engine) {
+                    const framework::Scope &scope, AnakinEngineT *engine) {
     std::unique_lock<std::mutex> lock(mutex_);
     framework::proto::BlockDesc *block = block_desc->Proto();
     for (auto i = 0; i < block->ops_size(); i++) {
...
@@ -79,7 +79,7 @@ class AnakinOpConverter {
       framework::BlockDesc *block_desc, framework::Scope *scope,
       const std::vector<std::string> &inputs,
       const std::unordered_set<std::string> &parameters,
-      const std::vector<std::string> &outputs, AnakinNvEngine *engine) {
+      const std::vector<std::string> &outputs, AnakinEngineT *engine) {
     ConvertBlock(block_desc, parameters, *scope, engine);
     // if the max_batch size
     int max_batch_size = engine->GetMaxBatchSize();
...
@@ -128,40 +128,60 @@ class AnakinOpConverter {
     engine->InitNet();
   }

-  void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
+  void SetEngine(AnakinEngineT *engine) { engine_ = engine; }
   virtual ~AnakinOpConverter() {}

  protected:
   bool test_mode_;
-  AnakinNvEngine *engine_{nullptr};
+  AnakinEngineT *engine_{nullptr};

  private:
-  std::unordered_map<std::string, AnakinOpConverter *> converters_;
+  std::unordered_map<std::string, AnakinOpConverter<TargetT> *> converters_;
   framework::Scope *scope_{nullptr};
   std::mutex mutex_;
 };

+template class AnakinOpConverter<::anakin::saber::NV>;
+template class AnakinOpConverter<::anakin::saber::X86>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)                 \
-  struct anakin_##op_type__##_converter                                      \
-      : public ::paddle::framework::Registrar {                              \
-    anakin_##op_type__##_converter() {                                       \
-      LOG(INFO) << "register convert " << #op_type__;                        \
-      ::paddle::inference::Registry<                                         \
-          ::paddle::inference::anakin::AnakinOpConverter>::Global()          \
-          .Register<::paddle::inference::anakin::Converter__>(#op_type__);   \
-    }                                                                        \
-  };                                                                         \
-  anakin_##op_type__##_converter anakin_##op_type__##_converter__;           \
-  int TouchConverterRegister_anakin_##op_type__() {                          \
-    anakin_##op_type__##_converter__.Touch();                                \
-    return 0;                                                                \
-  }
+#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__,            \
+                                          place_type__, place_class__)       \
+  struct anakin_##op_type__##_##place_type__##_converter                     \
+      : public ::paddle::framework::Registrar {                              \
+    anakin_##op_type__##_##place_type__##_converter() {                      \
+      LOG(INFO) << "register convert " << #op_type__ << " ";                 \
+      ::paddle::inference::Registry<                                         \
+          ::paddle::inference::anakin::AnakinOpConverter<place_class__>>::   \
+          Global()                                                           \
+              .Register<::paddle::inference::anakin::Converter__>(#op_type__); \
+    }                                                                        \
+  };                                                                         \
+  anakin_##op_type__##_##place_type__##_converter                            \
+      anakin_##op_type__##_##place_type__##_converter__;                     \
+  int TouchConverterRegister_anakin_##op_type__##_##place_type__() {         \
+    anakin_##op_type__##_##place_type__##_converter__.Touch();               \
+    return 0;                                                                \
+  }

-#define USE_ANAKIN_CONVERTER(op_type__)                             \
-  extern int TouchConverterRegister_anakin_##op_type__();           \
-  int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
-      TouchConverterRegister_anakin_##op_type__();
+#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
+                                    ::anakin::saber::NV)
+
+#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \
+                                    ::anakin::saber::X86)
+
+#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__)                  \
+  extern int TouchConverterRegister_anakin_##op_type__##_##place_type__();  \
+  int use_op_converter_anakin_##op_type__##_##place_type__                  \
+      __attribute__((unused)) =                                             \
+          TouchConverterRegister_anakin_##op_type__##_##place_type__();
+
+#define USE_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA)
+#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU)
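To illustrate how the refactored macros fit together, a short sketch for a hypothetical op `foo` with a hypothetical templated converter `FooOpConverter` (placeholder names; the real converters in this commit follow the same pattern, and op_converter.h is assumed to be included):

// In the converter's .cc file: one registration per target.
#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER(foo, FooOpConverter<::anakin::saber::NV>);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER(foo, FooOpConverter<::anakin::saber::X86>);

// In a translation unit that needs the converter linked in (e.g. a test):
USE_CPU_ANAKIN_CONVERTER(foo);  // expands to USE_ANAKIN_CONVERTER_BASE(foo, CPU)
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(foo);      // expands to USE_ANAKIN_CONVERTER_BASE(foo, CUDA)
#endif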
paddle/fluid/inference/anakin/convert/pool2d.cc
View file @ 7ad182e1
...
@@ -17,23 +17,16 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void Pool2dOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -65,17 +58,22 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
     PADDLE_THROW("TensorRT unsupported pooling type!");
   }

-  engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
-  engine_->AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
-  engine_->AddOpAttr(op_name, "method", anakin_pool_type);
-  engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
-  engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
+  this->engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->AddOpAttr(op_name, "method", anakin_pool_type);
+  this->engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
+  this->engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter<::anakin::saber::X86>);
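For context on mapping ceil_mode to cmp_out_shape_floor_as_conv = !ceil_mode above: Paddle's pool2d sizes its output with floor division when ceil_mode is false and with ceiling when it is true, so the flag asks Anakin for the conv-style floor behaviour. A standalone sketch of that arithmetic (not Anakin code):

#include <cmath>

// Output extent of one spatial dimension of pool2d.
// ceil_mode=false: floor((in + 2*pad - k) / stride) + 1
// ceil_mode=true : ceil ((in + 2*pad - k) / stride) + 1
int pool_out_size(int in, int k, int pad, int stride, bool ceil_mode) {
  const double span = in + 2.0 * pad - k;
  return static_cast<int>(ceil_mode ? std::ceil(span / stride)
                                    : std::floor(span / stride)) + 1;
}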
paddle/fluid/inference/anakin/convert/pool2d.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class Pool2dOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Pool2dOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Pool2dOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/relu.cc
View file @ 7ad182e1
...
@@ -16,19 +16,14 @@
 #include <algorithm>
 #include <map>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
-                                 const framework::BlockDesc &block_desc,
-                                 const framework::Scope &scope,
-                                 bool test_mode) {
+template <typename TargetT>
+void ReluOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -37,14 +32,14 @@ void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "alpha", 0);
+  this->engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "alpha", 0);
 }

-void LeakyReluOpConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void LeakyReluOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -54,13 +49,19 @@ void LeakyReluOpConverter::operator()(const framework::proto::OpDesc &op,
   auto output_name = op_desc.Output("Out").front();

   float alpha = boost::get<float>(op_desc.GetAttr("alpha"));
-  engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "alpha", alpha);
+  this->engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "alpha", alpha);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/relu.h
View file @ 7ad182e1
...
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class ReluOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ReluOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ReluOpConverter() = default;
...
@@ -33,7 +34,8 @@ class ReluOpConverter : public AnakinOpConverter {
   virtual ~ReluOpConverter() {}
 };

-class LeakyReluOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class LeakyReluOpConverter : public AnakinOpConverter<TargetT> {
  public:
   LeakyReluOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/reshape.cc
View file @ 7ad182e1
...
@@ -15,20 +15,16 @@
 #include "paddle/fluid/inference/anakin/convert/reshape.h"
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void ReshapeOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
...
@@ -37,17 +33,23 @@ void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op,
   auto output = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

-  engine_->AddOp(op_name, "Reshape", {input}, {output});
+  this->engine_->AddOp(op_name, "Reshape", {input}, {output});

   auto shape = boost::get<std::vector<int>>(op_desc.GetAttr("shape"));
   if (shape.size() < 4) {
     shape.insert(shape.end(), 4 - shape.size(), 1);
   }
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dims", shape);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", shape);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/reshape.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class ReshapeOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ReshapeOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ReshapeOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/roi_align.cc
View file @ 7ad182e1
...
@@ -25,10 +25,10 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-void RoiAlignOpConverter::operator()(const framework::proto::OpDesc &op,
-                                     const framework::BlockDesc &block_desc,
-                                     const framework::Scope &scope,
-                                     bool test_mode) {
+template <typename TargetT>
+void RoiAlignOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Input("ROIs").size(), 1);
...
@@ -44,16 +44,21 @@ void RoiAlignOpConverter::operator()(const framework::proto::OpDesc &op,
   auto pooled_width = boost::get<int>(op_desc.GetAttr("pooled_width"));
   auto sampling_ratio = boost::get<int>(op_desc.GetAttr("sampling_ratio"));

-  engine_->AddOp(op_name, "RoiAlign", {input_x_name, input_rois_name},
-                 {output_name});
-  engine_->AddOpAttr(op_name, "spatial_scale", spatial_scale);
-  engine_->AddOpAttr(op_name, "pooled_height", pooled_height);
-  engine_->AddOpAttr(op_name, "pooled_width", pooled_width);
-  engine_->AddOpAttr(op_name, "sampling_ratio", sampling_ratio);
+  this->engine_->AddOp(op_name, "RoiAlign", {input_x_name, input_rois_name},
+                       {output_name});
+  this->engine_->AddOpAttr(op_name, "spatial_scale", spatial_scale);
+  this->engine_->AddOpAttr(op_name, "pooled_height", pooled_height);
+  this->engine_->AddOpAttr(op_name, "pooled_width", pooled_width);
+  this->engine_->AddOpAttr(op_name, "sampling_ratio", sampling_ratio);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/roi_align.h
View file @ 7ad182e1
...
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class RoiAlignOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class RoiAlignOpConverter : public AnakinOpConverter<TargetT> {
  public:
   RoiAlignOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/scale.cc
View file @ 7ad182e1
...
@@ -16,19 +16,14 @@
 #include <algorithm>
 #include <map>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-void ScaleOpConverter::operator()(const framework::proto::OpDesc &op,
-                                  const framework::BlockDesc &block_desc,
-                                  const framework::Scope &scope,
-                                  bool test_mode) {
+template <typename TargetT>
+void ScaleOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -44,14 +39,14 @@ void ScaleOpConverter::operator()(const framework::proto::OpDesc &op,
   PADDLE_ENFORCE(bias_after_scale,
                  "The anakin scale layer only support bias after scale now.");

-  engine_->AddOp(op_name, "Power", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "shift", bias);
-  engine_->AddOpAttr(op_name, "scale", scale);
-  engine_->AddOpAttr(op_name, "power", static_cast<float>(1.0));
+  this->engine_->AddOp(op_name, "Power", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "shift", bias);
+  this->engine_->AddOpAttr(op_name, "scale", scale);
+  this->engine_->AddOpAttr(op_name, "power", static_cast<float>(1.0));
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter<::anakin::saber::NV>);
paddle/fluid/inference/anakin/convert/scale.h
View file @ 7ad182e1
...
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class ScaleOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ScaleOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ScaleOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/softmax.cc
View file @ 7ad182e1
...
@@ -14,19 +14,14 @@
 #include "paddle/fluid/inference/anakin/convert/softmax.h"

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {

-void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void SoftMaxOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
...
@@ -41,12 +36,18 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_shape_in_fluid = input_var_desc->GetShape();
   size_t input_dims = input_shape_in_fluid.size();

-  engine_->AddOp(op_name, "Softmax", {input}, {output});
-  engine_->AddOpAttr(op_name, "axis", static_cast<int>(input_dims - 1));
+  this->engine_->AddOp(op_name, "Softmax", {input}, {output});
+  this->engine_->AddOpAttr(op_name, "axis", static_cast<int>(input_dims - 1));
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/softmax.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class SoftMaxOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class SoftMaxOpConverter : public AnakinOpConverter<TargetT> {
  public:
   SoftMaxOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/split.cc
View file @ 7ad182e1
...
@@ -16,23 +16,16 @@
 #include <algorithm>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void SplitOpConverter::operator()(const framework::proto::OpDesc &op,
-                                  const framework::BlockDesc &block_desc,
-                                  const framework::Scope &scope,
-                                  bool test_mode) {
+template <typename TargetT>
+void SplitOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_name = op_desc.Input("X").front();
   auto y_names = op_desc.Output("Out");
...
@@ -51,14 +44,19 @@ void SplitOpConverter::operator()(const framework::proto::OpDesc &op,
     num_sum += output_lengths[i];
     slice_point.push_back(num_sum);
   }
-  engine_->AddOp(op_name, "Slice", {input_name}, y_names);
-  engine_->AddOpAttr(op_name, "axis", axis);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "slice_point", slice_point);
+  this->engine_->AddOp(op_name, "Slice", {input_name}, y_names);
+  this->engine_->AddOpAttr(op_name, "axis", axis);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "slice_point", slice_point);
   // slice_dim is useless in anakin
-  engine_->AddOpAttr(op_name, "slice_dim", 4);
+  this->engine_->AddOpAttr(op_name, "slice_dim", 4);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/split.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class SplitOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class SplitOpConverter : public AnakinOpConverter<TargetT> {
  public:
   SplitOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/sum.cc
View file @ 7ad182e1
...
@@ -17,22 +17,17 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void SumOpConverter::operator()(const framework::proto::OpDesc &op,
-                                const framework::BlockDesc &block_desc,
-                                const framework::Scope &scope, bool test_mode) {
+template <typename TargetT>
+void SumOpConverter<TargetT>::operator()(const framework::proto::OpDesc &op,
+                                         const framework::BlockDesc &block_desc,
+                                         const framework::Scope &scope,
+                                         bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -43,13 +38,17 @@ void SumOpConverter::operator()(const framework::proto::OpDesc &op,
   std::vector<float> coeff = {1, 1};
   std::string elementwise_type = "Add";
-  engine_->AddOp(op_name, "Eltwise", input_names, {out_name});
-  engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
-  engine_->AddOpAttr<std::string>(op_name, "type", elementwise_type);
+  this->engine_->AddOp(op_name, "Eltwise", input_names, {out_name});
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
+  this->engine_->template AddOpAttr<std::string>(op_name, "type", elementwise_type);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/sum.h
View file @ 7ad182e1
...
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class SumOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class SumOpConverter : public AnakinOpConverter<TargetT> {
  public:
   SumOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/test_activation_op.cc
@@ -21,12 +21,14 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-static void test_activation_op(const std::string &op_type) {
-  auto *converter = Registry<AnakinOpConverter>::Global().Lookup(op_type);
-  PADDLE_ENFORCE(converter != nullptr);
+template <typename TargetT>
+static void test_activation_op(const std::string &op_type,
+                               const platform::DeviceContext &context,
+                               bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
...
@@ -41,13 +43,42 @@ static void test_activation_op(const std::string &op_type) {
   validator.Execute(5);
 }

-TEST(sigm_op, test) { test_activation_op("sigmoid"); }
-TEST(tanh_op, test) { test_activation_op("tanh"); }
+#ifdef PADDLE_WITH_CUDA
+TEST(sigm_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_activation_op<::anakin::saber::NV>("sigmoid", ctx, true);
+}
+
+TEST(tanh_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_activation_op<::anakin::saber::NV>("tanh", ctx, true);
+}
+#endif
+
+TEST(sigm_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_activation_op<::anakin::saber::X86>("sigmoid", ctx, false);
+}
+
+TEST(tanh_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(sigmoid);
 USE_OP(tanh);
+USE_CPU_ANAKIN_CONVERTER(sigmoid);
+USE_CPU_ANAKIN_CONVERTER(tanh);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(sigmoid);
 USE_ANAKIN_CONVERTER(tanh);
+#endif
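Every converter test touched by this commit follows the same shape as the activation test above: one helper templated on the Anakin target, a DeviceContext plus use_gpu flag passed in from the TEST body, and the CUDA cases fenced with PADDLE_WITH_CUDA. A compilable gtest sketch of that layout, using placeholder tags instead of the real ::anakin::saber types and a stub in place of AnakinConvertValidation (all names below are illustrative):

    #include <gtest/gtest.h>
    #include <string>

    struct X86Tag {};  // stand-ins for ::anakin::saber::X86 / ::anakin::saber::NV
    struct NVTag {};

    // Stub validator; the real AnakinConvertValidation additionally takes a
    // platform::DeviceContext, the parameter set, and a scope.
    template <typename TargetT>
    struct StubValidator {
      explicit StubValidator(bool use_gpu) : use_gpu_(use_gpu) {}
      void Execute(int batch) { ASSERT_GT(batch, 0); }
      bool use_gpu_;
    };

    template <typename TargetT>
    static void test_activation(const std::string &op_type, bool use_gpu) {
      ASSERT_FALSE(op_type.empty());
      StubValidator<TargetT> validator(use_gpu);
      validator.Execute(5);
    }

    #ifdef PADDLE_WITH_CUDA
    TEST(sigm_op, gpu) { test_activation<NVTag>("sigmoid", true); }
    #endif
    TEST(sigm_op, cpu) { test_activation<X86Tag>("sigmoid", false); }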
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
@@ -21,16 +21,19 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(affine_channel, native) {
+template <typename TargetT>
+void test_affine_channel_op(const platform::DeviceContext &context,
+                            bool use_gpu) {
   // Declare the difference between the inputs.
   std::unordered_set<std::string> parameters({"scale", "bias"});
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 3, 5, 2});
   validator.DeclOutputVar("out", {1, 3, 5, 2});
-  validator.DeclParamVar("scale", {1, 3, 1, 1});
-  validator.DeclParamVar("bias", {1, 3, 1, 1});
+  validator.DeclParamVar("scale", {3});
+  validator.DeclParamVar("bias", {3});

   // Prepare Op descriptions.
   framework::OpDesc desc;
...
@@ -47,9 +50,26 @@ TEST(affine_channel, native) {
   validator.Execute(1);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(affine_channel_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_affine_channel_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(affine_channel_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_affine_channel_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(affine_channel);
+USE_CPU_ANAKIN_CONVERTER(affine_channel);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(affine_channel);
+#endif
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
@@ -19,12 +19,14 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(batch_norm_op, test) {
+template <typename TargetT>
+void test_batchnorm_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters(
       {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
        "batch_norm_variance"});
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   std::vector<int> param_shape{2};

   validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});
...
@@ -64,8 +66,26 @@ TEST(batch_norm_op, test) {
   validator.Execute(1, neglected_output);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(batch_norm_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_batchnorm_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(batch_norm_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_batchnorm_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(batch_norm);
+USE_CPU_ANAKIN_CONVERTER(batch_norm);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(batch_norm);
+#endif
paddle/fluid/inference/anakin/convert/test_concat_op.cc
@@ -21,10 +21,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(concat_op, test) {
+template <typename TargetT>
+void test_concat_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
   validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
   validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
...
@@ -44,31 +46,26 @@ TEST(concat_op, test) {
   validator.Execute(1);
 }

-TEST(concat_op, test2) {
-  std::unordered_set<std::string> parameters({""});
-  framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
-  validator.DeclInputVar("concat_x1", {1, 4});
-  validator.DeclInputVar("concat_x2", {3, 4});
-  validator.DeclInputVar("concat_x3", {2, 4});
-  validator.DeclOutputVar("concat_out", {6, 4});
-
-  // Prepare Op description
-  framework::OpDesc desc;
-  desc.SetType("concat");
-  desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
-  desc.SetOutput("Out", {"concat_out"});
-
-  int axis = 0;
-  desc.SetAttr("axis", axis);
-
-  validator.SetOp(*desc.Proto());
-  validator.Execute(1);
-}
+#ifdef PADDLE_WITH_CUDA
+TEST(concat_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_concat_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(concat_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_concat_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(concat);
+USE_CPU_ANAKIN_CONVERTER(concat);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(concat);
+#endif
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
@@ -21,13 +21,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(conv2d_op, test) {
-  auto *conv2d_converter =
-      Registry<AnakinOpConverter>::Global().Lookup("conv2d");
-  ASSERT_TRUE(conv2d_converter != nullptr);
+template <typename TargetT>
+void test_conv2d_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"conv2d-Y"});
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
   validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
   validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});
...
@@ -54,9 +53,27 @@ TEST(conv2d_op, test) {
   validator.Execute(3);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(conv2d_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_conv2d_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(conv2d_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_conv2d_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(conv2d);
+USE_CPU_ANAKIN_CONVERTER(conv2d);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(conv2d);
+#endif
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
@@ -21,10 +21,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(dropout_op, native) {
+template <typename TargetT>
+void test_dropout_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
   validator.DeclOutputVar("mask", {1, 1, 2, 2});
...
@@ -45,9 +47,26 @@ TEST(dropout_op, native) {
   validator.Execute(1, neglected_output);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(dropout_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_dropout_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(dropout_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_dropout_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(dropout);
+USE_CPU_ANAKIN_CONVERTER(dropout);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(dropout);
+#endif
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
@@ -21,10 +21,14 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-static void test_elementwise_op(const std::string &op_type) {
+template <typename TargetT>
+static void test_elementwise_op(const std::string &op_type,
+                                const platform::DeviceContext &context,
+                                bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("x", {1, 1, 2, 2});
   validator.DeclInputVar("y", {1, 1, 2, 2});
   validator.DeclOutputVar("out", {1, 1, 2, 2});
...
@@ -43,14 +47,41 @@ static void test_elementwise_op(const std::string &op_type) {
   validator.Execute(1);
 }

-TEST(elementwise_op, native_add) { test_elementwise_op("elementwise_add"); }
-TEST(elementwise_op, native_mul) { test_elementwise_op("elementwise_mul"); }
+#ifdef PADDLE_WITH_CUDA
+TEST(elementwise_op, native_add_gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_elementwise_op<::anakin::saber::NV>("elementwise_add", ctx, true);
+}
+
+TEST(elementwise_op, native_mul_gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true);
+}
+#endif
+
+TEST(elementwise_op, native_add_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_elementwise_op<::anakin::saber::X86>("elementwise_add", ctx, false);
+}
+
+TEST(elementwise_op, native_mul_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_elementwise_op<::anakin::saber::X86>("elementwise_mul", ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(elementwise_add);
-USE_ANAKIN_CONVERTER(elementwise_add);
 USE_OP(elementwise_mul);
+#ifdef PADDLE_WITH_CUDA
+USE_ANAKIN_CONVERTER(elementwise_add);
 USE_ANAKIN_CONVERTER(elementwise_mul);
+#endif
+USE_CPU_ANAKIN_CONVERTER(elementwise_add);
+USE_CPU_ANAKIN_CONVERTER(elementwise_mul);
paddle/fluid/inference/anakin/convert/test_fc_op.cc
@@ -20,13 +20,13 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(fc_op, test) {
-  auto *fc_converter = Registry<AnakinOpConverter>::Global().Lookup("fc");
-  ASSERT_TRUE(fc_converter);
+template <typename TargetT>
+void test_mul_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters({"mul_y"});
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("mul_x", {1, 1, 2, 2});
   validator.DeclParamVar("mul_y", {4, 2});
   validator.DeclOutputVar("mul_out", {1, 2});
...
@@ -42,9 +42,26 @@ TEST(fc_op, test) {
   validator.Execute(10);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(mul_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_mul_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(mul_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_mul_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(mul);
+USE_CPU_ANAKIN_CONVERTER(fc);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(fc);
+#endif
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
@@ -20,13 +20,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(flatten_op, test) {
-  auto *converter = Registry<AnakinOpConverter>::Global().Lookup("flatten");
-  ASSERT_TRUE(converter);
+template <typename TargetT>
+void test_flatten_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
   validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
   framework::OpDesc desc;
...
@@ -42,10 +41,27 @@ TEST(flatten_op, test) {
   validator.Execute(5);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(flatten_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_flatten_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(flatten_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_flatten_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(reshape);
 USE_OP_ITSELF(flatten);
+USE_CPU_ANAKIN_CONVERTER(flatten);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(flatten);
+#endif
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
@@ -19,15 +19,14 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-void test_pool2d(bool global_pooling, bool ceil_mode,
-                 std::string pool_type = "max") {
-  auto *pool2d_converter =
-      Registry<AnakinOpConverter>::Global().Lookup("pool2d");
-  ASSERT_TRUE(pool2d_converter);
+template <typename TargetT>
+void test_pool2d(const platform::DeviceContext &context, bool use_gpu,
+                 bool global_pooling, bool ceil_mode,
+                 std::string pool_type = "max") {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);

   // The ITensor's Dims should not contain the batch size.
   // So, the ITensor's Dims of input and output should be C * H * W.
...
@@ -64,56 +63,61 @@ void test_pool2d(bool global_pooling, bool ceil_mode,
   validator.Execute(1);
 }

-void test_pool2d2(bool global_pooling, bool ceil_mode,
-                  std::string pool_type = "max") {
-  auto *pool2d_converter =
-      Registry<AnakinOpConverter>::Global().Lookup("pool2d");
-  ASSERT_TRUE(pool2d_converter);
-
-  framework::Scope scope;
-  std::unordered_set<std::string> parameters;
-  AnakinConvertValidation validator(parameters, &scope);
-
-  // The ITensor's Dims should not contain the batch size.
-  // So, the ITensor's Dims of input and output should be C * H * W.
-  validator.DeclInputVar("pool2d_x", {1, 1, 17, 17});
-  validator.DeclOutputVar("pool2d_out", {1, 1, 17, 17});
-
-  // Prepare Op description
-  framework::OpDesc desc;
-  desc.SetType("pool2d");
-  desc.SetInput("X", {"pool2d_x"});
-  desc.SetOutput("Out", {"pool2d_out"});
-
-  std::vector<int> ksize({3, 3});
-  std::vector<int> strides({1, 1});
-  std::vector<int> paddings({1, 1});
-  std::string pooling_t = pool_type;
-
-  desc.SetAttr("pooling_type", pooling_t);
-  desc.SetAttr("ksize", ksize);
-  desc.SetAttr("strides", strides);
-  desc.SetAttr("paddings", paddings);
-  desc.SetAttr("global_pooling", global_pooling);
-  desc.SetAttr("ceil_mode", true);
-
-  LOG(INFO) << "set OP";
-  validator.SetOp(*desc.Proto());
-  LOG(INFO) << "execute";
-
-  validator.Execute(1);
-}
-
-TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); }
-TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); }
-TEST(Pool2dOpConverter, max_ceil_test) { test_pool2d(false, true); }
-TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d(false, true, "avg"); }
-TEST(Pool2dOpConverter, avg_ceil_test2) { test_pool2d2(false, true, "avg"); }
+#ifdef PADDLE_WITH_CUDA
+TEST(Pool2dOpConverter, normal) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_pool2d<::anakin::saber::NV>(ctx, true, false, false);
+}
+
+TEST(Pool2dOpConverter, test_global_pooling) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_pool2d<::anakin::saber::NV>(ctx, true, true, false);
+}
+
+TEST(Pool2dOpConverter, max_ceil_test) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_pool2d<::anakin::saber::NV>(ctx, true, false, true);
+}
+
+TEST(Pool2dOpConverter, avg_ceil_test) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg");
+}
+#endif
+
+TEST(Pool2dOpConverter, normal_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_pool2d<::anakin::saber::X86>(ctx, false, false, false);
+}
+
+TEST(Pool2dOpConverter, test_global_pooling_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_pool2d<::anakin::saber::X86>(ctx, false, true, false);
+}
+
+TEST(Pool2dOpConverter, max_ceil_test_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_pool2d<::anakin::saber::X86>(ctx, false, false, true);
+}
+
+TEST(Pool2dOpConverter, avg_ceil_test_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg");
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(pool2d);
+USE_CPU_ANAKIN_CONVERTER(pool2d);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(pool2d);
+#endif
paddle/fluid/inference/anakin/convert/test_relu_op.cc
@@ -21,12 +21,14 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-static void test_relu_op(const std::string &op_type) {
-  auto *converter = Registry<AnakinOpConverter>::Global().Lookup(op_type);
-  PADDLE_ENFORCE(converter != nullptr);
+template <typename TargetT>
+static void test_activation_op(const std::string &op_type,
+                               const platform::DeviceContext &context,
+                               bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("act-X", {10, 6, 1, 1});
   validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
   framework::OpDesc desc;
...
@@ -44,14 +46,44 @@ static void test_relu_op(const std::string &op_type) {
   validator.Execute(5);
 }

-TEST(activation, relu) { test_relu_op("relu"); }
-TEST(activation, leaky_relu) { test_relu_op("leaky_relu"); }
+#ifdef PADDLE_WITH_CUDA
+TEST(relu_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_activation_op<::anakin::saber::NV>("relu", ctx, true);
+}
+
+TEST(leaky_relu_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_activation_op<::anakin::saber::NV>("leaky_relu", ctx, true);
+}
+#endif
+
+/* seems bug here
+TEST(relu_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_activation_op<::anakin::saber::X86>("relu", ctx, false);
+}
+
+TEST(leaky_relu_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_activation_op<::anakin::saber::X86>("leaky_relu", ctx, false);
+}
+*/

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(relu);
-USE_ANAKIN_CONVERTER(relu);
 USE_OP(leaky_relu);
+USE_CPU_ANAKIN_CONVERTER(relu);
+USE_CPU_ANAKIN_CONVERTER(leaky_relu);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(relu);
 USE_ANAKIN_CONVERTER(leaky_relu);
+#endif
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
@@ -20,12 +20,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(reshape, test) {
-  auto *converter = Registry<AnakinOpConverter>::Global().Lookup("reshape");
-  ASSERT_TRUE(converter);
+template <typename TargetT>
+void test_reshape1_op(const platform::DeviceContext &context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);

   // validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
   // validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
...
@@ -45,10 +45,12 @@ TEST(reshape, test) {
   validator.Execute(1);
 }

-TEST(reshape, test2) {
+template <typename TargetT>
+void test_reshape2_op(const platform::DeviceContext &context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("reshape-X", {1, 2, 4});
   validator.DeclOutputVar("reshape-Out", {1, 4, 2});
...
@@ -66,9 +68,39 @@ TEST(reshape, test2) {
   validator.Execute(1);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(reshape1_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_reshape1_op<::anakin::saber::NV>(ctx, true);
+}
+
+TEST(reshape2_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_reshape2_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(reshape1_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_reshape2_op<::anakin::saber::X86>(ctx, false);
+}
+
+TEST(reshape2_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_reshape2_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(reshape);
+USE_CPU_ANAKIN_CONVERTER(reshape);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(reshape);
+#endif
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
@@ -20,12 +20,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(softmax, test) {
-  auto *converter = Registry<AnakinOpConverter>::Global().Lookup("softmax");
-  ASSERT_TRUE(converter);
+template <typename TargetT>
+void test_softmax_op(const platform::DeviceContext &context, bool use_gpu) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("softmax-X", {1, 10, 2});
   validator.DeclOutputVar("softmax-Out", {1, 10, 2});
...
@@ -41,9 +41,27 @@ TEST(softmax, test) {
   validator.Execute(1);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(softmax_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_softmax_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(relu_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_softmax_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(softmax);
+USE_CPU_ANAKIN_CONVERTER(softmax);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(softmax);
+#endif
paddle/fluid/inference/anakin/convert/test_split_op.cc
@@ -21,12 +21,14 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-template <int Axis>
-void AnakinSliceTest(const std::vector<int> &in_shape,
-                     const std::vector<int> &sections) {
+template <typename TargetT, int Axis>
+void AnakinSliceTest(const platform::DeviceContext &context, bool use_gpu,
+                     const std::vector<int> &in_shape,
+                     const std::vector<int> &sections) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("split_input", in_shape);
   std::vector<std::string> output_vars;
...
@@ -55,51 +57,58 @@ void AnakinSliceTest(const std::vector<int> &in_shape,
 // batch = 0, axis = 1, same shape
 TEST(split_op, test_same_shape_axis1_batch1) {
-  AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2});
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  AnakinSliceTest<::anakin::saber::NV, 1>(ctx, true, {1, 4, 2, 2}, {2, 2});
 }
 // batch = 0, axis = 1, different shape
 TEST(split_op, test_different_shape_axis1_batch1) {
-  AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1});
-}
-// batch = 10, axis = 1, same shape
-TEST(split_op, test_same_shape_axis1_batch10) {
-  AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2});
-}
-// batch = 10, axis = 1, different shape
-TEST(split_op, test_different_shape_axis1_batch10) {
-  AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1});
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  AnakinSliceTest<::anakin::saber::NV, 1>(ctx, true, {1, 3, 2, 2}, {2, 1});
 }
 // batch = 0, axis = 2, same shape
 TEST(split_op, test_same_shape_axis2_batch1) {
-  AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2});
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  AnakinSliceTest<::anakin::saber::NV, 2>(ctx, true, {1, 3, 4, 2}, {2, 2});
 }
 // batch = 0, axis = 2, different shape
 TEST(split_op, test_different_shape_axis2_batch1) {
-  AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1});
-}
-// batch = 10, axis = 2, same shape
-TEST(split_op, test_same_shape_axis2_batch10) {
-  AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2});
-}
-// batch = 10, axis = 2, different shape
-TEST(split_op, test_different_shape_axis2_batch10) {
-  AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1});
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  AnakinSliceTest<::anakin::saber::NV, 2>(ctx, true, {1, 3, 3, 2}, {2, 1});
 }
 // batch = 0, axis = 3, same shape
 TEST(split_op, test_same_shape_axis3_batch1) {
-  AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2});
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 4}, {2, 2});
 }
 // batch = 0, axis = 3, different shape
 TEST(split_op, test_different_shape_axis3_batch1) {
-  AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1});
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 3}, {2, 1});
 }
-// batch = 10, axis = 3, same shape
-TEST(split_op, test_same_shape_axis3_batch10) {
-  AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2});
-}
-// batch = 10, axis = 3, different shape
-TEST(split_op, test_different_shape_axis3_batch10) {
-  AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1});
+TEST(split_op, test_different_shape_axis1_batch1_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  AnakinSliceTest<::anakin::saber::X86, 1>(ctx, false, {1, 3, 2, 3}, {2, 1});
+}
+TEST(split_op, test_different_shape_axis2_batch1_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  AnakinSliceTest<::anakin::saber::X86, 2>(ctx, false, {1, 3, 4, 2}, {2, 2});
+}
+TEST(split_op, test_different_shape_axis3_batch1_cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  AnakinSliceTest<::anakin::saber::X86, 3>(ctx, false, {1, 3, 2, 4}, {2, 2});
 }

 }  // namespace anakin
...
@@ -107,4 +116,7 @@ TEST(split_op, test_different_shape_axis3_batch10) {
 }  // namespace paddle

 USE_OP(split);
+USE_CPU_ANAKIN_CONVERTER(split);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(split);
+#endif
paddle/fluid/inference/anakin/convert/test_sum_op.cc
@@ -22,10 +22,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(sum, native) {
+template <typename TargetT>
+static void test_sum_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
   validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
   validator.DeclOutputVar("sum_out", {1, 2, 1, 2});
...
@@ -40,9 +42,26 @@ TEST(sum, native) {
   validator.Execute(1);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(sum_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_sum_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(sum_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_sum_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(sum);
+USE_CPU_ANAKIN_CONVERTER(sum);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(sum);
+#endif
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
@@ -20,12 +20,12 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-TEST(transpose_op, test) {
-  auto *converter = Registry<AnakinOpConverter>::Global().Lookup("transpose");
-  ASSERT_TRUE(converter != nullptr);
+template <typename TargetT>
+void test_transpose1_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
...
@@ -43,11 +43,12 @@ TEST(transpose_op, test) {
   validator.Execute(3);
 }

 // test input shape's dims < 4
-TEST(transpose_op, test2) {
+template <typename TargetT>
+void test_transpose2_op(const platform::DeviceContext &context, bool use_gpu) {
   std::unordered_set<std::string> parameters;
   framework::Scope scope;
-  AnakinConvertValidation validator(parameters, &scope);
+  AnakinConvertValidation<TargetT> validator(parameters, &scope, context, use_gpu);
   validator.DeclInputVar("transpose-X", {3, 4, 5});
   validator.DeclOutputVar("transpose-Out", {3, 5, 4});
...
@@ -65,9 +66,38 @@ TEST(transpose_op, test2) {
   validator.Execute(1);
 }

+#ifdef PADDLE_WITH_CUDA
+TEST(transpose1_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_transpose1_op<::anakin::saber::NV>(ctx, true);
+}
+
+TEST(transpose2_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_transpose2_op<::anakin::saber::NV>(ctx, true);
+}
+#endif
+
+TEST(transpose1_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_transpose2_op<::anakin::saber::X86>(ctx, false);
+}
+
+TEST(transpose2_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_transpose2_op<::anakin::saber::X86>(ctx, false);
+}

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

 USE_OP(transpose);
+USE_CPU_ANAKIN_CONVERTER(transpose);
+#ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(transpose);
+#endif
paddle/fluid/inference/anakin/convert/transpose.cc
@@ -17,20 +17,16 @@
 #include <string>
 #include <vector>

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void TransposeOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
...
@@ -38,7 +34,7 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input = op_desc.Input("X").front();
   auto output = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
-  engine_->AddOp(op_name, "Permute", {input}, {output});
+  this->engine_->AddOp(op_name, "Permute", {input}, {output});

   auto axis = boost::get<std::vector<int>>(op_desc.GetAttr("axis"));
   size_t axis_size = axis.size();
...
@@ -46,11 +42,17 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
     axis.push_back(axis_size);
     axis_size += 1;
   }
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dims", axis);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", axis);
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose,
+                                  TransposeOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose,
+                                 TransposeOpConverter<::anakin::saber::X86>);
paddle/fluid/inference/anakin/convert/transpose.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class TransposeOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class TransposeOpConverter : public AnakinOpConverter<TargetT> {
  public:
   TransposeOpConverter() = default;
...
paddle/fluid/inference/anakin/convert/ut_helper.h
@@ -32,14 +32,8 @@ limitations under the License. */
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/platform/enforce.h"

-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
 using anakin::Precision;
-using anakin::saber::NV;
 using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;

 namespace paddle {
 namespace inference {
...
@@ -55,8 +49,8 @@ float random(float low, float high) {
   return dist(mt);
 }

-void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
-                     const platform::DeviceContext& ctx) {
+void RandomizeTensor(framework::LoDTensor* tensor,
+                     const platform::Place& place) {
   auto dims = tensor->dims();
   size_t num_elements = analysis::AccuDims(dims, dims.size());
   PADDLE_ENFORCE_GT(num_elements, 0);
...
@@ -78,17 +72,19 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
  * anakin
  * layer.
  */
+template <typename TargetT>
 class AnakinConvertValidation {
-  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
+  using AnakinNvEngineT = AnakinEngine<TargetT, Precision::FP32>;

  public:
   AnakinConvertValidation() = delete;

   AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
-                          framework::Scope* scope)
-      : parameters_(parameters), scope_(scope), place_(0) {
-    PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
-    engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
+                          framework::Scope* scope,
+                          const platform::DeviceContext& ctx,
+                          bool use_gpu = true)
+      : parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) {
+    engine_.reset(new AnakinEngine<TargetT, Precision::FP32>(true));
   }

   // Declare a Variable as input with random initialization.
...
@@ -108,11 +104,10 @@ class AnakinConvertValidation {
   }

   void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
-    platform::CUDADeviceContext ctx(place_);
     auto* x = scope_->Var(name);
     auto* x_tensor = x->GetMutable<framework::LoDTensor>();
     x_tensor->Resize(framework::make_ddim(dim_vec));
-    RandomizeTensor(x_tensor, place_, ctx);
+    RandomizeTensor(x_tensor, ctx_.GetPlace());

     std::vector<int64_t> dim_vec_int64;
     for (auto& ele : dim_vec) {
...
@@ -132,7 +127,7 @@ class AnakinConvertValidation {
     // should init anakin engine here.

     auto& block_desc = program_desc_.Block(framework::kRootBlockIndex);
-    Singleton<AnakinOpConverter>::Global().ConvertOp(
+    Singleton<AnakinOpConverter<TargetT>>::Global().ConvertOp(
         desc, block_desc, parameters_, *scope_, engine_.get(),
         true /*test_mode*/);
     engine_->Freeze();
...
@@ -160,11 +155,8 @@ class AnakinConvertValidation {
   void Execute(int batch_size,
                std::unordered_set<std::string> neglected_output = {}) {
     // Execute Fluid Op
-    platform::CUDADeviceContext ctx(place_);
-    op_->Run(*scope_, place_);
-
-    // std::vector<framework::LoDTensor> input_vector;
-    // std::vector<framework::LoDTensor> output_vector;
+    op_->Run(*scope_, ctx_.GetPlace());
+
     std::map<std::string, framework::LoDTensor*> inputs;
     for (const auto& input : op_desc_->InputArgumentNames()) {
       if (parameters_.count(input)) continue;
...
@@ -180,20 +172,27 @@ class AnakinConvertValidation {
       std::vector<float> fluid_out;
       auto* var = scope_->FindVar(output);
       auto tensor = var->GetMutable<framework::LoDTensor>();
-      framework::TensorToVector(*tensor, ctx, &fluid_out);
+      framework::TensorToVector(*tensor, ctx_, &fluid_out);
       fluid_outputs.push_back(fluid_out);

       outputs.insert({output, tensor});
     }

-    engine_->Execute(inputs, outputs, stream_);
+    if (!use_gpu_) {
+      engine_->Execute(inputs, outputs);
+    } else {
+      cudaStream_t stream;
+      PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream), 0);
+      engine_->Execute(inputs, outputs, stream);
+    }
+
     int i_output = 0;
     for (const auto& output : op_desc_->OutputArgumentNames()) {
       if (neglected_output.count(output)) continue;
       std::vector<float> anakin_out;
       auto* var = scope_->FindVar(output);
       auto tensor = var->GetMutable<framework::LoDTensor>();
-      framework::TensorToVector(*tensor, ctx, &anakin_out);
+      framework::TensorToVector(*tensor, ctx_, &anakin_out);
       size_t anakin_out_size = anakin_out.size();
       auto fluid_out = fluid_outputs[i_output++];
...
@@ -205,15 +204,17 @@ class AnakinConvertValidation {
  private:
   std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
-  cudaStream_t stream_;
   std::unique_ptr<framework::OperatorBase> op_;
   std::unique_ptr<framework::OpDesc> op_desc_;
   framework::ProgramDesc program_desc_;
   const std::unordered_set<std::string>& parameters_;
   framework::Scope* scope_;
-  platform::CUDAPlace place_;
+  const platform::DeviceContext& ctx_;
+  bool use_gpu_{true};
 };

+template class AnakinConvertValidation<::anakin::saber::NV>;
+template class AnakinConvertValidation<::anakin::saber::X86>;

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
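ut_helper.h now ends with template class AnakinConvertValidation<::anakin::saber::NV>; and <::anakin::saber::X86>;. These are explicit instantiations: they force the compiler to emit the whole class for both targets in this translation unit, so every member is compiled (and any error surfaces) even before a test names a particular target. A self-contained sketch of the mechanism, with illustrative names only:

    #include <cstdio>

    struct NV {};   // placeholder target tags, not the Anakin types
    struct X86 {};

    template <typename TargetT>
    class Validator {
     public:
      const char *Name() const;
    };

    template <typename TargetT>
    const char *Validator<TargetT>::Name() const {
      return "validator";
    }

    // Explicit instantiation: Validator<NV> and Validator<X86> are fully
    // compiled here, even though nothing below has used them yet.
    template class Validator<NV>;
    template class Validator<X86>;

    int main() {
      Validator<X86> v;
      std::printf("%s\n", v.Name());
      return 0;
    }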
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
7ad182e1
...
@@ -69,11 +69,11 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
-void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
-    const std::map<std::string, framework::LoDTensor *> &inputs,
-    const std::map<std::string, framework::LoDTensor *> &outputs,
-    cudaStream_t stream) {
+void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
+    const std::map<std::string, framework::LoDTensor *> &inputs) {
+#ifdef PADDLE_WITH_CUDA
   cudaDeviceSynchronize();
+#endif
   for (const auto &input : inputs) {
     auto *tensor = input.second;
     auto *data = tensor->data<float>();
...
@@ -105,6 +105,35 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
         fluid_input_shape);
     anakin_input->copy_from(tmp_anakin_tensor);
   }
 }
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
+    const std::map<std::string, framework::LoDTensor *> &inputs,
+    const std::map<std::string, framework::LoDTensor *> &outputs) {
+  BindInput(inputs);
+  net_->prediction();
+  for (const auto &output : outputs) {
+    platform::CPUPlace cpu_place;
+    auto *tensor = output.second;
+    auto *anakin_output = net_->get_out(output.first);
+    auto *anakin_data = anakin_output->data();
+    auto anakin_output_shape = anakin_output->valid_shape();
+    tensor->Resize(framework::make_ddim(anakin_output_shape));
+    auto *fluid_data = tensor->mutable_data<float>(cpu_place);
+    memory::Copy(cpu_place, static_cast<void *>(fluid_data), cpu_place,
+                 static_cast<void *>(anakin_data),
+                 tensor->numel() * sizeof(float));
+  }
+}
+
+#ifdef PADDLE_WITH_CUDA
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
+    const std::map<std::string, framework::LoDTensor *> &inputs,
+    const std::map<std::string, framework::LoDTensor *> &outputs,
+    cudaStream_t stream) {
+  BindInput(inputs);
   net_->prediction();
   cudaDeviceSynchronize();
   for (const auto &output : outputs) {
...
@@ -121,6 +150,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
   }
   cudaDeviceSynchronize();
 }
+#endif

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
...
@@ -140,7 +170,15 @@ AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
   return std::unique_ptr<AnakinEngine>(engine);
 }

+#ifdef PADDLE_WITH_CUDA
 template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
+template class AnakinEngineManager<::anakin::saber::NV>;
+#endif
+template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
+template class AnakinEngineManager<::anakin::saber::X86>;
+// template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
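The refactor above pulls input binding out of Execute so that the CPU overload (no stream) and the CUDA overload (stream plus device synchronization) share the same tensor bookkeeping, and the CUDA overload is compiled only when PADDLE_WITH_CUDA is defined. A minimal standalone sketch of the same layout, using hypothetical Engine/Tensor stand-ins rather than the real Paddle classes:

// Sketch only: hypothetical Engine/Tensor types, not the Paddle implementation.
#include <map>
#include <string>
#include <vector>
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif

struct Tensor { std::vector<float> data; };  // stand-in for framework::LoDTensor

class Engine {
 public:
  // Shared by both entry points: copy host inputs into the engine.
  void BindInput(const std::map<std::string, Tensor*>& inputs) {
    for (const auto& kv : inputs) bound_[kv.first] = kv.second->data;
  }

  // CPU path: no stream argument, outputs are read back on the host.
  void Execute(const std::map<std::string, Tensor*>& inputs,
               const std::map<std::string, Tensor*>& outputs) {
    BindInput(inputs);
    Predict();
    for (const auto& kv : outputs) kv.second->data = bound_[kv.first];
  }

#ifdef PADDLE_WITH_CUDA
  // GPU path: same flow, but takes a stream and only exists in CUDA builds.
  void Execute(const std::map<std::string, Tensor*>& inputs,
               const std::map<std::string, Tensor*>& outputs,
               cudaStream_t stream) {
    BindInput(inputs);
    Predict();
    cudaStreamSynchronize(stream);  // wait for device work before reading outputs
    for (const auto& kv : outputs) kv.second->data = bound_[kv.first];
  }
#endif

 private:
  void Predict() {}  // placeholder for net_->prediction()
  std::map<std::string, std::vector<float>> bound_;
};

The point of the split is that the target-specific parts (stream handling, device copies) stay in the overloads while the shared binding logic is written once.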
paddle/fluid/inference/anakin/engine.h
...
@@ -32,7 +32,6 @@
 #include "saber/saber_types.h"

 using anakin::Precision;
-using anakin::saber::NV;

 namespace anakin {
...
@@ -94,9 +93,16 @@ class AnakinEngine {
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
+  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
+               const std::map<std::string, framework::LoDTensor *> &outputs);
+#ifdef PADDLE_WITH_CUDA
   void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
                const std::map<std::string, framework::LoDTensor *> &outputs,
                cudaStream_t stream);
+#endif
+
+ private:
+  void BindInput(const std::map<std::string, framework::LoDTensor *> &inputs);

  private:
   bool initialized_{false};
...
@@ -108,24 +114,25 @@ class AnakinEngine {
   std::vector<std::string> program_inputs_;
 };

+template <typename TargetT>
 class AnakinEngineManager {
-  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, Precision::FP32>;

  public:
   bool HasEngine(const std::string &name) const {
     if (engines_.count(name) == 0) return false;
     return engines_.at(name).get() != nullptr;
   }
-  AnakinNvEngineT *Get(const std::string &name) const {
+  AnakinEngineT *Get(const std::string &name) const {
     return engines_.at(name).get();
   }

-  AnakinNvEngineT *Create(bool need_summary, int device, int max_batch_size,
+  AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
                         std::map<std::string, std::vector<int>> max_input_shape,
                         std::vector<std::string> program_inputs,
                         std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
-    auto *p = new AnakinEngine<NV, Precision::FP32>(
+    auto *p = new AnakinEngine<TargetT, Precision::FP32>(
         need_summary, device, max_batch_size, max_input_shape, program_inputs);
     engines_[engine_name].reset(p);
     return p;
...
@@ -138,7 +145,7 @@ class AnakinEngineManager {
   }

  private:
-  std::unordered_map<std::string, std::unique_ptr<AnakinNvEngineT>> engines_;
+  std::unordered_map<std::string, std::unique_ptr<AnakinEngineT>> engines_;
   std::mutex mut_;
 };

 }  // namespace anakin
...
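With AnakinEngineManager templated on the target, each specialization keeps its own name-to-engine map guarded by a mutex. A reduced sketch of that shape, with a hypothetical Engine<T> in place of the real AnakinEngine:

// Sketch of a target-templated engine manager; Engine<T>, NV and X86 are stand-ins.
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

struct NV {};
struct X86 {};

template <typename TargetT>
struct Engine {};  // stand-in for AnakinEngine<TargetT, Precision::FP32>

template <typename TargetT>
class EngineManager {
 public:
  using EngineT = Engine<TargetT>;

  bool HasEngine(const std::string& name) const {
    auto it = engines_.find(name);
    return it != engines_.end() && it->second != nullptr;
  }

  EngineT* Get(const std::string& name) const { return engines_.at(name).get(); }

  EngineT* Create(const std::string& name) {
    std::unique_lock<std::mutex> lock(mut_);  // creation is serialized
    auto* p = new EngineT();
    engines_[name].reset(p);
    return p;
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<EngineT>> engines_;
  std::mutex mut_;
};

int main() {
  EngineManager<X86> cpu_engines;   // one manager per target type
  cpu_engines.Create("subgraph_0");
  return cpu_engines.HasEngine("subgraph_0") ? 0 : 1;
}

Engines are created under the mutex and later looked up by the same string key the subgraph pass and the engine op use.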
paddle/fluid/inference/analysis/argument.h
...
@@ -64,20 +64,20 @@ struct Argument {
   bool Has(const std::string& key) const { return valid_fields_.count(key); }

 #define DECL_ARGUMENT_FIELD(field__, Field, type__)                \
  public:                                                           \
   type__& field__() {                                              \
-    PADDLE_ENFORCE(Has(#field__));                                  \
+    PADDLE_ENFORCE(Has(#field__), "There is no such field");        \
     return field__##_;                                              \
   }                                                                 \
   void Set##Field(const type__& x) {                                \
     field__##_ = x;                                                 \
     valid_fields_.insert(#field__);                                 \
   }                                                                 \
   DECL_ARGUMENT_FIELD_VALID(field__);                               \
   type__* field__##_ptr() { return &field__##_; }                   \
                                                                     \
  private:                                                           \
   type__ field__##_;

 #define DECL_ARGUMENT_FIELD_VALID(field__) \
...
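The macro packs a checked getter, a setter that records the field as valid, and the backing member into one declaration; the change above only adds an error message to the enforce. A cut-down sketch of the same pattern, with assert standing in for PADDLE_ENFORCE and a hypothetical DECL_FIELD name:

// Sketch of a field-declaring macro; not the Paddle Argument class.
#include <cassert>
#include <string>
#include <unordered_set>

struct Argument {
  bool Has(const std::string& key) const { return valid_fields_.count(key); }

#define DECL_FIELD(field__, Field, type__)                 \
 public:                                                   \
  type__& field__() {                                      \
    assert(Has(#field__) && "There is no such field");     \
    return field__##_;                                     \
  }                                                        \
  void Set##Field(const type__& x) {                       \
    field__##_ = x;                                        \
    valid_fields_.insert(#field__);                        \
  }                                                        \
                                                           \
 private:                                                  \
  type__ field__##_;

  DECL_FIELD(use_gpu, UseGpu, bool)
#undef DECL_FIELD

 private:
  std::unordered_set<std::string> valid_fields_;
};

int main() {
  Argument arg;
  arg.SetUseGpu(true);          // marks "use_gpu" as valid
  return arg.use_gpu() ? 0 : 1; // getter would assert if the field were unset
}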
paddle/fluid/inference/analysis/ir_pass_manager.cc
...
@@ -114,6 +114,7 @@ void IRPassManager::CreatePasses(Argument *argument,
     if (pass_name == "anakin_subgraph_pass") {
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+      pass->Set("use_gpu", new bool(argument->use_gpu()));
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
       pass->Set("engine_opt_info", new std::map<std::string, std::string>(
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...
@@ -194,20 +194,49 @@ void AnakinSubgraphPass::CreateAnakinOp(
   auto max_batch_size = Get<int>("max_batch_size");
   auto program_inputs = program_desc->GetFeedTargetNames();

-  auto *anakin_engine =
-      inference::Singleton<anakin::AnakinEngineManager>::Global().Create(
-          true, Get<int>("gpu_device_id"), max_batch_size, max_input_shape,
-          program_inputs, engine_key);
+  bool use_gpu = Get<bool>("use_gpu");
+  SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
+
+  if (use_gpu) {
+#ifdef PADDLE_WITH_CUDA
+    inference::Singleton<
+        anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+        .Create(true, Get<int>("gpu_device_id"), max_batch_size,
+                max_input_shape, program_inputs, engine_key);
+#endif
+  } else {
+    inference::Singleton<
+        anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+        .Create(true, Get<int>("gpu_device_id"), max_batch_size,
+                max_input_shape, program_inputs, engine_key);
+  }

   auto *scope = param_scope();
   std::unordered_set<std::string> param_set(params.begin(), params.end());
   framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());

-  inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
-      .ConvertBlockToAnakinEngine(
-          &block_desc_temp, scope,
-          std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, anakin_engine);
+  if (use_gpu) {
+    auto *anakin_engine =
+        inference::Singleton<inference::anakin::AnakinEngineManager<
+            ::anakin::saber::NV>>::Global()
+            .Get(engine_key);
+    inference::Singleton<
+        inference::anakin::AnakinOpConverter<::anakin::saber::NV>>::Global()
+        .ConvertBlockToAnakinEngine(
+            &block_desc_temp, scope,
+            std::vector<std::string>(input_names.begin(), input_names.end()),
+            param_set, output_mapping, anakin_engine);
+  } else {
+    auto *anakin_engine =
+        inference::Singleton<inference::anakin::AnakinEngineManager<
+            ::anakin::saber::X86>>::Global()
+            .Get(engine_key);
+    inference::Singleton<
+        inference::anakin::AnakinOpConverter<::anakin::saber::X86>>::Global()
+        .ConvertBlockToAnakinEngine(
+            &block_desc_temp, scope,
+            std::vector<std::string>(input_names.begin(), input_names.end()),
+            param_set, output_mapping, anakin_engine);
+  }
 }

 }  // namespace analysis
...
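Because the manager is now a class template, Singleton<AnakinEngineManager<NV>> and Singleton<AnakinEngineManager<X86>> are two independent globals, so the pass has to Create and later Get an engine through the same specialization, chosen by the use_gpu flag, with the NV branch compiled only under PADDLE_WITH_CUDA. A reduced sketch of that dispatch, with hypothetical types:

// Sketch of per-target singleton dispatch driven by a use_gpu flag.
#include <string>

template <typename T>
struct Singleton {
  static T& Global() {
    static T instance;  // one instance per distinct T
    return instance;
  }
};

struct NV {};
struct X86 {};

template <typename TargetT>
struct EngineManager {
  void Create(const std::string& key) { (void)key; }
};

void CreateEngine(bool use_gpu, const std::string& engine_key) {
  if (use_gpu) {
#ifdef PADDLE_WITH_CUDA
    // NV specialization: only reachable when the build has CUDA.
    Singleton<EngineManager<NV>>::Global().Create(engine_key);
#endif
  } else {
    // X86 specialization: the CPU path added by this change.
    Singleton<EngineManager<X86>>::Global().Create(engine_key);
  }
}

int main() {
  CreateEngine(/*use_gpu=*/false, "subgraph_0");
  return 0;
}

The same pair of branches reappears in anakin_engine_op.h below, where the op looks the engine up at run time by engine_key_.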
paddle/fluid/inference/api/CMakeLists.txt
...
@@ -70,4 +70,3 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
     anakin_target(inference_anakin_api)
     anakin_target(inference_anakin_api_shared)
 endif()
-inference_analysis_test(faster_rcnn_test SRCS faster_rcnn_test.cc EXTRA_DEPS paddle_fluid)
paddle/fluid/inference/api/analysis_config.cc
...
@@ -268,9 +268,11 @@ void AnalysisConfig::Update() {
     PADDLE_ENFORCE(!use_tensorrt_,
                    "Anakin sub-graph and TensorRT sub-graph are not allowed to "
                    "run at the same time!");
-    PADDLE_ENFORCE(
-        use_gpu_,
-        "Anakin sub-graph engine need gpu, please use the EnableGpu API.");
+    if (use_gpu_) {
+      LOG(INFO) << "Run Anakin GPU mode";
+    } else {
+      LOG(INFO) << "Run Anakin CPU mode";
+    }
     pass_builder()->ClearPasses();
     for (const auto &pass : kAnakinSubgraphPasses) {
...
paddle/fluid/inference/api/analysis_predictor.cc
...
@@ -382,7 +382,7 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_);
   }

-  if (config_.use_gpu() && config_.anakin_engine_enabled()) {
+  if (config_.anakin_engine_enabled()) {
     argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
     argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
     argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
...
paddle/fluid/operators/anakin/anakin_engine_op.h
...
@@ -34,28 +34,16 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-using FluidDT = framework::proto::VarType_Type;
 using inference::Singleton;
-
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 using inference::anakin::AnakinEngine;

 class AnakinEngineOp : public framework::OperatorBase {
-  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
-
  private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable AnakinNvEngineT *anakin_engine_;
   std::string engine_key_;
   std::string engine_serialized_data_;
+  bool use_gpu_;

  public:
   AnakinEngineOp(const std::string &type,
...
@@ -66,10 +54,10 @@ class AnakinEngineOp : public framework::OperatorBase {
     input_names_ = Inputs("Xs");
     engine_key_ = Attr<std::string>("engine_key");
     auto params = Attr<std::vector<std::string>>("parameters");
+    use_gpu_ = Attr<bool>("use_gpu");
     for (const auto &param : params) {
       param_names_.insert(param);
     }
-    anakin_engine_ = nullptr;
   }

  protected:
...
@@ -80,7 +68,6 @@ class AnakinEngineOp : public framework::OperatorBase {
   void RunAnakin(const framework::Scope &scope,
                  const platform::Place &dev_place) const {
-    auto *engine = GetEngine(scope, dev_place);
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
     auto stream =
...
@@ -92,7 +79,6 @@ class AnakinEngineOp : public framework::OperatorBase {
         Attr<std::vector<std::string>>("output_name_mapping");

     std::map<std::string, framework::LoDTensor *> inputs;
-    // Convert input tensor from fluid to engine.
     for (const auto &x : Inputs("Xs")) {
       if (param_names_.count(x)) continue;
       auto &t =
...
@@ -110,17 +96,21 @@ class AnakinEngineOp : public framework::OperatorBase {
       outputs.insert({output_maps[output_index], fluid_t});
       output_index += 1;
     }
-    engine->Execute(inputs, outputs, stream);
-  }
-
-  AnakinNvEngineT *GetEngine(const framework::Scope &scope,
-                             const platform::Place &dev_place) const {
-    if (anakin_engine_ == nullptr) {
-      anakin_engine_ =
-          inference::Singleton<inference::anakin::AnakinEngineManager>::Global()
-              .Get(engine_key_);
-    }
-    return anakin_engine_;
+    if (use_gpu_) {
+#ifdef PADDLE_WITH_CUDA
+      auto *engine =
+          inference::Singleton<inference::anakin::AnakinEngineManager<
+              ::anakin::saber::NV>>::Global()
+              .Get(engine_key_);
+      engine->Execute(inputs, outputs, stream);
+#endif
+    } else {
+      auto *engine =
+          inference::Singleton<inference::anakin::AnakinEngineManager<
+              ::anakin::saber::X86>>::Global()
+              .Get(engine_key_);
+      engine->Execute(inputs, outputs);
+    }
   }
 };
...