Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
2a344823
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2a344823
编写于
5月 08, 2020
作者:
W
Wilber
提交者:
GitHub
5月 08, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add eltwise_activate fuse. test=develop (#3367)
* add eltwise_activate_fuse. test=develop
上级
06f77998
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
331 addition
and
138 deletion
+331
-138
lite/api/paddle_use_passes.h
lite/api/paddle_use_passes.h
+1
-1
lite/backends/cuda/math/elementwise.cu
lite/backends/cuda/math/elementwise.cu
+77
-18
lite/backends/cuda/math/elementwise.h
lite/backends/cuda/math/elementwise.h
+10
-8
lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc
lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc
+17
-6
lite/core/mir/fusion/elementwise_add_activation_fuse_pass.h
lite/core/mir/fusion/elementwise_add_activation_fuse_pass.h
+1
-1
lite/core/mir/fusion/elementwise_add_activation_fuser.cc
lite/core/mir/fusion/elementwise_add_activation_fuser.cc
+36
-20
lite/core/mir/fusion/elementwise_add_activation_fuser.h
lite/core/mir/fusion/elementwise_add_activation_fuser.h
+11
-3
lite/core/optimizer.h
lite/core/optimizer.h
+1
-1
lite/kernels/cuda/elementwise_compute.cu
lite/kernels/cuda/elementwise_compute.cu
+151
-70
lite/kernels/cuda/elementwise_compute.h
lite/kernels/cuda/elementwise_compute.h
+26
-8
lite/operators/fusion_elementwise_activation_ops.cc
lite/operators/fusion_elementwise_activation_ops.cc
+0
-2
未找到文件。
lite/api/paddle_use_passes.h
浏览文件 @
2a344823
...
...
@@ -37,7 +37,7 @@ USE_MIR_PASS(identity_dropout_eliminate_pass);
USE_MIR_PASS
(
lite_conv_elementwise_fuse_pass
);
USE_MIR_PASS
(
lite_conv_activation_fuse_pass
);
USE_MIR_PASS
(
lite_var_conv_2d_activation_fuse_pass
);
USE_MIR_PASS
(
lite_elementwise_a
dd_a
ctivation_fuse_pass
);
USE_MIR_PASS
(
lite_elementwise_activation_fuse_pass
);
USE_MIR_PASS
(
lite_quant_dequant_fuse_pass
);
USE_MIR_PASS
(
type_precision_cast_pass
);
USE_MIR_PASS
(
type_layout_cast_pass
);
...
...
lite/backends/cuda/math/elementwise.cu
浏览文件 @
2a344823
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "lite/backends/cuda/math/elementwise.h"
#include "lite/utils/cp_logging.h"
namespace
paddle
{
namespace
lite
{
...
...
@@ -62,6 +63,52 @@ __global__ void elementwise_relu_kernel(const size_t total,
}
}
template
<
typename
Dtype
>
__global__
void
elementwise_abs_kernel
(
const
size_t
total
,
const
Dtype
*
x_data
,
const
Dtype
*
y_data
,
Dtype
*
out_data
,
int
pre
,
int
n
,
int
post
,
BinaryOperation
type
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
tid
<
total
)
{
int
idx
=
tid
/
post
%
n
;
Dtype
temp
;
#if __CUDA_ARCH__ >= 350
temp
=
binary_calc
(
__ldg
(
x_data
+
tid
),
__ldg
(
y_data
+
idx
),
type
);
#else
temp
=
binary_calc
(
x_data
[
tid
],
y_data
[
idx
],
type
);
#endif
out_data
[
tid
]
=
temp
>
0
?
temp
:
-
temp
;
}
}
template
<
typename
Dtype
>
__global__
void
elementwise_tanh_kernel
(
const
size_t
total
,
const
Dtype
*
x_data
,
const
Dtype
*
y_data
,
Dtype
*
out_data
,
int
pre
,
int
n
,
int
post
,
BinaryOperation
type
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
tid
<
total
)
{
int
idx
=
tid
/
post
%
n
;
Dtype
temp
;
#if __CUDA_ARCH__ >= 350
temp
=
binary_calc
(
__ldg
(
x_data
+
tid
),
__ldg
(
y_data
+
idx
),
type
);
#else
temp
=
binary_calc
(
x_data
[
tid
],
y_data
[
idx
],
type
);
#endif
out_data
[
tid
]
=
tanh
(
temp
);
}
}
template
<
typename
Dtype
>
__global__
void
elementwise_add_kernel
(
const
size_t
total
,
const
Dtype
*
x_data
,
...
...
@@ -135,19 +182,30 @@ void elementwise(const Dtype* x_data,
}
template
<
typename
Dtype
>
void
elementwise_relu
(
const
Dtype
*
x_data
,
const
Dtype
*
y_data
,
Dtype
*
out_data
,
int
pre
,
int
n
,
int
post
,
BinaryOperation
type
,
cudaStream_t
stream
)
{
void
elementwise_act
(
const
Dtype
*
x_data
,
const
Dtype
*
y_data
,
Dtype
*
out_data
,
int
pre
,
int
n
,
int
post
,
std
::
string
act
,
BinaryOperation
type
,
cudaStream_t
stream
)
{
int
num
=
pre
*
n
*
post
;
int
thread
=
256
;
int
block
=
(
num
+
thread
-
1
)
/
thread
;
elementwise_relu_kernel
<<<
block
,
thread
,
0
,
stream
>>>
(
num
,
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
,
type
);
if
(
act
==
"relu"
)
{
elementwise_relu_kernel
<<<
block
,
thread
,
0
,
stream
>>>
(
num
,
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
,
type
);
}
else
if
(
act
==
"tanh"
)
{
elementwise_tanh_kernel
<<<
block
,
thread
,
0
,
stream
>>>
(
num
,
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
,
type
);
}
else
if
(
act
==
"abs"
)
{
elementwise_abs_kernel
<<<
block
,
thread
,
0
,
stream
>>>
(
num
,
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
,
type
);
}
else
{
LOG
(
FATAL
)
<<
"not supported activate type: "
<<
act
;
}
}
template
void
elementwise
(
const
float
*
,
...
...
@@ -159,14 +217,15 @@ template void elementwise(const float*,
BinaryOperation
,
cudaStream_t
);
template
void
elementwise_relu
(
const
float
*
,
const
float
*
,
float
*
,
int
,
int
,
int
,
BinaryOperation
,
cudaStream_t
);
template
void
elementwise_act
(
const
float
*
x_data
,
const
float
*
y_data
,
float
*
out_data
,
int
pre
,
int
n
,
int
post
,
std
::
string
act
,
BinaryOperation
type
,
cudaStream_t
stream
);
template
<
typename
Dtype
>
void
elementwise_add
(
int
num
,
...
...
lite/backends/cuda/math/elementwise.h
浏览文件 @
2a344823
...
...
@@ -15,6 +15,7 @@
#pragma once
#include <cuda.h>
#include <cuda_runtime.h>
#include <string>
#include "lite/backends/cuda/math/utils.h"
namespace
paddle
{
...
...
@@ -33,14 +34,15 @@ void elementwise(const Dtype* x_data,
cudaStream_t
stream
);
template
<
typename
Dtype
>
void
elementwise_relu
(
const
Dtype
*
x_data
,
const
Dtype
*
y_data
,
Dtype
*
out_data
,
int
pre
,
int
n
,
int
post
,
BinaryOperation
type
,
cudaStream_t
stream
);
void
elementwise_act
(
const
Dtype
*
x_data
,
const
Dtype
*
y_data
,
Dtype
*
out_data
,
int
pre
,
int
n
,
int
post
,
std
::
string
act
,
BinaryOperation
type
,
cudaStream_t
stream
);
template
<
typename
Dtype
>
void
elementwise_add
(
int
num
,
...
...
lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc
浏览文件 @
2a344823
...
...
@@ -22,20 +22,31 @@ namespace paddle {
namespace
lite
{
namespace
mir
{
void
ElementwiseA
ddA
ctivationFusePass
::
Apply
(
void
ElementwiseActivationFusePass
::
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
fusion
::
ElementwiseAddActivationFuser
fuser
(
"relu"
);
fuser
(
graph
.
get
());
// initialze fuser params
std
::
vector
<
std
::
string
>
elt_types
{
"elementwise_add"
,
"elementwise_sub"
,
"elementwise_mul"
};
std
::
vector
<
std
::
string
>
act_types
{
"relu"
,
"abs"
,
"tanh"
};
// start fuse using params
for
(
auto
elt_type
:
elt_types
)
{
for
(
auto
act_type
:
act_types
)
{
fusion
::
ElementwiseActivationFuser
fuser
(
elt_type
,
act_type
);
fuser
(
graph
.
get
());
}
}
}
}
// namespace mir
}
// namespace lite
}
// namespace paddle
REGISTER_MIR_PASS
(
lite_elementwise_a
dd_a
ctivation_fuse_pass
,
paddle
::
lite
::
mir
::
ElementwiseA
ddA
ctivationFusePass
)
REGISTER_MIR_PASS
(
lite_elementwise_activation_fuse_pass
,
paddle
::
lite
::
mir
::
ElementwiseActivationFusePass
)
.
BindTargets
({
TARGET
(
kAny
)})
.
ExcludeTargets
({
TARGET
(
kXPU
)})
.
ExcludeTargets
({
TARGET
(
kBM
)})
.
ExcludeTargets
({
TARGET
(
kX86
)})
.
BindKernel
(
"fusion_elementwise_add_activation"
);
.
BindKernel
(
"fusion_elementwise_add_activation"
)
.
BindKernel
(
"fusion_elementwise_sub_activation"
);
lite/core/mir/fusion/elementwise_add_activation_fuse_pass.h
浏览文件 @
2a344823
...
...
@@ -22,7 +22,7 @@ namespace paddle {
namespace
lite
{
namespace
mir
{
class
ElementwiseA
ddA
ctivationFusePass
:
public
ProgramPass
{
class
ElementwiseActivationFusePass
:
public
ProgramPass
{
public:
void
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
override
;
};
...
...
lite/core/mir/fusion/elementwise_add_activation_fuser.cc
浏览文件 @
2a344823
...
...
@@ -21,21 +21,21 @@ namespace lite {
namespace
mir
{
namespace
fusion
{
void
ElementwiseA
ddA
ctivationFuser
::
BuildPattern
()
{
void
ElementwiseActivationFuser
::
BuildPattern
()
{
// create input nodes.
auto
*
x
=
VarNode
(
"x"
)
->
assert_is_op_input
(
"elementwise_add"
,
"X"
)
->
AsInput
();
auto
*
y
=
VarNode
(
"y"
)
->
assert_is_op_input
(
"elementwise_add"
,
"Y"
)
->
AsInput
();
auto
*
x
=
VarNode
(
"x"
)
->
assert_is_op_input
(
eltwise_type_
,
"X"
)
->
AsInput
();
auto
*
y
=
VarNode
(
"y"
)
->
assert_is_op_input
(
eltwise_type_
,
"Y"
)
->
AsInput
();
// create op nodes
auto
*
add
=
OpNode
(
"add"
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
)
auto
*
elt
=
OpNode
(
"elt"
,
eltwise_type_
)
->
assert_is_op
(
eltwise_type_
)
->
AsIntermediate
();
auto
*
act
=
OpNode
(
"act"
,
act_type_
)
->
assert_is_op
(
act_type_
)
->
AsIntermediate
();
// create intermediate nodes
auto
*
add
_out
=
VarNode
(
"add_out"
)
->
assert_is_op_output
(
"elementwise_add"
,
"Out"
)
auto
*
elt
_out
=
VarNode
(
"add_out"
)
->
assert_is_op_output
(
eltwise_type_
,
"Out"
)
->
assert_is_op_input
(
act_type_
,
"X"
)
->
AsIntermediate
();
...
...
@@ -44,21 +44,29 @@ void ElementwiseAddActivationFuser::BuildPattern() {
VarNode
(
"output"
)
->
assert_is_op_output
(
act_type_
,
"Out"
)
->
AsOutput
();
// create topology.
std
::
vector
<
PMNode
*>
add
_inputs
{
x
,
y
};
add_inputs
>>
*
add
>>
*
add
_out
;
*
add
_out
>>
*
act
>>
*
out
;
std
::
vector
<
PMNode
*>
elt
_inputs
{
x
,
y
};
elt_inputs
>>
*
elt
>>
*
elt
_out
;
*
elt
_out
>>
*
act
>>
*
out
;
}
void
ElementwiseA
ddA
ctivationFuser
::
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
{
void
ElementwiseActivationFuser
::
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
{
auto
op_desc
=
GenOpDesc
(
matched
);
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
"fusion_elementwise_add_activation"
);
auto
old_op
=
matched
.
at
(
"add"
)
->
stmt
()
->
op
();
std
::
shared_ptr
<
lite
::
OpLite
>
op
;
if
(
eltwise_type_
==
"elementwise_add"
)
{
op
=
LiteOpRegistry
::
Global
().
Create
(
"fusion_elementwise_add_activation"
);
}
else
if
(
eltwise_type_
==
"elementwise_sub"
)
{
op
=
LiteOpRegistry
::
Global
().
Create
(
"fusion_elementwise_sub_activation"
);
}
else
if
(
eltwise_type_
==
"elementwise_mul"
)
{
op
=
LiteOpRegistry
::
Global
().
Create
(
"fusion_elementwise_mul_activation"
);
}
else
{
LOG
(
FATAL
)
<<
"not supported elementwise_type: "
<<
eltwise_type_
;
}
auto
old_op
=
matched
.
at
(
"elt"
)
->
stmt
()
->
op
();
auto
*
scope
=
old_op
->
scope
();
auto
&
valid_places
=
old_op
->
valid_places
();
op
->
Attach
(
op_desc
,
scope
);
auto
*
new_op_node
=
graph
->
GraphCreateInstructNode
(
op
,
valid_places
);
IR_NODE_LINK_TO
(
matched
.
at
(
"x"
),
new_op_node
);
...
...
@@ -66,12 +74,20 @@ void ElementwiseAddActivationFuser::InsertNewNode(SSAGraph* graph,
IR_NODE_LINK_TO
(
new_op_node
,
matched
.
at
(
"output"
));
}
cpp
::
OpDesc
ElementwiseAddActivationFuser
::
GenOpDesc
(
const
key2nodes_t
&
matched
)
{
auto
*
desc
=
matched
.
at
(
"add"
)
->
stmt
()
->
op_info
();
cpp
::
OpDesc
ElementwiseActivationFuser
::
GenOpDesc
(
const
key2nodes_t
&
matched
)
{
auto
*
desc
=
matched
.
at
(
"elt"
)
->
stmt
()
->
op_info
();
cpp
::
OpDesc
op_desc
;
op_desc
.
SetType
(
"fusion_elementwise_add_activation"
);
if
(
eltwise_type_
==
"elementwise_add"
)
{
op_desc
.
SetType
(
"fusion_elementwise_add_activation"
);
}
else
if
(
eltwise_type_
==
"elementwise_sub"
)
{
op_desc
.
SetType
(
"fusion_elementwise_sub_activation"
);
}
else
if
(
eltwise_type_
==
"elementwise_mul"
)
{
op_desc
.
SetType
(
"fusion_elementwise_mul_activation"
);
}
else
{
LOG
(
FATAL
)
<<
"not supported elementwise_type: "
<<
eltwise_type_
;
}
op_desc
.
SetInput
(
"X"
,
{
matched
.
at
(
"x"
)
->
arg
()
->
name
});
op_desc
.
SetInput
(
"Y"
,
{
matched
.
at
(
"y"
)
->
arg
()
->
name
});
op_desc
.
SetOutput
(
"Out"
,
{
matched
.
at
(
"output"
)
->
arg
()
->
name
});
...
...
lite/core/mir/fusion/elementwise_add_activation_fuser.h
浏览文件 @
2a344823
...
...
@@ -23,15 +23,23 @@ namespace lite {
namespace
mir
{
namespace
fusion
{
class
ElementwiseAddActivationFuser
:
public
FuseBase
{
// Detect elementwise and activation ops, and then merge into
// fusion_eltsiwise_act op.
// Example:
// elementwise_add + relu fuse.
// fusion::ElementwiseActivationFuser fuser("elementwise_add", "relu");
// fuser(graph.get());
class
ElementwiseActivationFuser
:
public
FuseBase
{
public:
explicit
ElementwiseAddActivationFuser
(
const
std
::
string
&
act_type
)
:
act_type_
(
act_type
)
{}
explicit
ElementwiseActivationFuser
(
const
std
::
string
&
eltwise_type
,
const
std
::
string
&
act_type
)
:
eltwise_type_
(
eltwise_type
),
act_type_
(
act_type
)
{}
void
BuildPattern
()
override
;
void
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
override
;
private:
cpp
::
OpDesc
GenOpDesc
(
const
key2nodes_t
&
matched
)
override
;
std
::
string
eltwise_type_
;
std
::
string
act_type_
;
};
...
...
lite/core/optimizer.h
浏览文件 @
2a344823
...
...
@@ -74,7 +74,7 @@ class Optimizer {
"lite_scale_activation_fuse_pass"
,
//
#if (defined LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) || (defined LITE_WITH_CUDA) || \
(defined LITE_WITH_ARM)
"lite_elementwise_a
dd_a
ctivation_fuse_pass"
,
//
"lite_elementwise_activation_fuse_pass"
,
//
#endif
"__xpu__resnet_fuse_pass"
,
"__xpu__multi_encoder_fuse_pass"
,
...
...
lite/kernels/cuda/elementwise_compute.cu
浏览文件 @
2a344823
...
...
@@ -70,7 +70,30 @@ inline bool is_broadcast(const DDim& x_dims,
return
true
;
}
#define ELEMENTWISE_COMPUTE(OP, WITH_RELU) \
#define ELEMENTWISE_COMPUTE(OP) \
auto& param = this->Param<param_t>(); \
auto& ctx = this->ctx_->template As<CUDAContext>(); \
auto stream = ctx.exec_stream(); \
const lite::Tensor* x = param.X; \
const lite::Tensor* y = param.Y; \
lite::Tensor* out = param.Out; \
int axis = param.axis; \
auto* x_data = x->data<float>(); \
auto* y_data = y->data<float>(); \
auto out_data = out->mutable_data<float>(TARGET(kCUDA)); \
int pixel_num = x->numel(); \
int pre = 1; \
int n = pixel_num; \
int post = 1; \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, pre, n, post, OP, stream); \
} else { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, 1, pixel_num, 1, OP, stream); \
}
#define ELEMENTWISE_COMPUTE_ACT(OP) \
auto& param = this->Param<param_t>(); \
auto& ctx = this->ctx_->template As<CUDAContext>(); \
auto stream = ctx.exec_stream(); \
...
...
@@ -85,25 +108,43 @@ inline bool is_broadcast(const DDim& x_dims,
int pre = 1; \
int n = pixel_num; \
int post = 1; \
if (WITH_RELU) { \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise_relu( \
x_data, y_data, out_data, pre, n, post, OP, stream); \
} else { \
lite::cuda::math::elementwise_relu( \
x_data, y_data, out_data, 1, pixel_num, 1, OP, stream); \
} \
auto act = param.act_type; \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise_act( \
x_data, y_data, out_data, pre, n, post, act, OP, stream); \
} else { \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, pre, n, post, OP, stream); \
} else { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, 1, pixel_num, 1, OP, stream); \
} \
lite::cuda::math::elementwise_act( \
x_data, y_data, out_data, 1, pixel_num, 1, act, OP, stream); \
}
#define ELEMENTWISE_COMPUTE_NHWC(OP) \
std::map<int, int> pos_map = {{0, 0}, {1, 3}, {2, 1}, {3, 2}}; \
auto& param = this->Param<param_t>(); \
auto& ctx = this->ctx_->template As<CUDAContext>(); \
auto stream = ctx.exec_stream(); \
const lite::Tensor* x = param.X; \
const lite::Tensor* y = param.Y; \
lite::Tensor* out = param.Out; \
int axis = param.axis; \
if (axis < 0) axis = x->dims().size() - y->dims().size(); \
CHECK(axis >= 0) << "invalid axis of elementwise op"; \
axis = pos_map[axis]; \
auto* x_data = x->data<float>(); \
auto* y_data = y->data<float>(); \
auto out_data = out->mutable_data<float>(TARGET(kCUDA)); \
int pixel_num = x->numel(); \
int pre = 1; \
int n = pixel_num; \
int post = 1; \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, pre, n, post, OP, stream); \
} else { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, 1, pixel_num, 1, OP, stream); \
}
#define ELEMENTWISE_COMPUTE_
NHWC(OP, WITH_RELU)
\
#define ELEMENTWISE_COMPUTE_
ACT_NHWC(OP)
\
std::map<int, int> pos_map = {{0, 0}, {1, 3}, {2, 1}, {3, 2}}; \
auto& param = this->Param<param_t>(); \
auto& ctx = this->ctx_->template As<CUDAContext>(); \
...
...
@@ -122,80 +163,83 @@ inline bool is_broadcast(const DDim& x_dims,
int pre = 1; \
int n = pixel_num; \
int post = 1; \
if (WITH_RELU) { \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise_relu( \
x_data, y_data, out_data, pre, n, post, OP, stream); \
} else { \
lite::cuda::math::elementwise_relu( \
x_data, y_data, out_data, 1, pixel_num, 1, OP, stream); \
} \
auto act = param.act_type; \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise_act( \
x_data, y_data, out_data, pre, n, post, act, OP, stream); \
} else { \
if (is_broadcast(x->dims(), y->dims(), axis, &pre, &n, &post)) { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, pre, n, post, OP, stream); \
} else { \
lite::cuda::math::elementwise( \
x_data, y_data, out_data, 1, pixel_num, 1, OP, stream); \
} \
lite::cuda::math::elementwise_act( \
x_data, y_data, out_data, 1, pixel_num, 1, act, OP, stream); \
}
void
ElementwiseAddCompute
::
Run
()
{
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
,
false
)
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseAddComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
,
false
)
ELEMENTWISE_COMPUTE_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseSubCompute
::
Run
()
{
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kSUB
,
false
)
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kSUB
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseSubComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kSUB
,
false
)
ELEMENTWISE_COMPUTE_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kSUB
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseMulCompute
::
Run
()
{
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
,
false
)
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseMulComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
,
false
)
ELEMENTWISE_COMPUTE_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseAddActivationCompute
::
Run
()
{
ELEMENTWISE_COMPUTE_ACT
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseAddActivationComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_ACT_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
Elementwise
AddRelu
Compute
::
Run
()
{
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
,
true
)
void
Elementwise
SubActivation
Compute
::
Run
()
{
ELEMENTWISE_COMPUTE
_ACT
(
lite
::
cuda
::
math
::
BinaryOperation
::
kSUB
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
Elementwise
AddRelu
ComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_
NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kADD
,
true
)
void
Elementwise
SubActivation
ComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_
ACT_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kSUB
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseMul
Relu
Compute
::
Run
()
{
ELEMENTWISE_COMPUTE
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
,
true
)
void
ElementwiseMul
Activation
Compute
::
Run
()
{
ELEMENTWISE_COMPUTE
_ACT
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
void
ElementwiseMul
Relu
ComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_
NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
,
true
)
void
ElementwiseMul
Activation
ComputeNHWC
::
Run
()
{
ELEMENTWISE_COMPUTE_
ACT_NHWC
(
lite
::
cuda
::
math
::
BinaryOperation
::
kMUL
)
cudaError_t
error
=
cudaGetLastError
();
if
(
error
!=
cudaSuccess
)
LOG
(
INFO
)
<<
cudaGetErrorString
(
error
);
}
...
...
@@ -298,23 +342,25 @@ REGISTER_LITE_KERNEL(elementwise_mul,
DATALAYOUT
(
kNHWC
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_add_activation
,
kCUDA
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseAddReluCompute
,
def
)
REGISTER_LITE_KERNEL
(
fusion_elementwise_add_activation
,
kCUDA
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseAddActivationCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_add_activation
,
kCUDA
,
kFloat
,
kNHWC
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseAddReluComputeNHWC
,
nhwc_format
)
REGISTER_LITE_KERNEL
(
fusion_elementwise_add_activation
,
kCUDA
,
kFloat
,
kNHWC
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseAddActivationComputeNHWC
,
nhwc_format
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
...
...
@@ -329,23 +375,58 @@ REGISTER_LITE_KERNEL(fusion_elementwise_add_activation,
DATALAYOUT
(
kNHWC
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_mul_activation
,
kCUDA
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseMulReluCompute
,
def
)
REGISTER_LITE_KERNEL
(
fusion_elementwise_sub_activation
,
kCUDA
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseSubActivationCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_mul_activation
,
kCUDA
,
kFloat
,
kNHWC
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseMulReluComputeNHWC
,
nhwc_format
)
REGISTER_LITE_KERNEL
(
fusion_elementwise_sub_activation
,
kCUDA
,
kFloat
,
kNHWC
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseSubActivationComputeNHWC
,
nhwc_format
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_mul_activation
,
kCUDA
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseMulActivationCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_mul_activation
,
kCUDA
,
kFloat
,
kNHWC
,
paddle
::
lite
::
kernels
::
cuda
::
ElementwiseMulActivationComputeNHWC
,
nhwc_format
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
...
...
lite/kernels/cuda/elementwise_compute.h
浏览文件 @
2a344823
...
...
@@ -74,40 +74,58 @@ class ElementwiseMulComputeNHWC
virtual
~
ElementwiseMulComputeNHWC
()
=
default
;
};
class
ElementwiseAdd
Relu
Compute
class
ElementwiseAdd
Activation
Compute
:
public
KernelLite
<
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
FusionElementwiseActivationParam
;
void
Run
()
override
;
virtual
~
ElementwiseAdd
Relu
Compute
()
=
default
;
virtual
~
ElementwiseAdd
Activation
Compute
()
=
default
;
};
class
ElementwiseAdd
Relu
ComputeNHWC
class
ElementwiseAdd
Activation
ComputeNHWC
:
public
KernelLite
<
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
)
>
{
public:
using
param_t
=
operators
::
FusionElementwiseActivationParam
;
void
Run
()
override
;
virtual
~
ElementwiseAdd
Relu
ComputeNHWC
()
=
default
;
virtual
~
ElementwiseAdd
Activation
ComputeNHWC
()
=
default
;
};
class
Elementwise
MulRelu
Compute
class
Elementwise
SubActivation
Compute
:
public
KernelLite
<
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
FusionElementwiseActivationParam
;
void
Run
()
override
;
virtual
~
Elementwise
MulRelu
Compute
()
=
default
;
virtual
~
Elementwise
SubActivation
Compute
()
=
default
;
};
class
Elementwise
MulRelu
ComputeNHWC
class
Elementwise
SubActivation
ComputeNHWC
:
public
KernelLite
<
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
)
>
{
public:
using
param_t
=
operators
::
FusionElementwiseActivationParam
;
void
Run
()
override
;
virtual
~
ElementwiseMulReluComputeNHWC
()
=
default
;
virtual
~
ElementwiseSubActivationComputeNHWC
()
=
default
;
};
class
ElementwiseMulActivationCompute
:
public
KernelLite
<
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
FusionElementwiseActivationParam
;
void
Run
()
override
;
virtual
~
ElementwiseMulActivationCompute
()
=
default
;
};
class
ElementwiseMulActivationComputeNHWC
:
public
KernelLite
<
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
)
>
{
public:
using
param_t
=
operators
::
FusionElementwiseActivationParam
;
void
Run
()
override
;
virtual
~
ElementwiseMulActivationComputeNHWC
()
=
default
;
};
}
// namespace cuda
...
...
lite/operators/fusion_elementwise_activation_ops.cc
浏览文件 @
2a344823
...
...
@@ -44,8 +44,6 @@ bool FusionElementwiseActivationOp::AttachImpl(const cpp::OpDesc& opdesc,
param_
.
Out
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
param_
.
act_type
=
opdesc
.
GetAttr
<
std
::
string
>
(
"act_type"
);
// TODO(sangoly): support more activation types.
CHECK
(
param_
.
act_type
==
"relu"
)
<<
"Only relu activation be supported now"
;
return
true
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录