Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
62ea82d0
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
62ea82d0
编写于
9月 10, 2019
作者:
W
Wilber
提交者:
cyj1986
9月 10, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add elementwise_sub and modify argmax (#1964)
上级
111db475
变更
12
显示空白变更内容
内联
并排
Showing
12 changed files
with
479 additions
and
16 deletions
+479
-16
lite/api/_paddle_use_kernels.h
lite/api/_paddle_use_kernels.h
+1
-0
lite/api/_paddle_use_ops.h
lite/api/_paddle_use_ops.h
+1
-1
lite/backends/arm/math/elementwise.cc
lite/backends/arm/math/elementwise.cc
+245
-0
lite/backends/arm/math/elementwise.h
lite/backends/arm/math/elementwise.h
+14
-0
lite/kernels/arm/argmax_compute.cc
lite/kernels/arm/argmax_compute.cc
+6
-2
lite/kernels/arm/argmax_compute_test.cc
lite/kernels/arm/argmax_compute_test.cc
+2
-2
lite/kernels/arm/elementwise_compute.cc
lite/kernels/arm/elementwise_compute.cc
+68
-4
lite/kernels/arm/elementwise_compute.h
lite/kernels/arm/elementwise_compute.h
+16
-0
lite/operators/argmax_op.cc
lite/operators/argmax_op.cc
+2
-2
lite/operators/op_params.h
lite/operators/op_params.h
+0
-1
lite/tests/kernels/argmax_compute_test.cc
lite/tests/kernels/argmax_compute_test.cc
+3
-3
lite/tests/kernels/elementwise_compute_test.cc
lite/tests/kernels/elementwise_compute_test.cc
+121
-1
未找到文件。
lite/api/_paddle_use_kernels.h
浏览文件 @
62ea82d0
...
...
@@ -45,6 +45,7 @@ USE_LITE_KERNEL(box_coder, kARM, kFloat, kNCHW, def);
// ARM / kFloat / kNCHW kernels referenced under the `def` alias.
// One invocation per op, kept in the order they appear in the file.
USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_sub, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_max, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_div, kARM, kFloat, kNCHW, def);
...
...
lite/api/_paddle_use_ops.h
浏览文件 @
62ea82d0
...
...
@@ -51,7 +51,7 @@ USE_LITE_OP(batch_norm)
USE_LITE_OP
(
fusion_elementwise_sub_activation
)
USE_LITE_OP
(
transpose
)
USE_LITE_OP
(
transpose2
)
USE_LITE_OP
(
argmax
)
USE_LITE_OP
(
arg
_
max
)
USE_LITE_OP
(
axpy
)
USE_LITE_OP
(
leaky_relu
)
USE_LITE_OP
(
relu_clipped
)
...
...
lite/backends/arm/math/elementwise.cc
浏览文件 @
62ea82d0
...
...
@@ -266,6 +266,251 @@ void elementwise_add_relu_broadcast<float>(const float* dinx,
}
}
/// Element-wise subtraction of two float buffers: dout[i] = dinx[i] - diny[i]
/// for i in [0, num).
///
/// The bulk of the range is handled 16 floats at a time (four float32x4_t
/// vectors per block) inside an OpenMP parallel loop; the last num % 16
/// elements are handled by a scalar loop.
///
/// @param dinx  minuend buffer, at least `num` floats
/// @param diny  subtrahend buffer, at least `num` floats
/// @param dout  destination buffer, at least `num` floats
/// @param num   number of elements to process
template <>
void elementwise_sub<float>(const float* dinx,
                            const float* diny,
                            float* dout,
                            int num) {
  const int block_count = num >> 4;  // number of full 16-float blocks
  const int tail_count = num % 16;   // elements left after the blocks

#pragma omp parallel for
  for (int blk = 0; blk < block_count; ++blk) {
    const int base = blk << 4;
    const float* x = dinx + base;
    const float* y = diny + base;
    float* out = dout + base;

    // Load and subtract the whole block first, then store it, so every load
    // of the block happens before any store of the block.
    float32x4_t diff[4];
    for (int q = 0; q < 4; ++q) {
      diff[q] = vsubq_f32(vld1q_f32(x + 4 * q), vld1q_f32(y + 4 * q));
    }
    for (int q = 0; q < 4; ++q) {
      vst1q_f32(out + 4 * q, diff[q]);
    }
  }

  if (tail_count > 0) {
    const int tail_begin = block_count << 4;
    const int tail_end = tail_begin + tail_count;
    for (int k = tail_begin; k < tail_end; ++k) {
      dout[k] = dinx[k] - diny[k];
    }
  }
}
/// Fused subtract + ReLU on two float buffers:
/// dout[i] = max(dinx[i] - diny[i], 0) for i in [0, num).
///
/// Full 16-float blocks go through NEON (vsubq_f32 followed by vmaxq_f32
/// against a zero vector) inside an OpenMP parallel loop; the remaining
/// num % 16 elements use a scalar compare-and-select.
///
/// @param dinx  minuend buffer, at least `num` floats
/// @param diny  subtrahend buffer, at least `num` floats
/// @param dout  destination buffer, at least `num` floats
/// @param num   number of elements to process
template <>
void elementwise_sub_relu<float>(const float* dinx,
                                 const float* diny,
                                 float* dout,
                                 int num) {
  const int block_count = num >> 4;  // number of full 16-float blocks
  const int tail_count = num % 16;   // elements left after the blocks
  const float32x4_t zeros = vdupq_n_f32(0.f);

#pragma omp parallel for
  for (int blk = 0; blk < block_count; ++blk) {
    const int base = blk << 4;
    const float* x = dinx + base;
    const float* y = diny + base;
    float* out = dout + base;

    // Compute the whole block before writing any of it back.
    float32x4_t act[4];
    for (int q = 0; q < 4; ++q) {
      const float32x4_t diff =
          vsubq_f32(vld1q_f32(x + 4 * q), vld1q_f32(y + 4 * q));
      act[q] = vmaxq_f32(diff, zeros);  // relu
    }
    for (int q = 0; q < 4; ++q) {
      vst1q_f32(out + 4 * q, act[q]);
    }
  }

  if (tail_count > 0) {
    const int tail_begin = block_count << 4;
    const int tail_end = tail_begin + tail_count;
    for (int k = tail_begin; k < tail_end; ++k) {
      const float diff = dinx[k] - diny[k];
      // Explicit select (not std::max) to keep the original comparison form.
      dout[k] = diff > 0.f ? diff : 0.f;
    }
  }
}
/// Broadcast subtraction: for every (i, j) in [0, batch) x [0, channels), the
/// contiguous run of `num` floats starting at offset (i * channels + j) * num
/// has the single value diny[j] subtracted from it.
///
/// The two outer loops are collapsed into one OpenMP parallel loop. Each run
/// is processed in steps of 16, then at most one step of 8, at most one step
/// of 4, and finally a scalar loop for what is left.
///
/// @param dinx      input buffer, batch * channels * num floats
/// @param diny      per-channel subtrahends, indexed by j in [0, channels)
/// @param dout      output buffer, same layout as dinx
/// @param batch     outer loop extent
/// @param channels  middle loop extent (also the indexed length of diny)
/// @param num       length of each contiguous inner run
template <>
void elementwise_sub_broadcast<float>(const float* dinx,
                                      const float* diny,
                                      float* dout,
                                      int batch,
                                      int channels,
                                      int num) {
#pragma omp parallel for collapse(2)
  for (int b = 0; b < batch; ++b) {
    for (int c = 0; c < channels; ++c) {
      const int base = (b * channels + c) * num;
      const float* src = dinx + base;
      float* dst = dout + base;
      const float subtrahend = diny[c];
      const float32x4_t vsubtrahend = vdupq_n_f32(subtrahend);
      int left = num;  // elements of this run not yet written

      // 16 floats per step: finish all loads/subs before the stores.
      for (; left >= 16; left -= 16, src += 16, dst += 16) {
        float32x4_t diff[4];
        for (int q = 0; q < 4; ++q) {
          diff[q] = vsubq_f32(vld1q_f32(src + 4 * q), vsubtrahend);
        }
        for (int q = 0; q < 4; ++q) {
          vst1q_f32(dst + 4 * q, diff[q]);
        }
      }

      if (left >= 8) {
        const float32x4_t lo = vsubq_f32(vld1q_f32(src), vsubtrahend);
        const float32x4_t hi = vsubq_f32(vld1q_f32(src + 4), vsubtrahend);
        vst1q_f32(dst, lo);
        vst1q_f32(dst + 4, hi);
        src += 8;
        dst += 8;
        left -= 8;
      }

      if (left >= 4) {
        vst1q_f32(dst, vsubq_f32(vld1q_f32(src), vsubtrahend));
        src += 4;
        dst += 4;
        left -= 4;
      }

      // Scalar tail (at most 3 elements).
      for (; left > 0; --left) {
        *dst = *src - subtrahend;
        ++dst;
        ++src;
      }
    }
  }
}
/// Broadcast subtraction fused with ReLU: for every (i, j) in
/// [0, batch) x [0, channels), each of the `num` floats starting at offset
/// (i * channels + j) * num becomes max(value - diny[j], 0).
///
/// The two outer loops are collapsed into one OpenMP parallel loop. Each run
/// is processed in steps of 16, then at most one step of 8, at most one step
/// of 4, and finally a scalar loop for what is left.
///
/// @param dinx      input buffer, batch * channels * num floats
/// @param diny      per-channel subtrahends, indexed by j in [0, channels)
/// @param dout      output buffer, same layout as dinx
/// @param batch     outer loop extent
/// @param channels  middle loop extent (also the indexed length of diny)
/// @param num       length of each contiguous inner run
template <>
void elementwise_sub_relu_broadcast<float>(const float* dinx,
                                           const float* diny,
                                           float* dout,
                                           int batch,
                                           int channels,
                                           int num) {
  const float32x4_t zeros = vdupq_n_f32(0.f);
#pragma omp parallel for collapse(2)
  for (int b = 0; b < batch; ++b) {
    for (int c = 0; c < channels; ++c) {
      const int base = (b * channels + c) * num;
      const float* src = dinx + base;
      float* dst = dout + base;
      const float subtrahend = diny[c];
      const float32x4_t vsubtrahend = vdupq_n_f32(subtrahend);
      int left = num;  // elements of this run not yet written

      // 16 floats per step: finish all loads/subs/relu before the stores.
      for (; left >= 16; left -= 16, src += 16, dst += 16) {
        float32x4_t act[4];
        for (int q = 0; q < 4; ++q) {
          const float32x4_t diff =
              vsubq_f32(vld1q_f32(src + 4 * q), vsubtrahend);
          act[q] = vmaxq_f32(diff, zeros);  // relu
        }
        for (int q = 0; q < 4; ++q) {
          vst1q_f32(dst + 4 * q, act[q]);
        }
      }

      if (left >= 8) {
        const float32x4_t lo =
            vmaxq_f32(vsubq_f32(vld1q_f32(src), vsubtrahend), zeros);
        const float32x4_t hi =
            vmaxq_f32(vsubq_f32(vld1q_f32(src + 4), vsubtrahend), zeros);
        vst1q_f32(dst, lo);
        vst1q_f32(dst + 4, hi);
        src += 8;
        dst += 8;
        left -= 8;
      }

      if (left >= 4) {
        const float32x4_t v =
            vmaxq_f32(vsubq_f32(vld1q_f32(src), vsubtrahend), zeros);
        vst1q_f32(dst, v);
        src += 4;
        dst += 4;
        left -= 4;
      }

      // Scalar tail (at most 3 elements).
      for (; left > 0; --left) {
        const float diff = *src - subtrahend;
        // Explicit select (not std::max) to keep the original comparison form.
        *dst = diff > 0.f ? diff : 0.f;
        ++dst;
        ++src;
      }
    }
  }
}
template
<
>
void
elementwise_mul
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
...
...
lite/backends/arm/math/elementwise.h
浏览文件 @
62ea82d0
...
...
@@ -33,6 +33,20 @@ template <typename T>
void
elementwise_add_relu_broadcast
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
batch
,
int
channels
,
int
num
);
/// Element-wise subtraction over `num` elements.
/// The float specialization computes dout[i] = dinx[i] - diny[i].
template <typename T>
void elementwise_sub(const T* dinx, const T* diny, T* dout, int num);

/// Element-wise subtraction followed by ReLU over `num` elements.
/// The float specialization computes dout[i] = max(dinx[i] - diny[i], 0).
template <typename T>
void elementwise_sub_relu(const T* dinx, const T* diny, T* dout, int num);

/// Broadcast subtraction. In the float specialization, for each (i, j) in
/// [0, batch) x [0, channels) the `num` elements at offset
/// (i * channels + j) * num have diny[j] subtracted from them.
template <typename T>
void elementwise_sub_broadcast(const T* dinx,
                               const T* diny,
                               T* dout,
                               int batch,
                               int channels,
                               int num);

/// Broadcast subtraction followed by ReLU; same indexing as
/// elementwise_sub_broadcast, with the result clamped at zero from below.
template <typename T>
void elementwise_sub_relu_broadcast(const T* dinx,
                                    const T* diny,
                                    T* dout,
                                    int batch,
                                    int channels,
                                    int num);

/// Element-wise op over `num` elements.
/// NOTE(review): presumably dout[i] = dinx[i] * diny[i]; the definition is
/// not part of this view, confirm against elementwise.cc.
template <typename T>
void elementwise_mul(const T* dinx, const T* diny, T* dout, int num);
...
...
lite/kernels/arm/argmax_compute.cc
浏览文件 @
62ea82d0
...
...
@@ -40,8 +40,12 @@ void ArgmaxCompute::Run() {
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
argmax
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ArgmaxCompute
,
def
)
REGISTER_LITE_KERNEL
(
arg_max
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ArgmaxCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
lite/kernels/arm/argmax_compute_test.cc
浏览文件 @
62ea82d0
...
...
@@ -68,7 +68,7 @@ void argmax_compute_ref(const operators::ArgmaxParam& param) {
TEST
(
argmax_arm
,
retrive_op
)
{
auto
argmax
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"argmax"
);
"arg
_
max"
);
ASSERT_FALSE
(
argmax
.
empty
());
ASSERT_TRUE
(
argmax
.
front
());
}
...
...
@@ -136,4 +136,4 @@ TEST(argmax_arm, compute) {
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
argmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
arg
_
max
,
kARM
,
kFloat
,
kNCHW
,
def
);
lite/kernels/arm/elementwise_compute.cc
浏览文件 @
62ea82d0
...
...
@@ -116,6 +116,51 @@ void ElementwiseAddActivationCompute::Run() {
}
}
void
ElementwiseSubCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
ElementwiseParam
>
();
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
const
float
*
y_data
=
param
.
Y
->
data
<
float
>
();
float
*
out_data
=
param
.
Out
->
mutable_data
<
float
>
();
int
axis
=
param
.
axis
;
auto
x_dims
=
param
.
X
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
int
pre
,
n
,
post
;
if
(
is_broadcast
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
))
{
lite
::
arm
::
math
::
elementwise_sub_broadcast
(
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
);
}
else
{
lite
::
arm
::
math
::
elementwise_sub
(
x_data
,
y_data
,
out_data
,
x_dims
.
production
());
}
}
void
ElementwiseSubActivationCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
FusionElementwiseActivationParam
>
();
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
const
float
*
y_data
=
param
.
Y
->
data
<
float
>
();
float
*
out_data
=
param
.
Out
->
mutable_data
<
float
>
();
int
axis
=
param
.
axis
;
std
::
string
act_type
=
param
.
act_type
;
auto
x_dims
=
param
.
X
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
int
pre
,
n
,
post
;
if
(
is_broadcast
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
))
{
if
(
act_type
==
"relu"
)
{
lite
::
arm
::
math
::
elementwise_sub_relu_broadcast
(
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
);
}
else
{
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
act_type
;
}
}
else
{
if
(
act_type
==
"relu"
)
{
lite
::
arm
::
math
::
elementwise_sub_relu
(
x_data
,
y_data
,
out_data
,
x_dims
.
production
());
}
else
{
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
act_type
;
}
}
}
void
ElementwiseMulCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
ElementwiseParam
>
();
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
...
...
@@ -249,10 +294,6 @@ void ElementwiseDivActivationCompute::Run() {
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
act_type
;
}
}
for
(
int
i
=
0
;
i
<
x_dims
.
production
();
i
++
)
{
LOG
(
INFO
)
<<
"x:"
<<
x_data
[
i
]
<<
" y:"
<<
y_data
[
i
]
<<
" out:"
<<
out_data
[
i
];
}
}
}
// namespace arm
...
...
@@ -283,6 +324,29 @@ REGISTER_LITE_KERNEL(
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
// Registers ElementwiseSubCompute for op `elementwise_sub`
// (kARM, kFloat, kNCHW, alias `def`) with inputs X, Y and output Out bound
// to ARM-target tensors.
REGISTER_LITE_KERNEL(elementwise_sub,
                     kARM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::arm::ElementwiseSubCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();
// Registers ElementwiseSubActivationCompute for op
// `fusion_elementwise_sub_activation` (kARM, kFloat, kNCHW, alias `def`) with
// inputs X, Y and output Out bound to ARM-target tensors.
REGISTER_LITE_KERNEL(
    fusion_elementwise_sub_activation,
    kARM,
    kFloat,
    kNCHW,
    paddle::lite::kernels::arm::ElementwiseSubActivationCompute,
    def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();
REGISTER_LITE_KERNEL
(
elementwise_mul
,
kARM
,
kFloat
,
...
...
lite/kernels/arm/elementwise_compute.h
浏览文件 @
62ea82d0
...
...
@@ -38,6 +38,22 @@ class ElementwiseAddActivationCompute
virtual
~
ElementwiseAddActivationCompute
()
=
default
;
};
// ARM float kernel for the elementwise_sub op; the work is done in Run().
class ElementwiseSubCompute
    : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  virtual ~ElementwiseSubCompute() = default;

  // Executes the kernel on the currently attached parameters.
  void Run() override;
};
// ARM float kernel for the fusion_elementwise_sub_activation op; the work is
// done in Run().
class ElementwiseSubActivationCompute
    : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  virtual ~ElementwiseSubActivationCompute() = default;

  // Executes the kernel on the currently attached parameters.
  void Run() override;
};
class
ElementwiseMulCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
...
...
lite/operators/argmax_op.cc
浏览文件 @
62ea82d0
...
...
@@ -50,7 +50,7 @@ bool ArgmaxOpLite::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
param_
.
X
=
scope
->
FindVar
(
x
)
->
GetMutable
<
lite
::
Tensor
>
();
param_
.
Out
=
scope
->
FindVar
(
out
)
->
GetMutable
<
lite
::
Tensor
>
();
param_
.
Axis
=
op_desc
.
GetAttr
<
int
>
(
"A
xis"
);
param_
.
Axis
=
op_desc
.
GetAttr
<
int
64_t
>
(
"a
xis"
);
return
true
;
}
...
...
@@ -59,4 +59,4 @@ bool ArgmaxOpLite::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
argmax
,
paddle
::
lite
::
operators
::
ArgmaxOpLite
);
REGISTER_LITE_OP
(
arg
_
max
,
paddle
::
lite
::
operators
::
ArgmaxOpLite
);
lite/operators/op_params.h
浏览文件 @
62ea82d0
...
...
@@ -761,7 +761,6 @@ struct GenerateProposalsParam {
lite
::
Tensor
*
RpnRois
{};
lite
::
Tensor
*
RpnRoiProbs
{};
};
/// ----------------------- shape operators ----------------------
/// ----------------------- squeeze operators ----------------------
struct
SqueezeParam
{
const
lite
::
Tensor
*
X
{};
...
...
lite/tests/kernels/argmax_compute_test.cc
浏览文件 @
62ea82d0
...
...
@@ -25,7 +25,7 @@ class ArgmaxComputeTester : public arena::TestCase {
// common attributes for this op.
std
::
string
input_
=
"x"
;
std
::
string
output_
=
"out"
;
int
axis_
=
0.
;
int
64_t
axis_
=
0.
;
DDim
dims_
{{
2
,
5
,
20
,
30
}};
public:
...
...
@@ -82,10 +82,10 @@ class ArgmaxComputeTester : public arena::TestCase {
}
void
PrepareOpDesc
(
cpp
::
OpDesc
*
op_desc
)
{
op_desc
->
SetType
(
"argmax"
);
op_desc
->
SetType
(
"arg
_
max"
);
op_desc
->
SetInput
(
"X"
,
{
input_
});
op_desc
->
SetOutput
(
"Out"
,
{
output_
});
op_desc
->
SetAttr
(
"
A
xis"
,
axis_
);
op_desc
->
SetAttr
(
"
a
xis"
,
axis_
);
}
void
PrepareData
()
override
{
...
...
lite/tests/kernels/elementwise_compute_test.cc
浏览文件 @
62ea82d0
...
...
@@ -71,6 +71,57 @@ class ElementwiseComputeTester : public arena::TestCase {
}
};
// Arena test case for the `elementwise_sub` op.
// Feeds two tensors "x" and "y" of shape dims_ to the op and compares the
// kernel output "out" against a scalar reference computed in RunBaseline().
class ElementwiseSubComputeTester : public arena::TestCase {
 protected:
  // common attributes for this op.
  std::string inputx_ = "x";
  std::string inputy_ = "y";
  std::string output_ = "out";
  int axis_;
  DDim dims_{{1, 2, 3, 4}};

 public:
  ElementwiseSubComputeTester(const Place& place,
                              const std::string& alias,
                              int axis)
      : TestCase(place, alias), axis_(axis) {}

  // Reference result: out[i] = x[i] - y[i] for every element of dims_.
  void RunBaseline(Scope* scope) override {
    auto* out = scope->NewTensor(output_);
    CHECK(out);
    out->Resize(dims_);
    auto* out_data = out->mutable_data<float>();

    auto* x = scope->FindTensor(inputx_);
    const auto* x_data = x->data<float>();
    auto* y = scope->FindTensor(inputy_);
    // The subtrahend must come from the Y tensor; reading it from X would
    // make the baseline compute X - X regardless of what Y holds.
    const auto* y_data = y->data<float>();

    for (int i = 0; i < dims_.production(); i++) {
      out_data[i] = x_data[i] - y_data[i];
    }
  }

  // Describes the op under test: type, X/Y inputs, Out output and axis attr.
  void PrepareOpDesc(cpp::OpDesc* op_desc) {
    op_desc->SetType("elementwise_sub");
    op_desc->SetInput("X", {inputx_});
    op_desc->SetInput("Y", {inputy_});
    op_desc->SetOutput("Out", {output_});
    op_desc->SetAttr("axis", axis_);
  }

  // Fills both inputs with the ramp data[i] = i * 1.1.
  // NOTE(review): X and Y receive identical values, so the expected output is
  // all zeros; distinct Y data would make this test more discriminating.
  void PrepareData() override {
    std::vector<float> data(dims_.production());
    for (int i = 0; i < dims_.production(); i++) {
      data[i] = i * 1.1;
    }

    SetCommonTensor(inputx_, dims_, data.data());
    SetCommonTensor(inputy_, dims_, data.data());
  }
};
class
ElementwiseMulComputeTester
:
public
arena
::
TestCase
{
protected:
// common attributes for this op.
...
...
@@ -232,6 +283,65 @@ class FusionElementwiseAddActivationComputeTester : public arena::TestCase {
}
};
// Arena test case for the `fusion_elementwise_sub_activation` op.
// Feeds two tensors "x" and "y" of shape dims_ to the op and compares the
// kernel output "out" against a scalar reference computed in RunBaseline().
class FusionElementwiseSubActivationComputeTester : public arena::TestCase {
 protected:
  // common attributes for this op.
  std::string inputx_ = "x";
  std::string inputy_ = "y";
  std::string output_ = "out";
  int axis_;
  std::string act_type_;
  DDim dims_{{1, 2, 3, 4}};

 public:
  FusionElementwiseSubActivationComputeTester(const Place& place,
                                              const std::string& alias,
                                              int axis,
                                              std::string act_type)
      : TestCase(place, alias), axis_(axis), act_type_(act_type) {}

  // Reference result: out[i] = act(x[i] - y[i]); only "relu" is supported,
  // any other act_type_ is reported through LOG(FATAL).
  void RunBaseline(Scope* scope) override {
    auto* out = scope->NewTensor(output_);
    CHECK(out);
    out->Resize(dims_);
    auto* out_data = out->mutable_data<float>();

    auto* x = scope->FindTensor(inputx_);
    const auto* x_data = x->data<float>();
    auto* y = scope->FindTensor(inputy_);
    // The subtrahend must come from the Y tensor; reading it from X would
    // make the baseline compute X - X regardless of what Y holds.
    const auto* y_data = y->data<float>();

    for (int i = 0; i < dims_.production(); i++) {
      out_data[i] = x_data[i] - y_data[i];
      if (act_type_ == "relu") {
        out_data[i] = out_data[i] > 0 ? out_data[i] : 0;
      } else {
        LOG(FATAL) << "unsupported Activation type: " << act_type_;
      }
    }
  }

  // Describes the op under test: type, X/Y inputs, Out output, axis and
  // act_type attrs.
  void PrepareOpDesc(cpp::OpDesc* op_desc) {
    op_desc->SetType("fusion_elementwise_sub_activation");
    op_desc->SetInput("X", {inputx_});
    op_desc->SetInput("Y", {inputy_});
    op_desc->SetOutput("Out", {output_});
    op_desc->SetAttr("axis", axis_);
    op_desc->SetAttr("act_type", act_type_);
  }

  // Fills both inputs with the ramp data[i] = i * 1.1.
  // NOTE(review): X and Y receive identical values, so the expected output is
  // all zeros and the relu clamp is never exercised on a negative value;
  // distinct Y data would make this test more discriminating.
  void PrepareData() override {
    std::vector<float> data(dims_.production());
    for (int i = 0; i < dims_.production(); i++) {
      data[i] = i * 1.1;
    }

    SetCommonTensor(inputx_, dims_, data.data());
    SetCommonTensor(inputy_, dims_, data.data());
  }
};
class
FusionElementwiseMulActivationComputeTester
:
public
arena
::
TestCase
{
protected:
// common attributes for this op.
...
...
@@ -441,7 +551,6 @@ class FusionElementwiseDivActivationComputeTester : public arena::TestCase {
}
else
{
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
act_type_
;
}
LOG
(
INFO
)
<<
"fusion div resul:"
<<
out_data
[
i
];
}
}
...
...
@@ -476,6 +585,11 @@ void test_elementwise(Place place) {
arena
::
Arena
arena
(
std
::
move
(
tester
),
place
,
2e-5
);
arena
.
TestPrecision
();
std
::
unique_ptr
<
arena
::
TestCase
>
tester_sub
(
new
ElementwiseSubComputeTester
(
place
,
"def"
,
axis
));
arena
::
Arena
arena_sub
(
std
::
move
(
tester_sub
),
place
,
2e-5
);
arena_sub
.
TestPrecision
();
std
::
unique_ptr
<
arena
::
TestCase
>
tester_mul
(
new
ElementwiseMulComputeTester
(
place
,
"def"
,
axis
));
arena
::
Arena
arena_mul
(
std
::
move
(
tester_mul
),
place
,
2e-5
);
...
...
@@ -511,6 +625,12 @@ void test_fusion_elementwise(Place place) {
arena
::
Arena
arena_add_act
(
std
::
move
(
tester_add_act
),
place
,
2e-5
);
arena_add_act
.
TestPrecision
();
std
::
unique_ptr
<
arena
::
TestCase
>
tester_sub_act
(
new
FusionElementwiseSubActivationComputeTester
(
place
,
"def"
,
axis
,
"relu"
));
arena
::
Arena
arena_sub_act
(
std
::
move
(
tester_sub_act
),
place
,
2e-5
);
arena_sub_act
.
TestPrecision
();
std
::
unique_ptr
<
arena
::
TestCase
>
tester_mul_act
(
new
FusionElementwiseMulActivationComputeTester
(
place
,
"def"
,
axis
,
"relu"
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录