Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
194e5a76
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
194e5a76
编写于
9月 10, 2020
作者:
S
sunsetlh
提交者:
GitHub
9月 10, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[XPU] fix bugs: __xpu__conv2d, activation, elementwise (#4278)
上级
d91fdbb5
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
341 addition
and
86 deletion
+341
-86
lite/core/mir/fusion/__xpu__conv2d_fuse_pass.cc
lite/core/mir/fusion/__xpu__conv2d_fuse_pass.cc
+2
-0
lite/kernels/xpu/__xpu__conv2d_compute.cc
lite/kernels/xpu/__xpu__conv2d_compute.cc
+3
-3
lite/kernels/xpu/activation_compute.cc
lite/kernels/xpu/activation_compute.cc
+49
-4
lite/kernels/xpu/activation_compute.h
lite/kernels/xpu/activation_compute.h
+19
-2
lite/kernels/xpu/elementwise_compute.cc
lite/kernels/xpu/elementwise_compute.cc
+266
-75
lite/operators/__xpu__conv2d_op.cc
lite/operators/__xpu__conv2d_op.cc
+1
-1
lite/operators/op_params.h
lite/operators/op_params.h
+1
-1
未找到文件。
lite/core/mir/fusion/__xpu__conv2d_fuse_pass.cc
浏览文件 @
194e5a76
...
...
@@ -244,6 +244,7 @@ class XPUConv2dBlock0Fuser : public FuseBase {
std
::
string
output_name
=
""
;
if
(
_with_relu
)
{
op_desc
.
SetAttr
(
"act_type"
,
std
::
string
{
"relu"
});
output_name
=
matched
.
at
(
"relu_out"
)
->
arg
()
->
name
;
}
else
{
output_name
=
matched
.
at
(
"bn_out"
)
->
arg
()
->
name
;
...
...
@@ -433,6 +434,7 @@ class XPUConv2dBlock1Fuser : public FuseBase {
TARGET
(
kXPU
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
));
scope
->
NewTensor
(
max_output_name
);
op_desc
.
SetOutput
(
"OutputMax"
,
{
max_output_name
});
op_desc
.
SetAttr
(
"act_type"
,
std
::
string
{
"relu"
});
auto
conv_op
=
LiteOpRegistry
::
Global
().
Create
(
"__xpu__conv2d"
);
auto
&
valid_places
=
conv_old
->
valid_places
();
...
...
lite/kernels/xpu/__xpu__conv2d_compute.cc
浏览文件 @
194e5a76
...
...
@@ -48,8 +48,9 @@ void XPUConv2dCompute::Run() {
std
::
string
filter_type
=
param
.
filter_type
;
int
groups
=
param
.
groups
;
int
act_type
=
(
param
.
act_type
==
-
1
)
?
xdnn
::
Activation_t
::
RELU
:
param
.
act_type
;
// -1 means not init
int
act_type
=
(
param
.
act_type
==
"relu"
)
?
xdnn
::
Activation_t
::
RELU
:
xdnn
::
Activation_t
::
LINEAR
;
// -1 means not init
const
auto
*
bias
=
param
.
Bias
?
param
.
Bias
->
data
<
float
>
()
:
nullptr
;
const
auto
*
branch
=
param
.
Branch
?
param
.
Branch
->
data
<
float
>
()
:
nullptr
;
const
float
*
input_max
=
...
...
@@ -60,7 +61,6 @@ void XPUConv2dCompute::Run() {
float
*
output
=
param
.
Output
->
mutable_data
<
float
>
(
TARGET
(
kXPU
));
// TODO(luohang): now support for resnet50 first
CHECK_EQ
(
act_type
,
xdnn
::
Activation_t
::
RELU
);
CHECK_EQ
(
groups
,
1
);
CHECK_EQ
(
filter_type
,
"int16"
);
...
...
lite/kernels/xpu/activation_compute.cc
浏览文件 @
194e5a76
...
...
@@ -73,6 +73,19 @@ void AbsCompute::Run() {
CHECK_EQ
(
r
,
0
);
}
void
ExpCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
int
r
=
xdnn
::
activation_forward
(
ctx
.
GetRawContext
(),
/* context */
xdnn
::
Activation_t
::
EXP
,
/* type */
param
.
X
->
numel
(),
/* len */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
/* y */
);
CHECK_EQ
(
r
,
0
);
}
void
SquareCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
...
...
@@ -86,6 +99,19 @@ void SquareCompute::Run() {
CHECK_EQ
(
r
,
0
);
}
void
ReciprocalCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
int
r
=
xdnn
::
activation_forward
(
ctx
.
GetRawContext
(),
/* context */
xdnn
::
Activation_t
::
RECIPROCAL
,
/* type */
param
.
X
->
numel
(),
/* len */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
/* y */
);
CHECK_EQ
(
r
,
0
);
}
void
SqrtCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
...
...
@@ -103,11 +129,14 @@ void PowCompute::Run() {
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
xdnn
::
Activation_t
act_type
(
xdnn
::
Activation_t
::
ACT_POW
);
act_type
.
pow_factor
=
param
.
factor
;
int
r
=
xdnn
::
activation_forward
(
ctx
.
GetRawContext
(),
/* context */
xdnn
::
Activation_t
::
ACT_POW
,
/* type */
param
.
X
->
numel
(),
/* len */
param
.
X
->
data
<
float
>
(),
/* x */
ctx
.
GetRawContext
(),
/* context */
act_type
,
/* type */
param
.
X
->
numel
(),
/* len */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
/* y */
);
CHECK_EQ
(
r
,
0
);
}
...
...
@@ -158,6 +187,12 @@ REGISTER_LITE_KERNEL(
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
// Kernel registration for `exp`: passes kXPU / kFloat / kNCHW and the
// implementation class paddle::lite::kernels::xpu::ExpCompute to
// REGISTER_LITE_KERNEL under the name `def`, then binds input "X" and
// output "Out" to LiteType::GetTensorTy(TARGET(kXPU)).
REGISTER_LITE_KERNEL
(
exp
,
kXPU
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
xpu
::
ExpCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
square
,
kXPU
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
xpu
::
SquareCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
...
...
@@ -181,3 +216,13 @@ REGISTER_LITE_KERNEL(
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
// Kernel registration for `reciprocal`: passes kXPU / kFloat / kNCHW and the
// implementation class paddle::lite::kernels::xpu::ReciprocalCompute to
// REGISTER_LITE_KERNEL under the name `def`, then binds input "X" and
// output "Out" to LiteType::GetTensorTy(TARGET(kXPU)).
REGISTER_LITE_KERNEL
(
reciprocal
,
kXPU
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
xpu
::
ReciprocalCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
lite/kernels/xpu/activation_compute.h
浏览文件 @
194e5a76
...
...
@@ -13,7 +13,6 @@
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace
paddle
{
...
...
@@ -57,6 +56,15 @@ class AbsCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual
~
AbsCompute
()
=
default
;
};
// XPU float kernel whose Run() is defined out of line; it is parameterized
// by operators::ActivationParam.
class ExpCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  // Parameter struct type used by this kernel.
  using param_t = operators::ActivationParam;

  // Executes the kernel (definition lives in the matching .cc file).
  virtual void Run();

  virtual ~ExpCompute() = default;
};
class
SquareCompute
:
public
KernelLite
<
TARGET
(
kXPU
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ActivationParam
;
...
...
@@ -66,6 +74,15 @@ class SquareCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual
~
SquareCompute
()
=
default
;
};
// XPU float kernel whose Run() is defined out of line; it is parameterized
// by operators::ActivationParam.
class ReciprocalCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  // Parameter struct type used by this kernel.
  using param_t = operators::ActivationParam;

  // Executes the kernel (definition lives in the matching .cc file).
  virtual void Run();

  virtual ~ReciprocalCompute() = default;
};
class
SqrtCompute
:
public
KernelLite
<
TARGET
(
kXPU
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ActivationParam
;
...
...
@@ -77,7 +94,7 @@ class SqrtCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
class
PowCompute
:
public
KernelLite
<
TARGET
(
kXPU
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
Activation
Param
;
using
param_t
=
operators
::
Pow
Param
;
virtual
void
Run
();
...
...
lite/kernels/xpu/elementwise_compute.cc
浏览文件 @
194e5a76
...
...
@@ -13,8 +13,12 @@
// limitations under the License.
#include "lite/kernels/xpu/elementwise_compute.h"
#include <algorithm>
#include <functional>
#include <string>
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
namespace
paddle
{
...
...
@@ -22,113 +26,300 @@ namespace lite {
namespace
kernels
{
namespace
xpu
{
// Returns a copy of `dims` with every trailing dimension equal to 1 removed.
//
// If all dimensions are 1 (or `dims` is empty) a default-constructed DDim is
// returned; otherwise the leading dimensions up to and including the last
// non-1 entry are kept in their original order.
inline DDim TrimTrailingSingularDims(const DDim& dims) {
  // Walk backwards past the run of trailing 1s.
  auto kept = dims.size();
  while (kept != 0 && dims[kept - 1] == 1) {
    --kept;
  }

  if (kept == 0) {
    return DDim();
  }

  std::vector<int64_t> leading(kept);
  for (decltype(kept) pos = 0; pos < kept; ++pos) {
    leading[pos] = dims[pos];
  }
  return DDim(leading);
}
// Splits the shape of x into three factors (pre, n, post) describing how
// y_dims lines up with x_dims starting at `axis`.
//
//   pre  - product of x_dims[0 .. axis)
//   n    - product of the y_dims entries that line up with x_dims
//   post - product of the remaining trailing x_dims entries
//
// When `mid_flag` is NULL every y_dims[i] is checked against
// x_dims[i + axis] with CHECK_EQ. When `mid_flag` is non-NULL, the first
// mismatching position is accepted provided y_dims[i] == 1: *mid_flag is set
// to 1, the y_dims entries before it are folded into *pre, *n becomes the
// larger of the two mismatching extents, and scanning stops there.
inline void GetMidDims(const DDim& x_dims,
                       const DDim& y_dims,
                       const int axis,
                       int* pre,
                       int* n,
                       int* post,
                       int* mid_flag = NULL) {
  *pre = 1;
  *n = 1;
  *post = 1;

  const bool allow_mid_broadcast = (mid_flag != NULL);
  if (allow_mid_broadcast) {
    *mid_flag = 0;
  }

  // Leading x dimensions in front of the aligned region.
  for (int lead = 0; lead < axis; ++lead) {
    (*pre) *= x_dims[lead];
  }

  int mid = 0;
  for (int i = 0; i < y_dims.size(); ++i) {
    if (!allow_mid_broadcast) {
      CHECK_EQ(x_dims[i + axis], y_dims[i]) << "Broadcast dimension mismatch.";
    } else if (x_dims[i + axis] != y_dims[i]) {
      // only support single y_dims[i] = 1 now.
      CHECK_EQ(*mid_flag, 0) << "Broadcast support y_dims with single 1.";
      CHECK_EQ(y_dims[i], 1) << "Broadcast dimension mismatch.";
      // m*n*k m*1*k
      for (int j = 0; j < i; ++j) {
        (*pre) *= y_dims[j];
      }
      *n = std::max(x_dims[i + axis], y_dims[i]);
      *mid_flag = 1;
      mid = i;
      break;
    }
    (*n) *= y_dims[i];
  }

  // Trailing x dimensions: start right after the broadcast position when one
  // was found, otherwise right after the region covered by y_dims.
  int post_begin = axis + y_dims.size();
  if (allow_mid_broadcast && *mid_flag) {
    post_begin = mid + 1;
  }
  for (int tail = post_begin; tail < x_dims.size(); ++tail) {
    (*post) *= x_dims[tail];
  }
}
void
ElementwiseAddCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
auto
&
x_dims
=
param
.
X
->
dims
()
.
data
()
;
auto
&
x_dims
=
param
.
X
->
dims
();
auto
&
y_dims
=
param
.
Y
->
dims
();
int
axis
=
param
.
axis
;
if
(
param
.
axis
==
-
1
)
{
axis
=
x_dims
.
size
()
-
y_dims
.
size
();
auto
y_dims_untrimed
=
y_dims
;
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims_untrimed
.
size
()
:
axis
);
auto
y_dims_after_trailing
=
TrimTrailingSingularDims
(
y_dims_untrimed
);
axis
=
(
y_dims_after_trailing
.
size
()
==
0
)
?
x_dims
.
size
()
:
axis
;
int
pre
,
n
,
post
;
GetMidDims
(
x_dims
,
y_dims_after_trailing
,
axis
,
&
pre
,
&
n
,
&
post
);
int
len
=
pre
*
n
*
post
;
float
*
y_broadcast
=
nullptr
;
if
(
post
==
1
)
{
int
r
=
xdnn
::
matrix_vector_add
(
ctx
.
GetRawContext
(),
param
.
X
->
data
<
float
>
(),
param
.
Y
->
data
<
float
>
(),
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
pre
,
n
);
CHECK_EQ
(
r
,
0
);
return
;
}
int
iter
=
std
::
accumulate
(
x_dims
.
begin
(),
x_dims
.
begin
()
+
axis
,
1
,
std
::
multiplies
<
int
>
());
int
stride
=
param
.
Y
->
numel
();
for
(
int
i
=
0
;
i
<
iter
;
++
i
)
{
const
float
*
x_ptr
=
param
.
X
->
data
<
float
>
()
+
i
*
stride
;
const
float
*
y_ptr
=
param
.
Y
->
data
<
float
>
();
float
*
o_ptr
=
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
+
i
*
stride
;
int
r
=
xdnn
::
elementwise_add
(
ctx
.
GetRawContext
(),
/* context */
x_ptr
,
/* x */
y_ptr
,
/* y */
o_ptr
,
/* z */
stride
/* len */
);
if
(
pre
!=
1
||
post
!=
1
)
{
XPUScratchPadGuard
y_broadcast_xpu_guard_
=
TargetWrapperXPU
::
MallocScratchPad
(
len
*
sizeof
(
float
),
false
/* use_l3 */
);
y_broadcast
=
reinterpret_cast
<
float
*>
(
y_broadcast_xpu_guard_
->
addr_
);
int
r
=
xdnn
::
broadcast_ew
(
ctx
.
GetRawContext
(),
param
.
Y
->
data
<
float
>
(),
y_broadcast
,
pre
,
n
,
post
,
xdnn
::
ElementwiseOp
::
ASSIGN
);
CHECK_EQ
(
r
,
0
);
r
=
xdnn
::
elementwise_add
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
y_broadcast
,
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
return
;
}
int
r
=
xdnn
::
elementwise_add
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Y
->
data
<
float
>
(),
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
}
void
Elementwise
Sub
Compute
::
Run
()
{
void
Elementwise
Mul
Compute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
auto
&
x_dims
=
param
.
X
->
dims
()
.
data
()
;
auto
&
x_dims
=
param
.
X
->
dims
();
auto
&
y_dims
=
param
.
Y
->
dims
();
int
axis
=
param
.
axis
;
if
(
param
.
axis
==
-
1
)
{
axis
=
x_dims
.
size
()
-
y_dims
.
size
();
auto
y_dims_untrimed
=
y_dims
;
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims_untrimed
.
size
()
:
axis
);
auto
y_dims_after_trailing
=
TrimTrailingSingularDims
(
y_dims_untrimed
);
axis
=
(
y_dims_after_trailing
.
size
()
==
0
)
?
x_dims
.
size
()
:
axis
;
int
pre
,
n
,
post
;
GetMidDims
(
x_dims
,
y_dims_after_trailing
,
axis
,
&
pre
,
&
n
,
&
post
);
int
len
=
pre
*
n
*
post
;
float
*
y_broadcast
=
nullptr
;
if
(
post
==
1
)
{
int
r
=
xdnn
::
matrix_vector_mul
(
ctx
.
GetRawContext
(),
param
.
X
->
data
<
float
>
(),
param
.
Y
->
data
<
float
>
(),
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
pre
,
n
);
CHECK_EQ
(
r
,
0
);
return
;
}
i
nt
iter
=
std
::
accumulate
(
x_dims
.
begin
(),
x_dims
.
begin
()
+
axis
,
1
,
std
::
multiplies
<
int
>
());
int
stride
=
param
.
Y
->
numel
();
for
(
int
i
=
0
;
i
<
iter
;
++
i
)
{
const
float
*
x_ptr
=
param
.
X
->
data
<
float
>
()
+
i
*
stride
;
const
float
*
y_ptr
=
param
.
Y
->
data
<
float
>
();
float
*
o_ptr
=
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
+
i
*
stride
;
int
r
=
xdnn
::
elementwise_sub
(
ctx
.
GetRawContext
(),
/* context */
x_ptr
,
/* x */
y_ptr
,
/* y */
o_ptr
,
/* z */
stride
/* len */
);
i
f
(
pre
!=
1
||
post
!=
1
)
{
XPUScratchPadGuard
y_broadcast_xpu_guard_
=
TargetWrapperXPU
::
MallocScratchPad
(
len
*
sizeof
(
float
),
false
/* use_l3 */
);
y_broadcast
=
reinterpret_cast
<
float
*>
(
y_broadcast_xpu_guard_
->
addr_
);
int
r
=
xdnn
::
broadcast_ew
(
ctx
.
GetRawContext
(),
param
.
Y
->
data
<
float
>
(),
y_broadcast
,
pre
,
n
,
post
,
xdnn
::
ElementwiseOp
::
ASSIGN
);
CHECK_EQ
(
r
,
0
);
r
=
xdnn
::
elementwise_mul
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
y_broadcast
,
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
return
;
}
int
r
=
xdnn
::
elementwise_mul
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Y
->
data
<
float
>
(),
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
}
void
Elementwise
Div
Compute
::
Run
()
{
void
Elementwise
Sub
Compute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
auto
&
x_dims
=
param
.
X
->
dims
()
.
data
()
;
auto
&
x_dims
=
param
.
X
->
dims
();
auto
&
y_dims
=
param
.
Y
->
dims
();
int
axis
=
param
.
axis
;
if
(
param
.
axis
==
-
1
)
{
axis
=
x_dims
.
size
()
-
y_dims
.
size
();
}
int
iter
=
std
::
accumulate
(
x_dims
.
begin
(),
x_dims
.
begin
()
+
axis
,
1
,
std
::
multiplies
<
int
>
());
int
stride
=
param
.
Y
->
numel
();
for
(
int
i
=
0
;
i
<
iter
;
++
i
)
{
const
float
*
x_ptr
=
param
.
X
->
data
<
float
>
()
+
i
*
stride
;
const
float
*
y_ptr
=
param
.
Y
->
data
<
float
>
();
float
*
o_ptr
=
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
+
i
*
stride
;
int
r
=
xdnn
::
elementwise_div
(
ctx
.
GetRawContext
(),
/* context */
x_ptr
,
/* x */
y_ptr
,
/* y */
o_ptr
,
/* z */
stride
/* len */
);
auto
y_dims_untrimed
=
y_dims
;
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims_untrimed
.
size
()
:
axis
);
auto
y_dims_after_trailing
=
TrimTrailingSingularDims
(
y_dims_untrimed
);
axis
=
(
y_dims_after_trailing
.
size
()
==
0
)
?
x_dims
.
size
()
:
axis
;
int
pre
,
n
,
post
;
GetMidDims
(
x_dims
,
y_dims_after_trailing
,
axis
,
&
pre
,
&
n
,
&
post
);
int
len
=
pre
*
n
*
post
;
float
*
y_broadcast
=
nullptr
;
if
(
len
!=
param
.
Y
->
numel
())
{
XPUScratchPadGuard
y_broadcast_xpu_guard_
=
TargetWrapperXPU
::
MallocScratchPad
(
len
*
sizeof
(
float
),
false
/* use_l3 */
);
y_broadcast
=
reinterpret_cast
<
float
*>
(
y_broadcast_xpu_guard_
->
addr_
);
int
r
=
xdnn
::
broadcast_ew
(
ctx
.
GetRawContext
(),
param
.
Y
->
data
<
float
>
(),
y_broadcast
,
pre
,
n
,
post
,
xdnn
::
ElementwiseOp
::
ASSIGN
);
CHECK_EQ
(
r
,
0
);
r
=
xdnn
::
elementwise_sub
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
y_broadcast
,
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
return
;
}
int
r
=
xdnn
::
elementwise_sub
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Y
->
data
<
float
>
(),
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
}
void
Elementwise
Mul
Compute
::
Run
()
{
void
Elementwise
Div
Compute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
As
<
XPUContext
>
();
auto
&
x_dims
=
param
.
X
->
dims
()
.
data
()
;
auto
&
x_dims
=
param
.
X
->
dims
();
auto
&
y_dims
=
param
.
Y
->
dims
();
int
axis
=
param
.
axis
;
if
(
param
.
axis
==
-
1
)
{
axis
=
x_dims
.
size
()
-
y_dims
.
size
();
}
int
iter
=
std
::
accumulate
(
x_dims
.
begin
(),
x_dims
.
begin
()
+
axis
,
1
,
std
::
multiplies
<
int
>
());
int
stride
=
param
.
Y
->
numel
();
for
(
int
i
=
0
;
i
<
iter
;
++
i
)
{
const
float
*
x_ptr
=
param
.
X
->
data
<
float
>
()
+
i
*
stride
;
const
float
*
y_ptr
=
param
.
Y
->
data
<
float
>
();
float
*
o_ptr
=
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
))
+
i
*
stride
;
int
r
=
xdnn
::
elementwise_mul
(
ctx
.
GetRawContext
(),
/* context */
x_ptr
,
/* x */
y_ptr
,
/* y */
o_ptr
,
/* z */
stride
/* len */
);
auto
y_dims_untrimed
=
y_dims
;
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims_untrimed
.
size
()
:
axis
);
auto
y_dims_after_trailing
=
TrimTrailingSingularDims
(
y_dims_untrimed
);
axis
=
(
y_dims_after_trailing
.
size
()
==
0
)
?
x_dims
.
size
()
:
axis
;
int
pre
,
n
,
post
;
GetMidDims
(
x_dims
,
y_dims_after_trailing
,
axis
,
&
pre
,
&
n
,
&
post
);
int
len
=
pre
*
n
*
post
;
float
*
y_broadcast
=
nullptr
;
if
(
len
!=
param
.
Y
->
numel
())
{
XPUScratchPadGuard
y_broadcast_xpu_guard_
=
TargetWrapperXPU
::
MallocScratchPad
(
len
*
sizeof
(
float
),
false
/* use_l3 */
);
y_broadcast
=
reinterpret_cast
<
float
*>
(
y_broadcast_xpu_guard_
->
addr_
);
int
r
=
xdnn
::
broadcast_ew
(
ctx
.
GetRawContext
(),
param
.
Y
->
data
<
float
>
(),
y_broadcast
,
pre
,
n
,
post
,
xdnn
::
ElementwiseOp
::
ASSIGN
);
CHECK_EQ
(
r
,
0
);
r
=
xdnn
::
elementwise_div
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
y_broadcast
,
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
return
;
}
int
r
=
xdnn
::
elementwise_div
(
ctx
.
GetRawContext
(),
/* context */
param
.
X
->
data
<
float
>
(),
/* x */
param
.
Y
->
data
<
float
>
(),
/* y */
param
.
Out
->
mutable_data
<
float
>
(
TARGET
(
kXPU
)),
/* z */
len
);
CHECK_EQ
(
r
,
0
);
}
}
// namespace xpu
}
// namespace kernels
}
// namespace lite
...
...
@@ -145,33 +336,33 @@ REGISTER_LITE_KERNEL(elementwise_add,
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
elementwise_
sub
,
REGISTER_LITE_KERNEL
(
elementwise_
mul
,
kXPU
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
xpu
::
Elementwise
Sub
Compute
,
paddle
::
lite
::
kernels
::
xpu
::
Elementwise
Mul
Compute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
elementwise_
div
,
REGISTER_LITE_KERNEL
(
elementwise_
sub
,
kXPU
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
xpu
::
Elementwise
Div
Compute
,
paddle
::
lite
::
kernels
::
xpu
::
Elementwise
Sub
Compute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
elementwise_
mul
,
REGISTER_LITE_KERNEL
(
elementwise_
div
,
kXPU
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
xpu
::
Elementwise
Mul
Compute
,
paddle
::
lite
::
kernels
::
xpu
::
Elementwise
Div
Compute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kXPU
))})
...
...
lite/operators/__xpu__conv2d_op.cc
浏览文件 @
194e5a76
...
...
@@ -138,7 +138,7 @@ bool XPUConv2dOp::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
param_
.
dilations
=
std
::
make_shared
<
std
::
vector
<
int
>>
(
dilations
);
param_
.
groups
=
op_desc
.
GetAttr
<
int
>
(
"groups"
);
if
(
op_desc
.
HasAttr
(
"act_type"
))
{
param_
.
act_type
=
op_desc
.
GetAttr
<
int
>
(
"act_type"
);
param_
.
act_type
=
op_desc
.
GetAttr
<
std
::
string
>
(
"act_type"
);
}
if
(
op_desc
.
HasAttr
(
"filter_type"
))
{
...
...
lite/operators/op_params.h
浏览文件 @
194e5a76
...
...
@@ -1836,7 +1836,7 @@ struct XPUConv2dParam : ParamBase {
lite
::
Tensor
*
OutputMax
{
nullptr
};
int
groups
{
1
};
int
act_type
{
-
1
};
std
::
string
act_type
{
""
};
std
::
string
filter_type
{
""
};
std
::
vector
<
int
>
strides
;
std
::
shared_ptr
<
std
::
vector
<
int
>>
paddings
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录