Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
fbe2c311
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fbe2c311
编写于
4月 14, 2022
作者:
L
Lijunhui
提交者:
GitHub
4月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[KP] Add registry for elementwise_add/max/min/sub/div/mul/floordiv on XPU2 with KP lib (#41494)
* register elementwise_xxx
上级
4733fe60
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
51 additions
and
14 deletions
+51
-14
paddle/fluid/framework/new_executor/standalone_executor_test.cc
.../fluid/framework/new_executor/standalone_executor_test.cc
+4
-0
paddle/fluid/operators/elementwise/elementwise_add_op.kps
paddle/fluid/operators/elementwise/elementwise_add_op.kps
+1
-1
paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
+12
-0
paddle/phi/kernels/funcs/elementwise_functor.h
paddle/phi/kernels/funcs/elementwise_functor.h
+4
-0
paddle/phi/kernels/impl/elementwise_kernel_impl.h
paddle/phi/kernels/impl/elementwise_kernel_impl.h
+1
-1
paddle/phi/kernels/kps/elementwise_kernel.cu
paddle/phi/kernels/kps/elementwise_kernel.cu
+29
-12
未找到文件。
paddle/fluid/framework/new_executor/standalone_executor_test.cc
浏览文件 @
fbe2c311
...
...
@@ -69,7 +69,11 @@ PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT);
// Kernel declarations used by this test. Each PD_DECLARE_KERNEL invocation
// names a kernel, a backend and a layout.
PD_DECLARE_KERNEL
(
concat
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
concat_grad
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
matmul
,
GPU
,
ALL_LAYOUT
);
// add_raw is declared under a different backend depending on the build flag.
// NOTE(review): the PADDLE_WITH_XPU_KP branch declares the GPU backend while
// the fallback branch declares KPS; the add_raw registration shown for the
// XPU KP build in elementwise_kernel.cu uses KPS. Confirm the two branches
// are not swapped.
#ifdef PADDLE_WITH_XPU_KP
PD_DECLARE_KERNEL
(
add_raw
,
GPU
,
ALL_LAYOUT
);
#else
PD_DECLARE_KERNEL
(
add_raw
,
KPS
,
ALL_LAYOUT
);
#endif
PD_DECLARE_KERNEL
(
add
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
mean
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
sigmoid
,
GPU
,
ALL_LAYOUT
);
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.kps
浏览文件 @
fbe2c311
...
...
@@ -58,4 +58,4 @@ REGISTER_OP_CUDA_KERNEL(
ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::bfloat16>,
ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<float>>,
ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<double>>);
#endif
\ No newline at end of file
#endif
paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
浏览文件 @
fbe2c311
...
...
@@ -30,6 +30,18 @@ XPUOpMap& get_kp_ops() {
static
XPUOpMap
s_xpu_kp_kernels
{
{
"elementwise_add"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_div"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_sub"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_max"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_min"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_mul"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_floordiv"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
INT32
,
XPUPlace
())})},
// activation op
{
"exp"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"hard_swish"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
...
...
paddle/phi/kernels/funcs/elementwise_functor.h
浏览文件 @
fbe2c311
...
...
@@ -542,7 +542,9 @@ struct InverseModuloFunctor<
// Binary functor returning a / b truncated toward zero (via std::trunc) and
// cast back to T. The b != 0 check is compiled out when PADDLE_WITH_XPU_KP
// is defined, so in that build a zero divisor is not diagnosed here.
template <typename T>
struct FloorDivideFunctor {
  inline HOSTDEVICE T operator()(const T a, const T b) const {
#ifndef PADDLE_WITH_XPU_KP
    PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
#endif
    // Divide first, then drop any fractional part of the quotient.
    const auto quotient = a / b;
    return static_cast<T>(std::trunc(quotient));
  }
};
...
...
@@ -550,7 +552,9 @@ struct FloorDivideFunctor {
// Binary functor with the operands swapped relative to FloorDivideFunctor:
// returns b / a truncated toward zero (via std::trunc) and cast back to T.
// The a != 0 check is compiled out when PADDLE_WITH_XPU_KP is defined.
template <typename T>
struct InverseFloorDivideFunctor {
  inline HOSTDEVICE T operator()(const T a, const T b) const {
#ifndef PADDLE_WITH_XPU_KP
    PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
#endif
    // Here `a` is the divisor and `b` the dividend.
    const auto quotient = b / a;
    return static_cast<T>(std::trunc(quotient));
  }
};
...
...
paddle/phi/kernels/impl/elementwise_kernel_impl.h
浏览文件 @
fbe2c311
...
...
@@ -17,7 +17,7 @@
#include "paddle/phi/kernels/elementwise_kernel.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#if defined(__NVCC__) || defined(__HIPCC__)
#if defined(__NVCC__) || defined(__HIPCC__)
|| defined(__xpu__)
#include "paddle/phi/kernels/funcs/broadcast_function.h"
#endif
...
...
paddle/phi/kernels/
gpu
/elementwise_kernel.cu
→
paddle/phi/kernels/
kps
/elementwise_kernel.cu
浏览文件 @
fbe2c311
...
...
@@ -13,8 +13,10 @@
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#ifndef PADDLE_WITH_XPU_KP
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
...
...
@@ -40,7 +42,6 @@ namespace phi {
/**
* Kernels
*/
// Create the definition of Add
DEFINE_CUDA_ELEMENTWISE_OP
(
Add
)
// Create the definition of Subtract
...
...
@@ -62,19 +63,34 @@ DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow)
}
// namespace phi
#ifdef PADDLE_WITH_XPU_KP
// Kernel registrations compiled only when PADDLE_WITH_XPU_KP is defined.
// Every kernel below is registered for the KPS backend with ALL_LAYOUT and a
// single data type: float for the arithmetic/min/max kernels, int for
// floor_divide_raw. All registration bodies are empty.
PD_REGISTER_KERNEL
(
add_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
AddRawKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
subtract_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
SubtractRawKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
divide_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
DivideRawKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
multiply_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
MultiplyRawKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
maximum_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
MaximumRawKernel
,
float
)
{
}
PD_REGISTER_KERNEL
(
minimum_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
MinimumRawKernel
,
float
)
{
}
// floor_divide_raw is the only kernel in this branch registered for int
// rather than float.
PD_REGISTER_KERNEL
(
floor_divide_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
FloorDivideRawKernel
,
int
)
{}
#else
using
float16
=
phi
::
dtype
::
float16
;
using
bfloat16
=
phi
::
dtype
::
bfloat16
;
using
complex64
=
::
phi
::
dtype
::
complex
<
float
>
;
using
complex128
=
::
phi
::
dtype
::
complex
<
double
>
;
PD_REGISTER_KERNEL
(
fmax
,
GPU
,
ALL_LAYOUT
,
phi
::
FMaxKernel
,
float
,
double
,
int
,
int64_t
)
{}
fmax
,
KPS
,
ALL_LAYOUT
,
phi
::
FMaxKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
fmin
,
GPU
,
ALL_LAYOUT
,
phi
::
FMinKernel
,
float
,
double
,
int
,
int64_t
)
{}
fmin
,
KPS
,
ALL_LAYOUT
,
phi
::
FMinKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
add_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
AddRawKernel
,
float
,
...
...
@@ -87,7 +103,7 @@ PD_REGISTER_KERNEL(add_raw,
complex64
,
complex128
)
{}
PD_REGISTER_KERNEL
(
subtract_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
SubtractRawKernel
,
float
,
...
...
@@ -100,7 +116,7 @@ PD_REGISTER_KERNEL(subtract_raw,
complex64
,
complex128
)
{}
PD_REGISTER_KERNEL
(
divide_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
DivideRawKernel
,
float
,
...
...
@@ -112,7 +128,7 @@ PD_REGISTER_KERNEL(divide_raw,
complex64
,
complex128
)
{}
PD_REGISTER_KERNEL
(
multiply_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
MultiplyRawKernel
,
float
,
...
...
@@ -125,7 +141,7 @@ PD_REGISTER_KERNEL(multiply_raw,
complex128
,
bfloat16
)
{}
PD_REGISTER_KERNEL
(
maximum_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
MaximumRawKernel
,
float
,
...
...
@@ -135,7 +151,7 @@ PD_REGISTER_KERNEL(maximum_raw,
float16
,
bfloat16
)
{}
PD_REGISTER_KERNEL
(
minimum_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
MinimumRawKernel
,
float
,
...
...
@@ -145,7 +161,7 @@ PD_REGISTER_KERNEL(minimum_raw,
float16
,
bfloat16
)
{}
PD_REGISTER_KERNEL
(
modulo_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
ModuloRawKernel
,
float
,
...
...
@@ -153,16 +169,17 @@ PD_REGISTER_KERNEL(modulo_raw,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
floor_divide_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
FloorDivideRawKernel
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
elementwise_pow_raw
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
ElementwisePowRawKernel
,
float
,
double
,
int
,
int64_t
)
{}
#endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录