Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
a3d56a9c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a3d56a9c
编写于
5月 01, 2022
作者:
L
Lijunhui
提交者:
GitHub
5月 01, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[KP] Complete registry of elementwise ops on XPU with KP (#42056)
上级
ba486c5e
变更
14
隐藏空白更改
内联
并排
Showing
14 changed files
with
82 additions
and
15 deletions
+82
-15
paddle/fluid/framework/new_executor/standalone_executor_test.cc
.../fluid/framework/new_executor/standalone_executor_test.cc
+2
-1
paddle/fluid/operators/reduce_ops/reduce_amax_op.cu
paddle/fluid/operators/reduce_ops/reduce_amax_op.cu
+1
-0
paddle/fluid/operators/reduce_ops/reduce_amin_op.cu
paddle/fluid/operators/reduce_ops/reduce_amin_op.cu
+1
-0
paddle/fluid/operators/reduce_ops/reduce_op.h
paddle/fluid/operators/reduce_ops/reduce_op.h
+9
-4
paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
+4
-0
paddle/phi/kernels/elementwise_kernel.cc
paddle/phi/kernels/elementwise_kernel.cc
+4
-4
paddle/phi/kernels/funcs/elementwise_functor.h
paddle/phi/kernels/funcs/elementwise_functor.h
+7
-0
paddle/phi/kernels/kps/elementwise_add_kernel.cu
paddle/phi/kernels/kps/elementwise_add_kernel.cu
+1
-0
paddle/phi/kernels/kps/elementwise_divide_kernel.cu
paddle/phi/kernels/kps/elementwise_divide_kernel.cu
+1
-0
paddle/phi/kernels/kps/elementwise_kernel.cu
paddle/phi/kernels/kps/elementwise_kernel.cu
+41
-0
paddle/phi/kernels/kps/elementwise_multiply_kernel.cu
paddle/phi/kernels/kps/elementwise_multiply_kernel.cu
+1
-0
paddle/phi/kernels/kps/elementwise_subtract_kernel.cu
paddle/phi/kernels/kps/elementwise_subtract_kernel.cu
+1
-0
paddle/phi/kernels/kps/logical_kernel.cu
paddle/phi/kernels/kps/logical_kernel.cu
+3
-3
paddle/phi/kernels/primitive/functor_primitives_xpu2.h
paddle/phi/kernels/primitive/functor_primitives_xpu2.h
+6
-3
未找到文件。
paddle/fluid/framework/new_executor/standalone_executor_test.cc
浏览文件 @
a3d56a9c
...
...
@@ -74,11 +74,12 @@ PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
PD_DECLARE_KERNEL
(
multiply
,
KPS
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
multiply_grad
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
divide
,
KPS
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
maximum
,
GPU
,
ALL_LAYOUT
);
#ifdef PADDLE_WITH_XPU_KP
PD_DECLARE_KERNEL
(
max_raw
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
maximum
,
GPU
,
ALL_LAYOUT
);
#else
PD_DECLARE_KERNEL
(
max_raw
,
KPS
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
maximum
,
KPS
,
ALL_LAYOUT
);
#endif
PD_DECLARE_KERNEL
(
mean
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
mean_grad
,
GPU
,
ALL_LAYOUT
);
...
...
paddle/fluid/operators/reduce_ops/reduce_amax_op.cu
浏览文件 @
a3d56a9c
...
...
@@ -11,6 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
...
...
paddle/fluid/operators/reduce_ops/reduce_amin_op.cu
浏览文件 @
a3d56a9c
...
...
@@ -11,6 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
...
...
paddle/fluid/operators/reduce_ops/reduce_op.h
浏览文件 @
a3d56a9c
...
...
@@ -29,7 +29,7 @@ limitations under the License. */
#include "paddle/phi/api/lib/utils/tensor_utils.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#if defined(__HIPCC__) || defined(__NVCC__)
#if defined(__HIPCC__) || defined(__NVCC__)
|| defined(__xpu__)
#include "paddle/phi/kernels/gpu/reduce.h"
#include "paddle/phi/kernels/gpu/reduce_grad.h"
#endif
...
...
@@ -613,7 +613,7 @@ If reduce_all is true, just reduce along all dimensions and output a scalar.
virtual
std
::
string
GetOpType
()
const
=
0
;
};
#if defined(__HIPCC__) || defined(__NVCC__)
#if defined(__HIPCC__) || defined(__NVCC__)
|| defined(__xpu__)
template
<
typename
T
,
template
<
typename
>
class
ReduceOp
,
template
<
typename
,
typename
>
class
TransformOp
>
class
ReduceCudaKernel
:
public
framework
::
OpKernel
<
T
>
{
...
...
@@ -626,9 +626,12 @@ class ReduceCudaKernel : public framework::OpKernel<T> {
auto
pt_out_dtype
=
paddle
::
framework
::
TransToPhiDataType
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
out_dtype
));
std
::
vector
<
int
>
dims
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
#ifdef PADDLE_WITH_XPU_KP
auto
&
dev_ctx
=
context
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
#else
auto
&
dev_ctx
=
context
.
cuda_device_context
();
#endif
if
(
out_dtype
>=
0
)
{
output
->
mutable_data
(
dev_ctx
.
GetPlace
(),
pt_out_dtype
);
}
else
{
...
...
@@ -642,6 +645,7 @@ class ReduceCudaKernel : public framework::OpKernel<T> {
}
};
#ifndef PADDLE_WITH_XPU_KP
template
<
typename
T
,
template
<
typename
,
typename
>
class
TransformOp
>
class
ReduceCudaGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -686,6 +690,7 @@ class ReduceCudaGradKernel : public framework::OpKernel<T> {
}
};
#endif
#endif
}
// namespace operators
}
// namespace paddle
...
...
paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
浏览文件 @
a3d56a9c
...
...
@@ -42,6 +42,8 @@ XPUOpMap& get_kp_ops() {
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"elementwise_floordiv"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
INT32
,
XPUPlace
())})},
{
"elementwise_pow"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
// activation op
{
"exp"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"hard_swish"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
...
...
@@ -105,6 +107,8 @@ XPUOpMap& get_kp_ops() {
{
"reduce_prod"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"reduce_all"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
BOOL
,
XPUPlace
())})},
{
"reduce_any"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
BOOL
,
XPUPlace
())})},
{
"reduce_amax"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"reduce_amin"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
};
return
s_xpu_kp_kernels
;
...
...
paddle/phi/kernels/elementwise_kernel.cc
浏览文件 @
a3d56a9c
...
...
@@ -103,7 +103,7 @@ PD_REGISTER_KERNEL(elementwise_pow,
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL
(
maximum
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
MaximumKernel
,
float
,
...
...
@@ -113,7 +113,7 @@ PD_REGISTER_KERNEL(maximum,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
)
{}
PD_REGISTER_KERNEL
(
minimum
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
MinimumKernel
,
float
,
...
...
@@ -125,9 +125,9 @@ PD_REGISTER_KERNEL(minimum,
PD_REGISTER_KERNEL
(
modulo
,
GPU
,
ALL_LAYOUT
,
phi
::
ModuloKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
floor_divide
,
GPU
,
ALL_LAYOUT
,
phi
::
FloorDivideKernel
,
int
,
int64_t
)
{}
floor_divide
,
KPS
,
ALL_LAYOUT
,
phi
::
FloorDivideKernel
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
elementwise_pow
,
GPU
,
KPS
,
ALL_LAYOUT
,
phi
::
ElementwisePowKernel
,
float
,
...
...
paddle/phi/kernels/funcs/elementwise_functor.h
浏览文件 @
a3d56a9c
...
...
@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/hostdevice.h"
#if defined(__xpu__)
#include <xpu/runtime.h>
#include "xpu/kernel/math_xpu2.h" //pow()
#endif
namespace
phi
{
namespace
funcs
{
...
...
@@ -573,6 +577,9 @@ struct ElementwisePowFunctor {
return
std
::
llrint
(
std
::
pow
(
static_cast
<
double
>
(
a
),
static_cast
<
double
>
(
b
)));
}
#endif
#ifdef PADDLE_WITH_XPU_KP
return
pow
(
a
,
b
);
#endif
return
std
::
pow
(
a
,
b
);
}
...
...
paddle/phi/kernels/kps/elementwise_add_kernel.cu
浏览文件 @
a3d56a9c
...
...
@@ -36,6 +36,7 @@ void AddKernel(const Context& dev_ctx,
}
// namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL
(
add
,
KPS
,
ALL_LAYOUT
,
phi
::
AddKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
add_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
AddRawKernel
,
float
)
{}
#else
...
...
paddle/phi/kernels/kps/elementwise_divide_kernel.cu
浏览文件 @
a3d56a9c
...
...
@@ -37,6 +37,7 @@ void DivideKernel(const Context& dev_ctx,
}
// namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL
(
divide
,
KPS
,
ALL_LAYOUT
,
phi
::
DivideKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
divide_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
DivideRawKernel
,
float
)
{}
#else
...
...
paddle/phi/kernels/kps/elementwise_kernel.cu
浏览文件 @
a3d56a9c
...
...
@@ -24,24 +24,65 @@ namespace phi {
// Create the definition of Maximum
DEFINE_CUDA_ELEMENTWISE_OP
(
Maximum
)
template
<
typename
T
,
typename
Context
>
void
MaximumKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
int
axis
=
-
1
;
MaximumRawKernel
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
out
);
}
// Create the definition of Minimum
DEFINE_CUDA_ELEMENTWISE_OP
(
Minimum
)
template
<
typename
T
,
typename
Context
>
void
MinimumKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
int
axis
=
-
1
;
MinimumRawKernel
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
out
);
}
// Create the definition of Modulo
DEFINE_CUDA_ELEMENTWISE_OP
(
Modulo
)
// Create the definition of FloorDivide
DEFINE_CUDA_ELEMENTWISE_OP
(
FloorDivide
)
template
<
typename
T
,
typename
Context
>
void
FloorDivideKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
int
axis
=
-
1
;
FloorDivideRawKernel
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
out
);
}
// Create the definition of Pow
DEFINE_CUDA_ELEMENTWISE_OP
(
ElementwisePow
)
template
<
typename
T
,
typename
Context
>
void
ElementwisePowKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
int
axis
=
-
1
;
ElementwisePowRawKernel
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
out
);
}
}
// namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL
(
maximum
,
KPS
,
ALL_LAYOUT
,
phi
::
MaximumKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
maximum_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
MaximumRawKernel
,
float
)
{
}
PD_REGISTER_KERNEL
(
minimum
,
KPS
,
ALL_LAYOUT
,
phi
::
MinimumKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
minimum_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
MinimumRawKernel
,
float
)
{
}
PD_REGISTER_KERNEL
(
floor_divide
,
KPS
,
ALL_LAYOUT
,
phi
::
FloorDivideKernel
,
int
)
{
}
PD_REGISTER_KERNEL
(
floor_divide_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
FloorDivideRawKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
elementwise_pow
,
KPS
,
ALL_LAYOUT
,
phi
::
ElementwisePowKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
elementwise_pow_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
ElementwisePowRawKernel
,
float
)
{
}
#else
using
float16
=
phi
::
dtype
::
float16
;
...
...
paddle/phi/kernels/kps/elementwise_multiply_kernel.cu
浏览文件 @
a3d56a9c
...
...
@@ -37,6 +37,7 @@ void MultiplyKernel(const Context& dev_ctx,
}
// namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL
(
multiply
,
KPS
,
ALL_LAYOUT
,
phi
::
MultiplyKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
multiply_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
MultiplyRawKernel
,
float
)
{}
#else
...
...
paddle/phi/kernels/kps/elementwise_subtract_kernel.cu
浏览文件 @
a3d56a9c
...
...
@@ -37,6 +37,7 @@ void SubtractKernel(const Context& dev_ctx,
}
// namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL
(
subtract
,
KPS
,
ALL_LAYOUT
,
phi
::
SubtractKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
subtract_raw
,
KPS
,
ALL_LAYOUT
,
phi
::
SubtractRawKernel
,
float
)
{}
#else
...
...
paddle/phi/kernels/kps/logical_kernel.cu
浏览文件 @
a3d56a9c
...
...
@@ -65,9 +65,9 @@ void LogicalNotKernel(const Context& dev_ctx,
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL
(
logical_and
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalAndKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
logical_
O
r
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalOrKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
logical_
N
ot
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalNotKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
logical_
X
or
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalXorKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
logical_
o
r
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalOrKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
logical_
n
ot
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalNotKernel
,
int
)
{}
PD_REGISTER_KERNEL
(
logical_
x
or
,
KPS
,
ALL_LAYOUT
,
phi
::
LogicalXorKernel
,
int
)
{}
#else
#define REGISTER_LOGICAL_CUDA_KERNEL(logical_and, func_type) \
PD_REGISTER_KERNEL(logical_and, \
...
...
paddle/phi/kernels/primitive/functor_primitives_xpu2.h
100755 → 100644
浏览文件 @
a3d56a9c
...
...
@@ -124,7 +124,8 @@ struct MaxFunctor {
*/
template
<
typename
T
>
struct
AddFunctor
{
inline
T
initial
()
{
return
static_cast
<
T
>
(
0.0
f
);
}
inline
T
initial
()
{
/*return static_cast<T>(0.0f);*/
}
__device__
T
operator
()(
const
T
a
,
const
T
b
)
const
{
return
b
+
a
;
}
};
...
...
@@ -134,7 +135,8 @@ struct AddFunctor {
*/
template
<
typename
T
>
struct
MulFunctor
{
inline
T
initial
()
{
return
static_cast
<
T
>
(
1.0
f
);
}
inline
T
initial
()
{
/*return static_cast<T>(1.0f);*/
}
__device__
T
operator
()(
const
T
&
a
,
const
T
&
b
)
const
{
return
b
*
a
;
}
};
...
...
@@ -144,7 +146,8 @@ struct MulFunctor {
*/
template
<
typename
T
>
struct
LogicalOrFunctor
{
inline
T
initial
()
{
return
static_cast
<
T
>
(
false
);
}
inline
T
initial
()
{
/*return static_cast<T>(false);*/
}
__device__
T
operator
()(
const
T
&
a
,
const
T
&
b
)
const
{
return
b
||
a
;
}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录