Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5829069d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5829069d
编写于
9月 14, 2022
作者:
Y
ykkk2333
提交者:
GitHub
9月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[XPU] migrate reduce kernels to phi, test=kunlun (#45973)
上级
d7e74e63
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
423 addition
and
412 deletion
+423
-412
paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc
paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc
+0
-165
paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc
paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc
+0
-161
paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc
paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc
+0
-83
paddle/phi/kernels/reduce_max_kernel.cc
paddle/phi/kernels/reduce_max_kernel.cc
+5
-1
paddle/phi/kernels/reduce_mean_kernel.cc
paddle/phi/kernels/reduce_mean_kernel.cc
+5
-1
paddle/phi/kernels/reduce_prod_kernel.cc
paddle/phi/kernels/reduce_prod_kernel.cc
+5
-1
paddle/phi/kernels/xpu/reduce.h
paddle/phi/kernels/xpu/reduce.h
+81
-0
paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc
paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc
+113
-0
paddle/phi/kernels/xpu/reduce_max_kernel.cc
paddle/phi/kernels/xpu/reduce_max_kernel.cc
+43
-0
paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
+85
-0
paddle/phi/kernels/xpu/reduce_mean_kernel.cc
paddle/phi/kernels/xpu/reduce_mean_kernel.cc
+43
-0
paddle/phi/kernels/xpu/reduce_prod_kernel.cc
paddle/phi/kernels/xpu/reduce_prod_kernel.cc
+43
-0
未找到文件。
paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc
已删除
100644 → 0
浏览文件 @
d7e74e63
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <memory>
#include <string>
#include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
namespace
paddle
{
namespace
operators
{
// Forward XPU kernel for the reduce_max operator.
//
// Compute() forwards the execution context to the shared XPUReduce helper and
// supplies xpu::reduce_max<T> as the reduction routine to run.
template <typename DeviceContext, typename T>
class ReduceMaxXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    XPUReduce<DeviceContext, T>(context, xpu::reduce_max<T>);
  }
};
// Backward XPU kernel for the reduce_max operator.
//
// Reads "X", "Out" and the gradient of "Out" from the execution context and
// writes the gradient of "X". The computation runs in three steps:
//   1. broadcast Out and d(Out) from the reduced shape to the shape of X,
//   2. compare X element-wise against the broadcast Out,
//   3. combine the comparison mask, the broadcast d(Out) and a zero buffer
//      with xpu::select to produce d(X).
//
// Attributes read: "dim" (axes; negative values are offset by the rank of X),
// "reduce_all", and "in_dtype" (must be -1).
template <typename DeviceContext, typename T>
class ReduceMaxGradXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto dims = context.Attr<std::vector<int>>("dim");
    bool reduce_all = context.Attr<bool>("reduce_all");
    auto* x = context.Input<Tensor>("X");
    auto* out = context.Input<Tensor>("Out");
    auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));

    int in_dtype = context.Attr<int>("in_dtype");
    // The message names this op (it previously said reduce_sum_grad).
    PADDLE_ENFORCE_EQ(
        in_dtype == -1,
        true,
        platform::errors::InvalidArgument(
            "XPU only support in_dtype == -1 in reduce_max_grad op."));

    auto& dev_ctx = context.template device_context<DeviceContext>();
    x_grad->mutable_data<T>(context.GetPlace());
    const T* x_data = x->data<T>();
    const T* out_data = out->data<T>();
    const T* out_grad_data = out_grad->data<T>();
    auto* x_grad_data = x_grad->data<T>();

    // Normalize negative axes against the rank of X.
    const auto& input_dim_size = x->dims().size();
    std::vector<int> true_dims;
    for (size_t i = 0; i < dims.size(); ++i) {
      if (dims[i] < 0) {
        true_dims.push_back(dims[i] + input_dim_size);
      } else {
        true_dims.push_back(dims[i]);
      }
    }

    // xdims is the shape of X; ydims is the same shape with every reduced
    // axis set to 1, i.e. the shape Out/d(Out) are broadcast from.
    std::vector<int> ydims(input_dim_size);
    std::vector<int> xdims((input_dim_size));
    std::set<int> dims_set(true_dims.begin(), true_dims.end());
    for (auto i = 0; i < input_dim_size; i++) {
      xdims[i] = x->dims()[i];
      if (dims_set.find(i) != dims_set.end() || reduce_all) {
        ydims[i] = 1;
      } else {
        ydims[i] = x->dims()[i];
      }
    }

    // NOTE(review): these buffers are released only at the end of Compute();
    // if any PADDLE_ENFORCE_EQ below throws, they are not freed.
    T* brocast1 = nullptr;
    T* brocast2 = nullptr;
    bool* equal = nullptr;
    PADDLE_ENFORCE_EQ(
        xpu_malloc(reinterpret_cast<void**>(&brocast1),
                   x->numel() * sizeof(T)),
        XPU_SUCCESS,
        platform::errors::ResourceExhausted("XPU has no enough memory"));
    PADDLE_ENFORCE_EQ(
        xpu_malloc(reinterpret_cast<void**>(&equal),
                   x->numel() * sizeof(bool)),
        XPU_SUCCESS,
        platform::errors::ResourceExhausted("XPU has no enough memory"));
    PADDLE_ENFORCE_EQ(
        xpu_malloc(reinterpret_cast<void**>(&brocast2),
                   x->numel() * sizeof(T)),
        XPU_SUCCESS,
        platform::errors::ResourceExhausted("XPU has no enough memory"));

    // Step 1: broadcast out and out_grad to the shape of x.
    int r = xpu::broadcast<T>(
        dev_ctx.x_context(), out_data, brocast1, ydims, xdims);
    PADDLE_ENFORCE_EQ(
        r == xpu::Error_t::SUCCESS,
        true,
        platform::errors::External("XPU broadcast in reduce_max_grad op return"
                                   " wrong value[%d %s].",
                                   r,
                                   XPUAPIErrorMsg[r]));
    r = xpu::broadcast<T>(
        dev_ctx.x_context(), out_grad_data, brocast2, ydims, xdims);
    PADDLE_ENFORCE_EQ(
        r == xpu::Error_t::SUCCESS,
        true,
        platform::errors::External("XPU broadcast in reduce_max_grad op return"
                                   " wrong value[%d %s].",
                                   r,
                                   XPUAPIErrorMsg[r]));

    // Step 2: compare the broadcast out against x.
    r = xpu::equal<T>(
        dev_ctx.x_context(), x_data, brocast1, equal, x->numel());
    PADDLE_ENFORCE_EQ(
        r == xpu::Error_t::SUCCESS,
        true,
        platform::errors::External("XPU equal in reduce_max_grad "
                                   "op return wrong value[%d %s].",
                                   r,
                                   XPUAPIErrorMsg[r]));

    // Step 3: compute x_grad. brocast1 is reused as an all-zero buffer.
    r = xpu::constant<T>(dev_ctx.x_context(), brocast1, x->numel(), 0);
    PADDLE_ENFORCE_EQ(
        r == xpu::Error_t::SUCCESS,
        true,
        platform::errors::External("XPU constant in reduce_max_grad op return"
                                   " wrong value[%d %s].",
                                   r,
                                   XPUAPIErrorMsg[r]));
    r = xpu::select<T>(dev_ctx.x_context(),
                       equal,
                       brocast2,
                       brocast1,
                       x_grad_data,
                       xdims,
                       xdims);
    PADDLE_ENFORCE_EQ(
        r == xpu::Error_t::SUCCESS,
        true,
        platform::errors::External("XPU select in reduce_max_grad op return"
                                   " wrong value[%d %s].",
                                   r,
                                   XPUAPIErrorMsg[r]));

    // Wait on the context (when it has a stream) before freeing the scratch
    // buffers the calls above were given.
    if (dev_ctx.x_context()->xpu_stream) {
      dev_ctx.Wait();
    }
    xpu_free(brocast1);
    xpu_free(brocast2);
    xpu_free(equal);
  }
};
}
// namespace operators
}
// namespace paddle
// Registers ReduceMaxXPUKernel<XPUDeviceContext, float> under the op name
// reduce_max.
// NOTE(review): `ops` is not declared in the visible part of this file;
// presumably a namespace alias for paddle::operators -- confirm.
REGISTER_OP_XPU_KERNEL
(
reduce_max
,
ops
::
ReduceMaxXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
// Registers ReduceMaxGradXPUKernel<XPUDeviceContext, float> under the op name
// reduce_max_grad.
REGISTER_OP_XPU_KERNEL
(
reduce_max_grad
,
ops
::
ReduceMaxGradXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
#endif
paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc
已删除
100644 → 0
浏览文件 @
d7e74e63
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
namespace
paddle
{
namespace
operators
{
// Forward XPU kernel for the reduce_mean operator.
//
// Reads input "X" and the "reduce_all" / "dim" attributes, allocates output
// "Out" on the current place and calls xpu::reduce_mean over the selected
// axes. Negative entries of "dim" are offset by the rank of the input; when
// "reduce_all" is set every axis is reduced and "dim" is not consulted.
template <typename DeviceContext, typename T>
class ReduceMeanXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    PADDLE_ENFORCE_EQ(
        platform::is_xpu_place(context.GetPlace()),
        true,
        platform::errors::Unavailable("This kernel only runs on XPU."));

    const bool reduce_all = context.Attr<bool>("reduce_all");
    auto* input = context.Input<Tensor>("X");
    auto* output = context.Output<Tensor>("Out");
    output->mutable_data<T>(context.GetPlace());
    auto& dev_ctx = context.template device_context<DeviceContext>();

    // Shape of the input as the int vector handed to the XPU call.
    const auto rank = input->dims().size();
    std::vector<int> xdims;
    for (int axis = 0; axis < rank; ++axis) {
      xdims.push_back(input->dims()[axis]);
    }

    // Axes to reduce: all of them, or the normalized "dim" attribute.
    const auto rdims = context.Attr<std::vector<int>>("dim");
    std::vector<int> reduce_dims;
    if (reduce_all) {
      for (size_t axis = 0; axis < xdims.size(); ++axis) {
        reduce_dims.push_back(static_cast<int>(axis));
      }
    } else {
      for (const int axis : rdims) {
        reduce_dims.push_back(axis < 0 ? axis + rank : axis);
      }
    }

    const auto* src = reinterpret_cast<const XPUType*>(input->data<T>());
    auto* dst = reinterpret_cast<XPUType*>(output->data<T>());
    int r =
        xpu::reduce_mean(dev_ctx.x_context(), src, dst, xdims, reduce_dims);
    PADDLE_ENFORCE_EQ(r,
                      XPU_SUCCESS,
                      platform::errors::External(
                          "XPU reduce_mean kernel return wrong value[%d %s]",
                          r,
                          XPUAPIErrorMsg[r]));
  }
};
// Backward XPU kernel for the reduce_mean operator.
//
// Reads "X" (for its shape) and the gradient of "Out", and writes the gradient
// of "X". The output is first filled with 1 / reduce_numel, where reduce_numel
// is the product of the reduced extents of X, and then multiplied in place by
// d(Out) via xpu::broadcast_mul.
//
// Attributes read: "reduce_all", "dim" (negative values are offset by the rank
// of X) and "keep_dim". When keep_dim is false, a 1 is inserted into the
// d(Out) shape at every reduced axis before the broadcast multiply.
template <typename DeviceContext, typename T>
class ReduceMeanGradXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("X");
    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));

    XPUType* x_data = reinterpret_cast<XPUType*>(
        input_grad->mutable_data<T>(ctx.GetPlace()));
    const XPUType* dy_data =
        reinterpret_cast<const XPUType*>(output_grad->data<T>());

    bool reduce_all = ctx.Attr<bool>("reduce_all");
    auto reduce_dims = ctx.Attr<std::vector<int>>("dim");
    bool keep_dim = ctx.Attr<bool>("keep_dim");

    std::vector<int> xdims;
    for (int i = 0; i < input->dims().size(); i++) {
      xdims.push_back(input->dims()[i]);
    }
    std::vector<int> ydims;
    for (int i = 0; i < output_grad->dims().size(); i++) {
      ydims.push_back(output_grad->dims()[i]);
    }

    if (reduce_all) {
      reduce_dims.clear();
      for (size_t d = 0; d < xdims.size(); ++d) {
        reduce_dims.push_back(static_cast<int>(d));
      }
    }

    // Accumulate in 64 bits: the product of several int extents can exceed
    // the range of int, and signed overflow is undefined behaviour.
    int64_t reduce_numel = 1;
    const int rank = static_cast<int>(xdims.size());
    for (auto& d : reduce_dims) {
      if (d < 0) {
        d += rank;
      }
      reduce_numel *= xdims[d];
    }

    if (!keep_dim) {
      // Insert in ascending axis order so earlier insertions do not shift
      // the position of later ones.
      std::sort(reduce_dims.begin(), reduce_dims.end());
      for (const auto& d : reduce_dims) {
        ydims.insert(ydims.begin() + d, 1);
      }
    }

    float val = 1.0f / static_cast<float>(reduce_numel);

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    int r = xpu::constant(
        dev_ctx.x_context(), x_data, input->numel(), static_cast<XPUType>(val));
    PADDLE_ENFORCE_EQ(r,
                      XPU_SUCCESS,
                      platform::errors::External(
                          "XPU constant kernel return wrong value[%d %s]",
                          r,
                          XPUAPIErrorMsg[r]));

    r = xpu::broadcast_mul(
        dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims);
    PADDLE_ENFORCE_EQ(r,
                      XPU_SUCCESS,
                      platform::errors::External(
                          "XPU broadcast_mul kernel return wrong value[%d %s]",
                          r,
                          XPUAPIErrorMsg[r]));
  }
};
}
// namespace operators
}
// namespace paddle
// Registers ReduceMeanXPUKernel<XPUDeviceContext, float> under the op name
// reduce_mean.
// NOTE(review): `ops` is not declared in the visible part of this file;
// presumably a namespace alias for paddle::operators -- confirm.
REGISTER_OP_XPU_KERNEL
(
reduce_mean
,
ops
::
ReduceMeanXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
// Registers ReduceMeanGradXPUKernel<XPUDeviceContext, float> under the op name
// reduce_mean_grad.
REGISTER_OP_XPU_KERNEL
(
reduce_mean_grad
,
ops
::
ReduceMeanGradXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
#endif
paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc
已删除
100644 → 0
浏览文件 @
d7e74e63
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <memory>
#include <vector>
#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
namespace
paddle
{
namespace
operators
{
// Forward XPU kernel for the reduce_prod operator.
//
// Reads input "X" and the "reduce_all" / "dim" attributes, allocates output
// "Out" on the current place and calls xpu::reduce_prod over the selected
// axes. Negative entries of "dim" are offset by the rank of the input; when
// "reduce_all" is set every axis is reduced and "dim" is not consulted.
template <typename DeviceContext, typename T>
class ReduceProdXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    PADDLE_ENFORCE_EQ(
        platform::is_xpu_place(context.GetPlace()),
        true,
        platform::errors::Unavailable("This kernel only runs on XPU."));

    const bool reduce_all = context.Attr<bool>("reduce_all");
    auto* input = context.Input<Tensor>("X");
    auto* output = context.Output<Tensor>("Out");
    output->mutable_data<T>(context.GetPlace());
    auto& dev_ctx = context.template device_context<DeviceContext>();

    // Shape of the input as the int vector handed to the XPU call.
    const auto rank = input->dims().size();
    std::vector<int> xdims;
    for (int axis = 0; axis < rank; ++axis) {
      xdims.push_back(input->dims()[axis]);
    }

    // Axes to reduce: all of them, or the normalized "dim" attribute.
    const auto rdims = context.Attr<std::vector<int>>("dim");
    std::vector<int> reduce_dims;
    if (reduce_all) {
      for (size_t axis = 0; axis < xdims.size(); ++axis) {
        reduce_dims.push_back(static_cast<int>(axis));
      }
    } else {
      for (const int axis : rdims) {
        reduce_dims.push_back(axis < 0 ? axis + rank : axis);
      }
    }

    const auto* src = reinterpret_cast<const XPUType*>(input->data<T>());
    auto* dst = reinterpret_cast<XPUType*>(output->data<T>());
    int r =
        xpu::reduce_prod(dev_ctx.x_context(), src, dst, xdims, reduce_dims);
    PADDLE_ENFORCE_EQ(r,
                      XPU_SUCCESS,
                      platform::errors::External(
                          "XPU reduce_prod kernel return wrong value[%d %s]",
                          r,
                          XPUAPIErrorMsg[r]));
  }
};
}
// namespace operators
}
// namespace paddle
// Registers ReduceProdXPUKernel<XPUDeviceContext, float> under the op name
// reduce_prod.
// NOTE(review): `ops` is not declared in the visible part of this file;
// presumably a namespace alias for paddle::operators -- confirm.
REGISTER_OP_XPU_KERNEL
(
reduce_prod
,
ops
::
ReduceProdXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
#endif
paddle/phi/kernels/reduce_max_kernel.cc
浏览文件 @
5829069d
...
...
@@ -42,7 +42,7 @@ PD_REGISTER_KERNEL(
max
,
GPU
,
ALL_LAYOUT
,
phi
::
MaxKernel
,
float
,
double
,
int
,
int64_t
)
{}
#endif
#if defined(PADDLE_WITH_XPU_KP)
#if defined(PADDLE_WITH_XPU_KP)
&& !defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL
(
max
,
KPS
,
ALL_LAYOUT
,
phi
::
MaxKernel
,
float
)
{}
#endif
...
...
@@ -50,3 +50,7 @@ PD_REGISTER_KERNEL(max, KPS, ALL_LAYOUT, phi::MaxKernel, float) {}
PD_REGISTER_KERNEL
(
max
,
OneDNN
,
ALL_LAYOUT
,
phi
::
MaxKernel
,
float
,
phi
::
dtype
::
bfloat16
)
{}
#endif
// Only when PADDLE_WITH_XPU is defined: registers phi::MaxKernel as the
// `max` kernel for the XPU backend, any layout, float only.
#if defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL
(
max
,
XPU
,
ALL_LAYOUT
,
phi
::
MaxKernel
,
float
)
{}
#endif
paddle/phi/kernels/reduce_mean_kernel.cc
浏览文件 @
5829069d
...
...
@@ -47,7 +47,7 @@ PD_REGISTER_KERNEL(mean,
phi
::
dtype
::
float16
)
{}
#endif
#if defined(PADDLE_WITH_XPU_KP)
#if defined(PADDLE_WITH_XPU_KP)
&& !defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL
(
mean
,
KPS
,
ALL_LAYOUT
,
phi
::
MeanKernel
,
float
)
{}
#endif
...
...
@@ -55,3 +55,7 @@ PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {}
PD_REGISTER_KERNEL
(
mean
,
OneDNN
,
ALL_LAYOUT
,
phi
::
MeanKernel
,
float
,
phi
::
dtype
::
bfloat16
)
{}
#endif
// Only when PADDLE_WITH_XPU is defined: registers phi::MeanKernel as the
// `mean` kernel for the XPU backend, any layout, float only.
#if defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL
(
mean
,
XPU
,
ALL_LAYOUT
,
phi
::
MeanKernel
,
float
)
{}
#endif
paddle/phi/kernels/reduce_prod_kernel.cc
浏览文件 @
5829069d
...
...
@@ -39,6 +39,10 @@ PD_REGISTER_KERNEL(
prod
,
GPU
,
ALL_LAYOUT
,
phi
::
ProdKernel
,
float
,
double
,
int
,
int64_t
)
{}
#endif
#if defined(PADDLE_WITH_XPU_KP)
#if defined(PADDLE_WITH_XPU_KP)
&& !defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL
(
prod
,
KPS
,
ALL_LAYOUT
,
phi
::
ProdKernel
,
float
)
{}
#endif
// Only when PADDLE_WITH_XPU is defined: registers phi::ProdKernel as the
// `prod` kernel for the XPU backend, any layout, float only.
#if defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL
(
prod
,
XPU
,
ALL_LAYOUT
,
phi
::
ProdKernel
,
float
)
{}
#endif
paddle/phi/kernels/xpu/reduce.h
0 → 100644
浏览文件 @
5829069d
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <vector>
namespace
phi
{
// Shared driver for the XPU reduce kernels.
//
// Allocates `out` as T on dev_ctx, converts the shape of `x` into a
// std::vector<int>, works out which axes have to be reduced, and then either
// copies `x` into `out` (no axis left to reduce) or calls `func` with the XPU
// context, the input/output pointers, the input shape and the axes.
//
//   dims        axes to reduce; negative values are offset by the rank of x.
//               Not consulted when reduce_all is true. Axes whose extent in x
//               is 1 are dropped from the list passed to `func`.
//   keep_dim    accepted but not read by this function.
//   reduce_all  when true every axis of x is reduced.
//   func        reduction routine invoked as
//               func(xpu_ctx, x_data, out_data, x_shape, reduce_axes).
//
// Returns the status produced by `func`, or the status of the copy (which has
// already been checked with PADDLE_ENFORCE_XDNN_SUCCESS).
template <typename Context, typename T>
int XPUReduce(const Context& dev_ctx,
              const DenseTensor& x,
              const std::vector<int64_t>& dims,
              bool keep_dim,
              bool reduce_all,
              DenseTensor* out,
              std::function<int(xpu::Context*,
                                const T*,
                                T*,
                                const std::vector<int>&,
                                const std::vector<int>&)> func) {
  dev_ctx.template Alloc<T>(out);
  const auto* src = x.data<T>();
  auto* dst = out->data<T>();

  // Input shape in the int form expected by the XPU routines.
  const auto rank = x.dims().size();
  std::vector<int> xdims(rank);
  for (int axis = 0; axis < rank; ++axis) {
    xdims[axis] = x.dims()[axis];
  }

  std::vector<int> reduce_dims;
  if (reduce_all) {
    for (int axis = 0; axis < rank; ++axis) {
      reduce_dims.push_back(axis);
    }
  } else {
    // Normalized, de-duplicated set of requested axes.
    std::set<int> requested;
    for (const auto d : dims) {
      requested.insert(static_cast<int>(d < 0 ? d + rank : d));
    }
    // Walk the axes in order and keep those that were requested and
    // actually have more than one element.
    for (int axis = 0; axis < rank; ++axis) {
      const bool selected = requested.count(axis) != 0;
      if (selected && x.dims()[axis] != 1) {
        reduce_dims.push_back(axis);
      }
    }
  }

  if (reduce_dims.empty()) {
    // Nothing to reduce: the output is a plain copy of the input.
    // NOTE(review): the length passed here is a byte count
    // (numel * sizeof(T)) while the call is typed on T -- confirm whether
    // xpu::copy<T> expects an element count instead.
    int r = xpu::copy<T>(
        dev_ctx.x_context(), src, dst, x.numel() * sizeof(T));
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
    return r;
  }
  return func(dev_ctx.x_context(), src, dst, xdims, reduce_dims);
}
}
// namespace phi
paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc
0 → 100644
浏览文件 @
5829069d
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_max_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
ReduceMaxGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
out
,
const
DenseTensor
&
out_grad
,
const
IntArray
&
dims_arr
,
bool
keep_dim
,
bool
reduce_all
,
DenseTensor
*
x_grad
)
{
auto
dims
=
dims_arr
.
GetData
();
dev_ctx
.
template
Alloc
<
T
>(
x_grad
);
const
T
*
x_data
=
x
.
data
<
T
>
();
const
T
*
out_data
=
out
.
data
<
T
>
();
const
T
*
out_grad_data
=
out_grad
.
data
<
T
>
();
auto
*
x_grad_data
=
x_grad
->
data
<
T
>
();
const
auto
&
input_dim_size
=
x
.
dims
().
size
();
std
::
vector
<
int
>
true_dims
;
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
if
(
dims
[
i
]
<
0
)
{
true_dims
.
push_back
(
dims
[
i
]
+
input_dim_size
);
}
else
{
true_dims
.
push_back
(
dims
[
i
]);
}
}
std
::
vector
<
int
>
ydims
(
input_dim_size
);
std
::
vector
<
int
>
xdims
((
input_dim_size
));
std
::
set
<
int
>
dims_set
(
true_dims
.
begin
(),
true_dims
.
end
());
for
(
auto
i
=
0
;
i
<
input_dim_size
;
i
++
)
{
xdims
[
i
]
=
x
.
dims
()[
i
];
if
(
dims_set
.
find
(
i
)
!=
dims_set
.
end
()
||
reduce_all
)
{
ydims
[
i
]
=
1
;
}
else
{
ydims
[
i
]
=
x
.
dims
()[
i
];
}
}
T
*
brocast1
=
nullptr
;
T
*
brocast2
=
nullptr
;
bool
*
equal
=
nullptr
;
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
brocast1
),
x
.
numel
()
*
sizeof
(
T
)),
XPU_SUCCESS
,
errors
::
ResourceExhausted
(
"XPU has no enough memory"
));
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
equal
),
x
.
numel
()
*
sizeof
(
bool
)),
XPU_SUCCESS
,
errors
::
ResourceExhausted
(
"XPU has no enough memory"
));
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
brocast2
),
x
.
numel
()
*
sizeof
(
T
)),
XPU_SUCCESS
,
errors
::
ResourceExhausted
(
"XPU has no enough memory"
));
// step 1. brocast out and out_grad
int
r
=
xpu
::
broadcast
<
T
>
(
dev_ctx
.
x_context
(),
out_data
,
brocast1
,
ydims
,
xdims
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"broadcast"
);
r
=
xpu
::
broadcast
<
T
>
(
dev_ctx
.
x_context
(),
out_grad_data
,
brocast2
,
ydims
,
xdims
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"broadcast"
);
// step 2. comparse out_brocast and x
r
=
xpu
::
equal
<
T
>
(
dev_ctx
.
x_context
(),
x_data
,
brocast1
,
equal
,
x
.
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"equal"
);
// step 3. get x_grad
r
=
xpu
::
constant
<
T
>
(
dev_ctx
.
x_context
(),
brocast1
,
x
.
numel
(),
0
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"constant"
);
r
=
xpu
::
select
<
T
>
(
dev_ctx
.
x_context
(),
equal
,
brocast2
,
brocast1
,
x_grad_data
,
xdims
,
xdims
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"select"
);
if
(
dev_ctx
.
x_context
()
->
xpu_stream
)
{
dev_ctx
.
Wait
();
}
xpu_free
(
brocast1
);
xpu_free
(
brocast2
);
xpu_free
(
equal
);
}
}
// namespace phi
// Registers phi::ReduceMaxGradKernel as the `max_grad` kernel for the XPU
// backend, any layout, float only. The trailing braces are an empty body.
PD_REGISTER_KERNEL
(
max_grad
,
XPU
,
ALL_LAYOUT
,
phi
::
ReduceMaxGradKernel
,
float
)
{
}
paddle/phi/kernels/xpu/reduce_max_kernel.cc
0 → 100644
浏览文件 @
5829069d
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_max_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
MaxRawKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
IntArray
&
dims
,
bool
keep_dim
,
bool
reduce_all
,
DenseTensor
*
out
)
{
int
r
=
XPUReduce
<
Context
,
T
>
(
dev_ctx
,
x
,
dims
.
GetData
(),
keep_dim
,
reduce_all
,
out
,
xpu
::
reduce_max
<
T
>
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"reduce_max"
);
}
}
// namespace phi
// Registers phi::MaxRawKernel as the `max_raw` kernel for the XPU backend,
// any layout, float only.
PD_REGISTER_KERNEL
(
max_raw
,
XPU
,
ALL_LAYOUT
,
phi
::
MaxRawKernel
,
float
)
{}
paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
0 → 100644
浏览文件 @
5829069d
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_mean_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace
phi
{
// Gradient of the mean reduction on XPU.
//
// Writes into `x_grad` (allocated here as T). The output is first filled with
// 1 / reduce_numel, where reduce_numel is the product of the reduced extents
// of `x`, and then multiplied in place by `out_grad` via xpu::broadcast_mul.
//
//   x           only its shape and element count are used.
//   dims_arr    axes that were reduced; negative values are offset by the
//               rank of x. Replaced by all axes when reduce_all is true.
//   keep_dim    when false, a 1 is inserted into the out_grad shape at every
//               reduced axis before the broadcast multiply.
template <typename T, typename Context>
void ReduceMeanGradKernel(const Context& dev_ctx,
                          const DenseTensor& x,
                          const DenseTensor& out_grad,
                          const IntArray& dims_arr,
                          bool keep_dim,
                          bool reduce_all,
                          DenseTensor* x_grad) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  dev_ctx.template Alloc<T>(x_grad);
  const XPUType* dy_data =
      reinterpret_cast<const XPUType*>(out_grad.data<T>());
  XPUType* x_data = reinterpret_cast<XPUType*>(x_grad->data<T>());

  // Local, mutable copy of the axes: it is normalized and sorted below.
  auto reduce_dims = dims_arr.GetData();

  std::vector<int> xdims;
  for (int i = 0; i < x.dims().size(); i++) {
    xdims.push_back(x.dims()[i]);
  }
  std::vector<int> ydims;
  for (int i = 0; i < out_grad.dims().size(); i++) {
    ydims.push_back(out_grad.dims()[i]);
  }

  if (reduce_all) {
    reduce_dims.clear();
    for (size_t d = 0; d < xdims.size(); ++d) {
      reduce_dims.push_back(static_cast<int>(d));
    }
  }

  // Accumulate in 64 bits: the product of several int extents can exceed the
  // range of int, and signed overflow is undefined behaviour.
  int64_t reduce_numel = 1;
  const int rank = static_cast<int>(xdims.size());
  for (auto& d : reduce_dims) {
    if (d < 0) {
      d += rank;
    }
    reduce_numel *= xdims[d];
  }

  if (!keep_dim) {
    // Insert in ascending axis order so earlier insertions do not shift the
    // position of later ones.
    std::sort(reduce_dims.begin(), reduce_dims.end());
    for (const auto& d : reduce_dims) {
      ydims.insert(ydims.begin() + d, 1);
    }
  }

  float val = 1.0f / static_cast<float>(reduce_numel);

  int r = xpu::constant(
      dev_ctx.x_context(), x_data, x.numel(), static_cast<XPUType>(val));
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");

  r = xpu::broadcast_mul(
      dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
}
}
// namespace phi
// Registers phi::ReduceMeanGradKernel as the `mean_grad` kernel for the XPU
// backend, any layout, float only.
PD_REGISTER_KERNEL
(
mean_grad
,
XPU
,
ALL_LAYOUT
,
phi
::
ReduceMeanGradKernel
,
float
)
{}
paddle/phi/kernels/xpu/reduce_mean_kernel.cc
0 → 100644
浏览文件 @
5829069d
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_mean_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
MeanRawKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
IntArray
&
dims
,
bool
keep_dim
,
bool
reduce_all
,
DenseTensor
*
out
)
{
int
r
=
XPUReduce
<
Context
,
T
>
(
dev_ctx
,
x
,
dims
.
GetData
(),
keep_dim
,
reduce_all
,
out
,
xpu
::
reduce_mean
<
T
>
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"reduce_mean"
);
}
}
// namespace phi
// Registers phi::MeanRawKernel as the `mean_raw` kernel for the XPU backend,
// any layout, float only.
PD_REGISTER_KERNEL
(
mean_raw
,
XPU
,
ALL_LAYOUT
,
phi
::
MeanRawKernel
,
float
)
{}
paddle/phi/kernels/xpu/reduce_prod_kernel.cc
0 → 100644
浏览文件 @
5829069d
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
ProdRawKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
IntArray
&
dims
,
bool
keep_dim
,
bool
reduce_all
,
DenseTensor
*
out
)
{
int
r
=
XPUReduce
<
Context
,
T
>
(
dev_ctx
,
x
,
dims
.
GetData
(),
keep_dim
,
reduce_all
,
out
,
xpu
::
reduce_prod
<
T
>
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"reduce_prod"
);
}
}
// namespace phi
// Registers phi::ProdRawKernel as the `prod_raw` kernel for the XPU backend,
// any layout, float only.
PD_REGISTER_KERNEL
(
prod_raw
,
XPU
,
ALL_LAYOUT
,
phi
::
ProdRawKernel
,
float
)
{}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录