Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
7b828f71
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7b828f71
编写于
5月 11, 2022
作者:
T
taixiurong
提交者:
GitHub
5月 11, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove old XDNN implementation test=kunlun (#42404)
上级
a1abb7c9
变更
11
隐藏空白更改
内联
并排
Showing
11 changed files
with
459 additions
and
380 deletions
+459
-380
cmake/external/xpu.cmake
cmake/external/xpu.cmake
+2
-2
paddle/fluid/framework/data_type_transform.cc
paddle/fluid/framework/data_type_transform.cc
+76
-1
paddle/fluid/operators/log_loss_op_xpu.cc
paddle/fluid/operators/log_loss_op_xpu.cc
+51
-42
paddle/fluid/operators/metrics/accuracy_op_xpu.cc
paddle/fluid/operators/metrics/accuracy_op_xpu.cc
+19
-61
paddle/fluid/operators/optimizers/lamb_op_xpu.cc
paddle/fluid/operators/optimizers/lamb_op_xpu.cc
+96
-86
paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc
paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc
+112
-103
paddle/fluid/operators/optimizers/sgd_op_xpu.cc
paddle/fluid/operators/optimizers/sgd_op_xpu.cc
+28
-31
paddle/fluid/platform/device/xpu/xpu1_op_list.h
paddle/fluid/platform/device/xpu/xpu1_op_list.h
+0
-5
paddle/fluid/platform/device/xpu/xpu2_op_list.h
paddle/fluid/platform/device/xpu/xpu2_op_list.h
+2
-0
python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py
.../paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py
+42
-31
python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
+31
-18
未找到文件。
cmake/external/xpu.cmake
浏览文件 @
7b828f71
...
...
@@ -9,7 +9,7 @@ SET(XPU_RT_LIB_NAME "libxpurt.so")
if
(
NOT DEFINED XPU_BASE_URL
)
SET
(
XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev"
)
SET
(
XPU_BASE_URL
"
${
XPU_BASE_URL_WITHOUT_DATE
}
/20220
425
"
)
SET
(
XPU_BASE_URL
"
${
XPU_BASE_URL_WITHOUT_DATE
}
/20220
510
"
)
else
()
SET
(
XPU_BASE_URL
"
${
XPU_BASE_URL
}
"
)
endif
()
...
...
@@ -17,7 +17,7 @@ endif()
# ubuntu and centos: use output by XDNN API team
if
(
NOT DEFINED XPU_XDNN_BASE_URL
)
SET
(
XPU_XDNN_BASE_URL_WITHOUT_DATE
"https://klx-sdk-release-public.su.bcebos.com/xdnn/dev"
)
SET
(
XPU_XDNN_BASE_URL
"
${
XPU_XDNN_BASE_URL_WITHOUT_DATE
}
/20220
425
"
)
SET
(
XPU_XDNN_BASE_URL
"
${
XPU_XDNN_BASE_URL_WITHOUT_DATE
}
/20220
510
"
)
else
()
SET
(
XPU_XDNN_BASE_URL
"
${
XPU_XDNN_BASE_URL
}
"
)
endif
()
...
...
paddle/fluid/framework/data_type_transform.cc
浏览文件 @
7b828f71
...
...
@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/platform/transform.h"
#if defined(PADDLE_WITH_XPU)
#include "paddle/fluid/platform/device/device_wrapper.h"
#endif
namespace
paddle
{
namespace
framework
{
...
...
@@ -28,6 +32,49 @@ struct CastDataTypeFunctor {
}
};
#if defined(PADDLE_WITH_XPU)
// Casts every element of `in` from InType to OutType with xpu::cast_v2 and
// writes the result into `out`.
//
// `out` is (re)allocated through mutable_data<OutType>() on `in.place()`.
// The number of elements converted is `in.numel()`. A non-success status from
// cast_v2 is reported through PADDLE_ENFORCE_XDNN_SUCCESS, after which
// `dev_ctx->Wait()` is called before returning.
template <typename InType, typename OutType>
static void XPUCastData(const framework::Tensor& in,
                        framework::Tensor* out,
                        const platform::XPUDeviceContext* dev_ctx) {
  // Element types handed to cast_v2, obtained from XPUTypeTrait.
  using XPUSrcT = typename XPUTypeTrait<InType>::Type;
  using XPUDstT = typename XPUTypeTrait<OutType>::Type;

  const XPUSrcT* src_ptr =
      reinterpret_cast<const XPUSrcT*>(in.data<InType>());
  XPUDstT* dst_ptr =
      reinterpret_cast<XPUDstT*>(out->mutable_data<OutType>(in.place()));

  int r = xpu::cast_v2<XPUSrcT, XPUDstT>(
      dev_ctx->x_context(), src_ptr, dst_ptr, in.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");

  dev_ctx->Wait();
}
// Casts `in` (element type InType) into `out` as `dst_type` on an XPU device.
//
// Parameters:
//   in        source tensor; its elements are read as InType.
//   out       destination tensor, filled by XPUCastData.
//   dst_type  requested destination data type.
//   ctx       device context; it is static_cast to XPUDeviceContext, so the
//             caller must pass an XPU context.
//
// Throws platform::errors::Unimplemented when `dst_type` is not one of
// FP32, FP16, BOOL, INT16, INT32 or INT64.
template <typename InType>
static void XPUTransDataType(
    const framework::Tensor& in,
    framework::Tensor* out,
    const paddle::framework::proto::VarType::Type& dst_type,
    const platform::DeviceContext* ctx) {
  auto* context = static_cast<const platform::XPUDeviceContext*>(ctx);

// Invoked once per (cpp_type, proto_type) pair by _ForEachDataType_; only the
// pair whose proto_type equals dst_type performs the cast.
#define XPUCastCallback(cpp_type, proto_type)          \
  do {                                                 \
    if (dst_type == proto_type) {                      \
      XPUCastData<InType, cpp_type>(in, out, context); \
    }                                                  \
  } while (0)

  // The supported destination types are alternatives, so they must be joined
  // with ||. Joining them with && can never be true for a single dst_type,
  // which made every call fall through to the Unimplemented error below.
  if (dst_type == proto::VarType::FP32 || dst_type == proto::VarType::FP16 ||
      dst_type == proto::VarType::BOOL || dst_type == proto::VarType::INT16 ||
      dst_type == proto::VarType::INT32 || dst_type == proto::VarType::INT64) {
    _ForEachDataType_(XPUCastCallback);
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Data type (%s) is not supported in XPU when casting data type.",
        DataTypeToString(dst_type)));
  }
}
#endif
template
<
typename
InType
>
struct
CastDataType
{
CastDataType
(
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
,
...
...
@@ -88,6 +135,34 @@ void TransDataType(const Tensor& in,
auto
dst_type
=
type
;
auto
ctx
=
pool
.
Get
(
in
.
place
());
#if defined(PADDLE_WITH_XPU)
switch
(
src_type
)
{
case
proto
::
VarType
::
FP16
:
XPUTransDataType
<
platform
::
float16
>
(
in
,
out
,
dst_type
,
ctx
);
break
;
case
proto
::
VarType
::
FP32
:
XPUTransDataType
<
float
>
(
in
,
out
,
dst_type
,
ctx
);
break
;
case
proto
::
VarType
::
BOOL
:
XPUTransDataType
<
bool
>
(
in
,
out
,
dst_type
,
ctx
);
break
;
case
proto
::
VarType
::
INT16
:
XPUTransDataType
<
int16_t
>
(
in
,
out
,
dst_type
,
ctx
);
break
;
case
proto
::
VarType
::
INT32
:
XPUTransDataType
<
int
>
(
in
,
out
,
dst_type
,
ctx
);
break
;
case
proto
::
VarType
::
INT64
:
XPUTransDataType
<
int64_t
>
(
in
,
out
,
dst_type
,
ctx
);
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Data type (%s) is not supported in XPU when casting data type."
,
DataTypeToString
(
src_type
)));
}
#else
switch
(
src_type
)
{
case
proto
::
VarType
::
FP16
:
framework
::
VisitDataType
(
dst_type
,
...
...
@@ -123,6 +198,7 @@ void TransDataType(const Tensor& in,
"Data type (%s) is not supported when casting data type."
,
DataTypeToString
(
src_type
)));
}
#endif
}
void
TransComplexToReal
(
const
proto
::
VarType
::
Type
&
dst_type
,
...
...
@@ -131,7 +207,6 @@ void TransComplexToReal(const proto::VarType::Type& dst_type,
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
ctx
=
pool
.
Get
(
in
.
place
());
out
->
Resize
(
in
.
dims
());
// complex -> real
switch
(
src_type
)
{
case
proto
::
VarType
::
COMPLEX64
:
...
...
paddle/fluid/operators/log_loss_op_xpu.cc
浏览文件 @
7b828f71
...
...
@@ -21,58 +21,67 @@ template <typename DeviceContext, typename T, typename AttrType = T>
class
LogLossXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predicted"
);
auto
*
labels
=
ctx
.
Input
<
Tensor
>
(
"Labels"
);
auto
*
loss
=
ctx
.
Output
<
Tensor
>
(
"Loss"
);
auto
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
AttrType
>
(
"epsilon"
));
loss
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
n
=
predict
->
numel
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
log_loss_fwd
(
dev_ctx
.
x_context
(),
n
,
epsilon
,
predict
->
data
<
T
>
(),
labels
->
data
<
T
>
(),
loss
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"XPU log_loss kernel return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
/*** TODO wait XDNN new interface
auto* predict = ctx.Input<Tensor>("Predicted");
auto* labels = ctx.Input<Tensor>("Labels");
auto* loss = ctx.Output<Tensor>("Loss");
auto epsilon = static_cast<T>(ctx.Attr<AttrType>("epsilon"));
loss->mutable_data<T>(ctx.GetPlace());
int n = predict->numel();
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int r =
xpu::log_loss_fwd(dev_ctx.x_context(), n, epsilon,
predict->data<T>(),
labels->data<T>(), loss->data<T>());
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::External(
"XPU log_loss kernel return wrong value[%d], please check
whether "
"Baidu Kunlun Card is properly installed.",
r));
***/
}
};
template
<
typename
DeviceContext
,
typename
T
,
typename
AttrType
=
T
>
class
LogLossGradXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predicted"
);
auto
*
labels
=
ctx
.
Input
<
Tensor
>
(
"Labels"
);
auto
*
dloss
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Loss"
));
auto
*
dpred
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Predicted"
));
if
(
!
dpred
)
{
return
;
}
auto
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
AttrType
>
(
"epsilon"
));
dpred
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
n
=
predict
->
numel
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
log_loss_bwd
(
dev_ctx
.
x_context
(),
n
,
epsilon
,
predict
->
data
<
T
>
(),
labels
->
data
<
T
>
(),
dloss
->
data
<
T
>
(),
dpred
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"XPU log_loss kernel return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
/*** TODO wait XDNN new interface
auto* predict = ctx.Input<Tensor>("Predicted");
auto* labels = ctx.Input<Tensor>("Labels");
auto* dloss = ctx.Input<Tensor>(framework::GradVarName("Loss"));
auto* dpred = ctx.Output<Tensor>(framework::GradVarName("Predicted"));
if (!dpred) {
return;
}
auto epsilon = static_cast<T>(ctx.Attr<AttrType>("epsilon"));
dpred->mutable_data<T>(ctx.GetPlace());
int n = predict->numel();
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int r = xpu::log_loss_bwd(dev_ctx.x_context(), n, epsilon,
predict->data<T>(), labels->data<T>(),
dloss->data<T>(), dpred->data<T>());
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::External(
"XPU log_loss kernel return wrong value[%d], please check
whether "
"Baidu Kunlun Card is properly installed.",
r));
***/
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_XPU_KERNEL
(
log_loss
,
ops
::
LogLossXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
REGISTER_OP_XPU_KERNEL
(
log_loss_grad
,
ops
::
LogLossGradXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
// namespace ops = paddle::operators;
// REGISTER_OP_XPU_KERNEL(
// log_loss, ops::LogLossXPUKernel<paddle::platform::XPUDeviceContext,
// float>);
// REGISTER_OP_XPU_KERNEL(
// log_loss_grad,
// ops::LogLossGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
paddle/fluid/operators/metrics/accuracy_op_xpu.cc
浏览文件 @
7b828f71
...
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device/
xpu/xpu_head
er.h"
#include "paddle/fluid/platform/device/
device_wrapp
er.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -42,68 +42,26 @@ class AccuracyXPUKernel : public framework::OpKernel<T> {
if
(
num_samples
==
0
)
{
return
;
}
size_t
indices_int32_size
=
num_samples
*
class_dim
*
sizeof
(
int
);
size_t
indices_int64_size
=
num_samples
*
class_dim
*
sizeof
(
int64_t
);
size_t
label_int32_size
=
num_samples
*
sizeof
(
int
);
size_t
label_int64_size
=
num_samples
*
sizeof
(
int64_t
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
*
indices_int32_device
=
NULL
;
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
indices_int32_device
),
indices_int32_size
),
XPU_SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"
\n\n
Out of memory error on XPU, Cannot allocate %s memory"
" on XPU.
\n\n
Please check whether there is any other process "
"using XPU.
\n
"
,
string
::
HumanReadableSize
(
indices_int32_size
)));
int
*
label_int32_device
=
NULL
;
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
label_int32_device
),
label_int32_size
),
XPU_SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"
\n\n
Out of memory error on XPU, Cannot allocate %s memory"
" on XPU.
\n\n
Please check whether there is any other process "
"using XPU.
\n
"
,
string
::
HumanReadableSize
(
label_int32_size
)));
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
int
size
=
num_samples
*
class_dim
;
int
*
indices_int32_ptr
=
RAII_GUARD
.
alloc_l3_or_gm
<
int
>
(
size
);
PADDLE_ENFORCE_XDNN_NOT_NULL
(
indices_int32_ptr
);
int
*
label_int32_ptr
=
RAII_GUARD
.
alloc_l3_or_gm
<
int
>
(
size
);
PADDLE_ENFORCE_XDNN_NOT_NULL
(
label_int32_ptr
);
int
*
indices_int32_host
=
reinterpret_cast
<
int
*>
(
std
::
malloc
(
indices_int32_size
));
int64_t
*
indices_int64_host
=
reinterpret_cast
<
int64_t
*>
(
std
::
malloc
(
indices_int64_size
));
int
*
label_int32_host
=
reinterpret_cast
<
int
*>
(
std
::
malloc
(
label_int32_size
));
int64_t
*
label_int64_host
=
reinterpret_cast
<
int64_t
*>
(
std
::
malloc
(
label_int64_size
));
dev_ctx
.
Wait
();
memory
::
Copy
(
platform
::
CPUPlace
(),
indices_int64_host
,
ctx
.
GetPlace
(),
indices_data
,
indices_int64_size
);
memory
::
Copy
(
platform
::
CPUPlace
(),
label_int64_host
,
ctx
.
GetPlace
(),
label_data
,
label_int64_size
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
label_int32_host
[
i
]
=
label_int64_host
[
i
];
for
(
size_t
j
=
0
;
j
<
class_dim
;
++
j
)
{
indices_int32_host
[
i
*
class_dim
+
j
]
=
indices_int64_host
[
i
*
class_dim
+
j
];
}
}
memory
::
Copy
(
ctx
.
GetPlace
(),
indices_int32_device
,
platform
::
CPUPlace
(),
indices_int32_host
,
indices_int32_size
);
memory
::
Copy
(
ctx
.
GetPlace
(),
label_int32_device
,
platform
::
CPUPlace
(),
label_int32_host
,
label_int32_size
);
int
r
=
xpu
::
accuracy
(
dev_ctx
.
x_context
(),
indices_int32_device
,
label_int32_device
,
num_samples
,
class_dim
,
correct_data
,
total_data
,
accuracy_data
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
Fatal
(
"XPU accuracy kernel error!"
));
dev_ctx
.
Wait
();
xpu_free
(
indices_int32_device
);
xpu_free
(
label_int32_device
);
std
::
free
(
indices_int32_host
);
std
::
free
(
indices_int64_host
);
std
::
free
(
label_int32_host
);
std
::
free
(
label_int64_host
);
int
r
=
xpu
::
cast_v2
<
int64_t
,
int32_t
>
(
dev_ctx
.
x_context
(),
indices_data
,
indices_int32_ptr
,
size
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"cast_v2"
);
r
=
xpu
::
cast_v2
<
int64_t
,
int32_t
>
(
dev_ctx
.
x_context
(),
label_data
,
label_int32_ptr
,
size
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"cast_v2"
);
r
=
xpu
::
accuracy
(
dev_ctx
.
x_context
(),
indices_int32_ptr
,
label_int32_ptr
,
num_samples
,
class_dim
,
correct_data
,
total_data
,
accuracy_data
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"cast_v2"
);
}
};
...
...
paddle/fluid/operators/optimizers/lamb_op_xpu.cc
浏览文件 @
7b828f71
...
...
@@ -25,101 +25,111 @@ template <typename DeviceContext, typename T>
class
LambOpXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
paddle
::
framework
::
LoDTensor
;
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
/*** TODO wait XDNN new interface
using paddle::framework::LoDTensor;
const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type())));
using
paddle
::
framework
::
LoDTensor
;
using paddle::framework::LoDTensor;
// inputs
T
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
T
weight_decay
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"weight_decay"
));
T
beta1
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"beta1"
));
T
beta2
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"beta2"
));
auto
&
param
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Param"
),
"Input"
,
"Param"
,
"Lamb"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
&
mom1
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Moment1"
),
"Input"
,
"Moment1"
,
"Lamb"
);
auto
&
mom2
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Moment2"
),
"Input"
,
"Moment2"
,
"Lamb"
);
auto
&
lr
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"LearningRate"
),
"Input"
,
"LearningRate"
,
"Lamb"
);
// inputs
T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
T weight_decay = static_cast<T>(ctx.Attr<float>("weight_decay"));
T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
auto& param = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Param"), "Input",
"Param", "Lamb");
auto* grad_var = ctx.InputVar("Grad");
auto& mom1 = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment1"), "Input",
"Moment1", "Lamb");
auto& mom2 = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment2"), "Input",
"Moment2", "Lamb");
auto& lr = GET_DATA_SAFELY(ctx.Input<LoDTensor>("LearningRate"),
"Input",
"LearningRate", "Lamb");
auto
&
beta1_pow
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Beta1Pow"
),
"Input"
,
"Beta1Pow"
,
"Lamb"
);
auto
&
beta2_pow
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Beta2Pow"
),
"Input"
,
"Beta2Pow"
,
"Lamb"
);
auto& beta1_pow = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Beta1Pow"),
"Input",
"Beta1Pow", "Lamb");
auto& beta2_pow = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Beta2Pow"),
"Input",
"Beta2Pow", "Lamb");
auto
&
param_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"ParamOut"
),
"Output"
,
"ParamOut"
,
"Lamb"
);
auto
&
mom1_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"Moment1Out"
),
"Output"
,
"Moment1Out"
,
"Lamb"
);
auto
&
mom2_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"Moment2Out"
),
"Output"
,
"Moment2Out"
,
"Lamb"
);
auto
&
beta1_pow_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"Beta1PowOut"
),
"Output"
,
"Beta1PowOut"
,
"Lamb"
);
auto
&
beta2_pow_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"Beta2PowOut"
),
"Output"
,
"Beta2PowOut"
,
"Lamb"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto& param_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("ParamOut"),
"Output", "ParamOut", "Lamb");
auto& mom1_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Moment1Out"),
"Output", "Moment1Out", "Lamb");
auto& mom2_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Moment2Out"),
"Output", "Moment2Out", "Lamb");
auto& beta1_pow_out =
GET_DATA_SAFELY(ctx.Output<LoDTensor>("Beta1PowOut"),
"Output", "Beta1PowOut", "Lamb");
auto& beta2_pow_out =
GET_DATA_SAFELY(ctx.Output<LoDTensor>("Beta2PowOut"),
"Output", "Beta2PowOut", "Lamb");
auto& dev_ctx = ctx.template device_context<DeviceContext>();
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
&
grad
=
*
ctx
.
Input
<
LoDTensor
>
(
"Grad"
);
int
r
=
xpu
::
lamb
(
dev_ctx
.
x_context
(),
grad
.
template
data
<
T
>(),
mom1
.
template
data
<
T
>(),
mom2
.
template
data
<
T
>(),
param
.
template
data
<
T
>(),
beta1_pow
.
template
data
<
T
>(),
beta2_pow
.
template
data
<
T
>(),
beta1
,
beta2
,
epsilon
,
weight_decay
,
lr
.
template
data
<
T
>(),
mom1_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
mom2_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
beta1_pow_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
beta2_pow_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
param
.
numel
());
if (grad_var->IsType<framework::LoDTensor>()) {
auto& grad = *ctx.Input<LoDTensor>("Grad");
int r = xpu::lamb(dev_ctx.x_context(), grad.template data<T>(),
mom1.template data<T>(), mom2.template data<T>(),
param.template data<T>(), beta1_pow.template
data<T>(),
beta2_pow.template data<T>(), beta1, beta2, epsilon,
weight_decay, lr.template data<T>(),
mom1_out.template mutable_data<T>(ctx.GetPlace()),
mom2_out.template mutable_data<T>(ctx.GetPlace()),
param_out.template mutable_data<T>(ctx.GetPlace()),
beta1_pow_out.template
mutable_data<T>(ctx.GetPlace()),
beta2_pow_out.template
mutable_data<T>(ctx.GetPlace()),
param.numel());
if
(
r
==
xpu
::
Error_t
::
INVALID_PARAM
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
InvalidArgument
(
"XPU kernel error of LambOp, error message: INVALID_PARAM, "
"please check your input & output."
));
}
else
if
(
r
==
xpu
::
Error_t
::
RUNTIME_ERROR
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
Unavailable
(
"XPU kernel error of LambOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."
));
}
else
if
(
r
==
xpu
::
Error_t
::
NO_ENOUGH_WORKSPACE
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"XPU kernel error of LambOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."
));
}
else
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"XPU kernel error of LambOp, error "
"message: OTHER "
"XPU API returns error code: %d."
,
r
));
}
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Variable type not supported by lamb_op. Expect LoDTensor, "
"but got %s"
,
framework
::
ToTypeName
(
param_var
->
Type
())));
}
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of LambOp, error message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of LambOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of LambOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."));
} else {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of LambOp, error "
"message: OTHER "
"XPU API returns error code: %d.",
r));
}
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Variable type not supported by lamb_op. Expect LoDTensor, "
"but got %s",
framework::ToTypeName(param_var->Type())));
}
**/
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_XPU_KERNEL
(
lamb
,
ops
::
LambOpXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
//
namespace ops = paddle::operators;
//
REGISTER_OP_XPU_KERNEL(
//
lamb, ops::LambOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc
浏览文件 @
7b828f71
...
...
@@ -40,113 +40,122 @@ template <typename DeviceContext, typename T>
class
RmspropOpXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
paddle
::
framework
::
LoDTensor
;
// check Param & Grad tensor type
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
LoDTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Tensor holds the wrong type,Expected Var(%s)'s "
"type is LoDTensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
LoDTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Tensor holds the wrong type,Expected Var(%s)'s "
"type is LoDTensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
())));
// inputs
auto
&
param
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Param"
),
"Input"
,
"Param"
,
"Rmsprop"
);
auto
&
meanSquare
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"MeanSquare"
),
"Input"
,
"MeanSquare"
,
"Rmsprop"
);
auto
&
grad
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Grad"
),
"Input"
,
"Grad"
,
"Rmsprop"
);
auto
&
mom
=
GET_DATA_SAFELY
(
ctx
.
Input
<
LoDTensor
>
(
"Moment"
),
"Input"
,
"Moment"
,
"Rmsprop"
);
auto
*
learning_rate
=
ctx
.
Input
<
Tensor
>
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
learning_rate
->
dims
().
size
(),
1
,
platform
::
errors
::
InvalidArgument
(
"learining rate should have dimension = 1."
" But received learning rate dim [%s] "
,
learning_rate
->
dims
().
size
()));
T
lr
=
static_cast
<
T
>
(
GetAttrFromTensor
(
learning_rate
));
// constants
T
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
T
decay
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"decay"
));
T
momentum
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"momentum"
));
// outputs
auto
&
param_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"ParamOut"
),
"Output"
,
"ParamOut"
,
"Rmsprop"
);
auto
&
mom_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"MomentOut"
),
"Output"
,
"MomentOut"
,
"Rmsprop"
);
auto
&
mom_sqrt_out
=
GET_DATA_SAFELY
(
ctx
.
Output
<
LoDTensor
>
(
"MeanSquareOut"
),
"Output"
,
"MeanSquareOut"
,
"Rmsprop"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
///// rmsprop优化算法
///
/// ms_out[i] = rho * ms[i] + (1 - rho) * (g[i] * g[i]);
///
/// mom_out[i] = momentum * mom[i] + lr *
/// (g[i] / ((float)sqrt(ms_out[i] + epsilon)));
///
/// p_out[i] = p[i] - mom_out[i];
/// DLL_EXPORT int rmsprop(Context* ctx, const float* p,
/// const float* ms, const float* g, const float* mom,
/// float epsilon, float rho, float momentum, float lr,
/// float *ms_out, float *mom_out, float *p_out, int n)
int
r
=
xpu
::
rmsprop
(
dev_ctx
.
x_context
(),
param
.
template
data
<
T
>(),
meanSquare
.
template
data
<
T
>(),
grad
.
template
data
<
T
>(),
mom
.
template
data
<
T
>(),
epsilon
,
decay
,
momentum
,
lr
,
mom_sqrt_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
mom_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
param
.
numel
());
if
(
r
==
xpu
::
Error_t
::
INVALID_PARAM
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
InvalidArgument
(
"XPU kernel error of RmspropOp, error message: INVALID_PARAM, "
"please check your input & output."
));
}
else
if
(
r
==
xpu
::
Error_t
::
RUNTIME_ERROR
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
Unavailable
(
"XPU kernel error of RmspropOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."
));
}
else
if
(
r
==
xpu
::
Error_t
::
NO_ENOUGH_WORKSPACE
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"XPU kernel error of RmspropOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."
));
}
else
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"XPU kernel error of RmspropOp, error "
"message: OTHER "
"XPU API returns error code: %d."
,
r
));
}
/*** TODO wait XDNN new interface
using paddle::framework::LoDTensor;
// check Param & Grad tensor type
const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE_EQ(param_var->IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"Tensor holds the wrong type,Expected Var(%s)'s "
"type is LoDTensor, "
"but the received is %s",
ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type())));
const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE_EQ(grad_var->IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"Tensor holds the wrong type,Expected Var(%s)'s "
"type is LoDTensor, "
"but the received is %s",
ctx.InputNames("Grad").front(),
framework::ToTypeName(grad_var->Type())));
// inputs
auto& param = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Param"), "Input",
"Param", "Rmsprop");
auto& meanSquare = GET_DATA_SAFELY(ctx.Input<LoDTensor>("MeanSquare"),
"Input", "MeanSquare", "Rmsprop");
auto& grad = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Grad"), "Input",
"Grad",
"Rmsprop");
auto& mom = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment"), "Input",
"Moment", "Rmsprop");
auto* learning_rate = ctx.Input<Tensor>("LearningRate");
PADDLE_ENFORCE_EQ(learning_rate->dims().size(), 1,
platform::errors::InvalidArgument(
"learining rate should have dimension = 1."
" But received learning rate dim [%s] ",
learning_rate->dims().size()));
T lr = static_cast<T>(GetAttrFromTensor(learning_rate));
// constants
T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
T decay = static_cast<T>(ctx.Attr<float>("decay"));
T momentum = static_cast<T>(ctx.Attr<float>("momentum"));
// outputs
auto& param_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("ParamOut"),
"Output", "ParamOut", "Rmsprop");
auto& mom_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("MomentOut"),
"Output", "MomentOut", "Rmsprop");
auto& mom_sqrt_out =
GET_DATA_SAFELY(ctx.Output<LoDTensor>("MeanSquareOut"),
"Output", "MeanSquareOut",
"Rmsprop");
auto& dev_ctx = ctx.template device_context<DeviceContext>();
///// rmsprop优化算法
///
/// ms_out[i] = rho * ms[i] + (1 - rho) * (g[i] * g[i]);
///
/// mom_out[i] = momentum * mom[i] + lr *
/// (g[i] / ((float)sqrt(ms_out[i] + epsilon)));
///
/// p_out[i] = p[i] - mom_out[i];
/// DLL_EXPORT int rmsprop(Context* ctx, const float* p,
/// const float* ms, const float* g, const float* mom,
/// float epsilon, float rho, float momentum, float lr,
/// float *ms_out, float *mom_out, float *p_out, int n)
int r = xpu::rmsprop(dev_ctx.x_context(), param.template data<T>(),
meanSquare.template data<T>(), grad.template
data<T>(),
mom.template data<T>(), epsilon, decay, momentum,
lr,
mom_sqrt_out.template
mutable_data<T>(ctx.GetPlace()),
mom_out.template mutable_data<T>(ctx.GetPlace()),
param_out.template mutable_data<T>(ctx.GetPlace()),
param.numel());
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of RmspropOp, error message: INVALID_PARAM,
"
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of RmspropOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of RmspropOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."));
} else {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of RmspropOp, error "
"message: OTHER "
"XPU API returns error code: %d.",
r));
}
***/
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_XPU_KERNEL
(
rmsprop
,
ops
::
RmspropOpXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
//
namespace ops = paddle::operators;
//
REGISTER_OP_XPU_KERNEL(
//
rmsprop,
//
ops::RmspropOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
paddle/fluid/operators/optimizers/sgd_op_xpu.cc
浏览文件 @
7b828f71
...
...
@@ -14,11 +14,15 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/optimizers/sgd_op.h"
#include <string>
#include "paddle/fluid/platform/device/device_wrapper.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
class
SGDOpXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
...
...
@@ -48,40 +52,31 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
"numel = [%s], ParamOut's numel = [%s]"
,
grad
->
numel
(),
sz
));
const
T
*
lr
=
learning_rate
->
data
<
T
>
();
const
T
*
lr_t
=
learning_rate
->
data
<
T
>
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
const
float
*
lr
=
nullptr
;
if
(
std
::
is_same
<
T
,
paddle
::
platform
::
float16
>::
value
)
{
float
*
lr_float
=
RAII_GUARD
.
alloc_l3_or_gm
<
float
>
(
learning_rate
->
numel
());
int
r
=
xpu
::
cast_v2
<
XPUType
,
float
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
lr_t
),
lr_float
,
learning_rate
->
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"clip_v2"
);
lr
=
lr_float
;
}
else
{
lr
=
reinterpret_cast
<
const
float
*>
(
lr_t
);
}
const
T
*
param_data
=
param
->
data
<
T
>
();
const
T
*
grad_data
=
grad
->
data
<
T
>
();
T
*
out_data
=
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
sgd
(
dev_ctx
.
x_context
(),
sz
,
grad_data
,
param_data
,
lr
,
out_data
);
if
(
r
==
xpu
::
Error_t
::
INVALID_PARAM
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
InvalidArgument
(
"XPU kernel error of SgdOp, error message: INVALID_PARAM, "
"please check your input & output."
));
}
else
if
(
r
==
xpu
::
Error_t
::
RUNTIME_ERROR
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
Unavailable
(
"XPU kernel error of SgdOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."
));
}
else
if
(
r
==
xpu
::
Error_t
::
NO_ENOUGH_WORKSPACE
)
{
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
ResourceExhausted
(
"XPU kernel error of SgdOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."
));
}
}
else
{
PADDLE_ENFORCE_EQ
(
false
,
true
,
platform
::
errors
::
PermissionDenied
(
"Unsupported Variable Type of Param & Grad in "
"SgdOp-XPU. Excepted "
"LodTensor, But received [%s] and [%s]"
,
paddle
::
framework
::
ToTypeName
(
param_var
->
Type
())));
int
r
=
xpu
::
sgd
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
grad_data
),
reinterpret_cast
<
const
XPUType
*>
(
param_data
),
lr
,
reinterpret_cast
<
XPUType
*>
(
out_data
),
sz
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"sgd"
);
}
}
};
...
...
@@ -90,6 +85,8 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_XPU_KERNEL
(
sgd
,
ops
::
SGDOpXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
sgd
,
ops
::
SGDOpXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
,
ops
::
SGDOpXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
plat
::
float16
>
);
#endif
paddle/fluid/platform/device/xpu/xpu1_op_list.h
浏览文件 @
7b828f71
...
...
@@ -145,7 +145,6 @@ XPUOpMap& get_kl1_ops() {
{
"hard_switch"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"iou_similarity"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"lamb"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"layer_norm_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"layer_norm"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
...
...
@@ -175,9 +174,6 @@ XPUOpMap& get_kl1_ops() {
pOpKernelType
(
vartype
::
INT32
,
XPUPlace
()),
pOpKernelType
(
vartype
::
INT64
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"log_loss_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"log_loss"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"logsumexp"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"log"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"lookup_table_v2_grad"
,
...
...
@@ -236,7 +232,6 @@ XPUOpMap& get_kl1_ops() {
pOpKernelType
(
vartype
::
INT32
,
XPUPlace
()),
pOpKernelType
(
vartype
::
BOOL
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"rmsprop"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"rnn_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"rnn"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"roi_align_grad"
,
...
...
paddle/fluid/platform/device/xpu/xpu2_op_list.h
浏览文件 @
7b828f71
...
...
@@ -328,6 +328,8 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType
(
vartype
::
INT64
,
XPUPlace
())})},
{
"scatter"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
INT64
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"sgd"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP16
,
XPUPlace
())})},
{
"sigmoid_cross_entropy_with_logits_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"sigmoid_cross_entropy_with_logits"
,
...
...
python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py
浏览文件 @
7b828f71
...
...
@@ -23,41 +23,52 @@ import paddle.fluid as fluid
from
paddle.fluid
import
compiler
,
Program
,
program_guard
import
paddle
from
op_test_xpu
import
XPUOpTest
from
xpu.get_test_cover_info
import
create_test_class
,
get_xpu_op_support_types
,
XPUOpTestWrapper
paddle
.
enable_static
()
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
class
TestXPUAccuracyOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
init_dtype
()
n
=
8192
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
)).
astype
(
'int64'
)
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
)).
astype
(
'int64'
)
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
num_correct
=
0
for
rowid
in
range
(
n
):
for
ele
in
indices
[
rowid
]:
if
ele
==
label
[
rowid
]:
num_correct
+=
1
break
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
}
self
.
attrs
=
{
'use_xpu'
:
True
}
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float32
def
test_check_output
(
self
):
if
paddle
.
is_compiled_with_xpu
():
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
)
class
XPUTestAccuracyOp
(
XPUOpTestWrapper
):
def
__init__
(
self
):
self
.
op_name
=
'accuracy'
self
.
use_dynamic_create_class
=
False
class
TestXPUAccuracyOp
(
XPUOpTest
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
init_dtype
()
n
=
8192
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
)).
astype
(
'int64'
)
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
)).
astype
(
'int64'
)
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
num_correct
=
0
for
rowid
in
range
(
n
):
for
ele
in
indices
[
rowid
]:
if
ele
==
label
[
rowid
]:
num_correct
+=
1
break
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
}
self
.
attrs
=
{
'use_xpu'
:
True
}
def
init_dtype
(
self
):
self
.
dtype
=
self
.
in_type
def
test_check_output
(
self
):
if
paddle
.
is_compiled_with_xpu
():
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
)
support_types
=
get_xpu_op_support_types
(
'accuracy'
)
for
stype
in
support_types
:
create_test_class
(
globals
(),
XPUTestAccuracyOp
,
stype
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
浏览文件 @
7b828f71
...
...
@@ -25,30 +25,43 @@ import paddle.fluid as fluid
from
paddle.fluid
import
core
from
paddle.fluid.op
import
Operator
from
op_test_xpu
import
XPUOpTest
from
xpu.get_test_cover_info
import
create_test_class
,
get_xpu_op_support_types
,
XPUOpTestWrapper
class
TestSGDOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"sgd"
self
.
conf
()
w
=
np
.
random
.
random
((
self
.
h
,
self
.
w
)).
astype
(
"float32"
)
g
=
np
.
random
.
random
((
self
.
h
,
self
.
w
)).
astype
(
"float32"
)
lr
=
np
.
array
([
0.1
]).
astype
(
"float32"
)
self
.
inputs
=
{
'Param'
:
w
,
'Grad'
:
g
,
'LearningRate'
:
lr
}
self
.
outputs
=
{
'ParamOut'
:
w
-
lr
*
g
}
class
XPUTestSgdOp
(
XPUOpTestWrapper
):
def
__init__
(
self
):
self
.
op_name
=
'sgd'
self
.
use_dynamic_create_class
=
False
def
conf
(
self
):
self
.
h
=
102
self
.
w
=
105
class
TestSGDOp
(
XPUOpTest
):
def
setUp
(
self
):
self
.
op_type
=
"sgd"
self
.
dtype
=
self
.
in_type
self
.
conf
()
w
=
np
.
random
.
random
((
self
.
h
,
self
.
w
)).
astype
(
self
.
dtype
)
g
=
np
.
random
.
random
((
self
.
h
,
self
.
w
)).
astype
(
self
.
dtype
)
lr
=
np
.
array
([
0.1
]).
astype
(
self
.
dtype
)
def
test_check_output_with_place
(
self
):
self
.
check_output_with_place
(
paddle
.
XPUPlace
(
0
))
self
.
inputs
=
{
'Param'
:
w
,
'Grad'
:
g
,
'LearningRate'
:
lr
}
self
.
outputs
=
{
'ParamOut'
:
w
-
lr
*
g
}
def
conf
(
self
):
self
.
h
=
102
self
.
w
=
105
class
TestSGDOpCase8X
(
TestSGDOp
):
def
conf
(
self
):
self
.
h
=
10
self
.
w
=
64
def
test_check_output_with_place
(
self
):
self
.
check_output_with_place
(
paddle
.
XPUPlace
(
0
))
class
TestSGDOpCase8X
(
TestSGDOp
):
def
conf
(
self
):
self
.
h
=
10
self
.
w
=
64
support_types
=
get_xpu_op_support_types
(
'sgd'
)
for
stype
in
support_types
:
create_test_class
(
globals
(),
XPUTestSgdOp
,
stype
)
class
TestSGDOpWithLargeInput
(
unittest
.
TestCase
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录