Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
f53e920d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f53e920d
编写于
11月 04, 2022
作者:
Y
ykkk2333
提交者:
GitHub
11月 04, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix deepfm and deep_wide bug, add embedding_sparse_grad kernel, test=kunlun (#47365)
上级
9e006987
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
129 addition
and
18 deletion
+129
-18
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+10
-3
paddle/fluid/platform/device/xpu/xpu2_op_list.h
paddle/fluid/platform/device/xpu/xpu2_op_list.h
+2
-0
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
+45
-15
paddle/phi/kernels/xpu/embedding_grad_kernel.cc
paddle/phi/kernels/xpu/embedding_grad_kernel.cc
+72
-0
未找到文件。
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
f53e920d
...
@@ -482,9 +482,17 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
...
@@ -482,9 +482,17 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
PADDLE_SELECTED_ROWS_ADD
(
phi
::
GPUContext
,
float
);
PADDLE_SELECTED_ROWS_ADD
(
phi
::
GPUContext
,
float
);
PADDLE_SELECTED_ROWS_ADD
(
phi
::
GPUContext
,
double
);
PADDLE_SELECTED_ROWS_ADD
(
phi
::
GPUContext
,
double
);
}
else
{
}
else
{
#endif
#if defined(PADDLE_WITH_XPU)
if
(
paddle
::
platform
::
is_xpu_place
(
place
))
{
PADDLE_SELECTED_ROWS_ADD
(
phi
::
XPUContext
,
float
);
}
else
{
#endif
#endif
PADDLE_SELECTED_ROWS_ADD
(
phi
::
CPUContext
,
float
);
PADDLE_SELECTED_ROWS_ADD
(
phi
::
CPUContext
,
float
);
PADDLE_SELECTED_ROWS_ADD
(
phi
::
CPUContext
,
double
);
PADDLE_SELECTED_ROWS_ADD
(
phi
::
CPUContext
,
double
);
#if defined(PADDLE_WITH_XPU)
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
}
}
#endif
#endif
...
@@ -858,6 +866,5 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
...
@@ -858,6 +866,5 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
dst_var
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
dst_var
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
}
}
}
}
// namespace imperative
}
// namespace imperative
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/device/xpu/xpu2_op_list.h
浏览文件 @
f53e920d
...
@@ -195,6 +195,8 @@ XPUOpMap& get_kl2_ops() {
...
@@ -195,6 +195,8 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType
(
vartype
::
FP16
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP16
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
()),
pOpKernelType
(
vartype
::
FP64
,
XPUPlace
())})},
pOpKernelType
(
vartype
::
FP64
,
XPUPlace
())})},
{
"embedding_sparse_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"equal"
,
{
"equal"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
INT64
,
XPUPlace
()),
XPUKernelSet
({
pOpKernelType
(
vartype
::
INT64
,
XPUPlace
()),
pOpKernelType
(
vartype
::
INT32
,
XPUPlace
()),
pOpKernelType
(
vartype
::
INT32
,
XPUPlace
()),
...
...
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
浏览文件 @
f53e920d
...
@@ -50,6 +50,7 @@ void AdamDenseParamSparseGradKernel(
...
@@ -50,6 +50,7 @@ void AdamDenseParamSparseGradKernel(
DenseTensor
*
beta1_pow_out
,
DenseTensor
*
beta1_pow_out
,
DenseTensor
*
beta2_pow_out
,
DenseTensor
*
beta2_pow_out
,
DenseTensor
*
master_param_outs
)
{
DenseTensor
*
master_param_outs
)
{
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
float
*
param_ptr
=
nullptr
;
float
*
param_ptr
=
nullptr
;
funcs
::
GetDataPointer
<
Context
,
float
>
(
param
,
&
param_ptr
,
dev_ctx
);
funcs
::
GetDataPointer
<
Context
,
float
>
(
param
,
&
param_ptr
,
dev_ctx
);
...
@@ -62,16 +63,32 @@ void AdamDenseParamSparseGradKernel(
...
@@ -62,16 +63,32 @@ void AdamDenseParamSparseGradKernel(
float
*
lr_ptr
=
nullptr
;
float
*
lr_ptr
=
nullptr
;
funcs
::
GetDataPointer
<
Context
,
float
>
(
learning_rate
,
&
lr_ptr
,
dev_ctx
);
funcs
::
GetDataPointer
<
Context
,
float
>
(
learning_rate
,
&
lr_ptr
,
dev_ctx
);
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
float
*
beta1_pow_ptr
=
nullptr
;
float
*
beta1_pow_ptr
=
nullptr
;
const
float
*
beta1_const_pow_ptr
=
nullptr
;
const
float
*
beta1_const_pow_ptr
=
nullptr
;
if
(
beta1_pow
.
place
()
==
CPUPlace
())
{
if
(
beta1_pow
.
place
()
==
CPUPlace
())
{
DenseTensor
xpu_beta1_pow
;
if
(
beta1_pow
.
dtype
()
==
DataType
::
FLOAT16
)
{
phi
::
Copy
(
dev_ctx
,
beta1_pow
,
beta1_pow
.
place
(),
false
,
&
xpu_beta1_pow
);
XPUType
*
beta1_pow_t
=
if
(
xpu_beta1_pow
.
dtype
()
==
DataType
::
FLOAT16
)
RAII_GUARD
.
alloc_l3_or_gm
<
XPUType
>
(
beta1_pow
.
numel
());
funcs
::
GetDataPointer
<
Context
,
float
>
(
paddle
::
memory
::
Copy
(
param
.
place
(),
xpu_beta1_pow
,
&
beta1_pow_ptr
,
dev_ctx
);
beta1_pow_t
,
else
beta1_pow
.
place
(),
beta1_const_pow_ptr
=
xpu_beta1_pow
.
template
data
<
float
>();
beta1_pow
.
data
<
T
>
(),
sizeof
(
T
)
*
beta1_pow
.
numel
());
int
r
=
xpu
::
cast
<
XPUType
,
float
>
(
dev_ctx
.
x_context
(),
beta1_pow_t
,
beta1_pow_ptr
,
beta1_pow
.
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"cast"
);
}
else
{
beta1_pow_ptr
=
RAII_GUARD
.
alloc_l3_or_gm
<
float
>
(
beta1_pow
.
numel
());
paddle
::
memory
::
Copy
(
param
.
place
(),
beta1_pow_ptr
,
beta1_pow
.
place
(),
beta1_pow
.
data
<
T
>
(),
sizeof
(
T
)
*
beta1_pow
.
numel
());
}
}
else
{
}
else
{
if
(
beta1_pow
.
dtype
()
==
DataType
::
FLOAT16
)
if
(
beta1_pow
.
dtype
()
==
DataType
::
FLOAT16
)
funcs
::
GetDataPointer
<
Context
,
float
>
(
beta1_pow
,
&
beta1_pow_ptr
,
dev_ctx
);
funcs
::
GetDataPointer
<
Context
,
float
>
(
beta1_pow
,
&
beta1_pow_ptr
,
dev_ctx
);
...
@@ -81,14 +98,28 @@ void AdamDenseParamSparseGradKernel(
...
@@ -81,14 +98,28 @@ void AdamDenseParamSparseGradKernel(
float
*
beta2_pow_ptr
=
nullptr
;
float
*
beta2_pow_ptr
=
nullptr
;
const
float
*
beta2_const_pow_ptr
=
nullptr
;
const
float
*
beta2_const_pow_ptr
=
nullptr
;
if
(
beta2_pow
.
place
()
==
CPUPlace
())
{
if
(
beta2_pow
.
place
()
==
CPUPlace
())
{
DenseTensor
xpu_beta2_pow
;
if
(
beta2_pow
.
dtype
()
==
DataType
::
FLOAT16
)
{
phi
::
Copy
(
dev_ctx
,
beta2_pow
,
beta2_pow
.
place
(),
false
,
&
xpu_beta2_pow
);
XPUType
*
beta2_pow_t
=
if
(
xpu_beta2_pow
.
dtype
()
==
DataType
::
FLOAT16
)
RAII_GUARD
.
alloc_l3_or_gm
<
XPUType
>
(
beta2_pow
.
numel
());
funcs
::
GetDataPointer
<
Context
,
float
>
(
paddle
::
memory
::
Copy
(
param
.
place
(),
xpu_beta2_pow
,
&
beta2_pow_ptr
,
dev_ctx
);
beta2_pow_t
,
else
beta2_pow
.
place
(),
beta2_const_pow_ptr
=
xpu_beta2_pow
.
template
data
<
float
>();
beta2_pow
.
data
<
T
>
(),
sizeof
(
T
)
*
beta2_pow
.
numel
());
int
r
=
xpu
::
cast
<
XPUType
,
float
>
(
dev_ctx
.
x_context
(),
beta2_pow_t
,
beta2_pow_ptr
,
beta2_pow
.
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"cast"
);
}
else
{
beta2_pow_ptr
=
RAII_GUARD
.
alloc_l3_or_gm
<
float
>
(
beta2_pow
.
numel
());
paddle
::
memory
::
Copy
(
param
.
place
(),
beta2_pow_ptr
,
beta2_pow
.
place
(),
beta2_pow
.
data
<
T
>
(),
sizeof
(
T
)
*
beta2_pow
.
numel
());
}
}
else
{
}
else
{
if
(
beta2_pow
.
dtype
()
==
DataType
::
FLOAT16
)
if
(
beta2_pow
.
dtype
()
==
DataType
::
FLOAT16
)
funcs
::
GetDataPointer
<
Context
,
float
>
(
beta2_pow
,
&
beta2_pow_ptr
,
dev_ctx
);
funcs
::
GetDataPointer
<
Context
,
float
>
(
beta2_pow
,
&
beta2_pow_ptr
,
dev_ctx
);
...
@@ -195,7 +226,6 @@ void AdamDenseParamSparseGradKernel(
...
@@ -195,7 +226,6 @@ void AdamDenseParamSparseGradKernel(
int
row_count
=
grad_merge
.
rows
().
size
();
int
row_count
=
grad_merge
.
rows
().
size
();
std
::
vector
<
int
>
rows
(
row_count
);
std
::
vector
<
int
>
rows
(
row_count
);
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
int
*
xpu_rows
=
RAII_GUARD
.
alloc_l3_or_gm
<
int
>
(
row_count
);
int
*
xpu_rows
=
RAII_GUARD
.
alloc_l3_or_gm
<
int
>
(
row_count
);
std
::
vector
<
int64_t
>
merge_rows
(
grad_merge
.
rows
().
begin
(),
std
::
vector
<
int64_t
>
merge_rows
(
grad_merge
.
rows
().
begin
(),
grad_merge
.
rows
().
end
());
grad_merge
.
rows
().
end
());
...
...
paddle/phi/kernels/xpu/embedding_grad_kernel.cc
浏览文件 @
f53e920d
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#include "paddle/phi/kernels/embedding_grad_kernel.h"
#include "paddle/phi/kernels/embedding_grad_kernel.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
...
@@ -60,7 +61,78 @@ void EmbeddingGradKernel(const Context& ctx,
...
@@ -60,7 +61,78 @@ void EmbeddingGradKernel(const Context& ctx,
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"embedding_grad"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"embedding_grad"
);
}
}
template
<
typename
T
,
typename
Context
>
void
EmbeddingSparseGradKernel
(
const
Context
&
ctx
,
const
DenseTensor
&
input
,
const
DenseTensor
&
weight
,
const
DenseTensor
&
out_grad
,
int64_t
padding_idx
,
SelectedRows
*
weight_grad
)
{
DDim
table_dim
=
weight
.
dims
();
xpu
::
ctx_guard
RAII_GUARD
(
ctx
.
x_context
());
std
::
vector
<
int64_t
>
ids
(
input
.
numel
());
if
(
input
.
dtype
()
==
phi
::
DataType
::
INT64
)
{
paddle
::
memory
::
Copy
(
CPUPlace
(),
ids
.
data
(),
input
.
place
(),
input
.
data
<
int64_t
>
(),
sizeof
(
int64_t
)
*
input
.
numel
());
}
else
if
(
input
.
dtype
()
==
phi
::
DataType
::
INT32
)
{
int64_t
*
id_t
=
RAII_GUARD
.
alloc_l3_or_gm
<
int64_t
>
(
input
.
numel
());
int
r
=
xpu
::
cast
<
int32_t
,
int64_t
>
(
ctx
.
x_context
(),
input
.
data
<
int
>
(),
id_t
,
input
.
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"cast"
);
paddle
::
memory
::
Copy
(
CPUPlace
(),
ids
.
data
(),
input
.
place
(),
id_t
,
sizeof
(
int64_t
)
*
input
.
numel
());
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"emebdding input only support int32 and int64"
));
}
auto
ids_num
=
static_cast
<
int64_t
>
(
input
.
numel
());
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
auto
*
d_table
=
weight_grad
;
auto
*
d_output
=
&
out_grad
;
d_table
->
set_rows
(
ids
);
auto
*
d_table_value
=
d_table
->
mutable_value
();
d_table_value
->
Resize
({
ids_num
,
table_dim
[
1
]});
ctx
.
template
Alloc
<
T
>(
d_table_value
);
d_table
->
set_height
(
table_dim
[
0
]);
auto
*
d_output_data
=
d_output
->
template
data
<
T
>();
auto
*
d_table_data
=
d_table_value
->
template
data
<
T
>();
auto
d_output_dims
=
d_output
->
dims
();
auto
d_output_dims_2d
=
flatten_to_2d
(
d_output_dims
,
d_output_dims
.
size
()
-
1
);
PADDLE_ENFORCE_EQ
(
d_table_value
->
dims
(),
d_output_dims_2d
,
phi
::
errors
::
InvalidArgument
(
"ShapeError: The shape of lookup_table@Grad and "
"output@Grad should be same. "
"But received lookup_table@Grad's shape = [%s], "
"output@Grad's shape = [%s]."
,
d_table_value
->
dims
(),
d_output_dims_2d
));
int
r
=
xpu
::
copy
<
T
>
(
ctx
.
x_context
(),
d_output_data
,
d_table_data
,
d_output
->
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"copy"
);
}
}
// namespace phi
}
// namespace phi
PD_REGISTER_KERNEL
(
PD_REGISTER_KERNEL
(
embedding_grad
,
XPU
,
ALL_LAYOUT
,
phi
::
EmbeddingGradKernel
,
float
)
{}
embedding_grad
,
XPU
,
ALL_LAYOUT
,
phi
::
EmbeddingGradKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
embedding_sparse_grad
,
XPU
,
ALL_LAYOUT
,
phi
::
EmbeddingSparseGradKernel
,
float
)
{}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录