Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
70b9f2ac
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
70b9f2ac
编写于
2月 18, 2022
作者:
T
taixiurong
提交者:
GitHub
2月 18, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
dropout support Seed, fix elementwise_add_grad bug, test=kunlun (#39656)
上级
8363406a
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
27 addition
and
81 deletion
+27
-81
paddle/fluid/operators/dropout_op_xpu.cc
paddle/fluid/operators/dropout_op_xpu.cc
+17
-19
paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
+10
-62
未找到文件。
paddle/fluid/operators/dropout_op_xpu.cc
浏览文件 @
70b9f2ac
...
...
@@ -32,20 +32,18 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
context
.
Attr
<
std
::
string
>
(
"dropout_implementation"
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
PADDLE_ENFORCE_EQ
(
!
context
.
HasInput
(
"Seed"
),
true
,
platform
::
errors
::
InvalidArgument
(
(
"Input(Seed) not supported on XPU"
)));
auto
*
seed
=
context
.
HasInput
(
"Seed"
)
?
context
.
Input
<
Tensor
>
(
"Seed"
)
:
nullptr
;
int
is_upscale
=
(
dropout_implementation
==
"upscale_in_train"
);
if
(
!
context
.
Attr
<
bool
>
(
"is_test"
))
{
std
::
random_device
rnd
;
// int seed = (context.Attr<bool>("fix_seed")) ?
// int(context.Attr<int>("seed")) : (rnd());
int
seed
=
0
;
if
(
context
.
Attr
<
bool
>
(
"fix_seed"
)
==
true
)
{
seed
=
static_cast
<
int
>
(
context
.
Attr
<
int
>
(
"seed"
));
int
seed_data
=
0
;
if
(
seed
)
{
seed_data
=
*
(
seed
->
data
<
int
>
());
}
else
{
seed
=
rnd
();
seed_data
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
0
;
}
auto
*
mask
=
context
.
Output
<
Tensor
>
(
"Mask"
);
...
...
@@ -55,26 +53,26 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
int
r
=
xpu
::
constant
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
XPUTyp
*>
(
y_data
),
y
->
numel
(),
XPUTyp
(
0
));
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"constant
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"constant"
);
r
=
xpu
::
constant
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
XPUTyp
*>
(
mask_data
),
mask
->
numel
(),
XPUTyp
(
0
));
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"constant
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"constant"
);
return
;
}
int
r
=
xpu
::
dropout
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUTyp
*>
(
x
->
data
<
T
>
()),
reinterpret_cast
<
XPUTyp
*>
(
y
->
data
<
T
>
()),
reinterpret_cast
<
XPUTyp
*>
(
mask_data
),
seed
,
reinterpret_cast
<
XPUTyp
*>
(
mask_data
),
seed
_data
,
mask
->
numel
(),
is_upscale
,
dropout_prob
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"dropout
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"dropout"
);
}
else
{
float
scale
=
(
is_upscale
)
?
(
1.0
)
:
(
static_cast
<
float
>
(
1.0
f
-
dropout_prob
));
int
r
=
xpu
::
scale
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUTyp
*>
(
x_data
),
reinterpret_cast
<
XPUTyp
*>
(
y_data
),
x
->
numel
(),
false
,
scale
,
0.0
f
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"scale
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"scale"
);
}
}
};
...
...
@@ -103,7 +101,7 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
reinterpret_cast
<
const
XPUType
*>
(
mask_data
),
reinterpret_cast
<
XPUType
*>
(
grad_x
->
data
<
T
>
()),
grad_y
->
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"mul
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"mul"
);
return
;
}
...
...
@@ -117,13 +115,13 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
reinterpret_cast
<
const
XPUType
*>
(
mask
->
data
<
T
>
()),
reinterpret_cast
<
XPUType
*>
(
mask_new
),
mask
->
numel
(),
false
,
scale
,
0.0
f
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"scale
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"scale"
);
r
=
xpu
::
mul
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
grad_y
->
data
<
T
>
()),
reinterpret_cast
<
const
XPUType
*>
(
mask_new
),
reinterpret_cast
<
XPUType
*>
(
grad_x
->
data
<
T
>
()),
grad_y
->
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"mul
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"mul"
);
}
else
{
int
r
=
xpu
::
dropout_grad
(
dev_ctx
.
x_context
(),
...
...
@@ -131,7 +129,7 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
reinterpret_cast
<
const
XPUType
*>
(
grad_y
->
data
<
T
>
()),
reinterpret_cast
<
XPUType
*>
(
grad_x
->
data
<
T
>
()),
dropout_prob
,
grad_y
->
numel
());
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"dropout_grad
"
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"dropout_grad"
);
}
}
};
...
...
paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
浏览文件 @
70b9f2ac
...
...
@@ -34,17 +34,6 @@ class ElementwiseAddXPUKernel : public framework::OpKernel<T> {
}
};
static
std
::
vector
<
int
>
get_rdims
(
const
std
::
vector
<
int
>&
xdims
,
const
std
::
vector
<
int
>&
ydims
)
{
std
::
vector
<
int
>
rdims
;
for
(
size_t
i
=
0
;
i
<
xdims
.
size
();
i
++
)
{
if
(
xdims
[
i
]
!=
ydims
[
i
])
{
rdims
.
push_back
(
i
);
}
}
return
rdims
;
}
template
<
typename
T
>
class
ElementwiseAddGradXPUKernel
:
public
ElemwiseGradKernel
<
T
>
{
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
...
...
@@ -53,64 +42,19 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
ElemwiseGradKernel
<
T
>::
Compute
(
ctx
);
auto
*
x
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Y"
);
auto
*
dz
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
const
framework
::
DDim
&
x_dims
=
x
->
dims
();
const
framework
::
DDim
&
y_dims
=
y
->
dims
();
const
framework
::
DDim
&
dz_dims
=
dz
->
dims
();
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
axis
=
(
axis
==
-
1
?
std
::
abs
(
x_dims
.
size
()
-
y_dims
.
size
())
:
axis
);
int
max_dim
=
std
::
max
(
x_dims
.
size
(),
y_dims
.
size
());
PADDLE_ENFORCE_GE
(
axis
,
0
,
platform
::
errors
::
InvalidArgument
(
"Axis should be great than or equal to 0, but received axis is %d."
,
axis
));
PADDLE_ENFORCE_LT
(
axis
,
max_dim
,
platform
::
errors
::
InvalidArgument
(
"Axis should be less than %d, but received axis is %d."
,
max_dim
,
axis
));
std
::
vector
<
int
>
x_dims_vec
(
max_dim
,
1
);
std
::
vector
<
int
>
y_dims_vec
(
max_dim
,
1
);
std
::
vector
<
int
>
z_dims_vec
(
max_dim
,
1
);
if
(
x_dims
.
size
()
==
max_dim
)
{
for
(
int
i
=
0
;
i
<
max_dim
;
i
++
)
{
x_dims_vec
[
i
]
=
x_dims
[
i
];
}
}
else
{
for
(
int
i
=
0
;
i
<
x_dims
.
size
();
i
++
)
{
x_dims_vec
[
i
+
axis
]
=
x_dims
[
i
];
}
}
if
(
y_dims
.
size
()
==
max_dim
)
{
for
(
int
i
=
0
;
i
<
max_dim
;
i
++
)
{
y_dims_vec
[
i
]
=
y_dims
[
i
];
}
}
else
{
for
(
int
i
=
0
;
i
<
y_dims
.
size
();
i
++
)
{
y_dims_vec
[
i
+
axis
]
=
y_dims
[
i
];
}
}
for
(
int
i
=
0
;
i
<
max_dim
;
i
++
)
{
z_dims_vec
[
i
]
=
dz_dims
[
i
];
}
std
::
vector
<
int
>
rdims_for_x
;
std
::
vector
<
int
>
rdims_for_y
;
rdims_for_x
=
get_rdims
(
x_dims_vec
,
z_dims_vec
);
rdims_for_y
=
get_rdims
(
y_dims_vec
,
z_dims_vec
);
const
T
*
dz_data
=
dz
->
data
<
T
>
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
if
(
dx
!=
nullptr
)
{
T
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
if
(
rdims_for_x
.
size
()
==
0
)
{
if
(
dx
->
dims
()
==
dz_dims
)
{
if
(
dx_data
!=
dz_data
)
{
framework
::
TensorCopy
(
*
dz
,
ctx
.
GetPlace
(),
...
...
@@ -123,27 +67,31 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
dx
->
clear
();
dx
->
mutable_data
<
T
>
(
x
->
dims
(),
ctx
.
GetPlace
());
}
std
::
vector
<
int
>
reduce_dims
=
GetReduceDim
(
dx
->
dims
(),
dz_dims
,
axis
);
std
::
vector
<
int
>
dz_vector
=
framework
::
vectorize
<
int
>
(
dz_dims
);
int
ret
=
xpu
::
reduce_sum
<
XPUType
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
dz_data
),
reinterpret_cast
<
XPUType
*>
(
dx_data
),
z_dims_vec
,
rdims_for_x
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
ret
,
"reduce_sum
"
);
reinterpret_cast
<
XPUType
*>
(
dx_data
),
dz_vector
,
reduce_dims
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
ret
,
"reduce_sum"
);
}
}
if
(
dy
!=
nullptr
)
{
T
*
dy_data
=
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
if
(
rdims_for_y
.
size
()
==
0
)
{
if
(
dy
->
dims
()
==
dz_dims
)
{
if
(
dy_data
!=
dz_data
)
{
framework
::
TensorCopy
(
*
dz
,
ctx
.
GetPlace
(),
ctx
.
template
device_context
<
platform
::
DeviceContext
>(),
dy
);
}
}
else
{
std
::
vector
<
int
>
reduce_dims
=
GetReduceDim
(
dy
->
dims
(),
dz_dims
,
axis
);
std
::
vector
<
int
>
dz_vector
=
framework
::
vectorize
<
int
>
(
dz_dims
);
int
ret
=
xpu
::
reduce_sum
<
XPUType
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
dz_data
),
reinterpret_cast
<
XPUType
*>
(
dy_data
),
z_dims_vec
,
rdims_for_y
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
ret
,
"reduce_sum
"
);
reinterpret_cast
<
XPUType
*>
(
dy_data
),
dz_vector
,
reduce_dims
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
ret
,
"reduce_sum"
);
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录