Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
78d5cf7b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
78d5cf7b
编写于
5月 17, 2022
作者:
A
Aganlengzi
提交者:
GitHub
5月 17, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] add reduce_max_grad op (#42672)
上级
c714926d
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
67 additions
and
0 deletions
+67
-0
paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc
paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc
+67
-0
未找到文件。
paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc
浏览文件 @
78d5cf7b
...
...
@@ -105,6 +105,68 @@ class ReduceMaxNPUKernel : public framework::OpKernel<T> {
}
};
// NPU kernel computing the gradient of reduce_max.
//
// Reads the inputs "X", "Out" and the gradient of "Out", and writes the
// gradient of "X". "Out" and its gradient are both broadcast to X's dims;
// wherever X equals the broadcast "Out" the broadcast gradient is selected,
// and zero is written everywhere else.
//
// Only in_dtype == -1 is accepted; any other value raises InvalidArgument.
//
// NOTE(review): "Out" and its gradient are broadcast using whatever dims they
// arrive with. If the forward op dropped the reduced axes instead of keeping
// them as size 1, BroadcastTo may not line up with X's dims -- TODO confirm
// against the forward kernel's attributes.
template <typename DeviceContext, typename T>
class ReduceMaxGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* out = context.Input<Tensor>("Out");
    auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
    int in_dtype = context.Attr<int>("in_dtype");

    PADDLE_ENFORCE_EQ(
        in_dtype == -1, true,
        platform::errors::InvalidArgument(
            "NPU only support in_dtype == -1 in reduce_max_grad op."));

    auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
    x_grad->mutable_data<T>(context.GetPlace());

    auto& dev_ctx =
        context.template device_context<paddle::platform::NPUDeviceContext>();
    auto place = context.GetPlace();
    auto stream = dev_ctx.stream();

    // broadcast
    // The target shape is needed several times below, so it is kept const
    // and every runner receives its own throw-away copy. The previous code
    // moved the same vector into the first runner and then read it (and
    // moved it) again, which relied on the callee never stealing its
    // contents.
    const auto x_dims_vec = phi::vectorize(x->dims());
    const auto x_ddim = phi::make_ddim(x_dims_vec);

    Tensor transformed_out(x->type());
    transformed_out.Resize(x_ddim);
    transformed_out.mutable_data<T>(place);
    auto out_target_shape = x_dims_vec;
    NpuOpRunner r_brd_out;
    r_brd_out.SetType("BroadcastTo")
        .AddInput(*out)
        .AddInput(std::move(out_target_shape))
        .AddOutput(transformed_out)
        .Run(stream);

    Tensor transformed_out_grad(x->type());
    transformed_out_grad.Resize(x_ddim);
    transformed_out_grad.mutable_data<T>(place);
    auto out_grad_target_shape = x_dims_vec;
    NpuOpRunner r_brd_out_grad;
    r_brd_out_grad.SetType("BroadcastTo")
        .AddInput(*out_grad)
        .AddInput(std::move(out_grad_target_shape))
        .AddOutput(transformed_out_grad)
        .Run(stream);

    // compare
    // equal_cond marks the positions where X matches the broadcast Out.
    Tensor equal_cond;
    equal_cond.mutable_data<bool>(x_grad->dims(), place);
    const auto& r_equal =
        NpuOpRunner("Equal", {*x, transformed_out}, {equal_cond}, {});
    r_equal.Run(stream);

    // select
    Tensor t_zero;
    t_zero.mutable_data<T>(x_grad->dims(), place);
    FillNpuTensorWithConstant(&t_zero, static_cast<T>(0));
    // Re-apply the dims after the fill; presumably the fill helper may change
    // the tensor's shape -- TODO confirm.
    t_zero.Resize(x_grad->dims());

    // Marked positions take the broadcast gradient, the rest take zero.
    const auto& r_sel = NpuOpRunner(
        "SelectV2", {equal_cond, transformed_out_grad, t_zero}, {*x_grad}, {});
    r_sel.Run(stream);
  }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -115,3 +177,8 @@ REGISTER_OP_NPU_KERNEL(
ops
::
ReduceMaxNPUKernel
<
plat
::
NPUDeviceContext
,
plat
::
float16
>
,
ops
::
ReduceMaxNPUKernel
<
plat
::
NPUDeviceContext
,
int64_t
>
,
ops
::
ReduceMaxNPUKernel
<
plat
::
NPUDeviceContext
,
int
>
);
// Register the NPU reduce_max_grad kernel for float, float16, int64_t and int.
REGISTER_OP_NPU_KERNEL(
    reduce_max_grad,
    ops::ReduceMaxGradNPUKernel<plat::NPUDeviceContext, float>,
    ops::ReduceMaxGradNPUKernel<plat::NPUDeviceContext, plat::float16>,
    ops::ReduceMaxGradNPUKernel<plat::NPUDeviceContext, int64_t>,
    ops::ReduceMaxGradNPUKernel<plat::NPUDeviceContext, int>);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录