Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
1ef1cace
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1ef1cace
编写于
9月 29, 2022
作者:
光明和真理
提交者:
GitHub
9月 29, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU] add mlu kernel for add_reduce_max_grad (#45651)
Co-authored-by:
N
liupeiyu
<
liupeiyu@cambricon.com
>
上级
8e9c719d
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
110 addition
and
0 deletion
+110
-0
paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc
paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc
+110
-0
未找到文件。
paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc
浏览文件 @
1ef1cace
...
...
@@ -92,6 +92,112 @@ class ReduceMaxMLUKernel : public framework::OpKernel<T> {
}
};
// Gradient kernel for reduce_max on MLU devices.
//
// Contract (grad of y = max(x, dims)):
//   dx[i] = dy broadcast back to x's shape where x[i] equals the reduced
//   maximum, and 0 elsewhere.
//
// Implementation outline:
//   1. Reshape "Out"/"Out@GRAD" to x's rank (reduced dims become size 1),
//      then broadcast both to x's full shape.
//   2. Build a boolean mask with an element-wise equality compare of x
//      against the broadcast maximum.
//   3. Select the broadcast gradient where the mask is true, zero otherwise.
//
// NOTE(review): ties are not split — every element equal to the max
// receives the full upstream gradient, presumably matching the CPU/GPU
// reduce_max_grad behavior of this operator; confirm against those kernels.
template <typename T>
class ReduceMaxGradMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* out = context.Input<Tensor>("Out");
    auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
    auto reduce_dims = context.Attr<std::vector<int>>("dim");
    bool reduce_all = context.Attr<bool>("reduce_all");
    int in_dtype = context.Attr<int>("in_dtype");

    // Dtype promotion (in_dtype != -1) is not implemented for MLU.
    PADDLE_ENFORCE_EQ(
        in_dtype == -1,
        true,
        platform::errors::InvalidArgument(
            "MLU only support in_dtype == -1 in reduce_max_grad op."));

    auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
    x_grad->mutable_data<T>(context.GetPlace());

    auto place = context.GetPlace();

    // --- broadcast ---------------------------------------------------
    // reduce_all means every axis was reduced; rebuild the dim list so
    // the reshape below collapses all of them.
    auto x_dims_vec = phi::vectorize(x->dims());
    if (reduce_all) {
      reduce_dims.clear();
      for (size_t axis = 0; axis < x_dims_vec.size(); ++axis) {
        reduce_dims.push_back(static_cast<int>(axis));
      }
    }

    // Target shape for Out/Out@GRAD: x's shape with each reduced axis
    // set to 1 (keepdim form), so BroadcastTo can expand them.
    Tensor tmp_out, tmp_out_grad;
    auto tmp_out_dims_vec = x_dims_vec;
    for (auto axis : reduce_dims) {
      if (axis < 0) {
        axis += x_dims_vec.size();  // normalize negative axes
      }
      tmp_out_dims_vec[axis] = 1;
    }

    // Views over the forward outputs — no copy, only a reshape.
    tmp_out.ShareDataWith(*out);
    tmp_out.Resize(phi::make_ddim(tmp_out_dims_vec));
    tmp_out_grad.ShareDataWith(*out_grad);
    tmp_out_grad.Resize(phi::make_ddim(tmp_out_dims_vec));

    // Expand the reduced maximum to x's full shape.
    Tensor transformed_out(x->type());
    transformed_out.Resize(phi::make_ddim(x_dims_vec));
    transformed_out.mutable_data<T>(place);
    MLUCnnlTensorDesc tmp_out_desc(tmp_out);
    MLUCnnlTensorDesc transformed_out_desc(transformed_out);
    MLUCnnl::BroadcastTo(context,
                         tmp_out_desc.get(),
                         GetBasePtr(&tmp_out),
                         transformed_out_desc.get(),
                         GetBasePtr(&transformed_out));

    // Expand the upstream gradient the same way.
    Tensor transformed_out_grad(x->type());
    transformed_out_grad.Resize(phi::make_ddim(x_dims_vec));
    transformed_out_grad.mutable_data<T>(place);
    MLUCnnlTensorDesc tmp_out_grad_desc(tmp_out_grad);
    MLUCnnlTensorDesc transformed_out_grad_desc(transformed_out_grad);
    MLUCnnl::BroadcastTo(context,
                         tmp_out_grad_desc.get(),
                         GetBasePtr(&tmp_out_grad),
                         transformed_out_grad_desc.get(),
                         GetBasePtr(&transformed_out_grad));

    // --- compare -----------------------------------------------------
    // equal_cond[i] = (x[i] == broadcast_max[i])
    Tensor equal_cond;
    equal_cond.mutable_data<bool>(x_grad->dims(), place);
    MLUCnnlTensorDesc x_desc(*x);
    MLUCnnlTensorDesc equal_cond_desc(equal_cond);
    MLUCnnl::Logic(context,
                   CNNL_LOGIC_OP_EQ,
                   x_desc.get(),
                   GetBasePtr(x),
                   transformed_out_desc.get(),
                   GetBasePtr(&transformed_out),
                   equal_cond_desc.get(),
                   GetBasePtr(&equal_cond));

    // --- select ------------------------------------------------------
    // x_grad = equal_cond ? broadcast_grad : 0
    Tensor t_zero;
    t_zero.mutable_data<T>(x_grad->dims(), place);
    FillMLUTensorWithHostValue<T>(context, static_cast<T>(0), &t_zero);
    t_zero.Resize(x_grad->dims());
    MLUCnnlTensorDesc t_zero_desc(t_zero);
    MLUCnnlTensorDesc x_grad_desc(*x_grad);
    MLUCnnl::Select(context,
                    equal_cond_desc.get(),
                    GetBasePtr(&equal_cond),
                    transformed_out_grad_desc.get(),
                    GetBasePtr(&transformed_out_grad),
                    t_zero_desc.get(),
                    GetBasePtr(&t_zero),
                    x_grad_desc.get(),
                    GetBasePtr(x_grad));
  }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -102,3 +208,7 @@ REGISTER_OP_MLU_KERNEL(reduce_max,
ops
::
ReduceMaxMLUKernel
<
float
>
,
ops
::
ReduceMaxMLUKernel
<
plat
::
float16
>
,
ops
::
ReduceMaxMLUKernel
<
int
>
);
// Register the reduce_max gradient kernel for the dtypes the forward
// kernel supports on MLU: float32, float16 and int32.
REGISTER_OP_MLU_KERNEL(reduce_max_grad,
                       ops::ReduceMaxGradMLUKernel<float>,
                       ops::ReduceMaxGradMLUKernel<plat::float16>,
                       ops::ReduceMaxGradMLUKernel<int>);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录