BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit f9e55dee (unverified)
Authored May 31, 2022 by Aganlengzi; committed via GitHub on May 31, 2022

[NPU] fix arg_max and reduce_max (#42887)
* fix arg_max and reduce_max
* add arg_max ut

Parent: 21e1d10f

Showing 3 changed files with 57 additions and 3 deletions (+57 −3)

paddle/fluid/operators/arg_max_op_npu.cc (+8 −1)
paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc (+22 −2)
python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py (+27 −0)

paddle/fluid/operators/arg_max_op_npu.cc
```diff
@@ -34,11 +34,18 @@ struct VisitDataArgNPUMaxFunctor {
     out.template mutable_data<Tout>(ctx.GetPlace());
     auto axis = ctx.Attr<int64_t>("axis");
     auto dtype = ctx.Attr<int>("dtype");
+    const bool& flatten = ctx.Attr<bool>("flatten");
+
+    Tensor transformed_x(x.type());
+    transformed_x.ShareDataWith(x);
+    if (flatten) {
+      transformed_x.Resize(phi::make_ddim({x.numel()}));
+    }
 
     auto stream = ctx.template device_context<NPUDeviceContext>().stream();
     NpuOpRunner runner;
     runner.SetType("ArgMaxV2")
-        .AddInput(x)
+        .AddInput(transformed_x)
         .AddInput(std::vector<int64_t>{axis})
         .AddOutput(out)
         .AddAttrDataType("dtype", dtype)
```
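For reference, the `flatten` attribute corresponds to calling `paddle.argmax` without an `axis`: the kernel must first view the input as one-dimensional and then take a single global index, which is what the new `transformed_x` reshape provides. A minimal NumPy sketch of the intended semantics (the function and variable names here are illustrative, not part of the kernel):

```python
import numpy as np

def argmax_v2(x, axis=None):
    """Illustrative reference for the ArgMaxV2 call above."""
    if axis is None:
        # Mirrors the flatten branch: view x as 1-D, then take the
        # global argmax (analogous to transformed_x.Resize({x.numel()})).
        x = x.reshape(x.size)
        axis = 0
    return np.argmax(x, axis=axis)

x = np.array([[1.0, 9.0], [3.0, 2.0]], dtype=np.float32)
print(argmax_v2(x))          # 1 -> index into the flattened array
print(argmax_v2(x, axis=1))  # [1 0]
```

Before the fix, the kernel passed `x` to the ArgMaxV2 runner directly, so the flatten case was silently ignored.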
paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc
```diff
@@ -112,6 +112,8 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel<T> {
     auto* x = context.Input<Tensor>("X");
     auto* out = context.Input<Tensor>("Out");
     auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto reduce_dims = context.Attr<std::vector<int>>("dim");
+    bool reduce_all = context.Attr<bool>("reduce_all");
     int in_dtype = context.Attr<int>("in_dtype");
 
     PADDLE_ENFORCE_EQ(
@@ -129,12 +131,30 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel<T> {
     // broadcast
     auto x_dims_vec = phi::vectorize(x->dims());
+    if (reduce_all) {
+      reduce_dims.clear();
+      for (size_t d = 0; d < x_dims_vec.size(); ++d) {
+        reduce_dims.push_back(static_cast<int>(d));
+      }
+    }
+
+    Tensor tmp_out, tmp_out_grad;
+    auto tmp_out_dims_vec = x_dims_vec;
+    for (auto d : reduce_dims) {
+      tmp_out_dims_vec[d] = 1;
+    }
+
+    tmp_out.ShareDataWith(*out);
+    tmp_out.Resize(phi::make_ddim(tmp_out_dims_vec));
+    tmp_out_grad.ShareDataWith(*out_grad);
+    tmp_out_grad.Resize(phi::make_ddim(tmp_out_dims_vec));
+
     Tensor transformed_out(x->type());
     transformed_out.Resize(phi::make_ddim(x_dims_vec));
     transformed_out.mutable_data<T>(place);
     NpuOpRunner r_brd_out;
     r_brd_out.SetType("BroadcastTo")
-        .AddInput(*out)
+        .AddInput(tmp_out)
         .AddInput(std::move(x_dims_vec))
         .AddOutput(transformed_out)
         .Run(stream);
 
@@ -143,7 +163,7 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel<T> {
     transformed_out_grad.mutable_data<T>(place);
     NpuOpRunner r_brd_out_grad;
     r_brd_out_grad.SetType("BroadcastTo")
-        .AddInput(*out_grad)
+        .AddInput(tmp_out_grad)
         .AddInput(std::move(x_dims_vec))
         .AddOutput(transformed_out_grad)
         .Run(stream);
```
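The gradient of a max reduction flows only to the positions that attained the maximum, so the kernel broadcasts `out` and `out_grad` back to the input shape with `BroadcastTo`. With `keep_dim=False` the reduced axes are absent from `out`, so the fix first restores them as size-1 dimensions (`tmp_out`, `tmp_out_grad`) before broadcasting. A NumPy sketch of that logic, assuming the standard equal-to-max masking rule (the masking itself happens in kernel code outside this hunk; names here are illustrative):

```python
import numpy as np

def reduce_max_grad(x, out, out_grad, reduce_dims):
    """Illustrative reference for the grad kernel above."""
    # out / out_grad come from a max reduction with keep_dim=False, so the
    # reduced axes must be restored as size-1 dims before broadcasting --
    # exactly what the tmp_out / tmp_out_grad reshapes do.
    tmp_shape = list(x.shape)
    for d in reduce_dims:
        tmp_shape[d] = 1
    out_b = np.broadcast_to(out.reshape(tmp_shape), x.shape)       # "BroadcastTo"
    grad_b = np.broadcast_to(out_grad.reshape(tmp_shape), x.shape)
    # Gradient flows only where the input equals the (broadcast) max.
    return np.where(x == out_b, grad_b, 0.0)

x = np.array([[1.0, 5.0], [7.0, 2.0]], dtype=np.float32)
out = x.max(axis=1)  # shape (2,), i.e. keep_dim=False
print(reduce_max_grad(x, out, np.ones_like(out), [1]))
# [[0. 1.]
#  [1. 0.]]
```

Without the reshape, broadcasting a rank-reduced `out` against the full input shape either fails or aligns the wrong axes, which is the bug this commit fixes.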
python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py
```diff
@@ -328,5 +328,32 @@ class TestArgMaxAPI_2(unittest.TestCase):
             run(place)
 
 
+class TestArgMaxAPI_3(unittest.TestCase):
+    def initTestCase(self):
+        self.dims = (1, 9)
+        self.dtype = 'float32'
+
+    def setUp(self):
+        self.initTestCase()
+        self.__class__.use_npu = True
+        self.place = [paddle.NPUPlace(0)]
+
+    def test_dygraph_api(self):
+        def run(place):
+            paddle.disable_static(place)
+            np.random.seed(2021)
+            numpy_input = (np.random.random(self.dims)).astype(self.dtype)
+            tensor_input = paddle.to_tensor(numpy_input)
+            numpy_output = np.argmax(numpy_input).reshape([1])
+            paddle_output = paddle.argmax(tensor_input)
+            self.assertEqual(
+                np.allclose(numpy_output, paddle_output.numpy()), True)
+            self.assertEqual(numpy_output.shape, paddle_output.numpy().shape)
+            paddle.enable_static()
+
+        for place in self.place:
+            run(place)
+
+
 if __name__ == '__main__':
     unittest.main()
```
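The new `TestArgMaxAPI_3` exercises exactly the flatten path fixed in `arg_max_op_npu.cc`: on a `(1, 9)` input, `paddle.argmax` with no `axis` must return one global index of shape `(1,)`. A sketch for running just this case in isolation, assuming a Paddle build with NPU support and the test directory on `sys.path`:

```python
import unittest

# Load only the test class added by this commit.
suite = unittest.defaultTestLoader.loadTestsFromName(
    'test_arg_max_op_npu.TestArgMaxAPI_3')
unittest.TextTestRunner(verbosity=2).run(suite)
```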