BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 3e088aaf (unverified)
Authored on Nov 25, 2021 by furnace; committed by GitHub on Nov 25, 2021
[NPU] add int64 support for argsort op (#37434)
* [NPU] add int64 support for argsort op
* [NPU] delete debug codes
Parent: 1127fecb
Showing 2 changed files with 179 additions and 32 deletions (+179 -32)
paddle/fluid/operators/argsort_op_npu.cc  (+97 -32)
python/paddle/fluid/tests/unittests/npu/test_argsort_op_npu.py  (+82 -0)
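For context, this change lets paddle.sort / paddle.argsort be fed int64 tensors on an NPU device. A minimal usage sketch, illustrative only and not part of the commit; it assumes a PaddlePaddle build with Ascend NPU support and that "npu:0" is a valid device string for paddle.set_device:

import numpy as np
import paddle

# Assumption: this build of PaddlePaddle was compiled with Ascend NPU support.
paddle.set_device("npu:0")

# int64 input, the dtype this commit teaches the NPU argsort kernel to handle.
x = paddle.to_tensor(
    np.random.randint(-100, 100, size=(2, 3, 4)), dtype="int64")

sorted_x = paddle.sort(x, axis=1)     # sorted values, still int64
indices = paddle.argsort(x, axis=1)   # positions of the sorted values

print(sorted_x.numpy())
print(indices.numpy())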
paddle/fluid/operators/argsort_op_npu.cc
@@ -46,6 +46,18 @@ static void CastToInt64(const framework::ExecutionContext& ctx,
      .Run(stream);
}

static void CastToFP32(const framework::ExecutionContext& ctx,
                       const aclrtStream& stream, const Tensor& in,
                       Tensor* out) {
  out->mutable_data<float>(ctx.GetPlace());
  NpuOpRunner runner;
  runner.SetType("Cast")
      .AddInput(in)
      .AddOutput(*out)
      .AddAttr("dst_type", ACL_FLOAT)
      .Run(stream);
}

template <typename T>
class ArgsortNPUKernel : public framework::OpKernel<T> {
 public:
@@ -66,41 +78,91 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
    Tensor indices_tmp(framework::proto::VarType::INT32);
    indices_tmp.Resize(indices->dims());

    if (input->type() == framework::proto::VarType::INT64) {
      Tensor input_fp32(framework::proto::VarType::FP32);
      input_fp32.Resize(input->dims());
      CastToFP32(ctx, stream, *input, &input_fp32);

      Tensor output_fp32(framework::proto::VarType::FP32);
      output_fp32.Resize(output->dims());

      if (axis == -1 || axis + 1 == in_dims.size()) {
        output_fp32.mutable_data<float>(ctx.GetPlace());
        indices_tmp.mutable_data<int32_t>(ctx.GetPlace());
        const auto& runner =
            NpuOpRunner("Sort", {input_fp32}, {output_fp32, indices_tmp}, attr);
        runner.Run(stream);

        CastToInt64(ctx, stream, output_fp32, output);
      } else {
        std::vector<int64_t> perm;
        for (int64_t i = 0; i < in_dims.size(); i++) {
          perm.emplace_back(i);
        }
        std::swap(perm[axis], perm[in_dims.size() - 1]);

        std::vector<int64_t> shape;
        for (size_t i = 0; i < perm.size(); i++) {
          shape.emplace_back(in_dims[perm[i]]);
        }
        auto trans_dims = framework::make_ddim(shape);

        Tensor trans_input(input_fp32.type());
        trans_input.Resize(trans_dims);
        TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input);

        Tensor trans_output(input_fp32.type());
        Tensor trans_indices(framework::proto::VarType::INT32);
        trans_output.mutable_data<float>(trans_dims, ctx.GetPlace());
        trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());

        const auto& runner = NpuOpRunner("Sort", {trans_input},
                                         {trans_output, trans_indices}, attr);
        runner.Run(stream);

        TranposeNPU<float>(ctx, stream, &perm, trans_output, &output_fp32);
        TranposeNPU<int32_t>(ctx, stream, &perm, trans_indices, &indices_tmp);

        CastToInt64(ctx, stream, output_fp32, output);
      }
    } else {
      if (axis == -1 || axis + 1 == in_dims.size()) {
        output->mutable_data<T>(ctx.GetPlace());
        indices_tmp.mutable_data<int32_t>(ctx.GetPlace());
        const auto& runner =
            NpuOpRunner("Sort", {*input}, {*output, indices_tmp}, attr);
        runner.Run(stream);
      } else {
        std::vector<int64_t> perm;
        for (int64_t i = 0; i < in_dims.size(); i++) {
          perm.emplace_back(i);
        }
        std::swap(perm[axis], perm[in_dims.size() - 1]);

        std::vector<int64_t> shape;
        for (size_t i = 0; i < perm.size(); i++) {
          shape.emplace_back(in_dims[perm[i]]);
        }
        auto trans_dims = framework::make_ddim(shape);

        Tensor trans_input(input->type());
        trans_input.Resize(trans_dims);
        TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input);

        Tensor trans_output(input->type());
        Tensor trans_indices(framework::proto::VarType::INT32);
        trans_output.mutable_data<T>(trans_dims, ctx.GetPlace());
        trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());

        const auto& runner = NpuOpRunner("Sort", {trans_input},
                                         {trans_output, trans_indices}, attr);
        runner.Run(stream);

        TranposeNPU<T>(ctx, stream, &perm, trans_output, output);
        TranposeNPU<int32_t>(ctx, stream, &perm, trans_indices, &indices_tmp);
      }
    }

    CastToInt64(ctx, stream, indices_tmp, indices);
  }
};
@@ -208,6 +270,9 @@ namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_NPU_KERNEL(argsort, ops::ArgsortNPUKernel<float>,
#ifdef PADDLE_WITH_ASCEND_INT64
                       ops::ArgsortNPUKernel<int64_t>,
#endif
                       ops::ArgsortNPUKernel<plat::float16>);

REGISTER_OP_NPU_KERNEL(argsort_grad, ops::ArgsortGradNPUKernel<float>,
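Read together, the kernel handles int64 by casting the input to float32 (CastToFP32) before calling the Ascend "Sort" op and casting the sorted values back to int64 (CastToInt64), and it handles a non-last sort axis by transposing that axis to the end, sorting, and transposing back. A rough NumPy sketch of this control flow, for illustration only (not part of the commit):

import numpy as np

def argsort_like_npu_kernel(x, axis):
    """NumPy sketch of ArgsortNPUKernel's control flow (illustration only).

    The real kernel dispatches to the Ascend "Sort" op; here np.sort/np.argsort
    stand in for it, sorting along the last axis as the kernel arranges.
    """
    cast_back = x.dtype == np.int64
    work = x.astype(np.float32) if cast_back else x          # CastToFP32 path

    if axis == -1 or axis + 1 == x.ndim:
        out = np.sort(work, axis=-1)
        idx = np.argsort(work, axis=-1).astype(np.int32)     # indices_tmp is int32
    else:
        # Swap the target axis with the last one, sort, then swap back.
        perm = list(range(x.ndim))
        perm[axis], perm[-1] = perm[-1], perm[axis]
        t = work.transpose(perm)
        out = np.sort(t, axis=-1).transpose(perm)            # same perm undoes the swap
        idx = np.argsort(t, axis=-1).astype(np.int32).transpose(perm)

    if cast_back:
        out = out.astype(np.int64)                           # CastToInt64 on the values
    return out, idx.astype(np.int64)                         # indices returned as int64

values, indices = argsort_like_npu_kernel(
    np.random.randint(-100, 100, size=(2, 3, 4)).astype(np.int64), axis=1)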
python/paddle/fluid/tests/unittests/npu/test_argsort_op_npu.py
@@ -209,5 +209,87 @@ class TestArgsortOpDescendingAxisNeg2NPUFP32(TestArgsortOpAxisNeg2NPUFP32):
        self.descending = True


# test cases for int64
class TestArgsortOpAxis0NPUINT64(TestArgsortOp):
    def setUp(self):
        self.set_npu()
        self.op_type = "argsort"
        self.place = paddle.NPUPlace(0)
        self.init_dtype()
        self.init_inputshape()
        self.init_axis()
        self.init_direction()

        self.x = np.random.randint(
            low=-100, high=100, size=self.input_shape,
            dtype=self.dtype).astype(self.dtype)
        self.inputs = {"X": self.x}
        self.attrs = {"axis": self.axis, "descending": self.descending}
        self.get_output()
        self.outputs = {"Out": self.sorted_x, "Indices": self.indices}

    def init_axis(self):
        self.axis = 0

    def init_dtype(self):
        self.dtype = np.int64

    def test_check_output(self):
        self.check_output_with_place(self.place, atol=1e-2)

    def set_npu(self):
        self.__class__.use_npu = True


class TestArgsortOpAxis1NPUINT64(TestArgsortOpAxis0NPUINT64):
    def init_axis(self):
        self.axis = 1


class TestArgsortOpAxis2NPUINT64(TestArgsortOpAxis0NPUINT64):
    def init_axis(self):
        self.axis = 2


class TestArgsortOpAxisNeg1NPUINT64(TestArgsortOpAxis0NPUINT64):
    def init_axis(self):
        self.axis = -1


class TestArgsortOpAxisNeg2NPUINT64(TestArgsortOpAxis0NPUINT64):
    def init_axis(self):
        self.axis = -2


class TestArgsortOpDescendingAxisNPUINT64(TestArgsortOpAxis0NPUINT64):
    def init_direction(self):
        self.descending = True


class TestArgsortOpDescendingAxis0NPUINT64(TestArgsortOpAxis0NPUINT64):
    def init_direction(self):
        self.descending = True


class TestArgsortOpDescendingAxis1NPUINT64(TestArgsortOpAxis1NPUINT64):
    def init_direction(self):
        self.descending = True


class TestArgsortOpDescendingAxis2NPUINT64(TestArgsortOpAxis2NPUINT64):
    def init_direction(self):
        self.descending = True


class TestArgsortOpDescendingAxisNeg1NPUINT64(TestArgsortOpAxisNeg1NPUINT64):
    def init_direction(self):
        self.descending = True


class TestArgsortOpDescendingAxisNeg2NPUINT64(TestArgsortOpAxisNeg2NPUINT64):
    def init_direction(self):
        self.descending = True


if __name__ == '__main__':
    unittest.main()
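The new INT64 cases reuse the existing TestArgsortOp machinery in this file and only override the dtype, axis, and direction. Assuming a Paddle build with NPU support and that the test module is importable, a single case could be run with the standard unittest runner, for example:

import unittest

# Hypothetical direct invocation of one of the new int64 cases.
from test_argsort_op_npu import TestArgsortOpAxis0NPUINT64

suite = unittest.TestLoader().loadTestsFromTestCase(TestArgsortOpAxis0NPUINT64)
unittest.TextTestRunner(verbosity=2).run(suite)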