Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
SummerGao.
Paddle
提交
689de12c
P
Paddle
项目概览
SummerGao.
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
689de12c
编写于
2月 20, 2023
作者:
H
houj04
提交者:
GitHub
2月 20, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[XPU] add fp16 support for top_k_v2, squeeze2 and argsort. (#50614)
上级
1c8e15c9
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
124 addition
and
74 deletion
+124
-74
paddle/phi/backends/xpu/xpu2_op_list.cc
paddle/phi/backends/xpu/xpu2_op_list.cc
+4
-1
paddle/phi/kernels/xpu/argsort_kernel.cc
paddle/phi/kernels/xpu/argsort_kernel.cc
+34
-23
paddle/phi/kernels/xpu/top_k_kernel.cc
paddle/phi/kernels/xpu/top_k_kernel.cc
+32
-24
python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py
...n/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py
+3
-1
python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py
.../paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py
+51
-25
未找到文件。
paddle/phi/backends/xpu/xpu2_op_list.cc
浏览文件 @
689de12c
...
...
@@ -40,6 +40,7 @@ XPUOpMap& get_kl2_ops() {
{
"argsort"
,
XPUKernelSet
({
phi
::
DataType
::
INT32
,
phi
::
DataType
::
INT64
,
phi
::
DataType
::
FLOAT16
,
phi
::
DataType
::
FLOAT32
})},
{
"assign"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
,
...
...
@@ -598,6 +599,7 @@ XPUOpMap& get_kl2_ops() {
phi
::
DataType
::
BOOL
,
phi
::
DataType
::
INT8
,
phi
::
DataType
::
UINT8
,
phi
::
DataType
::
FLOAT16
,
phi
::
DataType
::
FLOAT32
})},
{
"squeeze"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT64
,
...
...
@@ -665,7 +667,8 @@ XPUOpMap& get_kl2_ops() {
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
,
phi
::
DataType
::
FLOAT16
})},
{
"truncated_gaussian_random"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
})},
{
"top_k"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
,
phi
::
DataType
::
FLOAT16
})},
{
"top_k_v2"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
})},
{
"top_k_v2"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
,
phi
::
DataType
::
FLOAT16
})},
{
"update_loss_scaling"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
,
phi
::
DataType
::
FLOAT16
})},
{
"unbind"
,
XPUKernelSet
({
phi
::
DataType
::
FLOAT32
})},
...
...
paddle/phi/kernels/xpu/argsort_kernel.cc
浏览文件 @
689de12c
...
...
@@ -207,34 +207,45 @@ void ArgsortKernel(const Context& dev_ctx,
}
}
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
if
(
int64_need_cast
)
{
XPUArgsort
<
T
,
true
,
true
>
()(
dev_ctx
.
x_context
(),
input_data
,
output_data
,
indices_data
,
data_shape
,
permute_vec
,
descending
);
XPUArgsort
<
XPUType
,
true
,
true
>
()(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
input_data
),
reinterpret_cast
<
XPUType
*>
(
output_data
),
indices_data
,
data_shape
,
permute_vec
,
descending
);
}
else
if
(
index_need_cast
)
{
XPUArgsort
<
T
,
false
,
true
>
()(
dev_ctx
.
x_context
(),
input_data
,
output_data
,
indices_data
,
data_shape
,
permute_vec
,
descending
);
XPUArgsort
<
XPUType
,
false
,
true
>
()(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
input_data
),
reinterpret_cast
<
XPUType
*>
(
output_data
),
indices_data
,
data_shape
,
permute_vec
,
descending
);
}
else
{
XPUArgsort
<
T
,
false
,
false
>
()(
dev_ctx
.
x_context
(),
input_data
,
output_data
,
indices_data
,
data_shape
,
permute_vec
,
descending
);
XPUArgsort
<
XPUType
,
false
,
false
>
()(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
input_data
),
reinterpret_cast
<
XPUType
*>
(
output_data
),
indices_data
,
data_shape
,
permute_vec
,
descending
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
argsort
,
XPU
,
ALL_LAYOUT
,
phi
::
ArgsortKernel
,
float
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
argsort
,
XPU
,
ALL_LAYOUT
,
phi
::
ArgsortKernel
,
float
,
int
,
int64_t
,
phi
::
dtype
::
float16
)
{}
paddle/phi/kernels/xpu/top_k_kernel.cc
浏览文件 @
689de12c
...
...
@@ -28,6 +28,8 @@ void TopkKernel(const Context& dev_ctx,
bool
sorted
,
DenseTensor
*
out
,
DenseTensor
*
indices
)
{
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
const
auto
&
in_dims
=
x
.
dims
();
const
T
*
in_data
=
x
.
data
<
T
>
();
int64_t
*
indices_data
=
dev_ctx
.
template
Alloc
<
int64_t
>(
indices
);
...
...
@@ -59,13 +61,13 @@ void TopkKernel(const Context& dev_ctx,
const
size_t
row
=
phi
::
product
(
phi
::
slice_ddim
(
in_dims
,
0
,
in_dims
.
size
()
-
1
));
const
size_t
col
=
in_dims
[
in_dims
.
size
()
-
1
];
int
r
=
xpu
::
sorted_topk
<
T
>
(
dev_ctx
.
x_context
(),
in_data
,
output_data
,
indices_int_data
,
row
,
col
,
k
);
int
r
=
xpu
::
sorted_topk
<
XPUType
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
in_data
)
,
reinterpret_cast
<
XPUType
*>
(
output_data
)
,
indices_int_data
,
row
,
col
,
k
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"sorted_topk"
);
r
=
xpu
::
cast
<
int32_t
,
int64_t
>
(
dev_ctx
.
x_context
(),
...
...
@@ -97,11 +99,14 @@ void TopkKernel(const Context& dev_ctx,
}
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
T
*
trans_in_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
T
>
(
x
.
numel
());
XPUType
*
trans_in_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
XPUType
>
(
x
.
numel
());
// Transpose and save interval output to trans_in
int
r
=
xpu
::
transpose
<
T
>
(
dev_ctx
.
x_context
(),
in_data
,
trans_in_data
,
x_shape_host
,
trans_axes
);
int
r
=
xpu
::
transpose
<
XPUType
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
in_data
),
trans_in_data
,
x_shape_host
,
trans_axes
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
errors
::
External
(
"XPU API 1st Transpose kernel"
...
...
@@ -109,7 +114,7 @@ void TopkKernel(const Context& dev_ctx,
r
,
XPUAPIErrorMsg
[
r
]));
T
*
trans_out_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
T
>
(
out
->
numel
());
XPUType
*
trans_out_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
XPUType
>
(
out
->
numel
());
int64_t
*
trans_idx_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
int64_t
>
(
out
->
numel
());
int32_t
*
trans_idx_int32_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
int32_t
>
(
out
->
numel
());
...
...
@@ -118,13 +123,14 @@ void TopkKernel(const Context& dev_ctx,
const
size_t
col
=
trans_dims
[
trans_dims
.
size
()
-
1
];
// Do top k on transposed input
r
=
xpu
::
sorted_topk
<
T
>
(
dev_ctx
.
x_context
(),
trans_in_data
,
trans_out_data
,
trans_idx_int32_data
,
row
,
col
,
k
);
r
=
xpu
::
sorted_topk
<
XPUType
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
trans_in_data
),
reinterpret_cast
<
XPUType
*>
(
trans_out_data
),
trans_idx_int32_data
,
row
,
col
,
k
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"sorted_topk"
);
r
=
xpu
::
cast
<
int32_t
,
int64_t
>
(
dev_ctx
.
x_context
(),
...
...
@@ -146,11 +152,12 @@ void TopkKernel(const Context& dev_ctx,
for
(
size_t
i
=
0
;
i
<
trans_back_axes
.
size
();
++
i
)
{
trans_out_shape_host
[
i
]
=
trans_out_dims
[
i
];
}
r
=
xpu
::
transpose
<
T
>
(
dev_ctx
.
x_context
(),
trans_out_data
,
output_data
,
trans_out_shape_host
,
trans_back_axes
);
r
=
xpu
::
transpose
<
XPUType
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUType
*>
(
trans_out_data
),
reinterpret_cast
<
XPUType
*>
(
output_data
),
trans_out_shape_host
,
trans_back_axes
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
errors
::
External
(
"XPU API 2nd Transpose kernel"
...
...
@@ -173,4 +180,5 @@ void TopkKernel(const Context& dev_ctx,
}
// namespace phi
PD_REGISTER_KERNEL
(
topk
,
XPU
,
ALL_LAYOUT
,
phi
::
TopkKernel
,
float
)
{}
PD_REGISTER_KERNEL
(
topk
,
XPU
,
ALL_LAYOUT
,
phi
::
TopkKernel
,
float
,
phi
::
dtype
::
float16
)
{}
python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py
浏览文件 @
689de12c
...
...
@@ -185,7 +185,9 @@ class XPUTestArgsortOp_LargeN(XPUOpTestWrapper):
support_types
=
get_xpu_op_support_types
(
'argsort'
)
for
stype
in
support_types
:
create_test_class
(
globals
(),
XPUTestArgsortOp
,
stype
)
create_test_class
(
globals
(),
XPUTestArgsortOp_LargeN
,
stype
)
if
stype
!=
"float16"
:
# skip fp16 test on LARGE input because unstable sort on low-precision fp16 will lead to test failure
create_test_class
(
globals
(),
XPUTestArgsortOp_LargeN
,
stype
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py
浏览文件 @
689de12c
...
...
@@ -30,6 +30,20 @@ import paddle
paddle
.
enable_static
()
def
random_unique_float
(
shape
,
dtype
):
# create a random float array with 10x length
numel
=
np
.
prod
(
shape
)
arr
=
np
.
random
.
uniform
(
-
10.0
,
10.0
,
numel
*
10
).
astype
(
dtype
)
arr
=
np
.
unique
(
arr
)
assert
(
arr
.
shape
[
0
]
>=
numel
),
"failed to create enough unique values: %d vs %d"
%
(
arr
.
shape
[
0
],
numel
)
arr
=
arr
[:
numel
]
np
.
random
.
shuffle
(
arr
)
arr
=
arr
.
reshape
(
shape
)
return
arr
def
numpy_topk
(
x
,
k
=
1
,
axis
=-
1
,
largest
=
True
):
if
axis
<
0
:
axis
=
len
(
x
.
shape
)
+
axis
...
...
@@ -52,16 +66,14 @@ class XPUTestTopKV2Op(XPUOpTestWrapper):
self
.
use_dynamic_create_class
=
False
class
TestTopkOp
(
XPUOpTest
):
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
20
).
astype
(
self
.
dtype
)
def
setUp
(
self
):
self
.
place
=
paddle
.
XPUPlace
(
0
)
self
.
op_type
=
"top_k_v2"
self
.
init_args
()
self
.
dtype
=
self
.
in_type
self
.
init_args
()
self
.
input_data
=
random_unique_float
(
self
.
input_data_shape
,
self
.
dtype
)
self
.
inputs
=
{
'X'
:
self
.
input_data
}
self
.
attrs
=
{
'k'
:
self
.
k
,
...
...
@@ -74,98 +86,112 @@ class XPUTestTopKV2Op(XPUOpTestWrapper):
self
.
outputs
=
{
'Out'
:
output
,
'Indices'
:
indices
}
def
test_check_output
(
self
):
if
paddle
.
is_compiled_with_xpu
():
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
)
self
.
check_output_with_place
(
self
.
place
)
def
test_check_grad
(
self
):
if
paddle
.
is_compiled_with_xpu
():
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_grad
(
set
([
'X'
]),
'Out'
)
self
.
check_grad_with_place
(
self
.
place
,
[
'X'
],
'Out'
)
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data_shape
=
(
10
,
20
)
class
TestTopkOp1
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
100
,
155
).
astype
(
self
.
dtype
)
# too many values for fp16 will lead to failure in random_unique_float function
if
self
.
dtype
==
np
.
float16
:
self
.
input_data_shape
=
(
100
,
55
)
else
:
self
.
input_data_shape
=
(
100
,
155
)
class
TestTopkOp2
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp3
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
5
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp4
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
1
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp5
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
2
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp6
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
5
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
8
,
32
,
64
).
astype
(
self
.
dtype
)
# too many values for fp16 will lead to failure in random_unique_float function
if
self
.
dtype
==
np
.
float16
:
self
.
input_data_shape
=
(
8
,
32
,
32
)
else
:
self
.
input_data_shape
=
(
8
,
32
,
64
)
class
TestTopkOp7
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
10
self
.
axis
=
2
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
8
,
5
,
10
,
16
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
8
,
5
,
10
,
16
)
class
TestTopkOp8
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
1
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
8
,
32
,
64
).
astype
(
self
.
dtype
)
# too many values for fp16 will lead to failure in random_unique_float function
if
self
.
dtype
==
np
.
float16
:
self
.
input_data_shape
=
(
8
,
32
,
32
)
else
:
self
.
input_data_shape
=
(
8
,
32
,
64
)
class
TestTopkOp9
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp10
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
3
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp11
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
5
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
class
TestTopkOp12
(
TestTopkOp
):
def
init_args
(
self
):
self
.
k
=
1
self
.
axis
=
1
self
.
largest
=
True
self
.
input_data
=
np
.
random
.
rand
(
10
,
10
,
5
).
astype
(
self
.
dtype
)
self
.
input_data
_shape
=
(
10
,
10
,
5
)
support_types
=
get_xpu_op_support_types
(
'top_k_v2'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录