BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 22ec915c (unverified)
Authored on Dec 05, 2022 by Roc; committed by GitHub on Dec 05, 2022
[0D Tensor]support 0d tensor for dist.scatter and dist.broadcast (#48638)
Parent: 35ebf2b4
Showing 4 changed files with 80 additions and 2 deletions
paddle/phi/infermeta/multiary.cc  +2 −1
paddle/phi/kernels/funcs/concat_funcs.h  +2 −1
paddle/phi/kernels/gpu/concat_kernel.cu  +29 −0
python/paddle/fluid/tests/unittests/collective/process_group_nccl.py  +47 −0
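For orientation, here is a minimal usage sketch of what this change enables, distilled from the new tests further below. It is not part of the diff and assumes a two-rank NCCL process group launched with paddle.distributed.launch:

import numpy as np
import paddle
import paddle.distributed as dist

dist.init_parallel_env()

# 0-D (shape []) tensors can now go through broadcast and scatter.
x = paddle.to_tensor(np.random.random([]).astype("float32"))    # shape []
dist.broadcast(x, src=0)                  # all ranks receive rank 0's scalar

out = paddle.to_tensor(np.random.random([]).astype("float32"))
if dist.get_rank() == 0:
    dist.scatter(out, [x, x + 1], src=0)  # rank i receives the i-th 0-D tensor
else:
    dist.scatter(out, [], src=0)
assert out.shape == []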
paddle/phi/infermeta/multiary.cc
@@ -911,13 +911,14 @@ void ConcatInferMeta(const std::vector<const MetaTensor*>& x,
   // 1. calculate axis
   int rank = x.at(0)->dims().size();
   PADDLE_ENFORCE_EQ(
-      axis >= -rank && axis < rank,
+      !rank || (axis >= -rank && axis < rank),
       true,
       phi::errors::InvalidArgument(
           "The axis is expected to be in range of [%d, %d), but got %d",
           -rank,
           rank,
           axis));
+  axis = rank ? axis : 0;
   if (axis < 0) {
     axis = axis + rank;
   }
paddle/phi/kernels/funcs/concat_funcs.h
@@ -21,13 +21,14 @@ namespace funcs {
 static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
   PADDLE_ENFORCE_EQ(
-      axis >= -rank && axis < rank,
+      !rank || (axis >= -rank && axis < rank),
       true,
       phi::errors::InvalidArgument(
           "The axis is expected to be in range of [%d, %d), but got %d",
           -rank,
           rank,
           axis));
+  axis = rank ? axis : 0;
   if (axis < 0) {
     axis = axis + rank;
   }
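The two C++ hunks above make the same change: for a rank-0 input the usual range check on axis is skipped (it would otherwise always fail, since [-0, 0) is empty) and axis is pinned to 0. A minimal Python sketch of that logic, using an illustrative name (compute_axis is not Paddle API):

def compute_axis(axis: int, rank: int) -> int:
    # Mirrors ComputeAxis after this change: when rank == 0 the range
    # check [-rank, rank) is skipped and axis is forced to 0.
    if rank and not (-rank <= axis < rank):
        raise ValueError(
            f"The axis is expected to be in range of [{-rank}, {rank}), but got {axis}"
        )
    axis = axis if rank else 0
    if axis < 0:
        axis += rank
    return axis

# For a 0-D input (rank 0) any axis collapses to 0 instead of raising.
assert compute_axis(0, 0) == 0
assert compute_axis(-1, 2) == 1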
paddle/phi/kernels/gpu/concat_kernel.cu
@@ -34,6 +34,35 @@ void ConcatKernel(const Context& dev_ctx,
                   DenseTensor* out) {
   int64_t axis = axis_scalar.to<int64_t>();
 
+  if (UNLIKELY(x[0]->dims().size() == 0)) {
+    // for dims is 0 specially
+    phi::DDim tmp_1dim, out_dims;
+    out_dims[0] = x.size();
+    tmp_1dim[0] = 1;
+
+    out->Resize(out_dims);
+    dev_ctx.template Alloc<T>(out);
+
+    size_t output_offset = 0;
+    for (auto* in : x) {
+      if (in->numel() == 0UL) {
+        continue;
+      }
+      auto in_stride = phi::stride_numel(tmp_1dim);
+      auto out_stride = phi::stride_numel(out->dims());
+      paddle::operators::StridedNumelCopyWithAxis<T>(
+          dev_ctx,
+          axis,
+          out->data<T>() + output_offset,
+          out_stride,
+          in->data<T>(),
+          in_stride,
+          in_stride[axis]);
+      output_offset += in_stride[axis];
+    }
+    return;
+  }
+
   axis = phi::funcs::ComputeAxis(axis, x[0]->dims().size());
 
   std::vector<phi::DDim> x_dims;
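The CUDA kernel gains a fast path for 0-D inputs: each input is treated as a single element (tmp_1dim is set to [1]) and copied into a 1-D output whose length equals the number of inputs. A hedged illustration of the expected user-visible behavior (my example, not part of the diff; the Python-level behavior of concat on 0-D tensors may differ across releases):

import paddle

a = paddle.to_tensor(1.0)    # 0-D tensor, shape []
b = paddle.to_tensor(2.0)    # 0-D tensor, shape []

# With this kernel change, concatenating 0-D tensors is expected to
# produce a 1-D tensor whose length equals the number of inputs.
out = paddle.concat([a, b])
print(out.shape)             # expected: [2]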
python/paddle/fluid/tests/unittests/collective/process_group_nccl.py
@@ -167,6 +167,29 @@ class TestProcessGroupFp32(unittest.TestCase):
 
             print("test broadcast api ok")
 
+            # test broadcast with shape=[]
+            # rank 0
+            x = np.random.random([]).astype(self.dtype)
+            tensor_x = paddle.to_tensor(x)
+            # rank 1
+            y = np.random.random([]).astype(self.dtype)
+            tensor_y = paddle.to_tensor(y)
+
+            broadcast_result = paddle.assign(tensor_x)
+            if pg.rank() == 0:
+                task = dist.broadcast(tensor_x, 0, sync_op=False)
+                task.synchronize()
+                paddle.device.cuda.synchronize()
+                assert task.is_completed()
+                assert np.array_equal(broadcast_result, tensor_x)
+            else:
+                task = dist.broadcast(tensor_y, 0)
+                paddle.device.cuda.synchronize()
+                assert np.array_equal(broadcast_result, tensor_y)
+            assert tensor_y.shape == []
+
+            print("test broadcast api with shape=[] ok")
+
             # test barrier
             # rank 0
             if pg.rank() == 0:
@@ -417,6 +440,30 @@ class TestProcessGroupFp32(unittest.TestCase):
                 assert np.array_equal(tensor_y, out2)
             print("test scatter api ok\n")
 
+            # test Scatter with shape=[]
+            # rank 0
+            x = np.random.random([]).astype(self.dtype)
+            y = np.random.random([]).astype(self.dtype)
+            tensor_x = paddle.to_tensor(x)
+            tensor_y = paddle.to_tensor(y)
+            if pg.rank() == 0:
+                in_1, in_2 = tensor_x, tensor_x + 1
+                task = dist.scatter(tensor_y, [in_1, in_2], 0, sync_op=True)
+                paddle.device.cuda.synchronize()
+            # rank 1
+            else:
+                task = dist.scatter(tensor_y, [], 0, sync_op=True)
+                task.wait()
+                paddle.device.cuda.synchronize()
+            out1 = paddle.assign(tensor_x)
+            out2 = paddle.assign(tensor_x + 1)
+            if pg.rank() == 0:
+                assert np.array_equal(tensor_y, out1)
+            else:
+                assert np.array_equal(tensor_y, out2), f"{tensor_y}, {out2}"
+            assert tensor_y.shape == []
+            print("test scatter api with shape=[] ok\n")
+
             # test send min
             # rank 0
             x = np.random.random(self.shape).astype(self.dtype)