Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c5232b4b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c5232b4b
编写于
5月 11, 2022
作者:
H
Haohongxiang
提交者:
GitHub
5月 11, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Dygraph] Support diff batch for sparse of EagerReducer (#42646)
* support diff batch for sparse of eagerreducer * fix
上级
7b828f71
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
57 addition
and
4 deletion
+57
-4
paddle/fluid/distributed/collective/reducer.cc
paddle/fluid/distributed/collective/reducer.cc
+55
-4
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+2
-0
未找到文件。
paddle/fluid/distributed/collective/reducer.cc
浏览文件 @
c5232b4b
...
@@ -901,6 +901,9 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
...
@@ -901,6 +901,9 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
Tensor
src_value_tensor
(
std
::
make_shared
<
phi
::
DenseTensor
>
(
src
->
value
()));
std
::
vector
<
int64_t
>
dst_shape
=
src_value_tensor
.
shape
();
if
(
std
::
all_of
(
cpu_rows_num_ptr
,
cpu_rows_num_ptr
+
size_
,
if
(
std
::
all_of
(
cpu_rows_num_ptr
,
cpu_rows_num_ptr
+
size_
,
[
&
](
int64_t
row
)
{
return
row
==
cpu_rows_num_ptr
[
0
];
}))
{
[
&
](
int64_t
row
)
{
return
row
==
cpu_rows_num_ptr
[
0
];
}))
{
// During sparse communication, the number of each card is same.
// During sparse communication, the number of each card is same.
...
@@ -940,8 +943,6 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
...
@@ -940,8 +943,6 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
&
dst_rows_vector
);
&
dst_rows_vector
);
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
Tensor
src_value_tensor
(
std
::
make_shared
<
phi
::
DenseTensor
>
(
src
->
value
()));
std
::
vector
<
int64_t
>
dst_shape
=
src_value_tensor
.
shape
();
dst_shape
[
dst_shape
.
size
()
-
2
]
=
rows_num
;
dst_shape
[
dst_shape
.
size
()
-
2
]
=
rows_num
;
auto
dst_dense_tensor
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
auto
dst_dense_tensor
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
paddle
::
experimental
::
full
(
IntArray
(
dst_shape
),
0
,
paddle
::
experimental
::
full
(
IntArray
(
dst_shape
),
0
,
...
@@ -971,8 +972,58 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
...
@@ -971,8 +972,58 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
*
(
src
->
mutable_value
())
=
*
(
src
->
mutable_value
())
=
*
(
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
dst_value_tensor
.
impl
()));
*
(
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
dst_value_tensor
.
impl
()));
}
else
{
}
else
{
PADDLE_THROW
(
std
::
vector
<
Tensor
>
rows_tensors
;
platform
::
errors
::
Unimplemented
(
"This case is not supported."
));
std
::
vector
<
Tensor
>
values_tensors
;
for
(
int
i
=
0
;
i
<
size_
;
++
i
)
{
std
::
vector
<
int64_t
>
value_tensor_shape
=
{
cpu_rows_num_ptr
[
i
],
dst_shape
[
dst_shape
.
size
()
-
1
]};
Tensor
rows_tensor
=
paddle
::
experimental
::
full
(
IntArray
({
static_cast
<
int64_t
>
(
cpu_rows_num_ptr
[
i
])}),
0
,
DataType
::
INT64
,
inner_place_
);
Tensor
values_tensor
=
paddle
::
experimental
::
full
(
IntArray
(
value_tensor_shape
),
0
,
src
->
value
().
dtype
(),
inner_place_
);
std
::
vector
<
phi
::
DenseTensor
>
rows_dense_vector
;
std
::
vector
<
phi
::
DenseTensor
>
values_dense_vector
;
if
(
i
==
rank_
)
{
auto
*
rows_dense_tensor
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
rows_tensor
.
impl
())
.
get
();
framework
::
TensorFromVector
<
int64_t
>
(
src_rows
,
*
dev_ctx
,
rows_dense_tensor
);
values_tensor
.
set_impl
(
std
::
make_shared
<
phi
::
DenseTensor
>
(
src
->
value
()));
}
rows_dense_vector
.
push_back
(
*
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
rows_tensor
.
impl
()));
values_dense_vector
.
push_back
(
*
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
values_tensor
.
impl
()));
auto
b_opts
=
BroadcastOptions
();
b_opts
.
source_rank
=
i
;
process_group_
->
Broadcast
(
rows_dense_vector
,
rows_dense_vector
,
b_opts
);
process_group_
->
Broadcast
(
values_dense_vector
,
values_dense_vector
,
b_opts
)
->
Wait
();
rows_tensors
.
push_back
(
rows_tensor
);
values_tensors
.
push_back
(
values_tensor
);
}
Tensor
dst_rows_tensor
=
paddle
::
experimental
::
concat
(
rows_tensors
,
phi
::
Scalar
(
0
));
framework
::
Vector
<
int64_t
>
dst_rows_vector
(
rows_num
,
0
);
auto
*
dst_rows_dense_tensor
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
dst_rows_tensor
.
impl
())
.
get
();
framework
::
TensorToVector
<
int64_t
>
(
*
dst_rows_dense_tensor
,
*
dev_ctx
,
&
dst_rows_vector
);
src
->
set_rows
(
dst_rows_vector
);
Tensor
dst_values_tensor
=
paddle
::
experimental
::
concat
(
values_tensors
,
phi
::
Scalar
(
0
));
*
(
src
->
mutable_value
())
=
*
(
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
dst_values_tensor
.
impl
()));
}
}
}
}
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
c5232b4b
...
@@ -1461,6 +1461,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -1461,6 +1461,7 @@ class TestDistBase(unittest.TestCase):
need_envs
=
{},
need_envs
=
{},
log_name
=
""
):
log_name
=
""
):
if
self
.
_dygraph
and
(
self
.
_gloo_mode
or
self
.
_nccl2_mode
):
if
self
.
_dygraph
and
(
self
.
_gloo_mode
or
self
.
_nccl2_mode
):
need_envs
.
update
({
"FLAGS_enable_eager_mode"
:
"1"
})
with
_test_eager_guard
():
with
_test_eager_guard
():
self
.
check_with_place_func
(
self
.
check_with_place_func
(
model_file
=
model_file
,
model_file
=
model_file
,
...
@@ -1468,6 +1469,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -1468,6 +1469,7 @@ class TestDistBase(unittest.TestCase):
check_error_log
=
check_error_log
,
check_error_log
=
check_error_log
,
need_envs
=
need_envs
,
need_envs
=
need_envs
,
log_name
=
log_name
)
log_name
=
log_name
)
need_envs
.
update
({
"FLAGS_enable_eager_mode"
:
"0"
})
self
.
check_with_place_func
(
self
.
check_with_place_func
(
model_file
=
model_file
,
model_file
=
model_file
,
delta
=
delta
,
delta
=
delta
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录