Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
64f769d4
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
64f769d4
编写于
4月 06, 2022
作者:
H
Haohongxiang
提交者:
GitHub
4月 06, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Dygraph] Remove unrequired UT cases of DP in eager mode (#41413)
* remove unrequired ut cases * update * fix bugs * update
上级
6f4bd0ea
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
82 addition
and
77 deletion
+82
-77
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
...on/paddle/distributed/fleet/utils/hybrid_parallel_util.py
+36
-4
python/paddle/fluid/dygraph/parallel.py
python/paddle/fluid/dygraph/parallel.py
+23
-11
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py
...s/unittests/parallel_dygraph_dataparallel_with_pylayer.py
+20
-2
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py
...uid/tests/unittests/test_parallel_dygraph_dataparallel.py
+3
-0
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py
.../unittests/test_parallel_dygraph_sparse_embedding_gloo.py
+0
-30
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py
...est_parallel_dygraph_sparse_embedding_over_height_gloo.py
+0
-15
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py
...tests/unittests/test_parallel_dygraph_transformer_gloo.py
+0
-15
未找到文件。
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
浏览文件 @
64f769d4
...
...
@@ -20,6 +20,7 @@ from paddle import framework
import
paddle
from
paddle.fluid
import
core
from
paddle.fluid.dygraph.parallel
import
_split_tensors
,
sync_params_buffers
,
build_groups
from
paddle.fluid.framework
import
in_dygraph_mode
,
_in_legacy_dygraph
from
collections
import
OrderedDict
from
.log_util
import
logger
...
...
@@ -58,6 +59,30 @@ def _apply_collective_grads(parameters, comm_group):
_split_tensors
(
coalesced_grads_and_vars
)
def _apply_collective_grads_eager(parameters, comm_group):
    """Fuse, scale and all-reduce parameter gradients over ``comm_group``.

    The gradient of every trainable parameter that currently has one is
    collected, packed into coalesced buffers by ``build_groups``, scaled by
    ``1 / comm_group.nranks``, all-reduced over ``comm_group`` and finally
    handed to ``_split_tensors``.

    Args:
        parameters: iterable of parameters exposing ``trainable`` and
            ``_grad_ivar()``.
        comm_group: communication group; must expose ``nranks`` and is
            passed as ``group=`` to ``paddle.distributed.all_reduce``.

    Raises:
        AssertionError: if a gradient reports ``is_sparse()`` or the same
            gradient object is encountered twice.
    """
    seen_grads = set()
    dense_grads = []

    for param in parameters:
        # Skip frozen parameters and parameters without a gradient.
        if not param.trainable or param._grad_ivar() is None:
            continue

        grad = param._grad_ivar()
        assert not grad.is_sparse(), \
            "Now, it doesn't support sparse parameters"
        dense_grads.append(grad)
        assert grad not in seen_grads
        seen_grads.add(grad)

    # NOTE(review): 128 * 1024 * 1024 is presumably a per-group size limit
    # in bytes (128 MiB) -- confirm against build_groups.
    group_size = 128 * 1024 * 1024
    grouped = build_groups(dense_grads, group_size)

    inv_nranks = 1.0 / comm_group.nranks
    for fused_grad, _, _ in grouped:
        # Divide by nranks before the reduction is issued.
        fused_grad.scale_(inv_nranks)
        paddle.distributed.all_reduce(fused_grad, group=comm_group)

    _split_tensors(grouped)
def
_broadcast_data_help
(
data
,
shape
,
dtype
,
hcg
):
model_parallel_group
=
hcg
.
get_model_parallel_group
()
src_rank
=
hcg
.
get_model_parallel_group_src_rank
()
...
...
@@ -115,10 +140,17 @@ def broadcast_dp_parameters(model, hcg):
def fused_allreduce_gradients(parameter_list, hcg):
    """All-reduce the gradients of ``parameter_list`` with fused buffers.

    The work is dispatched on the active dygraph mode and is performed
    exactly once per call. The reduction must not also run unconditionally
    ahead of this dispatch: that would reduce the gradients twice in legacy
    mode and would drive the legacy helper while in eager mode.

    Args:
        parameter_list: parameters whose gradients should be reduced.
        hcg: object providing ``get_data_parallel_group()``, or ``None``.
            In legacy dygraph ``None`` results in ``None`` being passed as
            the communication group. In the eager branch it must be
            ``None``; the default group is used there.

    Raises:
        AssertionError: if ``hcg`` is not ``None`` in the eager branch.
    """
    if _in_legacy_dygraph():
        data_parallel_group = (None if hcg is None else
                               hcg.get_data_parallel_group())
        logger.debug("dp start fuse allreduce gradients")
        with framework.no_grad():
            _apply_collective_grads(parameter_list, data_parallel_group)
    elif in_dygraph_mode():
        assert hcg is None, "It's not support to use hcg in EagerDygraph now."
        data_parallel_group = \
            paddle.distributed.collective._get_default_group()
        with framework.no_grad():
            _apply_collective_grads_eager(parameter_list,
                                          data_parallel_group)
def
sharding_reduce_gradients
(
parameter_list
,
hcg
):
...
...
python/paddle/fluid/dygraph/parallel.py
浏览文件 @
64f769d4
...
...
@@ -22,6 +22,7 @@ import warnings
from
contextlib
import
contextmanager
import
paddle
from
paddle
import
_C_ops
from
paddle.fluid
import
core
from
paddle.fluid
import
framework
from
paddle.fluid.dygraph
import
layers
...
...
@@ -307,17 +308,28 @@ def _reshape_inplace(x, shape):
@framework.dygraph_only
def _split_tensors(coalesced_grads_and_grad_vars):
    """Split every coalesced gradient back into its original gradient vars.

    Each entry of ``coalesced_grads_and_grad_vars`` is a triple
    ``(coalesced_grad, origin_grad_vars, grad_shapes)``. The coalesced
    tensor is split along axis 0 into sections of ``prod(shape)`` elements
    written into ``origin_grad_vars``, and every var is then reshaped in
    place to its recorded shape.

    The split runs exactly once, through the mechanism that matches the
    active dygraph mode: the tracer ``split`` op in legacy dygraph, or
    ``_C_ops.split`` otherwise. It must not also run unconditionally through
    the tracer ahead of this dispatch.

    Args:
        coalesced_grads_and_grad_vars: iterable of
            ``(coalesced_grad, origin_grad_vars, grad_shapes)`` triples.

    Raises:
        AssertionError: if a var's shape differs from the recorded shape
            after the in-place reshape.
    """
    if _in_legacy_dygraph():
        for coalesced_grad, origin_grad_vars, grad_shapes in \
                coalesced_grads_and_grad_vars:
            # Number of elements each original gradient occupies.
            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
            framework._dygraph_tracer().trace_op(
                type='split',
                inputs={'X': coalesced_grad},
                outputs={'Out': origin_grad_vars},
                attrs={'sections': grad_var_len,
                       'axis': 0})
            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
                _reshape_inplace(x=g_var, shape=g_shape)
                assert g_var.shape == g_shape
    elif in_dygraph_mode():
        for coalesced_grad, origin_grad_vars, grad_shapes in \
                coalesced_grads_and_grad_vars:
            # Number of elements each original gradient occupies.
            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
            # Attributes are passed as a flat (name, value, ...) sequence.
            attrs = ('sections', grad_var_len, 'axis', 0)
            _C_ops.split(coalesced_grad, origin_grad_vars, *attrs)
            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
                g_var.reshape_(shape=g_shape)
                assert g_var.shape == g_shape
def
scale_loss
(
loss
):
...
...
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py
浏览文件 @
64f769d4
...
...
@@ -21,7 +21,8 @@ import paddle
import
numpy
as
np
import
paddle.distributed
as
dist
from
paddle.fluid.dygraph.nn
import
Linear
from
paddle.autograd
import
PyLayer
from
paddle.autograd
import
PyLayer
,
EagerPyLayer
from
paddle.fluid.framework
import
in_dygraph_mode
,
_in_legacy_dygraph
from
paddle.distributed.fleet.utils.hybrid_parallel_util
import
fused_allreduce_gradients
batch
=
5
...
...
@@ -43,6 +44,20 @@ class cus_tanh(PyLayer):
return
grad
class cus_tanh_eager(EagerPyLayer):
    """Custom tanh built on ``EagerPyLayer`` with a hand-written backward."""

    @staticmethod
    def forward(ctx, x):
        """Return ``tanh(x)`` and save the result for the backward pass."""
        out = paddle.tanh(x)
        ctx.save_for_backward(out)
        return out

    @staticmethod
    def backward(ctx, dy):
        """Return ``dy * (1 - y**2)`` where ``y`` is the saved forward output."""
        (out, ) = ctx.saved_tensor()
        return dy * (1 - paddle.square(out))
class
SimpleNet
(
paddle
.
nn
.
Layer
):
def
__init__
(
self
,
train_id
,
model_id
):
super
(
SimpleNet
,
self
).
__init__
()
...
...
@@ -55,7 +70,10 @@ class SimpleNet(paddle.nn.Layer):
def
forward
(
self
,
inputs
):
if
self
.
model_id
==
0
:
inputs
=
cus_tanh
.
apply
(
inputs
)
if
in_dygraph_mode
():
inputs
=
cus_tanh_eager
.
apply
(
inputs
)
elif
_in_legacy_dygraph
():
inputs
=
cus_tanh
.
apply
(
inputs
)
else
:
inputs
=
self
.
tanh
(
inputs
)
...
...
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py
浏览文件 @
64f769d4
...
...
@@ -23,6 +23,7 @@ import os
import
subprocess
from
paddle.distributed.utils
import
find_free_ports
,
watch_local_trainers
,
get_cluster
,
TrainerProc
from
paddle.fluid.framework
import
_test_eager_guard
def
get_cluster_from_args
(
selected_gpus
):
...
...
@@ -205,6 +206,8 @@ class TestDataParallelGradientCheck(TestMultipleGpus):
class TestDataParallelWithPyLayer(TestMultipleGpus):
    """Runs the data-parallel PyLayer script through ``run_mnist_2gpu``."""

    def test_parallel_dygraph_dataparallel_with_pylayer(self):
        # The same script is launched twice: first inside
        # ``_test_eager_guard()``, then once more outside of it.
        script = 'parallel_dygraph_dataparallel_with_pylayer.py'
        with _test_eager_guard():
            self.run_mnist_2gpu(script)
        self.run_mnist_2gpu(script)
...
...
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py
浏览文件 @
64f769d4
...
...
@@ -55,35 +55,5 @@ class TestParallelDygraphSparseEmdeddingFP64_GLOO(TestDistBase):
log_name
=
flag_name
)
class TestParallelDygraphSparseEmdeddingEager_GLOO(TestDistBase):
    """Sparse-embedding case configured with eager, gloo and dygraph flags."""

    def _setup_config(self):
        # Sync mode off; eager, gloo and dygraph switched on.
        self._sync_mode = False
        self._eager_mode = self._gloo_mode = self._dygraph = True

    def test_sparse_embedding(self):
        # Runs the script through check_with_place with delta 1e-5.
        check = self.check_with_place
        check(
            "parallel_dygraph_sparse_embedding.py",
            delta=1e-5,
            check_error_log=True,
            log_name=flag_name)
class TestParallelDygraphSparseEmdeddingEagerFP64_GLOO(TestDistBase):
    """FP64 sparse-embedding case with eager, gloo and dygraph flags set."""

    def _setup_config(self):
        # Sync mode off; eager, gloo and dygraph switched on.
        self._sync_mode = False
        self._eager_mode = self._gloo_mode = self._dygraph = True

    def test_sparse_embedding_fp64(self):
        # Runs the script through check_with_place with delta 1e-5.
        check = self.check_with_place
        check(
            "parallel_dygraph_sparse_embedding_fp64.py",
            delta=1e-5,
            check_error_log=True,
            log_name=flag_name)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py
浏览文件 @
64f769d4
...
...
@@ -40,20 +40,5 @@ class TestParallelDygraphSparseEmdeddingOverHeight_GLOO(TestDistBase):
log_name
=
flag_name
)
class TestParallelDygraphSparseEmdeddingOverHeightEager_GLOO(TestDistBase):
    """Over-height sparse-embedding case with eager, gloo and dygraph flags."""

    def _setup_config(self):
        # Sync mode off; eager, gloo and dygraph switched on.
        self._sync_mode = False
        self._eager_mode = self._gloo_mode = self._dygraph = True

    def test_sparse_embedding(self):
        # Runs the script through check_with_place with delta 1e-7.
        check = self.check_with_place
        check(
            "parallel_dygraph_sparse_embedding_over_height.py",
            delta=1e-7,
            check_error_log=True,
            log_name=flag_name)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py
浏览文件 @
64f769d4
...
...
@@ -57,20 +57,5 @@ class TestParallelDygraphTransformerAccGrad_GLOO(TestDistBase):
log_name
=
flag_name
)
class TestParallelDygraphTransformerEager_GLOO(TestDistBase):
    """Transformer case configured with eager, gloo and dygraph flags."""

    def _setup_config(self):
        # Sync mode off; eager, gloo and dygraph switched on.
        self._sync_mode = False
        self._eager_mode = self._gloo_mode = self._dygraph = True

    def test_transformer(self):
        # Runs the script through check_with_place with delta 1e-5.
        check = self.check_with_place
        check(
            "parallel_dygraph_transformer.py",
            delta=1e-5,
            check_error_log=True,
            log_name=flag_name)
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录