PaddlePaddle / Paddle
Commit 64f769d4 (unverified)
Authored Apr 06, 2022 by Haohongxiang; committed via GitHub on Apr 06, 2022
[Dygraph] Remove unrequired UT cases of DP in eager mode (#41413)
* remove unrequired ut cases
* update
* fix bugs
* update
Parent: 6f4bd0ea
Showing 7 changed files with 82 additions and 77 deletions (+82 −77)
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py                                   +36  −4
python/paddle/fluid/dygraph/parallel.py                                                         +23  −11
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py               +20  −2
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py                       +3   −0
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py              +0   −30
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py  +0   −15
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py                   +0   −15
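All seven diffs below hinge on the same mode dispatch between the old dygraph and the new eager dygraph. As a rough illustration (a sketch, not part of this commit), the two checks imported from paddle.fluid.framework are used like this:

    from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph

    def run_by_mode(eager_fn, legacy_fn, *args):
        # in_dygraph_mode() is true under the new eager dygraph,
        # _in_legacy_dygraph() under the old dygraph; the changed files
        # below select their code paths with exactly this pair of checks.
        if in_dygraph_mode():
            return eager_fn(*args)
        elif _in_legacy_dygraph():
            return legacy_fn(*args)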
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py

@@ -20,6 +20,7 @@ from paddle import framework
 import paddle
 from paddle.fluid import core
 from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, build_groups
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 from collections import OrderedDict
 from .log_util import logger

@@ -58,6 +59,30 @@ def _apply_collective_grads(parameters, comm_group):
     _split_tensors(coalesced_grads_and_vars)


+def _apply_collective_grads_eager(parameters, comm_group):
+    grad_var_set = set()
+    grad_vars = []
+
+    for param in parameters:
+        if param.trainable and (param._grad_ivar() is not None):
+            g_var = param._grad_ivar()
+            assert not g_var.is_sparse(), "Now, it doesn't support sparse parameters"
+            grad_vars.append(g_var)
+            assert g_var not in grad_var_set
+            grad_var_set.add(g_var)
+
+    coalesced_grads_and_vars = build_groups(grad_vars, 128 * 1024 * 1024)
+
+    div_factor = 1.0 / comm_group.nranks
+    for coalesced_grad, _, _ in coalesced_grads_and_vars:
+        # need to div nranks
+        coalesced_grad.scale_(div_factor)
+        paddle.distributed.all_reduce(coalesced_grad, group=comm_group)
+
+    _split_tensors(coalesced_grads_and_vars)
+
+
 def _broadcast_data_help(data, shape, dtype, hcg):
     model_parallel_group = hcg.get_model_parallel_group()
     src_rank = hcg.get_model_parallel_group_src_rank()

@@ -115,10 +140,17 @@ def broadcast_dp_parameters(model, hcg):

 def fused_allreduce_gradients(parameter_list, hcg):
-    data_parallel_group = None if hcg is None else hcg.get_data_parallel_group()
-    logger.debug("dp start fuse allreduce gradients")
-    with framework.no_grad():
-        _apply_collective_grads(parameter_list, data_parallel_group)
+    if _in_legacy_dygraph():
+        data_parallel_group = None if hcg is None else hcg.get_data_parallel_group(
+        )
+        logger.debug("dp start fuse allreduce gradients")
+        with framework.no_grad():
+            _apply_collective_grads(parameter_list, data_parallel_group)
+    elif in_dygraph_mode():
+        assert hcg is None, "It's not support to use hcg in EagerDygraph now."
+        data_parallel_group = paddle.distributed.collective._get_default_group()
+        with framework.no_grad():
+            _apply_collective_grads_eager(parameter_list, data_parallel_group)


 def sharding_reduce_gradients(parameter_list, hcg):
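For context, fused_allreduce_gradients is the manual gradient-synchronization entry point exercised by the PyLayer data-parallel test below. A hedged usage sketch (model, optimizer and data are assumed to exist elsewhere; this is not code from the commit):

    import paddle
    from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients

    def train_step(model, opt, image, label):
        # forward/backward as usual; gradients are synchronized by hand afterwards
        loss = paddle.nn.functional.cross_entropy(model(image), label)
        loss.backward()
        # hcg=None selects the default data-parallel group, matching the
        # in_dygraph_mode() branch added above
        fused_allreduce_gradients(list(model.parameters()), None)
        opt.step()
        opt.clear_grad()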
python/paddle/fluid/dygraph/parallel.py

@@ -22,6 +22,7 @@ import warnings
 from contextlib import contextmanager

 import paddle
+from paddle import _C_ops
 from paddle.fluid import core
 from paddle.fluid import framework
 from paddle.fluid.dygraph import layers

@@ -307,6 +308,7 @@ def _reshape_inplace(x, shape):

 @framework.dygraph_only
 def _split_tensors(coalesced_grads_and_grad_vars):
-    for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
-        grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
-        framework._dygraph_tracer().trace_op(
+    if _in_legacy_dygraph():
+        for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
+            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
+            framework._dygraph_tracer().trace_op(

@@ -318,6 +320,16 @@ def _split_tensors(coalesced_grads_and_grad_vars):
-        for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
-            _reshape_inplace(x=g_var, shape=g_shape)
-            assert g_var.shape == g_shape
+            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
+                _reshape_inplace(x=g_var, shape=g_shape)
+                assert g_var.shape == g_shape
+    elif in_dygraph_mode():
+        for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
+            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
+            attrs = ()
+            attrs += ('sections', grad_var_len)
+            attrs += ('axis', 0)
+            _C_ops.split(coalesced_grad, origin_grad_vars, *attrs)
+            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
+                g_var.reshape_(shape=g_shape)
+                assert g_var.shape == g_shape


 def scale_loss(loss):
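The eager branch above cuts a coalesced 1-D gradient buffer back into per-parameter gradients via a 'sections' list and then restores the original shapes. A minimal, self-contained illustration of that arithmetic with toy shapes, using the public paddle.split instead of the internal _C_ops call:

    import numpy as np
    import paddle

    grad_shapes = [[2, 3], [4]]                          # toy parameter shapes
    sections = [int(np.prod(s)) for s in grad_shapes]    # [6, 4]
    coalesced = paddle.arange(10, dtype='float32')       # flattened gradient buffer
    pieces = paddle.split(coalesced, num_or_sections=sections, axis=0)
    grads = [p.reshape(s) for p, s in zip(pieces, grad_shapes)]
    assert [list(g.shape) for g in grads] == grad_shapes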
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py

@@ -21,7 +21,8 @@ import paddle
 import numpy as np
 import paddle.distributed as dist
 from paddle.fluid.dygraph.nn import Linear
-from paddle.autograd import PyLayer
+from paddle.autograd import PyLayer, EagerPyLayer
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients

 batch = 5

@@ -43,6 +44,20 @@ class cus_tanh(PyLayer):
         return grad


+class cus_tanh_eager(EagerPyLayer):
+    @staticmethod
+    def forward(ctx, x):
+        y = paddle.tanh(x)
+        ctx.save_for_backward(y)
+        return y
+
+    @staticmethod
+    def backward(ctx, dy):
+        y, = ctx.saved_tensor()
+        grad = dy * (1 - paddle.square(y))
+        return grad
+
+
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id, model_id):
         super(SimpleNet, self).__init__()

@@ -55,6 +70,9 @@ class SimpleNet(paddle.nn.Layer):

     def forward(self, inputs):
         if self.model_id == 0:
-            inputs = cus_tanh.apply(inputs)
+            if in_dygraph_mode():
+                inputs = cus_tanh_eager.apply(inputs)
+            elif _in_legacy_dygraph():
+                inputs = cus_tanh.apply(inputs)
         else:
             inputs = self.tanh(inputs)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py

@@ -23,6 +23,7 @@ import os
 import subprocess

 from paddle.distributed.utils import find_free_ports, watch_local_trainers, get_cluster, TrainerProc
+from paddle.fluid.framework import _test_eager_guard


 def get_cluster_from_args(selected_gpus):

@@ -205,6 +206,8 @@ class TestDataParallelGradientCheck(TestMultipleGpus):

 class TestDataParallelWithPyLayer(TestMultipleGpus):
     def test_parallel_dygraph_dataparallel_with_pylayer(self):
+        with _test_eager_guard():
+            self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py')
         self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py')
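The _test_eager_guard added above makes the launcher run once under the new eager dygraph before the existing run. A rough sketch of the pattern (check is a hypothetical callable standing in for run_mnist_2gpu):

    from paddle.fluid.framework import _test_eager_guard

    def run_in_both_modes(check):
        # first pass: eager dygraph, enabled for the duration of the guard
        with _test_eager_guard():
            check()
        # second pass: whatever mode is active outside the guard
        # (legacy dygraph at the time of this commit)
        check()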
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py

@@ -55,35 +55,5 @@ class TestParallelDygraphSparseEmdeddingFP64_GLOO(TestDistBase):
             log_name=flag_name)


-class TestParallelDygraphSparseEmdeddingEager_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_sparse_embedding(self):
-        self.check_with_place(
-            "parallel_dygraph_sparse_embedding.py",
-            delta=1e-5,
-            check_error_log=True,
-            log_name=flag_name)
-
-
-class TestParallelDygraphSparseEmdeddingEagerFP64_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_sparse_embedding_fp64(self):
-        self.check_with_place(
-            "parallel_dygraph_sparse_embedding_fp64.py",
-            delta=1e-5,
-            check_error_log=True,
-            log_name=flag_name)
-
-
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py

@@ -40,20 +40,5 @@ class TestParallelDygraphSparseEmdeddingOverHeight_GLOO(TestDistBase):
             log_name=flag_name)


-class TestParallelDygraphSparseEmdeddingOverHeightEager_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_sparse_embedding(self):
-        self.check_with_place(
-            "parallel_dygraph_sparse_embedding_over_height.py",
-            delta=1e-7,
-            check_error_log=True,
-            log_name=flag_name)
-
-
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py

@@ -57,20 +57,5 @@ class TestParallelDygraphTransformerAccGrad_GLOO(TestDistBase):
             log_name=flag_name)


-class TestParallelDygraphTransformerEager_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_transformer(self):
-        self.check_with_place(
-            "parallel_dygraph_transformer.py",
-            delta=1e-5,
-            check_error_log=True,
-            log_name=flag_name)
-
-
 if __name__ == "__main__":
     unittest.main()