BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle) · Commit 64f769d4
Commit 64f769d4 (unverified)
Authored Apr 06, 2022 by Haohongxiang; committed via GitHub on Apr 06, 2022.

[Dygraph] Remove unrequired UT cases of DP in eager mode (#41413)

* remove unrequired ut cases
* update
* fix bugs
* update

Parent: 6f4bd0ea
Showing 7 changed files with 82 additions and 77 deletions (+82 -77).
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py   +36 -4
python/paddle/fluid/dygraph/parallel.py   +23 -11
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py   +20 -2
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py   +3 -0
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py   +0 -30
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py   +0 -15
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py   +0 -15
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
@@ -20,6 +20,7 @@ from paddle import framework
 import paddle
 from paddle.fluid import core
 from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, build_groups
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 from collections import OrderedDict
 from .log_util import logger
@@ -58,6 +59,30 @@ def _apply_collective_grads(parameters, comm_group):
     _split_tensors(coalesced_grads_and_vars)


+def _apply_collective_grads_eager(parameters, comm_group):
+    grad_var_set = set()
+    grad_vars = []
+
+    for param in parameters:
+        if param.trainable and (param._grad_ivar() is not None):
+            g_var = param._grad_ivar()
+            assert not g_var.is_sparse(
+            ), "Now, it doesn't support sparse parameters"
+            grad_vars.append(g_var)
+            assert g_var not in grad_var_set
+            grad_var_set.add(g_var)
+
+    coalesced_grads_and_vars = build_groups(grad_vars, 128 * 1024 * 1024)
+
+    div_factor = 1.0 / comm_group.nranks
+    for coalesced_grad, _, _ in coalesced_grads_and_vars:
+        # need to div nranks
+        coalesced_grad.scale_(div_factor)
+        paddle.distributed.all_reduce(coalesced_grad, group=comm_group)
+
+    _split_tensors(coalesced_grads_and_vars)
+
+
 def _broadcast_data_help(data, shape, dtype, hcg):
     model_parallel_group = hcg.get_model_parallel_group()
     src_rank = hcg.get_model_parallel_group_src_rank()
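For orientation, here is a minimal single-process sketch of the data flow the new _apply_collective_grads_eager implements: flatten and concatenate the gradients into one buffer (the role of build_groups, minus the 128 MB bucketing), scale by 1.0 / nranks, and sum-reduce so the buffer ends up holding the cross-rank average. NumPy arrays and a plain Python sum stand in for Paddle tensors and paddle.distributed.all_reduce; the split back into per-parameter shapes is _split_tensors, sketched after the parallel.py diff below.

    # Illustrative sketch (not part of the commit): coalesce -> scale by 1/nranks -> sum-reduce,
    # simulated on one process with NumPy standing in for Paddle tensors and for all_reduce.
    import numpy as np

    nranks = 2
    shapes = [(2, 3), (4,)]                      # per-parameter gradient shapes
    # each simulated rank holds one gradient per parameter
    rank_grads = [[np.full(s, r + 1.0) for s in shapes] for r in range(nranks)]

    # coalesce: flatten and concatenate each rank's gradients into one flat buffer
    buffers = [np.concatenate([g.ravel() for g in grads]) for grads in rank_grads]

    # scale each buffer by 1/nranks, then "all-reduce" it (a plain sum across ranks here),
    # which leaves every element equal to the cross-rank mean of that gradient entry
    reduced = sum(buf * (1.0 / nranks) for buf in buffers)

    assert np.allclose(reduced, 1.5)             # mean of the per-rank values 1.0 and 2.0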
@@ -115,10 +140,17 @@ def broadcast_dp_parameters(model, hcg):
 def fused_allreduce_gradients(parameter_list, hcg):
-    data_parallel_group = None if hcg is None else hcg.get_data_parallel_group()
-    logger.debug("dp start fuse allreduce gradients")
-    with framework.no_grad():
-        _apply_collective_grads(parameter_list, data_parallel_group)
+    if _in_legacy_dygraph():
+        data_parallel_group = None if hcg is None else hcg.get_data_parallel_group()
+        logger.debug("dp start fuse allreduce gradients")
+        with framework.no_grad():
+            _apply_collective_grads(parameter_list, data_parallel_group)
+    elif in_dygraph_mode():
+        assert hcg is None, "It's not support to use hcg in EagerDygraph now."
+        data_parallel_group = paddle.distributed.collective._get_default_group()
+        with framework.no_grad():
+            _apply_collective_grads_eager(parameter_list, data_parallel_group)


 def sharding_reduce_gradients(parameter_list, hcg):
python/paddle/fluid/dygraph/parallel.py
@@ -22,6 +22,7 @@ import warnings
 from contextlib import contextmanager
 import paddle
+from paddle import _C_ops
 from paddle.fluid import core
 from paddle.fluid import framework
 from paddle.fluid.dygraph import layers
@@ -307,17 +308,28 @@ def _reshape_inplace(x, shape):
 @framework.dygraph_only
 def _split_tensors(coalesced_grads_and_grad_vars):
-    for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
-        grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
-        framework._dygraph_tracer().trace_op(
-            type='split',
-            inputs={'X': coalesced_grad},
-            outputs={'Out': origin_grad_vars},
-            attrs={'sections': grad_var_len,
-                   'axis': 0})
-        for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
-            _reshape_inplace(x=g_var, shape=g_shape)
-            assert g_var.shape == g_shape
+    if _in_legacy_dygraph():
+        for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
+            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
+            framework._dygraph_tracer().trace_op(
+                type='split',
+                inputs={'X': coalesced_grad},
+                outputs={'Out': origin_grad_vars},
+                attrs={'sections': grad_var_len,
+                       'axis': 0})
+            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
+                _reshape_inplace(x=g_var, shape=g_shape)
+                assert g_var.shape == g_shape
+    elif in_dygraph_mode():
+        for coalesced_grad, origin_grad_vars, grad_shapes in coalesced_grads_and_grad_vars:
+            grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes]
+            attrs = ()
+            attrs += ('sections', grad_var_len)
+            attrs += ('axis', 0)
+            _C_ops.split(coalesced_grad, origin_grad_vars, *attrs)
+            for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
+                g_var.reshape_(shape=g_shape)
+                assert g_var.shape == g_shape


 def scale_loss(loss):
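A minimal sketch of the split-back step that _split_tensors performs on each coalesced buffer: cut the flat buffer into sections of length np.prod(shape) and write each piece back into the original gradient storage with its original shape. NumPy stands in for Paddle tensors here; in the eager branch above the same thing is done with _C_ops.split followed by the in-place reshape_.

    # Illustrative sketch (not part of the commit): split a coalesced buffer by sections
    # and restore the original per-parameter shapes, mirroring _split_tensors.
    import numpy as np

    grad_shapes = [(2, 3), (4,), (1, 5)]
    origin_grads = [np.zeros(s) for s in grad_shapes]     # pre-existing gradient storage
    coalesced = np.arange(6 + 4 + 5, dtype=np.float64)    # reduced buffer, still flat

    sections = [int(np.prod(s)) for s in grad_shapes]     # lengths, as in grad_var_len
    offsets = np.cumsum(sections)[:-1]
    for dst, piece, shape in zip(origin_grads, np.split(coalesced, offsets), grad_shapes):
        np.copyto(dst, piece.reshape(shape))              # write back in place
        assert dst.shape == shape

    assert origin_grads[1].tolist() == [6.0, 7.0, 8.0, 9.0]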
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py
@@ -21,7 +21,8 @@ import paddle
 import numpy as np
 import paddle.distributed as dist
 from paddle.fluid.dygraph.nn import Linear
-from paddle.autograd import PyLayer
+from paddle.autograd import PyLayer, EagerPyLayer
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients

 batch = 5
@@ -43,6 +44,20 @@ class cus_tanh(PyLayer):
         return grad


+class cus_tanh_eager(EagerPyLayer):
+    @staticmethod
+    def forward(ctx, x):
+        y = paddle.tanh(x)
+        ctx.save_for_backward(y)
+        return y
+
+    @staticmethod
+    def backward(ctx, dy):
+        y, = ctx.saved_tensor()
+        grad = dy * (1 - paddle.square(y))
+        return grad
+
+
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id, model_id):
         super(SimpleNet, self).__init__()
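As a quick sanity check on the arithmetic shared by cus_tanh_eager.backward and the existing cus_tanh, the analytic gradient dy * (1 - tanh(x)**2) can be compared against a finite-difference derivative of tanh; a NumPy-only sketch, not part of the test:

    # Verify that 1 - tanh(x)^2 matches a central-difference estimate of d/dx tanh(x)
    # (this is the gradient the PyLayer's backward returns for dy = 1).
    import numpy as np

    x = np.linspace(-2.0, 2.0, 5)
    analytic = 1.0 - np.square(np.tanh(x))
    eps = 1e-6
    numeric = (np.tanh(x + eps) - np.tanh(x - eps)) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-8)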
@@ -55,7 +70,10 @@ class SimpleNet(paddle.nn.Layer):
     def forward(self, inputs):
         if self.model_id == 0:
-            inputs = cus_tanh.apply(inputs)
+            if in_dygraph_mode():
+                inputs = cus_tanh_eager.apply(inputs)
+            elif _in_legacy_dygraph():
+                inputs = cus_tanh.apply(inputs)
         else:
             inputs = self.tanh(inputs)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py
@@ -23,6 +23,7 @@ import os
 import subprocess

 from paddle.distributed.utils import find_free_ports, watch_local_trainers, get_cluster, TrainerProc
+from paddle.fluid.framework import _test_eager_guard


 def get_cluster_from_args(selected_gpus):
@@ -205,6 +206,8 @@ class TestDataParallelGradientCheck(TestMultipleGpus):
 class TestDataParallelWithPyLayer(TestMultipleGpus):
     def test_parallel_dygraph_dataparallel_with_pylayer(self):
+        with _test_eager_guard():
+            self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py')
         self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py')
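The added guard runs the same 2-GPU script twice: once under _test_eager_guard() (eager mode) and once without it (legacy dygraph). Below is a hypothetical, self-contained sketch of that guard-and-repeat pattern, assuming a simple module-level flag rather than Paddle's real mode switch:

    # Hypothetical sketch (not Paddle's _test_eager_guard): a context manager that
    # temporarily flips a mode flag so the same case runs once per mode.
    from contextlib import contextmanager

    _EAGER_MODE = False

    @contextmanager
    def eager_guard():
        global _EAGER_MODE
        previous, _EAGER_MODE = _EAGER_MODE, True
        try:
            yield
        finally:
            _EAGER_MODE = previous        # restore the legacy setting on exit

    def run_case():
        print("running in", "eager" if _EAGER_MODE else "legacy", "mode")

    with eager_guard():
        run_case()                        # eager pass
    run_case()                            # legacy pass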
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py
@@ -55,35 +55,5 @@ class TestParallelDygraphSparseEmdeddingFP64_GLOO(TestDistBase):
             log_name=flag_name)


-class TestParallelDygraphSparseEmdeddingEager_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_sparse_embedding(self):
-        self.check_with_place(
-            "parallel_dygraph_sparse_embedding.py",
-            delta=1e-5,
-            check_error_log=True,
-            log_name=flag_name)
-
-
-class TestParallelDygraphSparseEmdeddingEagerFP64_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_sparse_embedding_fp64(self):
-        self.check_with_place(
-            "parallel_dygraph_sparse_embedding_fp64.py",
-            delta=1e-5,
-            check_error_log=True,
-            log_name=flag_name)
-
-
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py
@@ -40,20 +40,5 @@ class TestParallelDygraphSparseEmdeddingOverHeight_GLOO(TestDistBase):
             log_name=flag_name)


-class TestParallelDygraphSparseEmdeddingOverHeightEager_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_sparse_embedding(self):
-        self.check_with_place(
-            "parallel_dygraph_sparse_embedding_over_height.py",
-            delta=1e-7,
-            check_error_log=True,
-            log_name=flag_name)
-
-
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py
@@ -57,20 +57,5 @@ class TestParallelDygraphTransformerAccGrad_GLOO(TestDistBase):
             log_name=flag_name)


-class TestParallelDygraphTransformerEager_GLOO(TestDistBase):
-    def _setup_config(self):
-        self._sync_mode = False
-        self._eager_mode = True
-        self._gloo_mode = True
-        self._dygraph = True
-
-    def test_transformer(self):
-        self.check_with_place(
-            "parallel_dygraph_transformer.py",
-            delta=1e-5,
-            check_error_log=True,
-            log_name=flag_name)
-
-
 if __name__ == "__main__":
     unittest.main()