PaddlePaddle / Paddle
Commit 7d46d9f9 (unverified)
refactor: rm fluid deps in fleet (#49724)
Author: Wen Sun, Jan 11, 2023
Committer: GitHub, Jan 11, 2023
Parent: b53888e7
Showing 4 changed files with 27 additions and 29 deletions (+27, -29).
python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py                  +2  -2
python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py           +13 -14
python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py                   +3  -5
python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py          +9  -8
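All four hunks below make the same kind of change: private `paddle.fluid` imports are replaced by their public counterparts. A rough summary of the mapping, inferred from the hunks themselves (the grouping is mine, not stated in the commit message):

# Removed by this patch (private fluid paths):
#   from paddle.fluid.dygraph.layers import Layer
#   from paddle.fluid.framework import in_dygraph_mode
#   import paddle.fluid as fluid
#   import paddle.fluid.core as core
#   import paddle.fluid.framework as framework

# Used instead (public modules):
import paddle.nn as nn                 # nn.Layer replaces fluid's Layer
import paddle.framework as framework   # framework.in_dygraph_mode, framework.core,
                                       # framework._dygraph_tracer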
python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from paddle.fluid.dygraph.layers import Layer
+import paddle.nn as nn

 __all__ = []


-class MetaParallelBase(Layer):
+class MetaParallelBase(nn.Layer):
     def __init__(self, layers, hcg, strategy):
         super().__init__(layers.full_name() + "_meta_parallel_base")
         self._layers = layers
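The only surface touched here is the base class: `paddle.nn.Layer` is the public export of the dygraph `Layer` previously imported from `paddle.fluid.dygraph.layers`, so subclasses keep working unchanged. A minimal sketch of the pattern, using an illustrative class name rather than the real one:

import paddle
import paddle.nn as nn


class ExampleParallelBase(nn.Layer):
    # Illustrative stand-in for MetaParallelBase: it wraps another layer and
    # derives its name scope from it, as the patched __init__ does.
    def __init__(self, layers):
        super().__init__(layers.full_name() + "_meta_parallel_base")
        self._layers = layers

    def forward(self, *inputs, **kwargs):
        return self._layers(*inputs, **kwargs)


base = ExampleParallelBase(nn.Linear(8, 8))
out = base(paddle.randn([4, 8]))  # forwards to the wrapped layer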
python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
@@ -45,8 +45,8 @@ import re
 from functools import partial

 import paddle
-from paddle.fluid.dygraph.layers import Layer
-from paddle.fluid.framework import in_dygraph_mode
+import paddle.framework as framework
+import paddle.nn as nn
 from paddle.incubate.distributed.fleet import recompute_hybrid

 from ...utils.log_util import layer_to_str, logger
@@ -60,7 +60,7 @@ class LayerDesc:
         self.inputs = inputs
         self.kwargs = kwargs

-        if not issubclass(layer_func, Layer):
+        if not issubclass(layer_func, nn.Layer):
             raise TypeError(
                 "The input(layer_func) should be a derived class of Layer."
             )
@@ -151,7 +151,7 @@ class SegmentLayers:
         regex = re.compile(layername, re.IGNORECASE)
         for idx, layer in enumerate(self._layers_desc):
             name = None
-            if isinstance(layer, Layer):
+            if isinstance(layer, nn.Layer):
                 name = layer.__class__.__name__
             elif isinstance(layer, LayerDesc):
                 name = layer.layer_func.__name__
@@ -180,7 +180,7 @@ class SegmentLayers:
         return result


-class PipelineLayerChunk(Layer):
+class PipelineLayerChunk(nn.Layer):
     def __init__(self):
         super().__init__()
         self.run_function = []
@@ -189,7 +189,7 @@ class PipelineLayerChunk(Layer):
         # This method is used to unify codes in _build_layer_impl.
         # For 1f1b scheduler, it will call append method of a List.
         # For interleave scheduler, it will call append method of this class.
-        if isinstance(sublayer, Layer):
+        if isinstance(sublayer, nn.Layer):
             self.add_sublayer(str(len(self.run_function)), sublayer)
         self.run_function.append(sublayer)
@@ -206,7 +206,7 @@ class PipelineLayerChunk(Layer):
         )


-class PipelineLayer(Layer):
+class PipelineLayer(nn.Layer):
     """PipelineLayer
     Args:
         layers(Iterable): A sequence of layers description to define the structure for pipeline.
@@ -220,9 +220,8 @@ class PipelineLayer(Layer):
     Examples:
         .. code-block:: python

            import paddle.nn as nn
-           from paddle.distributed import fleet
-           from paddle.fluid.dygraph.layers import Layer
            import paddle.nn.functional as F
+           from paddle.distributed import fleet
            from paddle.distributed.fleet.meta_parallel import LayerDesc, PipelineLayer

            pipeline_parallel_size = 2
@@ -241,7 +240,7 @@ class PipelineLayer(Layer):
            hcg = fleet.get_hybrid_communicate_group()

-           class ReshapeHelp(Layer):
+           class ReshapeHelp(nn.Layer):
               def __init__(self, shape):
                   super().__init__()
                   self.shape = shape
@@ -500,14 +499,14 @@ class PipelineLayer(Layer):
         for key, comm in self.shared_comm.items():
             param = getattr(self.shared_layers[key], comm['weight_attr'])
             # need use trace_op to allreduce weight
-            if in_dygraph_mode():
+            if framework.in_dygraph_mode():
                 with paddle.framework.no_grad():
                     paddle.distributed.all_reduce(
                         param.grad, group=comm['group']
                     )
             else:
                 with paddle.framework.no_grad():
-                    paddle.fluid.framework._dygraph_tracer().trace_op(
+                    framework._dygraph_tracer().trace_op(
                         type="c_allreduce_sum",
                         inputs={'X': param._grad_ivar()},
                         outputs={'Out': param._grad_ivar()},
@@ -627,7 +626,7 @@ class PipelineLayer(Layer):
         for index, layer in enumerate(self._layers_desc[start:end]):
             layer_index = start + index
-            if isinstance(layer, Layer):
+            if isinstance(layer, nn.Layer):
                 run_function.append(layer)
                 if self._num_virtual_pipeline_stages == 1:
                     # Only add sublayer for 1f1b scheduler,
@@ -729,7 +728,7 @@ class PipelineLayer(Layer):
         ):
             return False
-        params = [f.parameters() for f in funcs if isinstance(f, Layer)]
+        params = [f.parameters() for f in funcs if isinstance(f, nn.Layer)]
         return any(len(list(p)) > 0 for p in params)

     def save_state_dict(self, path):
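Every type check in pp_layers.py now goes through the same public class, so the `issubclass`/`isinstance` tests against `nn.Layer` accept anything built on Paddle's dygraph layers. A minimal sketch of the LayerDesc-style validation, using a hypothetical `ExampleLayerDesc` rather than the real class:

import paddle.nn as nn


class ExampleLayerDesc:
    # Hypothetical stand-in for fleet's LayerDesc: it records how to construct
    # a layer later and validates the callable against the public nn.Layer.
    def __init__(self, layer_func, *inputs, **kwargs):
        if not issubclass(layer_func, nn.Layer):
            raise TypeError(
                "The input(layer_func) should be a derived class of Layer."
            )
        self.layer_func = layer_func
        self.inputs = inputs
        self.kwargs = kwargs

    def build_layer(self):
        return self.layer_func(*self.inputs, **self.kwargs)


desc = ExampleLayerDesc(nn.Linear, 8, 8)  # construction is deferred
layer = desc.build_layer()
assert isinstance(layer, nn.Layer)        # the same check PipelineLayer relies on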
python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and

 import paddle
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.framework as framework
+import paddle.framework as framework

 from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer
 from ..utils.hybrid_parallel_util import (
@@ -208,7 +206,7 @@ class PipelineParallel(MetaParallelBase):
         ), 'optimizer should be HybridParallelOptimizer subclass.'

         assert (
-            fluid.framework._dygraph_tracer()._has_grad
+            framework._dygraph_tracer()._has_grad
         ), 'Please enable the generation of gradients.'

         if self.is_pipeline_first_stage(
@@ -308,7 +306,7 @@ class PipelineParallel(MetaParallelBase):
             labels = self._load_micro_batch(self.micro_batch_id)
             output_tensor = self._layers._loss_fn(output_tensor, labels)
             assert isinstance(
-                output_tensor, (paddle.Tensor, core.eager.Tensor)
+                output_tensor, (paddle.Tensor, framework.core.eager.Tensor)
             ), "Currently, loss_fn should obtain Paddle.Tensor dtype"

             with paddle.amp.auto_cast(enable=False):
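After this change pipeline_parallel.py reaches the dygraph tracer and the eager Tensor type through `paddle.framework` instead of `paddle.fluid`. A rough standalone sketch of the loss-type assertion as it reads post-patch (simplified; the helper name is mine):

import paddle
import paddle.framework as framework


def check_loss_tensor(output_tensor):
    # Same assertion as the patched code, but the eager Tensor type is now
    # reached via paddle.framework.core rather than paddle.fluid.core.
    assert isinstance(
        output_tensor, (paddle.Tensor, framework.core.eager.Tensor)
    ), "Currently, loss_fn should obtain Paddle.Tensor dtype"
    return output_tensor


loss = check_loss_tensor(paddle.to_tensor([0.5]))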
python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
@@ -15,8 +15,7 @@
 import numpy as np

 import paddle
-import paddle.fluid.core as core
-from paddle.fluid.framework import in_dygraph_mode
+import paddle.framework as framework

 from ...utils.log_util import logger
 from .utils import number_2_dtype, paddle_2_number
@@ -138,7 +137,7 @@ class SendRecvMeta:
     def send_meta(self, tensor, group):
         dst_rank = _hcg._get_p2p_next_rank()

-        if isinstance(tensor, (paddle.Tensor, core.eager.Tensor)):
+        if isinstance(tensor, (paddle.Tensor, framework.core.eager.Tensor)):
             tensor_type = paddle.to_tensor([0])
             # send tensor type
             paddle.distributed.send(tensor_type, dst=dst_rank, group=group)
@@ -153,11 +152,13 @@ class SendRecvMeta:
             paddle.distributed.send(nums, dst=dst_rank, group=group)

             for d in tensor:
-                assert isinstance(d, (paddle.Tensor, core.eager.Tensor))
+                assert isinstance(
+                    d, (paddle.Tensor, framework.core.eager.Tensor)
+                )
                 self._send_dims_shape_dtype(d, group=group)

     def set_send_message(self, tensor):
-        if isinstance(tensor, (paddle.Tensor, core.eager.Tensor)):
+        if isinstance(tensor, (paddle.Tensor, framework.core.eager.Tensor)):
             self.send_shape_message = tensor.shape
             self.send_dtype_message = paddle_2_number(tensor.dtype)
         elif isinstance(tensor, tuple):
@@ -188,7 +189,7 @@ def _partial_send_op(
     tensor, group, use_calc_stream, ring_id, dst, nranks, rank_id
 ):
     dst_rank_in_group = dst if group is None else group.get_group_rank(dst)
-    if in_dygraph_mode():
+    if framework.in_dygraph_mode():
         group = (
             paddle.distributed.collective._get_default_group()
             if group is None
@@ -259,7 +260,7 @@ def recv_partial(
     else:
         if use_calc_stream:
             recv_op = paddle.distributed.recv
-        elif in_dygraph_mode():
+        elif framework.in_dygraph_mode():
             recv_op = paddle.distributed.irecv
         return recv_op(tensor.detach(), src=src_rank, group=group)
@@ -480,7 +481,7 @@ def _p2p_helper(
                 tasks.append(task)

     if not sync_recv:
-        if in_dygraph_mode():
+        if framework.in_dygraph_mode():
             # wait irecv tasks in eager dygraph mode with new comm library
             for task in tasks:
                 assert task is not None
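The recurring edit in this file is calling `framework.in_dygraph_mode()` through the public `paddle.framework` module. A simplified sketch of the dispatch `recv_partial` performs under that check, reduced to the branch shown in the hunk (the real function also handles partial receives; the helper name and the final fallback are mine):

import paddle
import paddle.framework as framework


def pick_recv_op(use_calc_stream):
    # Simplified mirror of the patched recv_partial branch: a blocking recv on
    # the calculation stream, otherwise an async irecv in eager dygraph mode.
    if use_calc_stream:
        return paddle.distributed.recv
    elif framework.in_dygraph_mode():
        return paddle.distributed.irecv
    # Fallback added for illustration only; the real code covers this case elsewhere.
    return paddle.distributed.recv


recv_op = pick_recv_op(use_calc_stream=False)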