Commit 5cd2bfec

Add optimize_offload config

Repository: Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Authored Mar 11, 2021 by WangXi; committed by sandyhouse on Mar 22, 2021
Parent: 47042a97
Showing 2 changed files with 16 additions and 11 deletions (+16 -11):

paddle/fluid/framework/distributed_strategy.proto  (+2 -1)
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py  (+14 -10)
paddle/fluid/framework/distributed_strategy.proto

@@ -38,7 +38,8 @@ message ShardingConfig {
   optional int32 acc_steps = 7 [ default = 1 ];
   optional int32 schedule_mode = 8 [ default = 0 ];
   optional int32 pp_bz = 9 [ default = 1 ];
-  optional bool pp_allreduce_in_optimize = 10 [ default = true ];
+  optional bool pp_allreduce_in_optimize = 10 [ default = false ];
+  optional bool optimize_offload = 11 [ default = false ];
 }

 message AMPConfig {
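For orientation, a minimal sketch of how the new field could be switched on from user code. It assumes the paddle.distributed.fleet.DistributedStrategy API of this development branch (where ShardingConfig already contains optimize_offload); everything other than the "optimize_offload" key is illustrative.

    import paddle
    import paddle.distributed.fleet as fleet

    paddle.enable_static()
    fleet.init(is_collective=True)

    strategy = fleet.DistributedStrategy()
    strategy.sharding = True
    # "optimize_offload" is the field added by this commit; it maps onto
    # ShardingConfig.optimize_offload and defaults to false.
    strategy.sharding_configs = {"optimize_offload": True}

    # The strategy is then handed to the distributed optimizer as usual.
    optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)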
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py

@@ -103,6 +103,8 @@ class ShardingOptimizer(MetaOptimizerBase):
         self.pp_bz = self.user_defined_strategy.sharding_configs["pp_bz"]
         self.pp_allreduce_in_optimize = self.user_defined_strategy.sharding_configs[
             "pp_allreduce_in_optimize"]
+        self.optimize_offload = self.user_defined_strategy.sharding_configs[
+            "optimize_offload"]

         if self.inner_opt is None:
             raise ValueError(
@@ -359,8 +361,10 @@ class ShardingOptimizer(MetaOptimizerBase):
         main_block._sync_with_cpp()

         # TODO(wangxi): add optimize offload
-        offload_helper = OffloadHelper()
-        offload_helper.offload(main_block, startup_block)
+        if self.optimize_offload:
+            logging.info("Sharding with optimize offload !")
+            offload_helper = OffloadHelper()
+            offload_helper.offload(main_block, startup_block)

         with open("start_sharding_%d" % self.role_maker._worker_index(),
                   'w') as f:
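One consequence of this hunk is worth noting: the offload pass that previously ran unconditionally is now skipped unless the user opts in, because the proto default is false. A quick check of that default, assuming the usual DistributedStrategy accessors:

    import paddle.distributed.fleet as fleet

    strategy = fleet.DistributedStrategy()
    configs = strategy.sharding_configs
    print(configs.get("optimize_offload"))  # expected: False, the new proto default

    configs["optimize_offload"] = True
    strategy.sharding_configs = configs     # reassign so the setter validates the keys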
@@ -943,9 +947,8 @@ class ShardingOptimizer(MetaOptimizerBase):
             ]
             self.pp_group_size = self.pipeline_nodes
             self.pp_group_endpoints = [
                 ep for idx, ep in enumerate(self.endpoints)
-                if (idx % self.sharding_group_size
-                    ) == self.sharding_rank
+                if (idx % self.sharding_group_size) == self.sharding_rank
             ]
         else:
             self.mp_group_id = 0
@@ -969,11 +972,12 @@ class ShardingOptimizer(MetaOptimizerBase):
                 self._inner_parallelism_size * self.sharding_group_size)
             self.megatron_rank = self.global_rank % self._inner_parallelism_size
             self.sharding_group_endpoints = [
                 ep for idx, ep in enumerate(self.endpoints)
-                if (idx // (self._inner_parallelism_size *
-                            self.sharding_group_size)
-                    ) == self.sharding_group_id and idx %
-                self._inner_parallelism_size == self.megatron_rank
+                if (idx //
+                    (self._inner_parallelism_size *
+                     self.sharding_group_size)
+                    ) == self.sharding_group_id
+                and idx % self._inner_parallelism_size == self.megatron_rank
             ]
             print("sharding_endpoint:", self.sharding_group_endpoints)
             print("sharding_rank:", self.sharding_rank)
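The last two hunks appear to only re-wrap the endpoint-selection conditions, but the index arithmetic is easy to misread, so here is a small worked example of the megatron + sharding case. The sizes and endpoint names are made up (2 megatron ranks x 2 sharding ranks x 2 sharding groups = 8 endpoints); only the selection condition mirrors the diff.

    # Hypothetical layout: 8 endpoints, inner (megatron) parallelism = 2,
    # sharding group size = 2, so indices 0-3 form sharding group 0 and 4-7 group 1.
    endpoints = ["worker:%d" % p for p in range(6170, 6178)]
    inner_parallelism_size = 2
    sharding_group_size = 2

    def group_endpoints(sharding_group_id, megatron_rank):
        # Same condition as in the diff: keep endpoints in the given sharding group
        # whose position inside the group matches the megatron rank.
        return [
            ep for idx, ep in enumerate(endpoints)
            if (idx // (inner_parallelism_size * sharding_group_size)) == sharding_group_id
            and idx % inner_parallelism_size == megatron_rank
        ]

    print(group_endpoints(0, 0))  # ['worker:6170', 'worker:6172']
    print(group_endpoints(0, 1))  # ['worker:6171', 'worker:6173']
    print(group_endpoints(1, 0))  # ['worker:6174', 'worker:6176']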