Commit 0bb079cd (unverified)
Authored May 06, 2021 by Feiyu Chan; committed via GitHub on May 06, 2021
Parent: a9d330a3

avoid polluting logging's root logger (#32673) (#32706)

Showing 4 changed files, with 64 additions and 52 deletions (+64 / -52)
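For context on the commit message: logging.basicConfig() configures the process-wide root logger, so calling it at import time inside a library silently changes log formatting and levels for every other package in the same process. The snippet below is not part of the commit; it is a minimal standalone sketch (module and logger names are illustrative) contrasting that behaviour with the per-module logger pattern this change adopts.

import logging

# Anti-pattern for a library: basicConfig() configures the process-wide root
# logger, so importing a module that does this changes formatting and levels
# for every other package in the same process.
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s', level=logging.DEBUG)
logging.getLogger("some_other_package").debug(
    "now visible everywhere, because basicConfig() lowered the root level")

# Pattern adopted by the commit: configure only a named, module-level logger
# and leave the root logger's configuration alone.
logger = logging.getLogger("my_library.module")  # illustrative name
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setFormatter(
    logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s'))
logger.addHandler(ch)
logger.info("formatted by the module's own handler")
# Note: records still propagate to the root logger by default, so in this
# combined script the line above is also printed by the root handler that
# basicConfig() installed earlier.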
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py   +46  -43
python/paddle/distributed/fleet/utils/recompute.py                       +7   -4
python/paddle/fluid/incubate/fleet/utils/utils.py                        +5   -2
python/paddle/utils/cpp_extension/extension_utils.py                     +6   -3
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py

@@ -29,9 +29,12 @@ from paddle.fluid.framework import Program, Variable, name_scope, default_main_p
 from paddle.fluid import layers
 
 import logging
-logging.basicConfig(
-    format='%(asctime)s %(levelname)-8s %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S')
+logger = logging.getLogger(__name__)
+formatter = logging.Formatter(
+    fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+logger.addHandler(ch)
 from functools import reduce
 
 __all__ = ["ShardingOptimizer"]

@@ -136,7 +139,7 @@ class ShardingOptimizer(MetaOptimizerBase):
             # FIXME (JZ-LIANG) deprecated hybrid_dp
             if self.user_defined_strategy.sharding_configs["hybrid_dp"]:
-                logging.warning(
+                logger.warning(
                     "[hybrid_dp] API setting is deprecated. Now when dp_degree >= 2, its will be in hybrid dp mode automatically"
                 )
                 assert self.dp_degree >= 1

@@ -174,7 +177,7 @@ class ShardingOptimizer(MetaOptimizerBase):
             self._gradient_merge_acc_step = self.user_defined_strategy.pipeline_configs[
                 'accumulate_steps']
         if self._gradient_merge_acc_step > 1:
-            logging.info("Gradient merge in [{}], acc step = [{}]".format(
+            logger.info("Gradient merge in [{}], acc step = [{}]".format(
                 self.gradient_merge_mode, self._gradient_merge_acc_step))
 
         # optimize offload

@@ -338,7 +341,7 @@ class ShardingOptimizer(MetaOptimizerBase):
         # opt offload should be enable while gradient merge is enable && acc_step is quite large (e.g. >> 100)
         # sync its memcpy could not be overlap with calc, otherwise it will slower down training severely.
         if self.optimize_offload:
-            logging.info("Sharding with optimize offload !")
+            logger.info("Sharding with optimize offload !")
             offload_helper = OffloadHelper()
             offload_helper.offload(main_block, startup_block)
             offload_helper.offload_fp32param(main_block, startup_block)

@@ -641,15 +644,15 @@ class ShardingOptimizer(MetaOptimizerBase):
         for varname in sorted(
                 var2broadcast_time, key=var2broadcast_time.get, reverse=True):
-            logging.info("Sharding broadcast: [{}] times [{}]".format(
+            logger.info("Sharding broadcast: [{}] times [{}]".format(
                 var2broadcast_time[varname], varname))
         for idx_ in range(len(self._segments)):
-            logging.info("segment [{}] :".format(idx_))
-            logging.info("start op: [{}] [{}]".format(block.ops[
+            logger.info("segment [{}] :".format(idx_))
+            logger.info("start op: [{}] [{}]".format(block.ops[
                 self._segments[idx_]._start_idx].desc.type(), block.ops[
                     self._segments[idx_]._start_idx].desc.input_arg_names(
                     )))
-            logging.info("end op: [{}] [{}]".format(block.ops[
+            logger.info("end op: [{}] [{}]".format(block.ops[
                 self._segments[idx_]._end_idx].desc.type(), block.ops[
                     self._segments[idx_]._end_idx].desc.input_arg_names()))
         return

@@ -1108,7 +1111,7 @@ class ShardingOptimizer(MetaOptimizerBase):
                 self.dp_group_endpoints.append(self.global_endpoints[
                     dp_first_rank_idx + dp_offset * i])
             assert self.current_endpoint in self.dp_group_endpoints
-            logging.info("Hybrid DP mode turn on !")
+            logger.info("Hybrid DP mode turn on !")
         else:
             self.dp_ring_id = -1
             self.dp_rank = -1

@@ -1119,40 +1122,40 @@ class ShardingOptimizer(MetaOptimizerBase):
         # NOTE (JZ-LIANG) when use global ring for calc global norm and dp_degree > 1, the allreduce result should be devided by dp_degree
         self.global_ring_id = 3
 
-        logging.info("global word size: {}".format(self.global_word_size))
-        logging.info("global rank: {}".format(self.global_rank))
-        logging.info("global endpoints: {}".format(self.global_endpoints))
-        logging.info("global ring id: {}".format(self.global_ring_id))
-        logging.info("#####" * 6)
-        logging.info("mp group size: {}".format(self.mp_degree))
-        logging.info("mp rank: {}".format(self.mp_rank))
-        logging.info("mp group id: {}".format(self.mp_group_id))
-        logging.info("mp group endpoints: {}".format(self.mp_group_endpoints))
-        logging.info("mp ring id: {}".format(self.mp_ring_id))
-        logging.info("#####" * 6)
-        logging.info("sharding group size: {}".format(self.sharding_degree))
-        logging.info("sharding rank: {}".format(self.sharding_rank))
-        logging.info("sharding group id: {}".format(self.sharding_group_id))
-        logging.info("sharding group endpoints: {}".format(
+        logger.info("global word size: {}".format(self.global_word_size))
+        logger.info("global rank: {}".format(self.global_rank))
+        logger.info("global endpoints: {}".format(self.global_endpoints))
+        logger.info("global ring id: {}".format(self.global_ring_id))
+        logger.info("#####" * 6)
+        logger.info("mp group size: {}".format(self.mp_degree))
+        logger.info("mp rank: {}".format(self.mp_rank))
+        logger.info("mp group id: {}".format(self.mp_group_id))
+        logger.info("mp group endpoints: {}".format(self.mp_group_endpoints))
+        logger.info("mp ring id: {}".format(self.mp_ring_id))
+        logger.info("#####" * 6)
+        logger.info("sharding group size: {}".format(self.sharding_degree))
+        logger.info("sharding rank: {}".format(self.sharding_rank))
+        logger.info("sharding group id: {}".format(self.sharding_group_id))
+        logger.info("sharding group endpoints: {}".format(
             self.sharding_group_endpoints))
-        logging.info("sharding ring id: {}".format(self.sharding_ring_id))
-        logging.info("#####" * 6)
-        logging.info("pp group size: {}".format(self.pp_degree))
-        logging.info("pp rank: {}".format(self.pp_rank))
-        logging.info("pp group id: {}".format(self.pp_group_id))
-        logging.info("pp group endpoints: {}".format(self.pp_group_endpoints))
-        logging.info("pp ring id: {}".format(self.pp_ring_id))
-        logging.info("#####" * 6)
-        logging.info("pure dp group size: {}".format(self.dp_degree))
-        logging.info("pure dp rank: {}".format(self.dp_rank))
-        logging.info("pure dp group endpoints: {}".format(
+        logger.info("sharding ring id: {}".format(self.sharding_ring_id))
+        logger.info("#####" * 6)
+        logger.info("pp group size: {}".format(self.pp_degree))
+        logger.info("pp rank: {}".format(self.pp_rank))
+        logger.info("pp group id: {}".format(self.pp_group_id))
+        logger.info("pp group endpoints: {}".format(self.pp_group_endpoints))
+        logger.info("pp ring id: {}".format(self.pp_ring_id))
+        logger.info("#####" * 6)
+        logger.info("pure dp group size: {}".format(self.dp_degree))
+        logger.info("pure dp rank: {}".format(self.dp_rank))
+        logger.info("pure dp group endpoints: {}".format(
             self.dp_group_endpoints))
-        logging.info("pure dp ring id: {}".format(self.dp_ring_id))
-        logging.info("#####" * 6)
+        logger.info("pure dp ring id: {}".format(self.dp_ring_id))
+        logger.info("#####" * 6)
 
         return
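The same replacement pattern repeats in the remaining three files. As a rough illustration of what the setup added above produces at runtime, here is a standalone sketch (not the Paddle code itself; the logger name is the module path implied by the file, and the timestamp in the comment is only an example).

import logging

# Mirror of the setup added in sharding_optimizer.py, under the module's name.
logger = logging.getLogger(
    "paddle.distributed.fleet.meta_optimizers.sharding_optimizer")
formatter = logging.Formatter(
    fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)

logger.warning("Hybrid DP mode turn on !")
# -> 2021-05-06 12:00:00 WARNING  Hybrid DP mode turn on !   (example output)
# The root logger gains no handlers and no format of its own, so other
# packages' logging behaviour is unchanged by importing this module.

Note that because the new logger's level is left unset in this file, it inherits the root logger's default WARNING threshold, so the info-level messages above are only emitted if an application raises this logger's (or the root's) level.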
python/paddle/distributed/fleet/utils/recompute.py

@@ -19,9 +19,12 @@ from paddle.fluid import framework
 import contextlib
 
 import logging
-logging.basicConfig(
-    format='%(asctime)s %(levelname)-8s %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S')
+logger = logging.getLogger(__name__)
+formatter = logging.Formatter(
+    fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+logger.addHandler(ch)
 
 
 def detach_variable(inputs):

@@ -40,7 +43,7 @@ def detach_variable(inputs):
 
 def check_recompute_necessary(inputs):
     if not any(input_.stop_gradient == False for input_ in inputs
               if isinstance(input_, paddle.Tensor)):
-        logging.warn(
+        logger.warn(
            "[Recompute]: None of the inputs to current recompute block need grad, "
            "therefore there is NO need to recompute this block in backward !"
        )
python/paddle/fluid/incubate/fleet/utils/utils.py

@@ -34,9 +34,12 @@ __all__ = [
     "graphviz"
 ]
 
-logging.basicConfig(
-    format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+logger.addHandler(ch)
 
 persistable_vars_out_fn = "vars_persistable.log"
 all_vars_out_fn = "vars_all.log"
python/paddle/utils/cpp_extension/extension_utils.py

@@ -32,9 +32,12 @@ from ...fluid import core
 from ...fluid.framework import OpProtoHolder
 from ...sysconfig import get_include, get_lib
 
-logging.basicConfig(
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    level=logging.INFO)
+logger = logging.getLogger("utils.cpp_extension")
+logger.setLevel(logging.INFO)
+formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+logger.addHandler(ch)
 
 OS_NAME = sys.platform
 IS_WINDOWS = OS_NAME.startswith('win')

@@ -1125,4 +1128,4 @@ def log_v(info, verbose=True):
     Print log information on stdout.
     """
     if verbose:
-        logging.info(info)
+        logger.info(info)
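One practical effect of switching to named loggers is that applications can now tune or silence Paddle's internal logging per module without reconfiguring their own root logger. The sketch below is hypothetical application-side code, not part of the commit; the first logger name is derived from the module path of sharding_optimizer.py, and "utils.cpp_extension" is the name used in the diff above.

import logging

# Hypothetical application-side tuning of the loggers introduced by this
# commit, addressed by name; the root logger is left to the application.
logging.getLogger(
    "paddle.distributed.fleet.meta_optimizers.sharding_optimizer").setLevel(
        logging.INFO)  # surface the sharding setup summary
logging.getLogger("utils.cpp_extension").setLevel(
    logging.ERROR)     # keep extension builds quiet

# The application's own root configuration stays whatever it chooses.
logging.basicConfig(
    level=logging.WARNING, format='%(levelname)s:%(name)s:%(message)s')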