Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
7bede8d9
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7bede8d9
编写于
9月 30, 2018
作者:
X
Xin Pan
提交者:
GitHub
9月 30, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13678 from typhoonzero/cherrypick_13535
Fix memory optimization with dist train (#13535)
上级
de195518
db1bfb99
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
112 addition
and
25 deletion
+112
-25
paddle/fluid/API.spec
paddle/fluid/API.spec
+2
-2
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+2
-4
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+7
-8
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+101
-11
未找到文件。
paddle/fluid/API.spec
浏览文件 @
7bede8d9
...
@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
...
@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
], varargs=None, keywords=None, defaults=(None, False, 0
))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
, 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False
))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
...
@@ -304,7 +304,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=[
...
@@ -304,7 +304,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=[
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
], varargs=None, keywords=None, defaults=(None, False, 0
))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
, 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False
))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
7bede8d9
...
@@ -50,9 +50,7 @@ class TestDistRunnerBase(object):
...
@@ -50,9 +50,7 @@ class TestDistRunnerBase(object):
def
run_pserver
(
self
,
args
):
def
run_pserver
(
self
,
args
):
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
2
)
# NOTE: pserver should not call memory optimize
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
args
.
endpoints
,
fluid
.
default_main_program
(),
args
.
endpoints
,
args
.
trainers
,
args
.
sync_mode
)
args
.
trainers
,
args
.
sync_mode
)
...
@@ -70,7 +68,7 @@ class TestDistRunnerBase(object):
...
@@ -70,7 +68,7 @@ class TestDistRunnerBase(object):
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
fluid
.
memory_optimize
(
fluid
.
default_main_program
()
,
skip_grads
=
True
)
if
args
.
is_dist
:
if
args
.
is_dist
:
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
fluid
.
default_main_program
(),
...
...
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
7bede8d9
...
@@ -26,14 +26,13 @@ class TestDistSeResneXt2x2(TestDistBase):
...
@@ -26,14 +26,13 @@ class TestDistSeResneXt2x2(TestDistBase):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
# TODO(typhoonzero): fix this test
class
TestDistseResnXt2x2WithMemopt
(
TestDistBase
):
# class TestDistseResnXt2x2WithMemopt(TestDistBase):
def
_setup_config
(
self
):
# def _setup_config(self):
self
.
_sync_mode
=
True
# self._sync_mode = True
self
.
_mem_opt
=
True
# self._mem_opt = True
def
test_dist_train
(
self
):
# def test_dist_train(self):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
# self.check_with_place("dist_se_resnext.py", delta=1e-7)
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
...
...
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
7bede8d9
...
@@ -14,10 +14,10 @@
...
@@ -14,10 +14,10 @@
from
__future__
import
print_function
from
__future__
import
print_function
from
collections
import
defaultdict
,
OrderedDict
,
Callable
from
collections
import
defaultdict
,
MutableSet
from
..
import
core
from
..
import
core
from
...
import
compat
as
cpt
from
...
import
compat
as
cpt
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
,
core
from
..backward
import
_rename_arg_
from
..backward
import
_rename_arg_
from
functools
import
reduce
from
functools
import
reduce
from
six.moves
import
range
from
six.moves
import
range
...
@@ -44,17 +44,82 @@ SUB_BLOCK_PAIR = [("while", "while_grad"), ("parallel_do", "parallel_do_grad"),
...
@@ -44,17 +44,82 @@ SUB_BLOCK_PAIR = [("while", "while_grad"), ("parallel_do", "parallel_do_grad"),
PRINT_LOG
=
False
PRINT_LOG
=
False
class
OrderedSet
(
MutableSet
):
def
__init__
(
self
,
iterable
=
None
):
self
.
end
=
end
=
[]
end
+=
[
None
,
end
,
end
]
# sentinel node for doubly linked list
self
.
map
=
{}
# key --> [key, prev, next]
if
iterable
is
not
None
:
self
|=
iterable
def
__len__
(
self
):
return
len
(
self
.
map
)
def
__contains__
(
self
,
key
):
return
key
in
self
.
map
def
add
(
self
,
key
):
if
key
not
in
self
.
map
:
end
=
self
.
end
curr
=
end
[
1
]
curr
[
2
]
=
end
[
1
]
=
self
.
map
[
key
]
=
[
key
,
curr
,
end
]
def
update
(
self
,
other
):
for
e
in
other
:
self
.
add
(
e
)
def
discard
(
self
,
key
):
if
key
in
self
.
map
:
key
,
prev
,
next
=
self
.
map
.
pop
(
key
)
prev
[
2
]
=
next
next
[
1
]
=
prev
def
remove
(
self
,
key
):
self
.
discard
(
key
)
def
__iter__
(
self
):
end
=
self
.
end
curr
=
end
[
2
]
while
curr
is
not
end
:
yield
curr
[
0
]
curr
=
curr
[
2
]
def
__reversed__
(
self
):
end
=
self
.
end
curr
=
end
[
1
]
while
curr
is
not
end
:
yield
curr
[
0
]
curr
=
curr
[
1
]
def
pop
(
self
,
last
=
True
):
if
not
self
:
raise
KeyError
(
'set is empty'
)
key
=
self
.
end
[
1
][
0
]
if
last
else
self
.
end
[
2
][
0
]
self
.
discard
(
key
)
return
key
def
__repr__
(
self
):
if
not
self
:
return
'%s()'
%
(
self
.
__class__
.
__name__
,
)
return
'%s(%r)'
%
(
self
.
__class__
.
__name__
,
list
(
self
))
def
__eq__
(
self
,
other
):
if
isinstance
(
other
,
OrderedSet
):
return
len
(
self
)
==
len
(
other
)
and
list
(
self
)
==
list
(
other
)
return
set
(
self
)
==
set
(
other
)
class
ControlFlowGraph
(
object
):
class
ControlFlowGraph
(
object
):
def
__init__
(
self
,
program
,
ops
,
forward_num
,
skip_opt
):
def
__init__
(
self
,
program
,
ops
,
forward_num
,
skip_opt
):
self
.
_program
=
program
self
.
_program
=
program
self
.
_ops
=
ops
self
.
_ops
=
ops
self
.
_forward_num
=
forward_num
self
.
_forward_num
=
forward_num
self
.
_successors
=
defaultdict
(
s
et
)
self
.
_successors
=
defaultdict
(
OrderedS
et
)
self
.
_presuccessors
=
defaultdict
(
s
et
)
self
.
_presuccessors
=
defaultdict
(
OrderedS
et
)
self
.
_uses
=
defaultdict
(
s
et
)
self
.
_uses
=
defaultdict
(
OrderedS
et
)
self
.
_defs
=
defaultdict
(
s
et
)
self
.
_defs
=
defaultdict
(
OrderedS
et
)
self
.
_live_in
=
defaultdict
(
s
et
)
self
.
_live_in
=
defaultdict
(
OrderedS
et
)
self
.
_live_out
=
defaultdict
(
s
et
)
self
.
_live_out
=
defaultdict
(
OrderedS
et
)
self
.
_skip_opt
=
skip_opt
self
.
_skip_opt
=
skip_opt
self
.
pool
=
[]
self
.
pool
=
[]
...
@@ -116,7 +181,7 @@ class ControlFlowGraph(object):
...
@@ -116,7 +181,7 @@ class ControlFlowGraph(object):
# NOTE: must sort the in_diff set for cases that get different cache var.
# NOTE: must sort the in_diff set for cases that get different cache var.
# FIXME(typhoonzero): maybe use a "sorted set" is better than this.
# FIXME(typhoonzero): maybe use a "sorted set" is better than this.
can_optimize
=
[
can_optimize
=
[
x
for
x
in
sorted
(
list
(
in_diff
))
x
for
x
in
in_diff
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
]
]
if
can_optimize
:
if
can_optimize
:
...
@@ -224,7 +289,7 @@ class ControlFlowGraph(object):
...
@@ -224,7 +289,7 @@ class ControlFlowGraph(object):
if
self
.
pool
:
if
self
.
pool
:
# NOTE: must sort the in_diff set for cases that get different cache var.
# NOTE: must sort the in_diff set for cases that get different cache var.
defs_can_optimize
=
[
defs_can_optimize
=
[
x
for
x
in
s
orted
(
list
(
self
.
_defs
[
i
]))
x
for
x
in
s
elf
.
_defs
[
i
]
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
]
]
out_pair
=
[
out_pair
=
[
...
@@ -381,7 +446,19 @@ def _get_cfgs(input_program):
...
@@ -381,7 +446,19 @@ def _get_cfgs(input_program):
return
cfgs
return
cfgs
def
memory_optimize
(
input_program
,
skip_opt_set
=
None
,
print_log
=
False
,
level
=
0
):
def
_is_opt_role_op
(
op
):
op_maker
=
core
.
op_proto_and_checker_maker
optimize_role
=
core
.
op_proto_and_checker_maker
.
OpRole
.
Optimize
if
op_maker
.
kOpRoleAttrName
()
in
op
.
attr_names
and
\
int
(
op
.
all_attrs
()[
op_maker
.
kOpRoleAttrName
()])
==
int
(
optimize_role
):
return
True
def
memory_optimize
(
input_program
,
skip_opt_set
=
None
,
print_log
=
False
,
level
=
0
,
skip_grads
=
False
):
"""Optimize memory by reusing var memory.
"""Optimize memory by reusing var memory.
Note: it doesn't not support subblock nested in subblock.
Note: it doesn't not support subblock nested in subblock.
...
@@ -398,6 +475,19 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
...
@@ -398,6 +475,19 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
raise
ValueError
(
"only support opt_level 0 or 1."
)
raise
ValueError
(
"only support opt_level 0 or 1."
)
global
PRINT_LOG
global
PRINT_LOG
PRINT_LOG
=
print_log
PRINT_LOG
=
print_log
if
skip_grads
:
grad_set
=
set
()
OP_ROLE_VAR
=
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
()
for
op
in
input_program
.
global_block
().
ops
:
if
_is_opt_role_op
(
op
):
if
op
.
attr
(
OP_ROLE_VAR
):
grad_name
=
op
.
attr
(
OP_ROLE_VAR
)[
1
]
grad_set
.
add
(
grad_name
)
if
not
skip_opt_set
:
skip_opt_set
=
grad_set
else
:
skip_opt_set
.
update
(
grad_set
)
cfgs
=
_get_cfgs
(
input_program
)
cfgs
=
_get_cfgs
(
input_program
)
for
cfg
in
cfgs
:
for
cfg
in
cfgs
:
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录