Commit c70f5920 (unverified) in 机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Authored by mapingshuo on Jul 30, 2020; committed via GitHub on Jul 30, 2020.
add gradient Merge optimizer to meta (#25763)
* add gradient Merge optimizer to meta, test=develop
Parent: caa90a65
Showing 7 changed files with 154 additions and 2 deletions (+154, -2).
python/paddle/fleet/base/distributed_strategy.py (+29, -0)
python/paddle/fleet/base/meta_optimizer_factory.py (+6, -1)
python/paddle/fleet/meta_optimizers/__init__.py (+5, -1)
python/paddle/fleet/meta_optimizers/gradient_merge_optimizer.py (+53, -0)
python/paddle/fluid/optimizer.py (+6, -0)
python/paddle/fluid/tests/unittests/CMakeLists.txt (+2, -0)
python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py (+53, -0)
python/paddle/fleet/base/distributed_strategy.py

@@ -521,6 +521,23 @@ class DistributedStrategy(object):

    @property
    def gradient_merge(self):
        """
        Gradient Merge, also called Gradient Accumulation,
        is a strategy for large batch training. With this strategy,
        model parameters are not updated on every step; instead,
        each step runs the forward and backward networks to compute
        the gradients of the model parameters, and every k steps the
        optimization network runs, applying a specific optimization
        method (such as SGD or Adam) to the model parameters.

        Examples:
          .. code-block:: python

            import paddle.fleet as fleet
            strategy = fleet.DistributedStrategy()
            strategy.gradient_merge = True
            strategy.gradient_merge_configs = {"k_steps": 4, "avg": True}
        """
        return self.strategy.gradient_merge

    @gradient_merge.setter
    ...

@@ -532,6 +549,18 @@ class DistributedStrategy(object):

    @property
    def gradient_merge_configs(self):
        """
        The key-value configs of distribute_strategy.
        Keys:
            k_steps (int): the update period of the parameters
            avg (bool): whether to average the gradients of each mini-batch;
                the default value is `True`
        Example:
            import paddle.fleet as fleet
            strategy = fleet.DistributedStrategy()
            strategy.gradient_merge = True
            strategy.gradient_merge_configs = {"k_steps": 4, "avg": True}
        """
        return get_msg_dict(self.strategy.gradient_merge_configs)

    @gradient_merge_configs.setter
    ...
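To make the semantics above concrete, here is a minimal framework-free sketch of what gradient merge computes; `grad_fn`, `params`, `lr`, and `batches` are hypothetical stand-ins for a real model, not Paddle APIs:

    # Illustrative sketch of gradient merge (gradient accumulation); not Paddle code.
    def train_with_gradient_merge(grad_fn, params, lr, batches, k_steps=4, avg=True):
        acc = [0.0] * len(params)                  # accumulated gradients
        for step, batch in enumerate(batches, start=1):
            grads = grad_fn(params, batch)         # forward + backward run every step
            acc = [a + g for a, g in zip(acc, grads)]
            if step % k_steps == 0:                # parameters update only every k steps
                if avg:                            # optionally average over the k mini-batches
                    acc = [a / k_steps for a in acc]
                params = [p - lr * a for p, a in zip(params, acc)]  # e.g. plain SGD
                acc = [0.0] * len(params)          # reset the accumulator
        return params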
python/paddle/fleet/base/meta_optimizer_factory.py

@@ -13,11 +13,16 @@

 # limitations under the License.

 from ..meta_optimizers import RecomputeOptimizer
+from ..meta_optimizers import GradientMergeOptimizer
 from ..meta_optimizers import GraphExecutionOptimizer

 __all__ = ["MetaOptimizerFactory"]

-meta_optimizer_names = ["RecomputeOptimizer", "GraphExecutionOptimizer"]
+meta_optimizer_names = [
+    "RecomputeOptimizer",
+    "GradientMergeOptimizer",
+    "GraphExecutionOptimizer",
+]

 class MetaOptimizerFactory(object):
 ...
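The factory selects meta optimizers by name from this registry. The class body is not part of this diff, but a hypothetical sketch of how such a name list can be resolved to classes (assuming the names are importable from `paddle.fleet.meta_optimizers`):

    # Hypothetical sketch only; the real MetaOptimizerFactory body is not in this diff.
    import importlib

    def resolve_meta_optimizers(names, module_path="paddle.fleet.meta_optimizers"):
        module = importlib.import_module(module_path)
        return [getattr(module, name) for name in names]

    # e.g. resolve_meta_optimizers(meta_optimizer_names)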
python/paddle/fleet/meta_optimizers/__init__.py

@@ -12,6 +12,10 @@

 # See the License for the specific language governing permissions and

 from .recompute_optimizer import RecomputeOptimizer
+from .gradient_merge_optimizer import GradientMergeOptimizer
 from .graph_execution_optimizer import GraphExecutionOptimizer

-__all__ = ['RecomputeOptimizer']
+__all__ = [
+    'RecomputeOptimizer',
+    'GradientMergeOptimizer',
+]
python/paddle/fleet/meta_optimizers/gradient_merge_optimizer.py (new file, mode 100644)

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.fluid.optimizer import GradientMergeOptimizer as GM
from .meta_optimizer_base import MetaOptimizerBase

__all__ = ["GradientMergeOptimizer"]


class GradientMergeOptimizer(MetaOptimizerBase):
    def __init__(self, optimizer):
        super(GradientMergeOptimizer, self).__init__(optimizer)
        self.inner_opt = optimizer
        self.wrapped_opt = GM(optimizer)
        self.meta_optimizers_white_list = []

    def _set_basic_info(self, loss, role_maker, user_defined_optimizer,
                        user_defined_strategy):
        super(GradientMergeOptimizer, self)._set_basic_info(
            loss, role_maker, user_defined_optimizer, user_defined_strategy)
        self.wrapped_opt._set_k_steps(
            self.user_defined_strategy.gradient_merge_configs["k_steps"])
        self.wrapped_opt._set_avg(
            self.user_defined_strategy.gradient_merge_configs["avg"])

    def _can_apply(self):
        can_apply = (self.user_defined_strategy.gradient_merge == True) and \
            self.user_defined_strategy.gradient_merge_configs["k_steps"] > 1
        return can_apply

    def _disable_strategy(self, dist_strategy):
        dist_strategy.gradient_merge = False
        dist_strategy.gradient_merge_configs = {"k_steps": 1, "avg": True}

    def minimize_impl(self,
                      loss,
                      startup_program=None,
                      parameter_list=None,
                      no_grad_set=None):
        optimize_ops, params_grads = \
            self.wrapped_opt.minimize(loss, startup_program,
                                      parameter_list, no_grad_set)
        return optimize_ops, params_grads
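Note that `_can_apply` gates activation: the strategy must enable gradient merge and `k_steps` must exceed 1, since a one-step merge is a no-op. A small sketch mirroring that condition:

    # Sketch mirroring the _can_apply condition above; values are illustrative.
    def can_apply(gradient_merge, configs):
        return gradient_merge and configs["k_steps"] > 1

    assert can_apply(True, {"k_steps": 2, "avg": True})       # activates
    assert not can_apply(True, {"k_steps": 1, "avg": True})   # one-step merge is a no-op
    assert not can_apply(False, {"k_steps": 4, "avg": True})  # strategy disabled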
python/paddle/fluid/optimizer.py

@@ -5017,6 +5017,12 @@ class GradientMergeOptimizer(object):

        self.type = "gradient_merge"
        self.avg = avg

    def _set_k_steps(self, k_steps):
        self.k_steps = k_steps

    def _set_avg(self, avg):
        self.avg = avg

    def minimize(self, loss, startup_program=None, ...
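For reference, the wrapped fluid optimizer can also be constructed directly. A sketch, assuming the `GradientMergeOptimizer(inner_optimizer, k_steps=1, avg=True)` constructor signature suggested by the attributes above (verify against your Paddle version):

    # Assumes GradientMergeOptimizer(inner_optimizer, k_steps=1, avg=True);
    # verify the signature against your Paddle version.
    import paddle.fluid as fluid

    sgd = fluid.optimizer.SGD(learning_rate=0.01)
    gm = fluid.optimizer.GradientMergeOptimizer(sgd, k_steps=4, avg=True)
    # The meta optimizer reaches the same configuration through the new setters:
    gm._set_k_steps(4)
    gm._set_avg(True)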
python/paddle/fluid/tests/unittests/CMakeLists.txt

@@ -32,6 +32,7 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_checkpoint)

 list(APPEND MIXED_DIST_TEST_OPS test_collective_optimizer)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_base)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_meta_optimizer)
+list(APPEND MIXED_DIST_TEST_OPS test_fleet_gradient_merge_meta_optimizer)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_private_function)
 foreach(TEST_OP ${MIXED_DIST_TEST_OPS})
     list(REMOVE_ITEM TEST_OPS ${TEST_OP})
 ...

@@ -364,6 +365,7 @@ if(WITH_DISTRIBUTE)

     if(NOT APPLE)
         py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS})
         py_test_modules(test_fleet_meta_optimizer MODULES test_fleet_meta_optimizer ENVS ${dist_ENVS})
+        py_test_modules(test_fleet_gradient_merge_meta_optimizer MODULES test_fleet_gradient_merge_meta_optimizer ENVS ${dist_ENVS})
         py_test_modules(test_fleet_private_function MODULES test_fleet_private_function ENVS ${dist_ENVS})
     endif(NOT APPLE)
     if(WITH_DGC)
 ...
python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import paddle
import os
import paddle.fleet as fleet
import paddle.fluid.incubate.fleet.base.role_maker as role_maker


class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
    def setUp(self):
        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
        os.environ["PADDLE_TRAINERS_NUM"] = "2"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \
            "127.0.0.1:36001,127.0.0.2:36001"

    def test_gradient_merge_optimizer(self):
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        input_x = paddle.fluid.layers.data(
            name="x", shape=[32], dtype='float32')
        input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')

        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
        cost = paddle.fluid.layers.cross_entropy(
            input=prediction, label=input_y)
        avg_cost = paddle.fluid.layers.mean(x=cost)

        strategy = paddle.fleet.DistributedStrategy()
        strategy.gradient_merge = True
        strategy.gradient_merge_configs = {"k_steps": 2, "avg": True}
        optimizer = paddle.optimizer.SGD(learning_rate=0.01)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        optimizer.minimize(avg_cost)


if __name__ == "__main__":
    unittest.main()