Unverified commit c70f5920 authored by mapingshuo, committed by GitHub

add gradient Merge optimizer to meta (#25763)

* add gradient Merge optimizer to meta, test=develop
Parent caa90a65
@@ -521,6 +521,23 @@ class DistributedStrategy(object):
@property
def gradient_merge(self):
"""
Gradient Merge, also known as Gradient Accumulation,
is a strategy for training with a large effective batch size.
With this strategy, model parameters are not updated until
a user-defined number of steps has been run. At every step,
the forward and backward networks run to compute the gradients
of the model parameters; only once every k steps does the
optimization network run, applying the chosen optimization
method (such as SGD or Adam) to the model parameters.
Examples:
.. code-block:: python
import paddle.fleet as fleet
strategy = fleet.DistributedStrategy()
strategy.gradient_merge = True
strategy.gradient_merge_configs = {"k_steps": 4, "avg": True}
"""
return self.strategy.gradient_merge
@gradient_merge.setter
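For intuition, the behavior described by the docstring above can be sketched in a few
lines of plain Python. This is an illustrative sketch only (the names grad_fn, batches,
and sgd_with_gradient_merge are hypothetical), not the program transformation Paddle
actually performs:

import numpy as np

def sgd_with_gradient_merge(param, grad_fn, batches, k_steps=4, avg=True, lr=0.01):
    # grad_fn(param, batch) stands for one forward + backward pass and
    # returns the gradient of the loss w.r.t. param for that mini-batch.
    acc = np.zeros_like(param)
    for step, batch in enumerate(batches, start=1):
        acc += grad_fn(param, batch)      # accumulate, do not update yet
        if step % k_steps == 0:           # the optimizer runs only every k_steps
            update = acc / k_steps if avg else acc
            param -= lr * update          # plain SGD as the inner optimizer
            acc[:] = 0.0
    return param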
@@ -532,6 +549,18 @@ class DistributedStrategy(object):
@property
def gradient_merge_configs(self):
"""
The key-value configs for the gradient merge strategy.
Keys:
k_steps (int): the number of steps over which gradients are
    accumulated before the parameters are updated
avg (bool): whether to average the accumulated mini-batch
    gradients; the default value is `True`
Examples:
.. code-block:: python
import paddle.fleet as fleet
strategy = fleet.DistributedStrategy()
strategy.gradient_merge = True
strategy.gradient_merge_configs = {"k_steps": 4, "avg": True}
"""
return get_msg_dict(self.strategy.gradient_merge_configs)
@gradient_merge_configs.setter
......
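A concrete reading of the avg key: with k_steps = 4 and per-step gradients g1..g4,
avg=True applies (g1 + g2 + g3 + g4) / 4 as the effective gradient, while avg=False
applies the plain sum; in both cases the effective batch size is k_steps times the
mini-batch size, only the gradient scale differs. A tiny numeric check (illustrative
Python only):

g = [0.2, 0.4, 0.6, 0.8]            # per-step gradients over k_steps = 4 steps
summed = sum(g)                      # avg=False -> 2.0 is applied
averaged = sum(g) / len(g)           # avg=True  -> 0.5 is applied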
@@ -13,11 +13,16 @@
# limitations under the License.
from ..meta_optimizers import RecomputeOptimizer
from ..meta_optimizers import GradientMergeOptimizer
from ..meta_optimizers import GraphExecutionOptimizer
__all__ = ["MetaOptimizerFactory"]
meta_optimizer_names = ["RecomputeOptimizer", "GraphExecutionOptimizer"]
meta_optimizer_names = [
"RecomputeOptimizer",
"GradientMergeOptimizer",
"GraphExecutionOptimizer",
]
class MetaOptimizerFactory(object):
......
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
from .recompute_optimizer import RecomputeOptimizer
from .gradient_merge_optimizer import GradientMergeOptimizer
from .graph_execution_optimizer import GraphExecutionOptimizer
__all__ = ['RecomputeOptimizer']
__all__ = [
'RecomputeOptimizer',
'GradientMergeOptimizer',
]
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.optimizer import GradientMergeOptimizer as GM
from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["GradientMergeOptimizer"]
class GradientMergeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(GradientMergeOptimizer, self).__init__(optimizer)
self.inner_opt = optimizer
self.wrapped_opt = GM(optimizer)
self.meta_optimizers_white_list = []
def _set_basic_info(self, loss, role_maker, user_defined_optimizer,
user_defined_strategy):
super(GradientMergeOptimizer, self)._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy)
self.wrapped_opt._set_k_steps(
self.user_defined_strategy.gradient_merge_configs["k_steps"])
self.wrapped_opt._set_avg(
self.user_defined_strategy.gradient_merge_configs["avg"])
def _can_apply(self):
can_apply = (self.user_defined_strategy.gradient_merge == True) and \
self.user_defined_strategy.gradient_merge_configs["k_steps"] > 1
return can_apply
def _disable_strategy(self, dist_strategy):
dist_strategy.gradient_merge = False
dist_strategy.gradient_merge_configs = {"k_steps": 1, "avg": True}
def minimize_impl(self,
loss,
startup_program=None,
parameter_list=None,
no_grad_set=None):
optimize_ops, params_grads = \
self.wrapped_opt.minimize(loss, startup_program,
parameter_list, no_grad_set)
return optimize_ops, params_grads
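The meta optimizer above is a thin adapter: it reads k_steps and avg from the user
strategy and delegates minimize to paddle.fluid.optimizer.GradientMergeOptimizer. For
reference, a minimal sketch of using that wrapped optimizer directly, assuming its
constructor also accepts k_steps and avg (the adapter instead calls GM(optimizer) and
then the _set_k_steps/_set_avg setters); avg_cost is assumed to be a loss Variable
already built in the default program:

import paddle.fluid as fluid
from paddle.fluid.optimizer import GradientMergeOptimizer

sgd = fluid.optimizer.SGD(learning_rate=0.01)
# constructor keyword names assumed; equivalent to GM(sgd) plus the setters above
gm = GradientMergeOptimizer(sgd, k_steps=4, avg=True)
gm.minimize(avg_cost)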
@@ -5017,6 +5017,12 @@ class GradientMergeOptimizer(object):
self.type = "gradient_merge"
self.avg = avg
def _set_k_steps(self, k_steps):
self.k_steps = k_steps
def _set_avg(self, avg):
self.avg = avg
def minimize(self,
loss,
startup_program=None,
......
@@ -32,6 +32,7 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_checkpoint)
list(APPEND MIXED_DIST_TEST_OPS test_collective_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_base)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_gradient_merge_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_private_function)
foreach(TEST_OP ${MIXED_DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP})
@@ -364,6 +365,7 @@ if(WITH_DISTRIBUTE)
if(NOT APPLE)
py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS})
py_test_modules(test_fleet_meta_optimizer MODULES test_fleet_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_gradient_merge_meta_optimizer MODULES test_fleet_gradient_merge_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_private_function MODULES test_fleet_private_function ENVS ${dist_ENVS})
endif(NOT APPLE)
if(WITH_DGC)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import os
import paddle.fleet as fleet
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
def setUp(self):
os.environ["POD_IP"] = "127.0.0.1"
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
os.environ["PADDLE_TRAINERS_NUM"] = "2"
os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \
"127.0.0.1:36001,127.0.0.2:36001"
def test_gradient_merge_optimizer(self):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
input_x = paddle.fluid.layers.data(
name="x", shape=[32], dtype='float32')
input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
cost = paddle.fluid.layers.cross_entropy(
input=prediction, label=input_y)
avg_cost = paddle.fluid.layers.mean(x=cost)
strategy = paddle.fleet.DistributedStrategy()
strategy.gradient_merge = True
strategy.gradient_merge_configs = {"k_steps": 2, "avg": True}
optimizer = paddle.optimizer.SGD(learning_rate=0.01)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost)
if __name__ == "__main__":
unittest.main()
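The test above only builds the program and calls minimize. To actually run a few
steps on a local CPU build, a standard executor loop along the following lines could
be appended at the end of test_gradient_merge_optimizer; with k_steps=2 the parameters
would only be updated on every second run call:

import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for step in range(4):
    x = np.random.random(size=(8, 32)).astype('float32')
    y = np.random.randint(0, 2, size=(8, 1)).astype('int64')
    loss, = exe.run(fluid.default_main_program(),
                    feed={"x": x, "y": y},
                    fetch_list=[avg_cost])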