diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 4f86a4a25b67926e7383ed2b32ba163f95141b57..f246afd5679e0ee38af5580d0a317a79318f7d70 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -1053,17 +1053,11 @@ class Fleet(object): loss_name=loss.name, share_vars_from=None) loss.block.program._graph = compiled_program return self.user_defined_optimizer.minimize( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + loss, startup_program, parameter_list, no_grad_set=no_grad_set) if meta_optimizer: optimize_ops, params_grads = meta_optimizer.minimize( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + loss, startup_program, parameter_list, no_grad_set=no_grad_set) default_program = paddle.static.default_main_program() @@ -1072,20 +1066,14 @@ class Fleet(object): else: optimize_ops, params_grads = self.user_defined_optimizer.minimize( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + loss, startup_program, parameter_list, no_grad_set=no_grad_set) context["program_optimize_ops"] = optimize_ops context["program_params_grads"] = params_grads if graph_optimizer: optimize_ops, params_grads = graph_optimizer.minimize( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + loss, startup_program, parameter_list, no_grad_set=no_grad_set) # since we do not encourage users to use graph operations # if a graph optimizer takes effect, mostly # optimizers_ops and params_grads are None diff --git a/python/paddle/fluid/incubate/fleet/base/fleet_base.py b/python/paddle/fluid/incubate/fleet/base/fleet_base.py index 40cc2d2dd4e3823796451e5f335b7c4e765d5908..77a202317912f2ca1f72eb394b322b39062c8fbd 100644 --- a/python/paddle/fluid/incubate/fleet/base/fleet_base.py +++ b/python/paddle/fluid/incubate/fleet/base/fleet_base.py @@ -19,6 +19,7 @@ import abc import paddle.fluid as fluid from paddle.fluid.executor import Executor from paddle.fluid.optimizer import SGD +from paddle.optimizer import SGD as SGD_v2 from paddle.fluid.incubate.fleet.base.mode import Mode from paddle.distributed.fleet.base.role_maker import RoleMakerBase @@ -291,7 +292,8 @@ class DistributedOptimizer(object): def __init__(self, optimizer, strategy=None): if not isinstance(optimizer, SGD.__bases__) \ - and not isinstance(optimizer, OptimizerWithMixedPrecision): + and not isinstance(optimizer, OptimizerWithMixedPrecision) \ + and not isinstance(optimizer, SGD_v2.__base__): raise TypeError("optimizer must be an instance of Optimizer") self._optimizer = optimizer diff --git a/python/paddle/fluid/incubate/fleet/collective/__init__.py b/python/paddle/fluid/incubate/fleet/collective/__init__.py index 6e5aae82517d1e0f408ebd7311e1c77a86fe426f..6466ce4b42e6ef6eb7d30e1e7a8bc56820e5ebad 100644 --- a/python/paddle/fluid/incubate/fleet/collective/__init__.py +++ b/python/paddle/fluid/incubate/fleet/collective/__init__.py @@ -28,6 +28,8 @@ from paddle.fluid.incubate.fleet.base.fleet_base import DistributedOptimizer from paddle.fluid import compiler from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel, CheckpointSaver +import paddle + import os import sys import six @@ -505,10 +507,7 @@ class CollectiveOptimizer(DistributedOptimizer): self._strategy) optimize_ops, param_grads = self._optimizer.minimize( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + loss, startup_program, parameter_list, no_grad_set=no_grad_set) fleet._origin_program = main_program.clone(for_test=False) fleet._transpiled_program = main_program diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py index d894762904ba612ef65010cef31d096960e590a9..05da44cd061331ff9a8e15d3095bec3bdf6965fb 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py @@ -60,7 +60,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.nccl_comm_num = 2 strategy.sync_nccl_allreduce = True - optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer( optimizer, strategy=strategy) optimizer.minimize(avg_cost)