diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py
index e89cb1f5ec497133985ef8069e77e864cef47f52..ad5a942b53ec6215242e56a93e0f1a3adb2ceff2 100644
--- a/python/paddle/distributed/fleet/__init__.py
+++ b/python/paddle/distributed/fleet/__init__.py
@@ -30,6 +30,7 @@ __all__ = [
 ]
 
 fleet = Fleet()
+_final_strategy = fleet._final_strategy
 init = fleet.init
 is_first_worker = fleet.is_first_worker
 worker_index = fleet.worker_index
diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index 316b6494e347ff0352e7faf0b607425c8cdd3b50..1fc29ad0428832d1a302fb996d689e69b36c4987 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -1244,8 +1244,7 @@ class DistributedStrategy(object):
                 if getattr(self.strategy, f.name):
                     draws += border + "\n"
                     draws += h1_format.format(
-                        "{} = True, please check {}_configs".format(
-                            f.name, f.name))
+                        "{}=True <-> {}_configs".format(f.name, f.name))
                     draws += line + "\n"
                     my_configs = getattr(self.strategy,
                                          f.name + "_configs")
diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py
index d0658efdca34ec5a10a0e2f07dabc357096bfad5..3fdd6e92483031d55adcdddd24abe59fa8fb555b 100644
--- a/python/paddle/distributed/fleet/base/fleet_base.py
+++ b/python/paddle/distributed/fleet/base/fleet_base.py
@@ -119,6 +119,8 @@ class Fleet(object):
         self.strategy_compiler = None
         self._is_collective = False
         self._runtime_handle = None
+        self._util = None
+        self._context = {}
 
     def init(self, role_maker=None, is_collective=False):
         """
@@ -233,7 +235,7 @@ class Fleet(object):
 
         Returns:
             int: worker numbers
-        
+
         Examples:
             .. code-block:: python
@@ -569,8 +571,9 @@ class Fleet(object):
 
         if strategy == None:
             strategy = DistributedStrategy()
-        self.user_defined_strategy = strategy
-        self.valid_strategy = None
+
+        self._user_defined_strategy = copy.deepcopy(strategy)
+        self._context = {}
         return self
 
     @dygraph_only
@@ -909,6 +912,15 @@ class Fleet(object):
             # imitate target optimizer retrieval
             return self.user_defined_optimizer.clear_grad()
 
+    def _final_strategy(self):
+        if "valid_strategy" not in self._context:
+            print(
+                "WARNING: You may need to call the minimize function before calling this function"
+            )
+            return {}
+        else:
+            return self._context["valid_strategy"]
+
     def minimize(self,
                  loss,
                  startup_program=None,
@@ -958,12 +970,15 @@ class Fleet(object):
             # for more examples, please reference https://github.com/PaddlePaddle/FleetX
 
         """
+        context = {}
+        context["user_defined_strategy"] = copy.deepcopy(
+            self._user_defined_strategy)
         if paddle.fluid.framework.in_dygraph_mode():
             # imitate target optimizer retrieval
             target_opt = self.user_defined_optimizer
+            self._context = context
             return target_opt.minimize(loss)
 
-        context = {}
         # cache original feed forward program
         self.origin_main_program = loss.block.program
         context["origin_main_program"] = self.origin_main_program
@@ -984,17 +999,19 @@ class Fleet(object):
             MetaOptimizerFactory()._get_valid_meta_optimizers(
                 self.user_defined_optimizer)
 
-        context["user_defined_strategy"] = copy.copy(self.user_defined_strategy)
+        context["user_defined_strategy"] = copy.deepcopy(
+            self._user_defined_strategy)
+        copy_user_defined_strategy = copy.deepcopy(self._user_defined_strategy)
 
         # trigger the auto-parallel in very strict condition
         # strategy = DistributedStrategy()
         # strategy.auto = True
         # optimizer = paddle.optimizer.SGD(learning_rate=0.1)
         # optimizer = fleet.distributed_optimizer(optimizer, strategy)
-        if self.user_defined_strategy._is_strict_auto():
+        if copy_user_defined_strategy._is_strict_auto():
             # turn on all the strategy for each optimizer
             for opt in distributed_optimizer_list:
-                opt._enable_strategy(self.user_defined_strategy, context)
+                opt._enable_strategy(copy_user_defined_strategy, context)
 
         valid_optimizer_list = []
         valid_graph_optimizer_list = []
@@ -1003,7 +1020,7 @@ class Fleet(object):
         for opt in distributed_optimizer_list:
             opt._set_basic_info(loss, self._role_maker,
                                 self.user_defined_optimizer,
-                                self.user_defined_strategy)
+                                copy_user_defined_strategy)
             if opt._can_apply() and not opt._is_graph_out():
                 valid_optimizer_list.append(opt)
             elif opt._can_apply() and opt._is_graph_out():
@@ -1014,13 +1031,15 @@ class Fleet(object):
         meta_optimizer, graph_optimizer = \
             self.strategy_compiler.generate_optimizer(
                 loss, self._role_maker, self.user_defined_optimizer,
-                self.user_defined_strategy, valid_optimizer_list,
+                copy_user_defined_strategy, valid_optimizer_list,
                 valid_graph_optimizer_list)
 
         valid_strategy = self.strategy_compiler._get_valid_strategy(
-            self.user_defined_strategy, can_not_apply_optimizer_list)
+            copy_user_defined_strategy, can_not_apply_optimizer_list)
+
+        context["valid_strategy"] = copy.deepcopy(valid_strategy)
 
-        context["valid_strategy"] = valid_strategy
+        self._context = context
 
         self.valid_strategy = valid_strategy
         self.valid_strategy._enable_env()
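Reviewer note: the new `_final_strategy()` accessor returns the strategy the meta-optimizers actually settled on, which `minimize()` now stashes as a deep copy in `self._context["valid_strategy"]`. A minimal usage sketch follows; it is not part of this patch, and `avg_cost` stands in for a loss variable built beforehand with the static-graph API:

    import paddle
    import paddle.distributed.fleet as fleet
    import paddle.distributed.fleet.base.role_maker as role_maker

    paddle.enable_static()
    fleet.init(role_maker.PaddleCloudRoleMaker())

    strategy = fleet.DistributedStrategy()
    strategy.a_sync = True

    optimizer = paddle.optimizer.SGD(learning_rate=0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
    optimizer.minimize(avg_cost)  # avg_cost: loss variable defined elsewhere

    # The validated strategy; `strategy` itself is no longer mutated,
    # because distributed_optimizer() now deep-copies it.
    final = fleet._final_strategy()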
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py
index 5b7e0fb94c662f4aa47fbaad964e03c576c97807..b8393f1e28a947f58ea4e42f4ee673f86f1fcb45 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py
@@ -60,8 +60,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
         optimizer.minimize(avg_cost)
 
-        self.assertTrue(optimizer.user_defined_strategy.a_sync)
-        a_sync_configs = optimizer.user_defined_strategy.a_sync_configs
+        self.assertTrue(fleet._final_strategy().a_sync)
+        a_sync_configs = fleet._final_strategy().a_sync_configs
         self.assertTrue(a_sync_configs['k_steps'] == 0)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
index 3dff9d0f9d82530cade09a737d448fca4bf4f960..49b34f059e8d899a9220831936e4e05ecea24a91 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
@@ -72,8 +72,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
         optimizer.minimize(avg_cost)
 
-        self.assertTrue(optimizer.user_defined_strategy.a_sync)
-        a_sync_configs = optimizer.user_defined_strategy.a_sync_configs
+        self.assertTrue(fleet._final_strategy().a_sync)
+        a_sync_configs = fleet._final_strategy().a_sync_configs
         self.assertTrue(a_sync_configs['k_steps'] == 0)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
index bdfa3a9a7d57869466b895f23674b6e8ef83310f..334a4e028b2c4f5cdf7fab91bf58d20d43e57daa 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
@@ -60,8 +60,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
         optimizer.minimize(avg_cost)
 
-        self.assertTrue(optimizer.user_defined_strategy.a_sync)
-        a_sync_configs = optimizer.user_defined_strategy.a_sync_configs
+        self.assertTrue(fleet._final_strategy().a_sync)
+        a_sync_configs = fleet._final_strategy().a_sync_configs
         self.assertTrue(a_sync_configs['k_steps'] == 800)
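The three tests above change for the same reason: with the strategy deep-copied inside `distributed_optimizer()`, fields filled in by the strict-auto pass (such as `a_sync_configs['k_steps']`) are no longer visible on the caller's `optimizer.user_defined_strategy` and must be read from the validated copy. A sketch of the before/after, assuming the same test setup:

    # Before this patch the auto-tuned values leaked onto the user's object:
    #     optimizer.user_defined_strategy.a_sync_configs['k_steps']
    # After it, the user's `strategy` stays as written, and the tuned values
    # live only on the validated copy:
    final = fleet._final_strategy()
    assert final.a_sync                     # flipped on by the auto pass
    print(final.a_sync_configs['k_steps'])  # 0 for async, 800 for geo here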
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py
index 73e014b35008ff5a0539c6a338755b9dc2cf68d4..362428631e68cc7ac88be93d7ba1ff449a035822 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py
@@ -18,6 +18,8 @@ import unittest
 import paddle
 import os
 
+paddle.enable_static()
+
 
 class TestFleetAMPOptimizer(unittest.TestCase):
     def setUp(self):
@@ -55,6 +57,8 @@ class TestFleetAMPOptimizer(unittest.TestCase):
         optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
         optimizer.minimize(avg_cost)
 
+        strategy = fleet._final_strategy()
+
         ops = [op.type for op in avg_cost.block.ops]
         self.assertIn('cast', ops)
         self.assertIn('check_finite_and_unscale', ops)
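The `paddle.enable_static()` calls added to this and the following test modules are needed because Paddle 2.0 defaults to dynamic-graph (dygraph) execution, while these tests build static `fluid` Programs. A self-contained sketch of the pattern, not specific to this patch:

    import paddle

    paddle.enable_static()    # build static Programs from here on
    x = paddle.static.data(name='x', shape=[None, 1], dtype='float32')
    y = paddle.mean(x)        # ops are recorded into the default Program
    paddle.disable_static()   # back to dygraph if later code expects it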
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_auto.py b/python/paddle/fluid/tests/unittests/test_fleet_auto.py
index 020f2f4db382ef1277167d85917e8fdba9c83893..0a4e2f631d60cf1dad790d720c88074090a08ca8 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_auto.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_auto.py
@@ -18,6 +18,8 @@ import os
 import paddle.distributed.fleet as fleet
 import paddle.distributed.fleet.base.role_maker as role_maker
 
+paddle.enable_static()
+
 
 class TestDistributedStrategyAuto(unittest.TestCase):
     def setUp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py
index ccd57c4d51529f2c178f6b7c6c8f98851b1e9e80..4945c158025b7e7337081160c8a156c90ce1dcd3 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_base.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py
@@ -167,6 +167,8 @@ class TestFleetDygraph(unittest.TestCase):
         state_dict = adam.state_dict()
         adam.set_state_dict(state_dict)
 
+        final_strategy = fleet._final_strategy()
+
 
 class TestFleetBaseSingleRunCollective(unittest.TestCase):
     def setUp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py
index ec055178d90c529080489218f3aca1a71311beea..022e0b99ce87113c0085addc0370594780d919f1 100755
--- a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py
@@ -19,6 +19,8 @@ import os
 import paddle.distributed.fleet as fleet
 import paddle.distributed.fleet.base.role_maker as role_maker
 
+paddle.enable_static()
+
 
 class TestFleetLambMetaOptimizer(unittest.TestCase):
     def setUp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py
index 0a70710b4590e253463640634615c2d11ff36e9f..e4cc3682d1a24f80bb52d57c5734ac0686bfeb63 100755
--- a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py
@@ -19,6 +19,8 @@ import os
 import paddle.distributed.fleet as fleet
 import paddle.distributed.fleet.base.role_maker as role_maker
 
+paddle.enable_static()
+
 
 class TestFleetLarsMetaOptimizer(unittest.TestCase):
     def setUp(self):
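Finally, the dygraph test in test_fleet_base.py exercises the fallback path of `_final_strategy()`: when `minimize()` has not yet stored a `"valid_strategy"` entry in `fleet._context`, the accessor prints the warning and returns an empty dict. A sketch of the expected behavior, mirroring the implementation above:

    strategy = fleet._final_strategy()
    # before minimize(): prints
    #   WARNING: You may need to call the minimize function before calling this function
    # and returns {}
    optimizer.minimize(avg_cost)        # static-graph mode
    strategy = fleet._final_strategy()  # now the deep-copied valid strategy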