diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 4194cf13d2bbcdeada650735738d7ba8e1847cd2..22ed3f2ac416032c8deba402527dc7ed381c0acf 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -63,9 +63,9 @@ class GraphExecutionOptimizer(MetaOptimizerBase): trainer_endpoints_env = ",".join(trainer_endpoints) trainers_num = self.role_maker._worker_num() - # FIXME(wangxi): approve this. - #if trainer_id == 0: - # wait_server_ready(other_trainers) + # NOTE(wangxi): npu doesn't need to wait for server ready + if trainer_id == 0 and not paddle.is_compiled_with_npu(): + wait_server_ready(other_trainers) if core.is_compiled_with_cuda(): comm_id_var = startup_program.global_block().create_var( diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py index 05da44cd061331ff9a8e15d3095bec3bdf6965fb..628f1db80d2d460f9220f6e3b63d4a54b4ba55b4 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py @@ -80,15 +80,17 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): cost_val = exe.run(feed=gen_data(), fetch_list=[avg_cost.name]) print("cost of step[{}] = {}".format(i, cost_val)) - proc_a = launch_func(node_func, node_a) - proc_a.start() + # rank 1 + proc_b = launch_func(node_func, node_b) + proc_b.start() + # rank 0, to cover the wait_server_ready path # just for coverage - for key in node_b: - os.environ[key] = node_b[key] + for key in node_a: + os.environ[key] = node_a[key] node_func() - proc_a.join() + proc_b.join() if __name__ == "__main__":