fix wait server ready (#32889)

a8625aaf · WangXi · GitHub · dace3fd5 · a8625aaf · a8625aaf
2 changed files
--- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py
@@ -63,9 +63,9 @@ class GraphExecutionOptimizer(MetaOptimizerBase):
        trainer_endpoints_env = ",".join(trainer_endpoints)
        trainers_num = self.role_maker._worker_num()
-        # FIXME(wangxi): approve this.
+        # NOTE(wangxi): NPU builds don't need to wait for the servers to be ready
-        #if trainer_id == 0:
+        if trainer_id == 0 and not paddle.is_compiled_with_npu():
-        #    wait_server_ready(other_trainers)
+            wait_server_ready(other_trainers)
        if core.is_compiled_with_cuda():
            comm_id_var = startup_program.global_block().create_var(

--- a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py
@@ -80,15 +80,17 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
                cost_val = exe.run(feed=gen_data(), fetch_list=[avg_cost.name])
                print("cost of step[{}] = {}".format(i, cost_val))
-        proc_a = launch_func(node_func, node_a)
+        # rank 1
-        proc_a.start()
+        proc_b = launch_func(node_func, node_b)
+        proc_b.start()
+        # rank 0, run in this process to cover the wait_server_ready path
        # just for coverage
-        for key in node_b:
+        for key in node_a:
-            os.environ[key] = node_b[key]
+            os.environ[key] = node_a[key]
        node_func()
-        proc_a.join()
+        proc_b.join()
 if __name__ == "__main__":