[AutoParallel] fix unittest with paddle.distributed.launch (#44439)

* fix unittest
* fix log_dir
* _enable_legacy_dygraph

[AutoParallel] fix unittest with paddle.distributed.launch (#44439)
* fix unittest
* fix log_dir
* _enable_legacy_dygraph
438ca7f6 · zhaoyingli · GitHub · 98e96853 · 438ca7f6 · 438ca7f6
8 changed files
--- a/python/paddle/distributed/auto_parallel/process_group.py
+++ b/python/paddle/distributed/auto_parallel/process_group.py
@@ -16,10 +16,12 @@ from collections import OrderedDict
 import paddle
 import paddle.fluid.core as core
 from ..collective import _get_global_env
 from ..collective import _new_ring_id
 from ...fluid.framework import _non_static_mode
 from ...fluid.layers.tensor import fill_constant
+from paddle.fluid.framework import _enable_legacy_dygraph
 def get_all_process_groups():
@@ -134,7 +136,8 @@ class ProcessGroup:
            # TODO(shenliang03): This is a temporary solution to solve the problem of
            # hang caused by cross-creation of new_group
-            paddle.framework._in_legacy_dygraph()
+            paddle.disable_static()
+            _enable_legacy_dygraph()
            paddle.set_device('gpu:%d' %
                              paddle.distributed.ParallelEnv().dev_id)
            tmp = paddle.to_tensor(

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
@@ -126,7 +126,7 @@ class TestAutoParallelReLaunch(unittest.TestCase):
            coverage_args = []
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
            mapping_json_path, "--enable_auto_mapping", "True",
            launch_model_path

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import tempfile
 import unittest
 import os
 import sys
@@ -32,18 +33,17 @@ class TestConverter(unittest.TestCase):
        else:
            coverage_args = []
+        tmp_dir = tempfile.TemporaryDirectory()
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", launch_model_path
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
+            tmp_dir.name, launch_model_path
        ]
        process = subprocess.Popen(cmd)
        process.wait()
        self.assertEqual(process.returncode, 0)
-        # Remove unnecessary files
+        tmp_dir.cleanup()
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
    def test_input_invalid(self):
        with self.assertRaises(ValueError):

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase):
        tmp_dir = tempfile.TemporaryDirectory()
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
-            launch_model_path
+            tmp_dir.name, launch_model_path
        ]
        process = subprocess.Popen(cmd)

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api_dp.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api_dp.py
@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase):
        tmp_dir = tempfile.TemporaryDirectory()
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
-            launch_model_path
+            tmp_dir.name, launch_model_path
        ]
        process = subprocess.Popen(cmd)

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import tempfile
 import unittest
 import os
 import sys
@@ -31,18 +32,17 @@ class TestHighOrderGrad(unittest.TestCase):
        else:
            coverage_args = []
+        tmp_dir = tempfile.TemporaryDirectory()
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", launch_model_path
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
+            tmp_dir.name, launch_model_path
        ]
        process = subprocess.Popen(cmd)
        process.wait()
        self.assertEqual(process.returncode, 0)
-        # Remove unnecessary files
+        tmp_dir.cleanup()
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
 if __name__ == "__main__":

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase):
            coverage_args = []
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
            mapping_json_path, "--enable_auto_mapping", "True",
            launch_model_path

--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase):
            coverage_args = []
        cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
            mapping_json_path, "--enable_auto_mapping", "True",
            launch_model_path