diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/process_group.py
index 74cb6930e039207127a1330b1d8ee0dc3d3e762d..245c5c955e8be544fd8b0bfb153d94da00df8db1 100644
--- a/python/paddle/distributed/auto_parallel/process_group.py
+++ b/python/paddle/distributed/auto_parallel/process_group.py
@@ -16,10 +16,12 @@ from collections import OrderedDict
 
 import paddle
 import paddle.fluid.core as core
+
 from ..collective import _get_global_env
 from ..collective import _new_ring_id
 from ...fluid.framework import _non_static_mode
 from ...fluid.layers.tensor import fill_constant
+from paddle.fluid.framework import _enable_legacy_dygraph
 
 
 def get_all_process_groups():
@@ -134,7 +136,8 @@ class ProcessGroup:
 
         # TODO(shenliang03): This is a temporary solution to solve the problem of
         # hang caused by cross-creation of new_group
-        paddle.framework._in_legacy_dygraph()
+        paddle.disable_static()
+        _enable_legacy_dygraph()
         paddle.set_device('gpu:%d' %
                           paddle.distributed.ParallelEnv().dev_id)
         tmp = paddle.to_tensor(
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
index 09ec5131402d094e6ddc14148d1de12d2b1d04de..f893088782df04ee6c08e32a9339ee25628cc406 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
@@ -126,7 +126,7 @@ class TestAutoParallelReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
             "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
             mapping_json_path, "--enable_auto_mapping", "True",
             launch_model_path
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py
index 22abd6d799554f40f7104c5fad2a918c40970d30..40fc301f26129b8b4b229118b5147dee92a492b3 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -32,18 +33,17 @@ class TestConverter(unittest.TestCase):
         else:
             coverage_args = []
 
+        tmp_dir = tempfile.TemporaryDirectory()
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", launch_model_path
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
+            tmp_dir.name, launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
+        tmp_dir.cleanup()
 
     def test_input_invalid(self):
         with self.assertRaises(ValueError):
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
index 8d5051a3d48d4f022aeed22f1a38ee8de909e64e..3dfedea46f69d7ecf8a8ae4af9635da2b259abc6 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase):
 
         tmp_dir = tempfile.TemporaryDirectory()
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
-            launch_model_path
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
+            tmp_dir.name, launch_model_path
         ]
 
         process = subprocess.Popen(cmd)
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api_dp.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api_dp.py
index 92c8e534aa26bbb1d73d2ba71d2796ee0a5bfb9b..3e6105917a85717232f2cd4d1c53e21ceb5f591c 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api_dp.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api_dp.py
@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase):
 
         tmp_dir = tempfile.TemporaryDirectory()
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
-            launch_model_path
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
+            tmp_dir.name, launch_model_path
         ]
 
         process = subprocess.Popen(cmd)
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py
index 9fb1c22d76cbf6ad47deff183bb854a39f0f0532..104cfb59ff5d829036c447352b0b9b2d73201fbc 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -31,18 +32,17 @@ class TestHighOrderGrad(unittest.TestCase):
         else:
             coverage_args = []
 
+        tmp_dir = tempfile.TemporaryDirectory()
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", launch_model_path
+            "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
+            tmp_dir.name, launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
+        tmp_dir.cleanup()
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
index bc1ebd6688edba5ca7b8cf51d26b6063f05ae563..dd7e02af85469fc9f2a9d68dac81b423a4ca9fc5 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
             "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
             mapping_json_path, "--enable_auto_mapping", "True",
             launch_model_path
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
index efcc313a2a4caab39adcfe919317f86e98d3347c..b9b02d749d852b56b351059afe7f57e2983a0798 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
             "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
             mapping_json_path, "--enable_auto_mapping", "True",
             launch_model_path