未验证 提交 438ca7f6 编写于 作者: Z zhaoyingli 提交者: GitHub

[AutoParallel] fix unittest with paddle.distributed.launch (#44439)

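* fix unittest: run the auto-parallel unit tests with `-m paddle.distributed.launch` instead of `-m launch`, and pass `--devices` instead of `--gpus` where a device list is given

* fix log_dir: pass `--log_dir` pointing at a `tempfile.TemporaryDirectory()` and call `cleanup()` on it, instead of removing a `log` directory with `shutil.rmtree`

* _enable_legacy_dygraph: in `ProcessGroup`, call `paddle.disable_static()` followed by `_enable_legacy_dygraph()` in place of `paddle.framework._in_legacy_dygraph()`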
上级 98e96853
...@@ -16,10 +16,12 @@ from collections import OrderedDict ...@@ -16,10 +16,12 @@ from collections import OrderedDict
import paddle import paddle
import paddle.fluid.core as core import paddle.fluid.core as core
from ..collective import _get_global_env from ..collective import _get_global_env
from ..collective import _new_ring_id from ..collective import _new_ring_id
from ...fluid.framework import _non_static_mode from ...fluid.framework import _non_static_mode
from ...fluid.layers.tensor import fill_constant from ...fluid.layers.tensor import fill_constant
from paddle.fluid.framework import _enable_legacy_dygraph
def get_all_process_groups(): def get_all_process_groups():
...@@ -134,7 +136,8 @@ class ProcessGroup: ...@@ -134,7 +136,8 @@ class ProcessGroup:
# TODO(shenliang03): This is a temporary solution to solve the problem of # TODO(shenliang03): This is a temporary solution to solve the problem of
# hang caused by cross-creation of new_group # hang caused by cross-creation of new_group
paddle.framework._in_legacy_dygraph() paddle.disable_static()
_enable_legacy_dygraph()
paddle.set_device('gpu:%d' % paddle.set_device('gpu:%d' %
paddle.distributed.ParallelEnv().dev_id) paddle.distributed.ParallelEnv().dev_id)
tmp = paddle.to_tensor( tmp = paddle.to_tensor(
......
...@@ -126,7 +126,7 @@ class TestAutoParallelReLaunch(unittest.TestCase): ...@@ -126,7 +126,7 @@ class TestAutoParallelReLaunch(unittest.TestCase):
coverage_args = [] coverage_args = []
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--log_dir", self.temp_dir.name, "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
"--cluster_topo_path", cluster_json_path, "--rank_mapping_path", "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
mapping_json_path, "--enable_auto_mapping", "True", mapping_json_path, "--enable_auto_mapping", "True",
launch_model_path launch_model_path
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import tempfile
import unittest import unittest
import os import os
import sys import sys
...@@ -32,18 +33,17 @@ class TestConverter(unittest.TestCase): ...@@ -32,18 +33,17 @@ class TestConverter(unittest.TestCase):
else: else:
coverage_args = [] coverage_args = []
tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", launch_model_path "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
tmp_dir.name, launch_model_path
] ]
process = subprocess.Popen(cmd) process = subprocess.Popen(cmd)
process.wait() process.wait()
self.assertEqual(process.returncode, 0) self.assertEqual(process.returncode, 0)
# Remove unnecessary files tmp_dir.cleanup()
log_path = os.path.join(file_dir, "log")
if os.path.exists(log_path):
shutil.rmtree(log_path)
def test_input_invalid(self): def test_input_invalid(self):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
......
...@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase): ...@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase):
tmp_dir = tempfile.TemporaryDirectory() tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name, "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
launch_model_path tmp_dir.name, launch_model_path
] ]
process = subprocess.Popen(cmd) process = subprocess.Popen(cmd)
......
...@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase): ...@@ -34,8 +34,8 @@ class TestEngineAPI(unittest.TestCase):
tmp_dir = tempfile.TemporaryDirectory() tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name, "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
launch_model_path tmp_dir.name, launch_model_path
] ]
process = subprocess.Popen(cmd) process = subprocess.Popen(cmd)
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import tempfile
import unittest import unittest
import os import os
import sys import sys
...@@ -31,18 +32,17 @@ class TestHighOrderGrad(unittest.TestCase): ...@@ -31,18 +32,17 @@ class TestHighOrderGrad(unittest.TestCase):
else: else:
coverage_args = [] coverage_args = []
tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", launch_model_path "-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
tmp_dir.name, launch_model_path
] ]
process = subprocess.Popen(cmd) process = subprocess.Popen(cmd)
process.wait() process.wait()
self.assertEqual(process.returncode, 0) self.assertEqual(process.returncode, 0)
# Remove unnecessary files tmp_dir.cleanup()
log_path = os.path.join(file_dir, "log")
if os.path.exists(log_path):
shutil.rmtree(log_path)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase): ...@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase):
coverage_args = [] coverage_args = []
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--log_dir", self.temp_dir.name, "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
"--cluster_topo_path", cluster_json_path, "--rank_mapping_path", "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
mapping_json_path, "--enable_auto_mapping", "True", mapping_json_path, "--enable_auto_mapping", "True",
launch_model_path launch_model_path
......
...@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase): ...@@ -56,7 +56,7 @@ class TestPlannerReLaunch(unittest.TestCase):
coverage_args = [] coverage_args = []
cmd = [sys.executable, "-u"] + coverage_args + [ cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--log_dir", self.temp_dir.name, "-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
"--cluster_topo_path", cluster_json_path, "--rank_mapping_path", "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
mapping_json_path, "--enable_auto_mapping", "True", mapping_json_path, "--enable_auto_mapping", "True",
launch_model_path launch_model_path
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册