From 0f16ccf5c244529316ce33c37190a497dd7e4ea1 Mon Sep 17 00:00:00 2001
From: zhaoyingli <86812880+zhaoyinglia@users.noreply.github.com>
Date: Mon, 20 Jun 2022 11:08:24 +0800
Subject: [PATCH] [Cherry-Pick] place all save/load paths into a temporary
 directory (#43316) (#43651)

* place all save/load paths into a temporary directory

* remove unneeded unittest output files

---
 .../unittests/auto_parallel/engine_api.py     | 14 +++--
 .../test_auto_parallel_relaunch.py            | 51 +++++++++++++------
 .../auto_parallel/test_engine_api.py          | 16 ++----
 .../auto_parallel/test_new_cost_model.py      |  9 ++++
 .../test_relaunch_with_gpt_planner.py         | 39 ++++++++------
 .../test_relaunch_with_planner.py             | 39 ++++++++------
 .../unittests/test_auto_parallel_cluster.py   | 15 ++++--
 .../unittests/test_auto_parallel_mapper.py    | 15 ++++--
 .../test_auto_parallel_partitioner_gpt.py     | 19 -------
 9 files changed, 128 insertions(+), 89 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
index d7321066ed..3a16f5d70d 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
@@ -14,7 +14,7 @@
 
 import unittest
 import time
-import paddle.fluid as fluid
+import tempfile
 import copy
 import os
 import numpy as np
@@ -128,9 +128,15 @@ def train():
     engine.fit(dataset,
                batch_size=batch_size,
                steps_per_epoch=batch_num * batch_size)
-    engine.save('./mlp')
-    engine.load('./mlp')
-    engine.save('./mlp_inf', training=False, mode='predict')
+
+    # save
+    temp_dir = tempfile.TemporaryDirectory()
+    model_filename0 = os.path.join(temp_dir.name, 'mlp')
+    model_filename1 = os.path.join(temp_dir.name, 'mlp_inf')
+    engine.save(model_filename0)
+    engine.load(model_filename0)
+    engine.save(model_filename1, training=False, mode='predict')
+    temp_dir.cleanup()
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
index 321b262286..a2a297ac52 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -77,15 +78,44 @@ cluster_json = """
 }
 """
 
+mapping_json = """
+[
+    {
+        "hostname": "machine1",
+        "addr": "127.0.0.1",
+        "port": "768",
+        "ranks":
+        {
+            "0": [1],
+            "1": [0]
+        }
+    }
+]
+"""
+
 
 class TestAutoParallelReLaunch(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_relaunch(self):
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
+        mapping_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_rank_mapping.json")
+
         cluster_json_object = json.loads(cluster_json)
         with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
 
+        mapping_json_object = json.loads(mapping_json)
+        with open(mapping_json_path, "w") as mapping_json_file:
+            json.dump(mapping_json_object, mapping_json_file)
+
+        file_dir = os.path.dirname(os.path.abspath(__file__))
         launch_model_path = os.path.join(file_dir,
                                          "auto_parallel_relaunch_model.py")
@@ -95,24 +125,15 @@ class TestAutoParallelReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--cluster_topo_path", cluster_json_path,
-            "--enable_auto_mapping", "True", launch_model_path
+            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
+            mapping_json_path, "--enable_auto_mapping", "True",
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        if os.path.exists(cluster_json_path):
-            os.remove(cluster_json_path)
-        rank_mapping_json_path = os.path.join(file_dir,
-                                              "auto_parallel_rank_mapping.json")
-        if os.path.exists(rank_mapping_json_path):
-            os.remove(rank_mapping_json_path)
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
index 5ca12bc1e0..e4a176c230 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -30,24 +31,17 @@ class TestEngineAPI(unittest.TestCase):
         else:
             coverage_args = []
 
+        tmp_dir = tempfile.TemporaryDirectory()
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", launch_model_path
+            "-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-        files_path = [path for path in os.listdir('.') if '.pd' in path]
-        for path in files_path:
-            if os.path.exists(path):
-                os.remove(path)
-        if os.path.exists('rank_mapping.csv'):
-            os.remove('rank_mapping.csv')
+        tmp_dir.cleanup()
 
     def test_engine_predict(self):
         file_dir = os.path.dirname(os.path.abspath(__file__))
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
index 0cd3041ea4..d7de3511c0 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 
 import unittest
+import os
+import json
+import tempfile
 
 import paddle
 import paddle.distributed.auto_parallel.cost as cost_model
@@ -30,6 +33,12 @@ def check_cost(cost):
 
 class TestCost(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_base_cost(self):
         cost = cost_model.Cost(memory=100, flops=200, time=0.5)
         self.assertTrue(check_cost(cost))
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
index 8782f01ea5..cb813fa8ba 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -22,14 +23,29 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage
 
 class TestPlannerReLaunch(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_relaunch_with_planner(self):
-        from test_auto_parallel_relaunch import cluster_json
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
+        from test_auto_parallel_relaunch import cluster_json, mapping_json
+
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
+        mapping_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_rank_mapping.json")
+
         cluster_json_object = json.loads(cluster_json)
         with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
 
+        mapping_json_object = json.loads(mapping_json)
+        with open(mapping_json_path, "w") as mapping_json_file:
+            json.dump(mapping_json_object, mapping_json_file)
+
+        file_dir = os.path.dirname(os.path.abspath(__file__))
         launch_model_path = os.path.join(
             file_dir, "auto_parallel_relaunch_with_gpt_planner.py")
@@ -39,24 +55,15 @@ class TestPlannerReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--cluster_topo_path", cluster_json_path,
-            "--enable_auto_mapping", "True", launch_model_path
+            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
+            mapping_json_path, "--enable_auto_mapping", "True",
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        if os.path.exists(cluster_json_path):
-            os.remove(cluster_json_path)
-        rank_mapping_json_path = os.path.join(file_dir,
-                                              "auto_parallel_rank_mapping.json")
-        if os.path.exists(rank_mapping_json_path):
-            os.remove(rank_mapping_json_path)
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
index 5a7ae87e64..fbc9534d89 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -22,14 +23,29 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage
 
 class TestPlannerReLaunch(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_relaunch_with_planner(self):
-        from test_auto_parallel_relaunch import cluster_json
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
+        from test_auto_parallel_relaunch import cluster_json, mapping_json
+
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
+        mapping_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_rank_mapping.json")
+
         cluster_json_object = json.loads(cluster_json)
         with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
 
+        mapping_json_object = json.loads(mapping_json)
+        with open(mapping_json_path, "w") as mapping_json_file:
+            json.dump(mapping_json_object, mapping_json_file)
+
+        file_dir = os.path.dirname(os.path.abspath(__file__))
         launch_model_path = os.path.join(
             file_dir, "auto_parallel_relaunch_with_planner.py")
@@ -39,24 +55,15 @@ class TestPlannerReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--cluster_topo_path", cluster_json_path,
-            "--enable_auto_mapping", "True", launch_model_path
+            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
+            mapping_json_path, "--enable_auto_mapping", "True",
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        if os.path.exists(cluster_json_path):
-            os.remove(cluster_json_path)
-        rank_mapping_json_path = os.path.join(file_dir,
-                                              "auto_parallel_rank_mapping.json")
-        if os.path.exists(rank_mapping_json_path):
-            os.remove(rank_mapping_json_path)
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
index 55b3665443..17bf3fee2e 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import tempfile
 import unittest
 import os
 import json
@@ -200,15 +201,21 @@ cluster_json = """
 
 class TestAutoParallelCluster(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_cluster(self):
-        cluster_json_file = ""
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
         cluster_json_object = json.loads(cluster_json)
-        with open("./auto_parallel_cluster.json", "w") as cluster_json_file:
+        with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
         cluster = Cluster()
-        cluster.build_from_file("./auto_parallel_cluster.json")
-        os.remove("./auto_parallel_cluster.json")
+        cluster.build_from_file(cluster_json_path)
 
         self.assertEqual(len(cluster.get_all_devices("GPU")), 4)
         self.assertEqual(len(cluster.get_all_devices("CPU")), 2)
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
index 45b9defeb7..eb2d012270 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import tempfile
 import unittest
 import os
 import json
@@ -523,14 +524,20 @@ def get_device_local_ids(machine):
 
 class TestAutoParallelMapper(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_mapper_dp_mp_pp(self):
-        cluster_json_file = ""
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
         cluster_json_object = json.loads(cluster_json)
-        with open("./auto_parallel_cluster.json", "w") as cluster_json_file:
+        with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
         cluster = Cluster()
-        cluster.build_from_file("./auto_parallel_cluster.json")
-        os.remove("./auto_parallel_cluster.json")
+        cluster.build_from_file(cluster_json_path)
 
         global _global_parallel_strategy
         _global_parallel_strategy = "dp_mp_pp"
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
index 07d94d1b76..3b87af598d 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
@@ -901,25 +901,6 @@ class TestGPTPartitioner(unittest.TestCase):
         auto_parallel_main_prog, auto_parallel_startup_prog, params_grads = partitioner.partition(
             complete_train_program, startup_program, params_grads)
 
-        with open("./test_auto_parallel_partitioner_serial_main_new.txt",
-                  "w") as fw:
-            fw.write(str(train_program))
-        with open("./test_auto_parallel_partitioner_serial_startup_new.txt",
-                  "w") as fw:
-            fw.write(str(startup_program))
-
-        from paddle.distributed.auto_parallel.dist_context import set_default_distributed_context
-        set_default_distributed_context(dist_context)
-        with open("./test_auto_parallel_partitioner_main_new.txt1", "w") as fw:
-            fw.write(str(auto_parallel_main_prog))
-        with open("./test_auto_parallel_partitioner_startup_new.txt1",
-                  "w") as fw:
-            fw.write(str(auto_parallel_startup_prog))
-        # with open("./test_auto_parallel_partitioner_main_completed.txt", "w") as fw:
-        #     from paddle.distributed.auto_parallel.completion import Completer
-        #     completer = Completer()
-        #     completer.complete_forward_annotation(auto_parallel_main_prog)
-        #     fw.write(str(auto_parallel_main_prog))
         nrank = 4
         # col parallel
         weights = [
-- 
GitLab
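
The recurring pattern in this patch is a per-test tempfile.TemporaryDirectory
created in setUp and removed in tearDown, so the tests no longer leave JSON,
log, or checkpoint artifacts in the source tree and every path is unique per
run. Below is a minimal, self-contained sketch of that pattern; the class name
and the JSON payload are illustrative only and do not appear in the patch.

import json
import os
import tempfile
import unittest


class TempDirPatternTest(unittest.TestCase):

    def setUp(self):
        # Fresh scratch directory for each test method.
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        # Deletes the directory and everything written into it,
        # and runs even if the test body failed an assertion.
        self.temp_dir.cleanup()

    def test_write_and_read_back(self):
        # All generated files are joined onto temp_dir.name instead of "./".
        path = os.path.join(self.temp_dir.name, "cluster.json")
        with open(path, "w") as f:
            json.dump({"machines": 1}, f)
        with open(path) as f:
            self.assertEqual(json.load(f), {"machines": 1})


if __name__ == "__main__":
    unittest.main()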