From 0f16ccf5c244529316ce33c37190a497dd7e4ea1 Mon Sep 17 00:00:00 2001
From: zhaoyingli <86812880+zhaoyinglia@users.noreply.github.com>
Date: Mon, 20 Jun 2022 11:08:24 +0800
Subject: [PATCH] [Cherry-Pick] place all save/load paths into a temporary
 directory (#43316) (#43651)

* place all save/load paths into a temporary directory

* remove unneeded unittest output files

---
 .../unittests/auto_parallel/engine_api.py     | 14 +++--
 .../test_auto_parallel_relaunch.py            | 51 +++++++++++++------
 .../auto_parallel/test_engine_api.py          | 16 ++----
 .../auto_parallel/test_new_cost_model.py      |  9 ++++
 .../test_relaunch_with_gpt_planner.py         | 39 ++++++++------
 .../test_relaunch_with_planner.py             | 39 ++++++++------
 .../unittests/test_auto_parallel_cluster.py   | 15 ++++--
 .../unittests/test_auto_parallel_mapper.py    | 15 ++++--
 .../test_auto_parallel_partitioner_gpt.py     | 19 -------
 9 files changed, 128 insertions(+), 89 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
index d7321066ed..3a16f5d70d 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
@@ -14,7 +14,7 @@
 
 import unittest
 import time
-import paddle.fluid as fluid
+import tempfile
 import copy
 import os
 import numpy as np
@@ -128,9 +128,15 @@ def train():
     engine.fit(dataset,
                batch_size=batch_size,
                steps_per_epoch=batch_num * batch_size)
-    engine.save('./mlp')
-    engine.load('./mlp')
-    engine.save('./mlp_inf', training=False, mode='predict')
+
+    # save
+    temp_dir = tempfile.TemporaryDirectory()
+    model_filename0 = os.path.join(temp_dir.name, 'mlp')
+    model_filename1 = os.path.join(temp_dir.name, 'mlp_inf')
+    engine.save(model_filename0)
+    engine.load(model_filename0)
+    engine.save(model_filename1, training=False, mode='predict')
+    temp_dir.cleanup()
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
index 321b262286..a2a297ac52 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -77,15 +78,44 @@ cluster_json = """
 }
 """
 
+mapping_json = """
+[
+    {
+        "hostname": "machine1",
+        "addr": "127.0.0.1",
+        "port": "768",
+        "ranks":
+        {
+            "0": [1],
+            "1": [0]
+        }
+    }
+]
+"""
+
 
 class TestAutoParallelReLaunch(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_relaunch(self):
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
+        mapping_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_rank_mapping.json")
+
         cluster_json_object = json.loads(cluster_json)
         with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
 
+        mapping_json_object = json.loads(mapping_json)
+        with open(mapping_json_path, "w") as mapping_json_file:
+            json.dump(mapping_json_object, mapping_json_file)
+
+        file_dir = os.path.dirname(os.path.abspath(__file__))
         launch_model_path = os.path.join(file_dir,
                                          "auto_parallel_relaunch_model.py")
@@ -95,24 +125,15 @@ class TestAutoParallelReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--cluster_topo_path", cluster_json_path,
-            "--enable_auto_mapping", "True", launch_model_path
+            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
+            mapping_json_path, "--enable_auto_mapping", "True",
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        if os.path.exists(cluster_json_path):
-            os.remove(cluster_json_path)
-        rank_mapping_json_path = os.path.join(file_dir,
-                                              "auto_parallel_rank_mapping.json")
-        if os.path.exists(rank_mapping_json_path):
-            os.remove(rank_mapping_json_path)
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
index 5ca12bc1e0..e4a176c230 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -30,24 +31,17 @@ class TestEngineAPI(unittest.TestCase):
         else:
             coverage_args = []
 
+        tmp_dir = tempfile.TemporaryDirectory()
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--gpus", "0,1", launch_model_path
+            "-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-        files_path = [path for path in os.listdir('.') if '.pd' in path]
-        for path in files_path:
-            if os.path.exists(path):
-                os.remove(path)
-        if os.path.exists('rank_mapping.csv'):
-            os.remove('rank_mapping.csv')
+        tmp_dir.cleanup()
 
     def test_engine_predict(self):
         file_dir = os.path.dirname(os.path.abspath(__file__))
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
index 0cd3041ea4..d7de3511c0 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 
 import unittest
+import os
+import json
+import tempfile
 
 import paddle
 import paddle.distributed.auto_parallel.cost as cost_model
@@ -30,6 +33,12 @@ def check_cost(cost):
 
 class TestCost(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_base_cost(self):
         cost = cost_model.Cost(memory=100, flops=200, time=0.5)
         self.assertTrue(check_cost(cost))
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
index 8782f01ea5..cb813fa8ba 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -22,14 +23,29 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage
 
 class TestPlannerReLaunch(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_relaunch_with_planner(self):
-        from test_auto_parallel_relaunch import cluster_json
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
+        from test_auto_parallel_relaunch import cluster_json, mapping_json
+
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
+        mapping_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_rank_mapping.json")
+
         cluster_json_object = json.loads(cluster_json)
         with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
 
+        mapping_json_object = json.loads(mapping_json)
+        with open(mapping_json_path, "w") as mapping_json_file:
+            json.dump(mapping_json_object, mapping_json_file)
+
+        file_dir = os.path.dirname(os.path.abspath(__file__))
         launch_model_path = os.path.join(
             file_dir, "auto_parallel_relaunch_with_gpt_planner.py")
@@ -39,24 +55,15 @@ class TestPlannerReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--cluster_topo_path", cluster_json_path,
-            "--enable_auto_mapping", "True", launch_model_path
+            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
+            mapping_json_path, "--enable_auto_mapping", "True",
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        if os.path.exists(cluster_json_path):
-            os.remove(cluster_json_path)
-        rank_mapping_json_path = os.path.join(file_dir,
-                                              "auto_parallel_rank_mapping.json")
-        if os.path.exists(rank_mapping_json_path):
-            os.remove(rank_mapping_json_path)
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
index 5a7ae87e64..fbc9534d89 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import tempfile
 import unittest
 import os
 import sys
@@ -22,14 +23,29 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage
 
 class TestPlannerReLaunch(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_relaunch_with_planner(self):
-        from test_auto_parallel_relaunch import cluster_json
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
+        from test_auto_parallel_relaunch import cluster_json, mapping_json
+
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
+        mapping_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_rank_mapping.json")
+
         cluster_json_object = json.loads(cluster_json)
         with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
 
+        mapping_json_object = json.loads(mapping_json)
+        with open(mapping_json_path, "w") as mapping_json_file:
+            json.dump(mapping_json_object, mapping_json_file)
+
+        file_dir = os.path.dirname(os.path.abspath(__file__))
         launch_model_path = os.path.join(
             file_dir, "auto_parallel_relaunch_with_planner.py")
@@ -39,24 +55,15 @@ class TestPlannerReLaunch(unittest.TestCase):
             coverage_args = []
 
         cmd = [sys.executable, "-u"] + coverage_args + [
-            "-m", "launch", "--cluster_topo_path", cluster_json_path,
-            "--enable_auto_mapping", "True", launch_model_path
+            "-m", "launch", "--log_dir", self.temp_dir.name,
+            "--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
+            mapping_json_path, "--enable_auto_mapping", "True",
+            launch_model_path
         ]
         process = subprocess.Popen(cmd)
         process.wait()
         self.assertEqual(process.returncode, 0)
 
-        # Remove unnecessary files
-        if os.path.exists(cluster_json_path):
-            os.remove(cluster_json_path)
-        rank_mapping_json_path = os.path.join(file_dir,
-                                              "auto_parallel_rank_mapping.json")
-        if os.path.exists(rank_mapping_json_path):
-            os.remove(rank_mapping_json_path)
-        log_path = os.path.join(file_dir, "log")
-        if os.path.exists(log_path):
-            shutil.rmtree(log_path)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
index 55b3665443..17bf3fee2e 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import tempfile
 import unittest
 import os
 import json
@@ -200,15 +201,21 @@ cluster_json = """
 
 class TestAutoParallelCluster(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_cluster(self):
-        cluster_json_file = ""
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
         cluster_json_object = json.loads(cluster_json)
-        with open("./auto_parallel_cluster.json", "w") as cluster_json_file:
+        with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
         cluster = Cluster()
-        cluster.build_from_file("./auto_parallel_cluster.json")
-        os.remove("./auto_parallel_cluster.json")
+        cluster.build_from_file(cluster_json_path)
 
         self.assertEqual(len(cluster.get_all_devices("GPU")), 4)
         self.assertEqual(len(cluster.get_all_devices("CPU")), 2)
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
index 45b9defeb7..eb2d012270 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import tempfile
 import unittest
 import os
 import json
@@ -523,14 +524,20 @@ def get_device_local_ids(machine):
 
 class TestAutoParallelMapper(unittest.TestCase):
 
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
     def test_mapper_dp_mp_pp(self):
-        cluster_json_file = ""
+        cluster_json_path = os.path.join(self.temp_dir.name,
+                                         "auto_parallel_cluster.json")
         cluster_json_object = json.loads(cluster_json)
-        with open("./auto_parallel_cluster.json", "w") as cluster_json_file:
+        with open(cluster_json_path, "w") as cluster_json_file:
             json.dump(cluster_json_object, cluster_json_file)
         cluster = Cluster()
-        cluster.build_from_file("./auto_parallel_cluster.json")
-        os.remove("./auto_parallel_cluster.json")
+        cluster.build_from_file(cluster_json_path)
 
         global _global_parallel_strategy
         _global_parallel_strategy = "dp_mp_pp"
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
index 07d94d1b76..3b87af598d 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
@@ -901,25 +901,6 @@ class TestGPTPartitioner(unittest.TestCase):
         auto_parallel_main_prog, auto_parallel_startup_prog, params_grads = partitioner.partition(
             complete_train_program, startup_program, params_grads)
 
-        with open("./test_auto_parallel_partitioner_serial_main_new.txt",
-                  "w") as fw:
-            fw.write(str(train_program))
-        with open("./test_auto_parallel_partitioner_serial_startup_new.txt",
-                  "w") as fw:
-            fw.write(str(startup_program))
-
-        from paddle.distributed.auto_parallel.dist_context import set_default_distributed_context
-        set_default_distributed_context(dist_context)
-        with open("./test_auto_parallel_partitioner_main_new.txt1", "w") as fw:
-            fw.write(str(auto_parallel_main_prog))
-        with open("./test_auto_parallel_partitioner_startup_new.txt1",
-                  "w") as fw:
-            fw.write(str(auto_parallel_startup_prog))
-        # with open("./test_auto_parallel_partitioner_main_completed.txt", "w") as fw:
-        #     from paddle.distributed.auto_parallel.completion import Completer
-        #     completer = Completer()
-        #     completer.complete_forward_annotation(auto_parallel_main_prog)
-        #     fw.write(str(auto_parallel_main_prog))
         nrank = 4
         # col parallel
         weights = [
-- 
GitLab
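
The recurring pattern in this patch is a per-test tempfile.TemporaryDirectory
created in setUp and removed in tearDown, so the tests no longer leave JSON,
log, or checkpoint artifacts in the source tree and every path is unique per
run. Below is a minimal, self-contained sketch of that pattern; the class name
and the JSON payload are illustrative only and do not appear in the patch.

import json
import os
import tempfile
import unittest


class TempDirPatternTest(unittest.TestCase):

    def setUp(self):
        # Fresh scratch directory for each test method.
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        # Deletes the directory and everything written into it,
        # and runs even if the test body failed an assertion.
        self.temp_dir.cleanup()

    def test_write_and_read_back(self):
        # All generated files are joined onto temp_dir.name instead of "./".
        path = os.path.join(self.temp_dir.name, "cluster.json")
        with open(path, "w") as f:
            json.dump({"machines": 1}, f)
        with open(path) as f:
            self.assertEqual(json.load(f), {"machines": 1})


if __name__ == "__main__":
    unittest.main()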