From b958fa758ca43dc91df53d7f8d4e7cefe468054b Mon Sep 17 00:00:00 2001
From: kangguangli <kangguangli@hotmail.com>
Date: Wed, 22 Feb 2023 14:18:12 +0800
Subject: [PATCH] [with_data_parallel][part3] remove with_data_parallel in unit
 test (#50568)

* remove with_data_parallel in unittest

* fix CI

* remove comment

* trigger CI

* revert part of the changes

* test_build_strategy_fusion_group_pass
---
 .../test_standalone_executor.py               |   4 +-
 .../test_build_strategy_fusion_group_pass.py  |   2 +-
 .../tests/unittests/test_compiled_program.py  |  63 ---------
 .../unittests/test_cuda_graph_static_mode.py  |   4 +-
 .../unittests/test_dataset_dataloader.py      |  10 +-
 .../test_eager_deletion_delete_vars.py        |  50 -------
 .../test_eager_deletion_dynamic_rnn_base.py   |  36 ++---
 .../test_eager_deletion_padding_rnn.py        |  29 ++--
 .../unittests/test_eager_deletion_while_op.py |  32 +----
 .../test_executor_and_use_program_cache.py    |   6 +-
 .../unittests/test_executor_check_feed.py     |   4 +-
 .../test_executor_feed_non_tensor.py          |   4 +-
 ..._executor_return_tensor_not_overwriting.py |  23 +---
 .../test_feed_data_check_shape_type.py        | 125 +++++++-----------
 .../unittests/test_fetch_lod_tensor_array.py  |  37 +-----
 .../tests/unittests/test_fuse_bn_act_pass.py  |   8 +-
 .../unittests/test_fuse_bn_add_act_pass.py    |   8 +-
 .../unittests/test_fuse_gemm_epilogue_pass.py |  14 +-
 .../unittests/test_inference_model_io.py      |   4 +-
 .../unittests/test_inplace_addto_strategy.py  |   4 +-
 ...test_inplace_softmax_with_cross_entropy.py |   4 +-
 .../test_memory_reuse_exclude_feed_var.py     |   4 +-
 .../tests/unittests/test_reader_reset.py      |   4 +-
 .../unittests/test_resnet50_with_cinn.py      |   4 +-
 .../tests/unittests/test_weight_decay.py      |   6 +-
 .../static/quantization/tests/test_graph.py   |   8 +-
 .../test_moving_average_abs_max_scale_op.py   |   4 +-
 .../tests/test_quantization_mkldnn_pass.py    |   4 +-
 .../tests/test_quantization_pass.py           |   4 +-
 .../tests/test_quantization_scale_pass.py     |   4 +-
 .../tests/test_user_defined_quantization.py   |   4 +-
 31 files changed, 129 insertions(+), 388 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_executor.py b/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_executor.py
index d1f7c50433e..fe61abc533f 100644
--- a/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_executor.py
+++ b/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_executor.py
@@ -235,9 +235,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
         exe.run(startup_program)
 
         if use_compiled:
-            main_program = paddle.static.CompiledProgram(
-                main_program
-            ).with_data_parallel(fetch_vars[0].name, places=[self.place])
+            main_program = paddle.static.CompiledProgram(main_program)
 
         if use_str:  # test for fetch name
             fetch_vars = [x.name for x in fetch_vars]
diff --git a/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py b/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py
index a9ae9e85c36..564b1870051 100644
--- a/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py
@@ -32,7 +32,7 @@ class FusionGroupPaddingRNNTest(PaddingRNNTestBase):
         rnn_model = "static"
         config = RNNConfig("test", rnn_model)
         with fluid.scope_guard(fluid.Scope()):
-            self.train(config, parallel=True, use_program_cache=False)
+            self.train(config, use_program_cache=False)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_compiled_program.py b/python/paddle/fluid/tests/unittests/test_compiled_program.py
index a2ea57f8357..f698980b163 100644
--- a/python/paddle/fluid/tests/unittests/test_compiled_program.py
+++ b/python/paddle/fluid/tests/unittests/test_compiled_program.py
@@ -72,30 +72,6 @@ class TestCompiledProgram(unittest.TestCase):
             )
             np.testing.assert_array_equal(loss_data[0], self.loss)
 
-    def test_compiled_program_with_data_parallel(self):
-        with new_program_scope():
-            paddle.seed(self.seed)
-            paddle.framework.random._manual_program_seed(self.seed)
-            place = (
-                fluid.CUDAPlace(0)
-                if core.is_compiled_with_cuda()
-                else fluid.CPUPlace()
-            )
-            exe = fluid.Executor(place)
-
-            loss = simple_fc_net()
-            exe.run(fluid.default_startup_program())
-            compiled_prog = fluid.CompiledProgram(
-                fluid.default_main_program()
-            ).with_data_parallel(loss_name=loss.name, places=[place])
-
-            (loss_data,) = exe.run(
-                compiled_prog,
-                feed={"image": self.img, "label": self.label},
-                fetch_list=[loss.name],
-            )
-            np.testing.assert_array_equal(loss_data[0], self.loss)
-
 
 class TestCompiledProgramError(unittest.TestCase):
     def test_program_or_graph_error(self):
@@ -112,17 +88,6 @@ class TestCompiledProgramError(unittest.TestCase):
         )
         avg_loss = paddle.mean(loss)
 
-    def compile_program_not_compiled(self):
-        with fluid.program_guard(fluid.Program()):
-            # build model
-            self.build_simple_model()
-            # compile program
-            program = fluid.default_main_program()
-            compiled_program = fluid.CompiledProgram(
-                program
-            ).with_data_parallel()
-            return compiled_program
-
     def compile_program(self):
         with fluid.program_guard(fluid.Program()):
             # build model
@@ -149,34 +114,6 @@ class TestCompiledProgramError(unittest.TestCase):
             with self.assertRaises(ValueError):
                 compiled_program._compile(scope, new_place)
 
-    def test_share_vars_from_error_no_parallel(self):
-        with fluid.program_guard(fluid.Program()):
-            source_program, _, _ = self.compile_program()
-            self.build_simple_model()
-            # compile program
-            program = fluid.default_main_program()
-            compiled_program = fluid.CompiledProgram(
-                program
-            ).with_data_parallel(share_vars_from=source_program)
-            scope = fluid.global_scope()
-            place = fluid.CPUPlace()
-            with self.assertRaises(ValueError):
-                compiled_program._compile(scope, place)
-
-    def test_share_vars_from_error_no_executor(self):
-        with fluid.program_guard(fluid.Program()):
-            source_program = self.compile_program_not_compiled()
-            self.build_simple_model()
-            # compile program
-            program = fluid.default_main_program()
-            compiled_program = fluid.CompiledProgram(
-                program
-            ).with_data_parallel(share_vars_from=source_program)
-            scope = fluid.global_scope()
-            place = fluid.CPUPlace()
-            with self.assertRaises(ValueError):
-                compiled_program._compile(scope, place)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph_static_mode.py b/python/paddle/fluid/tests/unittests/test_cuda_graph_static_mode.py
index 3dc56cc7039..cf3b60e490c 100644
--- a/python/paddle/fluid/tests/unittests/test_cuda_graph_static_mode.py
+++ b/python/paddle/fluid/tests/unittests/test_cuda_graph_static_mode.py
@@ -111,9 +111,7 @@ class TestCUDAGraphInStaticMode(unittest.TestCase):
             build_strategy.fix_op_run_order = True
             build_strategy.fuse_all_optimizer_ops = True
             compiled_program = paddle.static.CompiledProgram(
-                main
-            ).with_data_parallel(
-                loss_name=loss.name, build_strategy=build_strategy, places=place
+                main, build_strategy=build_strategy
             )
             image_t = scope.var(image.name).get_tensor()
             label_t = scope.var(label.name).get_tensor()
diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py
index 078ffb4e863..19a3edc6603 100644
--- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py
+++ b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py
@@ -106,13 +106,13 @@ class DatasetLoaderTestBase(unittest.TestCase):
         dataset._set_batch_size(BATCH_SIZE)
 
         if isinstance(place, fluid.CPUPlace):
-            file_num = 10
+            file_num = 1
             os.environ['CPU_NUM'] = str(file_num)
-            places = fluid.cpu_places()
+            places = [fluid.CPUPlace()]
             use_cuda = False
         else:
-            file_num = fluid.core.get_cuda_device_count()
-            places = fluid.cuda_places()
+            file_num = 1
+            places = [fluid.CUDAPlace(0)]
             use_cuda = True
 
         filelist = []
@@ -145,7 +145,7 @@ class DatasetLoaderTestBase(unittest.TestCase):
         dataloader = fluid.io.DataLoader.from_dataset(
             dataset=dataset, places=places, drop_last=self.drop_last
         )
-        prog = fluid.CompiledProgram(main_prog).with_data_parallel()
+        prog = fluid.CompiledProgram(main_prog)
         exe = fluid.Executor(place)
 
         exe.run(startup_prog)
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
index 46977b13d77..0a8ed53f794 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
@@ -19,7 +19,6 @@ import numpy as np
 os.environ['FLAGS_use_mkldnn'] = '0'
 os.environ['CPU_NUM'] = '4'
 
-import multiprocessing
 import unittest
 from functools import reduce
 
@@ -82,13 +81,6 @@ class TestExecutor(unittest.TestCase):
                     with fluid.unique_name.guard():
                         self.executor_main()
 
-        for p in places:
-            self.place = p
-            with fluid.program_guard(fluid.Program(), fluid.Program()):
-                with fluid.scope_guard(fluid.Scope()):
-                    with fluid.unique_name.guard():
-                        self.pe_main()
-
     def prepare_feed(self, image, label, dev_cnt=1):
         batch_size = 32 * dev_cnt
         image_shape = (batch_size,) + tuple(image.shape[1:])
@@ -179,48 +171,6 @@ class TestExecutor(unittest.TestCase):
                 fluid.global_scope(), persistables, non_persistables
             )
 
-    def pe_main(self):
-        image, label, loss = simple_fc_net()
-        loss.persistable = False
-        persistables, non_persistables = get_persistables_and_non_persistables(
-            fluid.default_main_program(), [loss.name]
-        )
-        self.assert_gc_vars(
-            fluid.default_main_program(), [loss.name], non_persistables
-        )
-
-        exe = fluid.Executor(self.place)
-        exe.run(fluid.default_startup_program())
-
-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.num_iteration_per_drop_scope = 100
-
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.memory_optimize = False
-        build_strategy.enable_inplace = False
-
-        prog = fluid.CompiledProgram(
-            fluid.default_main_program()
-        ).with_data_parallel(loss_name=loss.name, exec_strategy=exec_strategy)
-
-        dev_cnt = (
-            fluid.core.get_cuda_device_count()
-            if isinstance(self.place, fluid.CUDAPlace)
-            else int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-        )
-
-        for idx in range(10):
-            image_np, label_np = self.prepare_feed(image, label, dev_cnt)
-            feed = {image.name: image_np, label.name: label_np}
-
-            exe.run(program=prog, feed=feed, fetch_list=[loss])
-
-            local_scopes = prog._local_scopes
-            for scope in local_scopes:
-                kids = scope._kids()
-                self.assertTrue(len(kids) == 1)
-                self.assertScopeVar(kids[0], persistables, non_persistables)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
index 44153b6e2fe..c1fb340a888 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
@@ -23,20 +23,13 @@ from fake_reader import fake_imdb_reader
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid import compiler
 
 
-def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
+def train(network, use_cuda, batch_size=32, pass_num=2):
     if use_cuda and not core.is_compiled_with_cuda():
         print('Skip use_cuda=True because Paddle is not compiled with cuda')
         return
 
-    if use_parallel_executor and os.name == 'nt':
-        print(
-            'Skip use_parallel_executor=True because Paddle comes without parallel support on windows'
-        )
-        return
-
     word_dict_size = 5147
     reader = fake_imdb_reader(word_dict_size, batch_size * 40)
     train_reader = paddle.batch(reader, batch_size=batch_size)
@@ -54,9 +47,7 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
-    reader = feeder.decorate_reader(
-        train_reader, multi_devices=use_parallel_executor
-    )
+    reader = feeder.decorate_reader(train_reader, multi_devices=False)
 
     exe = fluid.Executor(place)
     fluid.default_startup_program().random_seed = 1
@@ -64,13 +55,7 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
     exe.run(fluid.default_startup_program())
 
     train_cp = fluid.default_main_program()
-    if use_parallel_executor:
-        train_cp = compiler.CompiledProgram(
-            fluid.default_main_program()
-        ).with_data_parallel(loss_name=cost.name)
-        fetch_list = [cost.name]
-    else:
-        fetch_list = [cost]
+    fetch_list = [cost]
 
     for pass_id in range(pass_num):
         batch_id = 0
@@ -94,12 +79,9 @@ class TestBase(unittest.TestCase):
             return
 
         for use_cuda in [True, False]:
-            for use_parallel_executor in [False, True]:
-                print(
-                    'network: {}, use_cuda: {}, use_parallel_executor: {}'.format(
-                        self.net.__name__, use_cuda, use_parallel_executor
-                    )
-                )
-                with fluid.program_guard(fluid.Program(), fluid.Program()):
-                    with fluid.scope_guard(core.Scope()):
-                        train(self.net, use_cuda, use_parallel_executor)
+            print(
+                'network: {}, use_cuda: {}'.format(self.net.__name__, use_cuda)
+            )
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                with fluid.scope_guard(core.Scope()):
+                    train(self.net, use_cuda)
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
index 6c75c50cc13..7bcc533ebf9 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
@@ -473,7 +473,7 @@ class PaddingRNNTestBase(unittest.TestCase):
         # You can override the function to set your own config.
         pass
 
-    def _prepare_program(self, config, parallel=True):
+    def _prepare_program(self, config):
         paddle.seed(config.random_seed)
         self.main_program = fluid.Program()
         self.startup_program = fluid.Program()
@@ -517,16 +517,7 @@ class PaddingRNNTestBase(unittest.TestCase):
 
         self.exe.run(self.startup_program)
 
-        if parallel:
-            self.train_program = fluid.compiler.CompiledProgram(
-                self.main_program
-            ).with_data_parallel(
-                loss_name=self.loss.name,
-                build_strategy=self.build_strategy,
-                exec_strategy=self.exec_strategy,
-            )
-        else:
-            self.train_program = self.main_program
+        self.train_program = self.main_program
 
     def _generate_init_data(self):
         init_hidden = np.zeros(
@@ -621,29 +612,27 @@ class PaddingRNNTestBase(unittest.TestCase):
             ppl = np.append(ppl, batch_ppl)
         return ppl
 
-    def train(self, config, parallel=True, use_program_cache=True):
+    def train(self, config, use_program_cache=True):
         self.set_customed_config()
 
         self.config = config
-        self._prepare_program(config, parallel)
+        self._prepare_program(config)
         ppl = np.zeros(shape=(0, config.batch_size))
         for epoch_id in range(config.max_epoch):
             train_ppl = self._train_an_epoch(epoch_id, use_program_cache)
             ppl = np.append(ppl, train_ppl)
         return ppl
 
-    def compare_padding_static_mode(
-        self, parallel=True, use_program_cache=True
-    ):
+    def compare_padding_static_mode(self, use_program_cache=True):
         '''
         Test that train ppl of padding mode is same to that of static graph mode
         '''
         config = RNNConfig('test', 'padding')
         with fluid.scope_guard(fluid.Scope()):
-            padding_rnn_ppl = self.train(config, parallel, use_program_cache)
+            padding_rnn_ppl = self.train(config, use_program_cache)
         config = RNNConfig('test', 'static')
         with fluid.scope_guard(fluid.Scope()):
-            static_rnn_ppl = self.train(config, parallel, use_program_cache)
+            static_rnn_ppl = self.train(config, use_program_cache)
         np.testing.assert_allclose(padding_rnn_ppl, static_rnn_ppl, rtol=0.001)
 
 
@@ -654,7 +643,7 @@ class EagerDeletionPaddingRNNTest(PaddingRNNTestBase):
         '''
         fluid.core._set_eager_deletion_mode(-1.0, 1.0, True)
         # When parallel is True, use_program_cache does not make a difference.
-        self.compare_padding_static_mode(parallel=True, use_program_cache=True)
+        self.compare_padding_static_mode(use_program_cache=True)
 
     def test_padding_mode_eager_deletion(self):
         '''
@@ -662,7 +651,7 @@ class EagerDeletionPaddingRNNTest(PaddingRNNTestBase):
         '''
         fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
         # When parallel is True, use_program_cache does not make a difference.
-        self.compare_padding_static_mode(parallel=True, use_program_cache=True)
+        self.compare_padding_static_mode(use_program_cache=True)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
index 0ae21ccf6e3..9966c04ef45 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
@@ -16,14 +16,12 @@ import os
 
 os.environ['CPU_NUM'] = '2'
 
-import multiprocessing
 import unittest
 
 import numpy
 
 import paddle
 import paddle.fluid as fluid
-import paddle.fluid.compiler as compiler
 import paddle.fluid.core as core
 import paddle.fluid.layers as layers
 from paddle.fluid.executor import Executor
@@ -41,30 +39,19 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
             places.append(core.CUDAPlace(0))
 
         for p in places:
-            for with_data_parallel in [False, True]:
-                with fluid.program_guard(fluid.Program(), fluid.Program()):
-                    with fluid.scope_guard(fluid.Scope()):
-                        self.run_main(p, with_data_parallel)
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                with fluid.scope_guard(fluid.Scope()):
+                    self.run_main(p)
 
-    def run_main(self, place, with_data_parallel):
+    def run_main(self, place):
         self.place = place
-        self.with_data_parallel = with_data_parallel
 
         if not core.is_compiled_with_cuda() and isinstance(
             self.place, core.CUDAPlace
         ):
             return
 
-        if isinstance(self.place, core.CUDAPlace):
-            device_cnt = (
-                core.get_cuda_device_count() if self.with_data_parallel else 1
-            )
-        else:
-            device_cnt = (
-                int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-                if self.with_data_parallel
-                else 1
-            )
+        device_cnt = 1
 
         d0 = paddle.static.data("d0", shape=[-1, 10], dtype='float32')
         d1 = paddle.static.data("d1", shape=[-1, 10], dtype='float32')
@@ -139,19 +126,12 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
         exe.run(fluid.default_startup_program())
 
         prog = fluid.default_main_program()
-        if self.with_data_parallel:
-            prog = compiler.CompiledProgram(
-                fluid.default_main_program()
-            ).with_data_parallel(loss_name=loss.name)
 
         for _ in range(5):
             d = []
             for i in range(3):
                 tmp = numpy.random.random(size=[10]).astype('float32')
-                if not self.with_data_parallel:
-                    d.append(tmp)
-                else:
-                    d.append(numpy.array([tmp] * device_cnt))
+                d.append(numpy.array([tmp] * device_cnt))
 
             outs = exe.run(
                 program=prog,
diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py
index fe9d09cb54d..092cc45ffdd 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py
@@ -83,11 +83,11 @@ class TestExecutor(unittest.TestCase):
 
 class ExecutorPaddingRNNTest(PaddingRNNTestBase):
     def train_and_save_inference_program(
-        self, rnn_model="static", parallel=True, use_program_cache=True
+        self, rnn_model="static", use_program_cache=True
     ):
         config = RNNConfig("test", rnn_model)
         with fluid.scope_guard(fluid.Scope()):
-            self.train(config, parallel, use_program_cache)
+            self.train(config, use_program_cache)
             fluid.io.save_inference_model(
                 main_program=self.main_program,
                 feeded_var_names=self.feed_order,
@@ -101,7 +101,7 @@ class ExecutorPaddingRNNTest(PaddingRNNTestBase):
         for rnn_model in ["static", "padding"]:
             # Set parallel to False to use the default executor.
             self.train_and_save_inference_program(
-                rnn_model=rnn_model, parallel=True, use_program_cache=True
+                rnn_model=rnn_model, use_program_cache=True
             )
 
             x_np = np.random.random(
diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
index 11ea8260efe..700bbfa95d1 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
@@ -64,9 +64,7 @@ class TestExecutor(unittest.TestCase):
                 exe = fluid.Executor(cpu)
                 lr, cost = self.net()
                 exe.run(startup_program)
-                compiled_prog = fluid.CompiledProgram(
-                    main_program
-                ).with_data_parallel(loss_name=cost.name)
+                compiled_prog = fluid.CompiledProgram(main_program)
                 train_data = [[1.0], [2.0], [3.0], [4.0]]
                 y_true = [[2.0], [4.0], [6.0], [8.0]]
                 a = 0
diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
index 3f6ce3636b6..acdb8b78549 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
@@ -119,9 +119,7 @@ class TestExecutor(unittest.TestCase):
                 cpu = fluid.CPUPlace()
                 exe = fluid.Executor(cpu)
                 exe.run(startup_program)
-                compiled_prog = fluid.CompiledProgram(
-                    main_program
-                ).with_data_parallel(loss_name=cost.name)
+                compiled_prog = fluid.CompiledProgram(main_program)
                 train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype(
                     'float32'
                 )
diff --git a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py
index 5e8179886ea..25069c668e6 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py
@@ -72,28 +72,20 @@ class TestExecutorReturnTensorNotOverOverwritingWithLayers(unittest.TestCase):
     def setUp(self):
         pass
 
-    def calc_add_out(self, place=None, parallel=None):
+    def calc_add_out(self, place=None):
         x = paddle.ones(shape=[3, 3], dtype='float32')
         y = paddle.ones(shape=[3, 3], dtype='float32')
         out = paddle.add(x=x, y=y)
         program = fluid.default_main_program()
-        if parallel:
-            program = fluid.CompiledProgram(program).with_data_parallel(
-                places=place
-            )
         exe = fluid.Executor(place)
         out = exe.run(program, fetch_list=[out], return_numpy=False)
         return out
 
-    def calc_sub_out(self, place=None, parallel=None):
+    def calc_sub_out(self, place=None):
         x = paddle.ones(shape=[2, 2], dtype='float32')
         y = paddle.ones(shape=[2, 2], dtype='float32')
         out = paddle.subtract(x=x, y=y)
         program = fluid.default_main_program()
-        if parallel:
-            program = fluid.CompiledProgram(program).with_data_parallel(
-                places=place
-            )
         exe = fluid.Executor(place)
         out = exe.run(program, fetch_list=[out], return_numpy=False)
         return out
@@ -104,12 +96,11 @@ class TestExecutorReturnTensorNotOverOverwritingWithLayers(unittest.TestCase):
             places.append(fluid.CUDAPlace(0))
 
         for place in places:
-            for parallel in [True, False]:
-                add_out = self.calc_add_out(place, parallel)
-                add_out1 = np.array(add_out[0])
-                sub_out = self.calc_sub_out(place, parallel)
-                add_out2 = np.array(add_out[0])
-                np.testing.assert_array_equal(add_out1, add_out2)
+            add_out = self.calc_add_out(place)
+            add_out1 = np.array(add_out[0])
+            sub_out = self.calc_sub_out(place)
+            add_out2 = np.array(add_out[0])
+            np.testing.assert_array_equal(add_out1, add_out2)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
index 0e5330014d3..8828b8b0e81 100644
--- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
+++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
@@ -20,7 +20,6 @@ import numpy as np
 
 import paddle
 import paddle.fluid as fluid
-import paddle.fluid.compiler as compiler
 import paddle.fluid.core as core
 
 os.environ['CPU_NUM'] = str(4)
@@ -46,16 +45,12 @@ class TestFeedData(unittest.TestCase):
             else int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
         )
 
-    def _get_feed_batch_size(self, use_cuda, use_parallel_executor):
+    def _get_feed_batch_size(self, use_cuda):
         """
         Returns actual fed data size. We should multiple the number of
         devices when it is using ParallelExecutor
         """
-        return (
-            self.data_batch_size * self._get_device_count(use_cuda)
-            if use_parallel_executor
-            else self.data_batch_size
-        )
+        return self.data_batch_size
 
     def _simple_fc_net(self, in_size, label_size, class_num, hidden_sizes):
         in_data = fluid.data(name="data", dtype='float32', shape=in_size)
@@ -85,57 +80,45 @@ class TestFeedData(unittest.TestCase):
         for use_cuda in (
             [True, False] if core.is_compiled_with_cuda() else [False]
         ):
-            for use_parallel_executor in [False, True]:
-                print('Test Parameters:'),
-                print(
-                    {
-                        'use_cuda': use_cuda,
-                        'use_parallel_executor': use_parallel_executor,
-                    }
-                )
-                # Test feeding without error
-                self._test_feed_data_match_shape_type(
-                    use_cuda, use_parallel_executor
-                )
-                self._test_feed_data_contains_neg_one(
-                    use_cuda, use_parallel_executor
-                )
-                self._test_feed_lod_tensor(use_cuda, use_parallel_executor)
-
-                # Test exception message when feeding with error
-                in_shape_tuple = (-1, 3, 4, 8)
-                error_shape_list = [self.data_batch_size, 3, 4, 5]
-
-                with self.assertRaises(ValueError) as shape_mismatch_err:
-                    self._test_feed_data_shape_mismatch(
-                        use_cuda, use_parallel_executor
-                    )
-                self.assertEqual(
-                    str(shape_mismatch_err.exception),
-                    "The fed Variable %r should have dimensions = %r, "
-                    "shape = %r, but received fed shape %r on each device"
-                    % (
-                        'data',
-                        len(in_shape_tuple),
-                        in_shape_tuple,
-                        error_shape_list,
-                    ),
-                )
-
-                with self.assertRaises(ValueError) as dtype_mismatch_err:
-                    self._test_feed_data_dtype_mismatch(
-                        use_cuda, use_parallel_executor
-                    )
-                self.assertEqual(
-                    str(dtype_mismatch_err.exception),
-                    "The data type of fed Variable %r must be 'int64', but "
-                    "received 'float64'" % ('label'),
-                )
-
-    def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor):
-        feed_batch_size = self._get_feed_batch_size(
-            use_cuda, use_parallel_executor
-        )
+            print('Test Parameters:'),
+            print(
+                {
+                    'use_cuda': use_cuda,
+                }
+            )
+            # Test feeding without error
+            self._test_feed_data_match_shape_type(use_cuda)
+            self._test_feed_data_contains_neg_one(use_cuda)
+            self._test_feed_lod_tensor(use_cuda)
+
+            # Test exception message when feeding with error
+            in_shape_tuple = (-1, 3, 4, 8)
+            error_shape_list = [self.data_batch_size, 3, 4, 5]
+
+            with self.assertRaises(ValueError) as shape_mismatch_err:
+                self._test_feed_data_shape_mismatch(use_cuda)
+            self.assertEqual(
+                str(shape_mismatch_err.exception),
+                "The fed Variable %r should have dimensions = %r, "
+                "shape = %r, but received fed shape %r on each device"
+                % (
+                    'data',
+                    len(in_shape_tuple),
+                    in_shape_tuple,
+                    error_shape_list,
+                ),
+            )
+
+            with self.assertRaises(ValueError) as dtype_mismatch_err:
+                self._test_feed_data_dtype_mismatch(use_cuda)
+            self.assertEqual(
+                str(dtype_mismatch_err.exception),
+                "The data type of fed Variable %r must be 'int64', but "
+                "received 'float64'" % ('label'),
+            )
+
+    def _test_feed_data_dtype_mismatch(self, use_cuda):
+        feed_batch_size = self._get_feed_batch_size(use_cuda)
         in_size = [self.data_batch_size, 3, 4, 5]
         feed_in_data = np.random.uniform(
             size=[feed_batch_size, 3, 4, 5]
@@ -150,11 +133,10 @@ class TestFeedData(unittest.TestCase):
             feed_in_data,
             feed_label,
             use_cuda,
-            use_parallel_executor,
         )
 
-    def _test_feed_data_shape_mismatch(self, use_cuda, use_parallel_executor):
-        batch_size = self._get_feed_batch_size(use_cuda, use_parallel_executor)
+    def _test_feed_data_shape_mismatch(self, use_cuda):
+        batch_size = self._get_feed_batch_size(use_cuda)
         in_size = [None, 3, 4, 8]
         feed_in_data = np.random.uniform(size=[batch_size, 3, 4, 5]).astype(
             np.float32
@@ -169,11 +151,10 @@ class TestFeedData(unittest.TestCase):
             feed_in_data,
             feed_label,
             use_cuda,
-            use_parallel_executor,
         )
 
-    def _test_feed_data_contains_neg_one(self, use_cuda, use_parallel_executor):
-        batch_size = self._get_feed_batch_size(use_cuda, use_parallel_executor)
+    def _test_feed_data_contains_neg_one(self, use_cuda):
+        batch_size = self._get_feed_batch_size(use_cuda)
         in_size = [-1, 3, 4, 5]
         feed_in_data = np.random.uniform(size=[batch_size, 3, 4, 5]).astype(
             np.float32
@@ -188,13 +169,10 @@ class TestFeedData(unittest.TestCase):
             feed_in_data,
             feed_label,
             use_cuda,
-            use_parallel_executor,
         )
 
-    def _test_feed_data_match_shape_type(self, use_cuda, use_parallel_executor):
-        feed_batch_size = self._get_feed_batch_size(
-            use_cuda, use_parallel_executor
-        )
+    def _test_feed_data_match_shape_type(self, use_cuda):
+        feed_batch_size = self._get_feed_batch_size(use_cuda)
         in_size = [self.data_batch_size, 3, 4, 5]
         feed_in_data = np.random.uniform(
             size=[feed_batch_size, 3, 4, 5]
@@ -209,10 +187,9 @@ class TestFeedData(unittest.TestCase):
             feed_in_data,
             feed_label,
             use_cuda,
-            use_parallel_executor,
         )
 
-    def _test_feed_lod_tensor(self, use_cuda, use_parallel_executor):
+    def _test_feed_lod_tensor(self, use_cuda):
         device_count = self._get_device_count(use_cuda)
 
         in_size = [device_count, 3, 4, 5]
@@ -241,7 +218,6 @@ class TestFeedData(unittest.TestCase):
             feed_data_tensor,
             feed_label_tensor,
             use_cuda,
-            use_parallel_executor,
         )
 
     def _feed_data_in_executor(
@@ -251,7 +227,6 @@ class TestFeedData(unittest.TestCase):
         feed_in_data,
         feed_label,
         use_cuda,
-        use_parallel_executor,
     ):
 
         startup_program = fluid.Program()
@@ -268,10 +243,6 @@ class TestFeedData(unittest.TestCase):
         exe.run(startup_program)
 
         train_program = main_program
-        if use_parallel_executor:
-            train_program = compiler.CompiledProgram(
-                main_program
-            ).with_data_parallel(loss_name=loss.name)
 
         for i in range(self.iterations):
             fetches = exe.run(
diff --git a/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py b/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py
index 23e1925df61..c20f4ebcc24 100644
--- a/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py
+++ b/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import unittest
 
 import numpy as np
@@ -44,7 +43,6 @@ class TestFetchLoDTensorArray(unittest.TestCase):
                 return loss, array
 
     def check_network(self, use_cuda=True):
-        os.environ["CPU_NUM"] = str(2)
         main_program = fluid.Program()
         startup_program = fluid.Program()
 
@@ -60,35 +58,15 @@ class TestFetchLoDTensorArray(unittest.TestCase):
         feed_dict = {'image': image, 'label': label}
 
         build_strategy = fluid.BuildStrategy()
-        binary = fluid.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name, build_strategy=build_strategy
+        binary = fluid.CompiledProgram(
+            main_program, build_strategy=build_strategy
         )
 
-        device_num = fluid.core.get_cuda_device_count() if use_cuda else 2
         for _ in range(3):
             loss_v, array_v = exe.run(
-                binary,
-                feed=feed_dict,
-                fetch_list=[loss, array],
-                return_merged=False,
+                binary, feed=feed_dict, fetch_list=[loss, array]
             )
-            self.assertEqual(np.array(loss_v).shape, (device_num, 1))
-            self.assertEqual(
-                np.array(array_v[0][0]).shape, (batch_size / device_num, 784)
-            )
-            self.assertEqual(
-                np.array(array_v[0][1]).shape, (batch_size / device_num, 1)
-            )
-            self.assertEqual(np.array(array_v[0][2]).shape, (1,))
-
-        for _ in range(3):
-            loss_v, array_v = exe.run(
-                binary,
-                feed=feed_dict,
-                fetch_list=[loss, array],
-                return_merged=True,
-            )
-            self.assertEqual(np.array(loss_v).shape, (device_num,))
+            self.assertEqual(np.array(loss_v).shape, (1,))
             self.assertEqual(np.array(array_v[0]).shape, (batch_size, 784))
             self.assertEqual(np.array(array_v[1]).shape, (batch_size, 1))
             np.testing.assert_allclose(loss_v, array_v[2], rtol=1e-05)
@@ -98,13 +76,6 @@ class TestFetchLoDTensorArray(unittest.TestCase):
             self.check_network(use_cuda=True)
         self.check_network(use_cuda=False)
 
-    def test_fetch_unmerged_parallel_graph(self):
-        fluid.core.globals()['FLAGS_enable_parallel_graph'] = True
-        if fluid.core.is_compiled_with_cuda():
-            self.check_network(use_cuda=True)
-        self.check_network(use_cuda=False)
-        fluid.core.globals()['FLAGS_enable_parallel_graph'] = False
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py
index 83574bae6b4..14452e0c2f1 100644
--- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py
@@ -83,8 +83,8 @@ class TestFuseBatchNormActPass(unittest.TestCase):
         # close fused_bn_act_ops
         build_strategy = fluid.BuildStrategy()
         build_strategy.fuse_bn_act_ops = False
-        binary = fluid.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name, build_strategy=build_strategy
+        binary = fluid.CompiledProgram(
+            main_program, build_strategy=build_strategy
         )
         train_reader = paddle.batch(
             paddle.dataset.mnist.train(), batch_size=batch_size
@@ -103,8 +103,8 @@ class TestFuseBatchNormActPass(unittest.TestCase):
         # open fused_bn_act_ops
         build_strategy_fused = fluid.BuildStrategy()
         build_strategy_fused.fuse_bn_act_ops = True
-        binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name, build_strategy=build_strategy_fused
+        binary_fused = fluid.CompiledProgram(
+            main_program, build_strategy=build_strategy_fused
         )
         train_reader_fused = paddle.batch(
             paddle.dataset.mnist.train(), batch_size=batch_size
diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py
index c00f10d91d4..8f18696979d 100644
--- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py
@@ -198,8 +198,8 @@ class TestFusedBnAddActAPI(unittest.TestCase):
         )
         build_strategy_fused = fluid.BuildStrategy()
         build_strategy_fused.fuse_bn_add_act_ops = True
-        binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name, build_strategy=build_strategy_fused
+        binary_fused = fluid.CompiledProgram(
+            main_program, build_strategy=build_strategy_fused
         )
         exe = fluid.Executor(place)
         loss_vals_fused = []
@@ -221,8 +221,8 @@ class TestFusedBnAddActAPI(unittest.TestCase):
         # build_origin_program: turn off fused_bn_act_ops
         build_strategy = fluid.BuildStrategy()
         build_strategy.fuse_bn_add_act_ops = False
-        binary = fluid.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name, build_strategy=build_strategy_fused
+        binary = fluid.CompiledProgram(
+            main_program, build_strategy=build_strategy_fused
         )
         loss_vals = []
         scope = fluid.Scope()
diff --git a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py
index a3141128a54..0156eee44fb 100644
--- a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py
@@ -146,11 +146,8 @@ class TestFuseGemmEpilogueFWDBase(unittest.TestCase):
     def _test_output(self):
         build_strategy = paddle.static.BuildStrategy()
         build_strategy.fuse_gemm_epilogue = True
-        program = paddle.static.CompiledProgram(self.main_prog)
-        program = program.with_data_parallel(
-            loss_name=self.loss.name,
-            build_strategy=build_strategy,
-            places=paddle.static.cuda_places(),
+        program = paddle.static.CompiledProgram(
+            self.main_prog, build_strategy=build_strategy
         )
 
         result = self.exe.run(
@@ -332,11 +329,8 @@ class TestFuseGemmEpilogueBWDBase(unittest.TestCase):
     def _test_output(self):
         build_strategy = paddle.static.BuildStrategy()
         build_strategy.fuse_gemm_epilogue = True
-        program = paddle.static.CompiledProgram(self.main_prog)
-        program = program.with_data_parallel(
-            loss_name=self.loss.name,
-            build_strategy=build_strategy,
-            places=paddle.static.cuda_places(),
+        program = paddle.static.CompiledProgram(
+            self.main_prog, build_strategy=build_strategy
         )
 
         outs_res = self.exe.run(program, feed=self.feed, fetch_list=self.fetch)
diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
index fd8523bfc15..779d93d0560 100644
--- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py
+++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
@@ -238,9 +238,7 @@ class TestInstance(unittest.TestCase):
 
         # will print warning message
 
-        cp_prog = CompiledProgram(program).with_data_parallel(
-            loss_name=avg_cost.name
-        )
+        cp_prog = CompiledProgram(program)
 
         save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, cp_prog)
         self.assertRaises(
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py
index 9f448e7f07a..bae5e5a9d13 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py
@@ -97,9 +97,7 @@ class TestInplaceAddto(unittest.TestCase):
 
             strategy = fluid.BuildStrategy()
             strategy.enable_addto = enable_addto
-            compiled = fluid.CompiledProgram(main).with_data_parallel(
-                loss_name=loss.name, build_strategy=strategy
-            )
+            compiled = fluid.CompiledProgram(main, build_strategy=strategy)
 
             exe.run(startup)
             img = np.random.uniform(-128, 128, [8, 3, 224, 224]).astype(
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
index b614709ec99..e0c499c26ed 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
@@ -64,9 +64,7 @@ class TestSoftmaxWithXe(unittest.TestCase):
                 build_strategy = fluid.BuildStrategy()
                 build_strategy.enable_inplace = inplace
                 prog = fluid.CompiledProgram(
-                    fluid.default_main_program()
-                ).with_data_parallel(
-                    build_strategy=build_strategy, places=place
+                    fluid.default_main_program(), build_strategy=build_strategy
                 )
 
                 fetch_list = [z_d.name, s_d.name]
diff --git a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py
index 32ce652ace4..3c270024420 100644
--- a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py
+++ b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py
@@ -41,8 +41,8 @@ class TestMemoryReuseExcludeFeedVar(unittest.TestCase):
         exe.run(fluid.default_startup_program())
 
         compiled_prog = fluid.CompiledProgram(
-            fluid.default_main_program()
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            fluid.default_main_program(), build_strategy=build_strategy
+        )
 
         image_tensor = fluid.LoDTensor()
         np_image = np.random.uniform(
diff --git a/python/paddle/fluid/tests/unittests/test_reader_reset.py b/python/paddle/fluid/tests/unittests/test_reader_reset.py
index 9524068316b..aab9993cec4 100644
--- a/python/paddle/fluid/tests/unittests/test_reader_reset.py
+++ b/python/paddle/fluid/tests/unittests/test_reader_reset.py
@@ -68,9 +68,7 @@ class TestReaderReset(unittest.TestCase):
             paddle.batch(self.prepare_data(), batch_size=self.batch_size)
         )
 
-        train_cp = compiler.CompiledProgram(main_prog).with_data_parallel(
-            places=[place]
-        )
+        train_cp = compiler.CompiledProgram(main_prog)
 
         batch_id = 0
         pass_count = 0
diff --git a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py
index d580636ce50..fec2375f764 100644
--- a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py
+++ b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py
@@ -90,9 +90,7 @@ class TestResnet50Accuracy(unittest.TestCase):
         loss = self.build_program(main_program, startup_program)
         exe = paddle.static.Executor(place)
 
-        compiled_prog = paddle.static.CompiledProgram(
-            main_program
-        ).with_data_parallel(loss_name=loss.name)
+        compiled_prog = paddle.static.CompiledProgram(main_program)
         loss_vals = []
         scope = paddle.static.Scope()
 
diff --git a/python/paddle/fluid/tests/unittests/test_weight_decay.py b/python/paddle/fluid/tests/unittests/test_weight_decay.py
index a55346bc05a..36874c420bb 100644
--- a/python/paddle/fluid/tests/unittests/test_weight_decay.py
+++ b/python/paddle/fluid/tests/unittests/test_weight_decay.py
@@ -126,11 +126,7 @@ class TestWeightDecay(unittest.TestCase):
         build_strategy.memory_optimize = use_ir_memory_optimize
 
         train_cp = compiler.CompiledProgram(
-            fluid.default_main_program()
-        ).with_data_parallel(
-            loss_name=loss.name,
-            exec_strategy=exec_strategy,
-            build_strategy=build_strategy,
+            fluid.default_main_program(), build_strategy=build_strategy
         )
 
         loss_set = []
diff --git a/python/paddle/static/quantization/tests/test_graph.py b/python/paddle/static/quantization/tests/test_graph.py
index 64ec55a4e3e..0704990df62 100644
--- a/python/paddle/static/quantization/tests/test_graph.py
+++ b/python/paddle/static/quantization/tests/test_graph.py
@@ -76,11 +76,11 @@ class TestGraph(unittest.TestCase):
         build_strategy.memory_optimize = False
         build_strategy.enable_inplace = False
         origin_binary = paddle.static.CompiledProgram(
-            graph.graph
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            graph.graph, build_strategy=build_strategy
+        )
         backup_binary = paddle.static.CompiledProgram(
-            backup_graph.graph
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            backup_graph.graph, build_strategy=build_strategy
+        )
         place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
         exe = paddle.static.Executor(place)
         exe.run(startup)
diff --git a/python/paddle/static/quantization/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/static/quantization/tests/test_moving_average_abs_max_scale_op.py
index 8b95cae34c1..c822601994a 100644
--- a/python/paddle/static/quantization/tests/test_moving_average_abs_max_scale_op.py
+++ b/python/paddle/static/quantization/tests/test_moving_average_abs_max_scale_op.py
@@ -70,9 +70,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
         exe = paddle.static.Executor(place)
         exe.run(startup_program)
 
-        binary = paddle.static.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name
-        )
+        binary = paddle.static.CompiledProgram(main_program)
 
         img, label = init_data()
         feed_dict = {"image": img, "label": label}
diff --git a/python/paddle/static/quantization/tests/test_quantization_mkldnn_pass.py b/python/paddle/static/quantization/tests/test_quantization_mkldnn_pass.py
index fa33fb1a87f..cf5ad87bb1c 100644
--- a/python/paddle/static/quantization/tests/test_quantization_mkldnn_pass.py
+++ b/python/paddle/static/quantization/tests/test_quantization_mkldnn_pass.py
@@ -143,8 +143,8 @@ class TestMKLDNNTransformBasedFreezePass(unittest.TestCase):
         build_strategy.memory_optimize = False
         build_strategy.enable_inplace = False
         binary = paddle.static.CompiledProgram(
-            main_graph.graph
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            main_graph.graph, build_strategy=build_strategy
+        )
         quantized_test_program = test_graph.to_program()
         iters = 5
         batch_size = 8
diff --git a/python/paddle/static/quantization/tests/test_quantization_pass.py b/python/paddle/static/quantization/tests/test_quantization_pass.py
index 1bad3ea9e93..a1d23fd249f 100644
--- a/python/paddle/static/quantization/tests/test_quantization_pass.py
+++ b/python/paddle/static/quantization/tests/test_quantization_pass.py
@@ -373,8 +373,8 @@ class TestQuantizationFreezePass(unittest.TestCase):
         build_strategy.enable_inplace = False
         build_strategy.fuse_all_reduce_ops = False
         binary = paddle.static.CompiledProgram(
-            main_graph.graph
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            main_graph.graph, build_strategy=build_strategy
+        )
         quantized_test_program = test_graph.to_program()
         iters = 5
         batch_size = 8
diff --git a/python/paddle/static/quantization/tests/test_quantization_scale_pass.py b/python/paddle/static/quantization/tests/test_quantization_scale_pass.py
index 6e9a1bf11f3..644d853da3d 100644
--- a/python/paddle/static/quantization/tests/test_quantization_scale_pass.py
+++ b/python/paddle/static/quantization/tests/test_quantization_scale_pass.py
@@ -143,8 +143,8 @@ class TestQuantizationScalePass(unittest.TestCase):
         build_strategy.enable_inplace = False
         build_strategy.fuse_all_reduce_ops = False
         binary = paddle.static.CompiledProgram(
-            main_graph.graph
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            main_graph.graph, build_strategy=build_strategy
+        )
         iters = 5
         batch_size = 8
 
diff --git a/python/paddle/static/quantization/tests/test_user_defined_quantization.py b/python/paddle/static/quantization/tests/test_user_defined_quantization.py
index b5856854b44..80d4e6e564c 100644
--- a/python/paddle/static/quantization/tests/test_user_defined_quantization.py
+++ b/python/paddle/static/quantization/tests/test_user_defined_quantization.py
@@ -191,8 +191,8 @@ class TestUserDefinedQuantization(unittest.TestCase):
         build_strategy.enable_inplace = False
         build_strategy.fuse_all_reduce_ops = False
         binary = paddle.static.CompiledProgram(
-            main_graph.graph
-        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
+            main_graph.graph, build_strategy=build_strategy
+        )
         iters = 5
         batch_size = 8
 
-- 
GitLab