diff --git a/paddle/fluid/distributed/auto_parallel/dist_attr.h b/paddle/fluid/distributed/auto_parallel/dist_attr.h
index 637c2b0559487b0dca2bf59fc46bd8e8b452bced..b38a21f336a0873d17465b383220624b4c7370be 100644
--- a/paddle/fluid/distributed/auto_parallel/dist_attr.h
+++ b/paddle/fluid/distributed/auto_parallel/dist_attr.h
@@ -288,8 +288,8 @@ class OperatorDistAttr {
   std::string impl_type_ = kDefault;
   int64_t impl_idx_ = 0;
   bool is_recompute_ = false;
-  std::string execution_stream_;
-  int64_t scheduling_priority_;  // lower value, higher priority, default to 0
+  std::string execution_stream_ = kDefault;
+  int64_t scheduling_priority_ = 0;  // lower value, higher priority
   std::map<std::string, bool> annotated_;
 };
diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py
index 445cb50c7f6a4f36efee10710037bc650daac310..d8febaf9d51c058bfaed77ff030635b1cbc01449 100644
--- a/python/paddle/distributed/auto_parallel/engine.py
+++ b/python/paddle/distributed/auto_parallel/engine.py
@@ -228,6 +228,8 @@ class Engine:
         self.history = None
 
+        paddle.framework.set_flags({'FLAGS_new_executor_sequential_run': 1})
+
     def _prepare_data_spec(self, data, split, batch_size):
         inputs_spec = []
         labels_spec = []
diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py
index 32874f0769150920c873945fd4b803d2522cc254..9eb2f806eabc39362db684b2219ed4339771b4fa 100644
--- a/python/paddle/distributed/auto_parallel/utils.py
+++ b/python/paddle/distributed/auto_parallel/utils.py
@@ -2337,13 +2337,3 @@ def is_dep_skip_op(op):
             return True
 
     return False
-
-
-def use_standalone_executor():
-    return os.environ.get('FLAGS_CONVERT_GRAPH_TO_PROGRAM', None) in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
index 2cfa113a7aef34d93a60a4a55d96ef627754dc2b..8be8c67bca66436617e7c4928ad87ed813d199a9 100644
--- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
+++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
@@ -27,10 +27,10 @@ from paddle.distributed.auto_parallel.utils import (
     is_loss_grad_op,
     is_optimize_op,
     ring_id_to_process_group,
-    use_standalone_executor,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
 from paddle.fluid import unique_name
+from paddle.fluid.executor import _is_enable_standalone_executor
 from paddle.fluid.framework import default_main_program
 
 from .pass_base import PassBase, PassType, register_pass
@@ -92,7 +92,7 @@ class DataParallelOptimizationPass(PassBase):
         self.global_rank = int(self.get_attr("global_rank"))
         self.use_sharding = self.get_attr("use_sharding")
         self.coalesce_prefix = 'coalesce_grad'
-        if use_standalone_executor():
+        if _is_enable_standalone_executor():
             self.gradient_sync_stream = "gradient_sync_stream"
 
         with paddle.static.program_guard(main_program, startup_program):
@@ -313,7 +313,7 @@
 
     def _calc_wait_comms(self):
 
-        if use_standalone_executor():
+        if _is_enable_standalone_executor():
             return
 
         block = default_main_program().global_block()
@@ -546,7 +546,7 @@
         # multiple stream executor(standalone exe). This function just for standalone exe. Refactor here
         # in future when only one executor stay.
 
-        if not use_standalone_executor() or len(grad_groups) == 0:
+        if not _is_enable_standalone_executor() or len(grad_groups) == 0:
             return
 
         block = default_main_program().global_block()
diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py
index 8503ea94b986e071ebc5e8635b1c7ec8f6be5f43..0ff3fbcf951d0edb9bcc68e1652724376b955772 100644
--- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py
+++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py
@@ -18,6 +18,7 @@ import numpy as np
 
 import paddle
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
+from paddle.fluid.executor import _is_enable_standalone_executor
 
 from ..auto_parallel.dist_attribute import OperatorDistAttr, TensorDistAttr
 from ..auto_parallel.operators.common import SyncMode
@@ -29,7 +30,6 @@ from ..auto_parallel.utils import (
     insert_dependencies_for_vars,
     is_gradient_clip_op,
     is_optimize_op,
-    use_standalone_executor,
 )
 
 from .pass_base import PassBase, register_pass
@@ -378,7 +378,7 @@ class ClipGradByGloblNormPass(PassBase):
 
                 self.clip_helper._init_dist_attr(allreduce_op)
 
                 if (
-                    use_standalone_executor
+                    _is_enable_standalone_executor()
                     and insert_leaf_fill_constant_node
                 ):
diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py
index 5f68131d0428d2aa98ad759b08bb913776f147da..bb3ebbeaf8f7f06b90b32da3626071c74ae5d5ac 100644
--- a/python/paddle/distributed/passes/auto_parallel_sharding.py
+++ b/python/paddle/distributed/passes/auto_parallel_sharding.py
@@ -33,10 +33,10 @@ from paddle.distributed.auto_parallel.utils import (
     is_optimize_op,
     naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
     set_var_dist_attr,
-    use_standalone_executor,
 )
 from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size
 from paddle.fluid import unique_name
+from paddle.fluid.executor import _is_enable_standalone_executor
 from paddle.fluid.framework import default_main_program, default_startup_program
 from paddle.framework import core
@@ -1170,7 +1170,7 @@ class ShardingPass(PassBase):
         P.S. this overlap pass is ONLY adapted for standalone executor (graph based) and
         stream awared allocator.
         """
-        if not use_standalone_executor() or (not self.enable_overlap):
+        if not _is_enable_standalone_executor() or (not self.enable_overlap):
             return
 
         self.grad_comm_group_stream_pairs = []
diff --git a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py
index 0650c4c577ebcbb5a5db559adf070637c92e18c1..07de0c1dcbfeb1209834c7b58ef9e8339b9438d2 100644
--- a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py
+++ b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py
@@ -20,8 +20,8 @@ from paddle.distributed.auto_parallel.operators.common import (
 from paddle.distributed.auto_parallel.utils import (
     OpRole,
     insert_dependencies_for_vars,
-    use_standalone_executor,
 )
+from paddle.fluid.executor import _is_enable_standalone_executor
 
 from .auto_parallel_sharding import ShardingPass, _supported_optimizer_type
 from .pass_base import PassBase, register_pass
@@ -70,7 +70,9 @@ class AutoParalSupplementDepPass(PassBase):
 
     def _apply_single_impl(self, main_program, startup_program, context):
         # TODO general this pass for all case.
-        if not use_standalone_executor or not _sharding_pass_applied(context):
+        if not _is_enable_standalone_executor or not _sharding_pass_applied(
+            context
+        ):
             return
 
         self._dist_context = self.get_attr("dist_context", None)
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index b2ce9156691f08adf093c5002676540113d307ca..d5db9a7f72c0de77c7f8ee6a3467bf6f721bcf01 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -871,19 +871,6 @@ class _ExecutorCache:
                 use_fetch_v2=True,
             )
 
-            if (
-                os.environ.get('FLAGS_CONVERT_GRAPH_TO_PROGRAM', None)
-                in [1, '1', True, 'True', 'true']
-                and not program._is_start_up_program_
-            ):
-                if program.num_blocks > 1:
-                    # If there are multiple blocks in the program, subblock will not be executed with the new executor in temporary
-                    logging.warning("There are more than 1 block in program.")
-                elif program.num_blocks == 1:
-                    logging.warning("There are 1 block in program.")
-                else:
-                    logging.warning("There are no block in program.")
-
             # standalone executor will apply buffer_shared_inplace_pass and
             # inplace_addto_op_pass to program according to build_strategy
             enable_inplace = (
@@ -1711,10 +1698,6 @@ class Executor:
         if core.is_compiled_with_mlu():
             return False
 
-        use_standalone_executor_for_distribution = os.environ.get(
-            'FLAGS_CONVERT_GRAPH_TO_PROGRAM', None
-        ) in [1, '1', True, 'True', 'true']
-
         compiled = isinstance(
             program, compiler.CompiledProgram
         ) or isinstance(program._graph, compiler.CompiledProgram)
@@ -1784,20 +1767,8 @@ class Executor:
                     UserWarning,
                 )
                 return False
-
-            # delete this code after supporting fleet
-            from paddle.distributed.fleet import fleet
-
-            if fleet._role_maker is not None:
-                warnings.warn(
-                    "Standalone executor is not used for fleet", UserWarning
-                )
-                return use_standalone_executor_for_distribution
-
             return True
 
-        # NOTE: This is an experimental feature. If `export FLAGS_USE_STANDALONE_EXECUTOR=1 `,
-        # use StandaloneExecutor to run the program.
         if (
             return_merged
             and self._enable_interpreter_core
diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
index 1b2e8b6f8689b1e1d17665e678853173b2866cb9..b939ee43883a9eb5952384d30c8382cd5acbf5fe 100644
--- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt
+++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
@@ -7,8 +7,6 @@ if(WITH_GPU OR APPLE)
   # Compiling shared library will cost some time, but running process is very fast.
   set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
-  set_tests_properties(test_custom_relu_op_setup
-                       PROPERTIES ENVIRONMENT FLAGS_CONVERT_GRAPH_TO_PROGRAM=1)
   set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
   set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180)
   set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180)
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index b9904cb6f296e82cc1c145f495b7a16a96e7b149..173c1b1b5db4f001a95736fb8dfdf963d5e3bc34 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1251,11 +1251,6 @@ py_test_modules(
   test_eager_deletion_padding_rnn_for_interpretercore MODULES
   test_eager_deletion_padding_rnn ENVS FLAGS_CONVERT_GRAPH_TO_PROGRAM=true)
 
-set_tests_properties(
-  test_buffer_shared_memory_reuse_pass
-  test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass
-  PROPERTIES ENVIRONMENT FLAGS_CONVERT_GRAPH_TO_PROGRAM=true)
-
 # ExecutionStrategy is deprecated in standalone executor
 set_tests_properties(test_parallel_executor_dry_run
                      PROPERTIES ENVIRONMENT "FLAGS_USE_STANDALONE_EXECUTOR=0")
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_sharding_with_newexe.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_sharding_with_newexe.py
index 91ffae423c314ebd07479936604a1a52979d4cad..f71e024f36dc9e1ebc942b3dce7f4ec366c743f4 100644
--- a/python/paddle/fluid/tests/unittests/auto_parallel/test_sharding_with_newexe.py
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_sharding_with_newexe.py
@@ -18,7 +18,6 @@
 import sys
 import tempfile
 import unittest
 
-os.environ["FLAGS_CONVERT_GRAPH_TO_PROGRAM"] = str(1)
 os.environ["FLAGS_add_dependency_for_communication_op"] = 'false'
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py
index f5eb234945c757e2577590cd5b75354436dc18b8..58091bd847f2c960a58b32da9ffb40b7755d1608 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py
@@ -31,7 +31,6 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
             "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36001,127.0.0.1:36002",
             "http_proxy": "",
             "https_proxy": "",
-            "FLAGS_CONVERT_GRAPH_TO_PROGRAM": "1",
         }
 
         node_b = {
@@ -41,7 +40,6 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
             "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36001,127.0.0.1:36002",
             "http_proxy": "",
             "https_proxy": "",
-            "FLAGS_CONVERT_GRAPH_TO_PROGRAM": "1",
         }
 
         def node_func():
diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py
index 5a6486991dc9da4818d15420349510058bed9981..d2b5b9fe62b5934000e82cd39ca98beab491917c 100644
--- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py
+++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import random
 import sys
 import unittest
@@ -123,7 +122,6 @@ class TestDataParallelPassWithScale2(TestDataParallelPassWithScale1):
 class TestDataParallelPassWithStandaloneEXE(TestDataParallelPassWithScale1):
     def init(self):
         if paddle.is_compiled_with_cuda():
-            os.environ['FLAGS_CONVERT_GRAPH_TO_PROGRAM'] = "1"
             paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
             self.rtol = 1e-5
             self.atol = 1e-8
diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py
index 03eb219a0b5d0f2d19d6c87da4a17539c7a86c6f..b93b494a3515ee33a3163801123eb015565fb07c 100755
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -1687,7 +1687,6 @@ class TestDistBase(unittest.TestCase):
             "http_proxy": "",
             "NCCL_P2P_DISABLE": "1",
             "NCCL_SHM_DISABLE": "1",
-            "FLAGS_CONVERT_GRAPH_TO_PROGRAM": "1",
         }
 
         if check_error_log:
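
Note (not part of the patch): the removed `use_standalone_executor()` helper was only a check on the `FLAGS_CONVERT_GRAPH_TO_PROGRAM` environment variable; after this change the auto-parallel passes gate on `_is_enable_standalone_executor()` imported from `paddle.fluid.executor`, as the hunks above show. The following is a minimal sketch of that gating pattern; the function name `insert_gradient_sync_dependencies` and its body are hypothetical, shown only to illustrate how the check is used, not code from this PR.

```python
# Sketch of the gating pattern used by the passes after this change.
# `_is_enable_standalone_executor` and `default_main_program` come from the
# imports added in the diff above; everything else here is illustrative.
from paddle.fluid.executor import _is_enable_standalone_executor
from paddle.fluid.framework import default_main_program


def insert_gradient_sync_dependencies(grad_groups):
    # Explicit dependency ops are only needed for the multi-stream standalone
    # executor; with the legacy executor (or no fused grad groups) the pass
    # is a no-op.
    if not _is_enable_standalone_executor() or len(grad_groups) == 0:
        return
    block = default_main_program().global_block()
    # ... insert dependency/wait ops between communication and compute ops ...
```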