diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index b6dea66f7bfcb7bf108a709bd5ad40087c44a121..b3a925070b320a785429570743f2e99de51fe3b2 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -473,9 +473,7 @@ class OptimizationTuner: parent_env = copy.copy(os.environ.copy()) # env flags need for profile - new_env = { - "FLAGS_USE_STANDALONE_EXECUTOR": "False", - } + new_env = {} new_env.update(parent_env) # TODO if any rank hang or fail, kill all processes diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index f943e58748d07042d2e69a3c9856f846bb007769..5d519bcc94e06b544284d899c9253063018215e0 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -34,7 +34,6 @@ from paddle.distributed.auto_parallel.utils import ( ring_id_to_process_group, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid.executor import _is_enable_standalone_executor from paddle.static import default_main_program from paddle.utils import unique_name @@ -97,8 +96,7 @@ class DataParallelOptimizationPass(PassBase): self.global_rank = int(self.get_attr("global_rank")) self.use_sharding = self.get_attr("use_sharding") self.coalesce_prefix = 'coalesce_grad' - if _is_enable_standalone_executor(): - self.gradient_sync_stream = "gradient_sync_stream" + self.gradient_sync_stream = "gradient_sync_stream" with paddle.static.program_guard(main_program, startup_program): self._analyze_program() @@ -316,8 +314,7 @@ class DataParallelOptimizationPass(PassBase): def _calc_wait_comms(self): - if _is_enable_standalone_executor(): - return + return block = default_main_program().global_block() @@ -602,7 +599,7 @@ class DataParallelOptimizationPass(PassBase): # multiple stream executor(standalone exe). This function just for standalone exe. Refactor here # in future when only one executor stay. 
- if not _is_enable_standalone_executor() or len(grad_groups) == 0: + if len(grad_groups) == 0: return block = default_main_program().global_block() diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py index 525420422af681258b5a632e5c14f3b435cc0468..20e602c9a20446c7c683f9a3efd894186aa91f3d 100644 --- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py +++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py @@ -18,7 +18,6 @@ import numpy as np import paddle from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid.executor import _is_enable_standalone_executor from ..auto_parallel.dist_attribute import OperatorDistAttr, TensorDistAttr from ..auto_parallel.operators.common import ( @@ -460,10 +459,7 @@ class ClipGradByGloblNormPass(PassBase): ) self.clip_helper._init_dist_attr(allreduce_op) - if ( - _is_enable_standalone_executor() - and insert_leaf_fill_constant_node - ): + if insert_leaf_fill_constant_node: # NOTE add naive deps for global norm sync in graph exe j = idx - 1 diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index d9c82746ae9fbb3717767f2b9f570e26480793f8..acdbcf18d33b8229e1c5e2e7681a2cc630b81e4b 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -35,7 +35,6 @@ from paddle.distributed.auto_parallel.utils import ( set_var_dist_attr, ) from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size -from paddle.fluid.executor import _is_enable_standalone_executor from paddle.framework import core from paddle.static import default_main_program, default_startup_program from paddle.utils import unique_name @@ -1168,7 +1167,7 @@ class ShardingPass(PassBase): P.S. this overlap pass is ONLY adapted for standalone executor (graph based) and stream awared allocator. """ - if not _is_enable_standalone_executor() or (not self.enable_overlap): + if not self.enable_overlap: return self.grad_comm_group_stream_pairs = [] diff --git a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py index 224629af72794347f99caaec6185f250e8d873b1..c164b6e8ddbc478216e725a602d1c39f4764d758 100644 --- a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py +++ b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py @@ -21,7 +21,6 @@ from paddle.distributed.auto_parallel.utils import ( OpRole, insert_dependencies_for_vars, ) -from paddle.fluid.executor import _is_enable_standalone_executor from .auto_parallel_sharding import ShardingPass, _supported_optimizer_type from .pass_base import PassBase, register_pass @@ -70,9 +69,7 @@ class AutoParalSupplementDepPass(PassBase): def _apply_single_impl(self, main_program, startup_program, context): # TODO general this pass for all case. 
- if not _is_enable_standalone_executor or not _sharding_pass_applied( - context - ): + if not _sharding_pass_applied(context): return self._dist_context = self.get_attr("dist_context", None) diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index d205a122db654b67c43a2af5e22198fd8b5954a4..2b8b4fcd380e05c806d5bd73527e20dda825415c 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -493,14 +493,6 @@ def _to_name_str(var): return _to_str(var) -def _is_enable_standalone_executor(): - return ( - framework._enable_standalone_executor_ is None - or framework._enable_standalone_executor_ - in [1, '1', True, 'True', 'true'] - ) - - def _is_dy2st_enable_standalone_executor(): return framework._dy2st_enable_standalone_executor_ in [ 1, @@ -1004,8 +996,6 @@ class Executor: "__auto_checkpoint_executor__" ) - # NOTE: Whether to use experimental executor `StandaloneExecutor`. - self._enable_interpreter_core = _is_enable_standalone_executor() self._executor_cache = _ExecutorCache() self._fleet_executor = None @@ -1605,9 +1595,7 @@ class Executor: return True - if self._enable_interpreter_core and _can_use_interpreter_core( - program, self.place - ): + if _can_use_interpreter_core(program, self.place): if feed is None: feed = {} @@ -1685,132 +1673,12 @@ class Executor: acp._auto_checkpoint(self, program) - # For backward compatibility, run directly. - if not compiled: - - return self._run_program( - program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - scope=scope, - return_numpy=return_numpy, - use_program_cache=use_program_cache, - ) - program._compile(scope, self.place) assert ( program._is_inference ), f"Program must have _is_inference = True, but get {program._is_inference}" return self._run_inference(program._executor, feed) - def _run_program( - self, - program, - feed, - fetch_list, - feed_var_name, - fetch_var_name, - scope, - return_numpy, - use_program_cache, - ): - from paddle.optimizer.lr import LRScheduler - - if feed is None: - feed = {} - elif isinstance(feed, (list, tuple)): - assert len(feed) == 1, "Not compiled with data parallel" - feed = feed[0] - - if not isinstance(feed, dict): - raise TypeError( - "feed requires dict as its Parameter. But you passed in %s" - % (type(feed)) - ) - - assert program is not None, "The program should not be Empty" - if not isinstance(program, Program): - raise TypeError( - "Executor requires Program as its Parameter. But you passed in %s" - % (type(program)) - ) - - if not isinstance(fetch_var_name, str): - raise TypeError( - "The name of fetch variable requires string as its Parameter. But you passed in %s" - % (type(fetch_var_name)) - ) - - if use_program_cache: - cache_key = _get_strong_program_cache_key(program, feed, fetch_list) - cached_program = self._get_program_cache(cache_key) - cached_ctx = self._get_ctx_cache(cache_key) - cached_scope = self._get_scope_cache(cache_key) - if cached_program is None: - cached_program = _add_feed_fetch_ops( - program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - ) - self._add_program_cache(cache_key, cached_program) - fetch_list_str = list(map(_to_name_str, fetch_list)) - cached_ctx = self._default_executor.prepare( - cached_program.desc, 0, fetch_list_str, False - ) - # currently, we cache program, vars, sub_scope here - # we suppose that in a life cycle of training, a user - # will not create many programs. 
So, here the basic - # rule of caching is to cache all unseen (program, var, scope) - # when a user use use_program_cache. - cached_scope = scope.new_scope() - self._default_executor.create_variables( - cached_program.desc, cached_scope, 0 - ) - self._add_ctx_cache(cache_key, cached_ctx) - self._add_scope_cache(cache_key, cached_scope) - program = cached_program - ctx = cached_ctx - scope = cached_scope - else: - program = _add_feed_fetch_ops( - program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - ) - - self._feed_data(program, feed, feed_var_name, scope) - if hasattr(program, 'lr_schedulerr'): - assert isinstance( - program.lr_scheduler, LRScheduler - ), "must be LRScheduler" - lr_scheduler = program.lr_scheduler - lr_value = lr_scheduler() - lr_var = program.global_block().vars[lr_scheduler._var_name] - data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype)) - tensor = core.get_variable_tensor(scope, lr_scheduler._var_name) - tensor.set(data, self.place) - - if not use_program_cache: - self._default_executor.run( - program.desc, scope, 0, True, True, [fetch_var_name] - ) - else: - self._default_executor.run_prepared_ctx( - ctx, scope, False, False, False - ) - arr = scope.find_var(fetch_var_name).get_fetch_list() - tensors = arr._move_to_list() - if return_numpy: - return as_numpy(tensors) - else: - return tensors - def _run_inference(self, exe, feed): return exe.run(feed) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 97fcb9ad1ad7349112cbb903e0c4bb5aa7e61fa9..98d0fbf0620b7d96f7c9f4c5270d9d5bd31061c5 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -116,9 +116,7 @@ _already_patch_eager_tensor = False _already_patch_varbase = False _current_cuda_graph_mode = None _global_flags_ = core.globals() -_enable_standalone_executor_ = os.environ.get( - 'FLAGS_USE_STANDALONE_EXECUTOR', None -) + _dy2st_enable_standalone_executor_ = os.environ.get( 'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1 ) @@ -270,17 +268,6 @@ ipu_index_attr_name = 'ipu_index' ipu_stage_attr_name = 'ipu_stage' -@signature_safe_contextmanager -def _enable_standalone_executor(enable=True): - global _enable_standalone_executor_ - original_ = _enable_standalone_executor_ - _enable_standalone_executor_ = enable - try: - yield - finally: - _enable_standalone_executor_ = original_ - - @signature_safe_contextmanager def ipu_shard_guard(index=-1, stage=-1): """ diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 1eb9e24eef8fbc07b39df1d6faa93690f27a11fd..d7f856fde675e8c15125ed5c25fe6c3003a63ded 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1150,25 +1150,6 @@ if(WITH_GLOO) PROPERTIES TIMEOUT 120) endif() -if($ENV{USE_STANDALONE_EXECUTOR}) - # these test will fail in some server due to PR#42149, temporarily set it use old executor. 
- set_tests_properties(test_apply_pass_to_program - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_buffer_shared_memory_reuse_pass - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties( - test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_optimizer - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_star_gan_with_gradient_penalty - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_switch_autotune - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_mnist_sorted_gradient - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) -endif() - set(TEST_CINN_OPS test_softmax_op test_expand_v2_op diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index 96ed9ecd69e71f8fc1d512ada0a949841168fa24..d8205fdf4528d327760e528ccc0fd229cd730b5d 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -201,9 +201,7 @@ if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(test_trt_conv3d_op PROPERTIES TIMEOUT 60) set_tests_properties(test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60) set_tests_properties(test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30) - set_tests_properties( - test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60 ENVIRONMENT - FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60) if(WITH_MKLDNN AND TENSORRT_FOUND diff --git a/python/paddle/fluid/tests/unittests/test_eager_run_program.py b/python/paddle/fluid/tests/unittests/test_eager_run_program.py index fe038bc71044689b0268490c7b1707a90ee1e058..0dd6b320362eb64b8e323ddf58a34a45dfcea508 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_run_program.py +++ b/python/paddle/fluid/tests/unittests/test_eager_run_program.py @@ -20,10 +20,6 @@ import paddle from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.executor import ( - _is_dy2st_enable_standalone_executor, - _is_enable_standalone_executor, -) from paddle.fluid.framework import Variable @@ -140,10 +136,7 @@ class TestRunProgram(unittest.TestCase): [out.name + '@GRAD'], ] - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: attrs.extend( diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index 1561f25359d6337f6f4df08bd96d084ef1349824..c54ab122a95b54a9db97bd0a853f299ebde81a6c 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -21,10 +21,6 @@ import paddle from paddle import _legacy_C_ops, fluid from paddle.fluid import core, framework from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.executor import ( - _is_dy2st_enable_standalone_executor, - _is_enable_standalone_executor, -) from paddle.fluid.framework import global_var paddle.enable_static() @@ -240,10 +236,7 @@ class 
RunProgramOpTest(unittest.TestCase): self.program_desc, self.fwd_op_num, len(outputs['Out']) ) - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True self.attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: self.attrs.extend( @@ -292,10 +285,7 @@ class RunProgramOpTest(unittest.TestCase): self.program_desc, self.fwd_op_num, len(outputs['Out']) ) - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True self.attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: self.attrs.extend( diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py index edc0a0cc158e92d531898f173ddeadff0977b85f..4eba9a31dbc2b9f4f20d9b9441850b0219c68b35 100644 --- a/python/paddle/jit/translated_layer.py +++ b/python/paddle/jit/translated_layer.py @@ -21,10 +21,6 @@ import paddle from paddle import _legacy_C_ops from paddle.fluid import backward, core, framework, unique_name from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.executor import ( - _is_dy2st_enable_standalone_executor, - _is_enable_standalone_executor, -) from paddle.fluid.framework import OpProtoHolder, _non_static_mode from paddle.jit.dy2static.partial_program import ( LazyInitialized, @@ -976,10 +972,7 @@ def _run_dygraph(instance, input, program_holder): ) ) - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: attrs.extend( diff --git a/test/custom_op/CMakeLists.txt b/test/custom_op/CMakeLists.txt index d7f32625db4abb0649329a21684da86f761ecaf9..fbdc8f9cc653e4e4e9b3f38bf1ec448fadf13e92 100644 --- a/test/custom_op/CMakeLists.txt +++ b/test/custom_op/CMakeLists.txt @@ -11,15 +11,6 @@ if(WITH_TESTING) set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180) set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180) set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180) - if($ENV{USE_STANDALONE_EXECUTOR}) - # these test will fail in some server due to PR#42149, temporarily set it use old executor. - set_tests_properties( - test_custom_relu_op_setup PROPERTIES ENVIRONMENT - FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties( - test_custom_relu_model PROPERTIES ENVIRONMENT - FLAGS_USE_STANDALONE_EXECUTOR=0) - endif() endif() if(WITH_GPU AND WITH_DISTRIBUTE) diff --git a/test/standalone_executor/test_standalone_controlflow.py b/test/standalone_executor/test_standalone_controlflow.py index 069240823d8fd5a0da514c1a375149e5f0ce99e5..fcdf7f81587087a6101390fbc0b48593bf2f3c32 100644 --- a/test/standalone_executor/test_standalone_controlflow.py +++ b/test/standalone_executor/test_standalone_controlflow.py @@ -17,7 +17,7 @@ import unittest import numpy as np import paddle -from paddle.fluid import core, framework +from paddle.fluid import core from paddle.fluid.framework import Program, program_guard paddle.enable_static() @@ -25,7 +25,7 @@ paddle.enable_static() # test the compatibility of new executor: run old # and new executor twice and check the result. 
-# please override the _get_feeds() and build_prgram() +# please override the _get_feeds() and build_prgram(), run_dygraph_once() class TestCompatibility(unittest.TestCase): def setUp(self): self.place = ( @@ -78,26 +78,53 @@ class TestCompatibility(unittest.TestCase): ret.append(exe.run(main_program, feed=feed, fetch_list=fetch_vars)) return ret - def run_raw_executor(self, feed): - with framework._enable_standalone_executor(False): - out = self._run(feed) + def run_dygraph_once(self, feed): + x = paddle.tensor.fill_constant(shape=[1], dtype='float32', value=0.1) + y = paddle.tensor.fill_constant(shape=[1], dtype='float32', value=0.23) + if x < y: + out = [ + paddle.tensor.fill_constant( + shape=[1, 2], dtype='int32', value=1 + ).numpy(), + paddle.tensor.fill_constant( + shape=[2, 3], dtype='bool', value=True + ).numpy(), + ] + else: + out = [ + paddle.tensor.fill_constant( + shape=[3, 4], dtype='float32', value=3 + ).numpy(), + paddle.tensor.fill_constant( + shape=[4, 5], dtype='int64', value=2 + ).numpy(), + ] return out + def run_dygraph(self, feed): + ret = [] + for _ in range(self.iter_run): + ret.append(self.run_dygraph_once(feed)) + return ret + def run_new_executor(self, feed): - with framework._enable_standalone_executor(True): - out = self._run(feed) + out = self._run(feed) return out def test_with_feed(self): feed = self._get_feed() + paddle.enable_static() res = self.run_new_executor(feed) - gt = self.run_raw_executor(feed) + paddle.disable_static() + + gt = self.run_dygraph(feed) + for x, y in zip(gt, res): if isinstance(x, list): for tx, ty in zip(x, y): np.testing.assert_array_equal(tx, ty) elif isinstance(x, np.ndarray): - np.testing.assert_array_equal(tx, ty) + np.testing.assert_array_equal(x, y) else: raise Exception("Not Implement!") @@ -129,6 +156,12 @@ class TestWhile(TestCompatibility): exe = paddle.static.Executor(paddle.CPUPlace()) return main_program, startup_program, i + def run_dygraph_once(self, feed): + i = 1 + while i < 10: + i = i + 1 + return [i] + if __name__ == "__main__": unittest.main() diff --git a/test/standalone_executor/test_standalone_executor.py b/test/standalone_executor/test_standalone_executor.py index c208044722f52a5c09a49dae269b9d8294df6a11..08b150c84c342352f8aa9661f603453a0af6b143 100644 --- a/test/standalone_executor/test_standalone_executor.py +++ b/test/standalone_executor/test_standalone_executor.py @@ -23,7 +23,7 @@ import unittest import numpy as np import paddle -from paddle.fluid import core, framework +from paddle.fluid import core from paddle.fluid.core import StandaloneExecutor from paddle.profiler import profiler @@ -143,16 +143,15 @@ class ExecutorStatisticsTestCase(unittest.TestCase): scope = paddle.static.Scope() with paddle.static.scope_guard(scope): - with framework._enable_standalone_executor(enable): - exe = paddle.static.Executor(self.place) - helper_profiler = profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], scheduler=(1, 2) - ) - helper_profiler.start() - for i in range(self.iter_n): - exe.run(main_program, fetch_list=fetch_list) - helper_profiler.step() - helper_profiler.stop() + exe = paddle.static.Executor(self.place) + helper_profiler = profiler.Profiler( + targets=[profiler.ProfilerTarget.CPU], scheduler=(1, 2) + ) + helper_profiler.start() + for i in range(self.iter_n): + exe.run(main_program, fetch_list=fetch_list) + helper_profiler.step() + helper_profiler.stop() self.assertTrue(os.path.exists(self.perf_path)) with open(self.perf_path, 'r') as load_f: @@ -183,15 +182,14 @@ class 
MultiStreamModelTestCase(unittest.TestCase): paddle.seed(2020) main_program, startup_program, fetch_list = build_program() - with framework._enable_standalone_executor(use_new_executor): - scope = core.Scope() - exe = paddle.static.Executor(self.place) - outs = [] - for i in range(self.iter_n): - outs.append( - exe.run(main_program, scope=scope, fetch_list=fetch_list) - ) - print(outs) + scope = core.Scope() + exe = paddle.static.Executor(self.place) + outs = [] + for i in range(self.iter_n): + outs.append( + exe.run(main_program, scope=scope, fetch_list=fetch_list) + ) + print(outs) return outs @@ -249,30 +247,46 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): return outs - def run_raw_executor(self, feed, use_compiled=False): - with framework._enable_standalone_executor(False): - # run construct program 1 - out1 = self._run( - feed, use_str=False, is_double=False, use_compiled=use_compiled - ) - # run construct program 2 with same executor - out2 = self._run( - feed, use_str=True, is_double=True, use_compiled=use_compiled - ) + def run_dygraph(self, feed): + def run_once(is_double): + paddle.seed(2020) + a = feed['a'] + a = paddle.to_tensor(a, dtype='float32') + b = paddle.ones([2, 2]) * 2 + t = paddle.nn.Linear(2, 2)(a) + c = t + b + if is_double: + c = c + c + return c.numpy() - return [out1, out2] + out1 = [] + for i in range(self.iter_run): + out1.append(run_once(False)) + out2 = [] + for i in range(self.iter_run): + out2.append(run_once(True)) + return [out1, out2] def run_new_executor(self, feed, use_compiled=False): - with framework._enable_standalone_executor(): - out = self.run_raw_executor(feed, use_compiled=use_compiled) - return out + # run construct program 1 + out1 = self._run( + feed, use_str=False, is_double=False, use_compiled=use_compiled + ) + # run construct program 2 with same executor + out2 = self._run( + feed, use_str=True, is_double=True, use_compiled=use_compiled + ) + + return [out1, out2] def test_with_feed(self): data = np.ones([2, 2], dtype="float32") feed = {"a": data, 'fake_input': data} - res = self.run_new_executor(feed) - gt = self.run_raw_executor(feed) + with paddle.fluid.framework._static_guard(): + res = self.run_new_executor(feed) + with paddle.fluid.dygraph.guard(): + gt = self.run_dygraph(feed) for x, y in zip(gt, res): np.testing.assert_array_equal(x, y) @@ -280,8 +294,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): feed = [{'a': np.ones([2, 2], dtype="float32")}] with self.assertRaises(TypeError): - with framework._enable_standalone_executor(): - self._run(feed[0], add_wrong_fetch=True) + self._run(feed[0], add_wrong_fetch=True) def test_empty_program(self): program = paddle.static.Program() @@ -291,8 +304,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): for i in range(10): print(i, flush=1) - with framework._enable_standalone_executor(): - out = exe.run(program, feed=None) + out = exe.run(program, feed=None) class TestException(unittest.TestCase): @@ -328,8 +340,7 @@ class TestException(unittest.TestCase): return out def run_new_executor(self, feed): - with framework._enable_standalone_executor(): - out = self._run(feed) + out = self._run(feed) return out def test_exception(self): @@ -399,13 +410,11 @@ class TestInplaceApiWithDataTransform(unittest.TestCase): with paddle.fluid.device_guard("cpu"): x = paddle.increment(x) exe = paddle.static.Executor(paddle.CUDAPlace(0)) - with framework._enable_standalone_executor(): - - for i in range(10): - (a,) = exe.run( - paddle.static.default_main_program(), 
fetch_list=[x] - ) - self.assertEqual(a[0], 1) + for i in range(10): + (a,) = exe.run( + paddle.static.default_main_program(), fetch_list=[x] + ) + self.assertEqual(a[0], 1) if __name__ == "__main__": diff --git a/test/standalone_executor/test_standalone_multiply_write.py b/test/standalone_executor/test_standalone_multiply_write.py index d876606c57a51461919a3ac4c80a35a1ebb7c91b..96e35a336951c8da16bf72557c059ca6cda29443 100644 --- a/test/standalone_executor/test_standalone_multiply_write.py +++ b/test/standalone_executor/test_standalone_multiply_write.py @@ -39,6 +39,14 @@ class TestMultiplyWrite(TestCompatibility): paddle.assign(inp2, out) return main_program, startup_program, out + def run_dygraph_once(self, feed): + out = paddle.full((1,), 1) + inp1 = paddle.full((1,), 2) + inp2 = paddle.full((1,), 3) + paddle.assign(inp1, out) + paddle.assign(inp2, out) + return [out.numpy()] + def setUp(self): self.place = paddle.CPUPlace() self.iter_run = 5
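
Note on the test refactor above: with FLAGS_USE_STANDALONE_EXECUTOR and the legacy-executor code path removed, the standalone-executor tests no longer compare "new executor vs. old executor"; they compare the static-graph result against the equivalent dygraph computation (see run_dygraph_once in test_standalone_controlflow.py and test_standalone_multiply_write.py). The following is a minimal, self-contained sketch of that pattern, mirroring the multiply-write case. It is not part of the patch; the helper names (run_static_once, run_dygraph_once) and the CPU place are illustrative assumptions only.

    # Hedged sketch: static-graph output (now always run by the standalone
    # executor) checked against the dygraph result for the same computation.
    import numpy as np
    import paddle

    def run_static_once():
        # Build and run a small static program that assigns twice into `out`.
        paddle.enable_static()
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            out = paddle.full((1,), 1)
            inp1 = paddle.full((1,), 2)
            inp2 = paddle.full((1,), 3)
            paddle.assign(inp1, out)
            paddle.assign(inp2, out)  # the later assign should take effect
        exe = paddle.static.Executor(paddle.CPUPlace())
        exe.run(startup_program)
        (res,) = exe.run(main_program, fetch_list=[out])
        paddle.disable_static()
        return res

    def run_dygraph_once():
        # Same computation in eager mode, used as the reference result.
        out = paddle.full((1,), 1)
        inp1 = paddle.full((1,), 2)
        inp2 = paddle.full((1,), 3)
        paddle.assign(inp1, out)
        paddle.assign(inp2, out)
        return out.numpy()

    if __name__ == "__main__":
        np.testing.assert_array_equal(run_static_once(), run_dygraph_once())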