diff --git a/python/paddle/fluid/dygraph/amp/auto_cast.py b/python/paddle/fluid/dygraph/amp/auto_cast.py index 9c62f8edba491815d8513ff8226e0f094e76395e..fd5131ce070f337cfeb1c4362f9ad60f3182fa93 100644 --- a/python/paddle/fluid/dygraph/amp/auto_cast.py +++ b/python/paddle/fluid/dygraph/amp/auto_cast.py @@ -20,7 +20,6 @@ from paddle.fluid import core import contextlib from paddle.fluid.framework import ( Variable, - _non_static_mode, OpProtoHolder, Parameter, _dygraph_tracer, diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index fa0c12e16082db24d247132ef351fe1024162668..c36d77d9a11ae5383c11b7d01b5f4aaf2cf87219 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -27,7 +27,6 @@ from ..data_feeder import convert_dtype import warnings from ..framework import ( _get_paddle_place, - _in_legacy_dygraph, _in_eager_without_dygraph_check, ) import paddle @@ -113,11 +112,7 @@ _functional_dygraph_context_manager = None @signature_safe_contextmanager def param_guard(parameters): # Note: parameters is a reference of self._parameters or self._buffers - if ( - in_declarative_mode() - and not framework._non_static_mode() - and parameters - ): + if in_declarative_mode() and not framework.in_dygraph_mode() and parameters: origin_parameters = parameters.copy() for name, var_base in parameters.items(): if isinstance(var_base, list): @@ -189,7 +184,7 @@ def enabled(): print(fluid.dygraph.enabled()) # False """ # TODO(jiabin): Make this check as in_dygraph_mode when we support default eager mode. - return framework._non_static_mode() + return framework.in_dygraph_mode() def enable_dygraph(place=None): diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index 2078f0fcf0491f68af0e288ec1714b2690bb4fa8..ba34cb19777fa6068005f0077a85e401c398f0f1 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -18,7 +18,6 @@ import functools from ..framework import ( Variable, default_main_program, - _non_static_mode, dygraph_only, Parameter, ParamBase, diff --git a/python/paddle/fluid/dygraph/layer_hooks.py b/python/paddle/fluid/dygraph/layer_hooks.py index 8a373cd17c86d597350c371db6d6df4fa7803a8e..f610f1a2f8dee1af85261cf37e4ff01cf5c6d324 100644 --- a/python/paddle/fluid/dygraph/layer_hooks.py +++ b/python/paddle/fluid/dygraph/layer_hooks.py @@ -14,7 +14,7 @@ import warnings -from paddle.fluid.framework import default_main_program, _non_static_mode +from paddle.fluid.framework import default_main_program, in_dygraph_mode class LayerOpsRecoder: @@ -34,7 +34,7 @@ def record_program_ops_pre_hook(layer, inputs): """ A pre-hook to mark op numbers before enter layer.forward. """ - if not _non_static_mode(): + if not in_dygraph_mode(): if layer._op_recorder.start < 0: layer._op_recorder.start = len( default_main_program().current_block().ops @@ -55,7 +55,7 @@ def set_op_customized_attrs_post_hook(layer, inputs, outputs): """ A post-hook to append customized attributes into all operators generated in current layer. 
""" - if not _non_static_mode() and layer._op_recorder.is_valid: + if not in_dygraph_mode() and layer._op_recorder.is_valid: start = layer._op_recorder.start end = len(default_main_program().current_block().ops) diff --git a/python/paddle/fluid/dygraph/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py index efbf78609a6a62d694cb88ff257e7ccf192b4070..2e3964bf6ccf5807425a495b7c237a496edf64d7 100644 --- a/python/paddle/fluid/dygraph/layer_object_helper.py +++ b/python/paddle/fluid/dygraph/layer_object_helper.py @@ -13,7 +13,7 @@ # limitations under the License. import copy -from ..framework import Parameter, _non_static_mode, _global_flags +from ..framework import Parameter, in_dygraph_mode, _global_flags from ..param_attr import ParamAttr from .. import core @@ -169,7 +169,7 @@ class LayerObjectHelper(LayerHelperBase): if (use_mkldnn is not None) and use_mkldnn: act['use_mkldnn'] = use_mkldnn act_type = act.pop('type') - if _non_static_mode(): + if in_dygraph_mode(): res = _append_activation_in_dygraph( input_var, act_type, use_cudnn, use_mkldnn ) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 02b0e2bcfe1be3018b90e0457be95b0b181eab8e..cf6c000ba3f0f58f414be34f7b13fc3c4518ef71 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -46,7 +46,6 @@ from paddle.fluid import framework from ..param_attr import ParamAttr from paddle.fluid.executor import Executor, global_scope from paddle.fluid.framework import ( - _non_static_mode, convert_np_dtype_to_dtype_, in_dygraph_mode, ) @@ -153,7 +152,7 @@ class Layer: self._helper = LayerObjectHelper(self._full_name) self._built = False self._dtype = dtype - self._init_in_dynamic_mode = framework._non_static_mode() + self._init_in_dynamic_mode = in_dygraph_mode() self._parameters = collections.OrderedDict() # Buffers the variable (not parameter) created in layer @@ -211,7 +210,7 @@ class Layer: # global setting in dygraph # NOTE(chenweihang): nn.Layer also can be used in static mode, # but _dygraph_tracer() can not be called in static mode - if _non_static_mode(): + if in_dygraph_mode(): framework._dygraph_tracer().train_mode() # Layer-level setting self.training = True @@ -252,7 +251,7 @@ class Layer: # global setting in dygraph # NOTE(chenweihang): nn.Layer also can be used in static mode, # but _dygraph_tracer() can not be called in static mode - if _non_static_mode(): + if in_dygraph_mode(): framework._dygraph_tracer().eval_mode() # Layer-level setting self.training = False @@ -1667,7 +1666,7 @@ class Layer: for key in state_dict.keys(): if key not in match_keys: unexpected_keys.append(key) - if _non_static_mode(): + if in_dygraph_mode(): for param, state in matched_param_state: param.set_value(state) else: diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 6a864efc42eedc08a4c838e7a0de08a72ddfc6a8..cb78b8b9d5932fa3778ed2cd77db7a6dd53f102f 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -17,7 +17,6 @@ from ..framework import ( Variable, convert_np_dtype_to_dtype_, _varbase_creator, - _in_legacy_dygraph, in_dygraph_mode, ) from ..layers.layer_function_generator import OpProtoHolder @@ -123,17 +122,13 @@ def monkey_patch_math_varbase(): """ if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - - if _in_legacy_dygraph(): - return _legacy_C_ops.cast( - self, 'in_dtype', self.dtype, 
'out_dtype', dtype - ) return _C_ops.cast(self, dtype) def _scalar_elementwise_op_(var, scale, bias): if framework.in_dygraph_mode(): return _C_ops.scale(var, float(scale), bias, True) - return _legacy_C_ops.scale(var, 'scale', scale, 'bias', bias) + else: + return _legacy_C_ops.scale(var, 'scale', scale, 'bias', bias) def _neg_(var): return _scalar_elementwise_op_(var, -1.0, 0.0) @@ -194,10 +189,7 @@ def monkey_patch_math_varbase(): perm = [] for i in range(len(var.shape)): perm.insert(0, i) - if _in_legacy_dygraph(): - out, _ = _legacy_C_ops.transpose2(var, 'axis', perm) - else: - out = _C_ops.transpose(var, perm) + out = _C_ops.transpose(var, perm) return out def _scalar_add_(var, value): diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 8a833eb7a04e86d199e5e7b6752812822e39fc79..afebf277d0d216f761224f2fb5325ae21cae2c56 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -20,7 +20,6 @@ from .. import dygraph_utils from . import layers from ..framework import ( Variable, - _non_static_mode, OpProtoHolder, Parameter, _dygraph_tracer, @@ -28,7 +27,6 @@ from ..framework import ( default_main_program, _global_flags, in_dygraph_mode, - _in_legacy_dygraph, ) from ..data_feeder import ( @@ -247,115 +245,81 @@ class BatchNorm(layers.Layer): # variance and variance out share the same memory variance_out = self._variance - if _non_static_mode(): - if in_dygraph_mode(): - batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( - input, - self._mean, - self._variance, - self.weight, - self.bias, - not self.training, - self._momentum, - self._epsilon, - self._data_layout, - self._use_global_stats, - self._trainable_statistics, - ) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn - ) - - elif _in_legacy_dygraph(): - attrs = ( - "momentum", - self._momentum, - "epsilon", - self._epsilon, - "is_test", - not self.training, - "data_layout", - self._data_layout, - "use_mkldnn", - self._use_mkldnn, - "fuse_with_relu", - self._fuse_with_relu, - "use_global_stats", - self._use_global_stats, - 'trainable_statistics', - self._trainable_statistics, - ) - batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm( - input, - self.weight, - self.bias, - self._mean, - self._variance, - None, - mean_out, - variance_out, - *attrs - ) - + if in_dygraph_mode(): + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, + self._mean, + self._variance, + self.weight, + self.bias, + not self.training, + self._momentum, + self._epsilon, + self._data_layout, + self._use_global_stats, + self._trainable_statistics, + ) return dygraph_utils._append_activation_in_dygraph( batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn ) + else: + check_variable_and_dtype( + input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' + ) - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' - ) - - attrs = { - "momentum": self._momentum, - "epsilon": self._epsilon, - "is_test": self._is_test, - "data_layout": self._data_layout, - "use_mkldnn": False, - "fuse_with_relu": self._fuse_with_relu, - "use_global_stats": self._use_global_stats, - "trainable_statistics": self._trainable_statistics, - } - - inputs = { - "X": [input], - "Scale": [self.weight], - "Bias": [self.bias], - "Mean": [self._mean], - "Variance": [self._variance], - } - - saved_mean = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - 
saved_variance = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - reserve_space = self._helper.create_variable_for_type_inference( - dtype=self._helper.input_dtype(input), stop_gradient=True - ) - - batch_norm_out = ( - input - if self._in_place - else self._helper.create_variable_for_type_inference(self._dtype) - ) + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": self._is_test, + "data_layout": self._data_layout, + "use_mkldnn": False, + "fuse_with_relu": self._fuse_with_relu, + "use_global_stats": self._use_global_stats, + "trainable_statistics": self._trainable_statistics, + } + + inputs = { + "X": [input], + "Scale": [self.weight], + "Bias": [self.bias], + "Mean": [self._mean], + "Variance": [self._variance], + } + + saved_mean = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + saved_variance = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + reserve_space = self._helper.create_variable_for_type_inference( + dtype=self._helper.input_dtype(input), stop_gradient=True + ) - outputs = { - "Y": [batch_norm_out], - "MeanOut": [mean_out], - "VarianceOut": [variance_out], - "SavedMean": [saved_mean], - "SavedVariance": [saved_variance], - } - if reserve_space is not None: - outputs["ReserveSpace"] = [reserve_space] + batch_norm_out = ( + input + if self._in_place + else self._helper.create_variable_for_type_inference( + self._dtype + ) + ) - self._helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs - ) + outputs = { + "Y": [batch_norm_out], + "MeanOut": [mean_out], + "VarianceOut": [variance_out], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance], + } + if reserve_space is not None: + outputs["ReserveSpace"] = [reserve_space] + + self._helper.append_op( + type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs + ) - # Currently, we don't support inplace in dygraph mode - return self._helper.append_activation(batch_norm_out, self._act) + # Currently, we don't support inplace in dygraph mode + return self._helper.append_activation(batch_norm_out, self._act) class RowConv(layers.Layer): @@ -410,7 +374,7 @@ class RowConv(layers.Layer): self, name_scope, future_context_size, param_attr=None, act=None ): assert ( - not _non_static_mode() + not in_dygraph_mode() ), "RowConv is not supported by dynamic graph mode yet!" 
super().__init__(name_scope) self._act = act diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 84a011e6fb2b23e29c337a8c095c010b18bff274..936c6ee7034393fe4dac6748826d63d5fc8eabd6 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -32,8 +32,6 @@ from ..layers import collective from paddle.fluid.dygraph import base as imperative_base from paddle.fluid.framework import ( ParamBase, - _in_legacy_dygraph, - _non_static_mode, in_dygraph_mode, ) @@ -302,23 +300,7 @@ def _reshape_inplace(x, shape): @framework.dygraph_only def _split_tensors(coalesced_grads_and_grad_vars): - if _in_legacy_dygraph(): - for ( - coalesced_grad, - origin_grad_vars, - grad_shapes, - ) in coalesced_grads_and_grad_vars: - grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes] - framework._dygraph_tracer().trace_op( - type='split', - inputs={'X': coalesced_grad}, - outputs={'Out': origin_grad_vars}, - attrs={'sections': grad_var_len, 'axis': 0}, - ) - for g_var, g_shape in zip(origin_grad_vars, grad_shapes): - _reshape_inplace(x=g_var, shape=g_shape) - assert g_var.shape == g_shape - elif in_dygraph_mode(): + if in_dygraph_mode(): for ( coalesced_grad, origin_grad_vars, @@ -587,7 +569,7 @@ class DataParallel(layers.Layer): super().__init__(layers.full_name() + "_data_parallel") assert ( - _non_static_mode() + in_dygraph_mode() ), "It's not supported to construct DataParallel in static mode." self._layers = layers @@ -704,21 +686,6 @@ class DataParallel(layers.Layer): [self.last_comm_buffer_size, self.comm_buffer_size], self.find_unused_parameters, ) - elif _in_legacy_dygraph(): - self.group_indices = core.assign_group_by_size( - trainable_parameters, - is_sparse_gradient, - [self.last_comm_buffer_size, self.comm_buffer_size], - ) - - self._reducer = core.Reducer( - trainable_parameters, - list(reversed(self.group_indices)), - is_sparse_gradient, - parallel_helper.__parallel_ctx__clz__, - [self.last_comm_buffer_size, self.comm_buffer_size], - self.find_unused_parameters, - ) def _find_varbase(self, obj): var_type = core.eager.Tensor if in_dygraph_mode() else core.VarBase diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 3a94b51219e361517a61ae5046a1dae98c885d1a..3b89aa5115740c52992680251d605ca668231d3f 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -20,7 +20,7 @@ import sys import paddle from .. import framework -from ..framework import convert_np_dtype_to_dtype_, _in_legacy_dygraph +from ..framework import convert_np_dtype_to_dtype_ from .. import core from .. 
import unique_name from ..framework import ( diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py index 7cd3c98a68634bcfc7716f76676b110e0436f80f..c9fcefa7620530dd951a06e2dc45bae2e52b4577 100644 --- a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py @@ -42,7 +42,9 @@ class TestBprLossOp1(OpTest): self.outputs = {"Y": bpr_loss} def test_check_output(self): + paddle.enable_static() self.check_output() + paddle.disable_static() def test_check_grad(self): self.check_grad(["X"], "Y", numeric_grad_delta=0.001) diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index 4f941ebb762919167abee6a3fb338f3a1d5901da..6b9df6bd495502c496e4fdde207fe250fb11a1b6 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -522,9 +522,11 @@ class TestCUDNNLstmOp(OpTest): place, atol=1e-5, no_check_set=['Reserve', 'StateOut'] ) else: + paddle.enable_static() self.check_output_with_place( place, no_check_set=['Reserve', 'StateOut'] ) + paddle.disable_static() def test_grad_with_place(self): place = core.CUDAPlace(0)
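
Note on the test changes above: they appear to rely on OpTest's check_output/check_output_with_place building and running a static Program, so the tests switch Paddle into static mode around the check and restore dynamic mode afterwards to avoid leaking state into other eager-mode tests. A minimal standalone sketch of that toggling pattern (illustrative only, not part of the patch; it uses the public paddle.enable_static/disable_static and paddle.in_dynamic_mode APIs):

    import paddle

    # Dygraph (dynamic) mode is the default in recent Paddle releases.
    assert paddle.in_dynamic_mode()

    # Switch to static mode, e.g. before an OpTest-style check that
    # appends operators to a static Program and runs an Executor.
    paddle.enable_static()
    assert not paddle.in_dynamic_mode()

    # Restore dynamic mode so subsequent eager-mode code is unaffected.
    paddle.disable_static()
    assert paddle.in_dynamic_mode()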