diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 12d31aee41e394968d58753f2b54fcce8648a35e..bba9c226dc07b68a9ee9835486658478cc8a59b9 100755
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -22,23 +22,32 @@ except ImportError:
         )
 
 from .batch import batch  # noqa: F401
-from .fluid import monkey_patch_variable
-from .fluid.dygraph import monkey_patch_math_varbase
+from .framework import monkey_patch_variable
+from .framework import monkey_patch_math_varbase
 monkey_patch_variable()
 monkey_patch_math_varbase()
+
+from .framework import disable_signal_handler  # noqa: F401
+from .framework import get_flags  # noqa: F401
+from .framework import set_flags  # noqa: F401
+
+from .framework import disable_static  # noqa: F401
+from .framework import enable_static  # noqa: F401
+from .framework import in_dynamic_mode  # noqa: F401
+
 from .framework.dtype import dtype as dtype  # noqa: F401
-from paddle.framework.dtype import uint8  # noqa: F401
-from paddle.framework.dtype import int8  # noqa: F401
-from paddle.framework.dtype import int16  # noqa: F401
-from paddle.framework.dtype import int32  # noqa: F401
-from paddle.framework.dtype import int64  # noqa: F401
-from paddle.framework.dtype import float16  # noqa: F401
-from paddle.framework.dtype import float32  # noqa: F401
-from paddle.framework.dtype import float64  # noqa: F401
-from paddle.framework.dtype import bfloat16  # noqa: F401
-from paddle.framework.dtype import bool  # noqa: F401
-from paddle.framework.dtype import complex64  # noqa: F401
-from paddle.framework.dtype import complex128  # noqa: F401
+from .framework.dtype import uint8  # noqa: F401
+from .framework.dtype import int8  # noqa: F401
+from .framework.dtype import int16  # noqa: F401
+from .framework.dtype import int32  # noqa: F401
+from .framework.dtype import int64  # noqa: F401
+from .framework.dtype import float16  # noqa: F401
+from .framework.dtype import float32  # noqa: F401
+from .framework.dtype import float64  # noqa: F401
+from .framework.dtype import bfloat16  # noqa: F401
+from .framework.dtype import bool  # noqa: F401
+from .framework.dtype import complex64  # noqa: F401
+from .framework.dtype import complex128  # noqa: F401
 from .framework import VarBase as Tensor  # noqa: F401
 Tensor.__qualname__ = 'Tensor'  # noqa: F401
 import paddle.compat  # noqa: F401
@@ -142,6 +151,7 @@ from .tensor.manipulation import scatter_nd_add  # noqa: F401
 from .tensor.manipulation import scatter_nd  # noqa: F401
 from .tensor.manipulation import shard_index  # noqa: F401
 from .tensor.manipulation import slice  # noqa: F401
+from .tensor.manipulation import crop  # noqa: F401
 from .tensor.manipulation import split  # noqa: F401
 from .tensor.manipulation import squeeze  # noqa: F401
 from .tensor.manipulation import squeeze_  # noqa: F401
@@ -316,23 +326,15 @@ from .tensor.stat import quantile  # noqa: F401
 from .device import get_cudnn_version  # noqa: F401
 from .device import set_device  # noqa: F401
 from .device import get_device  # noqa: F401
-from .fluid.framework import is_compiled_with_cinn  # noqa: F401
-from .fluid.framework import is_compiled_with_cuda  # noqa: F401
-from .fluid.framework import is_compiled_with_rocm  # noqa: F401
-from .fluid.framework import disable_signal_handler  # noqa: F401
-from .fluid.framework import get_flags  # noqa: F401
-from .fluid.framework import set_flags  # noqa: F401
 from .device import is_compiled_with_xpu  # noqa: F401
 from .device import is_compiled_with_npu  # noqa: F401
 from .device import is_compiled_with_ipu  # noqa: F401
 from .device import is_compiled_with_mlu  # noqa: F401
+from .device import is_compiled_with_cinn  # noqa: F401
+from .device import is_compiled_with_cuda  # noqa: F401
+from .device import is_compiled_with_rocm  # noqa: F401
 from .device import XPUPlace  # noqa: F401
-from .fluid.dygraph.base import enable_dygraph as disable_static  # noqa: F401
-from .fluid.dygraph.base import disable_dygraph as enable_static  # noqa: F401
-from .fluid.framework import in_dygraph_mode as in_dynamic_mode  # noqa: F401
-from .fluid.layers import crop_tensor as crop  # noqa: F401
-
 
 # high-level api
 from .hapi import Model  # noqa: F401
 from . import callbacks  # noqa: F401
diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py
index 72e8e73ce7c2e51b9f7d1e38dba1098149ffcf89..7da9c0accfb49e7641e2ed6fa8e65b5ce92c4a0b 100644
--- a/python/paddle/framework/__init__.py
+++ b/python/paddle/framework/__init__.py
@@ -39,4 +39,13 @@ from .io import save  # noqa: F401
 from .io import load  # noqa: F401
 
 from ..fluid.dygraph.parallel import DataParallel  # noqa: F401
+from ..fluid import monkey_patch_variable
+from ..fluid.dygraph import monkey_patch_math_varbase
+from ..fluid.framework import disable_signal_handler  # noqa: F401
+from ..fluid.framework import get_flags  # noqa: F401
+from ..fluid.framework import set_flags  # noqa: F401
+from ..fluid.dygraph.base import enable_dygraph as disable_static  # noqa: F401
+from ..fluid.dygraph.base import disable_dygraph as enable_static  # noqa: F401
+from ..fluid.framework import in_dygraph_mode as in_dynamic_mode  # noqa: F401
+
 __all__ = []
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index ad8f28f40bb58102a93fc98ddf260c6cc72ab1a2..c0820e140268b622f6c39aa86b955024076f343a 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -14,7 +14,6 @@
 
 # TODO: import all neural network related api under this directory,
 # including layers, linear, conv, rnn etc.
-
 from ..fluid.dygraph.layers import Layer  # noqa: F401
 from ..fluid.dygraph.container import LayerList  # noqa: F401
 from ..fluid.dygraph.container import ParameterList  # noqa: F401
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index ac08ac9391eb343c7baf6bdf3e9ef74b32eea108..91449ef538ff3387077a32d0099bb50432490bea 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -22,11 +22,11 @@ from ...tensor.math import multiply
 import warnings
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_
-from ...fluid import core
+from ...fluid.framework import convert_np_dtype_to_dtype_
 from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
 import paddle
-from paddle import _C_ops
+from paddle import _C_ops, in_dynamic_mode
+from paddle.framework import core
 
 __all__ = []
 
@@ -61,7 +61,7 @@ def celu(x, alpha=1.0, name=None):
     if alpha == 0:
         raise ZeroDivisionError("alpha cannot be 0 for celu")
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.celu(x, 'alpha', alpha)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'celu')
@@ -110,7 +110,7 @@ def elu(x, alpha=1.0, name=None):
            #  [ 1.        15.6      ]]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.elu(x, 'alpha', alpha)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
@@ -174,7 +174,7 @@ def gelu(x, approximate=False, name=None):
            # [ 0.84119201, 1.39957154]]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.gelu(x, 'approximate', approximate)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'gelu')
@@ -222,7 +222,7 @@ def hardshrink(x, threshold=0.5, name=None):
            out = F.hardshrink(x) # [-1., 0., 2.5]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.hard_shrink(x, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -273,7 +273,7 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
            out = F.hardtanh(x) # [-1., 0.3, 1.]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.brelu(x, 't_min', min, 't_max', max)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -328,7 +328,7 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
            out = F.hardsigmoid(x) # [0., 1., 0.666667]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.hard_sigmoid(x, 'slope', slope, 'offset', offset)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -382,7 +382,7 @@ def hardswish(x, name=None):
            out = F.hardswish(x) # [0., 5., 0.666667]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.hard_swish(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -427,7 +427,7 @@ def leaky_relu(x, negative_slope=0.01, name=None):
            out = F.leaky_relu(x) # [-0.02, 0., 1.]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.leaky_relu(x, 'alpha', negative_slope)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -518,7 +518,7 @@ def prelu(x, weight, data_format="NCHW", name=None):
             1], "The weight size should be equal to x input channel in prelu() when weight shape is not [1]."
         mode = 'channel'
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.prelu(x, weight, 'mode', mode, 'data_format',
                             data_format)
     helper = LayerHelper('prelu', **locals())
@@ -560,7 +560,7 @@ def relu(x, name=None):
            out = F.relu(x) # [0., 0., 1.]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.relu(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu')
@@ -605,7 +605,7 @@ def log_sigmoid(x, name=None):
            out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.logsigmoid(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -672,7 +672,7 @@ def maxout(x, groups, axis=1, name=None):
            #     [0.7142536  0.88725346 0.61093384 0.38833922]]]]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.maxout(x, 'groups', groups, 'axis', axis)
 
     check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'maxout')
@@ -721,7 +721,7 @@ def relu6(x, name=None):
            out = F.relu6(x) # [0, 0.3, 6]
     """
     threshold = 6.0
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.relu6(x, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu6')
@@ -780,7 +780,7 @@ def selu(x,
         raise ValueError(
             "The alpha must be no less than zero. Received: {}.".format(alpha))
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.selu(x, 'scale', scale, 'alpha', alpha)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'selu')
@@ -821,7 +821,7 @@ def silu(x, name=None):
            out = F.silu(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.silu(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'silu')
@@ -951,7 +951,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
         dtype = convert_np_dtype_to_dtype_(dtype)
     use_cudnn = True
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         outs_cast = x if dtype is None \
             else _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
         return _C_ops.softmax(outs_cast, 'axis', axis, 'use_cudnn', use_cudnn)
@@ -1026,7 +1026,7 @@ def softplus(x, beta=1, threshold=20, name=None):
            x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
            out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1081,7 +1081,7 @@ def softshrink(x, threshold=0.5, name=None):
             "The threshold must be no less than zero. Received: {}.".format(
                 threshold))
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.softshrink(x, 'lambda', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1122,7 +1122,7 @@ def softsign(x, name=None):
            x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
            out = F.softsign(x) # [-0.285714, -0.166667, 0.0909091, 0.230769]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.softsign(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1160,7 +1160,7 @@ def swish(x, name=None):
            out = F.swish(x) # [-0.238406, 0., 0.731059]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.swish(x, 'beta', 1.0)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'swish')
@@ -1204,7 +1204,7 @@ def mish(x, name=None):
            x = paddle.to_tensor([-5., 0., 5.])
            out = F.mish(x) # [-0.03357624, 0., 4.99955208]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.mish(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'mish')
@@ -1240,7 +1240,7 @@ def tanhshrink(x, name=None):
            x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
            out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.tanh_shrink(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1286,7 +1286,7 @@ def thresholded_relu(x, threshold=1.0, name=None):
            out = F.thresholded_relu(x) # [2., 0., 0.]
""" - if in_dygraph_mode(): + if in_dynamic_mode(): return _C_ops.thresholded_relu(x, 'threshold', threshold) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], @@ -1360,7 +1360,7 @@ def log_softmax(x, axis=-1, dtype=None, name=None): if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): dtype = convert_np_dtype_to_dtype_(dtype) - if in_dygraph_mode(): + if in_dynamic_mode(): if dtype is not None: x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) return _C_ops.log_softmax(x, 'axis', axis) @@ -1498,7 +1498,7 @@ def gumbel_softmax(x, temperature=1.0, hard=False, axis=-1, name=None): # [0.00000000, 0.00000000, 0.00000000, 0.00001258, 0.99998736, 0.00000000]] """ - if in_dygraph_mode(): + if in_dynamic_mode(): return _C_ops.gumbel_softmax(x, 'temperature', temperature, 'hard', hard, 'axis', axis) diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 5a010ad2f20c55e92a11508407343caf0bf300c0..ed668ed124c2318c88cde2bd4acb31ce2b2e4f7c 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -14,13 +14,11 @@ import warnings import paddle -from ...fluid.framework import in_dygraph_mode, default_main_program from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.tensor import fill_constant from ...tensor import concat from ...tensor.creation import zeros from paddle.static import Variable -from ...fluid.layers import core from ...fluid import dygraph_utils # TODO: define the common functions to build a neural network from ...fluid.layers import unfold # noqa: F401 @@ -30,13 +28,17 @@ from ...tensor import clip from ...tensor import sum from ...tensor import sqrt from ...fluid.data_feeder import check_variable_and_dtype, check_dtype -from ...fluid.framework import in_dygraph_mode, _varbase_creator +from ...fluid.framework import _varbase_creator -from ...fluid.framework import in_dygraph_mode -from ...fluid import core, dygraph_utils -from ...fluid import core, layers +from ...fluid import dygraph_utils +from ...fluid import layers from ...fluid.data_feeder import check_variable_and_dtype + from paddle import _C_ops +from paddle.framework import in_dynamic_mode +from paddle.tensor.creation import full +from paddle.framework import core +from paddle.static import default_main_program __all__ = [] @@ -353,11 +355,11 @@ def interpolate(x, if out_shape is not None and scale is not None: raise ValueError("Only one of size or scale_factor should be defined.") if out_shape is not None: - if isinstance(out_shape, Variable) and not in_dygraph_mode(): + if isinstance(out_shape, Variable) and not in_dynamic_mode(): out_shape.stop_gradient = True inputs['OutSize'] = out_shape else: - if in_dygraph_mode(): + if in_dynamic_mode(): if isinstance(out_shape, Variable): out_shape = list(out_shape.numpy()) for i, dim in enumerate(out_shape): @@ -428,7 +430,7 @@ def interpolate(x, attrs['out_w'] = out_shape[2] else: - if in_dygraph_mode() and isinstance(scale, Variable): + if in_dynamic_mode() and isinstance(scale, Variable): scale = list(scale.numpy()) if isinstance(scale, Variable): scale.stop_gradient = True @@ -454,7 +456,7 @@ def interpolate(x, "Attr(scale)'s type should be float, int, list, tuple, or Tensor." 
             )
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attr_list = []
         for k, v in attrs.items():
             attr_list.append(k)
@@ -719,7 +721,7 @@ def bilinear(x1, x2, weight, bias=None, name=None):
 
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.bilinear_tensor_product(x1, x2, weight, bias)
 
     check_variable_and_dtype(x1, 'x1', ['float32', 'float64'], 'bilinear')
@@ -891,7 +893,7 @@ def dropout(x,
         seed = None
         mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode  #semantic transfer
 
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             if default_main_program().random_seed != 0:
                 seed = default_main_program().random_seed
             out, mask = _C_ops.dropout(
@@ -930,7 +932,7 @@ def dropout(x,
             attrs=attrs)
         return out
     else:  #sometimes called dropout_nd #TODO: optimize with c++
-        if not in_dygraph_mode():
+        if not in_dynamic_mode():
            check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'dropout')
         dtype = x.dtype
         keep_prob = 1 - p
@@ -943,7 +945,7 @@ def dropout(x,
 
         #get mask shape
         input_shape = x.shape
-        if not in_dygraph_mode():
+        if not in_dynamic_mode():
             input_shape_tensor = paddle.shape(x)
         drop_axes = [axis] if isinstance(axis, int) else list(axis)
         if min(drop_axes) < 0 or max(drop_axes) > len(input_shape) - 1:
@@ -954,7 +956,7 @@ def dropout(x,
                 "length of axis should not be greater than dimensions of x:{}, but get length of axis: {}".
                 format(len(input_shape), len(drop_axes)))
         mask_shape = [1] * len(input_shape)
-        if not in_dygraph_mode():
+        if not in_dynamic_mode():
             for i in drop_axes:
                 mask_shape[i] = input_shape_tensor[i]
         else:
@@ -964,7 +966,7 @@ def dropout(x,
         #get mask
         random_tensor = paddle.uniform(
             mask_shape, dtype='float32', min=0., max=1.0)
-        p = layers.fill_constant(shape=[1], dtype='float32', value=p)
+        p = full(shape=[1], fill_value=p, dtype='float32')
         keep_mask = paddle.greater_equal(random_tensor, p)
 
         scale_input = paddle.cast(scale_input, dtype)
@@ -1122,7 +1124,7 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
     if p < 0 or p > 1:
         raise ValueError("p argument should between 0 and 1")
 
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                  'alpha_dropout')
 
@@ -1142,16 +1144,15 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
         #get mask
         random_tensor = paddle.uniform(
             input_shape, dtype='float32', min=0., max=1.0)
-        p = layers.fill_constant(shape=[1], dtype='float32', value=p)
+        p = full(shape=[1], fill_value=p, dtype='float32')
         keep_mask = paddle.greater_equal(random_tensor, p)
         keep_mask = paddle.cast(keep_mask, dtype)
         drop_mask = paddle.subtract(
-            layers.fill_constant(
-                shape=input_shape, dtype=dtype, value=1.),
-            keep_mask)
+            full(
+                shape=input_shape, fill_value=1., dtype=dtype), keep_mask)
 
         #apply mask
-        b = layers.fill_constant(shape=[1], dtype=dtype, value=b)
+        b = full(shape=[1], fill_value=b, dtype=dtype)
         y = paddle.add(paddle.multiply(x, keep_mask),
                        paddle.scale(
                            drop_mask, scale=alpha_p))
@@ -1347,7 +1348,7 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None):
         unsqueezed_dim = [1]
         x = unsqueeze(x, axis=unsqueezed_dim)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         if isinstance(pad, Variable):
             pad = pad.numpy()
         out = _C_ops.pad3d(x, "paddings", pad, "mode", mode, "value", value,
@@ -1519,7 +1520,7 @@ def linear(x, weight, bias=None, name=None):
          #  [0.9440598  0.9440598  0.9440598  0.9440598 ]
          #  [2.1077576  2.1077576  2.1077576  2.1077576 ]]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         pre_bias = _C_ops.matmul_v2(x, weight, 'trans_x', False, 'trans_y',
                                     False)
@@ -1614,7 +1615,7 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
     if epsilon > 1. or epsilon < 0.:
         raise ValueError("The value of epsilon must be between 0 and 1.")
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.label_smooth(label, prior_dist, 'epsilon',
                                    float(epsilon))
 
     check_variable_and_dtype(label, 'label', ['float32', 'float64'],
@@ -1765,7 +1766,7 @@ def class_center_sample(label, num_classes, num_samples, group=None):
     if (seed is None or seed == 0) and default_main_program().random_seed != 0:
         seed = default_main_program().random_seed
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         remapped_label, sampled_class_center = _C_ops.class_center_sample(
             label, 'num_classes', num_classes, 'num_samples', num_samples,
             'ring_id', ring_id, 'nranks', nranks, 'rank', rank, 'fix_seed',
diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py
index 31cb91bc93b48f243428fb8f7048422e6d74c9d7..f7d765d8541164bfe1ecb26436cf22a88a19fc14 100644
--- a/python/paddle/nn/functional/conv.py
+++ b/python/paddle/nn/functional/conv.py
@@ -16,9 +16,8 @@
 from paddle.fluid.framework import _global_flags
 import numpy as np
 from ...device import get_cudnn_version
-from ...fluid.framework import in_dygraph_mode
 from ...static import Variable
-from ...fluid import core, dygraph_utils, get_flags
+from ...fluid import dygraph_utils
 from ...fluid.layers.utils import convert_to_list, _is_symmetric_padding
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...framework import ParamAttr
@@ -27,6 +26,11 @@ from paddle import _C_ops
 from ...tensor.manipulation import unsqueeze, squeeze
 from ...tensor.math import add
 from ...fluid.layers import nn
+from paddle.device import is_compiled_with_cuda
+from paddle.device import is_compiled_with_rocm
+from paddle.device import is_compiled_with_npu
+from paddle import in_dynamic_mode
+from paddle import get_flags
 
 __all__ = []
 
@@ -114,7 +118,7 @@ def _conv_nd(x,
              name=None):
 
     # Due to the poor performance of NHWC, we transpose the input to NCHW.
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
                  'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn',
                  use_mkldnn, 'fuse_relu_before_depthwise_conv', False,
@@ -342,13 +346,13 @@ def conv1d(x,
     l_type = "conv2d"
 
     # When "groups==num_channels and num_filters% num_channels == 0" using depthwise_conv2d has better performance
-    if (core.is_compiled_with_cuda() and num_channels == groups and
+    if (is_compiled_with_cuda() and num_channels == groups and
             num_channels != 1 and num_filters % num_channels == 0):
         l_type = 'depthwise_conv2d'
         use_cudnn = False
 
     # NPU only supports depthwise_conv2d when  "input_channel = output_channel = groups"
-    if core.is_compiled_with_npu():
+    if is_compiled_with_npu():
         if (num_channels == groups and num_channels == num_filters):
             l_type = 'depthwise_conv2d'
         else:
@@ -357,7 +361,7 @@ def conv1d(x,
     squeeze_aixs = -3 if channel_last else -2
     x = unsqueeze(x, axis=[squeeze_aixs])
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
                  'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
                  'fuse_relu_before_depthwise_conv', False, "padding_algorithm",
@@ -553,7 +557,7 @@ def conv2d(x,
 
     cudnn_version = get_cudnn_version()
 
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                          cudnn_version is not None) else False
 
     use_mkldnn = _global_flags()["FLAGS_use_mkldnn"]
@@ -567,20 +571,20 @@ def conv2d(x,
     if (num_channels == groups and num_channels != 1 and
             num_filters % num_channels == 0):
         l_type = 'depthwise_conv2d'
-        if core.is_compiled_with_rocm():
+        if is_compiled_with_rocm():
             use_cudnn = True
         else:
             use_cudnn = False
 
     # NPU only supports depthwise_conv2d when  "input_channel = output_channel = groups"
-    if core.is_compiled_with_npu():
+    if is_compiled_with_npu():
         if (num_channels == groups and num_channels == num_filters):
             l_type = 'depthwise_conv2d'
         else:
             l_type = 'conv2d'
 
-    if (core.is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")
-        ["FLAGS_conv2d_disable_cudnn"]):
+    if (is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")[
+            "FLAGS_conv2d_disable_cudnn"]):
         use_cudnn = False
 
     return _conv_nd(x, weight, bias, stride, padding, padding_algorithm,
@@ -815,7 +819,7 @@ def conv1d_transpose(x,
     x = unsqueeze(x, axis=[squeeze_axis])
     weight = unsqueeze(weight, axis=[-1])
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attrs = ('output_padding', output_padding, 'output_size', output_size,
                  'strides', stride, 'paddings', padding, 'padding_algorithm',
                  padding_algorithm, 'dilations', dilation, 'groups', groups,
@@ -1026,7 +1030,7 @@ def conv2d_transpose(x,
 
     cudnn_version = get_cudnn_version()
 
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                          cudnn_version is not None) else False
 
     # update attrs
@@ -1057,7 +1061,7 @@ def conv2d_transpose(x,
             op_type = 'depthwise_conv2d_transpose'
             use_cudnn = False
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attrs = ('output_padding', output_padding, 'output_size', output_size,
                  'strides', stride, 'paddings', padding, 'padding_algorithm',
                  padding_algorithm, 'dilations', dilation, 'groups', groups,
@@ -1242,7 +1246,7 @@ def conv3d(x,
                    groups))
 
     cudnn_version = get_cudnn_version()
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                          cudnn_version is not None) else False
 
     padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
@@ -1458,13 +1462,13 @@ def conv3d_transpose(x,
     cudnn_version = get_cudnn_version()
 
     #TODO(LielinJiang): whether to use cudnn according to the version of cudnn
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                          cudnn_version is not None) else False
 
     op_type = 'conv3d_transpose'
     data_format_ = "NHWC" if channel_last else "NCHW"
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attrs = ('output_padding', output_padding, 'output_size', output_size,
                  'paddings', padding, "padding_algorithm", padding_algorithm,
                  'strides', stride, 'dilations', dilation, 'groups', groups,
diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py
index bccb7bc7334fb04105f0c79fcd826e9a8624192c..6a8686b612e7f3dc1feecc2bcc905bcb40b212a4 100644
--- a/python/paddle/nn/functional/extension.py
+++ b/python/paddle/nn/functional/extension.py
@@ -17,12 +17,12 @@
 import numpy as np
 
 from ...fluid.data_feeder import check_dtype
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
 from ...static import Variable
 from ...tensor.creation import assign
-from ...fluid import core, dygraph_utils
+from ...fluid import dygraph_utils
 from ...fluid.layers.layer_function_generator import templatedoc
-from ...fluid.layers.sequence_lod import sequence_mask
+from ...fluid.layers.sequence_lod import sequence_mask    #noqa: F401
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -125,7 +125,7 @@ def diag_embed(input, offset=0, dim1=-2, dim2=-1):
             "dim1 and dim2 cannot be the same dimension." \
             "But received dim1 = %d, dim2 = %d\n"%(dim1, dim2)
 
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         __check_input(input, offset, dim1, dim2)
     helper = LayerHelper("diag_embed", **locals())
 
diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py
index f71d3001f6f3b426e0db1fb36733beaceff3b849..de8a7ff6d3c7b6cd87d6301f2cd0bb7af119a74d 100644
--- a/python/paddle/nn/functional/input.py
+++ b/python/paddle/nn/functional/input.py
@@ -14,12 +14,11 @@
 from __future__ import print_function
 import warnings
-from ...fluid.framework import in_dygraph_mode
 from ...static import Variable
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.layers import core
 from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -87,7 +86,7 @@ def one_hot(x, num_classes, name=None):
 
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.one_hot_v2(x, 'depth', num_classes,
                                  'allow_out_of_range', False)
     else:
@@ -196,7 +195,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
         raise ValueError("padding_idx must be within [-{}, {})".format(
             weight.shape[0], weight.shape[0]))
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.lookup_table_v2(
             weight, x, 'is_sparse', sparse, 'is_distributed', False,
             'remote_prefetch', False, 'padding_idx', padding_idx)
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 8dc040325934f42eca30960fcd70abdfe87a11c9..636d2f645c5b0bd156ee7491ab847ec5c89f9ad7 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -14,15 +14,12 @@
 # limitations under the License.
 
 import paddle
-from ...fluid.layer_helper import LayerHelper
 from ...fluid.data_feeder import check_variable_and_dtype
-import paddle.fluid as fluid
 
 # TODO: define loss functions of neural network
 import numpy as np
 import paddle
 import paddle.fluid as fluid
-from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.layers.nn import _elementwise_op_in_dygraph
 from ...fluid.layers import dice_loss  # noqa: F401
 from ...fluid.layers import log_loss  # noqa: F401
@@ -34,11 +31,12 @@ from ...fluid.layers import square_error_cost  # noqa: F401
 from ...fluid.layers import edit_distance  # noqa: F401
 from ...fluid.layers import huber_loss
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
 from ...fluid.framework import _varbase_creator
 from ...static import Variable
 from paddle.utils import deprecated
 from paddle import _C_ops
+from paddle import in_dynamic_mode
+from paddle.framework import core
 
 __all__ = []
 
@@ -115,7 +113,7 @@ def binary_cross_entropy(input, label, weight=None, reduction='mean',
             "'mean' or 'none', but received %s, which is not allowed." %
             reduction)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         out = _C_ops.bce_loss(input, label)
         if weight is not None:
             out = _C_ops.elementwise_mul(out, weight, 'axis', -1)
@@ -249,7 +247,7 @@ def binary_cross_entropy_with_logits(logit,
             "should be 'sum', 'mean' or 'none', but received %s, which is not allowed." %
             reduction)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         one = _varbase_creator(dtype=logit.dtype)
         _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                              'dtype', one.dtype,
@@ -284,8 +282,7 @@ def binary_cross_entropy_with_logits(logit,
     out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(
         logit, label, name=sigmoid_name)
 
-    one = paddle.fluid.layers.fill_constant(
-        shape=[1], value=1.0, dtype=logit.dtype)
+    one = paddle.full(shape=[1], fill_value=1.0, dtype=logit.dtype)
     if pos_weight is not None:
         fluid.data_feeder.check_variable_and_dtype(
             pos_weight, 'pos_weight', ['float32', 'float64'],
@@ -392,7 +389,7 @@ def hsigmoid_loss(input,
             #  [2.2407534]]
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         out, _, _ = _C_ops.hierarchical_sigmoid(
             input, weight, label, path_table, path_code, bias, 'num_classes',
             num_classes, 'is_sparse', is_sparse, 'remote_prefetch', is_sparse)
@@ -569,7 +566,7 @@ def margin_ranking_loss(input,
         raise ValueError(
             "The value of 'reduction' in MarginRankingLoss should be 'sum', 'mean' or 'none', but "
            "received %s, which is not allowed." % reduction)
-    if fluid.framework.in_dygraph_mode():
+    if in_dynamic_mode():
         out = _C_ops.elementwise_sub(other, input)
         out = _C_ops.elementwise_mul(out, label)
         if margin != 0.0:
@@ -595,8 +592,7 @@ def margin_ranking_loss(input,
     if margin != 0.0:
         margin_var = out.block.create_var(dtype=out.dtype)
-        paddle.fluid.layers.fill_constant(
-            [1], out.dtype, margin, out=margin_var)
+        margin_var = paddle.full(shape=[1], fill_value=margin, dtype=out.dtype)
         out = paddle.add(out, margin_var)
 
     result_out = helper.create_variable_for_type_inference(input.dtype)
@@ -686,7 +682,7 @@ def l1_loss(input, label, reduction='mean', name=None):
             "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
             "received %s, which is not allowed." % reduction)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         unreduced = _elementwise_op_in_dygraph(
             input, label, axis=-1, act='abs', op_name='elementwise_sub')
         if reduction == 'mean':
@@ -776,7 +772,7 @@ def nll_loss(input,
                 input_dims))
     n = input_shape[0]
     c = input_shape[1]
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         if input_dims != 2 and input_dims != 4:
             input, _ = _C_ops.reshape2(input, None, 'shape', [n, c, 1, -1])
             label, _ = _C_ops.reshape2(label, None, 'shape', [n, 1, -1])
@@ -995,7 +991,7 @@ def mse_loss(input, label, reduction='mean', name=None):
             "'reduction' in 'mse_loss' should be 'sum', 'mean' or 'none', "
             "but received {}.".format(reduction))
 
-    if not paddle.fluid.framework.in_dygraph_mode():
+    if not in_dynamic_mode():
         paddle.fluid.data_feeder.check_variable_and_dtype(
             input, 'input', ['float32', 'float64'], 'mse_loss')
         paddle.fluid.data_feeder.check_variable_and_dtype(
@@ -1099,7 +1095,7 @@ def ctc_loss(log_probs,
     loss_out = fluid.layers.warpctc(log_probs, labels, blank, norm_by_times,
                                     input_lengths, label_lengths)
 
-    loss_out = fluid.layers.squeeze(loss_out, [-1])
+    loss_out = paddle.squeeze(loss_out, [-1])
     assert reduction in ['mean', 'sum', 'none']
     if reduction == 'mean':
         loss_out = paddle.mean(loss_out / label_lengths)
@@ -1319,7 +1315,7 @@ def margin_cross_entropy(logits,
     if input_dims - 1 == label_dims:
         label = paddle.unsqueeze(label, axis=-1)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         softmax, loss = _C_ops.margin_cross_entropy(
             logits, label, 'ring_id', ring_id, 'rank', rank, 'nranks', nranks,
             'margin1', margin1, 'margin2', margin2, 'margin3', margin3, 'scale',
@@ -1664,7 +1660,7 @@ def cross_entropy(input,
             (got nput_dims{}, label_dims{})'.format(input_dims, label_dims))
     if input_dims - 1 == label_dims:
         label = paddle.unsqueeze(label, axis=axis)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         if soft_label == False:
             valid_label = paddle.cast(
                 label != ignore_index, dtype=label.dtype) * label
@@ -1978,7 +1974,7 @@ def sigmoid_focal_loss(logit,
                 "Expected one dimension of normalizer in sigmoid_focal_loss but got {}.".
                 format(normalizer_dims))
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         one = _varbase_creator(dtype=logit.dtype)
         _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                              'dtype', one.dtype,
@@ -2025,7 +2021,7 @@ def sigmoid_focal_loss(logit,
     loss = paddle.nn.functional.binary_cross_entropy_with_logits(
         logit, label, reduction='none', name=bce_name)
 
-    pred = fluid.layers.sigmoid(logit)
+    pred = paddle.nn.functional.sigmoid(logit)
     p_t = pred * label + (1 - pred) * (1 - label)
 
     alpha_t = alpha * label + (1 - alpha) * (1 - label)
@@ -2125,7 +2121,7 @@ def hinge_embedding_loss(input, label, margin=1.0, reduction='mean', name=None):
             "'reduction' in 'hinge_embedding_loss' should be 'sum', 'mean' or 'none', "
             "but received {}.".format(reduction))
 
-    if not paddle.fluid.framework.in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                  'hinge_embedding_loss')
         check_variable_and_dtype(label, 'label', ['float32', 'float64'],
diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py
index 9b765a1d7c78243f13bda15f2bbbbd485ee7432a..c59d0eb5e6d115cb06ba27091180178cb813fa35 100644
--- a/python/paddle/nn/functional/norm.py
+++ b/python/paddle/nn/functional/norm.py
@@ -17,13 +17,13 @@
 import paddle
 import paddle.fluid as fluid
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode, core
 from ...framework import create_parameter
 from ..initializer import Constant
 from ...framework import ParamAttr
-from ...fluid import core, dygraph_utils
+from ...fluid import dygraph_utils
 import numbers
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -78,7 +78,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
             # [[0.         0.24253564 0.37139067]
             #  [1.         0.97014254 0.9284767 ]]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         eps = fluid.dygraph.base.to_variable([epsilon], dtype=x.dtype)
         out = _C_ops.p_norm(x, 'axis', axis, 'porder', float(p), 'keepdim',
                             True, 'epsilon', epsilon)
@@ -104,7 +104,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
     helper.append_op(
         type='p_norm', inputs={'X': x}, outputs={'Out': out}, attrs=attrs)
     eps = out.block.create_var(dtype=out.dtype)
-    paddle.fluid.layers.fill_constant([1], out.dtype, epsilon, out=eps)
+    eps = paddle.full(shape=[1], fill_value=epsilon, dtype=out.dtype)
     return paddle.divide(x, paddle.maximum(out, eps), name=name)
 
@@ -180,7 +180,7 @@ def batch_norm(x,
     else:
         trainable_statistics = not use_global_stats
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         # for dygraph need tuple
         attrs = ("momentum", momentum, "epsilon", epsilon, "is_test",
                  not training, "data_layout", data_format, "use_mkldnn", False,
@@ -301,7 +301,7 @@ def layer_norm(x,
                          str_normalized_shape[
                              1:] + ', but got input shape ' + str(input_shape))
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         pre_act, _, _ = _C_ops.layer_norm(x, weight, bias, 'epsilon', epsilon,
                                           'begin_norm_axis', begin_norm_axis)
         return dygraph_utils._append_activation_in_dygraph(pre_act, act=None)
@@ -385,7 +385,7 @@ def instance_norm(x,
 
     """
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         out, _, _ = _C_ops.instance_norm(x, weight, bias, "epsilon", eps,
                                          "momentum", momentum, "data_format",
                                          data_format)
@@ -474,7 +474,7 @@ def local_response_norm(x,
             y = paddle.nn.functional.local_response_norm(x, size=5)
             print(y.shape)  # [3, 3, 112, 112]
     """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32'], 'local_response_norm')
     if data_format not in ['NCL', 'NLC', 'NCHW', 'NHWC', 'NCDHW', 'NDHWC']:
         raise ValueError(
diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py
index 01ddf05fb82d29c04f03c89b89c3ebeb53e79c39..a528a72ec5cacaa9d22a46c9b20d05150cb2bc83 100755
--- a/python/paddle/nn/functional/pooling.py
+++ b/python/paddle/nn/functional/pooling.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 
 # TODO: define pooling functions
-from ...fluid import core
-from ...fluid.framework import in_dygraph_mode
 from ...fluid.layers import utils, LayerHelper
 from ...tensor.manipulation import unsqueeze, squeeze
 from ...fluid.data_feeder import check_type, check_variable_and_dtype
 from paddle import _C_ops
-from paddle import _C_ops
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -210,7 +208,7 @@ def avg_pool1d(x,
     """
     """NCL to NCHW"""
     data_format = "NCHW"
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool1d')
     _check_input(x, 3)
     x = unsqueeze(x, [2])
@@ -232,7 +230,7 @@ def avg_pool1d(x,
     # use 2d to implenment 1d should expand padding in advance.
     padding = _expand_low_nd_padding(padding)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.pool2d(
             x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
             False, 'strides', stride, 'paddings', padding, 'padding_algorithm',
@@ -346,7 +344,7 @@ def avg_pool2d(x,
     padding, padding_algorithm = _update_padding_nd(
         padding, 2, channel_last, ceil_mode=ceil_mode)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.pool2d(x, 'pooling_type', 'avg', 'ksize', kernel_size,
                                'global_pooling', False, 'padding_algorithm',
                                padding_algorithm, 'strides', stride, 'paddings',
@@ -468,7 +466,7 @@ def avg_pool3d(x,
     _check_value_limitation(kernel_size, "kernel_size", min_limit=1e-3)
     _check_value_limitation(stride, "stride", min_limit=1e-3)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.pool3d(
             x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride,
             'paddings', padding, 'global_pooling', False, 'padding_algorithm',
@@ -571,7 +569,7 @@ def max_pool1d(x,
     """
     """NCL to NCHW"""
     data_format = "NCHW"
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool1d')
     _check_input(x, 3)
     x = unsqueeze(x, [2])
@@ -587,7 +585,7 @@ def max_pool1d(x,
     # use 2d to implenment 1d should expand padding in advance.
     padding = _expand_low_nd_padding(padding)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         if return_mask:
             pool_out = _C_ops.max_pool2d_with_index(
                 x, 'ksize', kernel_size, 'global_pooling', False, 'strides',
@@ -746,7 +744,7 @@ def max_unpool1d(x,
     output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                       output_size)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.unpool(x, indices, 'unpooling_type', 'max', 'ksize',
                                kernel_size, 'strides', stride, 'paddings',
                                padding, "output_size", output_size,
@@ -861,7 +859,7 @@ def max_unpool2d(x,
     output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                       output_size)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.unpool(x, indices, 'unpooling_type', 'max', 'ksize',
                                kernel_size, 'strides', stride, 'paddings',
                                padding, "output_size", output_size,
@@ -973,7 +971,7 @@ def max_unpool3d(x,
     output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                       output_size)
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.unpool3d(x, indices, 'unpooling_type', 'max', 'ksize',
                                  kernel_size, 'strides', stride, 'paddings',
                                  padding, "output_size", output_size,
@@ -1029,7 +1027,7 @@ def max_pool2d(x,
             "When setting return_mask to true, data_format must be set to NCHW in API:max_pool2d"
         )
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         if return_mask:
             output = _C_ops.max_pool2d_with_index(
                 x, 'ksize', kernel_size, 'global_pooling', False, 'strides',
@@ -1160,7 +1158,7 @@ def max_pool3d(x,
            "When setting return_mask to true, data_format must be set to NCDHW in API:max_pool3d"
        )
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         if return_mask:
             output = _C_ops.max_pool3d_with_index(
                 x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides',
@@ -1250,7 +1248,7 @@ def adaptive_avg_pool1d(x, output_size, name=None):
               # pool_out shape: [1, 3, 16])
     """
     pool_type = 'avg'
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                  'adaptive_pool2d')
         check_type(output_size, 'pool_size', (int), 'adaptive_pool1d')
@@ -1258,7 +1256,7 @@ def adaptive_avg_pool1d(x, output_size, name=None):
     pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
 
     x = unsqueeze(x, [2])
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         pool_out = _C_ops.pool2d(x, 'pooling_type', pool_type, 'ksize',
                                  pool_size, 'adaptive', True)
         return squeeze(pool_out, [2])
@@ -1333,7 +1331,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
                             output_size=[3, 3])
             # out.shape is [2, 3, 3, 3]
     """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                  'adaptive_avg_pool2d')
         check_type(data_format, 'data_format', str, 'adaptive_avg_pool2d')
@@ -1357,7 +1355,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
         if output_size[1] == None:
             output_size[1] = in_w
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.pool2d(x, 'pooling_type', 'avg', 'ksize', output_size,
                                'global_pooling', False, 'adaptive', True,
                                'data_format', data_format)
@@ -1437,7 +1435,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
                             output_size=[3, 3, 3])
             # out.shape is [2, 3, 3, 3, 3]
     """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                  'adaptive_avg_pool3d')
         check_type(data_format, 'data_format', str, 'adaptive_avg_pool3d')
@@ -1463,7 +1461,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
         if output_size[2] == None:
             output_size[2] = in_w
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         output = _C_ops.pool3d(x, 'pooling_type', 'avg', 'ksize', output_size,
                                'global_pooling', False, 'adaptive', True,
                                'data_format', data_format)
@@ -1537,7 +1535,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
               # pool_out shape: [1, 3, 16] indices  shape: [1, 3, 16]
     """
     pool_type = 'max'
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                  'adaptive_max_pool1d')
         check_type(output_size, 'pool_size', int, 'adaptive_max_pool1d')
@@ -1547,7 +1545,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
     pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
 
     x = unsqueeze(x, [2])
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         pool_out = _C_ops.max_pool2d_with_index(
             x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True)
         return (squeeze(pool_out[0], [2]), squeeze(
@@ -1619,7 +1617,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
                             output_size=[3, 3])
             # out.shape is [2, 3, 3, 3]
     """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                  'adaptive_max_pool2d')
         check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool2d')
@@ -1636,7 +1634,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
         if output_size[1] == None:
             output_size[1] = in_w
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         pool_out = _C_ops.max_pool2d_with_index(
             x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
         return pool_out if return_mask else pool_out[0]
@@ -1710,7 +1708,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
             # out.shape is [2, 3, 3, 3, 3]
     """
 
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
         check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                  'adaptive_max_pool3d')
         check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool3d')
@@ -1729,7 +1727,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
         if output_size[2] == None:
             output_size[2] = in_w
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         pool_out = _C_ops.max_pool3d_with_index(
             x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
         return pool_out if return_mask else pool_out[0]
diff --git a/python/paddle/nn/functional/sparse_attention.py b/python/paddle/nn/functional/sparse_attention.py
index c39fcb8554a2f6e64610e3135043f86c7943e620..53be014527815295209f147153e9335fc74c33a1 100644
--- a/python/paddle/nn/functional/sparse_attention.py
+++ b/python/paddle/nn/functional/sparse_attention.py
@@ -14,10 +14,10 @@
 import warnings
 import paddle
-from ...fluid.framework import in_dygraph_mode, default_main_program
+from ...fluid.framework import default_main_program
 from paddle.fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 
 
 def sparse_attention(query,
@@ -143,7 +143,7 @@ def sparse_attention(query,
             #        [1.60885942, 2.60885954],
             #        [1.99830270, 2.99830270]]]]
     """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         result_attention, result_sdd, result_softmax = _C_ops.sparse_attention(
             query, key, value, sparse_csr_offset, sparse_csr_columns,
             key_padding_mask, attn_mask)
diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py
index bd3e27a25e12c47abd22282d1c72e14910b1188c..43c7757a8777ba703194cce989b2469d0fc900e0 100644
--- a/python/paddle/nn/functional/vision.py
+++ b/python/paddle/nn/functional/vision.py
@@ -13,13 +13,14 @@
 # limitations under the License.
 
 from ...device import get_cudnn_version
-from ...fluid.framework import core, in_dygraph_mode
 from ...static import Variable
 from ...fluid.layer_helper import LayerHelper
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid import dygraph_utils
 import numpy as np
 from paddle import _C_ops
+from ...device import is_compiled_with_rocm
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -83,14 +84,14 @@ def affine_grid(theta, out_shape, align_corners=True, name=None):
         use_cudnn = True
     else:
         use_cudnn = False
-    if core.is_compiled_with_rocm():
+    if is_compiled_with_rocm():
         use_cudnn = False  # ROCM platform do not have MIOPEN kernel for affine_grid
 
     if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \
             isinstance(out_shape, Variable)):
         raise ValueError("The out_shape should be a list, tuple or Tensor.")
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         _out_shape = out_shape.numpy().tolist() if isinstance(
             out_shape, Variable) else out_shape
         return _C_ops.affine_grid(theta, "output_shape", _out_shape,
@@ -263,7 +264,7 @@ def grid_sample(x,
     cudnn_version = get_cudnn_version()
     use_cudnn = False
-    if not core.is_compiled_with_rocm() and (
+    if not is_compiled_with_rocm() and (
             cudnn_version is not None
     ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros':
         use_cudnn = True
@@ -271,7 +272,7 @@ def grid_sample(x,
         x.stop_gradient = False
         grid.stop_gradient = False
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners',
                  align_corners, 'use_cudnn', use_cudnn)
         out = getattr(_C_ops, 'grid_sampler')(x, grid, *attrs)
@@ -329,7 +330,7 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
             "But recevie Attr(data_format): {} ".format(
                 data_format))
 
-    if in_dygraph_mode():
+    if in_dynamic_mode():
         return _C_ops.pixel_shuffle(x, "upscale_factor", upscale_factor,
                                     "data_format", data_format)
 
diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py
index 13a70a179ffe38e2f2ef4b335657d7e45d1bb84a..746d2b67b2a1d396b377b9f6edbcd6ee233eaabd 100644
--- a/python/paddle/nn/initializer/assign.py
+++ b/python/paddle/nn/initializer/assign.py
@@ -11,11 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from ...fluid import framework
-from ...fluid import core
-from ...fluid import unique_name
-from ...fluid.core import VarDesc
+import paddle
 from ...fluid.data_feeder import check_type
 from ...fluid.initializer import NumpyArrayInitializer
 
@@ -88,13 +84,14 @@ class Assign(NumpyArrayInitializer):
     def __init__(self, value, name=None):
         import numpy
         check_type(value, 'value',
-                   (numpy.ndarray, list, tuple, framework.Variable), 'Assign')
+                   (numpy.ndarray, list, tuple, paddle.static.Variable),
+                   'Assign')
         if (isinstance(value, (list, tuple))):
             value = numpy.array(value)
 
         # TODO: value is already is a tensor, accounting efficiency maybe it does not need to convert tensor to numpy data and then initialized.
-        if (isinstance(value, framework.Variable)):
+        if (isinstance(value, paddle.static.Variable)):
             value = value.numpy()
 
         super(Assign, self).__init__(value)
diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py
index 26aa349b5b1b4b7f64d555c46fb00347a636fe50..514afb15a8edb30ba5a1511328ecf6796f8f15d5 100644
--- a/python/paddle/nn/initializer/dirac.py
+++ b/python/paddle/nn/initializer/dirac.py
@@ -15,7 +15,9 @@
 from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
-from ...fluid import unique_name, framework
+from ...fluid import framework
+from paddle import in_dynamic_mode
+from paddle.utils import unique_name
 
 __all__ = []
 
@@ -221,6 +223,6 @@ class Dirac(Initializer):
                 "out_dtype": var.dtype},
             stop_gradient=True)
 
-        if not framework.in_dygraph_mode():
+        if not in_dynamic_mode():
             var.op = op
         return op
diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py
index 8e0acb9ab2d20ffa1fd0647f54884f303c837ba6..84cdb971d77d46ddea36eeda03200010d22f3568 100644
--- a/python/paddle/nn/initializer/orthogonal.py
+++ b/python/paddle/nn/initializer/orthogonal.py
@@ -14,9 +14,9 @@
 from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
-from ...fluid.core import VarDesc
-from ...fluid import unique_name, framework
+from ...fluid import framework
 from ...tensor import diag, transpose, sign, qr, reshape
+from paddle.utils import unique_name
 
 __all__ = []
 
diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py
index 617981cb8f74c07a8d0944a2b655d66b3d13fbd4..400585c43183038bfbe68f5dd0e241044d06f4bb 100644
--- a/python/paddle/nn/layer/activation.py
+++ b/python/paddle/nn/layer/activation.py
@@ -14,8 +14,6 @@
 
 # TODO: define activation functions of neural network
 
-from ...fluid import core
-from ...fluid.framework import in_dygraph_mode
 from ...framework import ParamAttr
 from ..initializer import Constant
 from paddle.framework import get_default_dtype
diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py
index 89ff156bded2af2ea22cbb85ee35b2def011dbe9..9ae9d5bec437e9d4ec74d5696fae7043f0186d97 100644
--- a/python/paddle/nn/layer/common.py
+++ b/python/paddle/nn/layer/common.py
@@ -15,10 +15,10 @@
 # TODO: define the common classes to build a neural network
 import paddle
 from ...fluid.dygraph import Flatten  # noqa: F401
-from ...fluid.framework import in_dygraph_mode
 from .. import functional as F
 from ...fluid.framework import _dygraph_tracer
 from paddle.nn import Layer
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -1456,7 +1456,7 @@ class Embedding(Layer):
             dtype=self._dtype,
             is_bias=False)
 
-        if in_dygraph_mode() and padding_idx != -1:
+        if in_dynamic_mode() and padding_idx != -1:
             with paddle.no_grad():
                 self.weight[padding_idx] = 0.0
 
diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py
index 26fd544ecce11234301b948f91128a4e6c052210..fd7355e162ae77fdca683436e5cd69fa2f3c3351 100644
--- a/python/paddle/nn/layer/conv.py
+++ b/python/paddle/nn/layer/conv.py
@@ -16,14 +16,15 @@
 
 import numpy as np
 
-from ...fluid import get_flags
-from ...fluid import core
+from paddle import get_flags
 from ...device import get_cudnn_version
 from .. import Layer
 from ..initializer import Normal
 from .. import functional as F
 from ...fluid.layers import utils
 from ..functional.conv import _update_padding_nd
+from ...device import is_compiled_with_cuda
+from ...device import is_compiled_with_rocm
 
 __all__ = []
 
@@ -138,7 +139,7 @@ class _ConvNd(Layer):
 
         cudnn_version = get_cudnn_version()
 
-        self._use_cudnn = True if (core.is_compiled_with_cuda() and
+        self._use_cudnn = True if (is_compiled_with_cuda() and
                                    cudnn_version is not None) else False
 
         self._op_type = "conv" + str(dims) + 'd'
@@ -146,13 +147,13 @@ class _ConvNd(Layer):
                 in_channels != 1 and out_channels % in_channels == 0):
             self._op_type = 'depthwise_conv2d'
-            if core.is_compiled_with_rocm():
+            if is_compiled_with_rocm():
                 self._use_cudnn = True
             else:
                 self._use_cudnn = False
 
-        if (core.is_compiled_with_cuda() and get_flags(
-                "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
+        if (is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")[
+                "FLAGS_conv2d_disable_cudnn"]):
             self._use_cudnn = False
 
     def extra_repr(self):
diff --git a/python/paddle/nn/layer/distance.py b/python/paddle/nn/layer/distance.py
index 0547bf75a4bf6c4b2b4a878fdf37f00c007ef4bc..1fb7e8c4f2148f7dc0d387d2a7ae44aab604c703 100644
--- a/python/paddle/nn/layer/distance.py
+++ b/python/paddle/nn/layer/distance.py
@@ -16,10 +16,10 @@
 import numpy as np
 import paddle
 from .. import Layer
-from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid.layer_helper import LayerHelper
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -78,7 +78,7 @@ class PairwiseDistance(Layer):
         check_type(self.keepdim, 'keepdim', (bool), 'PairwiseDistance')
 
     def forward(self, x, y):
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             sub = _C_ops.elementwise_sub(x, y)
             return _C_ops.p_norm(sub, 'axis', 1, 'porder', self.p, 'keepdim',
                                  self.keepdim, 'epsilon', self.epsilon)
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 9da41f26969c8064c7147b17f4f73f6e5bc7bead..7e40c029a02ecb1a8b263a2133c2d36269282d86 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -16,11 +16,11 @@
 # TODO: define loss functions of neural network
 import numpy as np
 import paddle.fluid as fluid
-import paddle.fluid.core as core
 import paddle
 from .. import functional as F
-from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator
+from paddle.fluid.framework import _varbase_creator
 from .. import Layer
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -591,7 +591,7 @@ class MSELoss(Layer):
         self.reduction = reduction
 
     def forward(self, input, label):
-        if not fluid.framework.in_dygraph_mode():
+        if not in_dynamic_mode():
             fluid.data_feeder.check_variable_and_dtype(
                 input, 'input', ['float32', 'float64'], 'MSELoss')
             fluid.data_feeder.check_variable_and_dtype(
diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py
index de9f8663e67692f8c23d378abb8e1f7171943d95..8113073d757d6e978caa99c7199e61a2cb8a79d4 100644
--- a/python/paddle/nn/layer/norm.py
+++ b/python/paddle/nn/layer/norm.py
@@ -33,12 +33,11 @@
 from ...fluid.dygraph import BatchNorm  # noqa: F401
 from ...fluid.dygraph import SpectralNorm  # noqa: F401
 
 from ...framework import get_default_dtype, set_default_dtype
-from ...fluid.framework import in_dygraph_mode
 
 from ..initializer import Constant
 from ...framework import ParamAttr
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
-from ...fluid import core, dygraph_utils
+from ...fluid import dygraph_utils
 
 from ..functional import batch_norm, layer_norm, instance_norm
 
@@ -49,6 +48,7 @@ from ...framework import no_grad
 from .. import functional as F
 from paddle import _C_ops
 from .. import Layer
+from paddle import in_dynamic_mode
 
 __all__ = []
 
@@ -1087,7 +1087,7 @@ class SyncBatchNorm(_BatchNormBase):
         ### train mode: use mini-batch stats, eval mode: use global stats
         ### use_global_stats only support False in sync_batch_norm
 
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
                      "is_test", not self.training, "data_layout",
                      self._data_format, "use_mkldnn", False, "fuse_with_relu",
diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py
index f7d5448d1324be02fb749985435466ecf7214ab2..2bb1f1311107b18f2a5e30ca8f1661a45e9b4c92 100644
--- a/python/paddle/nn/layer/rnn.py
+++ b/python/paddle/nn/layer/rnn.py
@@ -33,6 +33,11 @@
 from paddle.fluid.layers import utils
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 from paddle.fluid.data_feeder import convert_dtype
 from paddle import _C_ops
+from paddle import in_dynamic_mode
+from paddle.framework import core
+from paddle.static import default_startup_program
+from paddle.static import program_guard
+
 
 __all__ = []
 
@@ -970,8 +975,8 @@ class RNNBase(LayerList):
         # dropout state may also can be hided and avoid saving
         # should dropout state be persistable for static-graph
         self._dropout_state = self.create_variable(
-            dtype=fluid.core.VarDesc.VarType.UINT8)
-        if fluid.framework.in_dygraph_mode():
+            dtype=core.VarDesc.VarType.UINT8)
+        if in_dynamic_mode():
             with paddle.no_grad():
                 _C_ops.coalesce_tensor(self._all_weights, self._all_weights,
                                        self._flat_weight[0], "copy_data",
@@ -979,8 +984,8 @@ class RNNBase(LayerList):
                                        params[0].dtype)
             return
         # for static-graph, append coalesce_tensor into startup program
-        with fluid.program_guard(fluid.default_startup_program(),
-                                 fluid.default_startup_program()):
+        with program_guard(default_startup_program(),
+                           default_startup_program()):
             with paddle.no_grad():
                 self._helper.append_op(
                     type="coalesce_tensor",
@@ -999,7 +1004,7 @@ class RNNBase(LayerList):
         if not self.time_major:
             inputs = paddle.tensor.transpose(inputs, [1, 0, 2])
 
-        if fluid.framework.in_dygraph_mode():
+        if in_dynamic_mode():
             _, _, out, state = _C_ops.rnn(
                 inputs, initial_states, self._all_weights, sequence_length,
                 self._dropout_state, self.state_components, 'dropout_prob',
class RNNBase(LayerList): for i in range(self.state_components) ] reserve = self._helper.create_variable_for_type_inference( - dtype=fluid.core.VarDesc.VarType.UINT8, stop_gradient=True) + dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) inputs = { 'Input': inputs, diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index ce5fb3e616eb5960da6c2535c10d34d0fbf4766d..2c0eb88e0875c95b7ff832ef947a04d752fa068a 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.dygraph import layers from ...tensor import math, manipulation +from .. import Layer __all__ = [] -class FloatFunctionalLayer(layers.Layer): +class FloatFunctionalLayer(Layer): def __init__(self): super(FloatFunctionalLayer, self).__init__() diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 7ad43da6ed5c880ef190164594409d4f38735300..8e9316a19623bd696d549c59347a3399f38a3a70 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph import layers -from paddle.fluid import core +from paddle.framework import core from paddle.fluid import dygraph_utils -from paddle.fluid import unique_name -from paddle.fluid.param_attr import ParamAttr +from paddle.utils import unique_name +from paddle.framework import ParamAttr from paddle.fluid.framework import _varbase_creator -from paddle.fluid.framework import in_dygraph_mode -from paddle.fluid.initializer import Constant +from paddle.nn.initializer import Constant from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.nn import functional as F import logging from paddle.fluid.log_helper import get_logger from paddle import _C_ops +from paddle import in_dynamic_mode +from paddle.nn import Layer __all__ = [ 'FakeQuantAbsMax', @@ -43,7 +43,7 @@ _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -class FakeQuantAbsMax(layers.Layer): +class FakeQuantAbsMax(Layer): r""" FakeQuantAbsMax layer does the abs_max quant and then dequant. Its computational formula is described as below: @@ -76,7 +76,7 @@ class FakeQuantAbsMax(layers.Layer): self._scale = None def forward(self, input): - if in_dygraph_mode(): + if in_dynamic_mode(): attrs = ('bit_length', self._quant_bits) quant_out = _varbase_creator( type=input.type, @@ -125,7 +125,7 @@ class FakeQuantAbsMax(layers.Layer): return quant_out -class FakeQuantMovingAverageAbsMax(layers.Layer): +class FakeQuantMovingAverageAbsMax(Layer): r""" FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant. 
Its computational formula is described as below: @@ -175,7 +175,7 @@ class FakeQuantMovingAverageAbsMax(layers.Layer): self._accum.stop_gradient = True def forward(self, input): - if in_dygraph_mode(): + if in_dynamic_mode(): attrs = ('moving_rate', self._moving_rate, 'bit_length', self._quant_bits, 'is_test', not self.training) quant_out = _varbase_creator( @@ -223,7 +223,7 @@ class FakeQuantMovingAverageAbsMax(layers.Layer): return quant_out -class FakeQuantChannelWiseAbsMax(layers.Layer): +class FakeQuantChannelWiseAbsMax(Layer): def __init__(self, name=None, channel_num=None, @@ -253,7 +253,7 @@ class FakeQuantChannelWiseAbsMax(layers.Layer): self._scale = None def forward(self, input): - if in_dygraph_mode(): + if in_dynamic_mode(): attrs = ('bit_length', self._quant_bits, 'quant_axis', self._quant_axis) quant_out = _varbase_creator( @@ -306,7 +306,7 @@ class FakeQuantChannelWiseAbsMax(layers.Layer): return quant_out -class MovingAverageAbsMaxScale(layers.Layer): +class MovingAverageAbsMaxScale(Layer): def __init__(self, name=None, moving_rate=0.9, dtype='float32'): r""" MovingAverageMaxScale layer is used to calculating the output quantization @@ -345,7 +345,7 @@ class MovingAverageAbsMaxScale(layers.Layer): self._accum.stop_gradient = True def forward(self, input): - if in_dygraph_mode(): + if in_dynamic_mode(): attrs = ('moving_rate', self._moving_rate, 'is_test', not self.training) state = self._state if self.training else None @@ -393,7 +393,7 @@ class MovingAverageAbsMaxScale(layers.Layer): QuantStub = MovingAverageAbsMaxScale -class QuantizedConv2D(layers.Layer): +class QuantizedConv2D(Layer): """ The computational logic of QuantizedConv2D is the same with Conv2D. The only difference is that its inputs are all fake quantized. @@ -482,7 +482,7 @@ class QuantizedConv2D(layers.Layer): data_format=self._data_format) -class QuantizedConv2DTranspose(layers.Layer): +class QuantizedConv2DTranspose(Layer): """ The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose. The only difference is that its inputs are all fake quantized. @@ -588,7 +588,7 @@ class QuantizedConv2DTranspose(layers.Layer): data_format=self._data_format) -class QuantizedLinear(layers.Layer): +class QuantizedLinear(Layer): """ The computational logic of QuantizedLinear is the same with Linear. The only difference is that its inputs are all fake quantized. @@ -657,7 +657,7 @@ class QuantizedLinear(layers.Layer): return out -class MAOutputScaleLayer(layers.Layer): +class MAOutputScaleLayer(Layer): """ Add MovingAverageMaxScale layer to the behind of the input layer. Calculate the scale (moving average abs max) for the output of the input layer. @@ -684,7 +684,7 @@ class MAOutputScaleLayer(layers.Layer): return self._ma_output_scale(out) -class FakeQuantMAOutputScaleLayer(layers.Layer): +class FakeQuantMAOutputScaleLayer(Layer): """ Add FakeQuantMovingAverageAbsMax layer to the behind of the input layer. """ diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 8d2cc8062d2ccb408e3407033af924938d3d9aa1..c131d218a1cde9409d18e98fac709a3f85a40a64 100755 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import paddle import numpy as np from ... 
import fluid from ...fluid import dygraph @@ -39,25 +39,25 @@ def l2_norm(x, axis, epsilon=1e-12, name=None): "axis": 1 if axis is None else axis, "epsilon": epsilon, }) - return F.squeeze(norm, axes=[axis]) + return paddle.squeeze(norm, axis=[axis]) def norm_except_dim(p, dim): shape = p.shape ndims = len(shape) if dim == -1: - return F.sqrt(F.reduce_sum(F.square(p)) + 1e-12) + return paddle.sqrt(paddle.sum(paddle.square(p)) + 1e-12) elif dim == 0: - p_matrix = F.reshape(p, (shape[0], -1)) + p_matrix = paddle.reshape(p, (shape[0], -1)) return l2_norm(p_matrix, axis=1) elif dim == ndims - 1: - p_matrix = F.reshape(p, (-1, shape[-1])) + p_matrix = paddle.reshape(p, (-1, shape[-1])) return l2_norm(p_matrix, axis=0) else: perm = list(range(ndims)) perm[0] = dim perm[dim] = 0 - p_transposed = F.transpose(p, perm) + p_transposed = paddle.transpose(p, perm) return norm_except_dim(p_transposed, 0) @@ -66,25 +66,25 @@ def _weight_norm(v, g, dim): ndims = len(shape) if dim == -1: - v_normalized = v / (F.sqrt(F.reduce_sum(F.square(v))) + 1e-12) + v_normalized = v / (paddle.sqrt(paddle.sum(paddle.square(v))) + 1e-12) elif dim == 0: - p_matrix = F.reshape(v, (shape[0], -1)) + p_matrix = paddle.reshape(v, (shape[0], -1)) v_normalized = F.l2_normalize(p_matrix, axis=1) - v_normalized = F.reshape(v_normalized, shape) + v_normalized = paddle.reshape(v_normalized, shape) elif dim == ndims - 1: - p_matrix = F.reshape(v, (-1, shape[-1])) + p_matrix = paddle.reshape(v, (-1, shape[-1])) v_normalized = F.l2_normalize(p_matrix, axis=0) - v_normalized = F.reshape(v_normalized, shape) + v_normalized = paddle.reshape(v_normalized, shape) else: perm = list(range(ndims)) perm[0] = dim perm[dim] = 0 - p_transposed = F.transpose(v, perm) + p_transposed = paddle.transpose(v, perm) transposed_shape = p_transposed.shape - p_matrix = F.reshape(p_transposed, (p_transposed.shape[0], -1)) + p_matrix = paddle.reshape(p_transposed, (p_transposed.shape[0], -1)) v_normalized = F.l2_normalize(p_matrix, axis=1) - v_normalized = F.reshape(v_normalized, transposed_shape) - v_normalized = F.transpose(v_normalized, perm) + v_normalized = paddle.reshape(v_normalized, transposed_shape) + v_normalized = paddle.transpose(v_normalized, perm) weight = F.elementwise_mul( v_normalized, g, axis=dim if dim is not None else -1) return weight @@ -130,9 +130,9 @@ class WeightNorm(object): layer.add_parameter(name + "_v", v) g = layer.create_parameter(g_var.shape, dtype=g_var.dtype) layer.add_parameter(name + '_g', g) - with dygraph.no_grad(): - F.assign(w, v) - F.assign(g_var, g) + with paddle.no_grad(): + paddle.assign(w, v) + paddle.assign(g_var, g) setattr(layer, name, fn.compute_weight(layer)) layer.register_forward_pre_hook(fn) @@ -145,8 +145,8 @@ class WeightNorm(object): del layer._parameters[self.name + '_v'] w = layer.create_parameter(w_var.shape, dtype=w_var.dtype) layer.add_parameter(self.name, w) - with dygraph.no_grad(): - F.assign(w_var, w) + with paddle.no_grad(): + paddle.assign(w_var, w) def __call__(self, layer, inputs): setattr(layer, self.name, self.compute_weight(layer)) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index d8ebae9d6bf3944a39834167ddd15270dd171a89..4df026cfa48923079fbacbf1facdbf75ec1d8727 100755 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -30,6 +30,7 @@ from ..fluid.layers import unstack # noqa: F401 from ..fluid.layers import scatter_nd # noqa: F401 from ..fluid.layers import shard_index # noqa: F401 +from ..fluid.layers 
import crop_tensor as crop # noqa: F401 from ..fluid.layers.nn import _elementwise_op_in_dygraph from ..fluid import layers from ..fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only