Unverified commit a710738e
Authored by zhiboniu on Feb 22, 2022; committed via GitHub on Feb 22, 2022
unset fluid in nn.others (#34935)
Parent: 1aa67778
27 changed files with 258 additions and 245 deletions (+258, −245). The changed files are listed below; a short sketch of the recurring pattern in these edits follows the list.
python/paddle/__init__.py                         +27  −25
python/paddle/framework/__init__.py               +9   −0
python/paddle/nn/__init__.py                      +0   −1
python/paddle/nn/functional/activation.py         +28  −28
python/paddle/nn/functional/common.py             +27  −26
python/paddle/nn/functional/conv.py               +21  −17
python/paddle/nn/functional/extension.py          +4   −4
python/paddle/nn/functional/input.py              +3   −4
python/paddle/nn/functional/loss.py               +17  −21
python/paddle/nn/functional/norm.py               +8   −8
python/paddle/nn/functional/pooling.py            +24  −26
python/paddle/nn/functional/sparse_attention.py   +3   −3
python/paddle/nn/functional/vision.py             +7   −6
python/paddle/nn/initializer/assign.py            +4   −7
python/paddle/nn/initializer/dirac.py             +4   −2
python/paddle/nn/initializer/orthogonal.py        +2   −2
python/paddle/nn/layer/activation.py              +0   −2
python/paddle/nn/layer/common.py                  +2   −2
python/paddle/nn/layer/conv.py                    +7   −6
python/paddle/nn/layer/distance.py                +2   −2
python/paddle/nn/layer/loss.py                    +3   −3
python/paddle/nn/layer/norm.py                    +3   −3
python/paddle/nn/layer/rnn.py                     +11  −6
python/paddle/nn/quant/functional_layers.py       +2   −2
python/paddle/nn/quant/quant_layers.py            +19  −19
python/paddle/nn/utils/weight_norm_hook.py        +20  −20
python/paddle/tensor/manipulation.py              +1   −0
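The recurring edit in these files is mechanical: dygraph-mode checks no longer go through paddle.fluid (`in_dygraph_mode`) but use the public `paddle.in_dynamic_mode()` helper, and symbols such as `core`, `get_flags` and the `is_compiled_with_*` queries are imported from `paddle`, `paddle.framework` or `paddle.device` instead of from `fluid`. Below is a minimal sketch of that pattern, written against the imports visible in this diff; the function name and body are illustrative only, not code taken from the commit.

import paddle
from paddle import _C_ops, in_dynamic_mode  # the public imports this commit switches to

def relu_sketch(x):
    # Dynamic (imperative) mode: dispatch straight to the C++ op, as the changed functions do.
    if in_dynamic_mode():
        return _C_ops.relu(x)
    # Static-graph mode: the real functions keep their LayerHelper/append_op path here;
    # that path is unchanged by this commit and is omitted from the sketch.
    raise NotImplementedError("static-graph branch omitted in this sketch")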
python/paddle/__init__.py
@@ -22,23 +22,32 @@ except ImportError:
 )
 from .batch import batch  # noqa: F401
-from .fluid import monkey_patch_variable
-from .fluid.dygraph import monkey_patch_math_varbase
+from .framework import monkey_patch_variable
+from .framework import monkey_patch_math_varbase
 monkey_patch_variable()
 monkey_patch_math_varbase()
+from .framework import disable_signal_handler  # noqa: F401
+from .framework import get_flags  # noqa: F401
+from .framework import set_flags  # noqa: F401
+from .framework import disable_static  # noqa: F401
+from .framework import enable_static  # noqa: F401
+from .framework import in_dynamic_mode  # noqa: F401
 from .framework.dtype import dtype as dtype  # noqa: F401
-from paddle.framework.dtype import uint8  # noqa: F401
-from paddle.framework.dtype import int8  # noqa: F401
-from paddle.framework.dtype import int16  # noqa: F401
-from paddle.framework.dtype import int32  # noqa: F401
-from paddle.framework.dtype import int64  # noqa: F401
-from paddle.framework.dtype import float16  # noqa: F401
-from paddle.framework.dtype import float32  # noqa: F401
-from paddle.framework.dtype import float64  # noqa: F401
-from paddle.framework.dtype import bfloat16  # noqa: F401
-from paddle.framework.dtype import bool  # noqa: F401
-from paddle.framework.dtype import complex64  # noqa: F401
-from paddle.framework.dtype import complex128  # noqa: F401
+from .framework.dtype import uint8  # noqa: F401
+from .framework.dtype import int8  # noqa: F401
+from .framework.dtype import int16  # noqa: F401
+from .framework.dtype import int32  # noqa: F401
+from .framework.dtype import int64  # noqa: F401
+from .framework.dtype import float16  # noqa: F401
+from .framework.dtype import float32  # noqa: F401
+from .framework.dtype import float64  # noqa: F401
+from .framework.dtype import bfloat16  # noqa: F401
+from .framework.dtype import bool  # noqa: F401
+from .framework.dtype import complex64  # noqa: F401
+from .framework.dtype import complex128  # noqa: F401
 from .framework import VarBase as Tensor  # noqa: F401
 Tensor.__qualname__ = 'Tensor'  # noqa: F401
 import paddle.compat  # noqa: F401
@@ -142,6 +151,7 @@ from .tensor.manipulation import scatter_nd_add  # noqa: F401
 from .tensor.manipulation import scatter_nd  # noqa: F401
 from .tensor.manipulation import shard_index  # noqa: F401
 from .tensor.manipulation import slice  # noqa: F401
+from .tensor.manipulation import crop  # noqa: F401
 from .tensor.manipulation import split  # noqa: F401
 from .tensor.manipulation import squeeze  # noqa: F401
 from .tensor.manipulation import squeeze_  # noqa: F401
@@ -316,23 +326,15 @@ from .tensor.stat import quantile  # noqa: F401
 from .device import get_cudnn_version  # noqa: F401
 from .device import set_device  # noqa: F401
 from .device import get_device  # noqa: F401
-from .fluid.framework import is_compiled_with_cinn  # noqa: F401
-from .fluid.framework import is_compiled_with_cuda  # noqa: F401
-from .fluid.framework import is_compiled_with_rocm  # noqa: F401
-from .fluid.framework import disable_signal_handler  # noqa: F401
-from .fluid.framework import get_flags  # noqa: F401
-from .fluid.framework import set_flags  # noqa: F401
 from .device import is_compiled_with_xpu  # noqa: F401
 from .device import is_compiled_with_npu  # noqa: F401
 from .device import is_compiled_with_ipu  # noqa: F401
 from .device import is_compiled_with_mlu  # noqa: F401
+from .device import is_compiled_with_cinn  # noqa: F401
+from .device import is_compiled_with_cuda  # noqa: F401
+from .device import is_compiled_with_rocm  # noqa: F401
 from .device import XPUPlace  # noqa: F401
-from .fluid.dygraph.base import enable_dygraph as disable_static  # noqa: F401
-from .fluid.dygraph.base import disable_dygraph as enable_static  # noqa: F401
-from .fluid.framework import in_dygraph_mode as in_dynamic_mode  # noqa: F401
-from .fluid.layers import crop_tensor as crop  # noqa: F401
 # high-level api
 from .hapi import Model  # noqa: F401
 from . import callbacks  # noqa: F401
python/paddle/framework/__init__.py
@@ -39,4 +39,13 @@ from .io import save  # noqa: F401
 from .io import load  # noqa: F401
 from ..fluid.dygraph.parallel import DataParallel  # noqa: F401
+from ..fluid import monkey_patch_variable
+from ..fluid.dygraph import monkey_patch_math_varbase
+from ..fluid.framework import disable_signal_handler  # noqa: F401
+from ..fluid.framework import get_flags  # noqa: F401
+from ..fluid.framework import set_flags  # noqa: F401
+from ..fluid.dygraph.base import enable_dygraph as disable_static  # noqa: F401
+from ..fluid.dygraph.base import disable_dygraph as enable_static  # noqa: F401
+from ..fluid.framework import in_dygraph_mode as in_dynamic_mode  # noqa: F401
 __all__ = []
python/paddle/nn/__init__.py
@@ -14,7 +14,6 @@
 # TODO: import all neural network related api under this directory,
 # including layers, linear, conv, rnn etc.
 from ..fluid.dygraph.layers import Layer  # noqa: F401
 from ..fluid.dygraph.container import LayerList  # noqa: F401
 from ..fluid.dygraph.container import ParameterList  # noqa: F401
python/paddle/nn/functional/activation.py
@@ -22,11 +22,11 @@ from ...tensor.math import multiply
 import warnings
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_
-from ...fluid import core
+from ...fluid.framework import convert_np_dtype_to_dtype_
 from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
 import paddle
-from paddle import _C_ops
+from paddle import _C_ops, in_dynamic_mode
+from paddle.framework import core
 __all__ = []
@@ -61,7 +61,7 @@ def celu(x, alpha=1.0, name=None):
    if alpha == 0:
        raise ZeroDivisionError("alpha cannot be 0 for celu")
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.celu(x, 'alpha', alpha)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'celu')
@@ -110,7 +110,7 @@ def elu(x, alpha=1.0, name=None):
            #  [ 1.       15.6     ]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.elu(x, 'alpha', alpha)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
@@ -174,7 +174,7 @@ def gelu(x, approximate=False, name=None):
            #  [ 0.84119201,  1.39957154]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.gelu(x, 'approximate', approximate)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'gelu')
@@ -222,7 +222,7 @@ def hardshrink(x, threshold=0.5, name=None):
            out = F.hardshrink(x) # [-1., 0., 2.5]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.hard_shrink(x, 'threshold', threshold)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -273,7 +273,7 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
            out = F.hardtanh(x) # [-1., 0.3, 1.]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.brelu(x, 't_min', min, 't_max', max)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -328,7 +328,7 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
            out = F.hardsigmoid(x) # [0., 1., 0.666667]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.hard_sigmoid(x, 'slope', slope, 'offset', offset)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -382,7 +382,7 @@ def hardswish(x, name=None):
            out = F.hardswish(x) # [0., 5., 0.666667]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.hard_swish(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -427,7 +427,7 @@ def leaky_relu(x, negative_slope=0.01, name=None):
            out = F.leaky_relu(x) # [-0.02, 0., 1.]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.leaky_relu(x, 'alpha', negative_slope)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -518,7 +518,7 @@ def prelu(x, weight, data_format="NCHW", name=None):
            1], "The weight size should be equal to x input channel in prelu() when weight shape is not [1]."
        mode = 'channel'
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.prelu(x, weight, 'mode', mode, 'data_format', data_format)
    helper = LayerHelper('prelu', **locals())
@@ -560,7 +560,7 @@ def relu(x, name=None):
            out = F.relu(x) # [0., 0., 1.]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.relu(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu')
@@ -605,7 +605,7 @@ def log_sigmoid(x, name=None):
            out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.logsigmoid(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -672,7 +672,7 @@ def maxout(x, groups, axis=1, name=None):
            #    [0.7142536  0.88725346 0.61093384 0.38833922]]]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.maxout(x, 'groups', groups, 'axis', axis)
    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'maxout')
@@ -721,7 +721,7 @@ def relu6(x, name=None):
            out = F.relu6(x) # [0, 0.3, 6]
    """
    threshold = 6.0
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.relu6(x, 'threshold', threshold)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu6')
@@ -780,7 +780,7 @@ def selu(x,
        raise ValueError(
            "The alpha must be no less than zero. Received: {}.".format(alpha))
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.selu(x, 'scale', scale, 'alpha', alpha)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'selu')
@@ -821,7 +821,7 @@ def silu(x, name=None):
            out = F.silu(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.silu(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'silu')
@@ -951,7 +951,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
        dtype = convert_np_dtype_to_dtype_(dtype)
    use_cudnn = True
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        outs_cast = x if dtype is None \
            else _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
        return _C_ops.softmax(outs_cast, 'axis', axis, 'use_cudnn', use_cudnn)
@@ -1026,7 +1026,7 @@ def softplus(x, beta=1, threshold=20, name=None):
            x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
            out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1081,7 +1081,7 @@ def softshrink(x, threshold=0.5, name=None):
            "The threshold must be no less than zero. Received: {}.".format(threshold))
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.softshrink(x, 'lambda', threshold)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1122,7 +1122,7 @@ def softsign(x, name=None):
            x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
            out = F.softsign(x) # [-0.285714, -0.166667, 0.0909091, 0.230769]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.softsign(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1160,7 +1160,7 @@ def swish(x, name=None):
            out = F.swish(x) # [-0.238406, 0., 0.731059]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.swish(x, 'beta', 1.0)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'swish')
@@ -1204,7 +1204,7 @@ def mish(x, name=None):
            x = paddle.to_tensor([-5., 0., 5.])
            out = F.mish(x) # [-0.03357624, 0., 4.99955208]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.mish(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'mish')
@@ -1240,7 +1240,7 @@ def tanhshrink(x, name=None):
            x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
            out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.tanh_shrink(x)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1286,7 +1286,7 @@ def thresholded_relu(x, threshold=1.0, name=None):
            out = F.thresholded_relu(x) # [2., 0., 0.]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.thresholded_relu(x, 'threshold', threshold)
    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1360,7 +1360,7 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
    if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
        dtype = convert_np_dtype_to_dtype_(dtype)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if dtype is not None:
            x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
        return _C_ops.log_softmax(x, 'axis', axis)
@@ -1498,7 +1498,7 @@ def gumbel_softmax(x, temperature=1.0, hard=False, axis=-1, name=None):
            #  [0.00000000, 0.00000000, 0.00000000, 0.00001258, 0.99998736, 0.00000000]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.gumbel_softmax(x, 'temperature', temperature, 'hard',
                                     hard, 'axis', axis)
python/paddle/nn/functional/common.py
@@ -14,13 +14,11 @@
 import warnings
 import paddle
-from ...fluid.framework import in_dygraph_mode, default_main_program
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.layers.tensor import fill_constant
 from ...tensor import concat
 from ...tensor.creation import zeros
 from paddle.static import Variable
-from ...fluid.layers import core
 from ...fluid import dygraph_utils
 # TODO: define the common functions to build a neural network
 from ...fluid.layers import unfold  # noqa: F401
@@ -30,13 +28,17 @@ from ...tensor import clip
 from ...tensor import sum
 from ...tensor import sqrt
-from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
-from ...fluid.framework import in_dygraph_mode, _varbase_creator
+from ...fluid.framework import _varbase_creator
+from ...fluid.framework import in_dygraph_mode
-from ...fluid import core, dygraph_utils
-from ...fluid import core, layers
+from ...fluid import dygraph_utils
+from ...fluid import layers
+from ...fluid.data_feeder import check_variable_and_dtype
 from paddle import _C_ops
+from paddle.framework import in_dynamic_mode
+from paddle.tensor.creation import full
+from paddle.framework import core
+from paddle.static import default_main_program
 __all__ = []
@@ -353,11 +355,11 @@ def interpolate(x,
    if out_shape is not None and scale is not None:
        raise ValueError("Only one of size or scale_factor should be defined.")
    if out_shape is not None:
-        if isinstance(out_shape, Variable) and not in_dygraph_mode():
+        if isinstance(out_shape, Variable) and not in_dynamic_mode():
            out_shape.stop_gradient = True
            inputs['OutSize'] = out_shape
        else:
-            if in_dygraph_mode():
+            if in_dynamic_mode():
                if isinstance(out_shape, Variable):
                    out_shape = list(out_shape.numpy())
                for i, dim in enumerate(out_shape):
@@ -428,7 +430,7 @@ def interpolate(x,
                attrs['out_w'] = out_shape[2]
    else:
-        if in_dygraph_mode() and isinstance(scale, Variable):
+        if in_dynamic_mode() and isinstance(scale, Variable):
            scale = list(scale.numpy())
        if isinstance(scale, Variable):
            scale.stop_gradient = True
@@ -454,7 +456,7 @@ def interpolate(x,
            "Attr(scale)'s type should be float, int, list, tuple, or Tensor.")
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attr_list = []
        for k, v in attrs.items():
            attr_list.append(k)
@@ -719,7 +721,7 @@ def bilinear(x1, x2, weight, bias=None, name=None):
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.bilinear_tensor_product(x1, x2, weight, bias)
    check_variable_and_dtype(x1, 'x1', ['float32', 'float64'], 'bilinear')
@@ -891,7 +893,7 @@ def dropout(x,
        seed = None
        mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode  #semantic transfer
-        if in_dygraph_mode():
+        if in_dynamic_mode():
            if default_main_program().random_seed != 0:
                seed = default_main_program().random_seed
            out, mask = _C_ops.dropout(
@@ -930,7 +932,7 @@ def dropout(x,
            attrs=attrs)
        return out
    else:  #sometimes called dropout_nd #TODO: optimize with c++
-        if not in_dygraph_mode():
+        if not in_dynamic_mode():
            check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'dropout')
        dtype = x.dtype
        keep_prob = 1 - p
@@ -943,7 +945,7 @@ def dropout(x,
        #get mask shape
        input_shape = x.shape
-        if not in_dygraph_mode():
+        if not in_dynamic_mode():
            input_shape_tensor = paddle.shape(x)
        drop_axes = [axis] if isinstance(axis, int) else list(axis)
        if min(drop_axes) < 0 or max(drop_axes) > len(input_shape) - 1:
@@ -954,7 +956,7 @@ def dropout(x,
                "length of axis should not be greater than dimensions of x:{}, but get length of axis: {}".
                format(len(input_shape), len(drop_axes)))
        mask_shape = [1] * len(input_shape)
-        if not in_dygraph_mode():
+        if not in_dynamic_mode():
            for i in drop_axes:
                mask_shape[i] = input_shape_tensor[i]
        else:
@@ -964,7 +966,7 @@ def dropout(x,
        #get mask
        random_tensor = paddle.uniform(
            mask_shape, dtype='float32', min=0., max=1.0)
-        p = layers.fill_constant(shape=[1], dtype='float32', value=p)
+        p = full(shape=[1], fill_value=p, dtype='float32')
        keep_mask = paddle.greater_equal(random_tensor, p)
        scale_input = paddle.cast(scale_input, dtype)
@@ -1122,7 +1124,7 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
    if p < 0 or p > 1:
        raise ValueError("p argument should between 0 and 1")
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                 'alpha_dropout')
@@ -1142,16 +1144,15 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
        #get mask
        random_tensor = paddle.uniform(
            input_shape, dtype='float32', min=0., max=1.0)
-        p = layers.fill_constant(shape=[1], dtype='float32', value=p)
+        p = full(shape=[1], fill_value=p, dtype='float32')
        keep_mask = paddle.greater_equal(random_tensor, p)
        keep_mask = paddle.cast(keep_mask, dtype)
        drop_mask = paddle.subtract(
-            layers.fill_constant(
-                shape=input_shape, dtype=dtype, value=1.),
-            keep_mask)
+            full(shape=input_shape, fill_value=1., dtype=dtype), keep_mask)
        #apply mask
-        b = layers.fill_constant(shape=[1], dtype=dtype, value=b)
+        b = full(shape=[1], fill_value=b, dtype=dtype)
        y = paddle.add(paddle.multiply(x, keep_mask),
                       paddle.scale(drop_mask, scale=alpha_p))
@@ -1347,7 +1348,7 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None):
        unsqueezed_dim = [1]
        x = unsqueeze(x, axis=unsqueezed_dim)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if isinstance(pad, Variable):
            pad = pad.numpy()
        out = _C_ops.pad3d(x, "paddings", pad, "mode", mode, "value", value,
@@ -1519,7 +1520,7 @@ def linear(x, weight, bias=None, name=None):
            #  [0.9440598  0.9440598  0.9440598  0.9440598 ]
            #  [2.1077576  2.1077576  2.1077576  2.1077576 ]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        pre_bias = _C_ops.matmul_v2(x, weight, 'trans_x', False, 'trans_y',
                                    False)
@@ -1614,7 +1615,7 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
    if epsilon > 1. or epsilon < 0.:
        raise ValueError("The value of epsilon must be between 0 and 1.")
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.label_smooth(label, prior_dist, 'epsilon', float(epsilon))
    check_variable_and_dtype(label, 'label', ['float32', 'float64'],
@@ -1765,7 +1766,7 @@ def class_center_sample(label, num_classes, num_samples, group=None):
    if (seed is None or seed == 0) and default_main_program().random_seed != 0:
        seed = default_main_program().random_seed
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        remapped_label, sampled_class_center = _C_ops.class_center_sample(
            label, 'num_classes', num_classes, 'num_samples', num_samples,
            'ring_id', ring_id, 'nranks', nranks, 'rank', rank, 'fix_seed',
python/paddle/nn/functional/conv.py
@@ -16,9 +16,8 @@ from paddle.fluid.framework import _global_flags
 import numpy as np
 from ...device import get_cudnn_version
-from ...fluid.framework import in_dygraph_mode
 from ...static import Variable
-from ...fluid import core, dygraph_utils, get_flags
+from ...fluid import dygraph_utils
 from ...fluid.layers.utils import convert_to_list, _is_symmetric_padding
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...framework import ParamAttr
@@ -27,6 +26,11 @@ from paddle import _C_ops
 from ...tensor.manipulation import unsqueeze, squeeze
 from ...tensor.math import add
 from ...fluid.layers import nn
+from paddle.device import is_compiled_with_cuda
+from paddle.device import is_compiled_with_rocm
+from paddle.device import is_compiled_with_npu
+from paddle import in_dynamic_mode
+from paddle import get_flags
 __all__ = []
@@ -114,7 +118,7 @@ def _conv_nd(x,
             name=None):
    # Due to the poor performance of NHWC, we transpose the input to NCHW.
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
                 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn',
                 use_mkldnn, 'fuse_relu_before_depthwise_conv', False,
@@ -342,13 +346,13 @@ def conv1d(x,
    l_type = "conv2d"
    # When "groups==num_channels and num_filters% num_channels == 0" using depthwise_conv2d has better performance
-    if (core.is_compiled_with_cuda() and num_channels == groups and
+    if (is_compiled_with_cuda() and num_channels == groups and
            num_channels != 1 and num_filters % num_channels == 0):
        l_type = 'depthwise_conv2d'
        use_cudnn = False
    # NPU only supports depthwise_conv2d when  "input_channel = output_channel = groups"
-    if core.is_compiled_with_npu():
+    if is_compiled_with_npu():
        if (num_channels == groups and num_channels == num_filters):
            l_type = 'depthwise_conv2d'
        else:
@@ -357,7 +361,7 @@ def conv1d(x,
    squeeze_aixs = -3 if channel_last else -2
    x = unsqueeze(x, axis=[squeeze_aixs])
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
                 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
                 'fuse_relu_before_depthwise_conv', False, "padding_algorithm",
@@ -553,7 +557,7 @@ def conv2d(x,
    cudnn_version = get_cudnn_version()
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                         cudnn_version is not None) else False
    use_mkldnn = _global_flags()["FLAGS_use_mkldnn"]
@@ -567,20 +571,20 @@ def conv2d(x,
    if (num_channels == groups and num_channels != 1 and
            num_filters % num_channels == 0):
        l_type = 'depthwise_conv2d'
-        if core.is_compiled_with_rocm():
+        if is_compiled_with_rocm():
            use_cudnn = True
        else:
            use_cudnn = False
    # NPU only supports depthwise_conv2d when  "input_channel = output_channel = groups"
-    if core.is_compiled_with_npu():
+    if is_compiled_with_npu():
        if (num_channels == groups and num_channels == num_filters):
            l_type = 'depthwise_conv2d'
        else:
            l_type = 'conv2d'
-    if (core.is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")
-        ["FLAGS_conv2d_disable_cudnn"]):
+    if (is_compiled_with_cuda() and
+            get_flags("FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
        use_cudnn = False
    return _conv_nd(x, weight, bias, stride, padding, padding_algorithm,
@@ -815,7 +819,7 @@ def conv1d_transpose(x,
    x = unsqueeze(x, axis=[squeeze_axis])
    weight = unsqueeze(weight, axis=[-1])
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attrs = ('output_padding', output_padding, 'output_size', output_size,
                 'strides', stride, 'paddings', padding, 'padding_algorithm',
                 padding_algorithm, 'dilations', dilation, 'groups', groups,
@@ -1026,7 +1030,7 @@ def conv2d_transpose(x,
    cudnn_version = get_cudnn_version()
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                         cudnn_version is not None) else False
    # update attrs
@@ -1057,7 +1061,7 @@ def conv2d_transpose(x,
            op_type = 'depthwise_conv2d_transpose'
            use_cudnn = False
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attrs = ('output_padding', output_padding, 'output_size', output_size,
                 'strides', stride, 'paddings', padding, 'padding_algorithm',
                 padding_algorithm, 'dilations', dilation, 'groups', groups,
@@ -1242,7 +1246,7 @@ def conv3d(x,
            groups))
    cudnn_version = get_cudnn_version()
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                         cudnn_version is not None) else False
    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
@@ -1458,13 +1462,13 @@ def conv3d_transpose(x,
    cudnn_version = get_cudnn_version()
    #TODO(LielinJiang): whether to use cudnn according to the version of cudnn
-    use_cudnn = True if (core.is_compiled_with_cuda() and
+    use_cudnn = True if (is_compiled_with_cuda() and
                         cudnn_version is not None) else False
    op_type = 'conv3d_transpose'
    data_format_ = "NHWC" if channel_last else "NCHW"
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attrs = ('output_padding', output_padding, 'output_size', output_size,
                 'paddings', padding, "padding_algorithm", padding_algorithm,
                 'strides', stride, 'dilations', dilation, 'groups', groups,
python/paddle/nn/functional/extension.py
@@ -17,12 +17,12 @@
 import numpy as np
 from ...fluid.data_feeder import check_dtype
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
 from ...static import Variable
 from ...tensor.creation import assign
-from ...fluid import core, dygraph_utils
+from ...fluid import dygraph_utils
 from ...fluid.layers.layer_function_generator import templatedoc
-from ...fluid.layers.sequence_lod import sequence_mask
+from ...fluid.layers.sequence_lod import sequence_mask  #noqa: F401
+from paddle import in_dynamic_mode
 __all__ = []
@@ -125,7 +125,7 @@ def diag_embed(input, offset=0, dim1=-2, dim2=-1):
            "dim1 and dim2 cannot be the same dimension." \
            "But received dim1 = %d, dim2 = %d\n" % (dim1, dim2)
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        __check_input(input, offset, dim1, dim2)
    helper = LayerHelper("diag_embed", **locals())
python/paddle/nn/functional/input.py
@@ -14,12 +14,11 @@
 from __future__ import print_function
 import warnings
-from ...fluid.framework import in_dygraph_mode
 from ...static import Variable
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.layers import core
 from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 __all__ = []
@@ -87,7 +86,7 @@ def one_hot(x, num_classes, name=None):
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.one_hot_v2(x, 'depth', num_classes, 'allow_out_of_range',
                                 False)
    else:
@@ -196,7 +195,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
        raise ValueError("padding_idx must be within [-{}, {})".format(
            weight.shape[0], weight.shape[0]))
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.lookup_table_v2(
            weight, x, 'is_sparse', sparse, 'is_distributed', False,
            'remote_prefetch', False, 'padding_idx', padding_idx)
python/paddle/nn/functional/loss.py
@@ -14,15 +14,12 @@
 # limitations under the License.
+import paddle
+from ...fluid.layer_helper import LayerHelper
+from ...fluid.data_feeder import check_variable_and_dtype
+import paddle.fluid as fluid
 # TODO: define loss functions of neural network
 import numpy as np
-import paddle
-import paddle.fluid as fluid
-from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.layers.nn import _elementwise_op_in_dygraph
 from ...fluid.layers import dice_loss  # noqa: F401
 from ...fluid.layers import log_loss  # noqa: F401
@@ -34,11 +31,12 @@ from ...fluid.layers import square_error_cost  # noqa: F401
 from ...fluid.layers import edit_distance  # noqa: F401
 from ...fluid.layers import huber_loss
-from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
 from ...fluid.framework import _varbase_creator
 from ...static import Variable
 from paddle.utils import deprecated
 from paddle import _C_ops
+from paddle import in_dynamic_mode
+from paddle.framework import core
 __all__ = []
@@ -115,7 +113,7 @@ def binary_cross_entropy(input, label, weight=None, reduction='mean',
            "'mean' or 'none', but received %s, which is not allowed." %
            reduction)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        out = _C_ops.bce_loss(input, label)
        if weight is not None:
            out = _C_ops.elementwise_mul(out, weight, 'axis', -1)
@@ -249,7 +247,7 @@ def binary_cross_entropy_with_logits(logit,
            "should be 'sum', 'mean' or 'none', but received %s, which is not allowed."
            % reduction)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        one = _varbase_creator(dtype=logit.dtype)
        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                             'dtype', one.dtype,
@@ -284,8 +282,7 @@ def binary_cross_entropy_with_logits(logit,
    out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(
        logit, label, name=sigmoid_name)
-    one = paddle.fluid.layers.fill_constant(
-        shape=[1], value=1.0, dtype=logit.dtype)
+    one = paddle.full(shape=[1], fill_value=1.0, dtype=logit.dtype)
    if pos_weight is not None:
        fluid.data_feeder.check_variable_and_dtype(pos_weight, 'pos_weight',
                                                   ['float32', 'float64'],
@@ -392,7 +389,7 @@ def hsigmoid_loss(input,
            #  [2.2407534]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        out, _, _ = _C_ops.hierarchical_sigmoid(
            input, weight, label, path_table, path_code, bias, 'num_classes',
            num_classes, 'is_sparse', is_sparse, 'remote_prefetch', is_sparse)
@@ -569,7 +566,7 @@ def margin_ranking_loss(input,
        raise ValueError(
            "The value of 'reduction' in MarginRankingLoss should be 'sum', 'mean' or 'none', but "
            "received %s, which is not allowed." % reduction)
-    if fluid.framework.in_dygraph_mode():
+    if in_dynamic_mode():
        out = _C_ops.elementwise_sub(other, input)
        out = _C_ops.elementwise_mul(out, label)
        if margin != 0.0:
@@ -595,8 +592,7 @@ def margin_ranking_loss(input,
    if margin != 0.0:
        margin_var = out.block.create_var(dtype=out.dtype)
-        paddle.fluid.layers.fill_constant(
-            [1], out.dtype, margin, out=margin_var)
+        margin_var = paddle.full(shape=[1], fill_value=margin, dtype=out.dtype)
        out = paddle.add(out, margin_var)
    result_out = helper.create_variable_for_type_inference(input.dtype)
@@ -686,7 +682,7 @@ def l1_loss(input, label, reduction='mean', name=None):
            "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
            "received %s, which is not allowed." % reduction)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        unreduced = _elementwise_op_in_dygraph(
            input, label, axis=-1, act='abs', op_name='elementwise_sub')
        if reduction == 'mean':
@@ -776,7 +772,7 @@ def nll_loss(input,
                input_dims))
    n = input_shape[0]
    c = input_shape[1]
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if input_dims != 2 and input_dims != 4:
            input, _ = _C_ops.reshape2(input, None, 'shape', [n, c, 1, -1])
            label, _ = _C_ops.reshape2(label, None, 'shape', [n, 1, -1])
@@ -995,7 +991,7 @@ def mse_loss(input, label, reduction='mean', name=None):
            "'reduction' in 'mse_loss' should be 'sum', 'mean' or 'none', "
            "but received {}.".format(reduction))
-    if not paddle.fluid.framework.in_dygraph_mode():
+    if not in_dynamic_mode():
        paddle.fluid.data_feeder.check_variable_and_dtype(
            input, 'input', ['float32', 'float64'], 'mse_loss')
        paddle.fluid.data_feeder.check_variable_and_dtype(
@@ -1099,7 +1095,7 @@ def ctc_loss(log_probs,
    loss_out = fluid.layers.warpctc(log_probs, labels, blank, norm_by_times,
                                    input_lengths, label_lengths)
-    loss_out = fluid.layers.squeeze(loss_out, [-1])
+    loss_out = paddle.squeeze(loss_out, [-1])
    assert reduction in ['mean', 'sum', 'none']
    if reduction == 'mean':
        loss_out = paddle.mean(loss_out / label_lengths)
@@ -1319,7 +1315,7 @@ def margin_cross_entropy(logits,
    if input_dims - 1 == label_dims:
        label = paddle.unsqueeze(label, axis=-1)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        softmax, loss = _C_ops.margin_cross_entropy(
            logits, label, 'ring_id', ring_id, 'rank', rank, 'nranks', nranks,
            'margin1', margin1, 'margin2', margin2, 'margin3', margin3, 'scale',
@@ -1664,7 +1660,7 @@ def cross_entropy(input,
                         (got nput_dims{}, label_dims{})'.format(input_dims, label_dims))
    if input_dims - 1 == label_dims:
        label = paddle.unsqueeze(label, axis=axis)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if soft_label == False:
            valid_label = paddle.cast(
                label != ignore_index, dtype=label.dtype) * label
@@ -1978,7 +1974,7 @@ def sigmoid_focal_loss(logit,
            "Expected one dimension of normalizer in sigmoid_focal_loss but got {}.".
            format(normalizer_dims))
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        one = _varbase_creator(dtype=logit.dtype)
        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                             'dtype', one.dtype,
@@ -2025,7 +2021,7 @@ def sigmoid_focal_loss(logit,
    loss = paddle.nn.functional.binary_cross_entropy_with_logits(
        logit, label, reduction='none', name=bce_name)
-    pred = fluid.layers.sigmoid(logit)
+    pred = paddle.nn.functional.sigmoid(logit)
    p_t = pred * label + (1 - pred) * (1 - label)
    alpha_t = alpha * label + (1 - alpha) * (1 - label)
@@ -2125,7 +2121,7 @@ def hinge_embedding_loss(input, label, margin=1.0, reduction='mean', name=None):
            "'reduction' in 'hinge_embedding_loss' should be 'sum', 'mean' or 'none', "
            "but received {}.".format(reduction))
-    if not paddle.fluid.framework.in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                 'hinge_embedding_loss')
        check_variable_and_dtype(label, 'label', ['float32', 'float64'],
python/paddle/nn/functional/norm.py
@@ -17,13 +17,13 @@ import paddle
 import paddle.fluid as fluid
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode, core
 from ...framework import create_parameter
 from ..initializer import Constant
 from ...framework import ParamAttr
-from ...fluid import core, dygraph_utils
+from ...fluid import dygraph_utils
 import numbers
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 __all__ = []
@@ -78,7 +78,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
            # [[0.         0.24253564 0.37139067]
            #  [1.         0.97014254 0.9284767 ]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        eps = fluid.dygraph.base.to_variable([epsilon], dtype=x.dtype)
        out = _C_ops.p_norm(x, 'axis', axis, 'porder',
                            float(p), 'keepdim', True, 'epsilon', epsilon)
@@ -104,7 +104,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
    helper.append_op(
        type='p_norm', inputs={'X': x}, outputs={'Out': out}, attrs=attrs)
    eps = out.block.create_var(dtype=out.dtype)
-    paddle.fluid.layers.fill_constant([1], out.dtype, epsilon, out=eps)
+    eps = paddle.full(shape=[1], fill_value=epsilon, dtype=out.dtype)
    return paddle.divide(x, paddle.maximum(out, eps), name=name)
@@ -180,7 +180,7 @@ def batch_norm(x,
    else:
        trainable_statistics = not use_global_stats
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        # for dygraph need tuple
        attrs = ("momentum", momentum, "epsilon", epsilon, "is_test",
                 not training, "data_layout", data_format, "use_mkldnn", False,
@@ -301,7 +301,7 @@ def layer_norm(x,
                         str_normalized_shape[1:] + ', but got input shape ' +
                         str(input_shape))
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        pre_act, _, _ = _C_ops.layer_norm(x, weight, bias, 'epsilon', epsilon,
                                          'begin_norm_axis', begin_norm_axis)
        return dygraph_utils._append_activation_in_dygraph(pre_act, act=None)
@@ -385,7 +385,7 @@ def instance_norm(x,
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        out, _, _ = _C_ops.instance_norm(x, weight, bias, "epsilon", eps,
                                         "momentum", momentum, "data_format",
                                         data_format)
@@ -474,7 +474,7 @@ def local_response_norm(x,
            y = paddle.nn.functional.local_response_norm(x, size=5)
            print(y.shape)  # [3, 3, 112, 112]
    """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32'], 'local_response_norm')
    if data_format not in ['NCL', 'NLC', 'NCHW', 'NHWC', 'NCDHW', 'NDHWC']:
        raise ValueError(
python/paddle/nn/functional/pooling.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 # TODO: define pooling functions
-from ...fluid import core
-from ...fluid.framework import in_dygraph_mode
 from ...fluid.layers import utils, LayerHelper
 from ...tensor.manipulation import unsqueeze, squeeze
 from ...fluid.data_feeder import check_type, check_variable_and_dtype
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 __all__ = []
@@ -210,7 +208,7 @@ def avg_pool1d(x,
    """
    """NCL to NCHW"""
    data_format = "NCHW"
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool1d')
    _check_input(x, 3)
    x = unsqueeze(x, [2])
@@ -232,7 +230,7 @@ def avg_pool1d(x,
    # use 2d to implenment 1d should expand padding in advance.
    padding = _expand_low_nd_padding(padding)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.pool2d(
            x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
            False, 'strides', stride, 'paddings', padding, 'padding_algorithm',
@@ -346,7 +344,7 @@ def avg_pool2d(x,
    padding, padding_algorithm = _update_padding_nd(
        padding, 2, channel_last, ceil_mode=ceil_mode)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.pool2d(
            x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
            False, 'padding_algorithm', padding_algorithm, 'strides', stride,
            'paddings',
@@ -468,7 +466,7 @@ def avg_pool3d(x,
    _check_value_limitation(kernel_size, "kernel_size", min_limit=1e-3)
    _check_value_limitation(stride, "stride", min_limit=1e-3)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.pool3d(
            x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride,
            'paddings', padding, 'global_pooling', False, 'padding_algorithm',
@@ -571,7 +569,7 @@ def max_pool1d(x,
    """
    """NCL to NCHW"""
    data_format = "NCHW"
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool1d')
    _check_input(x, 3)
    x = unsqueeze(x, [2])
@@ -587,7 +585,7 @@ def max_pool1d(x,
    # use 2d to implenment 1d should expand padding in advance.
    padding = _expand_low_nd_padding(padding)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if return_mask:
            pool_out = _C_ops.max_pool2d_with_index(
                x, 'ksize', kernel_size, 'global_pooling', False, 'strides',
@@ -746,7 +744,7 @@ def max_unpool1d(x,
    output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                      output_size)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.unpool(x, indices, 'unpooling_type', 'max', 'ksize',
                               kernel_size, 'strides', stride, 'paddings',
                               padding, "output_size", output_size,
@@ -861,7 +859,7 @@ def max_unpool2d(x,
    output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                      output_size)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.unpool(x, indices, 'unpooling_type', 'max', 'ksize',
                               kernel_size, 'strides', stride, 'paddings',
                               padding, "output_size", output_size,
@@ -973,7 +971,7 @@ def max_unpool3d(x,
    output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                      output_size)
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.unpool3d(x, indices, 'unpooling_type', 'max', 'ksize',
                                 kernel_size, 'strides', stride, 'paddings',
                                 padding, "output_size", output_size,
@@ -1029,7 +1027,7 @@ def max_pool2d(x,
            "When setting return_mask to true, data_format must be set to NCHW in API:max_pool2d"
        )
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if return_mask:
            output = _C_ops.max_pool2d_with_index(
                x, 'ksize', kernel_size, 'global_pooling', False, 'strides',
@@ -1160,7 +1158,7 @@ def max_pool3d(x,
            "When setting return_mask to true, data_format must be set to NCDHW in API:max_pool3d"
        )
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        if return_mask:
            output = _C_ops.max_pool3d_with_index(
                x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides',
@@ -1250,7 +1248,7 @@ def adaptive_avg_pool1d(x, output_size, name=None):
              # pool_out shape: [1, 3, 16])
    """
    pool_type = 'avg'
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'adaptive_pool2d')
        check_type(output_size, 'pool_size', (int), 'adaptive_pool1d')
@@ -1258,7 +1256,7 @@ def adaptive_avg_pool1d(x, output_size, name=None):
    pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
    x = unsqueeze(x, [2])
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        pool_out = _C_ops.pool2d(x, 'pooling_type', pool_type, 'ksize',
                                 pool_size, 'adaptive', True)
        return squeeze(pool_out, [2])
@@ -1333,7 +1331,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
                output_size=[3, 3])
            # out.shape is [2, 3, 3, 3]
    """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'adaptive_avg_pool2d')
        check_type(data_format, 'data_format', str, 'adaptive_avg_pool2d')
@@ -1357,7 +1355,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
        if output_size[1] == None:
            output_size[1] = in_w
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.pool2d(x, 'pooling_type', 'avg', 'ksize', output_size,
                               'global_pooling', False, 'adaptive', True,
                               'data_format', data_format)
@@ -1437,7 +1435,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
                output_size=[3, 3, 3])
            # out.shape is [2, 3, 3, 3, 3]
    """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                 'adaptive_avg_pool3d')
        check_type(data_format, 'data_format', str, 'adaptive_avg_pool3d')
@@ -1463,7 +1461,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
        if output_size[2] == None:
            output_size[2] = in_w
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        output = _C_ops.pool3d(x, 'pooling_type', 'avg', 'ksize', output_size,
                               'global_pooling', False, 'adaptive', True,
                               'data_format', data_format)
@@ -1537,7 +1535,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
              # pool_out shape: [1, 3, 16] indices  shape: [1, 3, 16]
    """
    pool_type = 'max'
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                 'adaptive_max_pool1d')
        check_type(output_size, 'pool_size', int, 'adaptive_max_pool1d')
@@ -1547,7 +1545,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
    pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
    x = unsqueeze(x, [2])
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        pool_out = _C_ops.max_pool2d_with_index(
            x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True)
        return (squeeze(pool_out[0], [2]), squeeze(
@@ -1619,7 +1617,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
                output_size=[3, 3])
            # out.shape is [2, 3, 3, 3]
    """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                 'adaptive_max_pool2d')
        check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool2d')
@@ -1636,7 +1634,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
        if output_size[1] == None:
            output_size[1] = in_w
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        pool_out = _C_ops.max_pool2d_with_index(
            x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
        return pool_out if return_mask else pool_out[0]
@@ -1710,7 +1708,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
            # out.shape is [2, 3, 3, 3, 3]
    """
-    if not in_dygraph_mode():
+    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                                 'adaptive_max_pool3d')
        check_type(return_mask, 'return_mask', bool, 'adaptive_max_pool3d')
@@ -1729,7 +1727,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
        if output_size[2] == None:
            output_size[2] = in_w
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        pool_out = _C_ops.max_pool3d_with_index(
            x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
        return pool_out if return_mask else pool_out[0]
python/paddle/nn/functional/sparse_attention.py
@@ -14,10 +14,10 @@
 import warnings
 import paddle
-from ...fluid.framework import in_dygraph_mode, default_main_program
+from ...fluid.framework import default_main_program
 from paddle.fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 def sparse_attention(query,
@@ -143,7 +143,7 @@ def sparse_attention(query,
            #  [1.60885942, 2.60885954],
            #  [1.99830270, 2.99830270]]]]
    """
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        result_attention, result_sdd, result_softmax = _C_ops.sparse_attention(
            query, key, value, sparse_csr_offset, sparse_csr_columns,
            key_padding_mask, attn_mask)
python/paddle/nn/functional/vision.py
@@ -13,13 +13,14 @@
 # limitations under the License.
 from ...device import get_cudnn_version
-from ...fluid.framework import core, in_dygraph_mode
 from ...static import Variable
 from ...fluid.layer_helper import LayerHelper
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid import dygraph_utils
 import numpy as np
 from paddle import _C_ops
+from ...device import is_compiled_with_rocm
+from paddle import in_dynamic_mode
 __all__ = []
@@ -83,14 +84,14 @@ def affine_grid(theta, out_shape, align_corners=True, name=None):
        use_cudnn = True
    else:
        use_cudnn = False
-    if core.is_compiled_with_rocm():
+    if is_compiled_with_rocm():
        use_cudnn = False  # ROCM platform do not have MIOPEN kernel for affine_grid
    if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \
            isinstance(out_shape, Variable)):
        raise ValueError("The out_shape should be a list, tuple or Tensor.")
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        _out_shape = out_shape.numpy().tolist() if isinstance(
            out_shape, Variable) else out_shape
        return _C_ops.affine_grid(theta, "output_shape", _out_shape,
@@ -263,7 +264,7 @@ def grid_sample(x,
    cudnn_version = get_cudnn_version()
    use_cudnn = False
-    if not core.is_compiled_with_rocm() and (
+    if not is_compiled_with_rocm() and (
            cudnn_version is not None
    ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros':
        use_cudnn = True
@@ -271,7 +272,7 @@ def grid_sample(x,
        x.stop_gradient = False
        grid.stop_gradient = False
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners',
                 align_corners, 'use_cudnn', use_cudnn)
        out = getattr(_C_ops, 'grid_sampler')(x, grid, *attrs)
@@ -329,7 +330,7 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
            "But recevie Attr(data_format): {} ".format(data_format))
-    if in_dygraph_mode():
+    if in_dynamic_mode():
        return _C_ops.pixel_shuffle(x, "upscale_factor", upscale_factor,
                                    "data_format", data_format)
python/paddle/nn/initializer/assign.py
@@ -11,11 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ...fluid import framework
-from ...fluid import core
-from ...fluid import unique_name
-from ...fluid.core import VarDesc
+import paddle
 from ...fluid.data_feeder import check_type
 from ...fluid.initializer import NumpyArrayInitializer
@@ -88,13 +84,14 @@ class Assign(NumpyArrayInitializer):
    def __init__(self, value, name=None):
        import numpy
        check_type(value, 'value',
-                   (numpy.ndarray, list, tuple, framework.Variable), 'Assign')
+                   (numpy.ndarray, list, tuple, paddle.static.Variable),
+                   'Assign')
        if (isinstance(value, (list, tuple))):
            value = numpy.array(value)
        # TODO: value is already is a tensor, accounting efficiency maybe it does not need to convert tensor to numpy data and then initialized.
-        if (isinstance(value, framework.Variable)):
+        if (isinstance(value, paddle.static.Variable)):
            value = value.numpy()
        super(Assign, self).__init__(value)
python/paddle/nn/initializer/dirac.py
@@ -15,7 +15,9 @@
 from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
-from ...fluid import unique_name, framework
+from ...fluid import framework
+from paddle import in_dynamic_mode
+from paddle.utils import unique_name
 __all__ = []
@@ -221,6 +223,6 @@ class Dirac(Initializer):
                       "out_dtype": var.dtype},
            stop_gradient=True)
-        if not framework.in_dygraph_mode():
+        if not in_dynamic_mode():
            var.op = op
        return op
python/paddle/nn/initializer/orthogonal.py
@@ -14,9 +14,9 @@
 from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
-from ...fluid import unique_name, framework
+from ...fluid import framework
 from ...tensor import diag, transpose, sign, qr, reshape
+from paddle.utils import unique_name
 __all__ = []
python/paddle/nn/layer/activation.py
@@ -14,8 +14,6 @@
 # TODO: define activation functions of neural network
-from ...fluid import core
-from ...fluid.framework import in_dygraph_mode
 from ...framework import ParamAttr
 from ..initializer import Constant
 from paddle.framework import get_default_dtype
python/paddle/nn/layer/common.py
@@ -15,10 +15,10 @@
 # TODO: define the common classes to build a neural network
 import paddle
 from ...fluid.dygraph import Flatten  # noqa: F401
-from ...fluid.framework import in_dygraph_mode
 from .. import functional as F
 from ...fluid.framework import _dygraph_tracer
 from paddle.nn import Layer
+from paddle import in_dynamic_mode
 __all__ = []
@@ -1456,7 +1456,7 @@ class Embedding(Layer):
            dtype=self._dtype,
            is_bias=False)
-        if in_dygraph_mode() and padding_idx != -1:
+        if in_dynamic_mode() and padding_idx != -1:
            with paddle.no_grad():
                self.weight[padding_idx] = 0.0
python/paddle/nn/layer/conv.py
View file @ a710738e
...
@@ -16,14 +16,15 @@
 import numpy as np
-from ...fluid import get_flags
-from ...fluid import core
+from paddle import get_flags
 from ...device import get_cudnn_version
 from .. import Layer
 from ..initializer import Normal
 from .. import functional as F
 from ...fluid.layers import utils
 from ..functional.conv import _update_padding_nd
+from ...device import is_compiled_with_cuda
+from ...device import is_compiled_with_rocm
 __all__ = []
...
@@ -138,7 +139,7 @@ class _ConvNd(Layer):
                 cudnn_version = get_cudnn_version()
-                self._use_cudnn = True if (core.is_compiled_with_cuda() and
+                self._use_cudnn = True if (is_compiled_with_cuda() and
                                            cudnn_version is not None) else False
             self._op_type = "conv" + str(dims) + 'd'
...
@@ -146,13 +147,13 @@ class _ConvNd(Layer):
                     in_channels != 1 and out_channels % in_channels == 0):
                 self._op_type = 'depthwise_conv2d'
-                if core.is_compiled_with_rocm():
+                if is_compiled_with_rocm():
                     self._use_cudnn = True
                 else:
                     self._use_cudnn = False
-        if (core.is_compiled_with_cuda() and get_flags(
+        if (is_compiled_with_cuda() and get_flags(
                 "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
             self._use_cudnn = False
     def extra_repr(self):
...
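
The cuDNN decision above now goes through public helpers only. A hedged sketch of the same checks outside the layer (the flag name is the one used in the diff):

    import paddle
    from paddle.device import get_cudnn_version, is_compiled_with_cuda

    # Reproduce the _ConvNd logic with public APIs: use cuDNN when the build
    # has CUDA, a cuDNN version is detected, and the disable flag is off.
    use_cudnn = is_compiled_with_cuda() and get_cudnn_version() is not None
    if use_cudnn and paddle.get_flags(
            "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]:
        use_cudnn = False
    print("conv2d will use cuDNN:", use_cudnn)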
python/paddle/nn/layer/distance.py
View file @ a710738e
...
@@ -16,10 +16,10 @@ import numpy as np
 import paddle
 from .. import Layer
-from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid.layer_helper import LayerHelper
 from paddle import _C_ops
+from paddle import in_dynamic_mode
 __all__ = []
...
@@ -78,7 +78,7 @@ class PairwiseDistance(Layer):
         check_type(self.keepdim, 'keepdim', (bool), 'PairwiseDistance')
     def forward(self, x, y):
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             sub = _C_ops.elementwise_sub(x, y)
             return _C_ops.p_norm(sub, 'axis', 1, 'porder', self.p, 'keepdim',
                                  self.keepdim, 'epsilon', self.epsilon)
...
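
In eager mode the layer now calls the C++ ops directly (elementwise_sub followed by p_norm). A short usage sketch of the public layer:

    import paddle

    # PairwiseDistance computes the p-norm of (x - y) per row; with p=2 this
    # is the Euclidean distance the eager branch above evaluates via _C_ops.
    dist = paddle.nn.PairwiseDistance(p=2.)
    x = paddle.to_tensor([[1., 3.], [3., 5.]])
    y = paddle.to_tensor([[5., 6.], [7., 8.]])
    print(dist(x, y))   # [5., 5.]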
python/paddle/nn/layer/loss.py
View file @ a710738e
...
@@ -16,11 +16,11 @@
 # TODO: define loss functions of neural network
 import numpy as np
 import paddle.fluid as fluid
-import paddle.fluid.core as core
 import paddle
 from .. import functional as F
-from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator
+from paddle.fluid.framework import _varbase_creator
 from .. import Layer
+from paddle import in_dynamic_mode
 __all__ = []
...
@@ -591,7 +591,7 @@ class MSELoss(Layer):
         self.reduction = reduction
     def forward(self, input, label):
-        if not fluid.framework.in_dygraph_mode():
+        if not in_dynamic_mode():
             fluid.data_feeder.check_variable_and_dtype(
                 input, 'input', ['float32', 'float64'], 'MSELoss')
             fluid.data_feeder.check_variable_and_dtype(
...
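
The dtype checks in MSELoss.forward only run while a static graph is being traced (i.e. not in_dynamic_mode()). A quick eager-mode sketch of the layer itself:

    import paddle

    # mean((input - label)^2); with these values (1.5 - 1.7)^2 = 0.04
    mse = paddle.nn.MSELoss(reduction='mean')
    pred = paddle.to_tensor([1.5], dtype='float32')
    label = paddle.to_tensor([1.7], dtype='float32')
    print(mse(pred, label))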
python/paddle/nn/layer/norm.py
View file @ a710738e
...
@@ -33,12 +33,11 @@ from ...fluid.dygraph import BatchNorm  # noqa: F401
 from ...fluid.dygraph import SpectralNorm  # noqa: F401
 from ...framework import get_default_dtype, set_default_dtype
-from ...fluid.framework import in_dygraph_mode
 from ..initializer import Constant
 from ...framework import ParamAttr
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
-from ...fluid import core, dygraph_utils
+from ...fluid import dygraph_utils
 from ..functional import batch_norm, layer_norm, instance_norm
...
@@ -49,6 +48,7 @@ from ...framework import no_grad
 from .. import functional as F
 from paddle import _C_ops
 from .. import Layer
+from paddle import in_dynamic_mode
 __all__ = []
...
@@ -1087,7 +1087,7 @@ class SyncBatchNorm(_BatchNormBase):
         ### train mode: use mini-batch stats, eval mode: use global stats
         ### use_global_stats only support False in sync_batch_norm
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
                      "is_test", not self.training, "data_layout",
                      self._data_format, "use_mkldnn", False, "fuse_with_relu",
...
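
SyncBatchNorm is usually introduced by converting an existing model rather than constructed directly; a brief sketch of that public entry point (unrelated to the eager-mode branch itself):

    import paddle

    # Swap every BatchNorm* sublayer for SyncBatchNorm so statistics are
    # synchronized across cards when the model is later run distributed.
    model = paddle.nn.Sequential(
        paddle.nn.Conv2D(3, 5, 3), paddle.nn.BatchNorm2D(5))
    sync_model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    print(sync_model)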
python/paddle/nn/layer/rnn.py
View file @ a710738e
...
@@ -33,6 +33,11 @@ from paddle.fluid.layers import utils
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 from paddle.fluid.data_feeder import convert_dtype
 from paddle import _C_ops
+from paddle import in_dynamic_mode
+from paddle.framework import core
+from paddle.static import default_startup_program
+from paddle.static import program_guard
 __all__ = []
...
@@ -970,8 +975,8 @@ class RNNBase(LayerList):
         # dropout state may also can be hided and avoid saving
         # should dropout state be persistable for static-graph
         self._dropout_state = self.create_variable(
-            dtype=fluid.core.VarDesc.VarType.UINT8)
-        if fluid.framework.in_dygraph_mode():
+            dtype=core.VarDesc.VarType.UINT8)
+        if in_dynamic_mode():
             with paddle.no_grad():
                 _C_ops.coalesce_tensor(self._all_weights, self._all_weights,
                                        self._flat_weight[0], "copy_data",
...
@@ -979,8 +984,8 @@ class RNNBase(LayerList):
                                        params[0].dtype)
             return
         # for static-graph, append coalesce_tensor into startup program
-        with fluid.program_guard(fluid.default_startup_program(),
-                                 fluid.default_startup_program()):
+        with program_guard(default_startup_program(),
+                           default_startup_program()):
             with paddle.no_grad():
                 self._helper.append_op(
                     type="coalesce_tensor",
...
@@ -999,7 +1004,7 @@ class RNNBase(LayerList):
         if not self.time_major:
             inputs = paddle.tensor.transpose(inputs, [1, 0, 2])
-        if fluid.framework.in_dygraph_mode():
+        if in_dynamic_mode():
             _, _, out, state = _C_ops.rnn(
                 inputs, initial_states, self._all_weights, sequence_length,
                 self._dropout_state, self.state_components, 'dropout_prob',
...
@@ -1014,7 +1019,7 @@ class RNNBase(LayerList):
             for i in range(self.state_components)
         ]
         reserve = self._helper.create_variable_for_type_inference(
-            dtype=fluid.core.VarDesc.VarType.UINT8, stop_gradient=True)
+            dtype=core.VarDesc.VarType.UINT8, stop_gradient=True)
         inputs = {
             'Input': inputs,
...
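
RNNBase backs the public recurrent layers, and the eager branch above feeds the flattened weights into the fused _C_ops.rnn kernel. A small usage sketch of one such layer:

    import paddle

    # Batch-first input [batch, seq_len, input_size]; the output keeps the
    # sequence dimension while the final states come back as an (h, c) pair.
    lstm = paddle.nn.LSTM(input_size=16, hidden_size=32, num_layers=2)
    x = paddle.randn([4, 10, 16])
    y, (h, c) = lstm(x)
    print(y.shape, h.shape, c.shape)   # [4, 10, 32] [2, 4, 32] [2, 4, 32]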
python/paddle/nn/quant/functional_layers.py
View file @ a710738e
...
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ...fluid.dygraph import layers
 from ...tensor import math, manipulation
+from .. import Layer
 __all__ = []
-class FloatFunctionalLayer(layers.Layer):
+class FloatFunctionalLayer(Layer):
     def __init__(self):
         super(FloatFunctionalLayer, self).__init__()
...
python/paddle/nn/quant/quant_layers.py
View file @ a710738e
...
@@ -12,19 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddle.fluid.dygraph import layers
-from paddle.fluid import core
+from paddle.framework import core
 from paddle.fluid import dygraph_utils
-from paddle.fluid import unique_name
-from paddle.fluid.param_attr import ParamAttr
+from paddle.utils import unique_name
+from paddle.framework import ParamAttr
 from paddle.fluid.framework import _varbase_creator
-from paddle.fluid.framework import in_dygraph_mode
-from paddle.fluid.initializer import Constant
+from paddle.nn.initializer import Constant
 from paddle.fluid.data_feeder import check_variable_and_dtype
 from paddle.nn import functional as F
 import logging
 from paddle.fluid.log_helper import get_logger
 from paddle import _C_ops
+from paddle import in_dynamic_mode
+from paddle.nn import Layer
 __all__ = [
     'FakeQuantAbsMax',
...
@@ -43,7 +43,7 @@ _logger = get_logger(
     __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-class FakeQuantAbsMax(layers.Layer):
+class FakeQuantAbsMax(Layer):
     r"""
     FakeQuantAbsMax layer does the abs_max quant and then dequant.
     Its computational formula is described as below:
...
@@ -76,7 +76,7 @@ class FakeQuantAbsMax(layers.Layer):
         self._scale = None
     def forward(self, input):
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             attrs = ('bit_length', self._quant_bits)
             quant_out = _varbase_creator(
                 type=input.type,
...
@@ -125,7 +125,7 @@ class FakeQuantAbsMax(layers.Layer):
         return quant_out
-class FakeQuantMovingAverageAbsMax(layers.Layer):
+class FakeQuantMovingAverageAbsMax(Layer):
     r"""
     FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
     Its computational formula is described as below:
...
@@ -175,7 +175,7 @@ class FakeQuantMovingAverageAbsMax(layers.Layer):
         self._accum.stop_gradient = True
     def forward(self, input):
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             attrs = ('moving_rate', self._moving_rate, 'bit_length',
                      self._quant_bits, 'is_test', not self.training)
             quant_out = _varbase_creator(
...
@@ -223,7 +223,7 @@ class FakeQuantMovingAverageAbsMax(layers.Layer):
         return quant_out
-class FakeQuantChannelWiseAbsMax(layers.Layer):
+class FakeQuantChannelWiseAbsMax(Layer):
     def __init__(self,
                  name=None,
                  channel_num=None,
...
@@ -253,7 +253,7 @@ class FakeQuantChannelWiseAbsMax(layers.Layer):
         self._scale = None
     def forward(self, input):
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             attrs = ('bit_length', self._quant_bits, 'quant_axis',
                      self._quant_axis)
             quant_out = _varbase_creator(
...
@@ -306,7 +306,7 @@ class FakeQuantChannelWiseAbsMax(layers.Layer):
         return quant_out
-class MovingAverageAbsMaxScale(layers.Layer):
+class MovingAverageAbsMaxScale(Layer):
     def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
         r"""
         MovingAverageMaxScale layer is used to calculating the output quantization
...
@@ -345,7 +345,7 @@ class MovingAverageAbsMaxScale(layers.Layer):
         self._accum.stop_gradient = True
     def forward(self, input):
-        if in_dygraph_mode():
+        if in_dynamic_mode():
             attrs = ('moving_rate', self._moving_rate, 'is_test',
                      not self.training)
             state = self._state if self.training else None
...
@@ -393,7 +393,7 @@ class MovingAverageAbsMaxScale(layers.Layer):
 QuantStub = MovingAverageAbsMaxScale
-class QuantizedConv2D(layers.Layer):
+class QuantizedConv2D(Layer):
     """
     The computational logic of QuantizedConv2D is the same with Conv2D.
     The only difference is that its inputs are all fake quantized.
...
@@ -482,7 +482,7 @@ class QuantizedConv2D(layers.Layer):
             data_format=self._data_format)
-class QuantizedConv2DTranspose(layers.Layer):
+class QuantizedConv2DTranspose(Layer):
     """
     The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose.
     The only difference is that its inputs are all fake quantized.
...
@@ -588,7 +588,7 @@ class QuantizedConv2DTranspose(layers.Layer):
             data_format=self._data_format)
-class QuantizedLinear(layers.Layer):
+class QuantizedLinear(Layer):
     """
     The computational logic of QuantizedLinear is the same with Linear.
     The only difference is that its inputs are all fake quantized.
...
@@ -657,7 +657,7 @@ class QuantizedLinear(layers.Layer):
         return out
-class MAOutputScaleLayer(layers.Layer):
+class MAOutputScaleLayer(Layer):
     """
     Add MovingAverageMaxScale layer to the behind of the input layer.
     Calculate the scale (moving average abs max) for the output of the input layer.
...
@@ -684,7 +684,7 @@ class MAOutputScaleLayer(layers.Layer):
         return self._ma_output_scale(out)
-class FakeQuantMAOutputScaleLayer(layers.Layer):
+class FakeQuantMAOutputScaleLayer(Layer):
     """
     Add FakeQuantMovingAverageAbsMax layer to the behind of the input layer.
     """
...
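
An illustrative sketch only (not the real fake-quant kernels): the forward() methods patched above all follow this structure, branching on the public in_dynamic_mode() instead of fluid's in_dygraph_mode() before calling ops directly.

    import paddle
    from paddle import in_dynamic_mode
    from paddle.nn import Layer

    class AbsMaxQuantStub(Layer):
        def forward(self, x):
            scale = paddle.max(paddle.abs(x))
            if in_dynamic_mode():
                # eager path: call ops directly on Tensors and return
                return x / (scale + 1e-9) * scale
            # static-graph path: the same math, traced into the program
            return paddle.multiply(paddle.divide(x, scale + 1e-9), scale)

    print(AbsMaxQuantStub()(paddle.to_tensor([1.0, -4.0, 2.0])))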
python/paddle/nn/utils/weight_norm_hook.py
View file @ a710738e
...
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import paddle
 import numpy as np
 from ... import fluid
 from ...fluid import dygraph
...
@@ -39,25 +39,25 @@ def l2_norm(x, axis, epsilon=1e-12, name=None):
             "axis": 1 if axis is None else axis,
             "epsilon": epsilon,
         })
-    return F.squeeze(norm, axes=[axis])
+    return paddle.squeeze(norm, axis=[axis])
 def norm_except_dim(p, dim):
     shape = p.shape
     ndims = len(shape)
     if dim == -1:
-        return F.sqrt(F.reduce_sum(F.square(p)) + 1e-12)
+        return paddle.sqrt(paddle.sum(paddle.square(p)) + 1e-12)
     elif dim == 0:
-        p_matrix = F.reshape(p, (shape[0], -1))
+        p_matrix = paddle.reshape(p, (shape[0], -1))
         return l2_norm(p_matrix, axis=1)
     elif dim == ndims - 1:
-        p_matrix = F.reshape(p, (-1, shape[-1]))
+        p_matrix = paddle.reshape(p, (-1, shape[-1]))
         return l2_norm(p_matrix, axis=0)
     else:
         perm = list(range(ndims))
         perm[0] = dim
         perm[dim] = 0
-        p_transposed = F.transpose(p, perm)
+        p_transposed = paddle.transpose(p, perm)
         return norm_except_dim(p_transposed, 0)
...
@@ -66,25 +66,25 @@ def _weight_norm(v, g, dim):
     ndims = len(shape)
     if dim == -1:
-        v_normalized = v / (F.sqrt(F.reduce_sum(F.square(v))) + 1e-12)
+        v_normalized = v / (paddle.sqrt(paddle.sum(paddle.square(v))) + 1e-12)
     elif dim == 0:
-        p_matrix = F.reshape(v, (shape[0], -1))
+        p_matrix = paddle.reshape(v, (shape[0], -1))
         v_normalized = F.l2_normalize(p_matrix, axis=1)
-        v_normalized = F.reshape(v_normalized, shape)
+        v_normalized = paddle.reshape(v_normalized, shape)
     elif dim == ndims - 1:
-        p_matrix = F.reshape(v, (-1, shape[-1]))
+        p_matrix = paddle.reshape(v, (-1, shape[-1]))
         v_normalized = F.l2_normalize(p_matrix, axis=0)
-        v_normalized = F.reshape(v_normalized, shape)
+        v_normalized = paddle.reshape(v_normalized, shape)
     else:
         perm = list(range(ndims))
         perm[0] = dim
         perm[dim] = 0
-        p_transposed = F.transpose(v, perm)
+        p_transposed = paddle.transpose(v, perm)
         transposed_shape = p_transposed.shape
-        p_matrix = F.reshape(p_transposed, (p_transposed.shape[0], -1))
+        p_matrix = paddle.reshape(p_transposed, (p_transposed.shape[0], -1))
         v_normalized = F.l2_normalize(p_matrix, axis=1)
-        v_normalized = F.reshape(v_normalized, transposed_shape)
-        v_normalized = F.transpose(v_normalized, perm)
+        v_normalized = paddle.reshape(v_normalized, transposed_shape)
+        v_normalized = paddle.transpose(v_normalized, perm)
     weight = F.elementwise_mul(
         v_normalized, g, axis=dim if dim is not None else -1)
     return weight
...
@@ -130,9 +130,9 @@ class WeightNorm(object):
         layer.add_parameter(name + "_v", v)
         g = layer.create_parameter(g_var.shape, dtype=g_var.dtype)
         layer.add_parameter(name + '_g', g)
-        with dygraph.no_grad():
-            F.assign(w, v)
-            F.assign(g_var, g)
+        with paddle.no_grad():
+            paddle.assign(w, v)
+            paddle.assign(g_var, g)
         setattr(layer, name, fn.compute_weight(layer))
         layer.register_forward_pre_hook(fn)
...
@@ -145,8 +145,8 @@ class WeightNorm(object):
         del layer._parameters[self.name + '_v']
         w = layer.create_parameter(w_var.shape, dtype=w_var.dtype)
         layer.add_parameter(self.name, w)
-        with dygraph.no_grad():
-            F.assign(w_var, w)
+        with paddle.no_grad():
+            paddle.assign(w_var, w)
     def __call__(self, layer, inputs):
         setattr(layer, self.name, self.compute_weight(layer))
...
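
These helpers back the public weight-norm utilities: the weight is re-expressed as g * v / ||v|| along the chosen dim. A short usage sketch (shapes shown for a Conv2D with dim=0):

    import paddle
    from paddle.nn.utils import weight_norm, remove_weight_norm

    # weight_norm adds the weight_g / weight_v parameters that the hook above
    # maintains and recomputes the fused weight before every forward pass.
    conv = paddle.nn.Conv2D(3, 5, 3)
    conv = weight_norm(conv, name="weight", dim=0)
    print(conv.weight_g.shape, conv.weight_v.shape)   # [5] [5, 3, 3, 3]
    remove_weight_norm(conv)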
python/paddle/tensor/manipulation.py
View file @ a710738e
...
@@ -30,6 +30,7 @@ from ..fluid.layers import unstack  # noqa: F401
 from ..fluid.layers import scatter_nd  # noqa: F401
 from ..fluid.layers import shard_index  # noqa: F401
 from ..fluid.layers import crop_tensor as crop  # noqa: F401
+from ..fluid.layers.nn import _elementwise_op_in_dygraph
 from ..fluid import layers
 from ..fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only
...