diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 53d50a8b4a3ed378aa203f9458a4dc440e080716..8045c8cb5a62c49ff32c2e758c3985cecb568a51 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -99,7 +99,6 @@ from . import compiler
 from .compiler import *
 from paddle.fluid.layers.math_op_patch import monkey_patch_variable
 from . import install_check
-from .dygraph.nn import *
 from .dygraph.layers import *
 from .dygraph.base import enable_dygraph, disable_dygraph
 from .io import save, load, load_program_state, set_program_state
diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py
index aebcc09eaa14ba8242b7ea4041f5816a5259cc7c..b98c188ae4f6ab3ecd191940431bb86d84ddccc7 100644
--- a/python/paddle/fluid/dygraph/__init__.py
+++ b/python/paddle/fluid/dygraph/__init__.py
@@ -21,9 +21,6 @@ from .layers import *
 from . import container
 from .container import *
 
-from . import nn
-from .nn import *
-
 from . import tracer
 from .tracer import *
 
@@ -45,7 +42,6 @@ __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
 __all__ += container.__all__
-__all__ += nn.__all__
 __all__ += parallel.__all__
 __all__ += checkpoint.__all__
 __all__ += learning_rate_scheduler.__all__
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
deleted file mode 100644
index f6009912bee9062a4a8478237a41c9168af50782..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/dygraph/nn.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-from .. import core
-from ..layers import utils
-from ..layers import nn as F
-from .. import dygraph_utils
-from . import layers
-from ..framework import (
-    Variable,
-    OpProtoHolder,
-    Parameter,
-    _dygraph_tracer,
-    _varbase_creator,
-    default_main_program,
-    _global_flags,
-    in_dygraph_mode,
-)
-
-from ..data_feeder import (
-    convert_dtype,
-    check_variable_and_dtype,
-    check_type,
-    check_dtype,
-)
-
-from ..param_attr import ParamAttr
-from ..initializer import Normal, Constant, NumpyArrayInitializer
-from .. import unique_name
-from .layer_object_helper import LayerObjectHelper
-from ..data_feeder import check_variable_and_dtype, check_type
-import numpy as np
-import numbers
-import logging
-import os
-import paddle.utils.deprecated as deprecated
-from paddle import _C_ops, _legacy_C_ops
-
-__all__ = []
-
-
-class BatchNorm(layers.Layer):
-    r"""
-
-    This interface is used to construct a callable object of the ``BatchNorm`` class.
-    For more details, refer to code examples.
-    It implements the function of the Batch Normalization Layer and can be used
-    as a normalizer function for conv2d and fully connected operations.
-    The data is normalized by the mean and variance of the channel based on the current batch data.
-    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
-    Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
-    for more details.
-
-    When use_global_stats = False, the :math:`\mu_{\beta}`
-    and :math:`\sigma_{\beta}^{2}` are the statistics of one mini-batch.
-    Calculated as follows:
-
-    .. math::
-
-        \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &
-        //\ mini-batch\ mean \\
-        \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad &
-        //\ mini-batch\ variance \\
-
-    - :math:`x` : mini-batch data
-    - :math:`m` : the size of the mini-batch data
-
-    When use_global_stats = True, the :math:`\\mu_{\\beta}`
-    and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
-    They are global or running statistics (moving_mean and moving_variance). It usually got from the
-    pre-trained model. Calculated as follows:
-
-    .. math::
-        moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global mean \\
-        moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global variance \\
-
-    The normalization function formula is as follows:
-
-    .. math::
-
-        \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\
-        \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\
-        y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift
-
-
-    - :math:`\epsilon` : add a smaller value to the variance to prevent division by zero
-    - :math:`\gamma` : trainable proportional parameter
-    - :math:`\beta` : trainable deviation parameter
-
-    Parameters:
-        num_channels(int): Indicate the number of channels of the input ``Tensor``.
-        act(str, optional): Activation to be applied to the output of batch normalization. Default: None.
-        is_test (bool, optional): A flag indicating whether it is in test phrase or not.
-            This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``.
-            Default: False.
-        momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
-        epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5.
-        param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale`
-            of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
-            will create ParamAttr as param_attr. If the Initializer of the param_attr
-            is not set, the parameter is initialized with Xavier. Default: None.
-        bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm.
-            If it is set to None or one attribute of ParamAttr, batch_norm
-            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized zero. Default: None.
-        dtype(str, optional): Indicate the data type of the input ``Tensor``,
-            which can be float32 or float64. Default: float32.
-        data_layout(str, optional): Specify the input data format, the data format can be "NCHW" or "NHWC", where `N` is batch size, `C` is the number of the feature map, `H` is the height of the feature map, `W` is the width of the feature map. Default: NCHW.
-        in_place(bool, optional): Make the input and output of batch norm reuse memory. Default: False.
-        moving_mean_name(str, optional): The name of moving_mean which store the global Mean. Default: None.
-        moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. Default: None.
-        do_model_average_for_mean_and_var(bool, optional): Whether parameter mean and variance should do model
-            average when model average is enabled. Default: True.
-        use_global_stats(bool, optional): Whether to use global mean and
-            variance. In inference or test mode, set use_global_stats to true
-            or is_test to true, and the behavior is equivalent.
-            In train mode, when setting use_global_stats True, the global mean
-            and variance are also used during train period. Default: False.
-        trainable_statistics(bool, optional): Whether to calculate mean and var in eval mode. In eval mode, when
-            setting trainable_statistics True, mean and variance will be calculated by current batch statistics.
-            Default: False.
-
-    Returns:
-        None
-
-    Examples:
-        .. code-block:: python
-
-            import paddle
-            import paddle.fluid as fluid
-            from paddle.fluid.dygraph.base import to_variable
-
-            x = paddle.rand([3, 10, 3, 7], 'float32')
-            with fluid.dygraph.guard():
-                x = to_variable(x)
-                batch_norm = fluid.BatchNorm(10)
-                hidden1 = batch_norm(x)
-    """
-
-    def __init__(
-        self,
-        num_channels,
-        act=None,
-        is_test=False,
-        momentum=0.9,
-        epsilon=1e-05,
-        param_attr=None,
-        bias_attr=None,
-        dtype='float32',
-        data_layout='NCHW',
-        in_place=False,
-        moving_mean_name=None,
-        moving_variance_name=None,
-        do_model_average_for_mean_and_var=True,
-        use_global_stats=False,
-        trainable_statistics=False,
-    ):
-        super().__init__()
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._act = act
-        self._use_mkldnn = _global_flags()["FLAGS_use_mkldnn"]
-
-        assert (
-            bias_attr is not False
-        ), "bias_attr should not be False in batch_norm."
-
-        if dtype == "float16":
-            self._dtype = "float32"
-        else:
-            self._dtype = dtype
-
-        param_shape = [num_channels]
-
-        # create parameter
-        self.weight = self.create_parameter(
-            attr=self._param_attr,
-            shape=param_shape,
-            dtype=self._dtype,
-            default_initializer=Constant(1.0),
-        )
-        self.weight.stop_gradient = (
-            use_global_stats and self._param_attr.learning_rate == 0.0
-        )
-
-        self.bias = self.create_parameter(
-            attr=self._bias_attr,
-            shape=param_shape,
-            dtype=self._dtype,
-            is_bias=True,
-        )
-        self.bias.stop_gradient = (
-            use_global_stats and self._param_attr.learning_rate == 0.0
-        )
-
-        self._mean = self.create_parameter(
-            attr=ParamAttr(
-                name=moving_mean_name,
-                initializer=Constant(0.0),
-                trainable=False,
-                do_model_average=do_model_average_for_mean_and_var,
-            ),
-            shape=param_shape,
-            dtype=self._dtype,
-        )
-        self._mean.stop_gradient = True
-
-        self._variance = self.create_parameter(
-            attr=ParamAttr(
-                name=moving_variance_name,
-                initializer=Constant(1.0),
-                trainable=False,
-                do_model_average=do_model_average_for_mean_and_var,
-            ),
-            shape=param_shape,
-            dtype=self._dtype,
-        )
-        self._variance.stop_gradient = True
-
-        self._in_place = in_place
-        self._data_layout = data_layout
-        self._momentum = momentum
-        self._epsilon = epsilon
-        self._is_test = is_test
-        self._fuse_with_relu = False
-        self._use_global_stats = use_global_stats
-        self._trainable_statistics = trainable_statistics
-
-    def forward(self, input):
-        # create output
-        # mean and mean_out share the same memory
-        mean_out = self._mean
-        # variance and variance out share the same memory
-        variance_out = self._variance
-
-        if in_dygraph_mode():
-            batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
-                input,
-                self._mean,
-                self._variance,
-                self.weight,
-                self.bias,
-                not self.training,
-                self._momentum,
-                self._epsilon,
-                self._data_layout,
-                self._use_global_stats,
-                self._trainable_statistics,
-            )
-            return dygraph_utils._append_activation_in_dygraph(
-                batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn
-            )
-        else:
-            check_variable_and_dtype(
-                input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm'
-            )
-
-            attrs = {
-                "momentum": self._momentum,
-                "epsilon": self._epsilon,
-                "is_test": self._is_test,
-                "data_layout": self._data_layout,
-                "use_mkldnn": False,
-                "fuse_with_relu": self._fuse_with_relu,
-                "use_global_stats": self._use_global_stats,
-                "trainable_statistics": self._trainable_statistics,
-            }
-
-            inputs = {
-                "X": [input],
-                "Scale": [self.weight],
-                "Bias": [self.bias],
-                "Mean": [self._mean],
-                "Variance": [self._variance],
-            }
-
-            saved_mean = self._helper.create_variable_for_type_inference(
-                dtype=self._dtype, stop_gradient=True
-            )
-            saved_variance = self._helper.create_variable_for_type_inference(
-                dtype=self._dtype, stop_gradient=True
-            )
-            reserve_space = self._helper.create_variable_for_type_inference(
-                dtype=self._helper.input_dtype(input), stop_gradient=True
-            )
-
-            batch_norm_out = (
-                input
-                if self._in_place
-                else self._helper.create_variable_for_type_inference(
-                    self._dtype
-                )
-            )
-
-            outputs = {
-                "Y": [batch_norm_out],
-                "MeanOut": [mean_out],
-                "VarianceOut": [variance_out],
-                "SavedMean": [saved_mean],
-                "SavedVariance": [saved_variance],
-            }
-            if reserve_space is not None:
-                outputs["ReserveSpace"] = [reserve_space]
-
-            self._helper.append_op(
-                type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs
-            )
-
-            # Currently, we don't support inplace in dygraph mode
-            return self._helper.append_activation(batch_norm_out, self._act)
diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py
index 5756361f89e46f005072d2136b2e13de4762525b..bf1ad9b107f74694c80472f583287d617fdf0616 100644
--- a/python/paddle/fluid/install_check.py
+++ b/python/paddle/fluid/install_check.py
@@ -25,7 +25,7 @@ from .param_attr import ParamAttr
 from .initializer import Constant
 from . import layers
 from . import backward
-from .dygraph import Layer, nn
+from .dygraph import Layer
 from . import executor
 from . import optimizer
 from . import core
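
Note (not part of the patch above): with fluid.dygraph.nn removed, code that still builds fluid.BatchNorm / fluid.dygraph.BatchNorm has to move to the paddle.nn namespace. Below is a minimal migration sketch, assuming the standard Paddle 2.x layers paddle.nn.BatchNorm (which keeps the old num_channels-style constructor) and paddle.nn.BatchNorm2D; the diff itself does not prescribe a replacement, so treat this as illustrative only.

    # Migration sketch (assumption), mirroring the example from the deleted docstring.
    import paddle

    x = paddle.rand([3, 10, 3, 7], 'float32')  # NCHW input with 10 channels

    # Dygraph is the default mode in Paddle 2.x, so no fluid.dygraph.guard() is needed.
    batch_norm = paddle.nn.BatchNorm(10)  # replacement in the style of fluid.BatchNorm(10)
    hidden1 = batch_norm(x)

    # Alternatively, the shape-specific layer that takes num_features:
    batch_norm2d = paddle.nn.BatchNorm2D(10)
    hidden2 = batch_norm2d(x)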