From cf475f95dfa1c28362b4fdb94930824054c3dce4 Mon Sep 17 00:00:00 2001 From: zhongpu <2013000149@qq.com> Date: Wed, 8 Jan 2020 12:35:04 +0800 Subject: [PATCH] Remove FC in dygraph, modify FC to Linear in sample code (#22082) * modify fc to linear in sample code, test=develop * remove FC, test=develop * remove warnings, test=develop * drop fluid/imperative/README.md , test=develop * change fc to linear, test=develop * polish code style, test=develop --- paddle/fluid/pybind/imperative.cc | 12 +- python/paddle/fluid/dygraph/base.py | 20 +- python/paddle/fluid/dygraph/nn.py | 224 +----------------- python/paddle/fluid/dygraph/parallel.py | 14 +- .../fluid/dygraph/varbase_patch_methods.py | 12 +- python/paddle/fluid/dygraph_grad_clip.py | 18 +- python/paddle/fluid/framework.py | 34 +-- python/paddle/fluid/install_check.py | 20 +- python/paddle/fluid/layers/nn.py | 2 +- .../fluid/tests/unittests/test_detach.py | 131 +++++----- .../unittests/test_dygraph_mnist_fp16.py | 30 +-- .../unittests/test_imperative_auto_prune.py | 213 +++++++++-------- .../tests/unittests/test_imperative_basic.py | 60 ++--- .../unittests/test_imperative_debug_string.py | 18 +- .../unittests/test_imperative_framework.py | 18 +- .../test_imperative_partitial_backward.py | 20 +- .../test_imperative_reinforcement.py | 14 +- .../unittests/test_imperative_save_load.py | 26 +- .../fluid/tests/unittests/test_layers.py | 137 +---------- 19 files changed, 355 insertions(+), 668 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index fe4debda1f9..b7c68610cc2 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) { import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) print(x.numpy()) )DOC") @@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) { import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) y = x.detach() )DOC") diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 674c556daa2..a59fad7e556 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -84,12 +84,12 @@ def _no_grad_(func): @fluid.dygraph.no_grad def test_layer(): with fluid.dygraph.guard(): - inp = np.ones([3, 32, 32], dtype='float32') + inp = np.ones([3, 1024], dtype='float32') t = fluid.dygraph.base.to_variable(inp) - fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = fluid.FC('fc2', size=4) - ret = fc1(t) - dy_ret = fc2(ret) + linear1 = fluid.Linear(1024, 4, bias_attr=False) + linear2 = fluid.Linear(4, 4) + ret = linear1(t) + dy_ret = linear2(ret) test_layer() @@ -127,12 +127,12 @@ def guard(place=None): import paddle.fluid as fluid with fluid.dygraph.guard(): - inp = np.ones([3, 32, 32], dtype='float32') + inp = np.ones([3, 1024], dtype='float32') t = 
fluid.dygraph.base.to_variable(inp) - fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = fluid.FC('fc2', size=4) - ret = fc1(t) - dy_ret = fc2(ret) + linear1 = fluid.Linear(1024, 4, bias_attr=False) + linear2 = fluid.Linear(4, 4) + ret = linear1(t) + dy_ret = linear2(ret) """ train = framework.Program() diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 08a36a9265d..b38e405ff62 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -29,10 +29,9 @@ import numbers import logging __all__ = [ - 'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'Linear', 'BatchNorm', 'Embedding', - 'GRUUnit', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', - 'Conv2DTranspose', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', - 'TreeConv' + 'Conv2D', 'Conv3D', 'Pool2D', 'Linear', 'BatchNorm', 'Embedding', 'GRUUnit', + 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', + 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'TreeConv' ] @@ -865,7 +864,7 @@ class Linear(layers.Layer): where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively. - Different from FC layer, Linear layer takes only one ``Tensor`` input. + Linear layer takes only one ``Tensor`` input. The Linear layer multiplies input tensor with weight matrix and produces an output Tensor of shape [N, *, `output_dim`], where N is batch size and `*` means any number of additional dimensions. @@ -959,221 +958,6 @@ class Linear(layers.Layer): return self._helper.append_activation(pre_activation, act=self._act) -class FC(layers.Layer): - """ - This interface is used to construct a callable object of the ``FC`` class. - For more details, refer to code examples. - It creates a fully connected layer in the network. It can take - one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor, - which represents a fully connected weight matrix from each input unit to - each output unit. The fully connected layer multiplies each input tensor - with its corresponding weight to produce an output Tensor with shape [N, `size`], - where N is batch size. If multiple input tensors are given, the results of - multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr`` - is not None, a bias variable will be created and added to the output. - Finally, if ``act`` is not None, it will be applied to the output as well. - - When the input is single ``Tensor`` : - - .. math:: - - Out = Act({XW + b}) - - When the input are multiple ``Tensor`` : - - .. math:: - - Out = Act({\sum_{i=0}^{N-1}X_iW_i + b}) - - In the above equation: - - * :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` . - * :math:`X_i`: The i-th input ``Tensor`` . - * :math:`W_i`: The i-th weights matrix corresponding i-th input tensor. - * :math:`b`: The bias parameter created by this layer (if needed). - * :math:`Act`: The activation function. - * :math:`Out`: The output ``Tensor`` . - - See below for an example. - - .. code-block:: text - - Given: - data_1.data = [[[0.1, 0.2]]] - data_1.shape = (1, 1, 2) # 1 is batch_size - - data_2.data = [[[0.1, 0.2, 0.3]]] - data_2.shape = (1, 1, 3) # 1 is batch_size - - fc = FC("fc", 2, num_flatten_dims=2) - out = fc(input=[data_1, data_2]) - - Then: - out.data = [[[0.182996 -0.474117]]] - out.shape = (1, 1, 2) - - Parameters: - name_scope(str): The name of this class. - size(int): The number of output units in this layer. 
- num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than - two dimensions. If this happens, the multi-dimension tensor will first be flattened - into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input - tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) - dimensions will be flatten to form the first dimension of the final matrix (height of - the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to - form the second dimension of the final matrix (width of the matrix). For example, suppose - `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. - Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1 - param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable - weights(Parameter) of this layer. Default: None. - bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias - of this layer. If it is set to False, no bias will be added to the output units. - If it is set to None, the bias is initialized zero. Default: None. - act (str, optional): Activation to be applied to the output of this layer. Default: None. - is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False. - dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32". - - Attribute: - **weight** (list of Parameter): the learnable weights of this layer. - - **bias** (Parameter or None): the learnable bias of this layer. - - Returns: - None - - Examples: - .. code-block:: python - - from paddle.fluid.dygraph.base import to_variable - import paddle.fluid as fluid - from paddle.fluid.dygraph import FC - import numpy as np - - data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') - with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) - data = to_variable(data) - conv = fc(data) - - """ - - def __init__(self, - name_scope, - size, - num_flatten_dims=1, - param_attr=None, - bias_attr=None, - act=None, - is_test=False, - dtype="float32"): - super(FC, self).__init__(name_scope, dtype) - - self._size = size - self._num_flatten_dims = num_flatten_dims - self._dtype = dtype - self._param_attr = param_attr - self._bias_attr = bias_attr - self._act = act - self.__w = list() - - def _build_once(self, input): - i = 0 - for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): - input_shape = inp.shape - - param_shape = [ - reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], - 1) - ] + [self._size] - self.__w.append( - self.add_parameter( - '_w%d' % i, - self.create_parameter( - attr=param, - shape=param_shape, - dtype=self._dtype, - is_bias=False))) - i += 1 - - size = list([self._size]) - self._b = self.create_parameter( - attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True) - - # TODO(songyouwei): We should remove _w property - @property - def _w(self, i=0): - return self.__w[i] - - @_w.setter - def _w(self, value, i=0): - assert isinstance(self.__w[i], Variable) - self.__w[i].set_value(value) - - @property - def weight(self): - if len(self.__w) > 1: - return self.__w - else: - return self.__w[0] - - @weight.setter - def weight(self, value): - if len(self.__w) == 1: - self.__w[0] = value - - @property - def bias(self): - return self._b - - @bias.setter - def bias(self, value): - self._b = value - - def forward(self, input): - mul_results = list() - i = 0 
- for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): - tmp = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="mul", - inputs={"X": inp, - "Y": self.__w[i]}, - outputs={"Out": tmp}, - attrs={ - "x_num_col_dims": self._num_flatten_dims, - "y_num_col_dims": 1 - }) - i += 1 - mul_results.append(tmp) - - if len(mul_results) == 1: - pre_bias = mul_results[0] - else: - pre_bias = self._helper.create_variable_for_type_inference( - self._dtype) - self._helper.append_op( - type="sum", - inputs={"X": mul_results}, - outputs={"Out": pre_bias}, - attrs={"use_mkldnn": False}) - - if self._b: - pre_activation = self._helper.create_variable_for_type_inference( - dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self._b]}, - outputs={'Out': [pre_activation]}, - attrs={'axis': self._num_flatten_dims}) - else: - pre_activation = pre_bias - # Currently, we don't support inplace in dygraph mode - return self._helper.append_activation(pre_activation, act=self._act) - - class BatchNorm(layers.Layer): """ This interface is used to construct a callable object of the ``BatchNorm`` class. diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index e8746a860c5..76a3d2c5dcb 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -97,7 +97,7 @@ class DataParallel(layers.Layer): import paddle.fluid as fluid import paddle.fluid.dygraph as dygraph from paddle.fluid.optimizer import AdamOptimizer - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.dygraph.base import to_variable place = fluid.CUDAPlace(0) @@ -106,28 +106,28 @@ class DataParallel(layers.Layer): # prepare the data parallel context strategy=dygraph.parallel.prepare_context() - fc_layer = FC("FC", 10, act="softmax") + linear = Linear(1, 10, act="softmax") adam = fluid.optimizer.AdamOptimizer() # make the module become the data parallelism module - fc_layer = dygraph.parallel.DataParallel(fc_layer, strategy) + linear = dygraph.parallel.DataParallel(linear, strategy) x_data = np.random.random(size=[10, 1]).astype(np.float32) data = to_variable(x_data) - hidden = fc_layer(data) + hidden = linear(data) avg_loss = fluid.layers.mean(hidden) # scale the loss according to the number of trainers. - avg_loss = fc_layer.scale_loss(avg_loss) + avg_loss = linear.scale_loss(avg_loss) avg_loss.backward() # collect the gradients of trainers. - fc_layer.apply_collective_grads() + linear.apply_collective_grads() adam.minimize(avg_loss) - fc_layer.clear_gradients() + linear.clear_gradients() Args: layers(Layer): The module that should be executed by data parallel. 
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 10f448fe807..1390919151a 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -39,17 +39,17 @@ def monkey_patch_varbase(): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np - data = np.ones([3, 32, 32], dtype='float32') + data = np.ones([3, 1024], dtype='float32') with fluid.dygraph.guard(): - fc = fluid.dygraph.FC("fc", 4) + linear = fluid.dygraph.Linear(1024, 4) t = to_variable(data) - fc(t) # call with default weight + linear(t) # call with default weight custom_weight = np.random.randn(1024, 4).astype("float32") - fc.weight.set_value(custom_weight) # change existing weight - out = fc(t) # call with different weight + linear.weight.set_value(custom_weight) # change existing weight + out = linear(t) # call with different weight """ assert isinstance(value, (np.ndarray, core.VarBase)), \ diff --git a/python/paddle/fluid/dygraph_grad_clip.py b/python/paddle/fluid/dygraph_grad_clip.py index 5e9c2a87b93..db7a76615f8 100644 --- a/python/paddle/fluid/dygraph_grad_clip.py +++ b/python/paddle/fluid/dygraph_grad_clip.py @@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm @@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase): init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') - fc = FC( "fc", 10) + linear = Linear( 10, 10) - out = fc( to_variable(init_value) ) + out = linear( to_variable(init_value) ) loss = fluid.layers.reduce_mean( out ) @@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm @@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase): init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') - fc = FC( "fc", 10) + linear = Linear( 10, 10) - out = fc( to_variable(init_value) ) + out = linear( to_variable(init_value) ) loss = fluid.layers.reduce_mean( out ) @@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm @@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase): init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') - fc = FC( "fc", 10) + linear = Linear( 10, 10) - out = fc( to_variable(init_value) ) + out = linear( to_variable(init_value) ) loss = fluid.layers.reduce_mean( out ) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 907ac3209c1..35950f74305 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -959,14 +959,14 @@ class Variable(object): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph 
import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) y = x.detach() """ @@ -991,14 +991,14 @@ class Variable(object): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) print(x.numpy()) """ @@ -1020,17 +1020,17 @@ class Variable(object): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np - data = np.ones([3, 32, 32], dtype='float32') + data = np.ones([3, 1024], dtype='float32') with fluid.dygraph.guard(): - fc = fluid.dygraph.FC("fc", 4) + linear = fluid.dygraph.Linear(1024, 4) t = to_variable(data) - fc(t) # call with default weight + linear(t) # call with default weight custom_weight = np.random.randn(1024, 4).astype("float32") - fc.weight.set_value(custom_weight) # change existing weight - out = fc(t) # call with different weight + linear.weight.set_value(custom_weight) # change existing weight + out = linear(t) # call with different weight """ pass @@ -1223,18 +1223,18 @@ class Variable(object): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) out.backward() - assert (fc._w.gradient() == 0).all() + assert (linear.weight.gradient() == 0).all() assert (out1.gradient() == 0).all() """ if in_dygraph_mode(): diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 31755e22212..42366aad88e 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -30,14 +30,15 @@ __all__ = ['run_check'] class SimpleLayer(Layer): - def __init__(self, name_scope): - super(SimpleLayer, self).__init__(name_scope) - self._fc1 = nn.FC(self.full_name(), - 3, - param_attr=ParamAttr(initializer=Constant(value=0.1))) + def __init__(self, input_size): + super(SimpleLayer, self).__init__() + self._linear1 = nn.Linear( + input_size, + 3, + param_attr=ParamAttr(initializer=Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) + x = self._linear1(inputs) x = layers.reduce_sum(x) return x @@ -79,7 +80,7 @@ def run_check(): build_strategy = compiler.BuildStrategy() build_strategy.enable_inplace = True inp = layers.data(name="inp", shape=[2, 2]) - simple_layer = SimpleLayer("simple_layer") + simple_layer = SimpleLayer(input_size=2) out = simple_layer(inp) exe = executor.Executor( core.CUDAPlace(0) if core.is_compiled_with_cuda() and @@ -108,10 +109,11 @@ def run_check(): with unique_name.guard(): inp0 = 
layers.data( name="inp", shape=[2, 2], append_batch_size=False) - simple_layer0 = SimpleLayer("simple_layer") + simple_layer0 = SimpleLayer(input_size=2) out0 = simple_layer0(inp0) param_grads = backward.append_backward( - out0, parameter_list=[simple_layer0._fc1._w.name])[0] + out0, + parameter_list=[simple_layer0._linear1.weight.name])[0] exe0 = executor.Executor( core.CUDAPlace(0) if core.is_compiled_with_cuda() and (core.get_cuda_device_count() > 0) else core.CPUPlace()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f61a4d04163..4b34c365aa4 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -3002,7 +3002,7 @@ def layer_norm(input, print(output) """ assert in_dygraph_mode( - ) is not True, "please use FC instead of fc in dygraph mode!" + ) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!" helper = LayerHelper('layer_norm', **locals()) dtype = helper.input_dtype() diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 6b163ee56e1..59e9e9e4127 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -17,8 +17,7 @@ from __future__ import print_function import numpy as np import paddle.fluid as fluid -from paddle.fluid import FC -from paddle.fluid.dygraph import FC +from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph.base import to_variable import unittest @@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase): def no_detach_multi(self): data = self.generate_Data() with fluid.dygraph.guard(): - fc_w_param_attrs = fluid.ParamAttr( + linear_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(5.0)) - fc_b_param_attrs = fluid.ParamAttr( + linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - fc = FC("fc", - 10, - num_flatten_dims=1, - param_attr=fc_w_param_attrs, - bias_attr=fc_b_param_attrs) - fc1_w_param_attrs = fluid.ParamAttr( + linear = Linear( + 4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) + linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) - fc1_b_param_attrs = fluid.ParamAttr( + linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - fc1 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc1_w_param_attrs, - bias_attr=fc1_b_param_attrs) - fc2_w_param_attrs = fluid.ParamAttr( + linear1 = Linear( + 10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) + linear2_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(9.0)) - fc2_b_param_attrs = fluid.ParamAttr( + linear2_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(10.0)) - fc2 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc2_w_param_attrs, - bias_attr=fc2_b_param_attrs) + linear2 = Linear( + 10, + 1, + param_attr=linear2_w_param_attrs, + bias_attr=linear2_b_param_attrs) data = to_variable(data) - x = fc(data) - x1 = fc1(x) - x2 = fc2(x) + x = linear(data) + x1 = linear1(x) + x2 = linear2(x) loss = x1 + x2 # print(loss, loss.shape) loss.backward() @@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase): def no_detach_single(self): data = self.generate_Data() with fluid.dygraph.guard(): - fc_w_param_attrs = fluid.ParamAttr( + linear_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(5.0)) - fc_b_param_attrs = fluid.ParamAttr( + 
linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - fc = FC("fc", - 10, - num_flatten_dims=1, - param_attr=fc_w_param_attrs, - bias_attr=fc_b_param_attrs) - fc1_w_param_attrs = fluid.ParamAttr( + linear = Linear( + 4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) + linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) - fc1_b_param_attrs = fluid.ParamAttr( + linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - fc1 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc1_w_param_attrs, - bias_attr=fc1_b_param_attrs) + linear1 = Linear( + 10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) data = to_variable(data) - x = fc(data) - x1 = fc1(x) + x = linear(data) + x1 = linear1(x) loss = x1 # print(loss, loss.shape) loss.backward() @@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase): def detach_multi(self): data = self.generate_Data() with fluid.dygraph.guard(): - fc_w_param_attrs = fluid.ParamAttr( + linear_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(5.0)) - fc_b_param_attrs = fluid.ParamAttr( + linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - fc = FC("fc", - 10, - num_flatten_dims=1, - param_attr=fc_w_param_attrs, - bias_attr=fc_b_param_attrs) - fc1_w_param_attrs = fluid.ParamAttr( + linear = Linear( + 4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) + linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) - fc1_b_param_attrs = fluid.ParamAttr( + linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - fc1 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc1_w_param_attrs, - bias_attr=fc1_b_param_attrs) - fc2_w_param_attrs = fluid.ParamAttr( + linear1 = Linear( + 10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) + linear2_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(9.0)) - fc2_b_param_attrs = fluid.ParamAttr( + linear2_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(10.0)) - fc2 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc2_w_param_attrs, - bias_attr=fc2_b_param_attrs) + linear2 = Linear( + 10, + 1, + param_attr=linear2_w_param_attrs, + bias_attr=linear2_b_param_attrs) data = to_variable(data) - x = fc(data) + x = linear(data) x_detach = x.detach() - x1 = fc1(x) - x2 = fc2(x_detach) + x1 = linear1(x) + x2 = linear2(x_detach) loss = x1 + x2 # print(loss, loss.shape) loss.backward() diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py index 34a63e6a953..0a5d8e0cdd3 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py @@ -18,7 +18,7 @@ import unittest import numpy as np import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear class SimpleImgConvPool(fluid.dygraph.Layer): @@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): - def __init__(self, name_scope, dtype="float32"): - super(MNIST, self).__init__(name_scope) + def __init__(self, dtype="float32"): + super(MNIST, self).__init__() self._simple_img_conv_pool_1 = SimpleImgConvPool( num_channels=3, @@ -94,21 +94,23 @@ class 
MNIST(fluid.dygraph.Layer): dtype=dtype, use_cudnn=True) - pool_2_shape = 50 * 4 * 4 + self.pool_2_shape = 50 * 53 * 53 SIZE = 10 - scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = FC(self.full_name(), - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax", - dtype=dtype) + scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5 + self._linear = Linear( + self.pool_2_shape, + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax", + dtype=dtype) def forward(self, inputs, label): x = self._simple_img_conv_pool_1(inputs) x = self._simple_img_conv_pool_2(x) - cost = self._fc(x) + x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape]) + cost = self._linear(x) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) return avg_loss @@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase): x = np.random.randn(1, 3, 224, 224).astype("float16") y = np.random.randn(1, 1).astype("int64") with fluid.dygraph.guard(fluid.CUDAPlace(0)): - model = MNIST("mnist", dtype="float16") + model = MNIST(dtype="float16") x = fluid.dygraph.to_variable(x) y = fluid.dygraph.to_variable(y) loss = model(x, y) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 6ab4a72e836..3134984f4f6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -18,44 +18,44 @@ import numpy as np class AutoPruneLayer0(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer0, self).__init__(name_scope) - self.fc1 = fluid.dygraph.FC( - "FC_1", + def __init__(self, input_size): + super(AutoPruneLayer0, self).__init__() + self.linear1 = fluid.dygraph.Linear( + input_size, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) - self.fc2 = fluid.dygraph.FC( - "FC_2", + self.linear2 = fluid.dygraph.Linear( + 5, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) def forward(self, x, y): - a = self.fc1(x) - b = self.fc2(y) + a = self.linear1(x) + b = self.linear2(y) c = fluid.layers.mul(a, b) d = fluid.layers.reduce_mean(c) return d class AutoPruneLayer1(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer1, self).__init__(name_scope) - self.fc1 = fluid.dygraph.FC( - "FC_1", + def __init__(self, input_size): + super(AutoPruneLayer1, self).__init__() + self.linear1 = fluid.dygraph.Linear( + input_size, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) - self.fc2 = fluid.dygraph.FC( - "FC_2", + self.linear2 = fluid.dygraph.Linear( + 5, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) def forward(self, x, y): - a = self.fc1(x) - b = self.fc2(y) + a = self.linear1(x) + b = self.linear2(y) b.stop_gradient = True c = fluid.layers.mul(a, b) d = fluid.layers.reduce_mean(c) @@ -63,14 +63,14 @@ class AutoPruneLayer1(fluid.Layer): class AutoPruneLayer2(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer2, self).__init__(name_scope) - self.fc = fluid.dygraph.FC("FC1", size=10, act=None) - self.fc2 = fluid.dygraph.FC("FC2", size=1, act=None) + def __init__(self, input_size): + super(AutoPruneLayer2, self).__init__() + self.linear = fluid.dygraph.Linear(input_size, 10, act=None) + self.linear2 = 
fluid.dygraph.Linear(1, 1, act=None) def forward(self, x, label): - feature = self.fc(x) - label = self.fc2(label) + feature = self.linear(x) + label = self.linear2(label) label = fluid.layers.cast(label, dtype="float32") label = fluid.layers.cast(label, dtype='int64') # Note that the label is not persistable in fluid.layers.cross_entropy. @@ -80,12 +80,12 @@ class AutoPruneLayer2(fluid.Layer): class AutoPruneLayer3(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer3, self).__init__(name_scope) - self.fc = fluid.dygraph.FC("FC1", size=20, act=None) + def __init__(self, input_size): + super(AutoPruneLayer3, self).__init__() + self.linear = fluid.dygraph.Linear(input_size, 20, act=None) def forward(self, x, label, test_num): - feature = self.fc(x) + feature = self.linear(x) part1, part2 = fluid.layers.split( feature, num_or_sections=[10, 10], dim=1) # Note that: part2 is not used. @@ -98,67 +98,68 @@ class AutoPruneLayer3(fluid.Layer): class MyLayer(fluid.Layer): - def __init__(self, name_scope, vocab_size, size, dtype="float32"): - super(MyLayer, self).__init__(name_scope, dtype) + def __init__(self, input_size, vocab_size, size, dtype="float32"): + super(MyLayer, self).__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size)) - self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype) - self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype) + self.linear_0 = fluid.Linear(input_size, size, dtype=dtype) + self.linear_1 = fluid.Linear(input_size, size, dtype=dtype) def forward(self, x): - # this method involves only the fc layers - loss = fluid.layers.reduce_mean(self.fc0(x) + self.fc1(x)) + # this method involves only the linear layers + loss = fluid.layers.reduce_mean(self.linear_0(x) + self.linear_1(x)) return loss def linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(x)) + loss = fluid.layers.reduce_mean(self.linear_0(x)) return loss def embed_linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x))) + loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x))) return loss class MyLayer2(fluid.Layer): - def __init__(self, name_scope, vocab_size, size, dtype="float32"): - super(MyLayer2, self).__init__(name_scope, dtype) + def __init__(self, input_size, vocab_size, size, dtype="float32"): + super(MyLayer2, self).__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size)) - self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype) - self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype) + self.linear_0 = fluid.Linear(input_size, size, dtype=dtype) + self.linear_1 = fluid.Linear(input_size, size, dtype=dtype) def forward(self, indices): # mind the difference with MyLayer # In this example, the forward method involes all params loss = fluid.layers.reduce_mean( - self.fc0(self.embed0(indices)) + self.fc1(self.embed1(indices))) + self.linear_0(self.embed0(indices)) + self.linear_1( + self.embed1(indices))) return loss def linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(x)) + loss = fluid.layers.reduce_mean(self.linear_0(x)) return loss def embed_linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x))) + loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x))) return loss class TestImperativeAutoPrune(unittest.TestCase): def test_auto_prune(self): with fluid.dygraph.guard(): - case1 = AutoPruneLayer0("l1") + case1 = 
AutoPruneLayer0(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") value2 = np.arange(25).reshape(5, 5).astype("float32") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss = case1(v1, v2) loss.backward() - self.assertTrue(case1.fc2.weight._grad_ivar() is not None) - self.assertTrue(case1.fc1.weight._grad_ivar() is not None) + self.assertTrue(case1.linear2.weight._grad_ivar() is not None) + self.assertTrue(case1.linear1.weight._grad_ivar() is not None) def test_auto_prune2(self): with fluid.dygraph.guard(): - case2 = AutoPruneLayer1("l1") + case2 = AutoPruneLayer1(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") value2 = np.arange(25).reshape(5, 5).astype("float32") v1 = fluid.dygraph.to_variable(value1) @@ -166,43 +167,43 @@ class TestImperativeAutoPrune(unittest.TestCase): loss = case2(v1, v2) loss.backward() - self.assertTrue(case2.fc2.weight._grad_ivar() is None) - self.assertTrue(case2.fc1.weight._grad_ivar() is not None) + self.assertTrue(case2.linear2.weight._grad_ivar() is None) + self.assertTrue(case2.linear1.weight._grad_ivar() is not None) def test_auto_prune3(self): with fluid.dygraph.guard(): - case3 = AutoPruneLayer3("l3") + case3 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") value2 = np.arange(1).reshape(1, 1).astype("int64") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss, part2 = case3(v1, v2, 1) loss.backward() - self.assertTrue(case3.fc.weight._grad_ivar() is not None) + self.assertTrue(case3.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) def test_auto_prune4(self): with fluid.dygraph.guard(): - case4 = AutoPruneLayer3("l3") + case4 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") value2 = np.arange(1).reshape(1, 1).astype("int64") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss, part2 = case4(v1, v2, 1) part2.backward() - self.assertTrue(case4.fc.weight._grad_ivar() is not None) + self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 1).all()) def test_auto_prune5(self): with fluid.dygraph.guard(): - case4 = AutoPruneLayer3("l3") + case4 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") value2 = np.arange(1).reshape(1, 1).astype("int64") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss, part1, part2 = case4(v1, v2, 2) part1.backward() - self.assertTrue(case4.fc.weight._grad_ivar() is not None) + self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) def test_auto_prune6(self): @@ -210,17 +211,17 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) out.backward() - self.assertTrue((fc.weight.gradient() == 0).all()) + 
self.assertTrue((linear.weight.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all()) def test_auto_prune7(self): @@ -228,18 +229,18 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) backward_strategy = fluid.dygraph.BackwardStrategy() out.backward(backward_strategy) - self.assertTrue((fc.weight.gradient() == 0).all()) + self.assertTrue((linear.weight.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all()) def test_auto_prune8(self): @@ -247,48 +248,52 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(5, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - fc_origin = fc.weight.numpy() - out2 = fc2(out1) - fc2_origin = fc2.weight.numpy() - fc2.weight.stop_gradient = True + out1 = linear(a) + linear_origin = linear.weight.numpy() + out2 = linear2(out1) + linear2_origin = linear2.weight.numpy() + linear2.weight.stop_gradient = True out2.backward() optimizer = fluid.optimizer.SGD( learning_rate=0.003, - parameter_list=(fc.parameters() + fc2.parameters())) + parameter_list=(linear.parameters() + linear2.parameters())) optimizer.minimize(out2) - self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy())) - self.assertFalse(np.array_equal(fc_origin, fc.weight.numpy())) + self.assertTrue( + np.array_equal(linear2_origin, linear2.weight.numpy())) + self.assertFalse( + np.array_equal(linear_origin, linear.weight.numpy())) def test_auto_prune9(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(5, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - fc_origin = fc.weight.numpy() - out2 = fc2(out1) - fc2_origin = fc2.weight.numpy() + out1 = linear(a) + linear_origin = linear.weight.numpy() + out2 = linear2(out1) + linear2_origin = linear2.weight.numpy() out2.stop_gradient = True out2.backward() optimizer = fluid.optimizer.SGD( learning_rate=0.003, - parameter_list=(fc.parameters() + fc2.parameters())) + parameter_list=(linear.parameters() + linear2.parameters())) optimizer.minimize(out2) - self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy())) - self.assertTrue(np.array_equal(fc_origin, fc.weight.numpy())) + 
self.assertTrue( + np.array_equal(linear2_origin, linear2.weight.numpy())) + self.assertTrue( + np.array_equal(linear_origin, linear.weight.numpy())) try: - fc2.weight.gradient() + linear2.weight.gradient() except ValueError as e: assert type(e) == ValueError @@ -297,19 +302,19 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) backward_strategy = fluid.dygraph.BackwardStrategy() backward_strategy.sort_sum_gradient = True out.backward(backward_strategy) - self.assertTrue((fc.weight.gradient() == 0).all()) + self.assertTrue((linear.weight.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all()) def test_auto_prune_with_optimizer(self): @@ -323,13 +328,13 @@ class TestImperativeAutoPrune(unittest.TestCase): place = fluid.CPUPlace() with fluid.dygraph.guard(place): - model = MyLayer("mylayer", vocab_size, size) + model = MyLayer(size, vocab_size, size) optimizer = fluid.optimizer.AdamOptimizer( 0.001, parameter_list=model.parameters()) grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001) indices = fluid.dygraph.to_variable(indices) - emebd = fluid.dygraph.to_variable(embed) + embed = fluid.dygraph.to_variable(embed) dummy_loss = model(embed) loss = model.embed_linear0(indices) @@ -337,12 +342,12 @@ class TestImperativeAutoPrune(unittest.TestCase): _, params_grads = optimizer.minimize(loss, grad_clip=grad_clip) for items in params_grads: assert items[0].name is not model.embed1.weight.name - assert items[0].name is not model.fc1.weight.name + assert items[0].name is not model.linear_1.weight.name assert model.embed1.weight._grad_ivar() is None - assert model.fc1.weight._grad_ivar() is None + assert model.linear_1.weight._grad_ivar() is None with fluid.dygraph.guard(place): - model = MyLayer2("mylayer", vocab_size, size) + model = MyLayer2(size, vocab_size, size) optimizer = fluid.optimizer.AdamOptimizer( 0.001, parameter_list=model.parameters()) grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001) @@ -356,9 +361,9 @@ class TestImperativeAutoPrune(unittest.TestCase): optimizer.minimize(loss, grad_clip=grad_clip) for items in params_grads: assert items[0].name is not model.embed1.weight.name - assert items[0].name is not model.fc1.weight.name + assert items[0].name is not model.linear_1.weight.name assert model.embed1.weight._grad_ivar() is None - assert model.fc1.weight._grad_ivar() is None + assert model.linear_1.weight._grad_ivar() is None def test_case2_prune_no_grad_branch(self): with fluid.dygraph.guard(): @@ -366,11 +371,11 @@ class TestImperativeAutoPrune(unittest.TestCase): value2 = np.arange(1).reshape(1, 1) v1 = fluid.dygraph.to_variable(value1).astype("float32") v2 = fluid.dygraph.to_variable(value2).astype("float32") - case3 = AutoPruneLayer2("l2") + case3 = AutoPruneLayer2(input_size=784) loss = case3(v1, v2) loss.backward() - self.assertTrue(case3.fc2.weight._grad_ivar() is None) - self.assertTrue(case3.fc.weight._grad_ivar() is 
not None) + self.assertTrue(case3.linear2.weight._grad_ivar() is None) + self.assertTrue(case3.linear.weight._grad_ivar() is not None) def test_case2_prune_no_grad_branch(self): with fluid.dygraph.guard(): @@ -378,24 +383,24 @@ class TestImperativeAutoPrune(unittest.TestCase): value2 = np.arange(1).reshape(1, 1) v1 = fluid.dygraph.to_variable(value1).astype("float32") v2 = fluid.dygraph.to_variable(value2).astype("float32") - case3 = AutoPruneLayer2("l2") + case3 = AutoPruneLayer2(input_size=784) loss = case3(v1, v2) loss.backward() - self.assertTrue(case3.fc2.weight._grad_ivar() is None) - self.assertTrue(case3.fc.weight._grad_ivar() is not None) + self.assertTrue(case3.linear2.weight._grad_ivar() is None) + self.assertTrue(case3.linear.weight._grad_ivar() is not None) def test_case3_prune_no_grad_branch2(self): with fluid.dygraph.guard(): value1 = np.arange(1).reshape(1, 1) - fc = fluid.dygraph.FC("FC1", size=1, act=None) + linear = fluid.dygraph.Linear(1, 1, act=None) label = fluid.dygraph.to_variable(value1).astype("float32") - label = fc(label) + label = linear(label) label = fluid.layers.cast(label, dtype="float32") label = fluid.layers.cast(label, dtype='int64') out = fluid.layers.one_hot(input=label, depth=100) loss = fluid.layers.mean(out) loss.backward() - self.assertTrue(fc.weight._grad_ivar() is None) + self.assertTrue(linear.weight._grad_ivar() is None) def test_case4_with_no_grad_op_maker(self): with fluid.dygraph.guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 8f1e2fdd2a3..14e5e20b92a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -18,7 +18,7 @@ import numpy as np import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid import FC +from paddle.fluid import Linear from test_imperative_base import new_program_scope @@ -35,24 +35,26 @@ class MyLayer(fluid.Layer): class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - self._fc1 = FC(self.full_name(), - 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = FC(self.full_name(), - 4, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) + def __init__(self, input_size): + super(MLP, self).__init__() + self._linear1 = Linear( + input_size, + 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1))) + self._linear2 = Linear( + 3, + 4, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) - x = self._fc2(x) + x = self._linear1(inputs) + x = self._linear2(x) x = fluid.layers.reduce_sum(x) return x @@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) with fluid.dygraph.guard(): var_inp = fluid.dygraph.base.to_variable(np_inp) - mlp = MLP("mlp") + mlp = MLP(input_size=2) out = mlp(var_inp) dy_out = out.numpy() out.backward() - dy_grad = mlp._fc1.weight.gradient() + dy_grad = 
mlp._linear1.weight.gradient() with fluid.dygraph.guard(): var_inp2 = fluid.dygraph.base.to_variable(np_inp) - mlp2 = MLP("mlp") + mlp2 = MLP(input_size=2) out2 = mlp2(var_inp2) dy_out2 = out2.numpy() backward_strategy = fluid.dygraph.BackwardStrategy() backward_strategy.sort_sum_gradient = True out2.backward(backward_strategy) - dy_grad2 = mlp2._fc1.weight.gradient() + dy_grad2 = mlp2._linear1.weight.gradient() with new_program_scope(): inp = fluid.layers.data( name="inp", shape=[2, 2], append_batch_size=False) - mlp = MLP("mlp") + mlp = MLP(input_size=2) out = mlp(inp) param_grads = fluid.backward.append_backward( - out, parameter_list=[mlp._fc1.weight.name])[0] + out, parameter_list=[mlp._linear1.weight.name])[0] exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) @@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase): self.assertTrue(np.allclose(dy_grad2, static_grad)) params = mlp.parameters(True) - self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name) - self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name) - self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name) - self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name) + self.assertEqual("linear_0.w_0", params[0].name) + self.assertEqual("linear_0.b_0", params[1].name) + self.assertEqual("linear_1.w_0", params[2].name) + self.assertEqual("linear_1.b_0", params[3].name) self.assertEqual(len(params), 4) sublayers = mlp.sublayers(True) - self.assertEqual(mlp._fc1, sublayers[0]) - self.assertEqual(mlp._fc2, sublayers[1]) + self.assertEqual(mlp._linear1, sublayers[0]) + self.assertEqual(mlp._linear2, sublayers[1]) self.assertEqual(len(sublayers), 2) def test_dygraph_vs_static(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py b/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py index dbd5296e5f1..171687283bc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py @@ -20,17 +20,17 @@ import numpy as np class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - self._fc1 = fluid.dygraph.FC( - self.full_name(), + def __init__(self, input_size): + super(MLP, self).__init__() + self._linear1 = fluid.dygraph.Linear( + input_size, 3, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = fluid.dygraph.FC( - self.full_name(), + self._linear2 = fluid.dygraph.Linear( + 3, 4, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), @@ -38,8 +38,8 @@ class MLP(fluid.Layer): initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) - x = self._fc2(x) + x = self._linear1(inputs) + x = self._linear2(x) x = fluid.layers.reduce_sum(x) return x @@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase): trace_var = 0 alive_var = 0 with fluid.dygraph.guard(): - mlp = MLP("mlp") + mlp = MLP(input_size=2) for i in range(10): var_inp = fluid.dygraph.base.to_variable(np_inp) out = mlp(var_inp) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index d68d362f0be..78ad00fb9a7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ 
b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - self._fc1 = fluid.dygraph.FC( - self.full_name(), + def __init__(self, input_size): + super(MLP, self).__init__() + self._linear1 = fluid.dygraph.Linear( + input_size, 3, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = fluid.dygraph.FC( - self.full_name(), + self._linear2 = fluid.dygraph.Linear( + 3, 4, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), @@ -39,8 +39,8 @@ class MLP(fluid.Layer): initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) - x = self._fc2(x) + x = self._linear1(inputs) + x = self._linear2(x) x = fluid.layers.reduce_sum(x) return x @@ -48,7 +48,7 @@ class MLP(fluid.Layer): class TestDygraphFramework(unittest.TestCase): def test_dygraph_backward(self): with new_program_scope(): - mlp = MLP("mlp") + mlp = MLP(input_size=2) var_inp = fluid.layers.data( "input", shape=[2, 2], dtype="float32", append_batch_size=False) out = mlp(var_inp) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index ed721503a14..5e3d3c81188 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase): with fluid.dygraph.guard(): x = np.random.randn(2, 4, 5).astype("float32") x = fluid.dygraph.to_variable(x) - fc1 = fluid.dygraph.FC("fc1", 10, num_flatten_dims=2) - fc2 = fluid.dygraph.FC("fc2", 10, num_flatten_dims=2) + linear1 = fluid.dygraph.Linear(5, 10) + linear2 = fluid.dygraph.Linear(5, 10) - y = fc1(x[:, :2]) - z = fc2(x[:, 2:]) + y = linear1(x[:, :2]) + z = linear2(x[:, 2:]) loss = fluid.layers.reduce_mean(y) loss.backward() - for param in fc1.parameters(): + for param in linear1.parameters(): self.assertIsNotNone(param._grad_ivar()) - for param in fc2.parameters(): + for param in linear2.parameters(): self.assertIsNone(param._grad_ivar()) optimizer = fluid.optimizer.AdamOptimizer(parameter_list=( - fc1.parameters() + fc2.parameters())) + linear1.parameters() + linear2.parameters())) _, params_grads = optimizer.minimize(loss) self.assertListEqual( - sorted([p.name for p in fc1.parameters()]), + sorted([p.name for p in linear1.parameters()]), sorted([p_g[0].name for p_g in params_grads])) - fc1.clear_gradients() - fc2.clear_gradients() + linear1.clear_gradients() + linear2.clear_gradients() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 983fe23f448..735ec4d3f1e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -23,18 +23,18 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear import paddle.fluid.dygraph.nn as nn from 
paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope class Policy(fluid.dygraph.Layer): - def __init__(self, name_scope): - super(Policy, self).__init__(name_scope) + def __init__(self, input_size): + super(Policy, self).__init__() - self.affine1 = nn.FC(self.full_name(), size=128) - self.affine2 = nn.FC(self.full_name(), size=2) + self.affine1 = nn.Linear(input_size, 128) + self.affine2 = nn.Linear(128, 2) self.dropout_ratio = 0.6 self.saved_log_probs = [] @@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - policy = Policy("PolicyModel") + policy = Policy(input_size=4) dy_state = fluid.dygraph.base.to_variable(state) dy_state.stop_gradient = True @@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) - policy = Policy("PolicyModel") + policy = Policy(input_size=4) st_sgd = SGDOptimizer(learning_rate=1e-3) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 3d2868a9765..01327ac647f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): def __init__(self, - name_scope, hidden_size, vocab_size, num_layers=2, num_steps=20, init_scale=0.1, dropout=None): - super(PtbModel, self).__init__(name_scope) + super(PtbModel, self).__init__() self.hidden_size = hidden_size self.vocab_size = vocab_size self.init_scale = init_scale @@ -160,7 +159,18 @@ class PtbModel(fluid.Layer): initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) - self.out_project = Linear(self.hidden_size, self.vocab_size) + self.softmax_weight = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.hidden_size, self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + self.softmax_bias = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) def forward(self, input, label, init_hidden, init_cell): init_h = fluid.layers.reshape( @@ -182,7 +192,8 @@ class PtbModel(fluid.Layer): rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, self.hidden_size]) - projection = self.out_project(rnn_out) + projection = fluid.layers.matmul(rnn_out, self.softmax_weight) + projection = fluid.layers.elementwise_add(projection, self.softmax_bias) projection = fluid.layers.reshape( projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( @@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase): 
fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 44e6d8a8c35..fa345be1ff0 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase): class TestLayer(LayerTest): def test_custom_layer_with_kwargs(self): class CustomLayer(fluid.Layer): - def __init__(self, name_scope, fc1_size=4): - super(CustomLayer, self).__init__(name_scope) - self.fc1 = nn.FC('fc1', - size=fc1_size, - bias_attr=False, - num_flatten_dims=1) - self.fc2 = nn.FC('fc2', - size=1, - bias_attr=False, - num_flatten_dims=1) - - def forward(self, x, do_fc2=False): - ret = self.fc1(x) - if do_fc2: - ret = self.fc2(ret) + def __init__(self, input_size, linear1_size=4): + super(CustomLayer, self).__init__() + self.linear1 = nn.Linear( + input_size, linear1_size, bias_attr=False) + self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False) + + def forward(self, x, do_linear2=False): + ret = self.linear1(x) + if do_linear2: + ret = self.linear2(ret) return ret with self.dynamic_graph(): inp = np.ones([3, 3], dtype='float32') x = base.to_variable(inp) - custom = CustomLayer('custom', fc1_size=2) - ret = custom(x, do_fc2=False) + custom = CustomLayer(input_size=3, linear1_size=2) + ret = custom(x, do_linear2=False) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2])) - ret = custom(x, do_fc2=True) + ret = custom(x, do_linear2=True) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1])) def test_linear(self): @@ -133,112 +128,6 @@ class TestLayer(LayerTest): self.assertTrue(np.array_equal(static_ret, dy_ret_value)) - inp = np.ones([3, 32], dtype='float32') - with self.dynamic_graph(): - t = base.to_variable(inp) - linear = nn.Linear(32, 4, bias_attr=False) - dy_ret = linear(t) - dy_ret_value = dy_ret.numpy() - with self.dynamic_graph(): - t = base.to_variable(inp) - fc = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - dy_ret2 = fc(t) - dy_ret_value2 = dy_ret2.numpy() - self.assertTrue(np.array_equal(dy_ret_value, dy_ret_value2)) - - def test_fc(self): - inp = np.ones([3, 32, 32], dtype='float32') - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) - ret = 
layers.fc(t, size=4, bias_attr=False, num_flatten_dims=1) - ret2 = layers.fc(ret, size=4) - static_ret = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret2])[0] - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) - fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = nn.FC('fc2', size=4) - ret = fc1(t) - ret2 = fc2(ret) - static_ret2 = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret2])[0] - with self.dynamic_graph(): - t = base.to_variable(inp) - fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = nn.FC('fc2', size=4) - ret = fc1(t) - dy_ret = fc2(ret) - dy_ret_value = dy_ret.numpy() - - self.assertTrue(np.array_equal(static_ret, static_ret2)) - self.assertTrue(np.array_equal(static_ret, dy_ret_value)) - - with self.dynamic_graph(): - custom_weight = np.random.randn(1024, 4).astype("float32") - weight_attr1 = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) - fc1 = fluid.dygraph.FC("fc1", - 4, - num_flatten_dims=1, - param_attr=weight_attr1) - out1 = fc1(base.to_variable(inp)) - loss1 = fluid.layers.reduce_mean(out1) - - fc1_weight_init = fc1.weight.detach() - fc1_bias_init = fc1.bias.detach() - - loss1.backward() - optimizer1 = fluid.optimizer.SGD(learning_rate=0.1, - parameter_list=fc1.parameters()) - optimizer1.minimize(loss1) - - fc1_weight_updated = fc1.weight.detach() - - with self.dynamic_graph(): - weight_attr2 = fluid.ParamAttr( - initializer=fluid.initializer.Uniform()) - fc2 = fluid.dygraph.FC("fc2", - 4, - num_flatten_dims=1, - param_attr=weight_attr2) - out2 = fc2(base.to_variable(inp)) - - self.assertFalse( - np.array_equal(fc1_weight_init.numpy(), fc2.weight.numpy())) - self.assertFalse(np.array_equal(out1.numpy(), out2.numpy())) - - mismatched_weight = np.random.randn(4, 4).astype("float32") - with self.assertRaises(AssertionError): - fc2.weight.set_value(mismatched_weight) - fc2.weight.set_value(fc1_weight_init) - fc2.bias.set_value(fc1_bias_init) - - out2 = fc2(base.to_variable(inp)) - loss2 = fluid.layers.reduce_mean(out2) - loss2.backward() - optimizer2 = fluid.optimizer.SGD(learning_rate=0.1, - parameter_list=fc2.parameters()) - optimizer2.minimize(loss2) - - self.assertTrue( - np.array_equal(fc2.weight.numpy(), fc1_weight_updated.numpy())) - self.assertTrue(np.array_equal(out1.numpy(), out2.numpy())) - - fc2.weight = fc1.weight - fc2.bias = fc1.bias - self.assertTrue( - np.array_equal(fc2.weight.numpy(), fc1.weight.numpy())) - self.assertTrue(np.array_equal(fc2.bias.numpy(), fc1.bias.numpy())) - def test_layer_norm(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): -- GitLab
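
(Reviewer note, not part of the patch.) For anyone updating their own dygraph models along with these tests, here is a minimal sketch of the FC-to-Linear migration pattern the test changes above follow, assuming the 1.7-era fluid.dygraph API. The class and variable names below are illustrative only; FC was constructed with a name scope and no input width, while Linear takes an explicit (input_dim, output_dim) pair and no name scope.

    import numpy as np
    import paddle.fluid as fluid

    class MLPExample(fluid.dygraph.Layer):
        # Old style: FC(self.full_name(), size, num_flatten_dims=...)
        # New style: Linear(input_dim, output_dim), applied to the last axis.
        def __init__(self, input_size):
            super(MLPExample, self).__init__()
            self._linear1 = fluid.dygraph.Linear(input_size, 3)
            self._linear2 = fluid.dygraph.Linear(3, 4)

        def forward(self, inputs):
            x = self._linear1(inputs)
            x = self._linear2(x)
            return fluid.layers.reduce_sum(x)

    with fluid.dygraph.guard():
        inp = fluid.dygraph.to_variable(np.ones([2, 2], dtype='float32'))
        out = MLPExample(input_size=2)(inp)  # scalar Variable after reduce_sum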
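
(Reviewer note, not part of the patch.) The PtbModel change in test_imperative_save_load.py drops the Linear output projection in favor of explicitly created parameters; both forms compute the same affine map x * W + b. A rough sketch of that equivalence, with hypothetical names, isolated into its own layer:

    import paddle.fluid as fluid

    class OutputProjection(fluid.dygraph.Layer):
        # Hand-rolled affine map, equivalent to Linear(hidden_size, vocab_size).
        def __init__(self, hidden_size, vocab_size):
            super(OutputProjection, self).__init__()
            self.weight = self.create_parameter(
                attr=fluid.ParamAttr(),
                shape=[hidden_size, vocab_size],
                dtype="float32")
            self.bias = self.create_parameter(
                attr=fluid.ParamAttr(),
                shape=[vocab_size],
                dtype="float32")

        def forward(self, x):
            # x: [..., hidden_size] -> [..., vocab_size]
            projection = fluid.layers.matmul(x, self.weight)
            return fluid.layers.elementwise_add(projection, self.bias)

The updated test keeps this parameter creation inline in PtbModel rather than in a separate layer; the sketch above only highlights that the numerical behavior of the saved/loaded model is unchanged.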