Commit cf475f95 authored by zhongpu, committed by hong

Remove FC in dygraph, modify FC to Linear in sample code (#22082)

* modify fc to linear in sample code, test=develop

* remove FC, test=develop

* remove warnings, test=develop

* drop fluid/imperative/README.md , test=develop

* change fc to linear, test=develop

* polish code style, test=develop
Parent 64a40442
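The diff below migrates all dygraph sample code from the removed FC layer to Linear. A minimal sketch of the migration pattern, assembled from the snippets changed in this commit (shapes taken from those examples):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable

data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
    # Previously: fc = FC("fc", 64, num_flatten_dims=2)  -- only the output size was given
    linear = Linear(32, 64)           # Linear takes explicit (input_dim, output_dim)
    x = linear(to_variable(data))     # output shape: [30, 10, 64]
    print(x.numpy().shape)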
...@@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) { ...@@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) {
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
print(x.numpy()) print(x.numpy())
)DOC") )DOC")
...@@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) { ...@@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) {
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
y = x.detach() y = x.detach()
)DOC") )DOC")
......
...@@ -84,12 +84,12 @@ def _no_grad_(func): ...@@ -84,12 +84,12 @@ def _no_grad_(func):
@fluid.dygraph.no_grad @fluid.dygraph.no_grad
def test_layer(): def test_layer():
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 1024], dtype='float32')
t = fluid.dygraph.base.to_variable(inp) t = fluid.dygraph.base.to_variable(inp)
fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) linear1 = fluid.Linear(1024, 4, bias_attr=False)
fc2 = fluid.FC('fc2', size=4) linear2 = fluid.Linear(4, 4)
ret = fc1(t) ret = linear1(t)
dy_ret = fc2(ret) dy_ret = linear2(ret)
test_layer() test_layer()
...@@ -127,12 +127,12 @@ def guard(place=None): ...@@ -127,12 +127,12 @@ def guard(place=None):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 1024], dtype='float32')
t = fluid.dygraph.base.to_variable(inp) t = fluid.dygraph.base.to_variable(inp)
fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) linear1 = fluid.Linear(1024, 4, bias_attr=False)
fc2 = fluid.FC('fc2', size=4) linear2 = fluid.Linear(4, 4)
ret = fc1(t) ret = linear1(t)
dy_ret = fc2(ret) dy_ret = linear2(ret)
""" """
train = framework.Program() train = framework.Program()
......
...@@ -29,10 +29,9 @@ import numbers ...@@ -29,10 +29,9 @@ import numbers
import logging import logging
__all__ = [ __all__ = [
'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'Linear', 'BatchNorm', 'Embedding', 'Conv2D', 'Conv3D', 'Pool2D', 'Linear', 'BatchNorm', 'Embedding', 'GRUUnit',
'GRUUnit', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose',
'Conv2DTranspose', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'TreeConv'
'TreeConv'
] ]
...@@ -865,7 +864,7 @@ class Linear(layers.Layer): ...@@ -865,7 +864,7 @@ class Linear(layers.Layer):
where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively. where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively.
Different from FC layer, Linear layer takes only one ``Tensor`` input. Linear layer takes only one ``Tensor`` input.
The Linear layer multiplies input tensor with weight matrix and The Linear layer multiplies input tensor with weight matrix and
produces an output Tensor of shape [N, *, `output_dim`], produces an output Tensor of shape [N, *, `output_dim`],
where N is batch size and `*` means any number of additional dimensions. where N is batch size and `*` means any number of additional dimensions.
...@@ -959,221 +958,6 @@ class Linear(layers.Layer): ...@@ -959,221 +958,6 @@ class Linear(layers.Layer):
return self._helper.append_activation(pre_activation, act=self._act) return self._helper.append_activation(pre_activation, act=self._act)
class FC(layers.Layer):
"""
This interface is used to construct a callable object of the ``FC`` class.
For more details, refer to code examples.
It creates a fully connected layer in the network. It can take
one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor,
which represents a fully connected weight matrix from each input unit to
each output unit. The fully connected layer multiplies each input tensor
with its corresponding weight to produce an output Tensor with shape [N, `size`],
where N is batch size. If multiple input tensors are given, the results of
multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr``
is not None, a bias variable will be created and added to the output.
Finally, if ``act`` is not None, it will be applied to the output as well.
When the input is single ``Tensor`` :
.. math::
Out = Act({XW + b})
When the input are multiple ``Tensor`` :
.. math::
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
* :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` .
* :math:`X_i`: The i-th input ``Tensor`` .
* :math:`W_i`: The i-th weights matrix corresponding i-th input tensor.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation function.
* :math:`Out`: The output ``Tensor`` .
See below for an example.
.. code-block:: text
Given:
data_1.data = [[[0.1, 0.2]]]
data_1.shape = (1, 1, 2) # 1 is batch_size
data_2.data = [[[0.1, 0.2, 0.3]]]
data_2.shape = (1, 1, 3) # 1 is batch_size
fc = FC("fc", 2, num_flatten_dims=2)
out = fc(input=[data_1, data_2])
Then:
out.data = [[[0.182996 -0.474117]]]
out.shape = (1, 1, 2)
Parameters:
name_scope(str): The name of this class.
size(int): The number of output units in this layer.
num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multi-dimension tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
dimensions will be flatten to form the first dimension of the final matrix (height of
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
weights(Parameter) of this layer. Default: None.
bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str, optional): Activation to be applied to the output of this layer. Default: None.
is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False.
dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (list of Parameter): the learnable weights of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Returns:
None
Examples:
.. code-block:: python
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
from paddle.fluid.dygraph import FC
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
data = to_variable(data)
conv = fc(data)
"""
def __init__(self,
name_scope,
size,
num_flatten_dims=1,
param_attr=None,
bias_attr=None,
act=None,
is_test=False,
dtype="float32"):
super(FC, self).__init__(name_scope, dtype)
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self.__w = list()
def _build_once(self, input):
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
input_shape = inp.shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:],
1)
] + [self._size]
self.__w.append(
self.add_parameter(
'_w%d' % i,
self.create_parameter(
attr=param,
shape=param_shape,
dtype=self._dtype,
is_bias=False)))
i += 1
size = list([self._size])
self._b = self.create_parameter(
attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True)
# TODO(songyouwei): We should remove _w property
@property
def _w(self, i=0):
return self.__w[i]
@_w.setter
def _w(self, value, i=0):
assert isinstance(self.__w[i], Variable)
self.__w[i].set_value(value)
@property
def weight(self):
if len(self.__w) > 1:
return self.__w
else:
return self.__w[0]
@weight.setter
def weight(self, value):
if len(self.__w) == 1:
self.__w[0] = value
@property
def bias(self):
return self._b
@bias.setter
def bias(self, value):
self._b = value
def forward(self, input):
mul_results = list()
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
tmp = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": inp,
"Y": self.__w[i]},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": self._num_flatten_dims,
"y_num_col_dims": 1
})
i += 1
mul_results.append(tmp)
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = self._helper.create_variable_for_type_inference(
self._dtype)
self._helper.append_op(
type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias},
attrs={"use_mkldnn": False})
if self._b:
pre_activation = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [self._b]},
outputs={'Out': [pre_activation]},
attrs={'axis': self._num_flatten_dims})
else:
pre_activation = pre_bias
# Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_activation, act=self._act)
class BatchNorm(layers.Layer): class BatchNorm(layers.Layer):
""" """
This interface is used to construct a callable object of the ``BatchNorm`` class. This interface is used to construct a callable object of the ``BatchNorm`` class.
......
...@@ -97,7 +97,7 @@ class DataParallel(layers.Layer): ...@@ -97,7 +97,7 @@ class DataParallel(layers.Layer):
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph import paddle.fluid.dygraph as dygraph
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
...@@ -106,28 +106,28 @@ class DataParallel(layers.Layer): ...@@ -106,28 +106,28 @@ class DataParallel(layers.Layer):
# prepare the data parallel context # prepare the data parallel context
strategy=dygraph.parallel.prepare_context() strategy=dygraph.parallel.prepare_context()
fc_layer = FC("FC", 10, act="softmax") linear = Linear(1, 10, act="softmax")
adam = fluid.optimizer.AdamOptimizer() adam = fluid.optimizer.AdamOptimizer()
# make the module become the data parallelism module # make the module become the data parallelism module
fc_layer = dygraph.parallel.DataParallel(fc_layer, strategy) linear = dygraph.parallel.DataParallel(linear, strategy)
x_data = np.random.random(size=[10, 1]).astype(np.float32) x_data = np.random.random(size=[10, 1]).astype(np.float32)
data = to_variable(x_data) data = to_variable(x_data)
hidden = fc_layer(data) hidden = linear(data)
avg_loss = fluid.layers.mean(hidden) avg_loss = fluid.layers.mean(hidden)
# scale the loss according to the number of trainers. # scale the loss according to the number of trainers.
avg_loss = fc_layer.scale_loss(avg_loss) avg_loss = linear.scale_loss(avg_loss)
avg_loss.backward() avg_loss.backward()
# collect the gradients of trainers. # collect the gradients of trainers.
fc_layer.apply_collective_grads() linear.apply_collective_grads()
adam.minimize(avg_loss) adam.minimize(avg_loss)
fc_layer.clear_gradients() linear.clear_gradients()
Args: Args:
layers(Layer): The module that should be executed by data parallel. layers(Layer): The module that should be executed by data parallel.
......
...@@ -39,17 +39,17 @@ def monkey_patch_varbase(): ...@@ -39,17 +39,17 @@ def monkey_patch_varbase():
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.ones([3, 32, 32], dtype='float32') data = np.ones([3, 1024], dtype='float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = fluid.dygraph.FC("fc", 4) linear = fluid.dygraph.Linear(1024, 4)
t = to_variable(data) t = to_variable(data)
fc(t) # call with default weight linear(t) # call with default weight
custom_weight = np.random.randn(1024, 4).astype("float32") custom_weight = np.random.randn(1024, 4).astype("float32")
fc.weight.set_value(custom_weight) # change existing weight linear.weight.set_value(custom_weight) # change existing weight
out = fc(t) # call with different weight out = linear(t) # call with different weight
""" """
assert isinstance(value, (np.ndarray, core.VarBase)), \ assert isinstance(value, (np.ndarray, core.VarBase)), \
......
...@@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase): ...@@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
...@@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase): ...@@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10) linear = Linear( 10, 10)
out = fc( to_variable(init_value) ) out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out ) loss = fluid.layers.reduce_mean( out )
...@@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase): ...@@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
...@@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase): ...@@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10) linear = Linear( 10, 10)
out = fc( to_variable(init_value) ) out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out ) loss = fluid.layers.reduce_mean( out )
...@@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase): ...@@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
...@@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase): ...@@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10) linear = Linear( 10, 10)
out = fc( to_variable(init_value) ) out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out ) loss = fluid.layers.reduce_mean( out )
......
...@@ -959,14 +959,14 @@ class Variable(object): ...@@ -959,14 +959,14 @@ class Variable(object):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
y = x.detach() y = x.detach()
""" """
...@@ -991,14 +991,14 @@ class Variable(object): ...@@ -991,14 +991,14 @@ class Variable(object):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
print(x.numpy()) print(x.numpy())
""" """
...@@ -1020,17 +1020,17 @@ class Variable(object): ...@@ -1020,17 +1020,17 @@ class Variable(object):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.ones([3, 32, 32], dtype='float32') data = np.ones([3, 1024], dtype='float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = fluid.dygraph.FC("fc", 4) linear = fluid.dygraph.Linear(1024, 4)
t = to_variable(data) t = to_variable(data)
fc(t) # call with default weight linear(t) # call with default weight
custom_weight = np.random.randn(1024, 4).astype("float32") custom_weight = np.random.randn(1024, 4).astype("float32")
fc.weight.set_value(custom_weight) # change existing weight linear.weight.set_value(custom_weight) # change existing weight
out = fc(t) # call with different weight out = linear(t) # call with different weight
""" """
pass pass
...@@ -1223,18 +1223,18 @@ class Variable(object): ...@@ -1223,18 +1223,18 @@ class Variable(object):
value0 = np.arange(26).reshape(2, 13).astype("float32") value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32") linear = fluid.Linear(13, 5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32") linear2 = fluid.Linear(3, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0) a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1) b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2) c = fluid.dygraph.to_variable(value2)
out1 = fc(a) out1 = linear(a)
out2 = fc2(b) out2 = linear2(b)
out1.stop_gradient = True out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1) out = fluid.layers.concat(input=[out1, out2, c], axis=1)
out.backward() out.backward()
assert (fc._w.gradient() == 0).all() assert (linear.weight.gradient() == 0).all()
assert (out1.gradient() == 0).all() assert (out1.gradient() == 0).all()
""" """
if in_dygraph_mode(): if in_dygraph_mode():
......
...@@ -30,14 +30,15 @@ __all__ = ['run_check'] ...@@ -30,14 +30,15 @@ __all__ = ['run_check']
class SimpleLayer(Layer): class SimpleLayer(Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(SimpleLayer, self).__init__(name_scope) super(SimpleLayer, self).__init__()
self._fc1 = nn.FC(self.full_name(), self._linear1 = nn.Linear(
3, input_size,
param_attr=ParamAttr(initializer=Constant(value=0.1))) 3,
param_attr=ParamAttr(initializer=Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = layers.reduce_sum(x) x = layers.reduce_sum(x)
return x return x
...@@ -79,7 +80,7 @@ def run_check(): ...@@ -79,7 +80,7 @@ def run_check():
build_strategy = compiler.BuildStrategy() build_strategy = compiler.BuildStrategy()
build_strategy.enable_inplace = True build_strategy.enable_inplace = True
inp = layers.data(name="inp", shape=[2, 2]) inp = layers.data(name="inp", shape=[2, 2])
simple_layer = SimpleLayer("simple_layer") simple_layer = SimpleLayer(input_size=2)
out = simple_layer(inp) out = simple_layer(inp)
exe = executor.Executor( exe = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and core.CUDAPlace(0) if core.is_compiled_with_cuda() and
...@@ -108,10 +109,11 @@ def run_check(): ...@@ -108,10 +109,11 @@ def run_check():
with unique_name.guard(): with unique_name.guard():
inp0 = layers.data( inp0 = layers.data(
name="inp", shape=[2, 2], append_batch_size=False) name="inp", shape=[2, 2], append_batch_size=False)
simple_layer0 = SimpleLayer("simple_layer") simple_layer0 = SimpleLayer(input_size=2)
out0 = simple_layer0(inp0) out0 = simple_layer0(inp0)
param_grads = backward.append_backward( param_grads = backward.append_backward(
out0, parameter_list=[simple_layer0._fc1._w.name])[0] out0,
parameter_list=[simple_layer0._linear1.weight.name])[0]
exe0 = executor.Executor( exe0 = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and core.CUDAPlace(0) if core.is_compiled_with_cuda() and
(core.get_cuda_device_count() > 0) else core.CPUPlace()) (core.get_cuda_device_count() > 0) else core.CPUPlace())
......
...@@ -3002,7 +3002,7 @@ def layer_norm(input, ...@@ -3002,7 +3002,7 @@ def layer_norm(input,
print(output) print(output)
""" """
assert in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!" ) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!"
helper = LayerHelper('layer_norm', **locals()) helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
......
...@@ -17,8 +17,7 @@ from __future__ import print_function ...@@ -17,8 +17,7 @@ from __future__ import print_function
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import FC from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
import unittest import unittest
...@@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase): ...@@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase):
def no_detach_multi(self): def no_detach_multi(self):
data = self.generate_Data() data = self.generate_Data()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr( linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0)) initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)) initializer=fluid.initializer.Constant(6.0))
fc = FC("fc", linear = Linear(
10, 4,
num_flatten_dims=1, 10,
param_attr=fc_w_param_attrs, param_attr=linear_w_param_attrs,
bias_attr=fc_b_param_attrs) bias_attr=linear_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr( linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0)) initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)) initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc", linear1 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc1_w_param_attrs, param_attr=linear1_w_param_attrs,
bias_attr=fc1_b_param_attrs) bias_attr=linear1_b_param_attrs)
fc2_w_param_attrs = fluid.ParamAttr( linear2_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(9.0)) initializer=fluid.initializer.Constant(9.0))
fc2_b_param_attrs = fluid.ParamAttr( linear2_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(10.0)) initializer=fluid.initializer.Constant(10.0))
fc2 = FC("fc", linear2 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc2_w_param_attrs, param_attr=linear2_w_param_attrs,
bias_attr=fc2_b_param_attrs) bias_attr=linear2_b_param_attrs)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
x1 = fc1(x) x1 = linear1(x)
x2 = fc2(x) x2 = linear2(x)
loss = x1 + x2 loss = x1 + x2
# print(loss, loss.shape) # print(loss, loss.shape)
loss.backward() loss.backward()
...@@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase): ...@@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase):
def no_detach_single(self): def no_detach_single(self):
data = self.generate_Data() data = self.generate_Data()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr( linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0)) initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)) initializer=fluid.initializer.Constant(6.0))
fc = FC("fc", linear = Linear(
10, 4,
num_flatten_dims=1, 10,
param_attr=fc_w_param_attrs, param_attr=linear_w_param_attrs,
bias_attr=fc_b_param_attrs) bias_attr=linear_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr( linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0)) initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)) initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc", linear1 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc1_w_param_attrs, param_attr=linear1_w_param_attrs,
bias_attr=fc1_b_param_attrs) bias_attr=linear1_b_param_attrs)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
x1 = fc1(x) x1 = linear1(x)
loss = x1 loss = x1
# print(loss, loss.shape) # print(loss, loss.shape)
loss.backward() loss.backward()
...@@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase): ...@@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase):
def detach_multi(self): def detach_multi(self):
data = self.generate_Data() data = self.generate_Data()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr( linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0)) initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)) initializer=fluid.initializer.Constant(6.0))
fc = FC("fc", linear = Linear(
10, 4,
num_flatten_dims=1, 10,
param_attr=fc_w_param_attrs, param_attr=linear_w_param_attrs,
bias_attr=fc_b_param_attrs) bias_attr=linear_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr( linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0)) initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)) initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc", linear1 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc1_w_param_attrs, param_attr=linear1_w_param_attrs,
bias_attr=fc1_b_param_attrs) bias_attr=linear1_b_param_attrs)
fc2_w_param_attrs = fluid.ParamAttr( linear2_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(9.0)) initializer=fluid.initializer.Constant(9.0))
fc2_b_param_attrs = fluid.ParamAttr( linear2_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(10.0)) initializer=fluid.initializer.Constant(10.0))
fc2 = FC("fc", linear2 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc2_w_param_attrs, param_attr=linear2_w_param_attrs,
bias_attr=fc2_b_param_attrs) bias_attr=linear2_b_param_attrs)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
x_detach = x.detach() x_detach = x.detach()
x1 = fc1(x) x1 = linear1(x)
x2 = fc2(x_detach) x2 = linear2(x_detach)
loss = x1 + x2 loss = x1 + x2
# print(loss, loss.shape) # print(loss, loss.shape)
loss.backward() loss.backward()
......
...@@ -18,7 +18,7 @@ import unittest ...@@ -18,7 +18,7 @@ import unittest
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
...@@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
class MNIST(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer):
def __init__(self, name_scope, dtype="float32"): def __init__(self, dtype="float32"):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool( self._simple_img_conv_pool_1 = SimpleImgConvPool(
num_channels=3, num_channels=3,
...@@ -94,21 +94,23 @@ class MNIST(fluid.dygraph.Layer): ...@@ -94,21 +94,23 @@ class MNIST(fluid.dygraph.Layer):
dtype=dtype, dtype=dtype,
use_cudnn=True) use_cudnn=True)
pool_2_shape = 50 * 4 * 4 self.pool_2_shape = 50 * 53 * 53
SIZE = 10 SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
self._fc = FC(self.full_name(), self._linear = Linear(
10, self.pool_2_shape,
param_attr=fluid.param_attr.ParamAttr( 10,
initializer=fluid.initializer.NormalInitializer( param_attr=fluid.param_attr.ParamAttr(
loc=0.0, scale=scale)), initializer=fluid.initializer.NormalInitializer(
act="softmax", loc=0.0, scale=scale)),
dtype=dtype) act="softmax",
dtype=dtype)
def forward(self, inputs, label): def forward(self, inputs, label):
x = self._simple_img_conv_pool_1(inputs) x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x) x = self._simple_img_conv_pool_2(x)
cost = self._fc(x) x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
cost = self._linear(x)
loss = fluid.layers.cross_entropy(cost, label) loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
return avg_loss return avg_loss
...@@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase): ...@@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase):
x = np.random.randn(1, 3, 224, 224).astype("float16") x = np.random.randn(1, 3, 224, 224).astype("float16")
y = np.random.randn(1, 1).astype("int64") y = np.random.randn(1, 1).astype("int64")
with fluid.dygraph.guard(fluid.CUDAPlace(0)): with fluid.dygraph.guard(fluid.CUDAPlace(0)):
model = MNIST("mnist", dtype="float16") model = MNIST(dtype="float16")
x = fluid.dygraph.to_variable(x) x = fluid.dygraph.to_variable(x)
y = fluid.dygraph.to_variable(y) y = fluid.dygraph.to_variable(y)
loss = model(x, y) loss = model(x, y)
......
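Unlike the removed FC layer, Linear does not flatten a multi-dimensional input, which is why the MNIST model above now reshapes the pooled feature map before the projection. A minimal sketch of that pattern (the feature-map shape here is illustrative, not taken from the test):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

with fluid.dygraph.guard():
    feat = fluid.dygraph.to_variable(
        np.random.randn(1, 50, 4, 4).astype("float32"))    # a pooled feature map
    flat_dim = 50 * 4 * 4
    x = fluid.layers.reshape(feat, shape=[-1, flat_dim])    # explicit flatten; FC used to do this internally
    linear = Linear(flat_dim, 10, act="softmax")
    out = linear(x)
    print(out.numpy().shape)    # (1, 10)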
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid import FC from paddle.fluid import Linear
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
...@@ -35,24 +35,26 @@ class MyLayer(fluid.Layer): ...@@ -35,24 +35,26 @@ class MyLayer(fluid.Layer):
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = FC(self.full_name(), self._linear1 = Linear(
3, input_size,
param_attr=fluid.ParamAttr( 3,
initializer=fluid.initializer.Constant(value=0.1)), param_attr=fluid.ParamAttr(
bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)),
initializer=fluid.initializer.Constant(value=0.1))) bias_attr=fluid.ParamAttr(
self._fc2 = FC(self.full_name(), initializer=fluid.initializer.Constant(value=0.1)))
4, self._linear2 = Linear(
param_attr=fluid.ParamAttr( 3,
initializer=fluid.initializer.Constant(value=0.1)), 4,
bias_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = self._fc2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = fluid.layers.reduce_sum(x)
return x return x
...@@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase): ...@@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp") mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out = out.numpy() dy_out = out.numpy()
out.backward() out.backward()
dy_grad = mlp._fc1.weight.gradient() dy_grad = mlp._linear1.weight.gradient()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp2 = fluid.dygraph.base.to_variable(np_inp) var_inp2 = fluid.dygraph.base.to_variable(np_inp)
mlp2 = MLP("mlp") mlp2 = MLP(input_size=2)
out2 = mlp2(var_inp2) out2 = mlp2(var_inp2)
dy_out2 = out2.numpy() dy_out2 = out2.numpy()
backward_strategy = fluid.dygraph.BackwardStrategy() backward_strategy = fluid.dygraph.BackwardStrategy()
backward_strategy.sort_sum_gradient = True backward_strategy.sort_sum_gradient = True
out2.backward(backward_strategy) out2.backward(backward_strategy)
dy_grad2 = mlp2._fc1.weight.gradient() dy_grad2 = mlp2._linear1.weight.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False) name="inp", shape=[2, 2], append_batch_size=False)
mlp = MLP("mlp") mlp = MLP(input_size=2)
out = mlp(inp) out = mlp(inp)
param_grads = fluid.backward.append_backward( param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1.weight.name])[0] out, parameter_list=[mlp._linear1.weight.name])[0]
exe = fluid.Executor(fluid.CPUPlace( exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
...@@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase): ...@@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.allclose(dy_grad2, static_grad)) self.assertTrue(np.allclose(dy_grad2, static_grad))
params = mlp.parameters(True) params = mlp.parameters(True)
self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name) self.assertEqual("linear_0.w_0", params[0].name)
self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name) self.assertEqual("linear_0.b_0", params[1].name)
self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name) self.assertEqual("linear_1.w_0", params[2].name)
self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name) self.assertEqual("linear_1.b_0", params[3].name)
self.assertEqual(len(params), 4) self.assertEqual(len(params), 4)
sublayers = mlp.sublayers(True) sublayers = mlp.sublayers(True)
self.assertEqual(mlp._fc1, sublayers[0]) self.assertEqual(mlp._linear1, sublayers[0])
self.assertEqual(mlp._fc2, sublayers[1]) self.assertEqual(mlp._linear2, sublayers[1])
self.assertEqual(len(sublayers), 2) self.assertEqual(len(sublayers), 2)
def test_dygraph_vs_static(self): def test_dygraph_vs_static(self):
......
...@@ -20,17 +20,17 @@ import numpy as np ...@@ -20,17 +20,17 @@ import numpy as np
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = fluid.dygraph.FC( self._linear1 = fluid.dygraph.Linear(
self.full_name(), input_size,
3, 3,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = fluid.dygraph.FC( self._linear2 = fluid.dygraph.Linear(
self.full_name(), 3,
4, 4,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
...@@ -38,8 +38,8 @@ class MLP(fluid.Layer): ...@@ -38,8 +38,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = self._fc2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = fluid.layers.reduce_sum(x)
return x return x
...@@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase): ...@@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase):
trace_var = 0 trace_var = 0
alive_var = 0 alive_var = 0
with fluid.dygraph.guard(): with fluid.dygraph.guard():
mlp = MLP("mlp") mlp = MLP(input_size=2)
for i in range(10): for i in range(10):
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
out = mlp(var_inp) out = mlp(var_inp)
......
...@@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope ...@@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = fluid.dygraph.FC( self._linear1 = fluid.dygraph.Linear(
self.full_name(), input_size,
3, 3,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = fluid.dygraph.FC( self._linear2 = fluid.dygraph.Linear(
self.full_name(), 3,
4, 4,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
...@@ -39,8 +39,8 @@ class MLP(fluid.Layer): ...@@ -39,8 +39,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = self._fc2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = fluid.layers.reduce_sum(x)
return x return x
...@@ -48,7 +48,7 @@ class MLP(fluid.Layer): ...@@ -48,7 +48,7 @@ class MLP(fluid.Layer):
class TestDygraphFramework(unittest.TestCase): class TestDygraphFramework(unittest.TestCase):
def test_dygraph_backward(self): def test_dygraph_backward(self):
with new_program_scope(): with new_program_scope():
mlp = MLP("mlp") mlp = MLP(input_size=2)
var_inp = fluid.layers.data( var_inp = fluid.layers.data(
"input", shape=[2, 2], dtype="float32", append_batch_size=False) "input", shape=[2, 2], dtype="float32", append_batch_size=False)
out = mlp(var_inp) out = mlp(var_inp)
......
...@@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase): ...@@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
x = np.random.randn(2, 4, 5).astype("float32") x = np.random.randn(2, 4, 5).astype("float32")
x = fluid.dygraph.to_variable(x) x = fluid.dygraph.to_variable(x)
fc1 = fluid.dygraph.FC("fc1", 10, num_flatten_dims=2) linear1 = fluid.dygraph.Linear(5, 10)
fc2 = fluid.dygraph.FC("fc2", 10, num_flatten_dims=2) linear2 = fluid.dygraph.Linear(5, 10)
y = fc1(x[:, :2]) y = linear1(x[:, :2])
z = fc2(x[:, 2:]) z = linear2(x[:, 2:])
loss = fluid.layers.reduce_mean(y) loss = fluid.layers.reduce_mean(y)
loss.backward() loss.backward()
for param in fc1.parameters(): for param in linear1.parameters():
self.assertIsNotNone(param._grad_ivar()) self.assertIsNotNone(param._grad_ivar())
for param in fc2.parameters(): for param in linear2.parameters():
self.assertIsNone(param._grad_ivar()) self.assertIsNone(param._grad_ivar())
optimizer = fluid.optimizer.AdamOptimizer(parameter_list=( optimizer = fluid.optimizer.AdamOptimizer(parameter_list=(
fc1.parameters() + fc2.parameters())) linear1.parameters() + linear2.parameters()))
_, params_grads = optimizer.minimize(loss) _, params_grads = optimizer.minimize(loss)
self.assertListEqual( self.assertListEqual(
sorted([p.name for p in fc1.parameters()]), sorted([p.name for p in linear1.parameters()]),
sorted([p_g[0].name for p_g in params_grads])) sorted([p_g[0].name for p_g in params_grads]))
fc1.clear_gradients() linear1.clear_gradients()
fc2.clear_gradients() linear2.clear_gradients()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -23,18 +23,18 @@ import paddle ...@@ -23,18 +23,18 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
import paddle.fluid.dygraph.nn as nn import paddle.fluid.dygraph.nn as nn
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class Policy(fluid.dygraph.Layer): class Policy(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(Policy, self).__init__(name_scope) super(Policy, self).__init__()
self.affine1 = nn.FC(self.full_name(), size=128) self.affine1 = nn.Linear(input_size, 128)
self.affine2 = nn.FC(self.full_name(), size=2) self.affine2 = nn.Linear(128, 2)
self.dropout_ratio = 0.6 self.dropout_ratio = 0.6
self.saved_log_probs = [] self.saved_log_probs = []
...@@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase):
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
policy = Policy("PolicyModel") policy = Policy(input_size=4)
dy_state = fluid.dygraph.base.to_variable(state) dy_state = fluid.dygraph.base.to_variable(state)
dy_state.stop_gradient = True dy_state.stop_gradient = True
...@@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace( exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
policy = Policy("PolicyModel") policy = Policy(input_size=4)
st_sgd = SGDOptimizer(learning_rate=1e-3) st_sgd = SGDOptimizer(learning_rate=1e-3)
......
...@@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer): ...@@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer):
class PtbModel(fluid.Layer): class PtbModel(fluid.Layer):
def __init__(self, def __init__(self,
name_scope,
hidden_size, hidden_size,
vocab_size, vocab_size,
num_layers=2, num_layers=2,
num_steps=20, num_steps=20,
init_scale=0.1, init_scale=0.1,
dropout=None): dropout=None):
super(PtbModel, self).__init__(name_scope) super(PtbModel, self).__init__()
self.hidden_size = hidden_size self.hidden_size = hidden_size
self.vocab_size = vocab_size self.vocab_size = vocab_size
self.init_scale = init_scale self.init_scale = init_scale
...@@ -160,7 +159,18 @@ class PtbModel(fluid.Layer): ...@@ -160,7 +159,18 @@ class PtbModel(fluid.Layer):
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale))) low=-init_scale, high=init_scale)))
self.out_project = Linear(self.hidden_size, self.vocab_size) self.softmax_weight = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
self.softmax_bias = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
def forward(self, input, label, init_hidden, init_cell): def forward(self, input, label, init_hidden, init_cell):
init_h = fluid.layers.reshape( init_h = fluid.layers.reshape(
...@@ -182,7 +192,8 @@ class PtbModel(fluid.Layer): ...@@ -182,7 +192,8 @@ class PtbModel(fluid.Layer):
rnn_out = fluid.layers.reshape( rnn_out = fluid.layers.reshape(
rnn_out, shape=[-1, self.num_steps, self.hidden_size]) rnn_out, shape=[-1, self.num_steps, self.hidden_size])
projection = self.out_project(rnn_out) projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
projection = fluid.layers.reshape( projection = fluid.layers.reshape(
projection, shape=[-1, self.vocab_size]) projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy( loss = fluid.layers.softmax_with_cross_entropy(
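The PtbModel change above drops the Linear output projection in favor of explicitly created parameters combined with matmul and elementwise_add. A minimal sketch of the equivalence, using small assumed shapes (hidden_size=3, vocab_size=5) for illustration:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    rnn_out = fluid.dygraph.to_variable(
        np.random.uniform(-1, 1, [4, 3]).astype("float32"))
    softmax_weight = fluid.dygraph.to_variable(
        np.random.uniform(-0.1, 0.1, [3, 5]).astype("float32"))
    softmax_bias = fluid.dygraph.to_variable(np.zeros([5], dtype="float32"))
    projection = fluid.layers.matmul(rnn_out, softmax_weight)
    projection = fluid.layers.elementwise_add(projection, softmax_bias)
    print(projection.numpy().shape)    # (4, 5), the same shape a Linear(3, 5) projection would give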
...@@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
......
...@@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase): ...@@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase):
class TestLayer(LayerTest): class TestLayer(LayerTest):
def test_custom_layer_with_kwargs(self): def test_custom_layer_with_kwargs(self):
class CustomLayer(fluid.Layer): class CustomLayer(fluid.Layer):
def __init__(self, name_scope, fc1_size=4): def __init__(self, input_size, linear1_size=4):
super(CustomLayer, self).__init__(name_scope) super(CustomLayer, self).__init__()
self.fc1 = nn.FC('fc1', self.linear1 = nn.Linear(
size=fc1_size, input_size, linear1_size, bias_attr=False)
bias_attr=False, self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False)
num_flatten_dims=1)
self.fc2 = nn.FC('fc2', def forward(self, x, do_linear2=False):
size=1, ret = self.linear1(x)
bias_attr=False, if do_linear2:
num_flatten_dims=1) ret = self.linear2(ret)
def forward(self, x, do_fc2=False):
ret = self.fc1(x)
if do_fc2:
ret = self.fc2(ret)
return ret return ret
with self.dynamic_graph(): with self.dynamic_graph():
inp = np.ones([3, 3], dtype='float32') inp = np.ones([3, 3], dtype='float32')
x = base.to_variable(inp) x = base.to_variable(inp)
custom = CustomLayer('custom', fc1_size=2) custom = CustomLayer(input_size=3, linear1_size=2)
ret = custom(x, do_fc2=False) ret = custom(x, do_linear2=False)
self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2])) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2]))
ret = custom(x, do_fc2=True) ret = custom(x, do_linear2=True)
self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1])) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1]))
def test_linear(self): def test_linear(self):
...@@ -133,112 +128,6 @@ class TestLayer(LayerTest): ...@@ -133,112 +128,6 @@ class TestLayer(LayerTest):
self.assertTrue(np.array_equal(static_ret, dy_ret_value)) self.assertTrue(np.array_equal(static_ret, dy_ret_value))
inp = np.ones([3, 32], dtype='float32')
with self.dynamic_graph():
t = base.to_variable(inp)
linear = nn.Linear(32, 4, bias_attr=False)
dy_ret = linear(t)
dy_ret_value = dy_ret.numpy()
with self.dynamic_graph():
t = base.to_variable(inp)
fc = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
dy_ret2 = fc(t)
dy_ret_value2 = dy_ret2.numpy()
self.assertTrue(np.array_equal(dy_ret_value, dy_ret_value2))
def test_fc(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
ret = layers.fc(t, size=4, bias_attr=False, num_flatten_dims=1)
ret2 = layers.fc(ret, size=4)
static_ret = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret2])[0]
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = nn.FC('fc2', size=4)
ret = fc1(t)
ret2 = fc2(ret)
static_ret2 = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret2])[0]
with self.dynamic_graph():
t = base.to_variable(inp)
fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = nn.FC('fc2', size=4)
ret = fc1(t)
dy_ret = fc2(ret)
dy_ret_value = dy_ret.numpy()
self.assertTrue(np.array_equal(static_ret, static_ret2))
self.assertTrue(np.array_equal(static_ret, dy_ret_value))
with self.dynamic_graph():
custom_weight = np.random.randn(1024, 4).astype("float32")
weight_attr1 = fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
custom_weight))
fc1 = fluid.dygraph.FC("fc1",
4,
num_flatten_dims=1,
param_attr=weight_attr1)
out1 = fc1(base.to_variable(inp))
loss1 = fluid.layers.reduce_mean(out1)
fc1_weight_init = fc1.weight.detach()
fc1_bias_init = fc1.bias.detach()
loss1.backward()
optimizer1 = fluid.optimizer.SGD(learning_rate=0.1,
parameter_list=fc1.parameters())
optimizer1.minimize(loss1)
fc1_weight_updated = fc1.weight.detach()
with self.dynamic_graph():
weight_attr2 = fluid.ParamAttr(
initializer=fluid.initializer.Uniform())
fc2 = fluid.dygraph.FC("fc2",
4,
num_flatten_dims=1,
param_attr=weight_attr2)
out2 = fc2(base.to_variable(inp))
self.assertFalse(
np.array_equal(fc1_weight_init.numpy(), fc2.weight.numpy()))
self.assertFalse(np.array_equal(out1.numpy(), out2.numpy()))
mismatched_weight = np.random.randn(4, 4).astype("float32")
with self.assertRaises(AssertionError):
fc2.weight.set_value(mismatched_weight)
fc2.weight.set_value(fc1_weight_init)
fc2.bias.set_value(fc1_bias_init)
out2 = fc2(base.to_variable(inp))
loss2 = fluid.layers.reduce_mean(out2)
loss2.backward()
optimizer2 = fluid.optimizer.SGD(learning_rate=0.1,
parameter_list=fc2.parameters())
optimizer2.minimize(loss2)
self.assertTrue(
np.array_equal(fc2.weight.numpy(), fc1_weight_updated.numpy()))
self.assertTrue(np.array_equal(out1.numpy(), out2.numpy()))
fc2.weight = fc1.weight
fc2.bias = fc1.bias
self.assertTrue(
np.array_equal(fc2.weight.numpy(), fc1.weight.numpy()))
self.assertTrue(np.array_equal(fc2.bias.numpy(), fc1.bias.numpy()))
def test_layer_norm(self): def test_layer_norm(self):
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph(): with self.static_graph():
......
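The removed test_fc above exercised weight initialization, set_value, and weight sharing on FC; a minimal Linear-based sketch of the same checks (a hypothetical reconstruction, not code from this commit):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable

inp = np.ones([3, 32], dtype='float32')
with fluid.dygraph.guard():
    linear1 = Linear(32, 4)
    out1 = linear1(to_variable(inp))
    w_init = linear1.weight.numpy()
    b_init = linear1.bias.numpy()

    linear2 = Linear(32, 4)
    linear2.weight.set_value(w_init)    # copy values into the existing parameter
    linear2.bias.set_value(b_init)
    out2 = linear2(to_variable(inp))
    assert np.allclose(out1.numpy(), out2.numpy())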