diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index 05269028acc4004b8af000dabd946f9a3d8ce40f..1a488844dec2133d3490cb03a0f610052a14175c 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -895,9 +895,6 @@ class Pool2D(layers.Layer):
 
 class Linear(layers.Layer):
     """
-    :alias_main: paddle.nn.Linear
-	:alias: paddle.nn.Linear,paddle.nn.layer.Linear,paddle.nn.layer.common.Linear
-	:old_api: paddle.fluid.dygraph.Linear
     
     Fully-connected linear transformation layer:
 
diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py
index 7d2ed0cdcf83a4fc5fb78f65c09c5b26d6b1ac23..81c38c0be6557bb6dbc861c70e1a97411c96a20b 100644
--- a/python/paddle/nn/functional/common.py
+++ b/python/paddle/nn/functional/common.py
@@ -1406,46 +1406,53 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8):
 def linear(x, weight, bias=None, name=None):
     """
 
-    Fully-connected linear transformation op
+    Fully-connected linear transformation operator. For each input :math:`X` ,
+    the equation is:
 
     .. math::
 
-        Out = {XW + b}
+        Out = XW + b
 
-    where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively.
+    where :math:`W` is the weight and :math:`b` is the bias.
 
-    The linear op multiplies input tensor with weight matrix and
-    produces an output Tensor of shape [N, *, output_dim], 
-    where N is batch size and `*` means any number of additional dimensions and output_dim is the last dim of ``weight``.
-    If ``bias`` is not None, a bias will be added to the output.
+    If the weight is a 2-D tensor of shape :math:`[in\_features, out\_features]` ,
+    input should be a multi-dimensional tensor of shape
+    :math:`[batch\_size, *, in\_features]` , where :math:`*` means any number of
+    additional dimensions. The linear operator multiplies input tensor with
+    weight and produces an output tensor of shape :math:`[batch\_size, *, out\_features]` , 
+    If :math:`bias` is not None, the bias should be a 1-D tensor of shape
+    :math:`[out\_features]` and will be added to the output.
 
-    Args:
-        x(Tensor): Input tensor, its data type is float16, float32 or float64
-        weight(Tensor): Weight tensor, its data type is float16, float32 or float64
-        bias(Tensor|None, optional): Bias tensor, its data type is float16, float32 or float64. If it is set to None, no bias will be added to the output units.
-        name(str|None, optional): For detailed information, please refer to :ref:`api_guide_Name`. Default: None.
+    Parameters:
+        x (Tensor): Input tensor. The data type should be float16, float32 or float64.
+        weight (Tensor): Weight tensor. The data type should be float16, float32 or float64.
+        bias (Tensor, optional): Bias tensor. The data type should be float16, float32 or float64.
+                                 If it is set to None, no bias will be added to the output units.
+        name (str, optional): Normally there is no need for user to set this parameter.
+                              For detailed information, please refer to :ref:`api_guide_Name` .
 
     Returns:
-        Output tensor
+        Tensor, the shape is :math:`[batch\_size, *, out\_features]` and the
+        data type is the same with input :math:`x` .
 
     Examples:
         .. code-block:: python
           
-          import numpy as np
           import paddle
-          import paddle.nn.functional as F
           
-          input = np.ones((3,1,2), dtype=np.float32)
-          weight = np.ones((2,2), dtype=np.float32)
-          bias = np.ones((2), dtype=np.float32)
-          place = paddle.CPUPlace()
-          paddle.disable_static(place)
-          input = paddle.to_tensor(input)
-          weight = paddle.to_tensor(weight)
-          bias = paddle.to_tensor(bias)
-          out = F.linear(input, weight, bias)
-          print(out) #[3 3 3 3 3 3]
-    
+          x = paddle.randn((3, 2), dtype="float32")
+          # x: [[-0.32342386 -1.200079  ]
+          #     [ 0.7979031  -0.90978354]
+          #     [ 0.40597573  1.8095392 ]]
+          weight = paddle.full(shape=[2, 4], fill_value="0.5", dtype="float32", name="weight")
+          # weight: [[0.5 0.5 0.5 0.5]
+          #          [0.5 0.5 0.5 0.5]]
+          bias = paddle.ones(shape=[4], dtype="float32", name="bias")
+          # bias: [1. 1. 1. 1.]
+          y = paddle.nn.functional.linear(x, weight, bias)
+          # y: [[0.23824859 0.23824859 0.23824859 0.23824859]
+          #     [0.9440598  0.9440598  0.9440598  0.9440598 ]
+          #     [2.1077576  2.1077576  2.1077576  2.1077576 ]]
     """
     if in_dygraph_mode():
         pre_bias = _varbase_creator(dtype=x.dtype)
diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py
index 433443fee1765a3ecd4cf0bbe53a960bbeaefc71..05cbd96863c28574ad9be0edbf0ebbe26fa64483 100644
--- a/python/paddle/nn/layer/common.py
+++ b/python/paddle/nn/layer/common.py
@@ -15,7 +15,6 @@
 # TODO: define the common classes to build a neural network
 from ...fluid.dygraph import BilinearTensorProduct  #DEFINE_ALIAS
 from ...fluid.dygraph import Pool2D  #DEFINE_ALIAS
-from ...fluid.dygraph import Linear  #DEFINE_ALIAS
 from ...fluid.dygraph import Flatten  #DEFINE_ALIAS
 from ...fluid.dygraph import layers
 from .. import functional as F
@@ -50,56 +49,74 @@ __all__ = [
 
 class Linear(layers.Layer):
     """
-    
-    Fully-connected linear transformation layer:
+
+    Fully-connected linear transformation layer. For each input :math:`X` ,
+    the equation is:
 
     .. math::
 
-        Out = {XW + b}
+        Out = XW + b
 
-    where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively.
+    where :math:`W` is the weight and :math:`b` is the bias.
 
-    Linear layer takes only one ``Tensor`` input.
-    The Linear layer multiplies input tensor with weight matrix and
-    produces an output Tensor of shape [N, *, `output_dim`],
-    where N is batch size and `*` means any number of additional dimensions.
-    If ``bias_attr`` is not None, a bias variable will be created and added to the output.
+    Linear layer takes only one multi-dimensional tensor as input with the
+    shape :math:`[batch\_size, *, in\_features]` , where :math:`*` means any
+    number of additional dimensions. It multiplies input tensor with the weight
+    (a 2-D tensor of shape :math:`[in\_features, out\_features]` ) and produces
+    an output tensor of shape :math:`[batch\_size, *, out\_features]` .
+    If :math:`bias\_attr` is not False, the bias (a 1-D tensor of
+    shape :math:`[out\_features]` ) will be created and added to the output.
 
     Parameters:
-        in_features(int): The number of input units in this layer.
-        out_features(int): The number of output units in this layer.
-        weight_attr(ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
-            weights(Parameter) of this layer. Default: None.
-        bias_attr(ParamAttr or list of ParamAttr, optional): The attribute for the bias
-            of this layer. If it is set to False, no bias will be added to the output units.
-            If it is set to None, the bias is initialized zero. Default: None.
-        name(str|None): For detailed information, please refer to :ref:`api_guide_Name`. Default: None.
-
-    Attributes:
-        **weight** (Parameter): the learnable weights of this layer.
+        in_features (int): The number of input units.
+        out_features (int): The number of output units.
+        weight_attr (ParamAttr, optional): The attribute for the learnable
+            weight of this layer. The default value is None and the weight will be
+            initialized to zero. For detailed information, please refer to
+            paddle.ParamAttr.
+        bias_attr (ParamAttr|bool, optional): The attribute for the learnable bias
+            of this layer. If it is set to False, no bias will be added to the output.
+            If it is set to None or one kind of ParamAttr, a bias parameter will
+            be created according to ParamAttr. For detailed information, please refer
+            to paddle.ParamAttr. The default value is None and the bias will be
+            initialized to zero.
+        name (str, optional): Normally there is no need for user to set this parameter.
+            For detailed information, please refer to :ref:`api_guide_Name` .
 
-        **bias** (Parameter or None): the learnable bias of this layer.
+    Attribute:
+        **weight** (Parameter): the learnable weight of this layer.
 
-    Returns:
-        None
+        **bias** (Parameter): the learnable bias of this layer.
+
+    Shape:
+        - input: Multi-dimentional tensor with shape :math:`[batch\_size, *, in\_features]` .
+        - output: Multi-dimentional tensor with shape :math:`[batch\_size, *, out\_features]` .
 
     Examples:
         .. code-block:: python
 
           import paddle
-          from paddle import nn
-          import numpy as np
-
-          data = np.ones((3,1,2), np.float32)
-          place = paddle.CPUPlace()
-          paddle.disable_static(place)
-          data = paddle.to_tensor(data)
-          weight_attr=paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
-          trainable=False, regularizer=None, initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0))
-          bias_attr=paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
-          trainable=False, regularizer=None, initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0))
-          linear = nn.Linear(2,2,weight_attr=weight_attr, bias_attr=bias_attr)
-          res = linear(data)  # [3 3 3 3 3 3]
+
+          # Define the linear layer.
+          weight_attr = paddle.ParamAttr(
+              name="weight",
+              initializer=paddle.nn.initializer.Constant(value=0.5))
+          bias_attr = paddle.ParamAttr(
+              name="bias",
+              initializer=paddle.nn.initializer.Constant(value=1.0))
+          linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr)
+          # linear.weight: [[0.5 0.5 0.5 0.5]
+          #                 [0.5 0.5 0.5 0.5]]
+          # linear.bias: [1. 1. 1. 1.]
+
+          x = paddle.randn((3, 2), dtype="float32")
+          # x: [[-0.32342386 -1.200079  ]
+          #     [ 0.7979031  -0.90978354]
+          #     [ 0.40597573  1.8095392 ]]
+          y = linear(x)
+          # y: [[0.23824859 0.23824859 0.23824859 0.23824859]
+          #     [0.9440598  0.9440598  0.9440598  0.9440598 ]
+          #     [2.1077576  2.1077576  2.1077576  2.1077576 ]]
     """
 
     def __init__(self,