diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py
index 5d4538997358dc610c9b92eaccdbdb8493596740..e9ff925fd8def6ba60d9afcbb82bc63c69d4448f 100644
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -1550,8 +1550,6 @@ def fused_bn_add_act(
 
             import paddle
             import paddle.fluid as fluid
-            import paddle
-            paddle.enable_static()
 
             paddle.enable_static()
             # required: gpu
@@ -1582,7 +1580,7 @@ def fused_bn_add_act(
                         act=None,
                         data_layout='NHWC')
                     fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn)
-                    prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax')
+                    prediction = paddle.static.nn.fc(x=fused_bn_add_act, size=10, activation='softmax')
                     loss = paddle.nn.functional.cross_entropy(
                         input=prediction, label=y,
                         reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
index 117877073adc070281911c9c9ce4af33312ac39d..a17495fde947737cc27250edba12fd4237762c1c 100644
--- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
+++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
@@ -94,10 +94,10 @@ def vgg16_bn_drop(input):
     conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
 
     drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
-    fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
+    fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None)
     bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
     drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
-    fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
+    fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None)
     return fc2
 
 
@@ -124,7 +124,7 @@ def train(net_type, use_cuda, save_dirname, is_local):
         else:
             raise ValueError("%s network is not supported" % net_type)
 
-        logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
+        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
         cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
             logits, label, return_softmax=True
         )
@@ -506,7 +506,9 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase):
                 )
 
                 net = vgg16_bn_drop(image)
-                logits = fluid.layers.fc(input=net, size=10, act="softmax")
+                logits = paddle.static.nn.fc(
+                    x=net, size=10, activation="softmax"
+                )
                 cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                     logits, label, return_softmax=True
                 )
diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py
index 4264a94e9eba6d5054640478dc9fc499d1a215fc..7c6cf13bec5e92ec3774045be0e3a6831e081ca0 100644
--- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py
+++ b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py
@@ -107,7 +107,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
         )
         label = fluid.layers.data(name='label', shape=[1], dtype='int64')
         net = resnet_cifar10(images)
-        logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
+        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
         cost = paddle.nn.functional.softmax_with_cross_entropy(
             logits, label, return_softmax=False
         )
@@ -300,7 +300,9 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase):
                         fluid.layers.assign(input=one_var, output=label)
 
                 net = resnet_cifar10(image)
-                logits = fluid.layers.fc(input=net, size=10, act="softmax")
+                logits = paddle.static.nn.fc(
+                    x=net, size=10, activation="softmax"
+                )
 
         block = main_prog.global_block()
         for op in block.ops:
diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py
index 4f32eaffc3e52b9d6ab03a57cacb3fc3f2c624af..7af54b7d1573830ebdb244ba520bccf76ad67638 100644
--- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py
+++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py
@@ -83,9 +83,11 @@ def bow_net(
     )
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
     bow_tanh = paddle.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
+    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
+    prediction = paddle.static.nn.fc(
+        x=[fc_2], size=class_dim, activation="softmax"
+    )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py
index fecb1818f47ba263a96dd25a43d175d4ceea07fc..7fee96e6477d40764d8e11467c26083138a2f3ad 100644
--- a/python/paddle/fluid/data_feeder.py
+++ b/python/paddle/fluid/data_feeder.py
@@ -349,7 +349,7 @@ class DataFeeder:
             with fluid.program_guard(main_program, startup_program):
                 data_1 = fluid.data(name='data_1', shape=[None, 2, 2], dtype='float32')
                 data_2 = fluid.data(name='data_2', shape=[None, 1, 3], dtype='float32')
-                out = fluid.layers.fc(input=[data_1, data_2], size=2)
+                out = paddle.static.nn.fc(x=[data_1, data_2], size=2)
                 # ...
             feeder = fluid.DataFeeder([data_1, data_2], place)
 
@@ -584,7 +584,7 @@ class DataFeeder:
                 # a simple network sample
                 data = fluid.data(name='data', shape=[None, 4, 4], dtype='float32')
                 label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-                hidden = fluid.layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
 
                 feeder = fluid.DataFeeder(place=places[0], feed_list=[data, label])
                 reader = feeder.decorate_reader(reader, multi_devices=True, num_places=3, drop_last=True)
diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py
index e4f3cc959f314f11ce6dac693ff9ecb1cb68fa3e..e2b79936481b643fb9a3e04a0871de5f519346b0 100644
--- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py
+++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py
@@ -119,11 +119,11 @@ def model():
     dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, pool_type="sum")
     dnn_out = dnn_pool
     for i, dim in enumerate(dnn_layer_dims[1:]):
-        fc = fluid.layers.fc(
-            input=dnn_out,
+        fc = paddle.static.nn.fc(
+            x=dnn_out,
             size=dim,
-            act="relu",
-            param_attr=fluid.ParamAttr(
+            activation="relu",
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01)
             ),
             name='dnn-fc-%d' % i,
@@ -145,7 +145,7 @@ def model():
 
     merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
 
-    predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+    predict = paddle.static.nn.fc(x=merge_layer, size=2, activation='softmax')
     acc = paddle.static.accuracy(input=predict, label=label)
     auc_var, batch_auc_var, auc_states = paddle.static.auc(
         input=predict, label=label
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 849e52d074eefd2c23b3b86fd631bc40a192ffd9..8560d5ac1395a133f991645661d68e8925e2506f 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -150,10 +150,10 @@ class ConstantInitializer(Initializer):
             import paddle.fluid as fluid
             paddle.enable_static()
             x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32")
-            fc = fluid.layers.fc(
-                input=x,
+            fc = paddle.static.nn.fc(
+                x,
                 size=10,
-                param_attr=fluid.initializer.Constant(value=2.0))
+                weight_attr=fluid.initializer.Constant(value=2.0))
 
     """
 
@@ -224,10 +224,12 @@ class UniformInitializer(Initializer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
+            paddle.enable_static()
             x = fluid.data(name='x', shape=[None, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
+            fc = paddle.static.nn.fc(x, size=10,
+                weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
     """
 
     def __init__(
@@ -346,10 +348,12 @@ class NormalInitializer(Initializer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
+            paddle.enable_static()
             x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32")
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
+            fc = paddle.static.nn.fc(x, size=10,
+                weight_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
 
     """
 
@@ -429,10 +433,12 @@ class TruncatedNormalInitializer(Initializer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
+            paddle.enable_static()
             x = fluid.data(name='x', shape=[None, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
+            fc = paddle.static.nn.fc(x, size=10,
+                weight_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
     """
 
     def __init__(self, loc=0.0, scale=1.0, seed=0):
@@ -557,11 +563,13 @@ class XavierInitializer(Initializer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
+            paddle.enable_static()
             queries = fluid.data(name='x', shape=[None,1], dtype='float32')
-            fc = fluid.layers.fc(
-                input=queries, size=10,
-                param_attr=fluid.initializer.Xavier(uniform=False))
+            fc = paddle.static.nn.fc(
+                x=queries, size=10,
+                weight_attr=fluid.initializer.Xavier(uniform=False))
 
     """
 
@@ -732,8 +740,8 @@ class MSRAInitializer(Initializer):
             import paddle.fluid as fluid
             paddle.enable_static()
             x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32")
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.MSRA(uniform=False))
+            fc = paddle.static.nn.fc(x, size=10,
+                weight_attr=fluid.initializer.MSRA(uniform=False))
 
     """
 
@@ -1044,11 +1052,13 @@ class NumpyArrayInitializer(Initializer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
             import numpy
+            paddle.enable_static()
             x = fluid.data(name="x", shape=[2, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2])))
+            fc = paddle.static.nn.fc(x, size=10,
+                weight_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2])))
     """
 
     def __init__(self, value):
@@ -1282,10 +1292,11 @@ def calculate_gain(nonlinearity, param=None):
 # We short the class name, since users will use the initializer with the package
 # name. The sample code:
 #
+# import paddle
 # import paddle.fluid as fluid
 #
-# hidden = fluid.layers.fc(...,
-#                          param_attr=ParamAttr(fluid.initializer.Xavier()))
+# hidden = paddle.static.nn.fc(...,
+#                              weight_attr=ParamAttr(fluid.initializer.Xavier()))
 #
 # It is no need to add an `Initializer` as the class suffix
 Constant = ConstantInitializer
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 51ea32196a8047a5d1551210f9f0593da5305469..490e9412cb2a76ca8c1535928a26f9227071bd37 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -511,7 +511,7 @@ def save_params(executor, dirname, main_program=None, filename=None):
             image = fluid.data(name='img', shape=[None, 28, 28], dtype='float32')
             label = fluid.data(name='label', shape=[None, 1], dtype='int64')
             feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
-            predict = fluid.layers.fc(input=image, size=10, act='softmax')
+            predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
 
             loss = paddle.nn.functional.cross_entropy(
                 input=predict, label=label,
@@ -750,7 +750,7 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
             label = fluid.data(name='label', shape=[None, 1], dtype='int64')
             feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
 
-            predict = fluid.layers.fc(input=image, size=10, act='softmax')
+            predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
             loss = paddle.nn.functional.cross_entropy(
                 input=predict, label=label,
                 reduction='none', use_softmax=False
@@ -1384,7 +1384,7 @@ def save_inference_model(
             image = fluid.data(name='img', shape=[None, 28, 28], dtype='float32')
             label = fluid.data(name='label', shape=[None, 1], dtype='int64')
             feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
-            predict = fluid.layers.fc(input=image, size=10, act='softmax')
+            predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
 
             loss = paddle.nn.functional.cross_entropy(
                 input=predict, label=label,
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 9e24e18e291f14d2aabfb628fd743dc821b5669c..ec5847d62c09b0144491efd368a14dc69a8a8332 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -353,7 +353,7 @@ class StaticRNN:
                 word = rnn.step_input(x_emb)
                 # create prev memory parameter, batch size comes from word
                 prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
-                hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+                hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu')
                 # use hidden to update prev
                 rnn.update_memory(prev, hidden)
                 # mark hidden as output
@@ -444,7 +444,7 @@ class StaticRNN:
                         word = rnn.step_input(x_emb)
                         # create prev memory parameter, batch size comes from word
                         prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
-                        hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+                        hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu')
                         # use hidden to update prev
                         rnn.update_memory(prev, hidden)
 
@@ -473,7 +473,7 @@ class StaticRNN:
                         word = rnn.step_input(x_emb)
                         # init memory
                         prev = rnn.memory(init=boot_memory)
-                        hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+                        hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu')
                         # update hidden with prev
                         rnn.update_memory(prev, hidden)
 
@@ -576,7 +576,7 @@ class StaticRNN:
                         word = rnn.step_input(x_emb)
                         # create prev memory parameter, batch size comes from word
                         prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
-                        hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+                        hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu')
                         # use hidden to update prev
                         rnn.update_memory(prev, hidden)
 
@@ -629,7 +629,7 @@ class StaticRNN:
                         word = rnn.step_input(x_emb)
                         # create prev memory parameter, batch size comes from word
                         prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
-                        hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+                        hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu')
                         # use hidden to update prev
                         rnn.update_memory(prev, hidden)
                         rnn.step_output(hidden)
@@ -691,7 +691,7 @@ class StaticRNN:
                         word = rnn.step_input(x_emb)
                         # create prev memory parameter, batch size comes from word
                         prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
-                        hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+                        hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size, activation='relu')
                         # use hidden to update prev
                         rnn.update_memory(prev, hidden)
                         # mark each step's hidden and word as output
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index c11a541df5326794a72390086442664aee26a142..fa0e4007eaa76ef172522dcda4aec1ad93948a26 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -45,7 +45,6 @@ from .layer_function_generator import (
 from .tensor import concat, assign, fill_constant, zeros
 from . import utils
 from .. import unique_name
-from functools import reduce
 from .. import core
 from ...utils import deprecated
 from ..data_feeder import (
@@ -60,7 +59,6 @@ from collections.abc import Iterable
 
 
 __all__ = [
-    'fc',
     'embedding',
     'autoincreased_step_counter',
 ]
@@ -126,172 +124,6 @@ def _elementwise_op_in_dygraph(
     )
 
 
-def fc(
-    input,
-    size,
-    num_flatten_dims=1,
-    param_attr=None,
-    bias_attr=None,
-    act=None,
-    name=None,
-):
-    r"""
-    :api_attr: Static Graph
-
-    **Fully Connected Layer**
-
-    This operator creates a fully connected layer in the network. It can take
-    a Tensor(or LoDTensor) or a list of Tensor(or LoDTensor) as its inputs(see
-    Args in detail). It creates a variable called weight for each input Tensor,
-    which represents a fully connected weight matrix from each input unit to
-    each output unit. The fully connected layer multiplies each input Tensor
-    with its corresponding weight to produce an output Tensor with shape :math:`[M, size]` ,
-    where M is batch size. If a list of Tensor is given, the results of
-    multiple output Tensors with shape :math:`[M, size]` will be summed up. If :attr:`bias_attr`
-    is not None, a bias variable will be created and added to the output.
-    Finally, if :attr:`act` is not None, it will be applied to the output as well.
-
-    When the input is a single Tensor(or LoDTensor):
-
-    .. math::
-
-        Out = Act({XW + b})
-
-    When the input is a list of Tensor(or LoDTensor):
-
-    .. math::
-
-        Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
-
-    In the above equation:
-
-    * :math:`N`: Number of the input. N equals to len(input) if input is list of Variable.
-    * :math:`X_i`: The i-th input tensor.
-    * :math:`W_i`: The i-th weights matrix corresponding i-th input tensor.
-    * :math:`b`: The bias parameter created by this layer (if needed).
-    * :math:`Act`: The activation function.
-    * :math:`Out`: The output Tensor.
-
-    .. code-block:: text
-
-        Case 1:
-        Given a single Tensor data_1, and num_flatten_dims = 2:
-            data_1.data = [[[0.1, 0.2],
-                            [0.3, 0.4]]]
-            data_1.shape = (1, 2, 2) # 1 is batch_size
-
-            out = fluid.layers.fc(input=data_1, size=1, num_flatten_dims=2)
-
-        Then output is:
-            out.data = [[0.83234344], [0.34936576]]
-            out.shape = (1, 2, 1)
-
-        Case 2:
-        Given a list of Tensor:
-            data_1.data = [[[0.1, 0.2],
-                           [0.3, 0.4]]]
-            data_1.shape = (1, 2, 2) # 1 is batch_size
-
-            data_2 = [[[0.1, 0.2, 0.3]]]
-            data_2.shape = (1, 1, 3)
-
-            out = fluid.layers.fc(input=[data_1, data_2], size=2)
-
-        Then:
-            out.data = [[0.18669507, 0.1893476]]
-            out.shape = (1, 2)
-
-    Args:
-        input (Variable|list of Variable): A Tensor(or LoDTensor) with shape :math:`[N_1, N_2,..., N_k]` or
-            a list of Tensor(or LoDTensor). The dimensions of the input Tensor is at least 2 and the data
-            type should be float32 or float64.
-        size(int): The number of output units in this layer, which also means the feature size of output
-            Tensor(or LoDTensor).
-        num_flatten_dims (int): The fc layer can accept an input Tensor with more than
-            two dimensions. If this happens, the multidimensional tensor will first be flattened
-            into a 2-D matrix. The parameter :attr:`num_flatten_dims` determines how the input
-            Tensor is flattened: the first :attr:`num_flatten_dims` (inclusive, index starts from 1)
-            dimensions will be flatten to form the first dimension of the final matrix (height of
-            the matrix), and the rest :math:`rank(X) - num\_flatten\_dims` dimensions are flattened to
-            form the second dimension of the final matrix (width of the matrix). For example, assuming that
-            X is a 5-dimensional Tensor with a shape [2, 3, 4, 5, 6], and :attr:`num_flatten_dims` = 3.
-            Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1.
-        param_attr (ParamAttr): To specify the weight parameter property. Default: None, which means the
-            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` .
-        bias_attr (ParamAttr): To specify the bias parameter property. Default: None, which means the
-            default bias parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` .
-        act (str): Activation to be applied to the output of this layer, such as tanh, softmax,
-            sigmoid, relu. For more information, please refer to :ref:`api_guide_activations_en` . Default: None.
-        name (str, optional): The default value is None.  Normally there is no need for user to set this property.
-            For more information, please refer to :ref:`api_guide_Name` .
-
-    Returns:
-        Variable: Tensor or LoDTensor calculated by fc layer. The data type is same with input.
-
-    Raises:
-        ValueError: If dimensions of the input Tensor is less than 2.
-
-    Examples:
-        .. code-block:: python
-
-          import paddle.fluid as fluid
-          import paddle
-          paddle.enable_static()
-          # when input is single tensor
-          data = fluid.data(name="data", shape=[-1, 32], dtype="float32")
-          fc = fluid.layers.fc(input=data, size=1000, act="tanh")
-
-          # when input are multiple tensors
-          data_1 = fluid.data(name="data_1", shape=[-1, 32], dtype="float32")
-          data_2 = fluid.data(name="data_2", shape=[-1, 36], dtype="float32")
-          fc = fluid.layers.fc(input=[data_1, data_2], size=1000, act="tanh")
-    """
-    helper = LayerHelper("fc", **locals())
-    check_type(input, 'input', (list, tuple, Variable), 'fc')
-    if isinstance(input, (list, tuple)):
-        for i, input_x in enumerate(input):
-            check_type(input_x, 'input[' + str(i) + ']', Variable, 'fc')
-    dtype = helper.input_dtype()
-    check_dtype(
-        dtype, 'input', ['float16', 'uint16', 'float32', 'float64'], 'fc'
-    )
-    mul_results = []
-    for input_var, param_attr in helper.iter_inputs_and_params():
-        input_shape = input_var.shape
-        if num_flatten_dims == -1:
-            num_flatten_dims = len(input_shape) - 1
-        param_shape = [
-            reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
-        ] + [size]
-
-        w = helper.create_parameter(
-            attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False
-        )
-        tmp = helper.create_variable_for_type_inference(dtype)
-        helper.append_op(
-            type="mul",
-            inputs={"X": input_var, "Y": w},
-            outputs={"Out": tmp},
-            attrs={"x_num_col_dims": num_flatten_dims, "y_num_col_dims": 1},
-        )
-        mul_results.append(tmp)
-
-    if len(mul_results) == 1:
-        pre_bias = mul_results[0]
-    else:
-        pre_bias = helper.create_variable_for_type_inference(dtype)
-        helper.append_op(
-            type="sum",
-            inputs={"X": mul_results},
-            outputs={"Out": pre_bias},
-            attrs={"use_mkldnn": False},
-        )
-    # add bias
-    pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims)
-    # add activation
-    return helper.append_activation(pre_activation)
-
-
 @deprecated(since="2.0.0", update_to="paddle.nn.functional.embedding")
 def embedding(
     input,
diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py
index 92efd254300c1def09cbb5b2eb8237055cc4fa0d..e6b68495cf76caedd5ed05e858c070f13b964799 100644
--- a/python/paddle/fluid/nets.py
+++ b/python/paddle/fluid/nets.py
@@ -554,9 +554,13 @@ def scaled_dot_product_attention(
         if num_heads == 1:
             return queries, keys, values
 
-        q = layers.fc(input=queries, size=queries.shape[-1], num_flatten_dims=2)
-        k = layers.fc(input=keys, size=keys.shape[-1], num_flatten_dims=2)
-        v = layers.fc(input=values, size=values.shape[-1], num_flatten_dims=2)
+        q = paddle.static.nn.fc(
+            x=queries, size=queries.shape[-1], num_flatten_dims=2
+        )
+        k = paddle.static.nn.fc(x=keys, size=keys.shape[-1], num_flatten_dims=2)
+        v = paddle.static.nn.fc(
+            x=values, size=values.shape[-1], num_flatten_dims=2
+        )
         return q, k, v
 
     def __split_heads(x, num_heads):
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index d130e0c961de51f232fa9ef54e3d39b9192ac7a3..3b251d5be3e5e07acda5fd018b7d209388d84d17 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1433,7 +1433,7 @@ class SGDOptimizer(Optimizer):
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = paddle.mean(cost)
 
@@ -1625,7 +1625,7 @@ class MomentumOptimizer(Optimizer):
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = paddle.mean(cost)
 
@@ -1774,7 +1774,7 @@ class LarsMomentumOptimizer(Optimizer):
             np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
             inp = fluid.layers.data(
                 name="inp", shape=[2, 2], append_batch_size=False)
-            out = fluid.layers.fc(inp, size=3)
+            out = paddle.static.nn.fc(inp, size=3)
             out = paddle.sum(out)
             optimizer = fluid.optimizer.LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
             optimizer.minimize(out)
@@ -2033,7 +2033,7 @@ class AdagradOptimizer(Optimizer):
             paddle.enable_static()
             np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
             inp = fluid.data(name="inp", shape=[2, 2])
-            out = fluid.layers.fc(inp, size=3)
+            out = paddle.static.nn.fc(inp, size=3)
             out = paddle.sum(out)
             optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.2)
             optimizer.minimize(out)
@@ -2191,7 +2191,7 @@ class AdamOptimizer(Optimizer):
             with fluid.program_guard(main):
                 x = fluid.data(name='x', shape=[None, 13], dtype='float32')
                 y = fluid.data(name='y', shape=[None, 1], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = paddle.mean(cost)
 
@@ -2220,7 +2220,7 @@ class AdamOptimizer(Optimizer):
             with fluid.program_guard(main):
                 x = fluid.data(name='x', shape=[None, 13], dtype='float32')
                 y = fluid.data(name='y', shape=[None, 1], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = paddle.mean(cost)
 
@@ -2613,7 +2613,7 @@ class AdamaxOptimizer(Optimizer):
           startup_program = fluid.Program()
           with fluid.program_guard(train_program, startup_program):
               data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-              hidden = fluid.layers.fc(input=data, size=10)
+              hidden = paddle.static.nn.fc(x=data, size=10)
               loss = paddle.mean(hidden)
               adam = fluid.optimizer.AdamaxOptimizer(learning_rate=0.2)
               adam.minimize(loss)
@@ -2765,7 +2765,7 @@ class DpsgdOptimizer(Optimizer):
           startup_program = fluid.Program()
           with fluid.program_guard(train_program, startup_program):
               data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-              hidden = fluid.layers.fc(input=data, size=10)
+              hidden = paddle.static.nn.fc(x=data, size=10)
               loss = paddle.mean(hidden)
               optimizer = fluid.optimizer.Dpsgd(learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0)
               optimizer.minimize(loss)
@@ -2909,11 +2909,13 @@ class DecayedAdagradOptimizer(Optimizer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
 
-            x = fluid.data( name='x', shape=[None, 10], dtype='float32' )
-            trans = fluid.layers.fc( x, 100 )
-            cost = fluid.layers.reduce_mean( trans )
+            paddle.enable_static()
+            x = fluid.data(name='x', shape=[None, 10], dtype='float32')
+            trans = paddle.static.nn.fc(x, 100)
+            cost = paddle.mean(trans)
             optimizer = fluid.optimizer.DecayedAdagradOptimizer(learning_rate=0.2)
             optimizer.minimize(cost)
     """
@@ -3031,11 +3033,13 @@ class AdadeltaOptimizer(Optimizer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
 
+            paddle.enable_static()
             image = fluid.data(name='image', shape=[None, 28], dtype='float32')
-            fc = fluid.layers.fc(image, size=10)
-            cost = fluid.layers.reduce_mean(fc)
+            fc = paddle.static.nn.fc(image, size=10)
+            cost = paddle.mean(fc)
             optimizer = fluid.optimizer.Adadelta(
                 learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
 
@@ -3215,7 +3219,7 @@ class RMSPropOptimizer(Optimizer):
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = paddle.mean(cost)
 
@@ -3413,7 +3417,7 @@ class FtrlOptimizer(Optimizer):
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = paddle.mean(cost)
 
@@ -3589,7 +3593,7 @@ class LambOptimizer(AdamOptimizer):
             paddle.enable_static()
 
             data = fluid.data(name='x', shape=[-1, 5], dtype='float32')
-            hidden = fluid.layers.fc(input=data, size=10)
+            hidden = paddle.static.nn.fc(x=data, size=10)
             cost = paddle.mean(hidden)
 
             def exclude_fn(param):
@@ -3806,7 +3810,7 @@ class ModelAverage(Optimizer):
         with fluid.program_guard(train_program, startup_program):
             # build net
             data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-            hidden = fluid.layers.fc(input=data, size=10)
+            hidden = paddle.static.nn.fc(x=data, size=10)
             loss = paddle.mean(hidden)
             optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
             optimizer.minimize(loss)
@@ -3985,7 +3989,7 @@ class ModelAverage(Optimizer):
             with fluid.program_guard(train_program, startup_program):
                 # build net
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-                hidden = fluid.layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
                 loss = paddle.mean(hidden)
                 optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
                 optimizer.minimize(loss)
@@ -4041,7 +4045,7 @@ class ModelAverage(Optimizer):
             with fluid.program_guard(train_program, startup_program):
                 # build net
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-                hidden = fluid.layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
                 loss = paddle.mean(hidden)
                 optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
                 optimizer.minimize(loss)
@@ -4347,9 +4351,11 @@ class PipelineOptimizer:
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
             import paddle.fluid.layers as layers
 
+            paddle.enable_static()
             with fluid.device_guard("gpu:0"):
                 x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0)
                 y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0)
@@ -4364,8 +4370,8 @@ class PipelineOptimizer:
 
             with fluid.device_guard("gpu:1"):
                 concat = layers.concat([emb_x, emb_y], axis=1)
-                fc = layers.fc(input=concat, name="fc", size=1, num_flatten_dims=1, bias_attr=False)
-                loss = layers.reduce_mean(fc)
+                fc = paddle.static.nn.fc(x=concat, name="fc", size=1, num_flatten_dims=1, bias_attr=False)
+                loss = paddle.mean(fc)
             optimizer = fluid.optimizer.SGD(learning_rate=0.5)
             optimizer = fluid.optimizer.PipelineOptimizer(optimizer)
             optimizer.minimize(loss)
@@ -6318,8 +6324,8 @@ class RecomputeOptimizer(Optimizer):
                 "y": np.random.randint(2, size=(32, 1)).astype('int64')}
             def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                 print(input_x)
-                fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
-                prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
+                fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
+                prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                 cost = paddle.nn.functional.cross_entropy(
                     input=prediction, label=input_y,
                     reduction='none', use_softmax=False
@@ -6395,8 +6401,8 @@ class RecomputeOptimizer(Optimizer):
 
                 paddle.enable_static()
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
-                    fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
-                    prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
+                    fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
+                    prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                     cost = paddle.nn.functional.cross_entropy(
                         input=prediction, label=input_y,
                         reduction='none', use_softmax=False
@@ -6442,8 +6448,8 @@ class RecomputeOptimizer(Optimizer):
                 paddle.enable_static()
 
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
-                    fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
-                    prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
+                    fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
+                    prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                     cost = paddle.nn.functional.cross_entropy(
                         input=prediction, label=input_y,
                         reduction='none', use_softmax=False
@@ -6936,8 +6942,8 @@ class RecomputeOptimizer(Optimizer):
                 paddle.enable_static()
 
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
-                    fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
-                    prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
+                    fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
+                    prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                     cost = paddle.nn.functional.cross_entropy(
                         input=prediction, label=input_y,
                         reduction='none', use_softmax=False
@@ -7018,8 +7024,8 @@ class RecomputeOptimizer(Optimizer):
                 paddle.enable_static()
 
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
-                    fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
-                    prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
+                    fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
+                    prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                     cost = paddle.nn.functional.cross_entropy(
                         input=prediction, label=input_y,
                         reduction='none', use_softmax=False
@@ -7116,7 +7122,7 @@ class LookaheadOptimizer:
 
             x = fluid.layers.data(name='x', shape=[2], dtype='float32')
             label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-            y = fluid.layers.fc(input=[x], size=2, act="softmax")
+            y = paddle.static.nn.fc(x=[x], size=2, activation="softmax")
             loss = paddle.nn.functional.cross_entropy(
                 input=y, label=label,
                 reduction='none', use_softmax=False
@@ -7296,8 +7302,8 @@ class GradientMergeOptimizer:
                     "y": np.random.random(size=(batch_size, 1)).astype('int64')}
 
         def mlp(input_x, input_y, hid_dim=128, label_dim=2):
-            fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
-            prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
+            prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction, label=input_y,
                 reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py
index 82dee029f523ed608f9497a92e5977b5462fa81d..e6cabbdde92683eaaa19210bbad5f373bf420b24 100644
--- a/python/paddle/fluid/reader.py
+++ b/python/paddle/fluid/reader.py
@@ -1691,7 +1691,7 @@ class PyReader(DataLoaderBase):
 
            def network(image, label):
                # User-defined network, here is an example of softmax regression.
-               predict = fluid.layers.fc(input=image, size=10, act='softmax')
+               predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
                return paddle.nn.functional.cross_entropy(
                     input=predict, label=label,
                     reduction='none', use_softmax=False
@@ -1750,7 +1750,7 @@ class PyReader(DataLoaderBase):
 
            def network(image, label):
                # User-defined network, here is an example of softmax regression.
-               predict = fluid.layers.fc(input=image, size=10, act='softmax')
+               predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
                return paddle.nn.functional.cross_entropy(
                    input=predict, label=label,
                    reduction='none', use_softmax=False
@@ -1938,6 +1938,7 @@ class PyReader(DataLoaderBase):
         Example:
             .. code-block:: python
 
+                import paddle
                 import paddle.fluid as fluid
                 import numpy as np
 
@@ -1947,7 +1948,7 @@ class PyReader(DataLoaderBase):
 
                 def network(image, label):
                     # User-defined network, here is an example of softmax regression.
-                    predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                    predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
                     return paddle.nn.functional.cross_entropy(
                         input=predict, label=label,
                         reduction='none', use_softmax=False
@@ -2014,7 +2015,7 @@ class PyReader(DataLoaderBase):
 
                 def network(image, label):
                     # User-defined network, here is an example of softmax regression.
-                    predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                    predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
                     return paddle.nn.functional.cross_entropy(
                         input=predict, label=label,
                         reduction='none', use_softmax=False
@@ -2080,7 +2081,7 @@ class PyReader(DataLoaderBase):
 
                 def network(image, label):
                     # User-defined network, here is an example of softmax regression.
-                    predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                    predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
                     return paddle.nn.functional.cross_entropy(
                         input=predict, label=label,
                         reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index 900ba20cf6884bba77bc83155d3ea65a3b4e1495..36e648e3e38edc6d733f7d9952519a6189db104b 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -76,8 +76,8 @@ class L2DecayRegularizer(WeightDecayRegularizer):
             with fluid.program_guard(main_prog, startup_prog):
                 data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
                 label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-                hidden = fluid.layers.fc(input=data, size=128, act='relu')
-                prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+                hidden = paddle.static.nn.fc(x=data, size=128, activation='relu')
+                prediction = paddle.static.nn.fc(x=hidden, size=10, activation='softmax')
                 loss = paddle.nn.functional.cross_entropy(
                     input=prediction, label=label,
                     reduction='none', use_softmax=False
@@ -101,9 +101,9 @@ class L2DecayRegularizer(WeightDecayRegularizer):
 
             # set L1 regularization in fluid.ParamAttr
             w_param = fluid.ParamAttr(regularizer=l1)
-            hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
-            hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)   # fc_1.w_0(L1), fc_1.b_0
-            predict = fluid.layers.fc(hidden2, 32)    # fc_3.w_0, fc_3.b_0
+            hidden1 = paddle.static.nn.fc(x, 8, weight_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
+            hidden2 = paddle.static.nn.fc(hidden1, 16, weight_attr=w_param)   # fc_1.w_0(L1), fc_1.b_0
+            predict = paddle.static.nn.fc(hidden2, 32)    # fc_3.w_0, fc_3.b_0
             avg_loss = paddle.mean(predict)
 
             # set L2 regularization in optimizer
@@ -195,8 +195,8 @@ class L1DecayRegularizer(WeightDecayRegularizer):
             with fluid.program_guard(main_prog, startup_prog):
                 data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
                 label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-                hidden = fluid.layers.fc(input=data, size=128, act='relu')
-                prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+                hidden = paddle.static.nn.fc(x=data, size=128, activation='relu')
+                prediction = paddle.static.nn.fc(x=hidden, size=10, activation='softmax')
                 loss = paddle.nn.functional.cross_entropy(
                     input=prediction, label=label,
                     reduction='none', use_softmax=False
@@ -219,9 +219,9 @@ class L1DecayRegularizer(WeightDecayRegularizer):
 
             # set L1 regularization in fluid.ParamAttr
             w_param = fluid.ParamAttr(regularizer=l1)
-            hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
-            hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)  # fc_1.w_0(L1), fc_1.b_0
-            predict = fluid.layers.fc(hidden2, 32)   # fc_3.w_0, fc_3.b_0
+            hidden1 = paddle.static.nn.fc(x, 8, weight_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
+            hidden2 = paddle.static.nn.fc(hidden1, 16, weight_attr=w_param)  # fc_1.w_0(L1), fc_1.b_0
+            predict = paddle.static.nn.fc(hidden2, 32)   # fc_3.w_0, fc_3.b_0
             avg_loss = paddle.mean(predict)
 
             # set L2 regularization in optimizer
@@ -289,10 +289,11 @@ class L1DecayRegularizer(WeightDecayRegularizer):
 # We short the class name, since users will use the regulaizer with the package
 # name. The sample code:
 #
+# import paddle
 # import paddle.fluid as fluid
 #
-# hidden = fluid.layers.fc(...,
-#                          param_attr=fluid.regularizer.Xavier())
+# hidden = paddle.static.nn.fc(...,
+#                              weight_attr=fluid.regularizer.Xavier())
 #
 # It is no need to add a `Regularizer` as the class suffix
 L1Decay = L1DecayRegularizer
diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
index b204fde04b291e09c8f8c0f38aeb67b6f80907f0..c406fae5f811c8edf9dc45209f2e576663e35832 100644
--- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py
+++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
@@ -44,8 +44,8 @@ def convolution_net(
         act="tanh",
         pool_type="sqrt",
     )
-    prediction = fluid.layers.fc(
-        input=[conv_3, conv_4], size=class_dim, act="softmax"
+    prediction = paddle.static.nn.fc(
+        x=[conv_3, conv_4], size=class_dim, activation="softmax"
     )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py
index 558ce9febe4e5c4c1063fd7a0ce6d6b3a7bff6f6..b6efc9775efa4572afd1706441ee085345b74aef 100644
--- a/python/paddle/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/test_fit_a_line.py
@@ -55,20 +55,20 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16):
     if use_bf16:
         if not pure_bf16:
             with amp.bf16.bf16_guard():
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x=x, size=1, activation=None)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
             avg_cost = paddle.mean(cost)
         else:
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x=x, size=1, activation=None)
             with amp.bf16.bf16_guard():
                 cost = paddle.nn.functional.square_error_cost(
                     input=y_predict, label=y
                 )
                 avg_cost = paddle.mean(cost)
     else:
-        y_predict = fluid.layers.fc(input=x, size=1, act=None)
+        y_predict = paddle.static.nn.fc(x=x, size=1, activation=None)
         cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
 
diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py
index f20beb731c35bd63211495861ca6b0fcaca93726..f1fa47afb809ab3e247cf15d7d2d9379b2185340 100644
--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/fluid/tests/book/test_image_classification.py
@@ -93,10 +93,10 @@ def vgg16_bn_drop(input):
     conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
 
     drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
-    fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
+    fc1 = paddle.static.nn.fc(x=drop, size=4096)
     bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
     drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
-    fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
+    fc2 = paddle.static.nn.fc(x=drop2, size=4096)
     return fc2
 
 
@@ -116,7 +116,7 @@ def train(net_type, use_cuda, save_dirname, is_local):
     else:
         raise ValueError("%s network is not supported" % net_type)
 
-    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
+    predict = paddle.static.nn.fc(x=net, size=classdim, activation='softmax')
     cost = paddle.nn.functional.cross_entropy(
         input=predict, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py
index b799dc74679bd862d88337d79be647575e3dad51..85d946e2185846b44fcd852842343eab04020680 100644
--- a/python/paddle/fluid/tests/book/test_recognize_digits.py
+++ b/python/paddle/fluid/tests/book/test_recognize_digits.py
@@ -29,7 +29,7 @@ BATCH_SIZE = 64
 
 
 def loss_net(hidden, label):
-    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(x=hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -39,8 +39,8 @@ def loss_net(hidden, label):
 
 
 def mlp(img, label):
-    hidden = fluid.layers.fc(input=img, size=200, act='tanh')
-    hidden = fluid.layers.fc(input=hidden, size=200, act='tanh')
+    hidden = paddle.static.nn.fc(x=img, size=200, activation='tanh')
+    hidden = paddle.static.nn.fc(x=hidden, size=200, activation='tanh')
     return loss_net(hidden, label)
 
 
diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py
index 89da6135a806d3f85e46607d1d0c13bf570aa57e..aad9e7ce01c2c0dc654fcd7cfc09d3521c4aa1bb 100644
--- a/python/paddle/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/fluid/tests/book/test_recommender_system.py
@@ -50,7 +50,7 @@ def get_usr_combined_features():
         is_sparse=IS_SPARSE,
     )
 
-    usr_fc = layers.fc(input=usr_emb, size=32)
+    usr_fc = paddle.static.nn.fc(x=usr_emb, size=32)
 
     USR_GENDER_DICT_SIZE = 2
 
@@ -63,7 +63,7 @@ def get_usr_combined_features():
         is_sparse=IS_SPARSE,
     )
 
-    usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
+    usr_gender_fc = paddle.static.nn.fc(x=usr_gender_emb, size=16)
 
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
     usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
@@ -75,7 +75,7 @@ def get_usr_combined_features():
         param_attr='age_table',
     )
 
-    usr_age_fc = layers.fc(input=usr_age_emb, size=16)
+    usr_age_fc = paddle.static.nn.fc(x=usr_age_emb, size=16)
 
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
     usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
@@ -87,13 +87,15 @@ def get_usr_combined_features():
         is_sparse=IS_SPARSE,
     )
 
-    usr_job_fc = layers.fc(input=usr_job_emb, size=16)
+    usr_job_fc = paddle.static.nn.fc(x=usr_job_emb, size=16)
 
     concat_embed = layers.concat(
         input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1
     )
 
-    usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
+    usr_combined_features = paddle.static.nn.fc(
+        x=concat_embed, size=200, activation="tanh"
+    )
 
     return usr_combined_features
 
@@ -112,7 +114,7 @@ def get_mov_combined_features():
         is_sparse=IS_SPARSE,
     )
 
-    mov_fc = layers.fc(input=mov_emb, size=32)
+    mov_fc = paddle.static.nn.fc(x=mov_emb, size=32)
 
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
 
@@ -151,7 +153,9 @@ def get_mov_combined_features():
     )
 
     # FIXME(dzh) : need tanh operator
-    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
+    mov_combined_features = paddle.static.nn.fc(
+        x=concat_embed, size=200, activation="tanh"
+    )
 
     return mov_combined_features
 
diff --git a/python/paddle/fluid/tests/book/test_word2vec_book.py b/python/paddle/fluid/tests/book/test_word2vec_book.py
index 640a33751a4740969e77bcd4577bc4faa498aa73..e932394e8cc0952f417d2d155c170c9d0d3a01d2 100644
--- a/python/paddle/fluid/tests/book/test_word2vec_book.py
+++ b/python/paddle/fluid/tests/book/test_word2vec_book.py
@@ -90,11 +90,11 @@ def train(
         concat_embed = fluid.layers.concat(
             input=[embed_first, embed_second, embed_third, embed_forth], axis=1
         )
-        hidden1 = fluid.layers.fc(
-            input=concat_embed, size=HIDDEN_SIZE, act='sigmoid'
+        hidden1 = paddle.static.nn.fc(
+            x=concat_embed, size=HIDDEN_SIZE, activation='sigmoid'
         )
-        predict_word = fluid.layers.fc(
-            input=hidden1, size=dict_size, act='softmax'
+        predict_word = paddle.static.nn.fc(
+            x=hidden1, size=dict_size, activation='softmax'
         )
         cost = paddle.nn.functional.cross_entropy(
             input=predict_word,
diff --git a/python/paddle/fluid/tests/test_error_clip.py b/python/paddle/fluid/tests/test_error_clip.py
index 65483d1c6adf68dba55e43180e9993d712193811..9dc0771a39fb868792f17dd5497ee7031804147e 100644
--- a/python/paddle/fluid/tests/test_error_clip.py
+++ b/python/paddle/fluid/tests/test_error_clip.py
@@ -25,9 +25,9 @@ prog = fluid.framework.Program()
 with fluid.program_guard(main_program=prog):
     image = fluid.layers.data(name='x', shape=[784], dtype='float32')
 
-    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
-    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
-    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+    hidden1 = paddle.static.nn.fc(x=image, size=128, activation='relu')
+    hidden2 = paddle.static.nn.fc(x=hidden1, size=64, activation='relu')
+    predict = paddle.static.nn.fc(x=hidden2, size=10, activation='softmax')
 
     label = fluid.layers.data(name='y', shape=[1], dtype='int64')
 
diff --git a/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py b/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py
index b51f963e9a7b0e3a943454b2ee4440eb954696a4..50fb039974cd0f6eb321adf4039bfb8aac9454fc 100644
--- a/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py
+++ b/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py
@@ -38,8 +38,10 @@ class TestASPHelperPruningBase(unittest.TestCase):
             hidden = paddle.static.nn.conv2d(
                 input=img, num_filters=4, filter_size=3, padding=2, act="relu"
             )
-            hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
-            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+            hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu')
+            prediction = paddle.static.nn.fc(
+                x=hidden, size=10, activation='softmax'
+            )
             return img, label, prediction
 
         with fluid.program_guard(self.main_program, self.startup_program):
diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py b/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py
index dc89178a53c67fa8700af32f388cd403964f81e5..23c18c34c9d3efa721b3bc0c7cb9d4b5aa5604d8 100644
--- a/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py
+++ b/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py
@@ -205,14 +205,16 @@ class TestASPStaticCustomerizedPruneFunc(unittest.TestCase):
             hidden = paddle.static.nn.conv2d(
                 input=img, num_filters=4, filter_size=3, padding=2, act="relu"
             )
-            hidden = fluid.layers.fc(
-                input=hidden, size=32, act='relu', name=self.customer_prefix
+            hidden = paddle.static.nn.fc(
+                x=hidden, size=32, activation='relu', name=self.customer_prefix
             )
-            hidden = fluid.layers.fc(
-                input=hidden, size=32, act='relu', name=self.customer_prefix
+            hidden = paddle.static.nn.fc(
+                x=hidden, size=32, activation='relu', name=self.customer_prefix
+            )
+            hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu')
+            prediction = paddle.static.nn.fc(
+                x=hidden, size=10, activation='softmax'
             )
-            hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
-            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
             return img, label, prediction
 
         with fluid.program_guard(self.main_program, self.startup_program):
diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py b/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py
index 9a743f74190b29b955db5f29addbf412a872c956..81d6bd86ee466bf411677a1f4801c67fcbbeba7a 100644
--- a/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py
+++ b/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py
@@ -38,8 +38,10 @@ class TestASPStaticOptimize(unittest.TestCase):
             hidden = paddle.static.nn.conv2d(
                 input=img, num_filters=4, filter_size=3, padding=2, act="relu"
             )
-            hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
-            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+            hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu')
+            prediction = paddle.static.nn.fc(
+                x=hidden, size=10, activation='softmax'
+            )
             return img, label, prediction
 
         with fluid.program_guard(self.main_program, self.startup_program):
diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py b/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py
index 364f5d915892cd279a5ad5fc86dcddc4d936be74..cf011874ea89730029145e0a371f16e9b23f9d99 100644
--- a/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py
+++ b/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py
@@ -38,9 +38,13 @@ class TestASPStaticPruningBase(unittest.TestCase):
             hidden = paddle.static.nn.conv2d(
                 input=img, num_filters=2, filter_size=3, padding=2, act="relu"
             )
-            hidden = fluid.layers.fc(input=hidden, size=32, act='softmax')
-            hidden = fluid.layers.fc(input=hidden, size=3, act='softmax')
-            prediction = fluid.layers.fc(input=hidden, size=3, act='softmax')
+            hidden = paddle.static.nn.fc(
+                x=hidden, size=32, activation='softmax'
+            )
+            hidden = paddle.static.nn.fc(x=hidden, size=3, activation='softmax')
+            prediction = paddle.static.nn.fc(
+                x=hidden, size=3, activation='softmax'
+            )
             return img, label, prediction
 
         with fluid.program_guard(self.main_program, self.startup_program):
diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py b/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py
index 89ded3d73a453262f7ef4cc69f0871d9dbf01ce8..b4876bdce53478e21f2e94e72dfd0b24742648d1 100644
--- a/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py
+++ b/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py
@@ -135,8 +135,10 @@ class TestASPStaticOptimize(unittest.TestCase):
             hidden = paddle.static.nn.conv2d(
                 input=img, num_filters=4, filter_size=3, padding=2, act="relu"
             )
-            hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
-            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+            hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu')
+            prediction = paddle.static.nn.fc(
+                x=hidden, size=10, activation='softmax'
+            )
             return img, label, prediction
 
         with fluid.program_guard(self.main_program, self.startup_program):
diff --git a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py
index 9c8fc3be0e9c5c9f44e56f75db0ef141487f3275..147bd9128bf30aab7886ea41a2cbbb08de6dc9ca 100644
--- a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py
+++ b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py
@@ -55,11 +55,13 @@ class TestFleetWithASPSharding(unittest.TestCase):
             )
             input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
 
-            fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            fc_3 = fluid.layers.fc(input=fc_2, size=64, act='tanh')
-            fc_4 = fluid.layers.fc(input=fc_3, size=64, act='tanh')
-            prediction = fluid.layers.fc(input=fc_4, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            fc_3 = paddle.static.nn.fc(x=fc_2, size=64, activation='tanh')
+            fc_4 = paddle.static.nn.fc(x=fc_3, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=fc_4, size=2, activation='softmax'
+            )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
                 label=input_y,
diff --git a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py
index e1121cfcfcea72d1a1f1963eec36036ae7211c80..54c6d50e3981877fd91d0e33cbc90b33fd19e2dd 100644
--- a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py
+++ b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py
@@ -47,8 +47,10 @@ class TestFleetWithASPStatic(unittest.TestCase):
             )
             input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
 
-            fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=fc_1, size=2, activation='softmax'
+            )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
                 label=input_y,
@@ -121,8 +123,10 @@ class TestFleetWithASPAMPStatic(unittest.TestCase):
             )
             input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
 
-            fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=fc_1, size=2, activation='softmax'
+            )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
                 label=input_y,
diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py
index cb6f8a0a29f98359a67644d7f6e88776af6420d7..a2ce55c32b09ac1dd7967c5402bb90f97438c79b 100644
--- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py
+++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py
@@ -68,7 +68,7 @@ class AutoCheckpointBase(unittest.TestCase):
             image = fluid.data(name='image', shape=[-1, 4, 4], dtype='float32')
             label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
-            fc_tmp = fluid.layers.fc(image, size=CLASS_NUM)
+            fc_tmp = paddle.static.nn.fc(image, size=CLASS_NUM)
             cross_entropy = paddle.nn.functional.softmax_with_cross_entropy(
                 fc_tmp, label
             )
diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
index e9812d11ba7631ca2904664e583a0d5ef2d24e21..551a9eed8baa8f4795801a01c70e8363c2e9830e 100644
--- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
+++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
@@ -60,9 +60,9 @@ def net():
     hidden = x
 
     for i in range(2):
-        hidden = fluid.layers.fc(input=hidden, size=400, act="sigmoid")
+        hidden = paddle.static.nn.fc(x=hidden, size=400, activation="sigmoid")
 
-    hidden = fluid.layers.fc(input=hidden, size=3, act=None)
+    hidden = paddle.static.nn.fc(x=hidden, size=3)
     cost, y_predict = paddle.nn.functional.softmax_with_cross_entropy(
         hidden, y, return_softmax=True
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py
index f97faed1d584fce94d8715323e525fea7ac57d49..c33e5bc27a0cae34f75c223aa305c8a612041a66 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py
@@ -60,20 +60,20 @@ def cnn_model(data):
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
     with fluid.device_guard("gpu:1"):
-        predict = fluid.layers.fc(
-            input=conv_pool_2,
+        predict = paddle.static.nn.fc(
+            x=conv_pool_2,
             size=SIZE,
-            act="softmax",
-            param_attr=fluid.param_attr.ParamAttr(
+            activation="softmax",
+            weight_attr=fluid.param_attr.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01)
             ),
         )
         # To cover @RENAMED@GRADIENT
-        predict2 = fluid.layers.fc(
-            input=conv_pool_1,
+        predict2 = paddle.static.nn.fc(
+            x=conv_pool_1,
             size=SIZE,
-            act="softmax",
-            param_attr=fluid.param_attr.ParamAttr(
+            activation="softmax",
+            weight_attr=fluid.param_attr.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01)
             ),
         )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py
index 170243fc962839f063a0aafc39adef62fc0d4737..905df8cd6b71f82337c8a50220ab20d66bc8b74a 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py
@@ -60,20 +60,20 @@ def cnn_model(data):
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
     with fluid.device_guard("gpu:1"):
-        predict = fluid.layers.fc(
-            input=conv_pool_2,
+        predict = paddle.static.nn.fc(
+            x=conv_pool_2,
             size=SIZE,
-            act="softmax",
-            param_attr=fluid.param_attr.ParamAttr(
+            activation="softmax",
+            weight_attr=fluid.param_attr.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01)
             ),
         )
         # To cover @RENAMED@GRADIENT
-        predict2 = fluid.layers.fc(
-            input=conv_pool_1,
+        predict2 = paddle.static.nn.fc(
+            x=conv_pool_1,
             size=SIZE,
-            act="softmax",
-            param_attr=fluid.param_attr.ParamAttr(
+            activation="softmax",
+            weight_attr=fluid.param_attr.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01)
             ),
         )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py
index a6971e210af0cf994ace8f1dab79c1e8f4870ac7..dbe50789d687c1c92003ee7ac750bbd2cb455a78 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py
@@ -59,11 +59,11 @@ def cnn_model(data):
     param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
-    predict = fluid.layers.fc(
-        input=conv_pool_2,
+    predict = paddle.static.nn.fc(
+        x=conv_pool_2,
         size=SIZE,
-        act="softmax",
-        param_attr=fluid.param_attr.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.param_attr.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py
index 4adde634b1c4a331c6f2383ee38b1bf5015825da..7f247abc6d9cd54d90ae419398eac745a5f72b83 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py
@@ -62,8 +62,8 @@ def create_model(data, rank):
         )
     else:
         weight_attr, bias_attr = get_param_attr(np_weight, np_bias)
-        result = fluid.layers.fc(
-            data, size=OUT_SIZE, param_attr=weight_attr, bias_attr=bias_attr
+        result = paddle.static.nn.fc(
+            data, size=OUT_SIZE, weight_attr=weight_attr, bias_attr=bias_attr
         )
 
     predict = paddle.sum(result)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py
index 602cecc18f76094792bbeafbafcd132e5d909b52..b63e2065f431b7c4609a474657040388481ab3cd 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py
@@ -61,10 +61,10 @@ def create_model(data, rank):
         )
     else:
         weight_attr, bias_attr = get_param_attr(np_weight, np_bias)
-        result = fluid.layers.fc(
+        result = paddle.static.nn.fc(
             data,
             size=OUT_SIZE,
-            param_attr=paddle.ParamAttr(
+            weight_attr=paddle.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(np_weight)
             ),
             bias_attr=bias_attr,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py
index 4862f8f74450d2941caa1140290b2882cec8c8b8..914ee0852a04349323f7969117112526b4fa922b 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py
@@ -51,10 +51,10 @@ def create_model(data, rank):
             bias_attr=False,
         )
     else:
-        result = fluid.layers.fc(
+        result = paddle.static.nn.fc(
             data,
             size=OUT_SIZE,
-            param_attr=paddle.ParamAttr(
+            weight_attr=paddle.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(np_weight)
             ),
             bias_attr=False,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py
index 209233027c8a73596a652ad98a282e10412c2f43..21f7b624f5a4dfb0bd24f6cb069f6b723f335c55 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py
@@ -30,7 +30,7 @@ paddle.enable_static()
 class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase):
     def net(self):
         x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-        y_predict = fluid.layers.fc(input=x, size=1, act=None)
+        y_predict = paddle.static.nn.fc(x, size=1, activation=None)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
         cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py
index 245aa097b6aee197b2cca14a0dfb1cc967c61056..3e58391ec976882a05f009c3b93bb031e2cbe30b 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py
@@ -272,7 +272,7 @@ class TestDebugInfo(unittest.TestCase):
     def test_debug_info(self):
         x = fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        y_predict = fluid.layers.fc(input=x, size=1, act=None)
+        y_predict = paddle.static.nn.fc(x, size=1, activation=None)
         cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
 
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_checkpoint.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_checkpoint.py
index b030b6121464d70c025e01ef50e7786077f062f3..305f4777764acd554121d73b96ae3346780a931a 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_checkpoint.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_checkpoint.py
@@ -40,7 +40,7 @@ class FleetTest(unittest.TestCase):
         feeder = fluid.DataFeeder(
             feed_list=[image, label], place=fluid.CPUPlace()
         )
-        predict = fluid.layers.fc(input=image, size=10, act='softmax')
+        predict = paddle.static.nn.fc(x=image, size=10, activation='softmax')
         loss = paddle.nn.functional.cross_entropy(
             input=predict, label=label, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py
index 595b0508d57cfbd85562b6577b41a42da786b007..0e5ae267f32af44119e0de912b50e22f1e8f3bf7 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py
@@ -37,10 +37,10 @@ class TestFleetFP16CompressOptimizer(unittest.TestCase):
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py
index f9271832cf27b2102fa214bf7b10cf27e77f0d33..aa8bc8bd2d73989600def09d8bc830924fa375bd 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py
@@ -67,10 +67,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
@@ -133,10 +133,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
@@ -211,10 +211,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
@@ -276,10 +276,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py
index 8ddfb7b40610c5e61eaa8a82b3e15ea937d81f0d..f5eb234945c757e2577590cd5b75354436dc18b8 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py
@@ -54,10 +54,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py
index 1a20f09a1689d05fdcc4d9357d0db6ba1911eebd..2623a222d50b42d2c8c6bd68f8a8e8d3b54faca7 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py
@@ -40,12 +40,12 @@ class TestFleetLambMetaOptimizer(unittest.TestCase):
                     name="y", shape=[1], dtype='int64'
                 )
 
-                fc_1 = paddle.fluid.layers.fc(
-                    input=input_x, size=64, act='tanh'
+                fc_1 = paddle.static.nn.fc(
+                    x=input_x, size=64, activation='tanh'
                 )
-                fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh')
-                prediction = paddle.fluid.layers.fc(
-                    input=[fc_2], size=2, act='softmax'
+                fc_2 = paddle.static.nn.fc(x=fc_1, size=256, activation='tanh')
+                prediction = paddle.static.nn.fc(
+                    x=[fc_2], size=2, activation='softmax'
                 )
                 cost = paddle.nn.functional.cross_entropy(
                     input=prediction,
@@ -122,9 +122,9 @@ class TestFleetLambMetaOptimizer(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py
index f0e5f2517dc0fe910cfce2e2e701faff886449f2..509606290144606691a74a93a0877e64c8d374fb 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py
@@ -40,12 +40,12 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase):
                     name="y", shape=[1], dtype='int64'
                 )
 
-                fc_1 = paddle.fluid.layers.fc(
-                    input=input_x, size=64, act='tanh'
+                fc_1 = paddle.static.nn.fc(
+                    x=input_x, size=64, activation='tanh'
                 )
-                fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh')
-                prediction = paddle.fluid.layers.fc(
-                    input=[fc_2], size=2, act='softmax'
+                fc_2 = paddle.static.nn.fc(x=fc_1, size=256, activation='tanh')
+                prediction = paddle.static.nn.fc(
+                    x=[fc_2], size=2, activation='softmax'
                 )
                 cost = paddle.nn.functional.cross_entropy(
                     input=prediction,
@@ -127,9 +127,9 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py
index 5d545e2268a00b0d283401157fffe4370ad10d1d..76fb129a0494617e3e7c04018756aa88891088c6 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py
@@ -36,12 +36,12 @@ class TestFleetMetaOptimizerBase(unittest.TestCase):
                     name="y", shape=[1], dtype='int64'
                 )
 
-                fc_1 = paddle.fluid.layers.fc(
-                    input=input_x, size=64, act='tanh'
+                fc_1 = paddle.static.nn.fc(
+                    x=input_x, size=64, activation='tanh'
                 )
-                fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh')
-                prediction = paddle.fluid.layers.fc(
-                    input=[fc_2], size=2, act='softmax'
+                fc_2 = paddle.static.nn.fc(x=fc_1, size=256, activation='tanh')
+                prediction = paddle.static.nn.fc(
+                    x=[fc_2], size=2, activation='softmax'
                 )
                 cost = paddle.nn.functional.cross_entropy(
                     input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py
index 900e78675a71322b0451727b36e9e980b821d96a..8fac45b9d249a951a68050759d4821868d7ae5da 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py
@@ -45,16 +45,16 @@ class TestFleetMetaOptimizer(unittest.TestCase):
             with static.device_guard("gpu:all"):
                 input_z = input_z * 1.0
                 input_z.stop_gradient = True
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
             fc_1 = fc_1 * input_z
 
         with static.device_guard("gpu:1"):
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
             # for pipeline check_pipeline_persist_var coverage
             fc_2.persistable = True
             fc_2 = fc_2 * input_z
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py
index c31698ba5ffa6190f63a9fc99f0c7e2a7a99787b..3f22238a3637c474dc224f01fd77c0e262d3f76b 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py
@@ -40,17 +40,17 @@ class TestFleetMetaOptimizer(unittest.TestCase):
             input_y = paddle.fluid.layers.data(
                 name="y", shape=[1], dtype='int64'
             )
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            fc_3 = paddle.fluid.layers.fc(input=fc_2, size=64, act='tanh')
-            fc_4 = paddle.fluid.layers.fc(input=fc_3, size=64, act='tanh')
-            fc_5 = paddle.fluid.layers.fc(input=fc_4, size=64, act='tanh')
-            fc_6 = paddle.fluid.layers.fc(input=fc_5, size=64, act='tanh')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            fc_3 = paddle.static.nn.fc(x=fc_2, size=64, activation='tanh')
+            fc_4 = paddle.static.nn.fc(x=fc_3, size=64, activation='tanh')
+            fc_5 = paddle.static.nn.fc(x=fc_4, size=64, activation='tanh')
+            fc_6 = paddle.static.nn.fc(x=fc_5, size=64, activation='tanh')
 
         with paddle.fluid.device_guard("gpu:1"):
-            fc_7 = paddle.fluid.layers.fc(input=fc_6, size=64, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_7], size=2, act='softmax'
+            fc_7 = paddle.static.nn.fc(x=fc_6, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_7], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py
index 32e4bfb62f9893bdec53532164d0531290fe863a..29ef57518d0c5308071f95b6b2135f4c4269796d 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py
@@ -37,10 +37,10 @@ class TestFleetMetaOptimizer(unittest.TestCase):
             name="x", shape=[32], dtype='float32'
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
 
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py
index d37ee5d5af13e3c3142598787877f75ab02e2680..196e3425ec65a8c7ce5f0e623bd6042ea8f79926 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py
@@ -444,7 +444,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
 
         def net():
             x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32')
-            y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
             y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py
index a4e41147811619b54d5f9cb13861b7244508a975..70d06a95b64aaeee9b95a0a6b06966fc45d47328 100644
--- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py
+++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py
@@ -58,11 +58,11 @@ def cnn_model(data):
     param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
-    predict = fluid.layers.fc(
-        input=conv_pool_2,
+    predict = paddle.static.nn.fc(
+        x=conv_pool_2,
         size=SIZE,
-        act="softmax",
-        param_attr=fluid.param_attr.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.param_attr.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
diff --git a/python/paddle/fluid/tests/unittests/dist_ctr.py b/python/paddle/fluid/tests/unittests/dist_ctr.py
index 8b910514202ff9d0abad5ba83e18f1c30d84c974..7fd86cadb99eb733c5f37d33725b42e4568282e0 100644
--- a/python/paddle/fluid/tests/unittests/dist_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_ctr.py
@@ -72,11 +72,11 @@ class TestDistCTR2x2(TestDistRunnerBase):
         )
         dnn_out = dnn_pool
         for i, dim in enumerate(dnn_layer_dims[1:]):
-            fc = fluid.layers.fc(
-                input=dnn_out,
+            fc = paddle.static.nn.fc(
+                x=dnn_out,
                 size=dim,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.01)
                 ),
                 name='dnn-fc-%d' % i,
@@ -98,7 +98,9 @@ class TestDistCTR2x2(TestDistRunnerBase):
 
         merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
 
-        predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+        predict = paddle.static.nn.fc(
+            x=merge_layer, size=2, activation='softmax'
+        )
         acc = paddle.static.accuracy(input=predict, label=label)
         auc_var, batch_auc_var, auc_states = paddle.static.auc(
             input=predict, label=label
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
index 8129d6104d0c9ae3f040b920e15f5e885ed769ff..360cad434096be3e4705dc9b8367f3c9f6089bf6 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
@@ -120,11 +120,11 @@ class TestDistCTR2x2(FleetDistRunnerBase):
         )
         dnn_out = dnn_pool
         for i, dim in enumerate(dnn_layer_dims[1:]):
-            fc = fluid.layers.fc(
-                input=dnn_out,
+            fc = paddle.static.nn.fc(
+                x=dnn_out,
                 size=dim,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.01)
                 ),
                 name='dnn-fc-%d' % i,
@@ -147,7 +147,9 @@ class TestDistCTR2x2(FleetDistRunnerBase):
 
         merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
 
-        predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+        predict = paddle.static.nn.fc(
+            x=merge_layer, size=2, activation='softmax'
+        )
         acc = paddle.static.accuracy(input=predict, label=label)
 
         auc_var, batch_auc_var, auc_states = paddle.static.auc(
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py
index 5e909773a61752e5710235d15a05d096534fa8d0..8d4efa8c3d11629a49c034b46dab1fb21f9071cd 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py
@@ -107,11 +107,11 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
 
         with fluid.device_guard("gpu"):
             for i, dim in enumerate(dnn_layer_dims[1:]):
-                fc = fluid.layers.fc(
-                    input=dnn_out,
+                fc = paddle.static.nn.fc(
+                    x=dnn_out,
                     size=dim,
-                    act="relu",
-                    param_attr=fluid.ParamAttr(
+                    activation="relu",
+                    weight_attr=fluid.ParamAttr(
                         initializer=fluid.initializer.Constant(value=0.01)
                     ),
                     name='dnn-fc-%d' % i,
@@ -121,7 +121,9 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
         with fluid.device_guard("cpu"):
             merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
             label = fluid.layers.cast(label, dtype="int64")
-            predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+            predict = paddle.static.nn.fc(
+                x=merge_layer, size=2, activation='softmax'
+            )
 
             cost = paddle.nn.functional.cross_entropy(
                 input=predict, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py
index e45d2c93a869f6807572ca74b774909a3374db7a..539446b6741b3cff2e6359587671f69c42b50f4c 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py
@@ -60,11 +60,11 @@ def cnn_model(data):
     param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
-    predict = fluid.layers.fc(
-        input=conv_pool_2,
+    predict = paddle.static.nn.fc(
+        x=conv_pool_2,
         size=SIZE,
-        act="softmax",
-        param_attr=fluid.param_attr.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.param_attr.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py
index 1a23b86549e0396149ef0d2b1470367aa81cf4bd..efad598c725a44c2bf5edf4ab2eb5d4df99d50cd 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py
@@ -60,11 +60,11 @@ def cnn_model(data):
     param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
-    predict = fluid.layers.fc(
-        input=conv_pool_2,
+    predict = paddle.static.nn.fc(
+        x=conv_pool_2,
         size=SIZE,
-        act="softmax",
-        param_attr=fluid.param_attr.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.param_attr.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
index be2ea401ea93f0147ead7925fd073ae66671e60b..358b3b5e39a5f747ca9856d22addb229990af931 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
@@ -133,10 +133,10 @@ def train_network(
     q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
     q_ss = paddle.nn.functional.softsign(q_sum)
     # fc layer after conv
-    q_fc = fluid.layers.fc(
-        input=q_ss,
+    q_fc = paddle.static.nn.fc(
+        x=q_ss,
         size=hid_dim,
-        param_attr=fluid.ParamAttr(
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01),
             name="__q_fc__",
             learning_rate=base_lr,
@@ -160,10 +160,10 @@ def train_network(
     pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
     pt_ss = paddle.nn.functional.softsign(pt_sum)
     # fc layer
-    pt_fc = fluid.layers.fc(
-        input=pt_ss,
+    pt_fc = paddle.static.nn.fc(
+        x=pt_ss,
         size=hid_dim,
-        param_attr=fluid.ParamAttr(
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01), name="__fc__"
         ),
         bias_attr=fluid.ParamAttr(name="__fc_b__"),
@@ -184,10 +184,10 @@ def train_network(
     nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
     nt_ss = paddle.nn.functional.softsign(nt_sum)
     # fc layer
-    nt_fc = fluid.layers.fc(
-        input=nt_ss,
+    nt_fc = paddle.static.nn.fc(
+        x=nt_ss,
         size=hid_dim,
-        param_attr=fluid.ParamAttr(
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01), name="__fc__"
         ),
         bias_attr=fluid.ParamAttr(name="__fc_b__"),
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py
index b22403a6fa8cda9b4ef9b8b6b8b12a0a2cae190c..9e398e83b90c48b6be430b642963615d5f770fae 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py
@@ -111,11 +111,11 @@ class TestDistCTR2x2(FleetDistRunnerBase):
         )
         dnn_out = dnn_pool
         for i, dim in enumerate(dnn_layer_dims[1:]):
-            fc = fluid.layers.fc(
-                input=dnn_out,
+            fc = paddle.static.nn.fc(
+                x=dnn_out,
                 size=dim,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.01)
                 ),
                 name='dnn-fc-%d' % i,
@@ -136,7 +136,9 @@ class TestDistCTR2x2(FleetDistRunnerBase):
 
         lr_pool = fluid.layers.sequence_pool(input=lr_embbding, pool_type="sum")
         merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
-        predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+        predict = paddle.static.nn.fc(
+            x=merge_layer, size=2, activation='softmax'
+        )
 
         acc = paddle.static.accuracy(input=predict, label=label)
         auc_var, _, _ = paddle.static.auc(input=predict, label=label)
diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py
index 4510b57391bebee005ebf1ae6ea55240ece2b4cb..117e178dd9670becda56bcae455f9426de42a9ae 100644
--- a/python/paddle/fluid/tests/unittests/dist_mnist.py
+++ b/python/paddle/fluid/tests/unittests/dist_mnist.py
@@ -59,11 +59,11 @@ def cnn_model(data):
     param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
-    predict = fluid.layers.fc(
-        input=conv_pool_2,
+    predict = paddle.static.nn.fc(
+        x=conv_pool_2,
         size=SIZE,
-        act="softmax",
-        param_attr=fluid.param_attr.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.param_attr.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py
index 526b580f409d84d14cd4df399a61ac8dd195eea4..377ad3072277dfda398d255c6ba31089700a679c 100644
--- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py
@@ -116,11 +116,11 @@ class SE_ResNeXt:
         drop = paddle.nn.functional.dropout(x=pool, p=0.2)
 
         stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
-        out = fluid.layers.fc(
-            input=drop,
+        out = paddle.static.nn.fc(
+            x=drop,
             size=class_dim,
-            act='softmax',
-            param_attr=fluid.ParamAttr(
+            activation='softmax',
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.05)
             ),
         )
@@ -183,22 +183,22 @@ class SE_ResNeXt:
     def squeeze_excitation(self, input, num_channels, reduction_ratio):
         pool = paddle.nn.functional.adaptive_avg_pool2d(x=input, output_size=1)
         stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-        squeeze = fluid.layers.fc(
-            input=pool,
+        squeeze = paddle.static.nn.fc(
+            x=pool,
             size=num_channels // reduction_ratio,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.05)
             ),
-            act='relu',
+            activation='relu',
         )
         stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
-        excitation = fluid.layers.fc(
-            input=squeeze,
+        excitation = paddle.static.nn.fc(
+            x=squeeze,
             size=num_channels,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.05)
             ),
-            act='sigmoid',
+            activation='sigmoid',
         )
         scale = paddle.tensor.math._multiply_with_axis(
             x=input, y=excitation, axis=0
diff --git a/python/paddle/fluid/tests/unittests/dist_sharding_save.py b/python/paddle/fluid/tests/unittests/dist_sharding_save.py
index 81b5733bb9068a7658b7a68e8918f20ac41edd14..1c4f49093df3530cc93a973e223df02be2ff4d9b 100755
--- a/python/paddle/fluid/tests/unittests/dist_sharding_save.py
+++ b/python/paddle/fluid/tests/unittests/dist_sharding_save.py
@@ -45,10 +45,10 @@ def runtime_main():
                 name="y", shape=[1], dtype='int64'
             )
 
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh')
-            prediction = paddle.fluid.layers.fc(
-                input=[fc_2], size=2, act='softmax'
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=256, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=[fc_2], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py
index 591106ab1ff1c46acf06635cbee3ad255930ced8..de8630cf70b49ef441adac752ca374a5959b16f1 100644
--- a/python/paddle/fluid/tests/unittests/dist_text_classification.py
+++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py
@@ -74,19 +74,19 @@ def conv_net(
         ),
     )
 
-    fc_0 = fluid.layers.fc(
-        input=[conv_3],
+    fc_0 = paddle.static.nn.fc(
+        x=[conv_3],
         size=fc0_dim,
-        param_attr=fluid.ParamAttr(
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
 
-    prediction = fluid.layers.fc(
-        input=[fc_0],
+    prediction = paddle.static.nn.fc(
+        x=[fc_0],
         size=class_dim,
-        act="softmax",
-        param_attr=fluid.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01)
         ),
     )
diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py
index 7a66033507db5e938dc9c4bb9663683602138b6f..0db96bee8bba67ed0e63d32a5028c1681dbfdcf5 100644
--- a/python/paddle/fluid/tests/unittests/dist_transformer.py
+++ b/python/paddle/fluid/tests/unittests/dist_transformer.py
@@ -289,7 +289,7 @@ class LearningRateScheduler:
         self.warmup_steps = warmup_steps
         self.d_model = d_model
         self.static_lr = learning_rate
-        self.learning_rate = paddle.static.create_global_var(
+        self.learning_rate = layers.create_global_var(
             name=name,
             shape=[1],
             value=float(learning_rate),
@@ -1107,25 +1107,25 @@ def multi_head_attention(
         """
         Add linear projection to queries, keys, and values.
         """
-        q = layers.fc(
-            input=queries,
+        q = paddle.static.nn.fc(
+            x=queries,
             size=d_key * n_head,
             num_flatten_dims=2,
-            param_attr=const_para_attr,
+            weight_attr=const_para_attr,
             bias_attr=const_bias_attr,
         )
-        k = layers.fc(
-            input=keys,
+        k = paddle.static.nn.fc(
+            x=keys,
             size=d_key * n_head,
             num_flatten_dims=2,
-            param_attr=const_para_attr,
+            weight_attr=const_para_attr,
             bias_attr=const_bias_attr,
         )
-        v = layers.fc(
-            input=values,
+        v = paddle.static.nn.fc(
+            x=values,
             size=d_value * n_head,
             num_flatten_dims=2,
-            param_attr=const_para_attr,
+            weight_attr=const_para_attr,
             bias_attr=const_bias_attr,
         )
         return q, k, v
@@ -1174,16 +1174,18 @@ def multi_head_attention(
         Scaled Dot-Product Attention
         """
         scaled_q = paddle.scale(x=q, scale=d_model**-0.5)
-        product = paddle.matmul(x=scaled_q, y=k, transpose_y=True)
+        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
         if attn_bias:
             product += attn_bias
         weights = paddle.nn.functional.softmax(product)
         if dropout_rate:
-            weights = paddle.nn.functional.dropout(
+            weights = layers.dropout(
                 weights,
-                p=dropout_rate,
+                dropout_prob=dropout_rate,
+                seed=ModelHyperParams.dropout_seed,
+                is_test=False,
             )
-        out = paddle.matmul(weights, v)
+        out = layers.matmul(weights, v)
         return out
 
     q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
@@ -1203,11 +1205,11 @@ def multi_head_attention(
     out = __combine_heads(ctx_multiheads)
 
     # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
+    proj_out = paddle.static.nn.fc(
+        x=out,
         size=d_model,
         num_flatten_dims=2,
-        param_attr=const_para_attr,
+        weight_attr=const_para_attr,
         bias_attr=const_bias_attr,
     )
     return proj_out
@@ -1219,19 +1221,19 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid):
     This module consists of two linear transformations with a ReLU activation
     in between, which is applied to each position separately and identically.
     """
-    hidden = layers.fc(
-        input=x,
+    hidden = paddle.static.nn.fc(
+        x=x,
         size=d_inner_hid,
         num_flatten_dims=2,
-        act="relu",
-        param_attr=const_para_attr,
+        activation="relu",
+        weight_attr=const_para_attr,
         bias_attr=const_bias_attr,
     )
-    out = layers.fc(
-        input=hidden,
+    out = paddle.static.nn.fc(
+        x=hidden,
         size=d_hid,
         num_flatten_dims=2,
-        param_attr=const_para_attr,
+        weight_attr=const_para_attr,
         bias_attr=const_bias_attr,
     )
     return out
@@ -1248,7 +1250,7 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0):
         if cmd == "a":  # add residual connection
             out = out + prev_out if prev_out else out
         elif cmd == "n":  # add layer normalization
-            out = paddle.static.nn.layer_norm(
+            out = layers.layer_norm(
                 out,
                 begin_norm_axis=len(out.shape) - 1,
                 param_attr=fluid.initializer.Constant(1.0),
@@ -1256,9 +1258,11 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0):
             )
         elif cmd == "d":  # add dropout
             if dropout_rate:
-                out = paddle.nn.functional.dropout(
+                out = layers.dropout(
                     out,
-                    p=dropout_rate,
+                    dropout_prob=dropout_rate,
+                    seed=ModelHyperParams.dropout_seed,
+                    is_test=False,
                 )
     return out
 
@@ -1314,9 +1318,11 @@ def prepare_encoder(
     src_pos_enc.stop_gradient = True
     enc_input = src_word_emb + src_pos_enc
     return (
-        paddle.nn.functional.dropout(
+        layers.dropout(
             enc_input,
-            p=dropout_rate,
+            dropout_prob=dropout_rate,
+            seed=ModelHyperParams.dropout_seed,
+            is_test=False,
         )
         if dropout_rate
         else enc_input
@@ -1575,7 +1581,7 @@ def transformer(
     label, weights = make_all_inputs(label_data_input_fields)
     if label_smooth_eps:
         label = F.label_smooth(
-            label=paddle.nn.functional.one_hot(label, trg_vocab_size),
+            label=layers.one_hot(input=label, depth=trg_vocab_size),
             epsilon=label_smooth_eps,
         )
 
@@ -1695,17 +1701,17 @@ def wrap_decoder(
     )
     # Return logits for training and probs for inference.
     if weight_sharing:
-        predict = paddle.matmul(
+        predict = layers.matmul(
             x=dec_output,
             y=fluid.framework._get_var(word_emb_param_names[0]),
             transpose_y=True,
         )
     else:
-        predict = layers.fc(
-            input=dec_output,
+        predict = paddle.static.nn.fc(
+            x=dec_output,
             size=trg_vocab_size,
             num_flatten_dims=2,
-            param_attr=const_para_attr,
+            weight_attr=const_para_attr,
             bias_attr=const_bias_attr,
         )
     if dec_inputs is None:
@@ -1713,6 +1719,160 @@ def wrap_decoder(
     return predict
 
 
+def fast_decode(
+    src_vocab_size,
+    trg_vocab_size,
+    max_in_len,
+    n_layer,
+    n_head,
+    d_key,
+    d_value,
+    d_model,
+    d_inner_hid,
+    dropout_rate,
+    weight_sharing,
+    beam_size,
+    max_out_len,
+    eos_idx,
+):
+    """
+    Decode with beam search. Caches store the states of previous steps so
+    that they are not recomputed, which makes decoding faster.
+    """
+    enc_output = wrap_encoder(
+        src_vocab_size,
+        max_in_len,
+        n_layer,
+        n_head,
+        d_key,
+        d_value,
+        d_model,
+        d_inner_hid,
+        dropout_rate,
+        weight_sharing,
+    )
+    start_tokens, init_scores, trg_src_attn_bias = make_all_inputs(
+        fast_decoder_data_input_fields
+    )
+
+    def beam_search():
+        max_len = layers.fill_constant(
+            shape=[1], dtype=start_tokens.dtype, value=max_out_len
+        )
+        step_idx = layers.fill_constant(
+            shape=[1], dtype=start_tokens.dtype, value=0
+        )
+        cond = paddle.less_than(x=step_idx, y=max_len)
+        while_op = layers.While(cond)
+        # array states will be stored for each step.
+        ids = layers.array_write(
+            paddle.reshape(start_tokens, (-1, 1)), step_idx
+        )
+        scores = layers.array_write(init_scores, step_idx)
+        # cell states will be overwritten at each step.
+        # caches contain the states of previous steps to reduce redundant
+        # computation in the decoder.
+        caches = [
+            {
+                "k": layers.fill_constant_batch_size_like(
+                    input=start_tokens,
+                    shape=[-1, 0, d_model],
+                    dtype=enc_output.dtype,
+                    value=0,
+                ),
+                "v": layers.fill_constant_batch_size_like(
+                    input=start_tokens,
+                    shape=[-1, 0, d_model],
+                    dtype=enc_output.dtype,
+                    value=0,
+                ),
+            }
+            for i in range(n_layer)
+        ]
+        with while_op.block():
+            pre_ids = layers.array_read(array=ids, i=step_idx)
+            pre_ids = paddle.reshape(pre_ids, (-1, 1, 1))
+            pre_scores = layers.array_read(array=scores, i=step_idx)
+            # sequence_expand gathers sequences according to LoD, so it can be
+            # used in beam search to pick the states of the selected ids.
+            pre_src_attn_bias = layers.sequence_expand(
+                x=trg_src_attn_bias, y=pre_scores
+            )
+            pre_enc_output = layers.sequence_expand(x=enc_output, y=pre_scores)
+            pre_caches = [
+                {
+                    "k": layers.sequence_expand(x=cache["k"], y=pre_scores),
+                    "v": layers.sequence_expand(x=cache["v"], y=pre_scores),
+                }
+                for cache in caches
+            ]
+            pre_pos = layers.elementwise_mul(
+                x=layers.fill_constant_batch_size_like(
+                    input=pre_enc_output,  # can't use pre_ids here since it has lod
+                    value=1,
+                    shape=[-1, 1, 1],
+                    dtype=pre_ids.dtype,
+                ),
+                y=layers.increment(x=step_idx, value=1.0, in_place=False),
+                axis=0,
+            )
+            logits = wrap_decoder(
+                trg_vocab_size,
+                max_in_len,
+                n_layer,
+                n_head,
+                d_key,
+                d_value,
+                d_model,
+                d_inner_hid,
+                dropout_rate,
+                weight_sharing,
+                dec_inputs=(pre_ids, pre_pos, None, pre_src_attn_bias),
+                enc_output=pre_enc_output,
+                caches=pre_caches,
+            )
+            logits = paddle.reshape(logits, (-1, trg_vocab_size))
+            topk_scores, topk_indices = paddle.topk(
+                x=paddle.nn.functional.softmax(logits), k=beam_size
+            )
+            accu_scores = layers.elementwise_add(
+                x=paddle.log(topk_scores),
+                y=paddle.reshape(pre_scores, shape=[-1]),
+                axis=0,
+            )
+            # beam_search op uses lod to distinguish branches.
+            topk_indices = layers.lod_reset(topk_indices, pre_ids)
+            selected_ids, selected_scores = layers.beam_search(
+                pre_ids=pre_ids,
+                pre_scores=pre_scores,
+                ids=topk_indices,
+                scores=accu_scores,
+                beam_size=beam_size,
+                end_id=eos_idx,
+            )
+
+            layers.increment(x=step_idx, value=1.0, in_place=True)
+            # update states
+            layers.array_write(selected_ids, i=step_idx, array=ids)
+            layers.array_write(selected_scores, i=step_idx, array=scores)
+            layers.assign(pre_src_attn_bias, trg_src_attn_bias)
+            layers.assign(pre_enc_output, enc_output)
+            for i in range(n_layer):
+                layers.assign(pre_caches[i]["k"], caches[i]["k"])
+                layers.assign(pre_caches[i]["v"], caches[i]["v"])
+            length_cond = paddle.less_than(x=step_idx, y=max_len)
+            finish_cond = paddle.logical_not(layers.is_empty(x=selected_ids))
+            paddle.logical_and(x=length_cond, y=finish_cond, out=cond)
+
+        finished_ids, finished_scores = layers.beam_search_decode(
+            ids, scores, beam_size=beam_size, end_id=eos_idx
+        )
+        return finished_ids, finished_scores
+
+    finished_ids, finished_scores = beam_search()
+    return finished_ids, finished_scores
+
+
 def get_model(is_dist, is_async):
     sum_cost, avg_cost, predict, token_num = transformer(
         ModelHyperParams.src_vocab_size,
diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py
index 33ab8c7a363b27f8ca9def470884eedc21d6afcb..746a1f07655ab00330bda06e229ff0b28c612a35 100644
--- a/python/paddle/fluid/tests/unittests/dist_word2vec.py
+++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py
@@ -79,19 +79,19 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
                 input=[embed_first, embed_second, embed_third, embed_forth],
                 axis=1,
             )
-            hidden1 = fluid.layers.fc(
-                input=concat_embed,
+            hidden1 = paddle.static.nn.fc(
+                x=concat_embed,
                 size=HIDDEN_SIZE,
-                act='sigmoid',
-                param_attr=fluid.ParamAttr(
+                activation='sigmoid',
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.1)
                 ),
             )
-            predict_word = fluid.layers.fc(
-                input=hidden1,
+            predict_word = paddle.static.nn.fc(
+                x=hidden1,
                 size=dict_size,
-                act='softmax',
-                param_attr=fluid.ParamAttr(
+                activation='softmax',
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.1)
                 ),
             )
diff --git a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py
index 1c7ecd1036ccf0c43dcff60ee126feeb353402ac..8f803ce1db1424826fdbed39f67b158d1f6d56db 100644
--- a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py
+++ b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py
@@ -97,11 +97,11 @@ def net(batch_size=4, lr=0.01):
 
     with fluid.device_guard("gpu"):
         for i, dim in enumerate(dnn_layer_dims[1:]):
-            fc = fluid.layers.fc(
-                input=dnn_out,
+            fc = paddle.static.nn.fc(
+                x=dnn_out,
                 size=dim,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.01)
                 ),
                 name='dnn-fc-%d' % i,
@@ -110,7 +110,9 @@ def net(batch_size=4, lr=0.01):
 
         merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
         label = fluid.layers.cast(label, dtype="int64")
-        predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+        predict = paddle.static.nn.fc(
+            x=merge_layer, size=2, activation='softmax'
+        )
 
         cost = paddle.nn.functional.cross_entropy(
             input=predict, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
index 040e566f0512713091850a5efcc0ff564b1f46be..628953391c059fd4b4b5652acc8cbdaa79213ffe 100755
--- a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
+++ b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
@@ -62,12 +62,12 @@ class TestFleetMetaOptimizer(unittest.TestCase):
                     name="y", shape=[1], dtype='int64'
                 )
 
-                fc_1 = paddle.fluid.layers.fc(
-                    input=input_x, size=64, act='tanh'
+                fc_1 = paddle.static.nn.fc(
+                    x=input_x, size=64, activation='tanh'
                 )
-                fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh')
-                prediction = paddle.fluid.layers.fc(
-                    input=[fc_2], size=2, act='softmax'
+                fc_2 = paddle.static.nn.fc(x=fc_1, size=256, activation='tanh')
+                prediction = paddle.static.nn.fc(
+                    x=[fc_2], size=2, activation='softmax'
                 )
                 cost = paddle.nn.functional.cross_entropy(
                     input=prediction,
@@ -82,9 +82,9 @@ class TestFleetMetaOptimizer(unittest.TestCase):
 
     def pp_net(self, main_prog, startup_prog, pp_degree=2):
         def fc_block(input_x):
-            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-            fc_3 = paddle.fluid.layers.fc(input=fc_2, size=64, act='tanh')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+            fc_3 = paddle.static.nn.fc(x=fc_2, size=64, activation='tanh')
             return fc_3
 
         with fluid.program_guard(main_prog, startup_prog):
@@ -104,8 +104,8 @@ class TestFleetMetaOptimizer(unittest.TestCase):
                         input_x = fc_block(input_x)
 
                 with fluid.device_guard("gpu:" + str(pp_degree - 1)):
-                    prediction = paddle.fluid.layers.fc(
-                        input=[input_x], size=2, act='softmax'
+                    prediction = paddle.static.nn.fc(
+                        x=[input_x], size=2, activation='softmax'
                     )
                     cost = paddle.nn.functional.cross_entropy(
                         input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py
index b08835de54be02984fb5699da65bc080f5b8b126..02318ac7d94df8fd51f9c79eb770b50397103bdf 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py
@@ -63,8 +63,8 @@ class TestWeightSharing(IPUOpTest):
                 is_sparse=False,
             )
         with paddle.static.ipu_shard_guard(index=1, stage=1):
-            z = paddle.fluid.layers.fc(
-                input=y, size=768, param_attr=paddle.fluid.ParamAttr(name="fc")
+            z = paddle.static.nn.fc(
+                x=y, size=768, weight_attr=paddle.fluid.ParamAttr(name="fc")
             )
         with paddle.static.ipu_shard_guard(index=0, stage=2):
             out = paddle.matmul(
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py
index 90621333c5bdb257d30a56304640c768f3d51d32..342b897d003acd9d95e6374caae54f2e637826cd 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py
@@ -33,7 +33,7 @@ class TestMKLDNNCpuBfloat16Pass(InferencePassTest):
             out = paddle.transpose(x, perm=[0, 1, 2, 3])
             out = paddle.reshape(out, [0, 0, 0, 0])
 
-            out = fluid.layers.fc(out, size=1)
+            out = paddle.static.nn.fc(out, size=1)
 
             self.feeds = {
                 "x": np.random.random([self.bs] + self.shape_x).astype(
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py
index c9284a1b5e9d2ccba1dc2117f9d3ba66794f9616..e344c873ee263709e63d59c0ead1b8c5a4c020fc 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py
@@ -83,7 +83,7 @@ class TestMKLDNNMatmulOpNotFusedWrongTransposeAxis(TestMKLDNNMatmulFuseOp):
             out = paddle.matmul(x, y)
             out = paddle.transpose(out, perm=[0, 1, 2, 3])
             out = paddle.reshape(out, [0, 0, 0, 0])
-            out = fluid.layers.fc(out, size=1)
+            out = paddle.static.nn.fc(out, size=1)
         return out
 
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
index a3b297a268faddfd969ec2242ce9ce670017d3b0..252ea329edb26989f8f62dc11f28c24d096035a1 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
@@ -29,8 +29,8 @@ class FCFusePassTRTTest(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[32, 128, 2, 2], dtype="float32"
             )
-            fc_out1 = fluid.layers.fc(
-                input=data, size=128, num_flatten_dims=1, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=128, num_flatten_dims=1, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -59,8 +59,8 @@ class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[32, 128, 32, 8], dtype="float32"
             )
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=1, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=1, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -87,8 +87,8 @@ class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[3, 24, 16, 16], dtype="float32"
             )
-            fc_out1 = fluid.layers.fc(
-                input=data, size=32, num_flatten_dims=2, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=32, num_flatten_dims=2, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -113,8 +113,8 @@ class FCFusePassTRTDynamicDims2Test(InferencePassTest):
     def setUp(self):
         with fluid.program_guard(self.main_program, self.startup_program):
             data = fluid.data(name="data", shape=[32, 128], dtype="float32")
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=1, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=1, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -145,8 +145,8 @@ class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
     def setUp(self):
         with fluid.program_guard(self.main_program, self.startup_program):
             data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=1, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=1, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -177,8 +177,8 @@ class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
     def setUp(self):
         with fluid.program_guard(self.main_program, self.startup_program):
             data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=2, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=2, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -211,8 +211,8 @@ class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[32, 12, 4, 6], dtype="float32"
             )
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=1, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=1, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -247,8 +247,8 @@ class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[32, 128, 32, 32], dtype="float32"
             )
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=2, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=2, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
@@ -283,8 +283,8 @@ class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[32, 128, 32, 32], dtype="float32"
             )
-            fc_out1 = fluid.layers.fc(
-                input=data, size=64, num_flatten_dims=3, act="relu"
+            fc_out1 = paddle.static.nn.fc(
+                x=data, size=64, num_flatten_dims=3, activation="relu"
             )
             out = paddle.nn.functional.softmax(fc_out1)
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py
index 1c3af0d6c5271f4b9777db7a4fac5a8830f8340c..5179d0330d6ace9f6538e562c79dd15125e7f9d5 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py
@@ -31,12 +31,12 @@ class FCQuantDequantFusePassTRTDims3Cols1Test(QuantDequantTest):
                 name='data', shape=[1, 28, 28], dtype='float32'
             )
             self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
-            fc_out = fluid.layers.fc(
-                input=self.data,
+            fc_out = paddle.static.nn.fc(
+                x=self.data,
                 size=10,
                 num_flatten_dims=1,
                 bias_attr=False,
-                act="relu",
+                activation="relu",
             )
             result = F.relu(fc_out)
             loss = paddle.nn.functional.cross_entropy(
@@ -102,12 +102,12 @@ class FCQuantDequantFusePassTRTDims3Cols2Test(QuantDequantTest):
                 name='data', shape=[1, 28, 28], dtype='float32'
             )
             self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
-            fc_out = fluid.layers.fc(
-                input=self.data,
+            fc_out = paddle.static.nn.fc(
+                x=self.data,
                 size=28,
                 num_flatten_dims=2,
                 bias_attr=False,
-                act=None,
+                activation=None,
             )
             c_out = paddle.reshape(fc_out, shape=[0, 784])
             result = F.relu(c_out)
@@ -176,12 +176,12 @@ class FCQuantDequantFusePassTRTDims3Cols3Test(QuantDequantTest):
             self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
             label_shape = paddle.reshape(self.label, shape=[1, 1, 1])
             reshape_out = paddle.reshape(self.data, shape=[1, 14, 14, 4])
-            fc_out = fluid.layers.fc(
-                input=reshape_out,
+            fc_out = paddle.static.nn.fc(
+                x=reshape_out,
                 size=14,
                 num_flatten_dims=3,
                 bias_attr=False,
-                act=None,
+                activation=None,
             )
             c_out = paddle.reshape(fc_out, shape=[1, 1, 2744])
             result = F.relu(c_out)
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py
index 5c3f91de4e5197fe4753866d242218b7fee121eb..413002d9885517cfeb457b265910d099a69432d3 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py
@@ -40,12 +40,12 @@ class TensorRTMatMulQuantDequantDims3Test(QuantDequantTest):
                 transpose_y=self.transpose_y,
             )
             matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-            fc_out = fluid.layers.fc(
-                input=matmul_out,
+            fc_out = paddle.static.nn.fc(
+                x=matmul_out,
                 size=10,
                 num_flatten_dims=1,
                 bias_attr=False,
-                act=None,
+                activation=None,
             )
             result = F.relu(fc_out)
             loss = paddle.nn.functional.cross_entropy(
@@ -142,12 +142,12 @@ class TensorRTMatMulQuantDequantDims4Test(QuantDequantTest):
             )
             matmul_out = paddle.scale(matmul_out, scale=self.alpha)
             out = paddle.static.nn.batch_norm(matmul_out, is_test=True)
-            fc_out = fluid.layers.fc(
-                input=matmul_out,
+            fc_out = paddle.static.nn.fc(
+                x=matmul_out,
                 size=10,
                 num_flatten_dims=1,
                 bias_attr=False,
-                act=None,
+                activation=None,
             )
             result = F.relu(fc_out)
             loss = paddle.nn.functional.cross_entropy(
@@ -243,12 +243,12 @@ class TensorRTMatMulQuantDequantDims3DynamicTest(QuantDequantTest):
             )
             matmul_out = paddle.scale(matmul_out, scale=self.alpha)
             out = paddle.static.nn.batch_norm(matmul_out, is_test=True)
-            fc_out = fluid.layers.fc(
-                input=matmul_out,
+            fc_out = paddle.static.nn.fc(
+                x=matmul_out,
                 size=10,
                 num_flatten_dims=1,
                 bias_attr=False,
-                act=None,
+                activation=None,
             )
             result = F.relu(fc_out)
             loss = paddle.nn.functional.cross_entropy(
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
index 9223c693cf3df782d4b503dd8ff335cc6a30855c..da9a86725c008bef4673b7792a14d1bc688b9944 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
@@ -31,7 +31,7 @@ class TensorRTSubgraphPassFcTest(InferencePassTest):
             data = fluid.data(
                 name="data", shape=[-1, 6, 64, 64], dtype="float32"
             )
-            fc_out = fluid.layers.fc(input=[data], act=None, size=1000)
+            fc_out = paddle.static.nn.fc(x=[data], activation=None, size=1000)
             reshape_out = paddle.reshape(x=fc_out, shape=[1, 1000])
         self.feeds = {
             "data": np.random.random([1, 6, 64, 64]).astype("float32"),
diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py
index 3e958d9d191c9cd6421b2be4a85317d6a79b7553..403729786d418d0df2832fcfa3c5add5558ca3ae 100644
--- a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py
@@ -28,10 +28,10 @@ class FCFusePassTest(PassTest):
             data = fluid.data(
                 name="data", shape=[32, 128], dtype="float32", lod_level=0
             )
-            tmp_0 = fluid.layers.fc(
-                input=data, size=128, num_flatten_dims=1, act="relu"
+            tmp_0 = paddle.static.nn.fc(
+                x=data, size=128, num_flatten_dims=1, activation="relu"
             )
-            tmp_1 = fluid.layers.fc(input=tmp_0, size=32, num_flatten_dims=1)
+            tmp_1 = paddle.static.nn.fc(x=tmp_0, size=32, num_flatten_dims=1)
             tmp_2 = paddle.nn.functional.softmax(tmp_1)
 
         self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py
index 5ccfa2340f92655f2e3c9ede37ede869a89088ed..21697177d0dd626f9f81c555e8dca2dce4bfe3a2 100644
--- a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py
+++ b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py
@@ -34,7 +34,9 @@ class TestQuantizationSubGraph(unittest.TestCase):
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             hidden = data
             for _ in range(num):
-                hidden = fluid.layers.fc(hidden, size=128, act='relu')
+                hidden = paddle.static.nn.fc(
+                    hidden, size=128, activation='relu'
+                )
             loss = paddle.nn.functional.cross_entropy(
                 input=hidden, label=label, reduction='none', use_softmax=False
             )
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py
index 1b8e1b7292b8d22f9585c9ba23805b4bed49dc0e..16d28c6af436a1a252a80793382eaf1bb7f80057 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py
@@ -260,8 +260,8 @@ class TestNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py
index 82c7bf6b855dc374c63a2f5929b3d92550a77070..dfe0b6e070d878e8200c14f5a9a61171041ce815 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py
@@ -211,8 +211,8 @@ class TestNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py
index 5820f85f4def40ec0b5c9dbb337f1e64d9134caf..6dbcc43c62bf5b72a8f37c665e14979e539a59e2 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py
@@ -340,8 +340,8 @@ class TestElementwiseMaxNet(unittest.TestCase):
 
             c = paddle.maximum(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py
index 2ac3650c08234e9cb5d40ee767771580cedfc80a..00b746b7f72066435712fa26267cfc34b3a879cc 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py
@@ -186,8 +186,8 @@ class TestElementwiseMinOpNet(unittest.TestCase):
 
             c = paddle.minimum(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py
index a3f1d2a9af9ecb2cd9b6da3bd63c5df4e2d1a7a5..c4d7acfd2f5a7a4eadc247c766e367e4384db294 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py
@@ -108,9 +108,9 @@ class TestGeluNet(unittest.TestCase):
 
             c = paddle.multiply(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
             fc_1_gelu = paddle.nn.functional.gelu(fc_1)
-            prediction = fluid.layers.fc(input=fc_1_gelu, size=2, act='softmax')
+            prediction = paddle.static.nn.fc(x=fc_1_gelu, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py
index 18124e05e1fd0d578df2947cac73f26d538eea86..7c97ad35b5da84040fff65dd1eb66e2a078637d3 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py
@@ -103,8 +103,8 @@ class TestLeakyReluNet(unittest.TestCase):
 
             y = paddle.nn.functional.leaky_relu(x)
 
-            fc_1 = fluid.layers.fc(input=y, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=y, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py
index cb393cbd3731122ff35ac31fdd1c3fd4233c1eba..5e5e517878eb3dbece3cc924e416f0cbce80418d 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py
@@ -142,7 +142,7 @@ class TestMomentumV2(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1)
             cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
@@ -267,7 +267,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1)
             cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py
index 272129c7b1b192742fdf2152760e6ac2d7b4d753..867f45b7838997219cb365142fba67f1f21ff40c 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py
@@ -122,8 +122,8 @@ class TestRelu6Net(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.nn.functional.relu6(sum)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py
index fcd5d4cc6febe3ccbf8a81d709ce75a86aa969cd..4de46bf4051c0f8441f484ea20a799200357e12a 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py
@@ -123,8 +123,8 @@ class TestReluNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.nn.functional.relu(sum)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py
index c924bdc6918700bef75fe43614724c749593f77a..8c14e0483ab3745a30d5782aa6feb25267eddc8c 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py
@@ -123,8 +123,8 @@ class TestPowNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2)
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2)
 
             cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py
index acf321b24d2e205f1574b54fc905e12519eb3122..3f7ad27418a0389e9feb8e3d8f75d503def0857e 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py
@@ -104,8 +104,8 @@ class TestTanhNet(unittest.TestCase):
             c = paddle.multiply(a, b)
             d = paddle.tanh(c)
 
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py
index 95cc5b977870a42daabcbf5ea8a4ebd7d4112360..5bf239b5bc77df819a8c2a4fc876b13f8e3ff38b 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py
@@ -260,8 +260,8 @@ class TestNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
@@ -343,9 +343,9 @@ class TestNetWithEpsilonTensor(unittest.TestCase):
                 sum = paddle.add(a, b)
                 z = paddle.pow(sum, 2.0)
 
-                fc_1 = fluid.layers.fc(input=z, size=2, param_attr=weight_attr1)
-                prediction = fluid.layers.fc(
-                    input=fc_1, size=2, param_attr=weight_attr2, act='softmax'
+                fc_1 = paddle.static.nn.fc(x=z, size=2, weight_attr=weight_attr1)
+                prediction = paddle.static.nn.fc(
+                    x=fc_1, size=2, weight_attr=weight_attr2, activation='softmax'
                 )
 
                 cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py
index 09edd69317890f7943f12592cddb4f2a278d8a6f..901aa4d21d880bf4a266de40f21b7c66f94eec64 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py
@@ -211,8 +211,8 @@ class TestNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py
index fa7d2a89749f0f4d9d3bd731812bff87618a03c0..6603b111a4b69cc51fe23d866824233aded0b772 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py
@@ -101,8 +101,8 @@ class TestCosNet(unittest.TestCase):
             c = paddle.multiply(a, b)
             d = paddle.cos(c)
 
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py
index 1f86cd35fb1f7c5c39825ae2fce17f9a1d7e4f63..ac4e9e62b6bf8c7168707e4ef36989a21616de77 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py
@@ -135,8 +135,8 @@ class TestElementwiseDivNet(unittest.TestCase):
             f.stop_gradient = True
             g = paddle.divide(e, f)
 
-            fc_1 = fluid.layers.fc(input=g, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=g, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py
index a5d7e56ebf740772d695d369e35d47c065cf4c80..21b6d78dc6d2247745c298ec3b783a2af3c23d8c 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py
@@ -299,8 +299,8 @@ class TestElementwiseMaxNet(unittest.TestCase):
 
             c = paddle.maximum(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py
index 2a0b526fd10cfb8104dc5090fba06a2cedcd75ad..ae6a7671fd41db2fe92f6ff0a792e83640b717f8 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py
@@ -186,8 +186,8 @@ class TestElementwiseMinOpNet(unittest.TestCase):
 
             c = paddle.minimum(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py
index aea7e4dcbff4c5f7b92746590bb492bfc2451597..8cf427d4fdda4301230d88510b19d18eca87ba2e 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py
@@ -310,8 +310,8 @@ class TestElementwisePowNet(unittest.TestCase):
 
             c = paddle.pow(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py
index b204e268a2805c9ebbe6321eebc8d63028867fbc..b3bbc4bc5e71c9b6c02b8bf11502267bfea25f80 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py
@@ -191,8 +191,8 @@ class TestSubtractNet(unittest.TestCase):
             c = paddle.assign(b)
             z = paddle.subtract(sum, c)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py
index ad0ba66bbd551914b54bc204edb9e9bc8d93d6b5..608709edbe9a7f20289cab61e84b5b3bcbf4e34b 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py
@@ -108,9 +108,9 @@ class TestGeluNet(unittest.TestCase):
 
             c = paddle.multiply(a, b)
 
-            fc_1 = fluid.layers.fc(input=c, size=128)
+            fc_1 = paddle.static.nn.fc(x=c, size=128)
             fc_1_gelu = paddle.nn.functional.gelu(fc_1)
-            prediction = fluid.layers.fc(input=fc_1_gelu, size=2, act='softmax')
+            prediction = paddle.static.nn.fc(x=fc_1_gelu, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py
index 0f88dbed1cb9a31c48721fc4d7aeb654eae8b87a..cf12c0c6c052c89317fa4c3db75175d504de1275 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py
@@ -103,8 +103,8 @@ class TestLeakyReluNet(unittest.TestCase):
 
             y = paddle.nn.functional.leaky_relu(x)
 
-            fc_1 = fluid.layers.fc(input=y, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=y, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py
index 175d31ccc885eb06b4af4deff511304947f3f8ae..fb69ac3afa9c268cb8a17b416d7d35d080aa45ac 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py
@@ -101,8 +101,8 @@ class TestLogNet(unittest.TestCase):
             c = paddle.multiply(a, b)
             d = paddle.log(c)
 
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py
index fe0882c7330fbf984f3ab96f7f4fa231a3522ce9..b500b44e7e57cc0812b9572f005fa61e8c8e27e9 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py
@@ -110,7 +110,7 @@ class TestMomentumV2(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
             cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
@@ -238,7 +238,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
             cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
diff --git a/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py
index 3f3dfc1b06eb72e178eda28ee0ecf4f56af321eb..aa93bb8b3be48e1f4346184c0a31a79f79fa7648 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py
@@ -101,8 +101,8 @@ class TestPowNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py
index a343de22be25a36f46374240c1138fe333ac7d2f..2352c0d90734a52194e595e1fce7d261639da189 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py
@@ -105,12 +105,12 @@ class TestReduceSumNet(unittest.TestCase):
                 name="label", shape=[2, 1], dtype='int64'
             )
 
-            a_1 = fluid.layers.fc(input=a, size=4, num_flatten_dims=2, act=None)
-            b_1 = fluid.layers.fc(input=b, size=4, num_flatten_dims=2, act=None)
+            a_1 = paddle.static.nn.fc(x=a, size=4, num_flatten_dims=2, activation=None)
+            b_1 = paddle.static.nn.fc(x=b, size=4, num_flatten_dims=2, activation=None)
             z = paddle.add(a_1, b_1)
             z_1 = self.set_reduce_sum_function(z)
 
-            prediction = fluid.layers.fc(input=z_1, size=2, act='softmax')
+            prediction = paddle.static.nn.fc(x=z_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py
index 2f8925843aacb9a0bf1bcc55098b41e9884e30de..b9ff61e046f3767d46a0bdaf5f37259f5763cabe 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py
@@ -122,8 +122,8 @@ class TestRelu6Net(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.nn.functional.relu6(sum)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py
index 8a55a3db635e1c7c9ae333df02dca9c297607cce..70d2cf8e65fd1ad5f908e541016b830f6a52139f 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py
@@ -115,8 +115,8 @@ class TestReluNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.nn.functional.relu(sum)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py
index aeedddf7df4ffdf59f002699f191981e4a2f381d..431822f0a15b92a0065d08c94d6941b3f8360b66 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py
@@ -49,8 +49,8 @@ class TestNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
@@ -112,8 +112,8 @@ class TestCenteredNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py
index c45d58d598c2c1cc78a87b4bb908587862097281..94faec4d530a63b874e53c16904699787357b067 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py
@@ -311,12 +311,12 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest):
             ),
             trainable=True,
         )
-        pred = fluid.layers.fc(
-            input=img,
+        pred = paddle.static.nn.fc(
+            x=img,
             size=10,
-            param_attr=weight_attr,
+            weight_attr=weight_attr,
             bias_attr=bias_attr,
-            act='relu',
+            activation='relu',
         )
         # 2. get forward op num
         fwd_op_num = fluid.default_main_program().global_block().desc.op_size()
diff --git a/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py
index 80060dab66a096a4d6ccd660bc636bdcd4bbf309..c9fb41701ca25d1ebfd7b8e0ae6c9446747bd237 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py
@@ -74,8 +74,8 @@ class TestNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py
index 1e9ce9659c5e2cbf0145bc1b96d2177c543fc9cd..c6dfbb7ffbe070c58516c3d920e54205f292ce0f 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py
@@ -74,9 +74,9 @@ class TestSoftmaxNet(unittest.TestCase):
             d = paddle.sqrt(c)
 
             # 4 x 128
-            fc_1 = fluid.layers.fc(input=d, size=128)
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
             # 4 x 2
-            prediction = fluid.layers.fc(input=fc_1, size=2)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2)
 
             # 4 x 2
             prob = paddle.nn.functional.softmax(prediction, axis=1)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
index f47a0275af0fd9d3176cb51a67aa1b697e7dfc13..1301f6f08fc5d984918226a784bb2458fb23546e 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
@@ -121,8 +121,8 @@ class TestPowNet(unittest.TestCase):
             sum = paddle.add(a, b)
             z = paddle.pow(sum, 2.0)
 
-            fc_1 = fluid.layers.fc(input=z, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2)
+            fc_1 = paddle.static.nn.fc(x=z, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2)
 
             cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py
index 983b0db9a640d9d6b5f4301d17e95d53b1c5b26f..1370e7ce7411457632ef3e6866f1c37dffa895c7 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py
@@ -104,8 +104,8 @@ class TestSqrtNet(unittest.TestCase):
             c = paddle.multiply(a, b)
             d = paddle.sqrt(c)
 
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py
index 26aa5a73d006330f630efaa9c1a8546d6bd6b73c..693e4540de69546322cd9a3d6009fcbdb7881c0d 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py
@@ -101,8 +101,8 @@ class TestSquareNet(unittest.TestCase):
             c = paddle.multiply(a, b)
             d = paddle.square(c)
 
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py
index 17497f23d7b1f2932a6c1a298a9ff33a87194cf0..d52518fc1021b7cced1d21e941e349b2f40a6d66 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py
@@ -104,8 +104,8 @@ class TestTanhNet(unittest.TestCase):
             c = paddle.multiply(a, b)
             d = paddle.tanh(c)
 
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=d, size=128)
+            prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
             cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py
index 0c04d611210ec5c636b7127c3dffd59105bcbc23..beb24de94677e755a91ee1b0dbbb4989833be73f 100644
--- a/python/paddle/fluid/tests/unittests/seresnext_net.py
+++ b/python/paddle/fluid/tests/unittests/seresnext_net.py
@@ -53,11 +53,11 @@ def squeeze_excitation(input, num_channels, reduction_ratio):
     reshape = paddle.reshape(x=conv, shape=[-1, shape[1], shape[2] * shape[3]])
     pool = paddle.mean(x=reshape, axis=2)
 
-    squeeze = fluid.layers.fc(
-        input=pool, size=num_channels // reduction_ratio, act='relu'
+    squeeze = paddle.static.nn.fc(
+        x=pool, size=num_channels // reduction_ratio, activation='relu'
     )
-    excitation = fluid.layers.fc(
-        input=squeeze, size=num_channels, act='sigmoid'
+    excitation = paddle.static.nn.fc(
+        x=squeeze, size=num_channels, activation='sigmoid'
     )
     scale = paddle.tensor.math._multiply_with_axis(
         x=input, y=excitation, axis=0
@@ -169,7 +169,7 @@ def SE_ResNeXt50Small(use_feed):
         pool if remove_dropout else paddle.nn.functional.dropout(x=pool, p=0.2)
     )
     # Classifier layer:
-    prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax')
+    prediction = paddle.static.nn.fc(x=dropout, size=1000, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/simple_nets.py b/python/paddle/fluid/tests/unittests/simple_nets.py
index 747cde082743a93ff8ec09e96484e35fefb87c7a..2b41107061d07c3fb5fd1f0b5d29495f138973b6 100644
--- a/python/paddle/fluid/tests/unittests/simple_nets.py
+++ b/python/paddle/fluid/tests/unittests/simple_nets.py
@@ -21,15 +21,17 @@ import paddle.fluid as fluid
 def simple_fc_net_with_inputs(img, label, class_num=10):
     hidden = img
     for _ in range(2):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=100,
-            act='relu',
+            activation='relu',
             bias_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=1.0)
             ),
         )
-    prediction = fluid.layers.fc(hidden, size=class_num, act='softmax')
+    prediction = paddle.static.nn.fc(
+        hidden, size=class_num, activation='softmax'
+    )
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -46,10 +48,10 @@ def simple_fc_net(use_feed=None):
 def batchnorm_fc_with_inputs(img, label, class_num=10):
     hidden = img
     for _ in range(2):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=200,
-            act='relu',
+            activation='relu',
             bias_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=1.0)
             ),
@@ -57,7 +59,9 @@ def batchnorm_fc_with_inputs(img, label, class_num=10):
 
         hidden = paddle.static.nn.batch_norm(input=hidden)
 
-    prediction = fluid.layers.fc(hidden, size=class_num, act='softmax')
+    prediction = paddle.static.nn.fc(
+        hidden, size=class_num, activation='softmax'
+    )
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -94,9 +98,11 @@ def bow_net(
     )
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
     bow_tanh = paddle.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
+    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
+    prediction = paddle.static.nn.fc(
+        x=[fc_2], size=class_dim, activation="softmax"
+    )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py
index eb0fec336a33ce5e5f8957c4587ded3860d6ca86..95a485ce3a4e7d4acd0146eacfa33e10116de262 100644
--- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py
@@ -145,7 +145,7 @@ class TestAdadeltaV2(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py
index 1efea016fa771d64f05745aef5682b98793e61e9..d7052c94720a4f0f17da958679f1bb5a65479524 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -788,9 +788,14 @@ class TestAdamOptimizer(unittest.TestCase):
                 sum = paddle.add(a, b)
                 z = paddle.pow(sum, 2.0)
 
-                fc_1 = fluid.layers.fc(input=z, size=2, param_attr=weight_attr1)
-                prediction = fluid.layers.fc(
-                    input=fc_1, size=2, param_attr=weight_attr2, act='softmax'
+                fc_1 = paddle.static.nn.fc(
+                    x=z, size=2, weight_attr=weight_attr1
+                )
+                prediction = paddle.static.nn.fc(
+                    x=fc_1,
+                    size=2,
+                    weight_attr=weight_attr2,
+                    activation='softmax',
                 )
 
                 cost = paddle.nn.functional.cross_entropy(
@@ -930,9 +935,7 @@ class TestAdamOptimizer(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.data(name='x', shape=[None, 13], dtype='float32')
             y = fluid.data(name='y', shape=[None, 1], dtype='float32')
-            y_predict = fluid.layers.fc(
-                input=x, size=1, act=None, param_attr=weight_attr
-            )
+            y_predict = paddle.static.nn.fc(x, size=1, weight_attr=weight_attr)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
@@ -955,8 +958,8 @@ class TestAdamOptimizer(unittest.TestCase):
         sum = paddle.add(a, b)
         z = paddle.pow(sum, 2.0)
 
-        fc_1 = fluid.layers.fc(input=z, size=128)
-        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=z, size=128)
+        prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
 
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py
index 79c653cdfb26dcdf8740ddbf137abc7bfbe9c371..a3d6c0cbfd992bb89275203f9c07c970e34172d2 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py
@@ -32,7 +32,7 @@ def main_test_func(place, dtype):
         with fluid.scope_guard(fluid.Scope()):
             x = fluid.data(name='x', shape=[None, 13], dtype=dtype)
             y = fluid.data(name='y', shape=[None, 1], dtype=dtype)
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
diff --git a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
index be31ec40b79fdc9c9e98314eb2709284a36ba588..fb6dc442a73c92f12051e20613aa44eeb77e22a3 100644
--- a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
@@ -56,7 +56,9 @@ def convolutional_neural_network(use_py_reader):
             act="relu",
         )
 
-        prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
+        prediction = paddle.static.nn.fc(
+            x=conv_pool_2, size=10, activation='softmax'
+        )
         loss = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py
index e126a67bf5fd573a8c6b1c0de54d0b812099b1db..750732eb5d90794401938759b8738331fb7ebb88 100644
--- a/python/paddle/fluid/tests/unittests/test_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_backward.py
@@ -247,19 +247,19 @@ class SimpleNet(BackwardNet):
         x_merge = paddle.add(x_emb, x2_emb, name='x_add_x2')
         x2_merge = paddle.add(x2_emb, x3_emb, name='x2_add_x3')
         # shared fc_w
-        predict = fluid.layers.fc(
-            input=x_merge,
+        predict = paddle.static.nn.fc(
+            x=x_merge,
             size=1,
-            act='softmax',
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            activation='softmax',
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             name='fc_predict',
         )
         # useless layer for calculating loss
-        fc_no_use = fluid.layers.fc(
-            input=x2_merge,
+        fc_no_use = paddle.static.nn.fc(
+            x=x2_merge,
             size=1,
-            act='sigmoid',
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            activation='sigmoid',
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             name='fc_no_use',
         )
         # loss
@@ -332,7 +332,7 @@ class TestAppendBackwardWithError(unittest.TestCase):
         x = fluid.data(name='x', shape=[None, 13], dtype='int64')
         y = fluid.data(name='y', shape=[None, 1], dtype='float32')
         x_emb = fluid.embedding(x, size=[100, 256])
-        y_predict = fluid.layers.fc(input=x_emb, size=1, name='my_fc')
+        y_predict = paddle.static.nn.fc(x=x_emb, size=1, name='my_fc')
         loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_loss = paddle.mean(loss)
         param_names = [
diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py
index 073a9018c19009b5ccea95a4ae77bcb53e865e73..9019e9e9e3f5ecb110c9cd5200ae8e6906306990 100644
--- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py
+++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py
@@ -46,7 +46,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase):
 
         pool = fluid.layers.sequence_pool(input=emb, pool_type="sum")
         z = fluid.layers.concat(input=[x, pool], axis=1)
-        y_predict = fluid.layers.fc(input=z, size=1, act=None)
+        y_predict = paddle.static.nn.fc(x=z, size=1)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
         cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
diff --git a/python/paddle/fluid/tests/unittests/test_compiled_program.py b/python/paddle/fluid/tests/unittests/test_compiled_program.py
index 729a7e3e10a56cc89b0092018349e20f8733c4fa..28c3feb010e817924d945760c975bb241e88c69d 100644
--- a/python/paddle/fluid/tests/unittests/test_compiled_program.py
+++ b/python/paddle/fluid/tests/unittests/test_compiled_program.py
@@ -106,7 +106,7 @@ class TestCompiledProgramError(unittest.TestCase):
             name='image', shape=[1, 28, 28], dtype='float32'
         )
         label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        prediction = fluid.layers.fc(input=img, size=10, act='softmax')
+        prediction = paddle.static.nn.fc(x=img, size=10, activation='softmax')
         loss = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_conditional_block.py b/python/paddle/fluid/tests/unittests/test_conditional_block.py
index 1eaf25dc3487746939ee8520d3348640d543b9fc..0aee7cadd7e52354b0c43fd3654a029b24ce6eb2 100644
--- a/python/paddle/fluid/tests/unittests/test_conditional_block.py
+++ b/python/paddle/fluid/tests/unittests/test_conditional_block.py
@@ -35,7 +35,7 @@ class ConditionalBlockTest(unittest.TestCase):
             cond = ConditionalBlock(inputs=[data])
             out = paddle.tensor.create_tensor(dtype='float32')
             with cond.block():
-                hidden = layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
                 layers.assign(hidden, out)
 
             cpu = core.CPUPlace()
diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py
index 67a5fcb464cfe995db3ed413a1085d675a7b03c2..32f77ab290b88c87012eeea957f0433ac25012b1 100644
--- a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py
+++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py
@@ -117,17 +117,17 @@ class TestGeneratorSeed(unittest.TestCase):
             # example 1:
             # attr shape is a list which doesn't contain tensor Variable.
             x = paddle.uniform(shape=[2, 10])
-            result_1 = fluid.layers.fc(
-                input=x,
+            result_1 = paddle.static.nn.fc(
+                x,
                 size=10,
-                param_attr=fluid.initializer.TruncatedNormal(
+                weight_attr=fluid.initializer.TruncatedNormal(
                     loc=0.0, scale=2.0
                 ),
             )
-            result_2 = fluid.layers.fc(
-                input=x,
+            result_2 = paddle.static.nn.fc(
+                x,
                 size=10,
-                param_attr=fluid.initializer.TruncatedNormal(
+                weight_attr=fluid.initializer.TruncatedNormal(
                     loc=0.0, scale=2.0
                 ),
             )
diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py
index 16939f26a2fb7416637c22d28ebcbfe20229c7bd..f84eb48147254eb332c533592756d66399b3d16b 100644
--- a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py
+++ b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py
@@ -33,7 +33,7 @@ class TestDataLoaderEarlyReset(unittest.TestCase):
         self.iterable = True
 
     def build_network(self):
-        y = fluid.layers.fc(self.x, size=10)
+        y = paddle.static.nn.fc(self.x, size=10)
         loss = paddle.mean(y)
 
         optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py
index 82aa47d8a6998f131b346f2a50fc491fde86d322..285265de709ce753c4e911dfc336f9671fae767a 100644
--- a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py
+++ b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py
@@ -48,7 +48,7 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase):
             capacity=16, feed_list=[input_data], iterable=self.iterable
         )
 
-        fc = fluid.layers.fc(input_data, size=10)
+        fc = paddle.static.nn.fc(input_data, size=10)
         loss = paddle.mean(fc)
 
         loader.set_batch_generator(
diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py
index 8373482772deeb703893ce7b67f38edf3cbcf990..b45affcc6bf080ca06421581bb51f0f4cd450e3e 100644
--- a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py
+++ b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py
@@ -54,7 +54,7 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase):
             capacity=16, feed_list=[input_data], iterable=self.iterable
         )
 
-        fc = fluid.layers.fc(input_data, size=10)
+        fc = paddle.static.nn.fc(input_data, size=10)
         loss = paddle.mean(fc)
 
         loader.set_batch_generator(
diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py
index f3c1300aac412f0300a1af6551ee5652e8405003..fb8c9ff6e556694f4a325710d55324016d072476 100644
--- a/python/paddle/fluid/tests/unittests/test_dataset.py
+++ b/python/paddle/fluid/tests/unittests/test_dataset.py
@@ -954,7 +954,7 @@ class TestDatasetWithFetchHandler(unittest.TestCase):
             poolings.append(pool)
 
         concated = fluid.layers.concat(poolings, axis=1)
-        fc = fluid.layers.fc(input=concated, act='tanh', size=32)
+        fc = paddle.static.nn.fc(x=concated, activation='tanh', size=32)
         return slots_vars, fc
 
     def get_dataset(self, inputs, files):
diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py
index f3913b7f3a2a0a38164b8965d1f9a22d0e7661ac..6f417973ee228d654bbd71628e356a79c38f9f3f 100644
--- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py
@@ -54,17 +54,17 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer):
             )
             hidden = image
             for hidden_size in [10, 20, 30]:
-                hidden = fluid.layers.fc(
+                hidden = paddle.static.nn.fc(
                     hidden,
                     size=hidden_size,
-                    act='tanh',
+                    activation='tanh',
                     bias_attr=fluid.ParamAttr(
                         initializer=fluid.initializer.Constant(value=1.0)
                     ),
                 )
 
-            predict_label = fluid.layers.fc(
-                hidden, size=CLASS_NUM, act='softmax'
+            predict_label = paddle.static.nn.fc(
+                hidden, size=CLASS_NUM, activation='softmax'
             )
             loss = paddle.mean(
                 paddle.nn.functional.cross_entropy(
diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py
index f99ea4250fc608f6977ff7fd107dd111ed3326a8..ed1f9a9aaf9d95ac60ed3e89ce7123e9b7be0da1 100644
--- a/python/paddle/fluid/tests/unittests/test_desc_clone.py
+++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py
@@ -53,11 +53,11 @@ def cnn_model(data):
     ]
     scale = (2.0 / (param_shape[0] ** 2 * SIZE)) ** 0.5
 
-    predict = fluid.layers.fc(
-        input=conv_pool_2,
+    predict = paddle.static.nn.fc(
+        x=conv_pool_2,
         size=SIZE,
-        act="softmax",
-        param_attr=fluid.param_attr.ParamAttr(
+        activation="softmax",
+        weight_attr=fluid.param_attr.ParamAttr(
             initializer=fluid.initializer.NormalInitializer(
                 loc=0.0, scale=scale
             )
@@ -187,11 +187,13 @@ class TestCloneWithStopGradient(unittest.TestCase):
         startup_program = fluid.Program()
         with fluid.program_guard(train_program, startup_program):
             img = fluid.layers.data(name='image', shape=[784])
-            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1 = paddle.static.nn.fc(x=img, size=200, activation='relu')
             hidden1.stop_gradient = True
             hidden2 = paddle.nn.functional.dropout(hidden1, p=0.5)
             loss = paddle.nn.functional.cross_entropy(
-                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                input=paddle.static.nn.fc(
+                    hidden2, size=10, activation='softmax'
+                ),
                 label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
                 reduction='none',
                 use_softmax=False,
@@ -214,7 +216,7 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase):
         with fluid.program_guard(train_program, startup_program):
             img = fluid.layers.data(name='image', shape=[784])
             true = paddle.ones(shape=[1], dtype="float32")
-            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1 = paddle.static.nn.fc(x=img, size=200, activation='relu')
             hidden1.stop_gradient = True
 
             cond = paddle.equal(true, true)
@@ -231,7 +233,9 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase):
             hidden2 = paddle.static.nn.cond(cond, true_fn, false_fn)
 
             loss = paddle.nn.functional.cross_entropy(
-                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                input=paddle.static.nn.fc(
+                    hidden2, size=10, activation='softmax'
+                ),
                 label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
                 reduction='none',
                 use_softmax=False,
@@ -257,7 +261,7 @@ class TestCloneWithRaise(unittest.TestCase):
         with fluid.program_guard(train_program, startup_program):
             img = fluid.layers.data(name='image', shape=[784])
             true = paddle.ones(shape=[1], dtype="float32")
-            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1 = paddle.static.nn.fc(x=img, size=200, activation='relu')
             hidden1.stop_gradient = True
 
             cond = paddle.equal(true, true)
@@ -273,7 +277,9 @@ class TestCloneWithRaise(unittest.TestCase):
 
             hidden2 = paddle.static.nn.cond(cond, true_fn, false_fn)
             loss = paddle.nn.functional.cross_entropy(
-                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                input=paddle.static.nn.fc(
+                    hidden2, size=10, activation='softmax'
+                ),
                 label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
                 reduction='none',
                 use_softmax=False,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py
index 967028f02d20327a5007c1117516161f71acf807..080319304186f752cfb9cd98740d96caee41afac 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py
@@ -49,9 +49,9 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
index 6ffea5df324b7b870bb15740cd08180c4c5f249d..725c2559db051bb9f79e51cb9123f27be060aa8e 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
@@ -65,9 +65,9 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=x_embedding, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=x_embedding, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
index 3bb5b669c8d1541f813538e80f2e40299efac714..c25e60793fe03639dbdcd0cdd1e124071f52b811 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
@@ -53,9 +53,9 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
             input=input_x, size=[100, 10], is_sparse=True
         )
 
-        fc_1 = paddle.fluid.layers.fc(input=emb, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=emb, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py
index 6556e19079ce58a87d8d26bb53de8255a031cd72..3832fd5de23f72d4e83ed65ae0296db9b6a05841 100755
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py
@@ -48,9 +48,9 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
@@ -81,9 +81,9 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
index dc77545cc196b664b0cd812e6d7e4a10587a7e37..376a0d087e6d72b61309d0fdf3fb797707aed978 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
@@ -98,11 +98,11 @@ class TestDistFleetHeterProgram(unittest.TestCase):
         concated = fluid.layers.concat(sparse_embed_seq + inputs[0:1], axis=1)
 
         with fluid.device_guard("gpu"):
-            fc1 = fluid.layers.fc(
-                input=concated,
+            fc1 = paddle.static.nn.fc(
+                x=concated,
                 size=400,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Normal(
                         scale=1 / math.sqrt(concated.shape[1])
                     )
@@ -111,11 +111,11 @@ class TestDistFleetHeterProgram(unittest.TestCase):
             )
 
         with fluid.device_guard("cpu"):
-            fc2 = fluid.layers.fc(
-                input=fc1,
+            fc2 = paddle.static.nn.fc(
+                x=fc1,
                 size=400,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Normal(
                         scale=1 / math.sqrt(fc1.shape[1])
                     )
@@ -124,11 +124,11 @@ class TestDistFleetHeterProgram(unittest.TestCase):
             )
 
         with fluid.device_guard("gpu"):
-            fc3 = fluid.layers.fc(
-                input=fc2,
+            fc3 = paddle.static.nn.fc(
+                x=fc2,
                 size=400,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Normal(
                         scale=1 / math.sqrt(fc2.shape[1])
                     )
@@ -137,11 +137,11 @@ class TestDistFleetHeterProgram(unittest.TestCase):
             )
 
         with fluid.device_guard("cpu"):
-            predict = fluid.layers.fc(
-                input=fc3,
+            predict = paddle.static.nn.fc(
+                x=fc3,
                 size=2,
-                act="softmax",
-                param_attr=fluid.ParamAttr(
+                activation="softmax",
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Normal(
                         scale=1 / math.sqrt(fc3.shape[1])
                     )
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py
index 9fe6e27c27fc4ab8ecc091e86bb170e62ab5aa39..472f8a6ced6fdc70df933ec086276d1a02aa3dec 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py
@@ -85,10 +85,10 @@ class TestPSMinimize(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -113,10 +113,10 @@ class TestPSMinimize(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -140,10 +140,10 @@ class TestPSMinimize(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py
index c641155d9f208513f96f34890350185134a8e4a0..c879875f6f771524573b208625a21595b49b3dc7 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py
@@ -89,10 +89,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -121,10 +121,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -152,10 +152,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py
index 0261df66709e24700029564e4d05cac7c8d0b723..668b64d19390cf153b8d02618817cbbf7bb9a9df 100755
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py
@@ -85,10 +85,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -113,10 +113,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -140,10 +140,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py
index 330d62cfa039a9be80c888e282c8b8cf73906abd..5aa14fba6a5dab4266d669f1b45a8348d63e7ec3 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py
@@ -88,10 +88,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -116,10 +116,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -143,10 +143,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py
index 2bec49877c54df0ae46e4158dcec275d3905ca72..8ecb4e2a2ad8f61f6a988e3cd516bfdd79b8f4c1 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py
@@ -91,10 +91,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -121,10 +121,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -150,10 +150,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py
index d75e16f7776d67f6aa5bbd78b06502e3e7b20429..d8dfcda35e235dddfd64855af3029ab0eb39a299 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py
@@ -91,10 +91,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_ss = paddle.nn.functional.softsign(q_sum)
         q_ss = paddle.static.nn.data_norm(input=q_ss)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -121,10 +121,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -150,10 +150,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py
index 3b735d193b19b5b7811ac250e3f968a6d55cf70d..c4517cc28471f2f6fd89bfea78a9fd8ad0e79751 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py
@@ -89,10 +89,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -121,10 +121,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -152,10 +152,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py
index d1fbfb8937943c405f4f58282a53320d96856e8b..93c2d48f89777a83ff7453396fe6cbfb9b538809 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py
@@ -87,10 +87,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -117,10 +117,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -146,10 +146,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py
index e2e81a747abfc22afa52a9bcc412642fee56cba1..ca69a778aac0395add998e54a55081eb32d04354 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py
@@ -89,10 +89,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -121,10 +121,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -152,10 +152,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py
index 8e8eacece9f2b9f4bcd7d5fbe7d6eb7a87426e1e..ab6bb7198c655c7eac2e71db1d13d13c7ff59fed 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py
@@ -87,10 +87,10 @@ class TestPSPassWithBow(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -117,10 +117,10 @@ class TestPSPassWithBow(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -146,10 +146,10 @@ class TestPSPassWithBow(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py
index 87d1b12a3a5b5bcb82ad099a28f7ed36fd5c1184..57f4615f7c9dbaecbe826f53ff23fca30c5f684d 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py
@@ -230,11 +230,11 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase):
             )
             dnn_out = dnn_pool
             for i, dim in enumerate(dnn_layer_dims[1:]):
-                fc = fluid.layers.fc(
-                    input=dnn_out,
+                fc = paddle.static.nn.fc(
+                    x=dnn_out,
                     size=dim,
-                    act="relu",
-                    param_attr=fluid.ParamAttr(
+                    activation="relu",
+                    weight_attr=fluid.ParamAttr(
                         initializer=fluid.initializer.Constant(value=0.01)
                     ),
                     name='dnn-fc-%d' % i,
@@ -256,7 +256,9 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase):
                 input=lr_embbding, pool_type="sum"
             )
             merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
-            predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
+            predict = paddle.static.nn.fc(
+                x=merge_layer, size=2, activation='softmax'
+            )
             return datas, predict
 
         reader = paddle.batch(fake_ctr_reader(), batch_size=4)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py
index 446c70ae87d11eacecd7c75c09c8d5ce808c2408..72068108d2206478d19f06516db4bcec1714a7e3 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py
@@ -83,10 +83,10 @@ class TestSPMT(unittest.TestCase):
         q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
         q_ss = paddle.nn.functional.softsign(q_sum)
         # fc layer after conv
-        q_fc = fluid.layers.fc(
-            input=q_ss,
+        q_fc = paddle.static.nn.fc(
+            x=q_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__q_fc__",
                 learning_rate=base_lr,
@@ -111,10 +111,10 @@ class TestSPMT(unittest.TestCase):
         pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
         pt_ss = paddle.nn.functional.softsign(pt_sum)
         # fc layer
-        pt_fc = fluid.layers.fc(
-            input=pt_ss,
+        pt_fc = paddle.static.nn.fc(
+            x=pt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
@@ -138,10 +138,10 @@ class TestSPMT(unittest.TestCase):
         nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
         nt_ss = paddle.nn.functional.softsign(nt_sum)
         # fc layer
-        nt_fc = fluid.layers.fc(
-            input=nt_ss,
+        nt_fc = paddle.static.nn.fc(
+            x=nt_ss,
             size=hid_dim,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.01),
                 name="__fc__",
                 learning_rate=base_lr,
diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py
index 00774eff964fa4fe712811f77628adae41b344b3..0749139be840f3dc9151bfa3cdb7775c3589d72b 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py
@@ -56,7 +56,7 @@ class FleetCollectiveTest(unittest.TestCase):
             return
 
         data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-        hidden = fluid.layers.fc(input=data, size=10)
+        hidden = paddle.static.nn.fc(x=data, size=10)
         loss = paddle.mean(hidden)
 
         optimizer = fluid.optimizer.AdamOptimizer()
diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py
index 866722b7d0007200bdf90be9cb3f9c49e9533ab0..368be77fdbbfb923789ac1a4c87b26b6b3d4f7ad 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py
@@ -44,11 +44,11 @@ class SparseLoadOp(unittest.TestCase):
                 ),
             )
 
-            fc1 = fluid.layers.fc(
-                input=emb,
+            fc1 = paddle.static.nn.fc(
+                x=emb,
                 size=10,
-                act="relu",
-                param_attr=fluid.ParamAttr(
+                activation="relu",
+                weight_attr=fluid.ParamAttr(
                     name='fc',
                     initializer=fluid.initializer.NumpyArrayInitializer(
                         fc_array
diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py
index ee9b995031dbc07bc7c2de414710e8558544f37b..f2f526484ce6a7632327855f13f3b7eaab9c36ab 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py
@@ -50,8 +50,10 @@ class TestSparseLoadProgram(unittest.TestCase):
                     emb = fluid.layers.embedding(
                         inputs, is_sparse=True, size=[10000, 128]
                     )
-                    fc1 = fluid.layers.fc(input=emb, size=128, act="relu")
-                    fc2 = fluid.layers.fc(input=fc1, size=64, act="relu")
+                    fc1 = paddle.static.nn.fc(
+                        x=emb, size=128, activation="relu"
+                    )
+                    fc2 = paddle.static.nn.fc(x=fc1, size=64, activation="relu")
                     loss = paddle.mean(fc2)
             return scope, train_program, startup_program, loss
 
diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
index c6bdd59d496634744da2673d7f2ca8b103346376..71fb3f96d4cef8e03f19606322b109fe12813c9f 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -39,11 +39,10 @@ class TranspilerTest(unittest.TestCase):
 
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -294,11 +293,10 @@ class TestNoSliceVar(TranspilerTest):
 class TestLRDecay(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -461,11 +459,10 @@ class TestFakeInit(TranspilerTest):
 class TestDecayedAdagrad(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -482,11 +479,10 @@ class TestDecayedAdagrad(TranspilerTest):
 class TestFtrl(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -503,11 +499,10 @@ class TestFtrl(TranspilerTest):
 class TestLRDecayConditional(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -567,11 +562,10 @@ class TestLRDecayConditional(TranspilerTest):
 class TestL2Decay(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 name='fc_w', regularizer=fluid.regularizer.L2Decay()
             ),
             bias_attr=fluid.ParamAttr(name='fc_b'),
@@ -606,11 +600,10 @@ class TestL2Decay(TranspilerTest):
 class TestL2DecayWithPiecewise(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -682,11 +675,10 @@ class TestEmptyPserverOptimizeBlocks(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
         # only one parameter
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=False,
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -738,11 +730,10 @@ class TestDistLookupTableBase(TranspilerTest):
         fc0 = fluid.layers.concat(
             input=[title_emb, brand_emb, profile_emb], axis=1
         )
-        predict = fluid.layers.fc(
-            input=fc0,
+        predict = paddle.static.nn.fc(
+            x=fc0,
             size=2,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
 
@@ -1126,11 +1117,10 @@ class TestDistArgsInProgram(TestDistLookupTableBase):
 class TestRMSPropOptimizer(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
@@ -1159,11 +1149,10 @@ class TestRMSPropOptimizer(TranspilerTest):
 class TestLoadSliceVar(TranspilerTest):
     def net_conf(self):
         x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
-        y_predict = fluid.layers.fc(
-            input=x,
+        y_predict = paddle.static.nn.fc(
+            x,
             size=1000,
-            act=None,
-            param_attr=fluid.ParamAttr(name='fc_w'),
+            weight_attr=fluid.ParamAttr(name='fc_w'),
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py
index ce93813dd438d2566a3ffbcf0de85e0572ab6d2a..29e022c4ff3c6f9afc494b9c145f08b246b443aa 100644
--- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py
+++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py
@@ -56,7 +56,7 @@ class TestListenAndServOp(unittest.TestCase):
             x_emb = fluid.layers.embedding(
                 input=x, size=[1, 2], is_distributed=True
             )
-            y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x=x_emb, size=1)
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -120,7 +120,7 @@ class TestListenAndServOp(unittest.TestCase):
             x_emb = fluid.layers.embedding(
                 input=x, size=[1, 2], is_distributed=True
             )
-            y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x=x_emb, size=1)
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -182,7 +182,7 @@ class TestListenAndServOp(unittest.TestCase):
             x_emb = fluid.layers.embedding(
                 input=x, size=[1, 2], is_distributed=True
             )
-            y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x=x_emb, size=1)
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
index 9895f5b2488bf276df08e42de0867676140e919f..43bd95270839e2a5e064229d0fba197a76c4bec5 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
@@ -36,15 +36,15 @@ def simple_fc_net():
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
     hidden = image
     for _ in range(4):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=200,
-            act='tanh',
+            activation='tanh',
             bias_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=1.0)
             ),
         )
-    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py
index b7952c3736b26e9f333132d065336c7d201e1489..10f5def7248483bc019209d64f3a804bc9712d83 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py
@@ -309,19 +309,19 @@ class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1):
             h_pre = rnn.memory(init=h_boot)
             x_t = rnn.step_input(x)
 
-            temp_l = layers.fc(
-                input=x_t,
+            temp_l = paddle.static.nn.fc(
+                x=x_t,
                 size=self.input_dim,
-                param_attr=ParamAttr(
+                weight_attr=ParamAttr(
                     name='W',
                     initializer=fluid.initializer.ConstantInitializer(1.0),
                 ),
                 bias_attr=False,
             )
-            temp_r = layers.fc(
-                input=h_pre,
+            temp_r = paddle.static.nn.fc(
+                x=h_pre,
                 size=self.input_dim,
-                param_attr=ParamAttr(
+                weight_attr=ParamAttr(
                     name='U',
                     initializer=fluid.initializer.ConstantInitializer(0.0),
                 ),
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
index 7f29a99dc83ea73725f63f808b0576ee3d44aec3..f2b5f667fde9f40586dbb84fab79f413e365dcc3 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
@@ -130,7 +130,7 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase):
         sum_result.persistable = True
         tmp = paddle.unsqueeze(sum_result, axis=[0])
         tmp = paddle.expand(tmp, [10, -1])
-        fc = layers.fc(tmp, size=256)
+        fc = paddle.static.nn.fc(tmp, size=256)
         loss = paddle.mean(sum_result)
 
         optim = fluid.optimizer.Adam(learning_rate=1e-3)
diff --git a/python/paddle/fluid/tests/unittests/test_ema.py b/python/paddle/fluid/tests/unittests/test_ema.py
index 811e49cc9027976126936763c8a82bf7e571813d..117acb132499ba5a00c523cfa2e86477cc2f0368 100644
--- a/python/paddle/fluid/tests/unittests/test_ema.py
+++ b/python/paddle/fluid/tests/unittests/test_ema.py
@@ -33,8 +33,8 @@ class TestExponentialMovingAverage(unittest.TestCase):
         with fluid.program_guard(self._train_program, self._startup_prog):
             with fluid.unique_name.guard():
                 data = fluid.data(name='x', shape=[-1, 5], dtype='float32')
-                hidden = fluid.layers.fc(
-                    input=data, size=10, param_attr=self._param_name
+                hidden = paddle.static.nn.fc(
+                    x=data, size=10, weight_attr=self._param_name
                 )
                 cost = paddle.mean(hidden)
 
diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr.py b/python/paddle/fluid/tests/unittests/test_entry_attr.py
index 1ae98ab7cd6f251f345eddb1f8ac7c1f156e0f17..07d2ab7fa8f0ebfe2f6e262df340ca8d76d2d2da 100644
--- a/python/paddle/fluid/tests/unittests/test_entry_attr.py
+++ b/python/paddle/fluid/tests/unittests/test_entry_attr.py
@@ -83,7 +83,9 @@ class EntryAttrChecks(unittest.TestCase):
                     param_attr=fluid.ParamAttr(name="deep_embedding"),
                 )
                 pool = fluid.layers.sequence_pool(input=emb, pool_type="sum")
-                predict = fluid.layers.fc(input=pool, size=2, act='softmax')
+                predict = paddle.static.nn.fc(
+                    x=pool, size=2, activation='softmax'
+                )
 
         block = prog.global_block()
         for op in block.ops:
diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr2.py b/python/paddle/fluid/tests/unittests/test_entry_attr2.py
index d1f546249b0f327cd9380345f81f9c04392cad83..5db31f906fa8e306803b0f921f43a188ea291536 100644
--- a/python/paddle/fluid/tests/unittests/test_entry_attr2.py
+++ b/python/paddle/fluid/tests/unittests/test_entry_attr2.py
@@ -43,7 +43,9 @@ class EntryAttrChecks(unittest.TestCase):
                     param_attr=fluid.ParamAttr(name="deep_embedding"),
                 )
                 pool = fluid.layers.sequence_pool(input=emb, pool_type="sum")
-                predict = fluid.layers.fc(input=pool, size=2, act='softmax')
+                predict = paddle.static.nn.fc(
+                    x=pool, size=2, activation='softmax'
+                )
 
         block = prog.global_block()
         for op in block.ops:
diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py
index 45a11656ccf8e537d58f1f4b4c2894910f20a610..aca120b48fc7ebe712f184e77660a0a81a20d9c8 100644
--- a/python/paddle/fluid/tests/unittests/test_exception.py
+++ b/python/paddle/fluid/tests/unittests/test_exception.py
@@ -42,7 +42,7 @@ class TestExceptionNoCStack(unittest.TestCase):
     def test_exception_in_static_mode(self):
         x = fluid.layers.data(name='X', shape=[-1, 13], dtype='float32')
         y = fluid.layers.data(name='Y', shape=[-1, 1], dtype='float32')
-        predict = fluid.layers.fc(input=x, size=1, act=None)
+        predict = paddle.static.nn.fc(x, size=1)
         loss = paddle.nn.functional.square_error_cost(input=predict, label=y)
         avg_loss = paddle.mean(loss)
 
diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
index 9696ebcc4412eb1e893278832b8c6d9c6dacf136..11ea8260efe2e2ca096dd0bbab0a1a2608e0747d 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
@@ -23,7 +23,7 @@ class TestExecutor(unittest.TestCase):
         lr = fluid.data(name="lr", shape=[1], dtype='float32')
         x = fluid.data(name="x", shape=[None, 1], dtype='float32')
         y = fluid.data(name="y", shape=[None, 1], dtype='float32')
-        y_predict = fluid.layers.fc(input=x, size=1, act=None)
+        y_predict = paddle.static.nn.fc(x, size=1)
 
         cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
index 3d8371177705da4ad42b1fee4d014adce026658b..3f6ce3636b67b592d780d5050ea93b79dba1ef8c 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
@@ -25,7 +25,7 @@ class TestExecutor(unittest.TestCase):
         lr = fluid.data(name="lr", shape=[1], dtype='float32')
         x = fluid.data(name="x", shape=[None, 1], dtype='float32')
         y = fluid.data(name="y", shape=[None, 1], dtype='float32')
-        y_predict = fluid.layers.fc(input=x, size=1, act=None)
+        y_predict = paddle.static.nn.fc(x, size=1)
 
         cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/test_fc_op.py b/python/paddle/fluid/tests/unittests/test_fc_op.py
index bc339a516ffacdaa15e89a473ff7227745bb09d8..b07fc1d0001a5ab2e1b63cbce0a9f6f2c0660ad0 100644
--- a/python/paddle/fluid/tests/unittests/test_fc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fc_op.py
@@ -179,26 +179,26 @@ class TestFCOpError(unittest.TestCase):
 
             def test_Variable():
                 # the input type must be Variable
-                fluid.layers.fc(input=input_data, size=1)
+                paddle.static.nn.fc(x=input_data, size=1)
 
             self.assertRaises(TypeError, test_Variable)
 
             def test_input_list():
                 # each of input(list) must be Variable
-                fluid.layers.fc(input=[input_data], size=1)
+                paddle.static.nn.fc(x=[input_data], size=1)
 
             self.assertRaises(TypeError, test_input_list)
 
             def test_type():
                 # dtype must be float32 or float64
                 x2 = fluid.layers.data(name='x2', shape=[4], dtype='int32')
-                fluid.layers.fc(input=x2, size=1)
+                paddle.static.nn.fc(x=x2, size=1)
 
             self.assertRaises(TypeError, test_type)
 
             # The input dtype of fc can be float16 in GPU, test for warning
             x3 = fluid.layers.data(name='x3', shape=[4], dtype='float16')
-            fluid.layers.fc(input=x3, size=1)
+            paddle.static.nn.fc(x=x3, size=1)
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
index 0e08b14e32f9583fb53962158a6bb885ab293143..b642e2524ee292f87ac24f6b9396b91b0e4f04c1 100644
--- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
+++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
@@ -63,9 +63,11 @@ class TestFeedData(unittest.TestCase):
 
         hidden = in_data
         for hidden_size in hidden_sizes:
-            hidden = fluid.layers.fc(hidden, size=hidden_size)
+            hidden = paddle.static.nn.fc(hidden, size=hidden_size)
 
-        predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax')
+        predict_label = paddle.static.nn.fc(
+            hidden, size=class_num, activation='softmax'
+        )
         loss = paddle.mean(
             paddle.nn.functional.cross_entropy(
                 input=predict_label,
diff --git a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py
index c1b8046c6976ba3ecbd5e14d88d1e3119bcd6b4c..d93ee36b6e2409d793e5a35a0d281150628146ee 100644
--- a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py
+++ b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py
@@ -44,8 +44,10 @@ class TestFetchUnmerged(unittest.TestCase):
             pool_type='avg',
             act="relu",
         )
-        hidden = fluid.layers.fc(input=conv_pool_2, size=32, act='relu')
-        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+        hidden = paddle.static.nn.fc(x=conv_pool_2, size=32, activation='relu')
+        prediction = paddle.static.nn.fc(
+            x=hidden, size=10, activation='softmax'
+        )
         loss = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_fleet.py b/python/paddle/fluid/tests/unittests/test_fleet.py
index 6092710a798c0ad3a08181e64a5049f510cda57d..bc5a083d17d4c1e92647ed4f051fd36b70c0e21e 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet.py
@@ -70,7 +70,7 @@ class TestFleet1(unittest.TestCase):
             bow = paddle.static.nn.data_norm(
                 input=bow, epsilon=1e-4, name="norm"
             )
-            fc = fluid.layers.fc(input=bow, size=1, act=None)
+            fc = paddle.static.nn.fc(x=bow, size=1, activation=None)
             label = fluid.layers.data(
                 name="click",
                 shape=[-1, 1],
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py
index b3817573352f15d83a5eb0d20970c0facef7adaf..12acfdf76321d3ab95b29c9845ae9d0f1052d2c6 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py
@@ -62,7 +62,7 @@ class FleetTest(unittest.TestCase):
         self.assertRaises(Exception, fleet.init, "pserver")
 
         data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-        hidden = fluid.layers.fc(input=data, size=10)
+        hidden = paddle.static.nn.fc(x=data, size=10)
         loss = paddle.mean(hidden)
         adam = fluid.optimizer.Adam()
         adam.minimize(loss)
@@ -177,7 +177,7 @@ class TranspilerOptimizerTest(unittest.TestCase):
         transpiler = TranspilerOptimizer(fluid.optimizer.Adam(0.001))
         self.assertRaises(Exception, transpiler.minimize, loss=[])
         data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-        hidden = fluid.layers.fc(input=data, size=10)
+        hidden = paddle.static.nn.fc(x=data, size=10)
         loss = paddle.mean(hidden)
         self.assertRaises(
             Exception, transpiler.minimize, loss=loss.name, startup_program=[]
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_auto.py b/python/paddle/fluid/tests/unittests/test_fleet_auto.py
index 28f05a6e03f6fb6a1a0a717dde50a95616be40de..1e30f703ff893b64a15af32316f6a4caaacc9b29 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_auto.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_auto.py
@@ -37,9 +37,9 @@ class TestDistributedStrategyAuto(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py
index 1f5ea942965fab182e9a626ca97ec2180ca080e6..fe24c8838ec6c2ac41dde0d8f7ac9911509942b0 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_base.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py
@@ -201,8 +201,10 @@ class TestFleetBaseSingleError(unittest.TestCase):
             )
             input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
 
-            fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+            fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+            prediction = paddle.static.nn.fc(
+                x=fc_1, size=2, activation='softmax'
+            )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
                 label=input_y,
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py
index fdffc388eacea6fd18b837b142a36f435860238a..4a56f8913a434a76080a0f61b633293362a687b4 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py
@@ -51,9 +51,9 @@ class TestFleetBase(unittest.TestCase):
             input=input_slot, size=[10, 9], is_sparse=True
         )
         input_x = paddle.concat(x=[input_x, emb], axis=1)
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py
index 33d970f109d78a7dfccab9978aeffef3627117a6..30cdf5fbed4b7267c2d404e8d7010afff40a2f60 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py
@@ -37,9 +37,9 @@ class TestFleetBase_1(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
@@ -68,9 +68,9 @@ class TestFleetBase(unittest.TestCase):
         )
         input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')
 
-        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
-        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
-        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=[fc_2], size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py
index 4040e10594e9a15ccc5f452d83e63f92b9a12fdd..6864c541f7ebaaa5f94cf17ae7ef948ecf620be5 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py
@@ -83,8 +83,8 @@ class TestFleetBaseSingleRunCollective(unittest.TestCase):
         input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32')
         input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
 
-        fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
-        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
@@ -124,8 +124,8 @@ class TestFleetBaseSingleRunPS(unittest.TestCase):
         input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32')
         input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
 
-        fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
-        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
+        fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh')
+        prediction = paddle.static.nn.fc(x=fc_1, size=2, activation='softmax')
         cost = paddle.nn.functional.cross_entropy(
             input=prediction, label=input_y, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py
index f5975ae990d7029f6bba659aec7132552b61206b..c651a456fd5112e802dfee765542a4d04737d844 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py
@@ -66,7 +66,7 @@ class TestFleet1(unittest.TestCase):
                 is_distributed=True,
                 param_attr=fluid.ParamAttr(name="embedding"),
             )
-            fc = fluid.layers.fc(input=emb, size=1, act=None)
+            fc = paddle.static.nn.fc(x=emb, size=1, activation=None)
             label = fluid.layers.data(
                 name="click",
                 shape=[-1, 1],
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py
index daee01f38f742c8c966a113c1e6970c35ca18ab2..4c3c321ac0ad3c103ef0ecb828cd4eb9483f84f5 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py
@@ -89,7 +89,7 @@ class TestCloudRoleMaker(unittest.TestCase):
                 lod_level=1,
                 append_batch_size=False,
             )
-            fc = fluid.layers.fc(input=show, size=1, act=None)
+            fc = paddle.static.nn.fc(x=show, size=1, activation=None)
             label = fluid.layers.data(
                 name="click",
                 shape=[-1, 1],
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py
index 7a6ba4248352a4ef83c737604d75c82b113db9a0..50a6013e2d2172b33356e6f7dc2bb0800cac2cb9 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py
@@ -70,7 +70,7 @@ class TestCloudRoleMaker2(unittest.TestCase):
                 lod_level=1,
                 append_batch_size=False,
             )
-            fc = fluid.layers.fc(input=show, size=1, act=None)
+            fc = paddle.static.nn.fc(x=show, size=1, activation=None)
             label = fluid.layers.data(
                 name="click",
                 shape=[-1, 1],
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py
index c3df410610ba9688e3bc44e164bb59cc2764033a..27cb171c0dd6d285a4526224ae2b2210c8ff27be 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py
@@ -63,7 +63,7 @@ class TestCloudRoleMaker(unittest.TestCase):
                 lod_level=1,
                 append_batch_size=False,
             )
-            fc = fluid.layers.fc(input=show, size=1, act=None)
+            fc = paddle.static.nn.fc(x=show, size=1, activation=None)
             label = fluid.layers.data(
                 name="click",
                 shape=[-1, 1],
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py
index 78c4a4541e3c06e9989cf4cdb64795cea1edbf09..178fcfa230e449c4d0a1e8c96f2a3401a728dbdc 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py
@@ -66,7 +66,7 @@ class TestFleet1(unittest.TestCase):
                 is_distributed=True,
                 param_attr=fluid.ParamAttr(name="embedding"),
             )
-            fc = fluid.layers.fc(input=emb, size=1, act=None)
+            fc = paddle.static.nn.fc(x=emb, size=1, activation=None)
             label = fluid.layers.data(
                 name="click",
                 shape=[-1, 1],
diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py
index 7826880ac6ca1614844613499e48c80d976f5b50..ccb397fc19c74d10f115b9b6d5e0ca9e71c1d9b7 100644
--- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py
@@ -48,11 +48,13 @@ class TestFuseBatchNormActPass(unittest.TestCase):
                 act='relu',
                 data_layout='NHWC',
             )
-            hidden3 = fluid.layers.fc(input=hidden2, size=32, act='relu')
+            hidden3 = paddle.static.nn.fc(x=hidden2, size=32, activation='relu')
             hidden4 = paddle.static.nn.batch_norm(
                 input=hidden3, act='relu', data_layout='NHWC'
             )
-            prediction = fluid.layers.fc(input=hidden4, size=10, act='softmax')
+            prediction = paddle.static.nn.fc(
+                x=hidden4, size=10, activation='softmax'
+            )
             loss = paddle.nn.functional.cross_entropy(
                 input=prediction, label=y, reduction='none', use_softmax=False
             )
diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py
index a193a81d6c678ae0094c3bb29ed976a2673a6676..43578016e90ac987b9da86b591d87009c5fa6b2d 100644
--- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py
@@ -101,11 +101,11 @@ class TestFusedBnAddActAPI(unittest.TestCase):
                 param_attr=self.bn_param_attr2,
                 bias_attr=self.bn_bias_attr2,
             )
-            prediction = fluid.layers.fc(
-                input=fused_bn_add_act,
+            prediction = paddle.static.nn.fc(
+                x=fused_bn_add_act,
                 size=10,
-                act='softmax',
-                param_attr=self.fc_param_attr,
+                activation='softmax',
+                weight_attr=self.fc_param_attr,
             )
             loss = paddle.nn.functional.cross_entropy(
                 input=prediction, label=y, reduction='none', use_softmax=False
@@ -162,8 +162,11 @@ class TestFusedBnAddActAPI(unittest.TestCase):
             )
             out = bn1 + bn2
             out = F.relu(out)
-            prediction = fluid.layers.fc(
-                input=out, size=10, act='softmax', param_attr=self.fc_param_attr
+            prediction = paddle.static.nn.fc(
+                x=out,
+                size=10,
+                activation='softmax',
+                weight_attr=self.fc_param_attr,
             )
             loss = paddle.nn.functional.cross_entropy(
                 input=prediction, label=y, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py
index c074a1fe0dcabd90aa885aa9d8d1333fd45ec7d5..34fa16314d04c19b65ba80fd2f0378dca4a7dca0 100644
--- a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py
@@ -61,7 +61,7 @@ def simple_depthwise_net(use_feed):
     for _ in range(4):
         hidden = sep_conv(hidden, channel=200, stride=2, filter=5)
         hidden = F.relu(hidden)
-    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py
index 02a34401e79f08a15d2babb03a5cc95739b44fc0..6d7b9914257620e20e001e277190a1f2f9ed97e3 100644
--- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py
+++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py
@@ -55,17 +55,17 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer):
             )
             hidden = image
             for hidden_size in [10, 20, 30]:
-                hidden = fluid.layers.fc(
+                hidden = paddle.static.nn.fc(
                     hidden,
                     size=hidden_size,
-                    act='tanh',
+                    activation='tanh',
                     bias_attr=fluid.ParamAttr(
                         initializer=fluid.initializer.Constant(value=1.0)
                     ),
                 )
 
-            predict_label = fluid.layers.fc(
-                hidden, size=CLASS_NUM, act='softmax'
+            predict_label = paddle.static.nn.fc(
+                hidden, size=CLASS_NUM, activation='softmax'
             )
             loss = paddle.mean(
                 paddle.nn.functional.cross_entropy(
diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
index b5b0b20c6f48bc841bd0dfb5f9a61449cadc93bf..c74917c2a076a3c443ad48e2cfe0a137fc2376c8 100644
--- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py
+++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
@@ -38,9 +38,11 @@ def bow_net(
     )
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
     bow_tanh = paddle.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
+    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
+    prediction = paddle.static.nn.fc(
+        x=[fc_2], size=class_dim, activation="softmax"
+    )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -80,10 +82,16 @@ class TestGradientClip(unittest.TestCase):
             label = fluid.data(name="b", shape=[-1, 1], dtype='int64')
             if dtype != 'float32':
                 image_cast = paddle.cast(image, dtype)
-                hidden = fluid.layers.fc(input=image_cast, size=32, act='relu')
+                hidden = paddle.static.nn.fc(
+                    x=image_cast, size=32, activation='relu'
+                )
             else:
-                hidden = fluid.layers.fc(input=image, size=32, act='relu')
-            predict = fluid.layers.fc(input=hidden, size=10, act='softmax')
+                hidden = paddle.static.nn.fc(
+                    x=image, size=32, activation='relu'
+                )
+            predict = paddle.static.nn.fc(
+                x=hidden, size=10, activation='softmax'
+            )
 
             cost = paddle.nn.functional.cross_entropy(
                 input=predict, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py
index 298111fdb61df75ecfc8651b88c4f5533139ff38..c485a5182843447d051cf4406304eb91849e3b3e 100644
--- a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py
+++ b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py
@@ -43,7 +43,9 @@ class TestLayer(unittest.TestCase):
                 name='pixel', shape=[3, 48, 48], dtype='float32'
             )
             hidden1 = paddle.static.nn.batch_norm(input=images)
-            hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu')
+            hidden2 = paddle.static.nn.fc(
+                x=hidden1, size=128, activation='relu'
+            )
             paddle.static.nn.batch_norm(input=hidden2)
 
         print(str(main_program))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
index 0e1974474d86ca235886609d881455fbbdda45fd..abf7a95bfb723e6d6efa862cef0c18e05be7ab2d 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
@@ -33,8 +33,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
         a = fluid.data(name="a", shape=[10, 10])
         conv_in = fluid.data(name="conv_in", shape=[None, 10, 10, 10])
 
-        fc_out1 = fluid.layers.fc(a, 10)
-        fc_out2 = fluid.layers.fc(a, 20)
+        fc_out1 = paddle.static.nn.fc(a, 10)
+        fc_out2 = paddle.static.nn.fc(a, 20)
 
         conv_out_1 = paddle.static.nn.conv2d(
             conv_in, num_filters=10, filter_size=5, act="relu"
diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
index 5a07e812d0ed549ca7f05ded905195d35863cbfc..f6b3ba6b69539ca1d36606b4ec2fd38f48b1a938 100644
--- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py
+++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
@@ -58,7 +58,7 @@ class TestBook(unittest.TestCase):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='float32')
 
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x=x, size=1, activation=None)
 
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -166,7 +166,7 @@ class TestSaveInferenceModel(unittest.TestCase):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='float32')
 
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
 
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -190,7 +190,7 @@ class TestSaveInferenceModel(unittest.TestCase):
         with program_guard(program, init_program):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='int32')
-            predict = fluid.layers.fc(input=x, size=2, act='softmax')
+            predict = paddle.static.nn.fc(x, size=2, activation='softmax')
             acc = paddle.static.accuracy(input=predict, label=y)
             auc_var, batch_auc_var, auc_states = paddle.static.auc(
                 input=predict, label=y
@@ -226,7 +226,7 @@ class TestInstance(unittest.TestCase):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='float32')
 
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
 
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -264,7 +264,7 @@ class TestSaveInferenceModelNew(unittest.TestCase):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='float32')
 
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
 
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -443,7 +443,7 @@ class TestSaveInferenceModelNew(unittest.TestCase):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='float32')
 
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
 
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -492,7 +492,7 @@ class TestSaveInferenceModelNew(unittest.TestCase):
             x = layers.data(name='x', shape=[2], dtype='float32')
             y = layers.data(name='y', shape=[1], dtype='float32')
 
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
 
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
diff --git a/python/paddle/fluid/tests/unittests/test_io_save_load.py b/python/paddle/fluid/tests/unittests/test_io_save_load.py
index 487ed531e38fa6c46ea12d1c865b42465d259339..309e0594ed555a77e211c9689c38c9c2114334e1 100644
--- a/python/paddle/fluid/tests/unittests/test_io_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_io_save_load.py
@@ -72,7 +72,7 @@ class TestSaveInferenceModelAPIError(unittest.TestCase):
         with fluid.program_guard(main_prog, start_prog):
             x = fluid.data(name='x', shape=[10, 16], dtype='float32')
             y = fluid.data(name='y', shape=[10, 16], dtype='float32')
-            z = fluid.layers.fc(x, 4)
+            z = paddle.static.nn.fc(x, 4)
 
         exe = fluid.Executor(fluid.CPUPlace())
         exe.run(start_prog)
diff --git a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py
index 5d3efb8230ae22e2c0a5dd828651a79a3191ce2e..48156b7448218ab67ebfe69191cfa5b02dfa0b84 100644
--- a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py
@@ -29,17 +29,17 @@ def fc_with_batchnorm(use_feed):
 
     hidden = img
     for _ in range(3):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=200,
-            act='tanh',
+            activation='tanh',
             bias_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=1.0)
             ),
         )
 
         hidden = paddle.static.nn.batch_norm(input=hidden)
-    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py
index f2de46ea465a93507f889daf3c1f9952f9124ff0..9e51118e96c4c9fda4187b1046a9d90f943d4b40 100644
--- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py
@@ -33,8 +33,8 @@ def simple_fc_net(use_feed):
     x, y = _feed_data_helper()
     hidden_layer = 4
     for _ in range(hidden_layer):
-        x = fluid.layers.fc(input=x, size=20, act='relu')
-    y_predict = fluid.layers.fc(input=x, size=10, act='softmax')
+        x = paddle.static.nn.fc(x, size=20, activation='relu')
+    y_predict = paddle.static.nn.fc(x, size=10, activation='softmax')
     cost = paddle.nn.functional.cross_entropy(
         input=y_predict, label=y, reduction='none', use_softmax=False
     )
@@ -45,11 +45,11 @@ def simple_fc_net(use_feed):
 def fc_with_inplace_net(use_feed):
     assert use_feed
     x, y = _feed_data_helper()
-    fc = fluid.layers.fc(input=x, size=20, act='relu')
-    fc = fluid.layers.fc(input=fc, size=10, act='relu')
+    fc = paddle.static.nn.fc(x=x, size=20, activation='relu')
+    fc = paddle.static.nn.fc(x=fc, size=10, activation='relu')
     reshape = paddle.reshape(x=fc, shape=[-1, 2, 5])
     reshape = paddle.reshape(x=reshape, shape=[-1, 5, 2])
-    y_predict = fluid.layers.fc(input=reshape, size=10, act='softmax')
+    y_predict = paddle.static.nn.fc(x=reshape, size=10, activation='softmax')
     cost = paddle.nn.functional.cross_entropy(
         input=y_predict, label=y, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py
index d3abf54a00beed1f58f9e09bbc123b1b39da4f2d..f8f65f5a14a4787bf3522319af80a144739551fc 100644
--- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py
@@ -125,7 +125,7 @@ class TestLambOpWithCombinedOp(unittest.TestCase):
                 startup.random_seed = seed
                 x = fluid.layers.data(name='X', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='Y', shape=[1], dtype='float32')
-                prediction = fluid.layers.fc(input=x, size=1, act=None)
+                prediction = paddle.static.nn.fc(x, size=1, activation=None)
                 loss = paddle.nn.functional.square_error_cost(
                     input=prediction, label=y
                 )
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 3f8e774656725f33e44c2a736529f5ec131f5ff9..e33cde31799adc376fe4160e8bfbd027c46b8811 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -1391,7 +1391,8 @@ class TestLayer(LayerTest):
         with self.static_graph():
             data = fluid.data(name="input", shape=[-1, 32, 32], dtype="float32")
             label = fluid.data(name="label", shape=[-1, 1], dtype="int")
-            fc_out = fluid.layers.fc(input=data, size=10)
+            data_new = paddle.reshape(data, [3, 32 * 32])
+            fc_out = paddle.nn.Linear(32 * 32, 10)(data_new)
             predict = paddle.nn.functional.softmax(fc_out)
             result = paddle.static.accuracy(input=predict, label=label, k=5)
             place = fluid.CPUPlace()
@@ -1407,7 +1408,8 @@ class TestLayer(LayerTest):
         with self.dynamic_graph(force_to_use_cpu=True):
             data = base.to_variable(x)
             label = base.to_variable(y)
-            fc_out = fluid.layers.fc(data, size=10)
+            data_new = paddle.reshape(data, [3, 32 * 32])
+            fc_out = paddle.nn.Linear(32 * 32, 10)(data_new)
             predict = paddle.nn.functional.softmax(fc_out)
             dynamic_out = paddle.static.accuracy(
                 input=predict, label=label, k=5
@@ -1528,7 +1530,7 @@ class TestBook(LayerTest):
             startup_program=fluid.default_startup_program(),
         ):
             x = self._get_data(name='x', shape=[13], dtype='float32')
-            y_predict = layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.nn.Linear(13, 1)(x)
             y = self._get_data(name='y', shape=[1], dtype='float32')
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
@@ -1543,14 +1545,14 @@ class TestBook(LayerTest):
             # Change g_program, so the rest layers use `g_program`
             images = self._get_data(name='pixel', shape=[784], dtype='float32')
             label = self._get_data(name='label', shape=[1], dtype='int64')
-            hidden1 = layers.fc(input=images, size=128, act='relu')
-            hidden2 = layers.fc(input=hidden1, size=64, act='relu')
-            predict = layers.fc(
-                input=[hidden2, hidden1],
-                size=10,
-                act='softmax',
-                param_attr=["sftmax.w1", "sftmax.w2"],
-            )
+            hidden1 = paddle.nn.Linear(784, 128)(images)
+            hidden1 = paddle.nn.functional.relu(hidden1)
+            hidden2 = paddle.nn.Linear(128, 64)(hidden1)
+            hidden2 = paddle.nn.functional.relu(hidden2)
+            hidden1 = paddle.nn.Linear(128, 10, "sftmax.w1")(hidden1)
+            hidden2 = paddle.nn.Linear(64, 10, "sftmax.w2")(hidden2)
+            hidden = hidden1 + hidden2
+            predict = paddle.nn.functional.softmax(hidden)
             cost = paddle.nn.functional.cross_entropy(
                 input=predict, label=label, reduction='none', use_softmax=False
             )
@@ -1591,7 +1593,22 @@ class TestBook(LayerTest):
                 act="relu",
             )
 
-            predict = layers.fc(input=conv_pool_2, size=10, act="softmax")
+            conv_pool_2_new = paddle.reshape(
+                conv_pool_2,
+                [
+                    conv_pool_2.shape[0],
+                    conv_pool_2.shape[1]
+                    * conv_pool_2.shape[2]
+                    * conv_pool_2.shape[3],
+                ],
+            )
+            predict = paddle.nn.Linear(
+                conv_pool_2.shape[1]
+                * conv_pool_2.shape[2]
+                * conv_pool_2.shape[3],
+                10,
+            )(conv_pool_2_new)
+            predict = paddle.nn.functional.softmax(predict)
             cost = paddle.nn.functional.cross_entropy(
                 input=predict, label=label, reduction='none', use_softmax=False
             )
@@ -1643,9 +1660,11 @@ class TestBook(LayerTest):
                 axis=1,
             )
 
-            hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid')
-            predict_word = layers.fc(
-                input=hidden1, size=dict_size, act='softmax'
+            hidden1 = paddle.static.nn.fc(
+                x=concat_embed, size=256, activation='sigmoid'
+            )
+            predict_word = paddle.static.nn.fc(
+                x=hidden1, size=dict_size, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=predict_word,
@@ -1682,7 +1701,7 @@ class TestBook(LayerTest):
             fluid.default_main_program(), fluid.default_startup_program()
         ):
             data = self._get_data(name='data', shape=[10], dtype='float32')
-            hid = layers.fc(input=data, size=20)
+            hid = paddle.nn.Linear(10, 20)(data)
             return paddle.nn.functional.softmax(hid, axis=1)
 
     @prog_scope()
@@ -2108,7 +2127,7 @@ class TestBook(LayerTest):
             seq_data = layers.data(
                 name='seq_data', shape=[10, 10], dtype='float32', lod_level=1
             )
-            seq = layers.fc(input=seq_data, size=20)
+            seq = paddle.static.nn.fc(x=seq_data, size=20)
             return layers.sequence_softmax(seq)
 
     def test_sequence_unsqueeze(self):
diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
index 66557b84079692ed76ad011e9f5904306b35697e..1ed1a713e30aa3cf3f3fc7a479ea1ec1583a2070 100644
--- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
+++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
@@ -32,7 +32,7 @@ paddle.enable_static()
 def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id):
     remove_ps_flag(os.getpid())
     x = fluid.layers.data(name='x', shape=[1], dtype='float32')
-    y_predict = fluid.layers.fc(input=x, size=1, act=None)
+    y_predict = paddle.static.nn.fc(x, size=1, activation=None)
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
     # loss function
@@ -69,7 +69,7 @@ def run_pserver_with_empty_block(
 ):
     remove_ps_flag(os.getpid())
     x = fluid.layers.data(name='x', shape=[1], dtype='float32')
-    y_predict = fluid.layers.fc(input=x, size=1, act=None, bias_attr=False)
+    y_predict = paddle.static.nn.fc(x, size=1, bias_attr=False)
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
     # loss function
diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py
index 6a00320322eec19fe5dfc5a07f2f7221faa295d3..15317e7538aff019447fc11f57920d5a4a2c1e23 100644
--- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py
+++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py
@@ -42,7 +42,9 @@ def convolutional_neural_network(img):
         pool_stride=2,
         act="relu",
     )
-    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(
+        x=conv_pool_2, size=10, activation='softmax'
+    )
     return prediction
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py
index 15d81c68e8c8f310c3a57dd2cf30a0f5da64c044..3b2465a344ee1f96e5d02a6ddfb3a7e38a620aa5 100644
--- a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py
+++ b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py
@@ -16,6 +16,7 @@ import os
 import shutil
 import unittest
 
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.executor import Executor
 
@@ -30,7 +31,7 @@ class TestLoadVarsShapeCheck(unittest.TestCase):
 
         with fluid.program_guard(program_1, startup_program_1):
             input = fluid.layers.data(name="x", shape=[-1, 10], dtype='float32')
-            out = fluid.layers.fc(input, 20)
+            out = paddle.static.nn.fc(input, 20)
         place = fluid.CPUPlace()
         exe = Executor(place)
         exe.run(startup_program_1)
diff --git a/python/paddle/fluid/tests/unittests/test_lookahead.py b/python/paddle/fluid/tests/unittests/test_lookahead.py
index 4fa8666d4b1f94f4116b65a5ceb93e257992e190..5860f81f736d27bf6a9c3efcebd498b3c9494c75 100644
--- a/python/paddle/fluid/tests/unittests/test_lookahead.py
+++ b/python/paddle/fluid/tests/unittests/test_lookahead.py
@@ -36,7 +36,7 @@ class TestLookAhead(unittest.TestCase):
         with fluid.program_guard(train_program, startup):
             with fluid.unique_name.guard():
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-                hidden = fluid.layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
                 loss = paddle.mean(hidden)
 
                 optimizer = paddle.optimizer.SGD(learning_rate=SGD_LR)
diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py
index f1293ea7a765b80711a8c374b8142f8423031526..b083bc6d3051b80e5cf0053a76e062dae785d2a1 100644
--- a/python/paddle/fluid/tests/unittests/test_memory_usage.py
+++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py
@@ -27,7 +27,7 @@ def train_simulator(test_batch_size=10):
         )
 
     x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-    y_predict = fluid.layers.fc(input=x, size=1, act=None)
+    y_predict = paddle.static.nn.fc(x, size=1, activation=None)
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
     cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
diff --git a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py
index 08bc72ffd7415b408bf98083b1a694b1e9660707..20bf8e0985aa3cc3b2ed3f002af864024dc91d4d 100644
--- a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py
+++ b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py
@@ -27,7 +27,7 @@ img_shape = [1, 28, 28]
 
 
 def loss_net(hidden, label):
-    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(x=hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_modelaverage.py b/python/paddle/fluid/tests/unittests/test_modelaverage.py
index 937d293a81dac3389ad9c05303551958febf2da4..156f0cfb8bce859d78c0d43f4ed8c92b4ec9a66c 100644
--- a/python/paddle/fluid/tests/unittests/test_modelaverage.py
+++ b/python/paddle/fluid/tests/unittests/test_modelaverage.py
@@ -33,7 +33,7 @@ class TestModelAverage(unittest.TestCase):
         with fluid.program_guard(train_program, startup):
             with fluid.unique_name.guard():
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-                hidden = fluid.layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
                 loss = paddle.mean(hidden)
                 test_program = train_program.clone()
                 optimizer = paddle.optimizer.Momentum(
diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py
index 00088ab276b766a3e9a971471c31cbb68f4ef4b6..4b745cb7a6fb0c682332b04811a26612fc3566e8 100644
--- a/python/paddle/fluid/tests/unittests/test_momentum_op.py
+++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py
@@ -530,7 +530,7 @@ class TestMomentumV2(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
@@ -668,7 +668,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py
index beca81e7047101e708a800b851d000d2f63d0e20..a4dc9f33279db55cdd270e42bfbee8d730015273 100644
--- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py
+++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py
@@ -64,19 +64,19 @@ def simple_fc_net_static():
                 initializer=fluid.initializer.Constant(value=0.5)
             )
             for hidden_size in [10, 20, 30]:
-                hidden = fluid.layers.fc(
+                hidden = paddle.static.nn.fc(
                     hidden,
                     size=hidden_size,
-                    act='tanh',
-                    param_attr=param_attr,
+                    activation='tanh',
+                    weight_attr=param_attr,
                     bias_attr=bias_attr,
                 )
 
-            predict_label = fluid.layers.fc(
+            predict_label = paddle.static.nn.fc(
                 hidden,
                 size=CLASS_NUM,
-                act='softmax',
-                param_attr=param_attr,
+                activation='softmax',
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
             )
             loss = paddle.mean(
diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py
index e63cf6694af266248a6dfc2b1915553afbcdc5ef..24c008a60271f26169cc02f6ab597dfba5a50dd2 100644
--- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py
+++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py
@@ -64,19 +64,19 @@ def simple_fc_net_static():
                 initializer=fluid.initializer.Constant(value=0.5)
             )
             for hidden_size in [10, 20, 30]:
-                hidden = fluid.layers.fc(
+                hidden = paddle.static.nn.fc(
                     hidden,
                     size=hidden_size,
-                    act='tanh',
-                    param_attr=param_attr,
+                    activation='tanh',
+                    weight_attr=param_attr,
                     bias_attr=bias_attr,
                 )
 
-            predict_label = fluid.layers.fc(
+            predict_label = paddle.static.nn.fc(
                 hidden,
                 size=CLASS_NUM,
-                act='softmax',
-                param_attr=param_attr,
+                activation='softmax',
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
             )
             loss = paddle.mean(
diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
index af4ff64c894485fc94d49ba3a1a437b61240b83e..cdde0c1a468ec6960377dc8a448f919fb020428e 100644
--- a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
+++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
@@ -32,7 +32,7 @@ class TestNetWithDtype(unittest.TestCase):
         with fluid.program_guard(main, startup):
             x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
             y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py
index 7a96f0fca9275d0411ac1c4e53a0221434e1a641..d5f54d44829525474fdc9e365435d091e4ddfcca 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -1164,8 +1164,8 @@ class TestRecomputeOptimizer(unittest.TestCase):
             drop_res = paddle.nn.functional.dropout(
                 input_x, p=0.5, name="dropout_with_seed_cpu"
             )
-            prediction = fluid.layers.fc(
-                input=[drop_res], size=2, act='softmax'
+            prediction = paddle.static.nn.fc(
+                x=[drop_res], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
@@ -1226,8 +1226,8 @@ class TestRecomputeOptimizerCUDA(unittest.TestCase):
             drop_res = paddle.nn.functional.dropout(
                 input_x, p=0.5, name="dropout_with_seed_gpu"
             )
-            prediction = fluid.layers.fc(
-                input=[drop_res], size=2, act='softmax'
+            prediction = paddle.static.nn.fc(
+                x=[drop_res], size=2, activation='softmax'
             )
             cost = paddle.nn.functional.cross_entropy(
                 input=prediction,
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py
index 99c4d79bb3168dd49aa4b5fa9c4db7ce4cce60a3..255760e11ca403a04be0a71bdb907327ed07dd12 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py
@@ -99,7 +99,7 @@ class SimpleNetWithCond:
 
         sum_xy = paddle.add(param_x, param_y, name='sum_xy')
         sub_yz = paddle.subtract(param_y, param_z, name='sub_yz')
-        useless = fluid.layers.fc(param_x, size=1, name='fc_useless')
+        useless = paddle.static.nn.fc(param_x, size=1, name='fc_useless')
 
         def cond_true():
             cond_yz = paddle.add(param_y, param_z, name='sum_cond_yz')
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
index ce250524b80c1b2c1521c880d18619bebf9f2ee1..731693f5cf7fc47f996236349bc6fb0f670b412f 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
@@ -46,11 +46,11 @@ def static(
     with program_guard(main_program, startup_program):
 
         def double_fc_net(image):
-            hidden = layers.fc(
+            hidden = paddle.static.nn.fc(
                 image,
                 size=FC_SIZE,
-                act='relu',
-                param_attr=fluid.ParamAttr(
+                activation='relu',
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=0.99)
                 ),
                 bias_attr=fluid.ParamAttr(
@@ -59,11 +59,11 @@ def static(
                 name="hidden",
             )
 
-            prediction = layers.fc(
+            prediction = paddle.static.nn.fc(
                 hidden,
                 size=CLASS_NUM,
-                act='softmax',
-                param_attr=fluid.ParamAttr(
+                activation='softmax',
+                weight_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=1.2)
                 ),
                 bias_attr=fluid.ParamAttr(
@@ -265,7 +265,7 @@ class TestMultiOptimizersMultiCardsError(unittest.TestCase):
                 opt.minimize(avg_loss)
 
             x = fluid.layers.data("X", [10], 'float32')
-            hidden = layers.fc(x, 5)
+            hidden = paddle.static.nn.fc(x, 5)
             avg_loss = paddle.mean(hidden)
 
             adam = optimizer.Adam(learning_rate=LR)
diff --git a/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py b/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py
index f467d17ee20abe3e0819df80ed807c5c09241c9e..1dfd7f2f858287ab9e430c781e54142deb36d9cc 100644
--- a/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py
+++ b/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py
@@ -32,7 +32,7 @@ class TestModelAverage(unittest.TestCase):
         with fluid.program_guard(train_program, startup):
             with fluid.unique_name.guard():
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
-                hidden = fluid.layers.fc(input=data, size=10)
+                hidden = paddle.static.nn.fc(x=data, size=10)
                 loss = paddle.mean(hidden)
                 test_program = train_program.clone()
                 optimizer = paddle.optimizer.Momentum(
diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py
index 4616d8b4b2a472c49815aa2bb8cfba36c43d13a9..ece27b82c5d84f60b04d4df059f0e87bd6fb4dd5 100644
--- a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py
+++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py
@@ -142,7 +142,7 @@ class TestSaveLoadBinaryFormat(unittest.TestCase):
         OUTPUT_NUM = 32
         with new_program_scope():
             x = fluid.data(name="x", shape=[None, IMAGE_SIZE], dtype='float32')
-            y = fluid.layers.fc(
+            y = paddle.static.nn.fc(
                 x,
                 OUTPUT_NUM,
                 name='fc_vars',
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py
index ea6805a6bf9d86236420826e830b1b592d667f05..8654f0ba5e97c0e86c1242003191bd80045f28b9 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py
@@ -32,7 +32,7 @@ class TestParallelExecutorDropExeScope(unittest.TestCase):
         startup_program = fluid.Program()
         with fluid.program_guard(train_program, startup_program):
             data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-            hidden = fluid.layers.fc(input=data, size=10)
+            hidden = paddle.static.nn.fc(x=data, size=10)
             loss = paddle.mean(hidden)
             test_program = fluid.default_main_program().clone(for_test=True)
             fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
index f540c73113ba41bf9ffbe09eebc44874db276676..93fecfefb5a4475f24dc941bff7a3ae12cf39b2b 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
@@ -80,8 +80,10 @@ class TestMNISTDryRun(TestBase):
         label = fluid.layers.data(name='label', shape=[1], dtype='int64')
         hidden = img
         for _ in range(10):
-            hidden = fluid.layers.fc(input=img, size=200, act='tanh')
-        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+            hidden = paddle.static.nn.fc(x=img, size=200, activation='tanh')
+        prediction = paddle.static.nn.fc(
+            x=hidden, size=10, activation='softmax'
+        )
         loss = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
         )
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py
index 992ecbda46d1c34bf558d17d61c6b1c00cfa7162..c92d3234597f7d73cb39fa533c242c5b8421a526 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py
@@ -32,8 +32,8 @@ def Lenet(data, class_dim):
     bn2 = paddle.static.nn.batch_norm(conv2, act='relu')
     pool2 = paddle.nn.functional.max_pool2d(bn2, 2, 2)
 
-    fc1 = fluid.layers.fc(pool2, size=50, act='relu')
-    fc2 = fluid.layers.fc(fc1, size=class_dim, act='softmax')
+    fc1 = paddle.static.nn.fc(pool2, size=50, activation='relu')
+    fc2 = paddle.static.nn.fc(fc1, size=class_dim, activation='softmax')
 
     return fc2
 
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py
index 7d782fb25bc00e2ca1dfb699d0aed45c3a1393a1..d8e7cbe13038c30ba167dea35e09a7e4d5bed4d4 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py
@@ -30,7 +30,7 @@ class TestParallelExecutorFetchIsolatedVarBase(unittest.TestCase):
     def build_network(self, is_training):
         x = fluid.data(name='x', shape=[-1, 10], dtype='float32')
         y = fluid.data(name='y', shape=[-1, 10], dtype='float32')
-        fc = fluid.layers.fc(x, size=30, bias_attr=False)
+        fc = paddle.static.nn.fc(x, size=30, bias_attr=False)
         loss = paddle.mean(fc)
         if is_training:
             adam = fluid.optimizer.Adam(learning_rate=1e-3)
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py
index 675e39ececf6d82709675e9aa57784d2b79ea3c0..bd5b2c77983b93933e91520ae3ae0520e160ed9f 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py
@@ -184,7 +184,7 @@ class TestInferencePartialFeedUsingDataLoader(unittest.TestCase):
         loader = fluid.io.DataLoader.from_generator(
             feed_list=[x], capacity=16, iterable=iterable, drop_last=drop_last
         )
-        y = fluid.layers.fc(x, size=10)
+        y = paddle.static.nn.fc(x, size=10)
         loss = paddle.mean(y)
 
         exe = fluid.Executor(places[0])
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
index a18eba8d5fcb4e7dc1ddf2ab334609ba05c529b1..b8e2054f2b9aef5d6598e6c98da782cb7e370f51 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
@@ -28,15 +28,15 @@ def simple_fc_net(use_feed):
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
     hidden = img
     for _ in range(4):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=200,
-            act='tanh',
+            activation='tanh',
             bias_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=1.0)
             ),
         )
-    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -51,10 +51,10 @@ def fc_with_batchnorm(use_feed):
     hidden = img
     for _ in range(1):
         with fluid.name_scope("hidden"):
-            hidden = fluid.layers.fc(
+            hidden = paddle.static.nn.fc(
                 hidden,
                 size=200,
-                act='tanh',
+                activation='tanh',
                 bias_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Constant(value=1.0)
                 ),
@@ -62,7 +62,7 @@ def fc_with_batchnorm(use_feed):
 
             hidden = paddle.static.nn.batch_norm(input=hidden)
     with fluid.name_scope("fc_layer"):
-        prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+        prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     with fluid.name_scope("loss"):
         loss = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py
index e39648285daba775614a67c8de6ff920f89cb4f8..059f50695a592e4c26e7d8e74b8126701c1a4852 100644
--- a/python/paddle/fluid/tests/unittests/test_profiler.py
+++ b/python/paddle/fluid/tests/unittests/test_profiler.py
@@ -38,7 +38,7 @@ class TestProfiler(unittest.TestCase):
         main_program = fluid.Program()
         with fluid.program_guard(main_program, startup_program):
             image = fluid.layers.data(name='x', shape=[784], dtype='float32')
-            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
+            hidden1 = paddle.static.nn.fc(x=image, size=64, activation='relu')
             i = layers.zeros(shape=[1], dtype='int64')
             counter = fluid.layers.zeros(
                 shape=[1], dtype='int64', force_cpu=True
@@ -48,14 +48,20 @@ class TestProfiler(unittest.TestCase):
             cond = paddle.less_than(x=counter, y=until)
             while_op = paddle.static.nn.control_flow.While(cond=cond)
             with while_op.block():
-                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
+                hidden_n = paddle.static.nn.fc(
+                    x=hidden1, size=64, activation='relu'
+                )
                 paddle.tensor.array_write(hidden_n, i, data_arr)
                 paddle.increment(x=counter, value=1)
                 paddle.assign(paddle.less_than(x=counter, y=until), cond)
 
             hidden_n = paddle.tensor.array_read(data_arr, i)
-            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
-            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+            hidden2 = paddle.static.nn.fc(
+                x=hidden_n, size=64, activation='relu'
+            )
+            predict = paddle.static.nn.fc(
+                x=hidden2, size=10, activation='softmax'
+            )
             label = fluid.layers.data(name='y', shape=[1], dtype='int64')
             cost = paddle.nn.functional.cross_entropy(
                 input=predict, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py
index 54320aee59b2a44ac80c807e483419cfb51adfc2..56b8b35234d8b145ff1c1106345e029e15198573 100644
--- a/python/paddle/fluid/tests/unittests/test_program.py
+++ b/python/paddle/fluid/tests/unittests/test_program.py
@@ -99,8 +99,8 @@ class TestProgram(unittest.TestCase):
         startup_program = Program()
         with program_guard(main_program, startup_program):
             d = layers.data(name='x', shape=[784], dtype='float32')
-            hidden = layers.fc(input=d, size=100)
-            layers.fc(input=hidden, size=100)
+            hidden = paddle.static.nn.fc(x=d, size=100)
+            paddle.static.nn.fc(x=hidden, size=100)
 
         new_program = main_program.clone()
         self.assertNotEqual(0, len(new_program.blocks[0].all_parameters()))
@@ -108,7 +108,7 @@ class TestProgram(unittest.TestCase):
     def test_program_all_parameters(self):
         program = fluid.default_main_program()
         data = fluid.data(name='x', shape=[None, 13], dtype='float32')
-        hidden = fluid.layers.fc(input=data, size=10)
+        hidden = paddle.static.nn.fc(x=data, size=10)
         loss = paddle.mean(hidden)
         fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
 
diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
index dbbd14394f6dc1c8fb7bb242a14fec36aea73094..337feb16174cbfd358d2ae87e2d4daf156f8acd2 100755
--- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
@@ -35,15 +35,15 @@ def simple_fc_net_with_accuracy(use_feed):
 
     hidden = img
     for _ in range(4):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=200,
-            act='relu',
+            activation='relu',
             bias_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=1.0)
             ),
         )
-    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -55,7 +55,7 @@ def simple_fc_net_with_accuracy(use_feed):
 def cond_net(use_feed=None):
     x = fluid.layers.data(name="x", shape=[4], dtype='float32')
     label = fluid.layers.data('label', shape=[1], dtype='int64')
-    prediction = fluid.layers.fc(input=x, size=1, act=None)
+    prediction = paddle.static.nn.fc(x, size=1, activation=None)
 
     def loss1(pred, label):
         x = fluid.layers.data(name="x", shape=[4], dtype='float32')
@@ -84,7 +84,7 @@ def cond_net(use_feed=None):
 def optimization_in_cond_net(with_optimize=False):
     x = fluid.layers.data(name="x", shape=[4], dtype='float32')
     label = fluid.layers.data('label', shape=[1], dtype='int64')
-    prediction = fluid.layers.fc(input=x, size=1, act=None)
+    prediction = paddle.static.nn.fc(x, size=1, activation=None)
 
     def loss1(opt, pred, label, with_optimize):
         x = fluid.layers.data(name="x", shape=[4], dtype='float32')
diff --git a/python/paddle/fluid/tests/unittests/test_program_to_string.py b/python/paddle/fluid/tests/unittests/test_program_to_string.py
index 58edaab582302279193c1e6b5e6aec61641e9109..55f32b687d914726a57b82106619353b8183cd11 100644
--- a/python/paddle/fluid/tests/unittests/test_program_to_string.py
+++ b/python/paddle/fluid/tests/unittests/test_program_to_string.py
@@ -14,6 +14,7 @@
 
 import unittest
 
+import paddle
 import paddle.fluid as fluid
 
 
@@ -23,7 +24,7 @@ class TestProgram(unittest.TestCase):
         a = fluid.layers.data(
             name="X", shape=[2, 3], dtype="float32", append_batch_size=False
         )
-        c = fluid.layers.fc(a, size=3)
+        c = paddle.static.nn.fc(a, size=3)
         prog_string = prog.to_string(throw_on_error=True, with_details=False)
         prog_string_with_details = prog.to_string(
             throw_on_error=False, with_details=True
diff --git a/python/paddle/fluid/tests/unittests/test_prune.py b/python/paddle/fluid/tests/unittests/test_prune.py
index d31a0679d39d0da21f584b66e28935d8e6d7ab5d..2c97f49aeea573ea043a2a1433f2ee0cbcb72f26 100644
--- a/python/paddle/fluid/tests/unittests/test_prune.py
+++ b/python/paddle/fluid/tests/unittests/test_prune.py
@@ -27,7 +27,7 @@ class TestPrune(unittest.TestCase):
     def net(self):
         x = fluid.layers.data(name='x', shape=[2], dtype='float32')
         label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-        y = fluid.layers.fc(input=[x], size=2, act="softmax")
+        y = paddle.static.nn.fc(x=[x], size=2, activation="softmax")
         loss = paddle.nn.functional.cross_entropy(
             input=y, label=label, reduction='none', use_softmax=False
         )
@@ -169,8 +169,8 @@ class TestExecutorRunAutoPrune(unittest.TestCase):
             initializer=fluid.initializer.Constant(1.0),
             trainable=True,
         )
-        y = fluid.layers.fc(
-            input=[x], size=2, act="softmax", param_attr=w_param_attrs
+        y = paddle.static.nn.fc(
+            x=[x], size=2, activation="softmax", weight_attr=w_param_attrs
         )
         loss1 = paddle.nn.functional.cross_entropy(
             input=y, label=label, reduction='none', use_softmax=False
@@ -200,11 +200,11 @@ class TestExecutorRunAutoPrune(unittest.TestCase):
             initializer=fluid.initializer.Constant(1.0),
             trainable=True,
         )
-        y1 = fluid.layers.fc(
-            input=[x1], size=2, act="softmax", param_attr=w1_param_attrs
+        y1 = paddle.static.nn.fc(
+            x=[x1], size=2, activation="softmax", weight_attr=w1_param_attrs
         )
-        y2 = fluid.layers.fc(
-            input=[x2], size=2, act="softmax", param_attr=w2_param_attrs
+        y2 = paddle.static.nn.fc(
+            x=[x2], size=2, activation="softmax", weight_attr=w2_param_attrs
         )
         loss1 = paddle.nn.functional.cross_entropy(
             input=y1, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py
index fe144cadd66ff81b0c067bbc889ad09fa975f715..69e363ee71863c6508f03887973fe6830c76c70b 100644
--- a/python/paddle/fluid/tests/unittests/test_py_func_op.py
+++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py
@@ -75,7 +75,7 @@ def cross_entropy_grad(logits, labels, bwd_dout):
 def simple_fc_net(img, label, use_py_func_op):
     hidden = img
     for idx in range(4):
-        hidden = fluid.layers.fc(
+        hidden = paddle.static.nn.fc(
             hidden,
             size=200,
             bias_attr=fluid.ParamAttr(
@@ -102,7 +102,7 @@ def simple_fc_net(img, label, use_py_func_op):
                 skip_vars_in_backward_input=hidden,
             )
 
-    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
     if not use_py_func_op:
         loss = paddle.nn.functional.cross_entropy(
             input=prediction, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_random_seed.py b/python/paddle/fluid/tests/unittests/test_random_seed.py
index 1364ab2309bc570ad8fd95653d09f39050acd779..856b2be783d36cb49ae9eaf60a78ca29cdd35b73 100644
--- a/python/paddle/fluid/tests/unittests/test_random_seed.py
+++ b/python/paddle/fluid/tests/unittests/test_random_seed.py
@@ -375,17 +375,17 @@ class TestGeneratorSeed(unittest.TestCase):
             # example 1:
             # attr shape is a list which doesn't contain tensor Variable.
             x = paddle.uniform(shape=[2, 10])
-            result_1 = fluid.layers.fc(
-                input=x,
+            result_1 = paddle.static.nn.fc(
+                x,
                 size=10,
-                param_attr=fluid.initializer.TruncatedNormal(
+                weight_attr=fluid.initializer.TruncatedNormal(
                     loc=0.0, scale=2.0
                 ),
             )
-            result_2 = fluid.layers.fc(
-                input=x,
+            result_2 = paddle.static.nn.fc(
+                x,
                 size=10,
-                param_attr=fluid.initializer.TruncatedNormal(
+                weight_attr=fluid.initializer.TruncatedNormal(
                     loc=0.0, scale=2.0
                 ),
             )
diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py
index 838fb493043eb4bd1fdb1238a6ffca656ec42e54..db1abc9ef0ec441b1448a7fe28242d11c7225c54 100644
--- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py
+++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py
@@ -298,19 +298,19 @@ class RecurrentOpTest2(RecurrentOpTest1):
             h_pre = rnn.memory(init=h_boot)
             x_t = rnn.step_input(x)
 
-            temp_l = layers.fc(
-                input=x_t,
+            temp_l = paddle.static.nn.fc(
+                x=x_t,
                 size=self.input_dim,
-                param_attr=ParamAttr(
+                weight_attr=ParamAttr(
                     name='W',
                     initializer=fluid.initializer.ConstantInitializer(1.0),
                 ),
                 bias_attr=False,
             )
-            temp_r = layers.fc(
-                input=h_pre,
+            temp_r = paddle.static.nn.fc(
+                x=h_pre,
                 size=self.input_dim,
-                param_attr=ParamAttr(
+                weight_attr=ParamAttr(
                     name='U',
                     initializer=fluid.initializer.ConstantInitializer(0.0),
                 ),
@@ -692,19 +692,19 @@ class RecurrentOpStopGradientTest(RecurrentOpTest1):
             h_pre = rnn.memory(init=h_boot)  # init doesn't have gradient
             x_t = rnn.step_input(x)
 
-            temp_l = layers.fc(
-                input=x_t,
+            temp_l = paddle.static.nn.fc(
+                x=x_t,
                 size=self.input_dim,
-                param_attr=ParamAttr(
+                weight_attr=ParamAttr(
                     name="W",
                     initializer=fluid.initializer.ConstantInitializer(1.0),
                 ),
                 bias_attr=False,
             )
-            temp_r = layers.fc(
-                input=h_pre,
+            temp_r = paddle.static.nn.fc(
+                x=h_pre,
                 size=self.input_dim,
-                param_attr=ParamAttr(
+                weight_attr=ParamAttr(
                     name="U",
                     initializer=fluid.initializer.ConstantInitializer(0.0),
                 ),
diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py
index c775b4a976e2e536f49eed7b76cbebdc06d9404f..5ab643819d7665ab427b5bc0bcefa69d25e2d2e5 100644
--- a/python/paddle/fluid/tests/unittests/test_regularizer.py
+++ b/python/paddle/fluid/tests/unittests/test_regularizer.py
@@ -138,9 +138,11 @@ def bow_net(
     )
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
     bow_tanh = paddle.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
+    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
+    prediction = paddle.static.nn.fc(
+        x=[fc_2], size=class_dim, activation="softmax"
+    )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -266,7 +268,7 @@ class TestRegularizer(unittest.TestCase):
         )
         with fluid.program_guard(fluid.Program(), fluid.Program()):
             x = paddle.uniform([2, 2, 3])
-            out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
+            out = paddle.static.nn.fc(x, 5, weight_attr=fc_param_attr)
             loss = paddle.sum(out)
             sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
             sgd.minimize(loss)
diff --git a/python/paddle/fluid/tests/unittests/test_regularizer_api.py b/python/paddle/fluid/tests/unittests/test_regularizer_api.py
index 7080dbdeeba0fb711ccb2c97c0b2bc8dbba183f2..a863ed45fdc17f6a2f5d1e120dbcb0c745883ff1 100644
--- a/python/paddle/fluid/tests/unittests/test_regularizer_api.py
+++ b/python/paddle/fluid/tests/unittests/test_regularizer_api.py
@@ -44,9 +44,11 @@ def bow_net(
     )
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
     bow_tanh = paddle.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
+    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
+    prediction = paddle.static.nn.fc(
+        x=[fc_2], size=class_dim, activation="softmax"
+    )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
@@ -176,7 +178,7 @@ class TestRegularizer(unittest.TestCase):
         )
         with fluid.program_guard(fluid.Program(), fluid.Program()):
             x = paddle.uniform([2, 2, 3])
-            out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
+            out = paddle.static.nn.fc(x, 5, weight_attr=fc_param_attr)
             loss = paddle.sum(out)
             sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
             sgd.minimize(loss)
diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
index 81b75a1513eb89cc46ad494798f49a62890839aa..01a58266aebd46c02a035e5f2369ba4036ac72b2 100644
--- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
@@ -279,7 +279,7 @@ class TestRMSPropV2(unittest.TestCase):
         with fluid.program_guard(main):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1)
             cost = paddle.nn.functional.square_error_cost(
                 input=y_predict, label=y
             )
diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py
index bf0b89ef1eb2757f38e510250b2ff0a1fc701bec..193421ac0792ad8962f4aa5576a1fb2efc21182d 100644
--- a/python/paddle/fluid/tests/unittests/test_run_program_op.py
+++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py
@@ -398,12 +398,12 @@ class TestRunProgramOpWithFC(RunProgramOpTest):
             ),
             trainable=True,
         )
-        pred = fluid.layers.fc(
-            input=img,
+        pred = paddle.static.nn.fc(
+            x=img,
             size=10,
-            param_attr=weight_attr,
+            weight_attr=weight_attr,
             bias_attr=bias_attr,
-            act='relu',
+            activation='relu',
         )
         # 2. get forward op num
         fwd_op_num = fluid.default_main_program().global_block().desc.op_size()
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index 852975b975e087825b41ad69101868d726f1b3ea..e3309e18a4870c4cda40c76e4b2cf2a51569b0b2 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -423,7 +423,7 @@ class TestSaveLoadPartial(unittest.TestCase):
 
             test_program = fluid.default_main_program().clone(for_test=True)
 
-            add_1 = fluid.layers.fc(
+            add_1 = paddle.static.nn.fc(
                 static_last_hidden,
                 size=hidden_size,
                 num_flatten_dims=2,
@@ -682,7 +682,7 @@ class TestProgramStatePartial(unittest.TestCase):
 
             test_program = fluid.default_main_program().clone(for_test=True)
 
-            add_1 = fluid.layers.fc(
+            add_1 = paddle.static.nn.fc(
                 static_last_hidden,
                 size=hidden_size,
                 num_flatten_dims=2,
@@ -856,8 +856,8 @@ class TestVariableInit(unittest.TestCase):
     def test_variable_init(self):
 
         x = fluid.data(name="x", shape=[10, 10], dtype='float32')
-        y = fluid.layers.fc(x, 10)
-        z = fluid.layers.fc(y, 10)
+        y = paddle.static.nn.fc(x, 10)
+        z = paddle.static.nn.fc(y, 10)
 
         place = self.set_place()
         exe = fluid.Executor(place)
@@ -1479,7 +1479,7 @@ class TestProgramStateOldSave(unittest.TestCase):
 
             test_program = fluid.default_main_program().clone(for_test=True)
 
-            add_1 = fluid.layers.fc(
+            add_1 = paddle.static.nn.fc(
                 static_last_hidden,
                 size=hidden_size,
                 num_flatten_dims=2,
@@ -1651,7 +1651,7 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase):
 
             test_program = fluid.default_main_program().clone(for_test=True)
 
-            add_1 = fluid.layers.fc(
+            add_1 = paddle.static.nn.fc(
                 static_last_hidden,
                 size=hidden_size,
                 num_flatten_dims=2,
diff --git a/python/paddle/fluid/tests/unittests/test_trainable.py b/python/paddle/fluid/tests/unittests/test_trainable.py
index ebb10335a6d98afb0adc6decf0eb01edcc554ef4..a9f96230ffb0fa050be7026671335226336e935d 100644
--- a/python/paddle/fluid/tests/unittests/test_trainable.py
+++ b/python/paddle/fluid/tests/unittests/test_trainable.py
@@ -24,8 +24,8 @@ import paddle.fluid as fluid
 def test_trainable():
     x = fluid.layers.data(name='image', shape=[784], dtype='float32')
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    feature = fluid.layers.fc(
-        input=x, size=10, param_attr=fluid.ParamAttr(trainable=False)
+    feature = paddle.static.nn.fc(
+        x, size=10, weight_attr=fluid.ParamAttr(trainable=False)
     )
     loss = paddle.nn.functional.cross_entropy(
         input=feature, label=label, reduction='none', use_softmax=False
diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py
index 6f7ffc9437639850331f84c890b6e457cbad52f1..e86dec9899af2ebe66f7bdc9a0a2c4e34aa42031 100644
--- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py
+++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py
@@ -281,10 +281,10 @@ class TestUniformRandomOpApi(unittest.TestCase):
     def test_api(self):
         paddle.seed(10)
         x = fluid.layers.data('x', shape=[16], dtype='float32', lod_level=1)
-        y = fluid.layers.fc(
+        y = paddle.static.nn.fc(
             x,
             size=16,
-            param_attr=fluid.initializer.Uniform(
+            weight_attr=fluid.initializer.Uniform(
                 low=-0.5,
                 high=0.5,
                 seed=10,
diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py
index 68887bb200023da5b4e617931cbc9ecf6e0285dc..8520cf9067c05cce8a1f943f34d418e9b2ff6aae 100644
--- a/python/paddle/fluid/tests/unittests/test_variable.py
+++ b/python/paddle/fluid/tests/unittests/test_variable.py
@@ -169,7 +169,7 @@ class TestVariable(unittest.TestCase):
             var15 = var[::-1, ::-1, ::-1]
 
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-            y = fluid.layers.fc(input=x, size=1, act=None)
+            y = paddle.static.nn.fc(x, size=1, activation=None)
             y_1 = y[:, 0]
             feeder = fluid.DataFeeder(place=place, feed_list=[x])
             data = []
diff --git a/python/paddle/fluid/tests/unittests/test_weight_decay.py b/python/paddle/fluid/tests/unittests/test_weight_decay.py
index 4d86190252272333e2f341b7f9bbde78aaf1b359..e125b5876f9ae7e635c6b7677b9d7164f26eb0f7 100644
--- a/python/paddle/fluid/tests/unittests/test_weight_decay.py
+++ b/python/paddle/fluid/tests/unittests/test_weight_decay.py
@@ -60,9 +60,11 @@ def bow_net(
     )
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
     bow_tanh = paddle.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
+    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
+    prediction = paddle.static.nn.fc(
+        x=[fc_2], size=class_dim, activation="softmax"
+    )
     cost = paddle.nn.functional.cross_entropy(
         input=prediction, label=label, reduction='none', use_softmax=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py
index 3acbf965df0af5766bbdf95dffb6f7839eaca549..e57e3ef9f0b767a7395e9794b42d3d37466f2b73 100644
--- a/python/paddle/fluid/tests/unittests/test_weight_normalization.py
+++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py
@@ -38,16 +38,16 @@ class TestWeightNormalization(unittest.TestCase):
         data = fluid.layers.data(
             name=cls.data_desc[0][0], shape=cls.data_desc[0][1]
         )
-        out = fluid.layers.fc(
-            input=data,
+        out = paddle.static.nn.fc(
+            x=data,
             size=cls.hidden_size,
-            param_attr=WeightNormParamAttr(
+            weight_attr=WeightNormParamAttr(
                 dim=None,
                 name='weight_norm_param',
                 initializer=ConstantInitializer(1.0),
             ),
             bias_attr=False,
-            act=None,
+            activation=None,
         )
         loss = paddle.sum(out)
         fluid.backward.append_backward(loss=loss)
diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py
index b6ae2f3feeaec4751f15725c341084e63bcb8a18..f4f755c131c9dc1492c473aa7d62dae6c8e94aac 100644
--- a/python/paddle/fluid/tests/unittests/transformer_model.py
+++ b/python/paddle/fluid/tests/unittests/transformer_model.py
@@ -73,28 +73,28 @@ def multi_head_attention(
         """
         Add linear projection to queries, keys, and values.
         """
-        q = layers.fc(
-            input=queries,
+        q = paddle.static.nn.fc(
+            x=queries,
             size=d_key * n_head,
-            param_attr=fluid.initializer.Xavier(
+            weight_attr=fluid.initializer.Xavier(
                 uniform=False, fan_in=d_model * d_key, fan_out=n_head * d_key
             ),
             bias_attr=False,
             num_flatten_dims=2,
         )
-        k = layers.fc(
-            input=keys,
+        k = paddle.static.nn.fc(
+            x=keys,
             size=d_key * n_head,
-            param_attr=fluid.initializer.Xavier(
+            weight_attr=fluid.initializer.Xavier(
                 uniform=False, fan_in=d_model * d_key, fan_out=n_head * d_key
             ),
             bias_attr=False,
             num_flatten_dims=2,
         )
-        v = layers.fc(
-            input=values,
+        v = paddle.static.nn.fc(
+            x=values,
             size=d_value * n_head,
-            param_attr=fluid.initializer.Xavier(
+            weight_attr=fluid.initializer.Xavier(
                 uniform=False,
                 fan_in=d_model * d_value,
                 fan_out=n_head * d_value,
@@ -184,10 +184,10 @@ def multi_head_attention(
     out = __combine_heads(ctx_multiheads)
 
     # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
+    proj_out = paddle.static.nn.fc(
+        x=out,
         size=d_model,
-        param_attr=fluid.initializer.Xavier(uniform=False),
+        weight_attr=fluid.initializer.Xavier(uniform=False),
         bias_attr=False,
         num_flatten_dims=2,
     )
@@ -200,20 +200,20 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid):
     This module consists of two linear transformations with a ReLU activation
     in between, which is applied to each position separately and identically.
     """
-    hidden = layers.fc(
-        input=x,
+    hidden = paddle.static.nn.fc(
+        x,
         size=d_inner_hid,
         num_flatten_dims=2,
-        param_attr=fluid.initializer.Uniform(
+        weight_attr=fluid.initializer.Uniform(
             low=-(d_hid**-0.5), high=(d_hid**-0.5)
         ),
-        act="relu",
+        activation="relu",
     )
-    out = layers.fc(
-        input=hidden,
+    out = paddle.static.nn.fc(
+        x=hidden,
         size=d_hid,
         num_flatten_dims=2,
-        param_attr=fluid.initializer.Uniform(
+        weight_attr=fluid.initializer.Uniform(
             low=-(d_inner_hid**-0.5), high=(d_inner_hid**-0.5)
         ),
     )
@@ -582,10 +582,10 @@ def transformer(
     # TODO(guosheng): Share the weight matrix between the embedding layers and
     # the pre-softmax linear transformation.
     predict = paddle.reshape(
-        x=layers.fc(
-            input=dec_output,
+        x=paddle.static.nn.fc(
+            x=dec_output,
             size=trg_vocab_size,
-            param_attr=fluid.initializer.Xavier(uniform=False),
+            weight_attr=fluid.initializer.Xavier(uniform=False),
             bias_attr=False,
             num_flatten_dims=2,
         ),
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
index f42ccf12c581affde029e03621a2865d8c98bcc4..f503e3cd4f595a22d3181efeba24756b069a0633 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
@@ -170,7 +170,7 @@ class XPUTestAdadelta(XPUOpTestWrapper):
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
                 y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                y_predict = paddle.static.nn.fc(x, size=1, activation=None)
                 cost = paddle.nn.functional.square_error_cost(
                     input=y_predict, label=y
                 )
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 437e91f3f4c053f02c2d637ddc7e0e542ab7d0e0..29901363dbeef809eeecc2eb135244117aafbde0 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -290,7 +290,7 @@ class DistributeTranspiler:
 
             x = fluid.data(name='x', shape=[1,13], dtype='float32')
             y = fluid.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            y_predict = paddle.static.nn.fc(x, size=1, activation=None)
 
             cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_loss = paddle.mean(cost)
diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py
index f68783cbb5d34b327b92e8617dfdf3aef11296d5..53954f49f343a56b570a87141d9945da277267b4 100644
--- a/python/paddle/static/nn/common.py
+++ b/python/paddle/static/nn/common.py
@@ -173,7 +173,64 @@ def fc(
               bias_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(value=1.0)))
           # out: [[1.8 1.8]]
     """
-    return paddle.fluid.layers.fc(
+
+    def fc_fluid(  # local helper using the old input/param_attr/act argument names
+        input,
+        size,
+        num_flatten_dims=1,
+        param_attr=None,
+        bias_attr=None,
+        act=None,
+        name=None,
+    ):
+        helper = LayerHelper("fc", **locals())  # first statement, so locals() holds only the arguments
+        check_type(input, 'input', (list, tuple, Variable), 'fc')
+        if isinstance(input, (list, tuple)):  # list/tuple input: type-check every element
+            for i, input_x in enumerate(input):
+                check_type(input_x, 'input[' + str(i) + ']', Variable, 'fc')
+        dtype = helper.input_dtype()
+        check_dtype(
+            dtype, 'input', ['float16', 'uint16', 'float32', 'float64'], 'fc'
+        )
+        mul_results = []  # one "mul" output per input variable
+        for input_var, param_attr in helper.iter_inputs_and_params():  # NOTE: rebinds param_attr
+            input_shape = input_var.shape
+            if num_flatten_dims == -1:  # -1 resolves to "all but the last dimension"
+                num_flatten_dims = len(input_shape) - 1  # resolved value is reused for later inputs
+            param_shape = [
+                reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
+            ] + [size]  # weight shape: [prod(input_shape[num_flatten_dims:]), size]
+
+            w = helper.create_parameter(
+                attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False
+            )
+            tmp = helper.create_variable_for_type_inference(dtype)
+            helper.append_op(
+                type="mul",
+                inputs={"X": input_var, "Y": w},
+                outputs={"Out": tmp},
+                attrs={"x_num_col_dims": num_flatten_dims, "y_num_col_dims": 1},
+            )
+            mul_results.append(tmp)
+
+        if len(mul_results) == 1:  # single input: use its product directly, no "sum" op
+            pre_bias = mul_results[0]
+        else:  # several inputs: combine the per-input products with one "sum" op
+            pre_bias = helper.create_variable_for_type_inference(dtype)
+            helper.append_op(
+                type="sum",
+                inputs={"X": mul_results},
+                outputs={"Out": pre_bias},
+                attrs={"use_mkldnn": False},
+            )
+        # add bias through the helper, starting at dim num_flatten_dims
+        # (NOTE(review): presumably governed by bias_attr -- confirm in LayerHelper)
+        pre_activation = helper.append_bias_op(
+            pre_bias, dim_start=num_flatten_dims
+        )
+        # add activation (NOTE(review): presumably the `act` captured by LayerHelper -- confirm)
+        return helper.append_activation(pre_activation)
+
+    return fc_fluid(
         input=x,
         size=size,
         num_flatten_dims=num_flatten_dims,
diff --git a/tools/codestyle/test_docstring_checker.py b/tools/codestyle/test_docstring_checker.py
index 09a71fb2467adbbb830565a61c3c1f1809ad3cfe..8e18fd07030a8d9f0543cb8b44dc089fc55fe19d 100644
--- a/tools/codestyle/test_docstring_checker.py
+++ b/tools/codestyle/test_docstring_checker.py
@@ -219,7 +219,7 @@ def fc(input,
     Examples:
         .. code-block:: python
             data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
-            fc = fluid.layers.fc(input=data, size=1000, act="tanh")
+            fc = paddle.static.nn.fc(x=data, size=1000, activation="tanh")
     """
     raise ValueError('A very specific bad thing happened.')
     size = 1
diff --git a/tools/infrt/fake_models/multi_fc.py b/tools/infrt/fake_models/multi_fc.py
index 7f2e4b5aeae275909e83fcedc20f9336ab409160..ded9f67bd34db7bbe79e1ed84c96ca86bf55266a 100644
--- a/tools/infrt/fake_models/multi_fc.py
+++ b/tools/infrt/fake_models/multi_fc.py
@@ -24,19 +24,19 @@ paddle.enable_static()
 a = fluid.layers.data(name="A", shape=[-1, size], dtype='float32')
 label = fluid.layers.data(name="label", shape=[size], dtype='float32')
 
-fc_out = fluid.layers.fc(
-    input=a,
+fc_out = paddle.static.nn.fc(
+    x=a,
     size=size,
-    act="relu",
+    activation="relu",
     bias_attr=fluid.ParamAttr(name="fc_bias"),
     num_flatten_dims=1,
 )
 
 for i in range(num_layers - 1):
-    fc_out = fluid.layers.fc(
-        input=fc_out,
+    fc_out = paddle.static.nn.fc(
+        x=fc_out,
         size=size,
-        act="relu",
+        activation="relu",
         bias_attr=fluid.ParamAttr(name="fc_bias"),
         num_flatten_dims=1,
     )