Unverified commit 802be98b, authored by LoneRanger, committed by GitHub

relocate paddle/fluid/contrib/layers (#52820)

* relocate metric_op.py

* relocate nn.py

* fix bug

* fix bug

* fix bug

* fix bug

* fix bug

* fix bug

* fix variable->tensor and fix __all__

* fix ctr_metric_bundle and sparse_embedding

* fix bug of function init

* fix bug of importing sparse_embedding and ctr_metric_bundle

* fix bug

* Update __init__.py
Parent cbfd43e4
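For orientation, a minimal migration sketch assembled from the import paths that appear in the hunks below; it is not part of the diff, and availability of these paths depends on the installed Paddle version.

# Hypothetical migration sketch (paths taken from the hunks below).
# Before this PR, these layers lived under paddle.fluid.contrib.layers:
#   from paddle.fluid.contrib.layers import sparse_embedding, tdm_child, ctr_metric_bundle
# After this PR they are imported from their new homes:
from paddle.incubate.layers.nn import (  # relocated contrib "nn" layers
    tdm_child,
    tdm_sampler,
    shuffle_batch,
    search_pyramid_hash,
)
from paddle.static.nn import sparse_embedding  # sparse_embedding now lives in paddle.static.nn
from paddle.static import ctr_metric_bundle    # metric layer re-exported from paddle.static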
......@@ -57,7 +57,7 @@ void TDMChildInner(const framework::ExecutionContext &context,
input_data[input_ids],
node_nums,
platform::errors::InvalidArgument(
"input id of OP(fluid.contrib.layers.tdm_child) "
"input id of OP(paddle.incubate.layers.tdm_child) "
"expected >= 0 and < %ld, but got %ld. Please check input "
"value.",
node_nums,
......@@ -66,7 +66,7 @@ void TDMChildInner(const framework::ExecutionContext &context,
0,
input_data[input_ids],
platform::errors::InvalidArgument(
"input id of OP(fluid.contrib.layers.tdm_child) "
"input id of OP(paddle.incubate.layers.tdm_child) "
"expected >= 0 and < %ld, but got %ld. Please check input "
"value.",
node_nums,
......
......@@ -22,8 +22,7 @@ from . import extend_optimizer
from .extend_optimizer import *
from . import model_stat
from .model_stat import *
from . import layers
from .layers import *
from . import optimizer
from .optimizer import *
......@@ -32,5 +31,4 @@ __all__ = []
__all__ += memory_usage_calc.__all__
__all__ += op_frequence.__all__
__all__ += extend_optimizer.__all__
__all__ += layers.__all__
__all__ += optimizer.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Contrib layers related to metrics.
"""
import warnings
import paddle
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import Variable
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layers import tensor
__all__ = ['ctr_metric_bundle']
def ctr_metric_bundle(input, label, ins_tag_weight=None):
"""
CTR-related metric layer.
This function helps compute the CTR-related metrics: RMSE, MAE, predicted_ctr, q_value.
To compute the final values of these metrics, do the following computations using the
total instance number:
MAE = local_abserr / instance number
RMSE = sqrt(local_sqrerr / instance number)
predicted_ctr = local_prob / instance number
q = local_q / instance number
Note that in a distributed job, you should all-reduce these metrics and the instance
number first.
Args:
input(Tensor): A floating-point 2D Tensor, values are in the range
[0, 1]. Each row is sorted in descending order. This
input should be the output of topk. Typically, this
Tensor indicates the probability of each label.
label(Tensor): A 2D int Tensor indicating the label of the training
data. The height is batch size and width is always 1.
ins_tag_weight(Tensor): A 2D int Tensor indicating the ins_tag_weight of the training
data. 1 means real data, 0 means fake data.
A LoDTensor or Tensor with type float32,float64.
Returns:
local_sqrerr(Tensor): Local sum of squared error
local_abserr(Tensor): Local sum of abs error
local_prob(Tensor): Local sum of predicted ctr
local_q(Tensor): Local sum of q value
Examples 1:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name="data", shape=[32, 32], dtype="float32")
label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(x=data, size=1))
auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label)
Examples 2:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name="data", shape=[32, 32], dtype="float32")
label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(x=data, size=1))
ins_tag_weight = paddle.static.data(name='ins_tag', shape=[-1,16], lod_level=0, dtype='int64')
auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label, ins_tag_weight=ins_tag_weight)
"""
if ins_tag_weight is None:
ins_tag_weight = paddle.tensor.fill_constant(
shape=[1, 1], dtype="float32", value=1.0
)
assert input.shape == label.shape
helper = LayerHelper("ctr_metric_bundle", **locals())
local_abserr = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_sqrerr = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_prob = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_q = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_pos_num = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_ins_num = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
tmp_res_elesub = helper.create_global_variable(
persistable=False, dtype='float32', shape=[-1]
)
tmp_res_sigmoid = helper.create_global_variable(
persistable=False, dtype='float32', shape=[-1]
)
tmp_ones = helper.create_global_variable(
persistable=False, dtype='float32', shape=[-1]
)
batch_prob = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_abserr = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_sqrerr = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_q = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_pos_num = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_ins_num = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
for var in [
local_abserr,
batch_abserr,
local_sqrerr,
batch_sqrerr,
local_prob,
batch_prob,
local_q,
batch_q,
batch_pos_num,
batch_ins_num,
local_pos_num,
local_ins_num,
]:
helper.set_variable_initializer(
var,
paddle.nn.initializer.ConstantInitializer(
value=0.0, force_cpu=True
),
)
helper.append_op(
type="elementwise_sub",
inputs={"X": [input], "Y": [label]},
outputs={"Out": [tmp_res_elesub]},
)
helper.append_op(
type="squared_l2_norm",
inputs={"X": [tmp_res_elesub]},
outputs={"Out": [batch_sqrerr]},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_sqrerr], "Y": [local_sqrerr]},
outputs={"Out": [local_sqrerr]},
)
helper.append_op(
type="l1_norm",
inputs={"X": [tmp_res_elesub]},
outputs={"Out": [batch_abserr]},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_abserr], "Y": [local_abserr]},
outputs={"Out": [local_abserr]},
)
helper.append_op(
type="reduce_sum", inputs={"X": [input]}, outputs={"Out": [batch_prob]}
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_prob], "Y": [local_prob]},
outputs={"Out": [local_prob]},
)
helper.append_op(
type="sigmoid",
inputs={"X": [input]},
outputs={"Out": [tmp_res_sigmoid]},
)
helper.append_op(
type="reduce_sum",
inputs={"X": [tmp_res_sigmoid]},
outputs={"Out": [batch_q]},
)
helper.append_op(
type="reduce_sum",
inputs={"X": [label]},
outputs={"Out": [batch_pos_num]},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_pos_num], "Y": [local_pos_num]},
outputs={"Out": [local_pos_num]},
)
helper.append_op(
type='fill_constant_batch_size_like',
inputs={"Input": label},
outputs={'Out': [tmp_ones]},
attrs={
'shape': [-1, 1],
'dtype': tmp_ones.dtype,
'value': float(1.0),
},
)
helper.append_op(
type="reduce_sum",
inputs={"X": [tmp_ones]},
outputs={"Out": [batch_ins_num]},
)
# if data is fake, return 0
inputs_slice = {'Input': ins_tag_weight}
attrs = {'axes': [0]}
attrs['starts'] = [0]
attrs['ends'] = [1]
helper.append_op(
type="slice",
inputs=inputs_slice,
attrs=attrs,
outputs={"Out": ins_tag_weight},
)
axis = helper.kwargs.get('axis', 0)
helper.append_op(
type="elementwise_mul",
inputs={"X": [batch_ins_num], "Y": [ins_tag_weight]},
outputs={"Out": [batch_ins_num]},
attrs={'axis': axis},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_ins_num], "Y": [local_ins_num]},
outputs={"Out": [local_ins_num]},
)
helper.append_op(
type="elementwise_mul",
inputs={"X": [batch_q], "Y": [ins_tag_weight]},
outputs={"Out": [batch_q]},
attrs={'axis': axis},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_q], "Y": [local_q]},
outputs={"Out": [local_q]},
)
return (
local_sqrerr,
local_abserr,
local_prob,
local_q,
local_pos_num,
local_ins_num,
)
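Not part of the diff: a small, self-contained sketch of the post-processing described in the docstring above, turning the local sums returned by ctr_metric_bundle into the final metrics; the helper name and the numbers are illustrative.

# Turn the local sums fetched from ctr_metric_bundle into the final metrics.
# In a distributed job, all-reduce the local sums and the instance count first.
import math

def finalize_ctr_metrics(local_sqrerr, local_abserr, local_prob, local_q, ins_num):
    mae = local_abserr / ins_num
    rmse = math.sqrt(local_sqrerr / ins_num)
    predicted_ctr = local_prob / ins_num
    q = local_q / ins_num
    return mae, rmse, predicted_ctr, q

# Illustrative numbers for 100 instances:
print(finalize_ctr_metrics(2.5, 4.0, 30.0, 28.0, ins_num=100))
# (0.04, 0.15811388300841897, 0.3, 0.28)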
......@@ -197,7 +197,7 @@ def embedding(
if is_distributed:
is_distributed = False
warnings.warn(
"is_distributed is go out of use, `fluid.contrib.layers.sparse_embedding` is your needed"
"is_distributed is go out of use, `paddle.static.nn.sparse_embedding` is your needed"
)
remote_prefetch = True if is_sparse else False
......@@ -227,128 +227,6 @@ def embedding(
return tmp
def _pull_gpups_sparse(
input, size, dtype='float32', is_distributed=False, is_sparse=False
):
r"""
**Pull GpuPS Sparse Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
GpuPS lookup table. The result of this lookup is the embedding of each ID in the
:attr:`input`.
Args:
input(Variable|list of Variable): Input is a Tensor<int64> Variable, which
contains the IDs information.
size(int|list of int): The embedding size parameter of each input, which indicates the size of
each embedding vector respectively.
dtype(str): The dtype refers to the data type of output tensor. Only supports
float32 now.
Returns:
Variable|list of Variable: The tensor variable storing the embeddings of the \
supplied inputs, whose size are indicated by size respectively.
Examples:
.. code-block:: python
import paddle.fluid as fluid
slots = []
data_1 = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1)
slots.append(data_1)
data_2 = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1)
slots.append(data_2)
embs = fluid.layers.pull_gpups_sparse(input=slots, size=[11, 35])
"""
helper = LayerHelper('pull_gpups_sparse', **locals())
if dtype != 'float32':
raise ValueError(
"GpuPS only support float type embedding now, and your type is: "
+ dtype
)
helper.input_dtype()
inputs = helper.multiple_input()
outs = [
helper.create_variable_for_type_inference(dtype)
for i in range(len(inputs))
]
w = helper.create_parameter(
attr=helper.param_attr, shape=[size[0]], dtype=dtype, is_bias=False
)
helper.append_op(
type='pull_gpups_sparse',
inputs={'Ids': inputs, 'W': w},
outputs={'Out': outs},
attrs={
'size': size,
'is_distributed': is_distributed,
'is_sparse': is_sparse,
},
)
if len(outs) == 1:
return outs[0]
return outs
def _pull_box_sparse(
input, size, dtype='float32', is_distributed=False, is_sparse=False
):
r"""
**Pull Box Sparse Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
BoxPS lookup table. The result of this lookup is the embedding of each ID in the
:attr:`input`.
Args:
input(Variable|list of Variable): Input is a Tensor<int64> Variable, which
contains the IDs information.
size(int): The embedding size parameter, which indicates the size of
each embedding vector respectively.
dtype(str): The dtype refers to the data type of output tensor. Only supports
float32 now.
Returns:
Variable|list of Variable: The tensor variable storing the embeddings of the \
supplied inputs.
Examples:
.. code-block:: python
import paddle.fluid as fluid
data = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1)
emb = fluid.layers.pull_box_sparse(input=data, size=[11])
"""
helper = LayerHelper('pull_box_sparse', **locals())
if dtype != 'float32':
raise ValueError(
"BoxPS only support float type embedding now, and your type is: "
+ dtype
)
helper.input_dtype()
inputs = helper.multiple_input()
outs = [
helper.create_variable_for_type_inference(dtype)
for i in range(len(inputs))
]
w = helper.create_parameter(
attr=helper.param_attr, shape=[size], dtype=dtype, is_bias=False
)
helper.append_op(
type='pull_box_sparse',
inputs={'Ids': inputs, 'W': w},
outputs={'Out': outs},
attrs={
'size': size,
'is_distributed': is_distributed,
'is_sparse': is_sparse,
},
)
if len(outs) == 1:
return outs[0]
return outs
def autoincreased_step_counter(counter_name=None, begin=1, step=1):
"""
:api_attr: Static Graph
......
......@@ -96,7 +96,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
entry = paddle.distributed.ShowClickEntry("show", "click")
dnn_layer_dims = [128, 64, 32]
dnn_embedding = fluid.contrib.layers.sparse_embedding(
dnn_embedding = paddle.static.nn.sparse_embedding(
input=dnn_data,
size=[dnn_input_dim, dnn_layer_dims[0]],
is_test=inference,
......@@ -120,7 +120,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
dnn_out = fc
# build lr model
lr_embbding = fluid.contrib.layers.sparse_embedding(
lr_embbding = paddle.static.nn.sparse_embedding(
input=lr_data,
size=[lr_input_dim, 1],
is_test=inference,
......
......@@ -19,6 +19,7 @@ import numpy as np
from eager_op_test import OpTest, paddle_static_guard
import paddle
from paddle.incubate.layers.nn import bilateral_slice
class Gsz:
......@@ -202,7 +203,7 @@ class TestBilateralSliceApi(unittest.TestCase):
grid = paddle.static.data(
name='grid', shape=[None, None, 8, 5, 3], dtype='float32'
)
paddle.fluid.contrib.layers.bilateral_slice(x, guide, grid, False)
bilateral_slice(x, guide, grid, False)
if not paddle.fluid.is_compiled_with_cuda():
return
......@@ -212,7 +213,7 @@ class TestBilateralSliceApi(unittest.TestCase):
guide1 = paddle.rand([3, 50, 30])
grid1 = paddle.rand([3, 2, 2, 5, 3])
paddle.fluid.contrib.bilateral_slice(x1, guide1, grid1, False)
bilateral_slice(x1, guide1, grid1, False)
if __name__ == "__main__":
......
......@@ -18,7 +18,7 @@ import paddle
from paddle import fluid
from paddle.distributed.transpiler import collective
from paddle.fluid import core
from paddle.fluid.layers.nn import _pull_box_sparse
from paddle.incubate.layers.nn import _pull_box_sparse
class TestTranspile(unittest.TestCase):
......
......@@ -73,7 +73,7 @@ class TestPSMinimize(unittest.TestCase):
name="1", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -105,7 +105,7 @@ class TestPSMinimize(unittest.TestCase):
name="2", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -136,7 +136,7 @@ class TestPSMinimize(unittest.TestCase):
name="3", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -73,7 +73,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="1", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -105,7 +105,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="2", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="3", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -76,7 +76,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="1", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -108,7 +108,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="2", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -139,7 +139,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="3", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -109,7 +109,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -76,7 +76,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -109,7 +109,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -73,7 +73,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -105,7 +105,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -73,7 +73,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -105,7 +105,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase):
name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -214,7 +214,7 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase):
init = paddle.nn.initializer.Uniform()
dnn_layer_dims = [128, 64, 32]
dnn_embedding = fluid.contrib.layers.sparse_embedding(
dnn_embedding = paddle.static.nn.sparse_embedding(
input=dnn_data,
size=[dnn_input_dim, dnn_layer_dims[0]],
is_test=inference,
......@@ -239,7 +239,7 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase):
dnn_out = fc
# build lr model
lr_embbding = fluid.contrib.layers.sparse_embedding(
lr_embbding = paddle.static.nn.sparse_embedding(
input=lr_data,
size=[lr_input_dim, 1],
is_test=inference,
......
......@@ -71,7 +71,7 @@ class TestSPMT(unittest.TestCase):
name="1", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
q_emb = fluid.contrib.layers.sparse_embedding(
q_emb = paddle.static.nn.sparse_embedding(
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -103,7 +103,7 @@ class TestSPMT(unittest.TestCase):
name="2", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
pt_emb = fluid.contrib.layers.sparse_embedding(
pt_emb = paddle.static.nn.sparse_embedding(
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......@@ -134,7 +134,7 @@ class TestSPMT(unittest.TestCase):
name="3", shape=[-1, 1], dtype="int64", lod_level=1
)
# embedding
nt_emb = fluid.contrib.layers.sparse_embedding(
nt_emb = paddle.static.nn.sparse_embedding(
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
......
......@@ -23,6 +23,7 @@ from paddle.incubate.distributed.fleet.parameter_server.distribute_transpiler im
from paddle.incubate.distributed.fleet.parameter_server.distribute_transpiler.distributed_strategy import (
StrategyFactory,
)
from paddle.incubate.layers.nn import search_pyramid_hash
class TestPyramidHashOpApi(unittest.TestCase):
......@@ -33,7 +34,7 @@ class TestPyramidHashOpApi(unittest.TestCase):
x = paddle.static.data(
name='x', shape=x_shape, dtype='int32', lod_level=1
)
hash_embd = fluid.contrib.layers.search_pyramid_hash(
hash_embd = search_pyramid_hash(
input=x,
num_emb=embed_dim,
space_len=num_voc * embed_dim,
......
......@@ -97,7 +97,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
act=None,
data_layout='NHWC',
)
fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(
fused_bn_add_act = paddle.incubate.layers.nn.fused_bn_add_act(
conv1_2,
bn,
param_attr=self.bn_param_attr2,
......
......@@ -20,6 +20,7 @@ from eager_op_test import OpTest, paddle_static_guard, skip_check_grad_ci
import paddle
import paddle.version as ver
from paddle.incubate.layers.nn import fused_embedding_seq_pool
@skip_check_grad_ci(
......@@ -114,7 +115,7 @@ class TestFusedEmbeddingSeqPoolApi(unittest.TestCase):
name='word', shape=[-1, 1], dtype='int64', lod_level=1
)
padding_idx = np.random.randint(1, 10)
out = fluid.contrib.fused_embedding_seq_pool(
out = fused_embedding_seq_pool(
input=data_t,
size=[dict_size, 32],
param_attr='w',
......
......@@ -26,6 +26,13 @@ from paddle import fluid
from paddle.fluid import core, layers, nets
from paddle.fluid.dygraph import base, to_variable
from paddle.fluid.framework import Program, default_main_program, program_guard
from paddle.incubate.layers.nn import (
batch_fc,
partial_concat,
partial_sum,
rank_attention,
shuffle_batch,
)
from paddle.tensor import random
......@@ -2145,9 +2152,9 @@ class TestBook(LayerTest):
x = paddle.static.data(
name='X', shape=[-1, 4, 50], dtype='float32', lod_level=0
)
out1 = fluid.contrib.layers.shuffle_batch(x)
out1 = shuffle_batch(x)
default_main_program().random_seed = 1000
out2 = fluid.contrib.layers.shuffle_batch(x)
out2 = shuffle_batch(x)
self.assertIsNotNone(out1)
self.assertIsNotNone(out2)
return out1
......@@ -2156,9 +2163,7 @@ class TestBook(LayerTest):
with self.static_graph():
x = paddle.static.data(name="x", shape=[None, 3], dtype="float32")
y = paddle.static.data(name="y", shape=[None, 3], dtype="float32")
sum = fluid.contrib.layers.partial_sum(
[x, y], start_index=0, length=2
)
sum = partial_sum([x, y], start_index=0, length=2)
return sum
def test_batch_fc(self):
......@@ -2166,7 +2171,7 @@ class TestBook(LayerTest):
input = paddle.static.data(
name="input", shape=[16, 2, 3], dtype="float32"
)
out = fluid.contrib.layers.batch_fc(
out = batch_fc(
input=input,
param_size=[16, 3, 10],
param_attr=fluid.ParamAttr(
......@@ -2192,7 +2197,7 @@ class TestBook(LayerTest):
rank_offset = paddle.static.data(
name="rank_offset", shape=[None, 7], dtype="int32"
)
out = fluid.contrib.layers.rank_attention(
out = rank_attention(
input=input,
rank_offset=rank_offset,
rank_param_shape=[18, 3],
......@@ -2263,12 +2268,8 @@ class TestBook(LayerTest):
with self.static_graph():
x = paddle.static.data(name="x", shape=[None, 3], dtype="float32")
y = paddle.static.data(name="y", shape=[None, 3], dtype="float32")
concat1 = fluid.contrib.layers.partial_concat(
[x, y], start_index=0, length=2
)
concat2 = fluid.contrib.layers.partial_concat(
x, start_index=0, length=-1
)
concat1 = partial_concat([x, y], start_index=0, length=2)
concat2 = partial_concat(x, start_index=0, length=-1)
return concat1, concat2
def test_addmm(self):
......
......@@ -15,7 +15,7 @@
import unittest
import paddle
from paddle.fluid.contrib.layers.nn import pow2_decay_with_linear_warmup
from paddle.incubate.layers.nn import pow2_decay_with_linear_warmup
from paddle.optimizer.lr import LinearWarmup, PolynomialDecay
......
......@@ -18,7 +18,7 @@ import numpy as np
import paddle
from paddle import fluid
from paddle.fluid.layers.nn import _pull_gpups_sparse
from paddle.incubate.layers import _pull_gpups_sparse
paddle.enable_static()
......
......@@ -18,6 +18,7 @@ import numpy as np
import paddle
from paddle import fluid
from paddle.incubate.layers.nn import search_pyramid_hash
class TestPyramidHashOpApi(unittest.TestCase):
......@@ -28,7 +29,7 @@ class TestPyramidHashOpApi(unittest.TestCase):
x = paddle.static.data(
name='x', shape=x_shape, dtype='int32', lod_level=1
)
hash_embd = fluid.contrib.search_pyramid_hash(
hash_embd = search_pyramid_hash(
input=x,
num_emb=embed_dim,
space_len=num_voc * embed_dim,
......
......@@ -19,6 +19,7 @@ from eager_op_test import OpTest, paddle_static_guard
import paddle
from paddle import fluid
from paddle.incubate.layers.nn import tdm_child
def create_tdm_tree():
......@@ -147,7 +148,7 @@ class TestTDMChildShape(unittest.TestCase):
tdm_tree_info = create_tdm_tree()
tree_info_np = np.array(tdm_tree_info).astype('int32')
child, leaf_mask = fluid.contrib.layers.tdm_child(
child, leaf_mask = tdm_child(
x=x,
node_nums=26,
child_nums=2,
......
......@@ -20,6 +20,7 @@ from eager_op_test import OpTest, paddle_static_guard
import paddle
from paddle import fluid
from paddle.fluid import core
from paddle.incubate.layers.nn import tdm_sampler
def create_tdm_travel():
......@@ -284,7 +285,7 @@ class TestTDMSamplerShape(unittest.TestCase):
neg_samples_num_list = [1, 2, 3, 4]
leaf_node_num = 13
sample, label, mask = fluid.contrib.layers.tdm_sampler(
sample, label, mask = tdm_sampler(
x,
neg_samples_num_list,
layer_node_num_list,
......
......@@ -21,6 +21,7 @@ from paddle import fluid
from paddle.fluid import core
from paddle.fluid.backward import append_backward
from paddle.fluid.executor import Executor
from paddle.incubate.layers.nn import shuffle_batch
paddle.enable_static()
......@@ -145,7 +146,7 @@ class TestIgnoreVarNameInWhile(unittest.TestCase):
def body_func(i, ten, batch_info, origin_seq):
print(batch_info)
batch_info = fluid.contrib.layers.shuffle_batch(batch_info)
batch_info = shuffle_batch(batch_info)
print(batch_info)
i = i + 1
return [i, ten, batch_info, origin_seq]
......
......@@ -35,6 +35,7 @@ from . import autotune # noqa: F401
from . import nn # noqa: F401
from . import asp # noqa: F401
from . import multiprocessing # noqa: F401
from . import layers
from .nn.loss import identity_loss
......
......@@ -1387,7 +1387,7 @@ class FleetUtil:
label=label, curve='ROC',\
num_thresholds=4096)
local_sqrerr, local_abserr, local_prob, local_q, local_pos_ins,\
local_total_ins = fluid.contrib.layers.ctr_metric_bundle(\
local_total_ins = paddle.static.ctr_metric_bundle(\
similarity_norm, label)
"""
......@@ -1587,7 +1587,7 @@ class FleetUtil:
label=label, curve='ROC',\
num_thresholds=4096)
local_sqrerr, local_abserr, local_prob, local_q, local_pos_ins, \
local_total_ins = fluid.contrib.layers.ctr_metric_bundle(\
local_total_ins = paddle.static.ctr_metric_bundle(\
similarity_norm, label)
"""
......
......@@ -13,11 +13,25 @@
# limitations under the License.
from . import nn
from .nn import *
from . import metric_op
from .metric_op import *
from .nn import (
fused_embedding_seq_pool,
fused_seqpool_cvm,
multiclass_nms2,
search_pyramid_hash,
shuffle_batch,
partial_concat,
partial_sum,
tdm_child,
tdm_sampler,
rank_attention,
batch_fc,
_pull_box_extended_sparse,
bilateral_slice,
correlation,
fused_bn_add_act,
pow2_decay_with_linear_warmup,
_pull_gpups_sparse,
_pull_box_sparse,
)
__all__ = []
__all__ += nn.__all__
__all__ += metric_op.__all__
......@@ -12,50 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Contrib layers just related to the neural network.
Incubate layers related to the neural network.
"""
import os
import warnings
import inspect
import numpy as np
import paddle
from paddle.fluid.layer_helper import LayerHelper
from ... import unique_name
from paddle import _legacy_C_ops
from paddle.fluid import core, unique_name
from paddle.fluid.data_feeder import (
check_variable_and_dtype,
check_type,
check_dtype,
convert_dtype,
check_type,
check_variable_and_dtype,
)
from paddle.fluid import core
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_
import paddle
import warnings
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'fused_embedding_seq_pool',
'multiclass_nms2',
'search_pyramid_hash',
'shuffle_batch',
'partial_concat',
'sparse_embedding',
'partial_sum',
'tdm_child',
'rank_attention',
'tdm_sampler',
'batch_fc',
'_pull_box_extended_sparse',
'bilateral_slice',
'correlation',
'fused_bn_add_act',
'fused_seqpool_cvm',
]
__all__ = []
def fused_embedding_seq_pool(
......@@ -73,7 +49,7 @@ def fused_embedding_seq_pool(
This layer is the fusion of lookup table and sequence_pool.
Args:
input (Variable): Input is a Tensor<int64> Variable, which contains the IDs' information.
input (Tensor): Input is a Tensor<int64>, which contains the IDs' information.
The value of the input IDs should satisfy :math:`0<= id < size[0]`.
size (tuple|list): The shape of the lookup_table parameter. It should
have two elements which indicate the size of the dictionary of
......@@ -91,7 +67,7 @@ def fused_embedding_seq_pool(
dtype (np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of output
tensor. It can be float32, float_16, int etc.
Returns:
The sequence pooling variable which is a Tensor.
The Tensor of sequence pooling.
Examples:
.. code-block:: python
import numpy as np
......@@ -103,7 +79,7 @@ def fused_embedding_seq_pool(
data_t = paddle.static.data(
name='word', shape=[-1, 1], dtype='int64', lod_level=1)
padding_idx = np.random.randint(1, 10)
out = fluid.contrib.fused_embedding_seq_pool(
out = paddle.incubate.layers.fused_embedding_seq_pool(
input=data_t,
size=[dict_size, 32],
param_attr='w',
......@@ -146,15 +122,15 @@ def fused_seqpool_cvm(
**Note:** The Op only receives a list of LoDTensor as input and only supports SUM pooling for now.
Args:
input(Variable|list of Variable): Input is List of LoDTensor.
input(Tensor): Input is List of LoDTensor.
pool_type(str): pooling type, only support SUM pooling now.
cvm(Variable): cvm Variable.
cvm(Tensor): cvm Tensor.
pad_value(float, optional): padding value of sequence pool. Default: 0.0.
use_cvm(bool, optional): use cvm or not. Default: True.
cvm_offset(int, optional): cvm offset. Default: 2, which means cvm contains show, click.
Returns:
Variable|list of Variable: The tensor variable storing sequence pool and cvm
Tensor: The tensor storing the sequence pool and cvm
of input.
Examples:
......@@ -167,14 +143,14 @@ def fused_seqpool_cvm(
data = paddle.static.data(name='x', shape=[-1, 1], dtype='int64', lod_level=1)
data2 = paddle.static.data(name='y', shape=[-1, 1], dtype='int64', lod_level=1)
inputs = [data, data2]
embs = fluid.layers.nn._pull_box_sparse(input=inputs, size=11, is_distributed=True, is_sparse=True)
embs = paddle.incubate.layers.nn._pull_box_sparse(input=inputs, size=11, is_distributed=True, is_sparse=True)
label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64", lod_level=1)
ones = fluid.layers.fill_constant_batch_size_like(input=label, shape=[-1, 1], dtype="int64", value=1)
show_clk = paddle.cast(paddle.concat([ones, label], axis=1), dtype='float32')
show_clk.stop_gradient = True
cvms = fluid.contrib.layers.fused_seqpool_cvm(embs, 'sum', show_clk)
cvms = paddle.incubate.layers.fused_seqpool_cvm(embs, 'sum', show_clk)
"""
......@@ -243,7 +219,7 @@ def multiclass_nms2(
per image if keep_top_k is larger than -1.
Args:
bboxes (Variable): Two types of bboxes are supported:
bboxes (Tensor): Two types of bboxes are supported:
1. (Tensor) A 3-D Tensor with shape
[N, M, 4 or 8 16 24 32] represents the
predicted locations of M bounding bboxes,
......@@ -253,7 +229,7 @@ def multiclass_nms2(
2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]
M is the number of bounding boxes, C is the
class number
scores (Variable): Two types of scores are supported:
scores (Tensor): Two types of scores are supported:
1. (Tensor) A 3-D Tensor with shape [N, C, M]
represents the predicted confidence predictions.
N is the batch size, C is the class number, M is
......@@ -283,8 +259,8 @@ def multiclass_nms2(
name(str): Name of the multiclass nms op. Default: None.
Returns:
A tuple with two Variables: (Out, Index) if return_index is True,
otherwise, a tuple with one Variable(Out) is returned.
A tuple of two Tensors: (Out, Index) if return_index is True,
otherwise, a tuple with one Tensor (Out) is returned.
Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
or A 2-D LoDTensor with shape [No, 10] represents the detections.
......@@ -311,7 +287,7 @@ def multiclass_nms2(
dtype='float32', lod_level=1)
scores = paddle.static.data(name='scores', shape=[-1, 81],
dtype='float32', lod_level=1)
out, index = fluid.contrib.layers.multiclass_nms2(bboxes=boxes,
out, index = paddle.incubate.layers.multiclass_nms2(bboxes=boxes,
scores=scores,
background_label=0,
score_threshold=0.5,
......@@ -371,7 +347,7 @@ def search_pyramid_hash(
**Pyramid hash embedding**
Args:
input (Variable): LoDTensor<int32> Variable contained the IDs' information.
input (Tensor): LoDTensor<int32> Tensor containing the IDs' information.
num_emb (int): The embedding size of output.
space_len (int): The length of pyramid hash embedding space.
pyramid_layer (int): The number of pyramid layers. It should be greater than 2.
......@@ -396,9 +372,9 @@ def search_pyramid_hash(
Used in Distribute Transpiler to create a trainer/server program.
name(str, optional): The default value is None. Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name` .
dtype(str): The data type of output variable, float32.
dtype(str): The data type of output Tensor, float32.
Returns:
Variable: LoDTensor of pyramid hash embedding.
Tensor: LoDTensor of pyramid hash embedding.
"""
helper = LayerHelper('search_pyramid_hash', **locals())
......@@ -438,7 +414,7 @@ def search_pyramid_hash(
for param in distribute_update_vars:
if param not in special_name_list:
raise ValueError(
"Pyramid Hash layer didn't have parameter {}".format(param)
f"Pyramid Hash layer didn't have parameter {param}"
)
distribute_update_vars_str = ",".join(distribute_update_vars)
......@@ -491,12 +467,12 @@ def shuffle_batch(x, seed=None):
Out.dims = [4, 2]
Args:
x (Variable): The input variable. The input variable is a N-D LoDTensor with type int, float32 or float64.
seed (None|int|Variable): The start up seed. If set, seed will be set as the start up seed of shuffle engine.
x (Tensor): The input Tensor. The input Tensor is a N-D LoDTensor with type int, float32 or float64.
seed (None|int|Tensor): The start up seed. If set, seed will be set as the start up seed of shuffle engine.
If not set(Default), start up seed of shuffle engine will be generated randomly.
Returns:
Variables: The shuffled LoDTensor with the same shape and lod as input.
Tensor: The shuffled LoDTensor with the same shape and lod as input.
Examples:
......@@ -506,7 +482,7 @@ def shuffle_batch(x, seed=None):
import paddle
paddle.enable_static()
x = paddle.static.data(name="x", shape=[-1, 4])
out = fluid.contrib.layers.shuffle_batch(x)
out = paddle.incubate.layers.shuffle_batch(x)
"""
helper = LayerHelper('shuffle_batch', **locals())
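Not part of the diff: a NumPy sketch of the behaviour described above for a 2-D input, under the assumption that shuffle_batch permutes rows along the batch dimension and that a fixed seed makes the permutation reproducible.

# NumPy approximation of shuffle_batch for a 2-D input: permute rows along
# the batch dimension; the seed only controls reproducibility of the permutation.
import numpy as np

def shuffle_batch_np(x, seed=None):
    rng = np.random.default_rng(seed)
    return x[rng.permutation(x.shape[0])]

x = np.array([[1., 2.], [3., 4.], [5., 6.], [7., 8.]])
out = shuffle_batch_np(x, seed=1000)
print(out.shape)  # (4, 2): same shape as the input, rows reordered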
......@@ -537,7 +513,7 @@ def partial_concat(input, start_index=0, length=-1):
"""
**Partial Concat**
This OP concatenates the inputs according to the start index and length. This
OP exists in contrib, which means that it is not shown to the public.
OP exists in incubate layers, which means that it is not shown to the public.
Only 2-D Tensor or LodTensor input is supported. Slice and concat can only be
performed along the second dimension.
......@@ -563,14 +539,14 @@ def partial_concat(input, start_index=0, length=-1):
length(int32): The length of each instance for partial concatenation. Default is -1.
Negative values for all elements after start_index.
Returns:
Variable: A Tensor with the same data type as input's.
Tensor: A Tensor with the same data type as input's.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
x = paddle.randn(name="x", shape=[1,3], dtype="float32")
y = paddle.randn(name="y", shape=[1,3], dtype="float32")
concat = fluid.contrib.layers.partial_concat(
concat = paddle.incubate.layers.partial_concat(
[x, y], start_index=0, length=2)
"""
if not isinstance(input, list):
......@@ -605,7 +581,7 @@ def partial_sum(input, start_index=0, length=-1):
"""
**PartialSum**
This Op can sum the vars by specifying the initial position(start_index) and length(length).
This Op exists in contrib, which means that it is not shown to the public.
This Op exists in incubate layers, which means that it is not shown to the public.
Only 2-D Tensor or LodTensor input is supported. Slice and concat can only be
performed along the second dimension.
.. code-block:: text
......@@ -624,10 +600,9 @@ def partial_sum(input, start_index=0, length=-1):
input(list): List of input Tensors with data type float32, float64, int32,
int64.
Returns:
Variable: A Tensor with the same data type as input's.
Tensor: A Tensor with the same data type as input's.
Examples:
.. code-block:: python
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import numpy as np
import paddle
......@@ -635,7 +610,7 @@ def partial_sum(input, start_index=0, length=-1):
x = paddle.static.data(name="x", shape=[2, 3], dtype="float32")
y = paddle.static.data(name="y", shape=[2, 3], dtype="float32")
sum = fluid.contrib.layers.partial_sum([x,y], start_index=0, length=2)
sum = paddle.incubate.layers.partial_sum([x,y], start_index=0, length=2)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
xx = np.array([1,2,3,4,5,6]).reshape((2,3)).astype("float32")
......@@ -662,203 +637,6 @@ def partial_sum(input, start_index=0, length=-1):
return out
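Not part of the diff: a plain-NumPy sketch of the partial_concat and partial_sum semantics described in the two docstrings above; each 2-D input is sliced along the second dimension at [start_index : start_index + length], then the slices are concatenated or summed.

# NumPy sketch of the slice-then-combine behaviour of partial_sum / partial_concat.
import numpy as np

def partial_sum_np(inputs, start_index=0, length=-1):
    end = None if length == -1 else start_index + length
    return sum(x[:, start_index:end] for x in inputs)

def partial_concat_np(inputs, start_index=0, length=-1):
    end = None if length == -1 else start_index + length
    return np.concatenate([x[:, start_index:end] for x in inputs], axis=1)

x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[10., 20., 30.], [40., 50., 60.]])
print(partial_sum_np([x, y], start_index=0, length=2))     # [[11. 22.] [44. 55.]]
print(partial_concat_np([x, y], start_index=0, length=2))  # [[ 1.  2. 10. 20.] [ 4.  5. 40. 50.]]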
def sparse_embedding(
input,
size,
padding_idx=None,
is_test=False,
entry=None,
table_class="MemorySparseTable",
param_attr=None,
dtype='float32',
slot=None,
):
r"""
:api_attr: Static Graph
The OP is used as the operator of the Embedding Lookup layer in the large-scale
sparse training of the parameter server mode, instead of using the paddle.nn.functional.embedding.
The operator is used to lookup embeddings vector of ids provided by :attr:`input` .
It automatically constructs a 2D embedding matrix based on the input :attr:`size`
(vocab_size, emb_size) and :attr:`dtype` .
The shape of output Tensor is generated by appending an emb_size dimension to the
last dimension of the input Tensor shape.
**Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , otherwise
the program will throw an exception and exit.
.. code-block:: text
Case 1:
input is a Tensor. padding_idx = -1
input.data = [[1, 3], [2, 4], [4, 127]]
input.shape = [3, 2]
Given size = [128, 16]
output is a Tensor:
out.shape = [3, 2, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745],
[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.945345345, 0.435394634, ..., 0.435345365],
[0.0, 0.0, ..., 0.0 ]]] # padding data
The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
It will pad all-zero data when ids is 127.
Case 2:
input is a LoDTensor with 1-level LoD. padding_idx = 0
input.lod = [[2, 3]]
input.data = [[1], [3], [2], [4], [0]]
input.shape = [5, 1]
Given size = [128, 16]
output is a LoDTensor:
out.lod = [[2, 3]]
out.shape = [5, 1, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
[[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745]],
[[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.0, 0.0, ..., 0.0 ]]] # padding data
It will pad all-zero data when ids is 0.
Args:
input(Variable): A Tensor or LoDTensor with type int64, which contains the id
information. The value of the input id should satisfy :math:`0<= id < size[0]` .
size(tuple|list): The shape of lookup table parameter (vocab_size, emb_size). It
should have two elements which indicates the size of the dictionary of embeddings
and the size of each embedding vector respectively. The initial parameter size
is 0 in the large-scale sparse scenario, which will gradually expand with the
training. So if vocab_size is temporarily useless, its value can be any integer.
The emb_size is the dimensional configuration of the word embedding weight parameter.
padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-vocab_size, vocab_size).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever
lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated
while training. If set to None, it has no effect on the output. Default: None.
is_test(bool, optional): Training or prediction mode. In prediction mode (is_test=False),
the output is not initialized and created, and it is filled with 0 and returned. Default: False.
entry(str, optional): Entry config with parameter server whose value is ProbabilityEntry,
CountFilterEntry or None. Default: None.
table_class(str, optional): The type of the sparse table. The value can be CommonSparseTable
or SSDSparseTable. The default is CommonSparseTable.
param_attr(ParamAttr, optional): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. In addition, user-defined or pre-trained word
vectors can be loaded with the :attr:`param_attr` parameter. The local word vector needs
to be transformed into numpy format, and the shape of local word vector should be consistent
with :attr:`size` .
dtype(str): It refers to the data type of output Tensor. It must be float32 or
float64. Default: float32.
Returns:
Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
sparse_feature_dim = 1024
embedding_size = 64
# Only when the feature appear more than 10 times or more will be participated in the training.
entry = paddle.distributed.CountFilterEntry(10)
input = paddle.static.data(name='ins', shape=[1], dtype='int64')
emb = paddle.static.nn.sparse_embedding(
input=input,
size=[sparse_feature_dim, embedding_size],
is_test=False,
entry=entry,
param_attr=paddle.ParamAttr(name="SparseFeatFactors",
initializer=paddle.nn.initializer.Uniform()))
"""
helper = LayerHelper('sparse_embedding', **locals())
check_variable_and_dtype(
input, 'input', ['int64'], 'fluid.contrib.layers.sparse_embedding'
)
check_dtype(
dtype,
'dtype',
['float32', 'float64'],
'paddle.static.nn.sparse_embedding',
)
if input.size == 0:
raise ValueError("input size should not be 0")
w = helper.create_parameter(
attr=helper.param_attr,
shape=size,
type=core.VarDesc.VarType.SELECTED_ROWS,
dtype=dtype,
is_bias=False,
)
tmp = helper.create_variable_for_type_inference(dtype)
padding_idx = (
-1
if padding_idx is None
else padding_idx
if padding_idx >= 0
else (size[0] + padding_idx)
)
if table_class not in [
"CommonSparseTable",
"SSDSparseTable",
"MemorySparseTable",
]:
raise ValueError(
"table_class must be in [CommonSparseTable, SSDSparseTable, MemorySparseTable]"
)
entry_str = "none"
if entry is not None:
if entry.__class__.__name__ not in [
"ProbabilityEntry",
"CountFilterEntry",
"ShowClickEntry",
]:
raise ValueError(
"entry must be instance in [paddle.distributed.ProbabilityEntry, paddle.distributed.CountFilterEntry, paddle.distributed.ShowClickEntry]"
)
entry_str = entry._to_attr()
if slot is None:
slot = 0
helper.append_op(
type='lookup_table',
inputs={'Ids': input, 'W': w},
outputs={'Out': tmp},
attrs={
'padding_idx': padding_idx,
'is_sparse': True,
'is_distributed': True,
'remote_prefetch': True,
'is_test': is_test,
'entry': entry_str,
'table_class': table_class,
'slot': slot,
},
)
return tmp
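Not part of the diff: the padding_idx normalization performed by the sparse_embedding code above, written out as a standalone helper to make the Case 1 / Case 2 behaviour in the docstring easier to follow.

# None disables padding (-1); negative indices wrap around vocab_size.
def normalize_padding_idx(padding_idx, vocab_size):
    if padding_idx is None:
        return -1
    return padding_idx if padding_idx >= 0 else vocab_size + padding_idx

print(normalize_padding_idx(-1, 128))    # 127, as in Case 1 of the docstring
print(normalize_padding_idx(0, 128))     # 0, as in Case 2 of the docstring
print(normalize_padding_idx(None, 128))  # -1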
def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
"""
**Tdm Child**
......@@ -878,7 +656,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
leaf_mask = [[1, 1],
[0, 0]]
Args:
x(Variable): Variable contained the node_id information, dtype support int32/int64.
x(Tensor): Tensor containing the node_id information; dtype supports int32/int64.
node_nums(int): Number of total nodes.
child_nums(int): Maximum number of child nodes per node.
param_attr(ParamAttr): To specify the tdm-tree-info parameter property. Default: None, which means the
......@@ -893,7 +671,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
dtype(str): The data type of output child and leaf_mask, support int32/int64.
Returns:
tuple: A tuple including input node's child(Variable) and leaf_mask(Variable).
tuple: A tuple including input node's child(Tensor) and leaf_mask(Tensor).
If child is a leaf node, leaf_mask equals 1; otherwise it equals 0.
Examples:
......@@ -910,7 +688,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
tree_info_np = np.reshape(tree_info_np, (7,5))
node_nums = 7
child_nums = 2
child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums,
child, leaf_mask = paddle.incubate.layers.tdm_child(x, node_nums, child_nums,
param_attr=fluid.ParamAttr(
initializer=paddle.nn.initializer.Assign(
tree_info_np)))
......@@ -922,7 +700,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
"""
helper = LayerHelper("tdm_child", **locals())
check_dtype(
dtype, 'dtype', ['int32', 'int64'], 'fluid.contrib.layers.tdm_child'
dtype, 'dtype', ['int32', 'int64'], 'paddle.incubate.layers.tdm_child'
)
c_dtype = convert_np_dtype_to_dtype_(dtype)
tree_info = helper.create_parameter(
......@@ -981,7 +759,7 @@ def tdm_sampler(
mask = [[1, 1], [1, 1], [1, 1], [1, 1]]
Args:
x (Variable): Variable contained the item_id(corresponding to leaf node) information, dtype support int32/int64.
x (Tensor): Tensor containing the item_id (corresponding to a leaf node) information; dtype supports int32/int64.
neg_samples_num_list (list(int)): Number of negative samples per layer.
layer_node_num_list (list(int)): Number of nodes per layer, must has same shape with neg_samples_num_list.
leaf_node_num (int): Number of leaf nodes.
......@@ -1003,7 +781,7 @@ def tdm_sampler(
and if it is a negative sample, it is 0. If the tree is unbalanced, in order to ensure the consistency of the
sampling result shape, the padding sample's mask = 0, the real sample's mask value = 1.
If output_list = True, the result will organize into list format specified by layer information.
Output variable have same type with tdm-travel and tdm-layer parameter(tree_dtype).
The output Tensor has the same type as the tdm-travel and tdm-layer parameters (tree_dtype).
Examples:
.. code-block:: python
......@@ -1022,7 +800,7 @@ def tdm_sampler(
travel_array = np.array(travel_list)
layer_array = np.array(layer_list_flat)
sample, label, mask = fluid.contrib.layers.tdm_sampler(
sample, label, mask = paddle.incubate.layers.tdm_sampler(
x,
neg_samples_num_list,
layer_node_num_list,
......@@ -1051,10 +829,10 @@ def tdm_sampler(
tree_dtype,
'tree_dtype',
['int32', 'int64'],
'fluid.contrib.layers.tdm_sampler',
'paddle.incubate.layers.tdm_sampler',
)
check_dtype(
dtype, 'dtype', ['int32', 'int64'], 'fluid.contrib.layers.tdm_sampler'
dtype, 'dtype', ['int32', 'int64'], 'paddle.incubate.layers.tdm_sampler'
)
c_dtype = convert_np_dtype_to_dtype_(dtype)
......@@ -1189,7 +967,7 @@ def rank_attention(
This Op can calculate rank attention between input and rank_param, and
rank_param gives the organization of data. Notice: It currently supports
GPU device.
This Op exists in contrib, which means that it is not shown to the public.
This Op exists in incubate layers, which means that it is not shown to the public.
Args:
input: Tensor with data type float32, float64.
rank_offset: Tensor with data type int32.
......@@ -1197,7 +975,7 @@ def rank_attention(
rank_param_attr: Attribute initializer of rank_param.
max_rank: The max rank of input's ranks.
Returns:
Variable: A Tensor with the same data type as input's.
Tensor: A Tensor with the same data type as input's.
Examples:
.. code-block:: python
import paddle.fluid as fluid
......@@ -1206,7 +984,7 @@ def rank_attention(
input = paddle.static.data(name="input", shape=[None, 2], dtype="float32")
rank_offset = paddle.static.data(name="rank_offset", shape=[None, 7], dtype="int32")
out = fluid.contrib.layers.rank_attention(input=input,
out = paddle.incubate.layers.rank_attention(input=input,
rank_offset=rank_offset,
rank_param_shape=[18,3],
rank_param_attr=
......@@ -1248,7 +1026,7 @@ def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None):
This Op can calculate BatchFC. This is similar to matmul op,
except that the bias and relu activation layers are added.
Notice: It currently supports GPU device.
This Op exists in contrib, which means that it is not shown to the public.
This Op exists in incubate layers, which means that it is not shown to the public.
Args:
input: Tensor with data type float32, float64.
param_size: The size of w.
......@@ -1258,7 +1036,7 @@ def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None):
act: Activation to be applied to the output of this layer.
Returns:
Variable: A Tensor with the same data type as input's.
Tensor: A Tensor with the same data type as input's.
Examples:
.. code-block:: python
import paddle.fluid as fluid
......@@ -1267,7 +1045,7 @@ def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None):
paddle.enable_static()
input = paddle.static.data(name="input", shape=[16, 2, 3], dtype="float32")
out = fluid.contrib.layers.batch_fc(input=input,
out = paddle.incubate.layers.batch_fc(input=input,
param_size=[16, 3, 10],
param_attr=
paddle.ParamAttr(learning_rate=1.0,
......@@ -1312,7 +1090,7 @@ def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'):
BoxPS lookup table. The result of this lookup is the embedding of each ID in the
:attr:`input`.
Args:
input(Variable|list of Variable): Input is a Tensor<int64> Variable, which
input(Tensor): Input is a Tensor<int64>, which
contains the IDs information.
size(int): The embedding size parameter, which indicates the size of
each embedding vector respectively.
......@@ -1321,13 +1099,13 @@ def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'):
dtype(str): The dtype refers to the data type of output tensor. Only supports
float32 now.
Returns:
Variable|list of Variable: The tensor variable storing the embeddings of the \
Tensor: The tensor storing the embeddings of the \
supplied inputs.
Examples:
.. code-block:: python
import paddle.fluid as fluid
data = paddle.static.data(name='sequence', shape=[-1, 1], dtype='int64', lod_level=1)
emb, emb_ex = fluid.contrib.layers._pull_box_extended_sparse(input=data, size=8, extend_size=128)
emb, emb_ex = paddle.incubate.layers._pull_box_extended_sparse(input=data, size=8, extend_size=128)
"""
helper = LayerHelper('pull_box_extended_sparse', **locals())
helper.input_dtype()
......@@ -1361,13 +1139,13 @@ def bilateral_slice(x, guide, grid, has_offset, name=None):
For more information of bilateral slicing, please refer to Deep Bilateral Learning for Real-Time Image Enhancement <https://groups.csail.mit.edu/graphics/hdrnet/data/hdrnet.pdf>_
Args:
x(Variable): The input tensor, which is a 4-D tensor with shape
x(Tensor): The input tensor, which is a 4-D tensor with shape
[N, C, H, W], N is the batch size, C is the channel
number, H and W is the feature height and width.
The data type is float32 and float64.
guide(Variable): Input grid tensor of shape [N, H, W]. The
guide(Tensor): Input grid tensor of shape [N, H, W]. The
data type is float32 and float64.
grid(Variable): Input grid tensor of shape [N, C, D, H, W]. The
grid(Tensor): Input grid tensor of shape [N, C, D, H, W]. The
data type is float32 and float64.
has_offset(bool): Whether to slice with affine offset.
name(str, optional): For detailed information, please refer
......@@ -1375,7 +1153,7 @@ def bilateral_slice(x, guide, grid, has_offset, name=None):
None by default.
Returns:
Variable: Output of shape [N, C, H, W]. The data type is same as input tensor.
Tensor: Output of shape [N, C, H, W]. The data type is same as input tensor.
Examples:
......@@ -1390,15 +1168,15 @@ def bilateral_slice(x, guide, grid, has_offset, name=None):
grid = paddle.randn(name='grid', shape=[1, 12, 8, 10, 6], dtype='float32')
# without offset
output = fluid.contrib.bilateral_slice(x, guide, grid, has_offset=False)
output = paddle.incubate.layers.bilateral_slice(x, guide, grid, has_offset=False)
# has offset
output = fluid.contrib.bilateral_slice(x, guide, grid, has_offset=True)
output = paddle.incubate.layers.bilateral_slice(x, guide, grid, has_offset=True)
"""
if paddle.fluid._non_static_mode():
attrs = ('has_offset', has_offset)
return getattr(_legacy_C_ops, "bilateral_slice")(x, grid, guide, *attrs)
return _legacy_C_ops.bilateral_slice(x, grid, guide, *attrs)
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'bilateral_slice')
check_variable_and_dtype(
......@@ -1463,7 +1241,7 @@ def correlation(
dtype="float32")
out = fluid.contrib.correlation(
out = paddle.incubate.layers.correlation(
x1,
x2,
pad_size=4,
......@@ -1489,7 +1267,7 @@ def correlation(
"corr_type_multiply",
corr_type_multiply,
)
output = getattr(_legacy_C_ops, "correlation")(x, y, *attrs)
output = _legacy_C_ops.correlation(x, y, *attrs)
else:
helper = LayerHelper("correlation", **locals())
output = helper.create_variable_for_type_inference(dtype=x.dtype)
......@@ -1593,7 +1371,7 @@ def fused_bn_add_act(
input=conv1_1,
act=None,
data_layout='NHWC')
fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn)
fused_bn_add_act = paddle.incubate.layers.fused_bn_add_act(conv1_2, bn)
prediction = paddle.static.nn.fc(x=fused_bn_add_act, size=10, activation='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=y,
......@@ -1757,3 +1535,125 @@ def pow2_decay_with_linear_warmup(
},
)
return lr
def _pull_gpups_sparse(
input, size, dtype='float32', is_distributed=False, is_sparse=False
):
r"""
**Pull GpuPS Sparse Layer**
This layer is used to look up embeddings of the IDs provided by :attr:`input` in the
GpuPS lookup table. The result of this lookup is the embedding of each ID in
:attr:`input`.
Args:
input(Tensor): Input is a Tensor<int64>, which
contains the IDs information.
size(int|list of int): The embedding size parameter of each input, which indicates the size of
each embedding vector respectively.
dtype(str): The dtype refers to the data type of output tensor. Only supports
float32 now.
Returns:
Tensor: The tensor storing the embeddings of the \
supplied inputs, whose sizes are indicated by size respectively.
Examples:
.. code-block:: python
import paddle.incubate as incubate
slots = []
data_1 = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1)
slots.append(data_1)
data_2 = paddle.static.data(name='sequence2', shape=[-1,1], dtype='int64', lod_level=1)
slots.append(data_2)
embs = incubate.layers.pull_gpups_sparse(input=slots, size=[11, 35])
"""
helper = LayerHelper('pull_gpups_sparse', **locals())
if dtype != 'float32':
raise ValueError(
"GpuPS only support float type embedding now, and your type is: "
+ dtype
)
helper.input_dtype()
inputs = helper.multiple_input()
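# One output variable is created per input slot; a single tensor (not a list) is returned when only one slot is passed in.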
outs = [
helper.create_variable_for_type_inference(dtype)
for i in range(len(inputs))
]
w = helper.create_parameter(
attr=helper.param_attr, shape=[size[0]], dtype=dtype, is_bias=False
)
helper.append_op(
type='pull_gpups_sparse',
inputs={'Ids': inputs, 'W': w},
outputs={'Out': outs},
attrs={
'size': size,
'is_distributed': is_distributed,
'is_sparse': is_sparse,
},
)
if len(outs) == 1:
return outs[0]
return outs
def _pull_box_sparse(
input, size, dtype='float32', is_distributed=False, is_sparse=False
):
r"""
**Pull Box Sparse Layer**
This layer is used to look up embeddings of the IDs provided by :attr:`input` in the
BoxPS lookup table. The result of this lookup is the embedding of each ID in
:attr:`input`.
Args:
input(Tensor): Input is a Tensor<int64>, which
contains the IDs information.
size(int): The embedding size parameter, which indicates the size of
each embedding vector.
dtype(str): The dtype refers to the data type of output tensor. Only supports
float32 now.
Returns:
Tensor: The tensor storing the embeddings of the \
supplied inputs.
Examples:
.. code-block:: python
import paddle.incubate as incubate
data = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1)
emb = incubate.layers.pull_box_sparse(input=data, size=[11])
"""
helper = LayerHelper('pull_box_sparse', **locals())
if dtype != 'float32':
raise ValueError(
"BoxPS only support float type embedding now, and your type is: "
+ dtype
)
helper.input_dtype()
inputs = helper.multiple_input()
outs = [
helper.create_variable_for_type_inference(dtype)
for i in range(len(inputs))
]
w = helper.create_parameter(
attr=helper.param_attr, shape=[size], dtype=dtype, is_bias=False
)
helper.append_op(
type='pull_box_sparse',
inputs={'Ids': inputs, 'W': w},
outputs={'Out': outs},
attrs={
'size': size,
'is_distributed': is_distributed,
'is_sparse': is_sparse,
},
)
if len(outs) == 1:
return outs[0]
return outs
......@@ -71,12 +71,12 @@ from ..fluid.optimizer import Optimizer # noqa: F401
from ..fluid.optimizer import Adam # noqa: F401
from ..fluid.optimizer import ExponentialMovingAverage # noqa: F401
from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401
from ..fluid.layers import exponential_decay # noqa: F401
from ..fluid.layers import learning_rate_scheduler # noqa: F401
from .nn.metric import auc # noqa: F401
from .nn.metric import accuracy # noqa: F401
from .nn.metric import ctr_metric_bundle # noqa: F401
__all__ = [ # noqa
'append_backward',
......
......@@ -39,7 +39,7 @@ from .common import layer_norm # noqa: F401
from .common import embedding # noqa: F401
from ...fluid.contrib.layers import sparse_embedding # noqa: F401
from .common import sparse_embedding # noqa: F401
from ...fluid.layers import StaticRNN # noqa: F401
from .sequence_lod import sequence_conv # noqa: F401
......
......@@ -3810,3 +3810,200 @@ def embedding(
},
)
return tmp
def sparse_embedding(
input,
size,
padding_idx=None,
is_test=False,
entry=None,
table_class="MemorySparseTable",
param_attr=None,
dtype='float32',
slot=None,
):
r"""
:api_attr: Static Graph
This OP is used as the operator of the Embedding Lookup layer in large-scale
sparse training under the parameter server mode, instead of using paddle.nn.functional.embedding.
The operator is used to lookup embeddings vector of ids provided by :attr:`input` .
It automatically constructs a 2D embedding matrix based on the input :attr:`size`
(vocab_size, emb_size) and :attr:`dtype` .
The shape of output Tensor is generated by appending an emb_size dimension to the
last dimension of the input Tensor shape.
**Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` , otherwise
the program will throw an exception and exit.
.. code-block:: text
Case 1:
input is a Tensor. padding_idx = -1
input.data = [[1, 3], [2, 4], [4, 127]]
input.shape = [3, 2]
Given size = [128, 16]
output is a Tensor:
out.shape = [3, 2, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745],
[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.945345345, 0.435394634, ..., 0.435345365],
[0.0, 0.0, ..., 0.0 ]]] # padding data
Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
It will pad all-zero data when ids is 127.
Case 2:
input is a LoDTensor with 1-level LoD. padding_idx = 0
input.lod = [[2, 3]]
input.data = [[1], [3], [2], [4], [0]]
input.shape = [5, 1]
Given size = [128, 16]
output is a LoDTensor:
out.lod = [[2, 3]]
out.shape = [5, 1, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
[[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745]],
[[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.0, 0.0, ..., 0.0 ]]] # padding data
It will pad all-zero data when ids is 0.
Args:
input(Tensor): A Tensor or LoDTensor with type int64, which contains the id
information. The value of the input id should satisfy :math:`0 <= id < size[0]` .
size(tuple|list): The shape of lookup table parameter (vocab_size, emb_size). It
should have two elements which indicates the size of the dictionary of embeddings
and the size of each embedding vector respectively. The initial parameter size
is 0 in the large-scale sparse scenario, which will gradually expand with the
training. So if vocab_size is temporarily useless, its value can be any integer.
The emb_size is the dimensional configuration of the word embedding weight parameter.
padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-vocab_size, vocab_size).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever
lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated
while training. If set to None, it has no effect on the output. Default: None.
is_test(bool, optional): Training or prediction mode. In prediction mode (is_test=True),
embedding rows that have not been created are not initialized; all-zero values are returned instead. Default: False.
entry(str, optional): Entry config with parameter server whose value is ProbabilityEntry,
CountFilterEntry or None. Default: None.
table_class(str, optional): The type of the sparse table. The value can be CommonSparseTable,
SSDSparseTable or MemorySparseTable. The default is MemorySparseTable.
param_attr(ParamAttr, optional): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. In addition, user-defined or pre-trained word
vectors can be loaded with the :attr:`param_attr` parameter. The local word vector needs
to be transformed into numpy format, and the shape of local word vector should be consistent
with :attr:`size` .
dtype(str): It refers to the data type of output Tensor. It must be float32 or
float64. Default: float32.
Returns:
Tensor: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
sparse_feature_dim = 1024
embedding_size = 64
# Only features that appear 10 times or more will participate in the training.
entry = paddle.distributed.CountFilterEntry(10)
input = paddle.static.data(name='ins', shape=[1], dtype='int64')
emb = paddle.static.nn.sparse_embedding(
input=input,
size=[sparse_feature_dim, embedding_size],
is_test=False,
entry=entry,
param_attr=paddle.ParamAttr(name="SparseFeatFactors",
initializer=paddle.nn.initializer.Uniform()))
"""
helper = LayerHelper('sparse_embedding', **locals())
check_variable_and_dtype(
input, 'input', ['int64'], 'paddle.incubate.layers.sparse_embedding'
)
check_dtype(
dtype,
'dtype',
['float32', 'float64'],
'paddle.static.nn.sparse_embedding',
)
if input.size == 0:
raise ValueError("input size should not be 0")
w = helper.create_parameter(
attr=helper.param_attr,
shape=size,
type=core.VarDesc.VarType.SELECTED_ROWS,
dtype=dtype,
is_bias=False,
)
tmp = helper.create_variable_for_type_inference(dtype)
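# Normalize padding_idx: None disables padding (sentinel -1); a negative index wraps around to size[0] + padding_idx, as described in the docstring.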
padding_idx = (
-1
if padding_idx is None
else padding_idx
if padding_idx >= 0
else (size[0] + padding_idx)
)
if table_class not in [
"CommonSparseTable",
"SSDSparseTable",
"MemorySparseTable",
]:
raise ValueError(
"table_class must be in [CommonSparseTable, SSDSparseTable, MemorySparseTable]"
)
entry_str = "none"
if entry is not None:
if entry.__class__.__name__ not in [
"ProbabilityEntry",
"CountFilterEntry",
"ShowClickEntry",
]:
raise ValueError(
"entry must be instance in [paddle.distributed.ProbabilityEntry, paddle.distributed.CountFilterEntry, paddle.distributed.ShowClickEntry]"
)
entry_str = entry._to_attr()
if slot is None:
slot = 0
helper.append_op(
type='lookup_table',
inputs={'Ids': input, 'W': w},
outputs={'Out': tmp},
attrs={
'padding_idx': padding_idx,
'is_sparse': True,
'is_distributed': True,
'remote_prefetch': True,
'is_test': is_test,
'entry': entry_str,
'table_class': table_class,
'slot': slot,
},
)
return tmp
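For reference, a small self-contained illustration of the padding_idx normalization performed above (plain Python, hypothetical helper name, no Paddle required):

def normalize_padding_idx(padding_idx, vocab_size):
    # None disables padding (the -1 sentinel); negative indices wrap around the vocabulary size.
    if padding_idx is None:
        return -1
    return padding_idx if padding_idx >= 0 else vocab_size + padding_idx

assert normalize_padding_idx(None, 128) == -1
assert normalize_padding_idx(-1, 128) == 127  # matches Case 1 in the docstring
assert normalize_padding_idx(0, 128) == 0     # matches Case 2 in the docstring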
......@@ -317,3 +317,253 @@ def auc(
batch_auc_out,
[batch_stat_pos, batch_stat_neg, stat_pos, stat_neg],
)
def ctr_metric_bundle(input, label, ins_tag_weight=None):
"""
CTR related metric layer.
This function helps compute the CTR related metrics: RMSE, MAE, predicted_ctr and q_value.
To compute the final values of these metrics, the following computations should be done using the
total instance number:
MAE = local_abserr / instance number
RMSE = sqrt(local_sqrerr / instance number)
predicted_ctr = local_prob / instance number
q = local_q / instance number
Note that if you are running a distributed job, you should all-reduce these metrics and the instance
number first.
Args:
input(Tensor): A floating-point 2D Tensor, values are in the range
[0, 1]. Each row is sorted in descending order. This
input should be the output of topk. Typically, this
Tensor indicates the probability of each label.
label(Tensor): A 2D int Tensor indicating the label of the training
data. The height is batch size and width is always 1.
ins_tag_weight(Tensor): A 2D Tensor indicating the ins_tag_weight of the training
data, where 1 means real data and 0 means fake data.
A LoDTensor or Tensor with type float32 or float64.
Returns:
local_sqrerr(Tensor): Local sum of squared error
local_abserr(Tensor): Local sum of abs error
local_prob(Tensor): Local sum of predicted ctr
local_q(Tensor): Local sum of q value
Examples 1:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name="data", shape=[32, 32], dtype="float32")
label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(x=data, size=1))
auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label)
Examples 2:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name="data", shape=[32, 32], dtype="float32")
label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(x=data, size=1))
ins_tag_weight = paddle.static.data(name='ins_tag', shape=[-1,16], lod_level=0, dtype='int64')
auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label, ins_tag_weight=ins_tag_weight)
"""
if ins_tag_weight is None:
ins_tag_weight = paddle.tensor.fill_constant(
shape=[1, 1], dtype="float32", value=1.0
)
assert input.shape == label.shape
helper = LayerHelper("ctr_metric_bundle", **locals())
local_abserr = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_sqrerr = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_prob = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_q = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_pos_num = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
local_ins_num = helper.create_global_variable(
persistable=True, dtype='float32', shape=[1]
)
tmp_res_elesub = helper.create_global_variable(
persistable=False, dtype='float32', shape=[-1]
)
tmp_res_sigmoid = helper.create_global_variable(
persistable=False, dtype='float32', shape=[-1]
)
tmp_ones = helper.create_global_variable(
persistable=False, dtype='float32', shape=[-1]
)
batch_prob = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_abserr = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_sqrerr = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_q = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_pos_num = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
batch_ins_num = helper.create_global_variable(
persistable=False, dtype='float32', shape=[1]
)
for var in [
local_abserr,
batch_abserr,
local_sqrerr,
batch_sqrerr,
local_prob,
batch_prob,
local_q,
batch_q,
batch_pos_num,
batch_ins_num,
local_pos_num,
local_ins_num,
]:
helper.set_variable_initializer(
var,
paddle.nn.initializer.ConstantInitializer(
value=0.0, force_cpu=True
),
)
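# The ops below accumulate per-batch statistics into the persistable local_* variables:
# squared error, absolute error, summed raw predictions, summed sigmoid(predictions),
# positive-label count and instance count.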
helper.append_op(
type="elementwise_sub",
inputs={"X": [input], "Y": [label]},
outputs={"Out": [tmp_res_elesub]},
)
helper.append_op(
type="squared_l2_norm",
inputs={"X": [tmp_res_elesub]},
outputs={"Out": [batch_sqrerr]},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_sqrerr], "Y": [local_sqrerr]},
outputs={"Out": [local_sqrerr]},
)
helper.append_op(
type="l1_norm",
inputs={"X": [tmp_res_elesub]},
outputs={"Out": [batch_abserr]},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_abserr], "Y": [local_abserr]},
outputs={"Out": [local_abserr]},
)
helper.append_op(
type="reduce_sum", inputs={"X": [input]}, outputs={"Out": [batch_prob]}
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_prob], "Y": [local_prob]},
outputs={"Out": [local_prob]},
)
helper.append_op(
type="sigmoid",
inputs={"X": [input]},
outputs={"Out": [tmp_res_sigmoid]},
)
helper.append_op(
type="reduce_sum",
inputs={"X": [tmp_res_sigmoid]},
outputs={"Out": [batch_q]},
)
helper.append_op(
type="reduce_sum",
inputs={"X": [label]},
outputs={"Out": [batch_pos_num]},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_pos_num], "Y": [local_pos_num]},
outputs={"Out": [local_pos_num]},
)
helper.append_op(
type='fill_constant_batch_size_like',
inputs={"Input": label},
outputs={'Out': [tmp_ones]},
attrs={
'shape': [-1, 1],
'dtype': tmp_ones.dtype,
'value': float(1.0),
},
)
helper.append_op(
type="reduce_sum",
inputs={"X": [tmp_ones]},
outputs={"Out": [batch_ins_num]},
)
# If the data is fake (ins_tag_weight == 0), the per-batch instance count and q sum below are scaled by the tag weight so fake batches contribute 0.
inputs_slice = {'Input': ins_tag_weight}
attrs = {'axes': [0]}
attrs['starts'] = [0]
attrs['ends'] = [1]
helper.append_op(
type="slice",
inputs=inputs_slice,
attrs=attrs,
outputs={"Out": ins_tag_weight},
)
axis = helper.kwargs.get('axis', 0)
helper.append_op(
type="elementwise_mul",
inputs={"X": [batch_ins_num], "Y": [ins_tag_weight]},
outputs={"Out": [batch_ins_num]},
attrs={'axis': axis},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_ins_num], "Y": [local_ins_num]},
outputs={"Out": [local_ins_num]},
)
helper.append_op(
type="elementwise_mul",
inputs={"X": [batch_q], "Y": [ins_tag_weight]},
outputs={"Out": [batch_q]},
attrs={'axis': axis},
)
helper.append_op(
type="elementwise_add",
inputs={"X": [batch_q], "Y": [local_q]},
outputs={"Out": [local_q]},
)
return (
local_sqrerr,
local_abserr,
local_prob,
local_q,
local_pos_num,
local_ins_num,
)
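For reference, a minimal sketch of the final-value computation described in the docstring above, assuming the returned local sums and the instance count have already been fetched (and, for a distributed job, all-reduced) into plain Python floats; the helper name is hypothetical:

import math

def finalize_ctr_metrics(local_sqrerr, local_abserr, local_prob, local_q, instance_num):
    # Final metric values as defined in the ctr_metric_bundle docstring.
    mae = local_abserr / instance_num
    rmse = math.sqrt(local_sqrerr / instance_num)
    predicted_ctr = local_prob / instance_num
    q = local_q / instance_num
    return mae, rmse, predicted_ctr, q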
......@@ -448,7 +448,6 @@ packages=['paddle',
'paddle.fluid.dataloader',
'paddle.fluid.contrib',
'paddle.fluid.contrib.extend_optimizer',
'paddle.fluid.contrib.layers',
'paddle.fluid.incubate',
'paddle.incubate.distributed.fleet',
'paddle.fluid.incubate.checkpoint',
......@@ -483,6 +482,7 @@ packages=['paddle',
'paddle.incubate.distributed.fleet.parameter_server.distribute_transpiler',
'paddle.incubate.distributed.fleet.parameter_server.pslib',
'paddle.incubate.distributed.fleet.parameter_server.ir',
'paddle.incubate.layers',
'paddle.quantization',
'paddle.quantization.quanters',
'paddle.quantization.observers',
......
......@@ -1424,7 +1424,6 @@ def get_setup_parameters():
'paddle.fluid.dataloader',
'paddle.fluid.contrib',
'paddle.fluid.contrib.extend_optimizer',
'paddle.fluid.contrib.layers',
'paddle.fluid.incubate',
'paddle.incubate.distributed.fleet',
'paddle.fluid.incubate.checkpoint',
......@@ -1459,6 +1458,7 @@ def get_setup_parameters():
'paddle.incubate.distributed.fleet.parameter_server.distribute_transpiler',
'paddle.incubate.distributed.fleet.parameter_server.ir',
'paddle.incubate.distributed.fleet.parameter_server.pslib',
'paddle.incubate.layers',
'paddle.quantization',
'paddle.quantization.quanters',
'paddle.quantization.observers',
......
......@@ -114,7 +114,7 @@ class TestCorrelationOp(unittest.TestCase):
stride2=1,
)
out = fluid.contrib.correlation(
out = paddle.incubate.layers.correlation(
x1,
x2,
pad_size=4,
......@@ -142,7 +142,7 @@ class Net(paddle.nn.Layer):
super().__init__(name_scope)
def forward(self, x1, x2):
y = fluid.contrib.correlation(
y = paddle.incubate.layers.correlation(
x1,
x2,
pad_size=4,
......
......@@ -171,10 +171,10 @@ class TestMulticlassNMS2(unittest.TestCase):
scores = paddle.static.data(
name='scores', shape=[-1, 10], dtype='float32'
)
output = fluid.contrib.multiclass_nms2(
output = paddle.incubate.layers.multiclass_nms2(
bboxes, scores, 0.3, 400, 200, 0.7
)
output2, index = fluid.contrib.multiclass_nms2(
output2, index = paddle.incubate.layers.multiclass_nms2(
bboxes, scores, 0.3, 400, 200, 0.7, return_index=True
)
self.assertIsNotNone(output)
......
......@@ -75,7 +75,7 @@ class DNNLayer(nn.Layer):
sparse_embs = []
for s_input in sparse_inputs:
if self.sync_mode == "gpubox":
emb = paddle.fluid.contrib.sparse_embedding(
emb = paddle.static.nn.sparse_embedding(
input=s_input,
size=[self.sparse_feature_number, self.sparse_feature_dim],
param_attr=paddle.ParamAttr(name="embedding"),
......
......@@ -20,7 +20,7 @@ sys.path.append('../../python/paddle/fluid/tests/unittests')
from get_test_cover_info import record_op_test
import paddle
from paddle.fluid.contrib.layers.nn import pow2_decay_with_linear_warmup
from paddle.incubate.layers.nn import pow2_decay_with_linear_warmup
from paddle.optimizer.lr import LinearWarmup, PolynomialDecay
......