From df7cc3a0d633ce2a7967f63bae7515f453945c99 Mon Sep 17 00:00:00 2001
From: GGBond8488 <33050871+GGBond8488@users.noreply.github.com>
Date: Thu, 23 Feb 2023 15:26:45 +0800
Subject: [PATCH] Fluid clean: move dygraph profiler, fluid.input.one_hot and
 fluid.input.embedding (#50141)

* remove dygraph.profiler

* remove fluid.input.one_hot and move embedding to paddle.static.nn

* fix unittest error

* fix type error

* fix type error

* fix xpu test error

* fix sample code error

* fix sample code error

* fix sample code error

* remove test.py

* remove Variable in docstring
---
 python/paddle/fluid/__init__.py               |   3 -
 python/paddle/fluid/dygraph/profiler.py       |  28 --
 python/paddle/fluid/input.py                  | 348 ------------------
 python/paddle/fluid/layers/nn.py              |   2 +-
 .../tests/unittests/dist_fleet_simnet_bow.py  |   6 +-
 .../unittests/mlu/test_one_hot_v2_op_mlu.py   |   8 +-
 .../unittests/npu/test_one_hot_v2_op_npu.py   |   6 +-
 .../fluid/tests/unittests/test_backward.py    |   8 +-
 .../test_embedding_id_stop_gradient.py        |   4 +-
 .../test_imperative_load_static_param.py      |   4 +-
 .../unittests/test_lookup_table_v2_bf16_op.py |   2 +-
 .../unittests/test_lookup_table_v2_op.py      |  16 +-
 .../tests/unittests/test_one_hot_v2_op.py     |  10 +-
 .../tests/unittests/test_run_program_op.py    |   2 +-
 .../fluid/tests/unittests/test_sgd_op.py      |   4 +-
 .../xpu/test_lookup_table_v2_op_xpu.py        |  14 +-
 .../unittests/xpu/test_one_hot_v2_op_xpu.py   |  10 +-
 .../tests/unittests/xpu/test_sgd_op_xpu.py    |   4 +-
 python/paddle/static/nn/__init__.py           |   2 +-
 python/paddle/static/nn/common.py             | 161 ++++++++
 20 files changed, 217 insertions(+), 425 deletions(-)
 delete mode 100644 python/paddle/fluid/dygraph/profiler.py
 delete mode 100644 python/paddle/fluid/input.py

diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 6ab933d3ea8..a7019bd9541 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -66,7 +66,6 @@ from . import average
 from . import metrics
 from . import transpiler
 from . import incubate
-from .input import embedding, one_hot
 from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
@@ -129,8 +128,6 @@ __all__ = (
     + [
         'io',
         'initializer',
-        'embedding',
-        'one_hot',
         'layers',
         'contrib',
         'data',
diff --git a/python/paddle/fluid/dygraph/profiler.py b/python/paddle/fluid/dygraph/profiler.py
deleted file mode 100644
index 8c4e07ce78d..00000000000
--- a/python/paddle/fluid/dygraph/profiler.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .. 
import core - -__all__ = [ - 'start_gperf_profiler', - 'stop_gperf_profiler', -] - - -def start_gperf_profiler(): - core.start_imperative_gperf_profiler() - - -def stop_gperf_profiler(): - core.stop_imperative_gperf_profiler() diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py deleted file mode 100644 index 255a17d6483..00000000000 --- a/python/paddle/fluid/input.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings -from .framework import Variable, _non_static_mode, static_only -from .layer_helper import LayerHelper -from .data_feeder import check_variable_and_dtype, check_dtype -from ..utils import deprecated - -__all__ = ['one_hot', 'embedding'] - - -@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') -def one_hot(input, depth, allow_out_of_range=False): - """ - :alias_main: paddle.nn.functional.one_hot - :alias: paddle.nn.functional.one_hot,paddle.nn.functional.common.one_hot - :old_api: paddle.fluid.one_hot - - The operator converts each id in the input to an one-hot vector with a - depth length. The value in the vector dimension corresponding to the id - is 1, and the value in the remaining dimension is 0. - - The shape of output Tensor or LoDTensor is generated by appending depth dimension - behind the last dimension of the input shape. - - .. code-block:: text - - Example 1 (allow_out_of_range=False): - - input: - X.shape = [4] - X.data = [1, 1, 3, 0] - depth = 4 - - output: - Out.shape = [4, 4] - Out.data = [[0., 1., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 0., 1.], - [1., 0., 0., 0.]] - - Example 2 (allow_out_of_range=True): - - input: - X.shape = [4] - X.data = [1, 1, 5, 0] - depth = 4 - allow_out_of_range = True - - output: - Out.shape = [4, 4] - Out.data = [[0., 1., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 0., 0.], # This id is 5, which goes beyond depth, so set it all-zeros data. - [1., 0., 0., 0.]] - - Example 3 (allow_out_of_range=False): - - input: - X.shape = [4] - X.data = [1, 1, 5, 0] - depth = 4 - allow_out_of_range = False - - output: Throw an exception for Illegal value - The second dimension in X is 5, which is greater than depth. - Allow_out_of_range =False means that does not allow the word id to exceed depth, - so it throws an exception. - - - Args: - input(Variable): Tensor or LoDTensor with shape :math:`[N_1, N_2, ..., N_k]` , - which contains at least one dimension. The data type is int32 or int64. - depth(int): An integer defining the depth of the one hot dimension. If input - is word id, depth is generally the dictionary size. - allow_out_of_range(bool): A bool value indicating whether the input - indices could be out of range :math:`[0, depth)` . When input indices are - out of range, exceptions :code:`Illegal value` is raised if :attr:`allow_out_of_range` - is False, or zero-filling representations is created if it is set True. - Default: False. - - Returns: - Variable: The one-hot representations of input. 
A Tensor or LoDTensor with type float32. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - - # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4]. - label = fluid.data(name="label", shape=[4], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=4) - """ - check_variable_and_dtype(input, 'input', ['int32', 'int64'], 'one_hot_v2') - helper = LayerHelper("one_hot_v2", **locals()) - - one_hot_out = helper.create_variable_for_type_inference(dtype='float32') - - if _non_static_mode(): - inputs = {'X': input} - attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range} - else: - if not isinstance(depth, Variable): - # user attribute - inputs = {'X': input} - attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range} - else: - depth.stop_gradient = True - inputs = {'X': input, 'depth_tensor': depth} - attrs = {'allow_out_of_range': allow_out_of_range} - helper.append_op( - type="one_hot_v2", - inputs=inputs, - attrs=attrs, - outputs={'Out': one_hot_out}, - stop_gradient=True, - ) - return one_hot_out - - -@static_only -@deprecated(since='2.0.0', update_to='paddle.nn.functional.embedding') -def embedding( - input, - size, - is_sparse=False, - is_distributed=False, - padding_idx=None, - param_attr=None, - dtype='float32', -): - r""" - :api_attr: Static Graph - - The operator is used to lookup embeddings vector of ids provided by :attr:`input` . - It automatically constructs a 2D embedding matrix based on the - input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . - - The shape of output Tensor is generated by appending an emb_size dimension to the - last dimension of the input Tensor shape. - - **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , - otherwise the program will throw an exception and exit. - - .. code-block:: text - - Case 1: - - input is a Tensor. padding_idx = -1 - input.data = [[1, 3], [2, 4], [4, 127]] - input.shape = [3, 2] - Given size = [128, 16] - output is a Tensor: - out.shape = [3, 2, 16] - out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], - [0.345421456, 0.524563927, ..., 0.144534654]], - - [[0.345249859, 0.124939536, ..., 0.194353745], - [0.945345345, 0.435394634, ..., 0.435345365]], - - [[0.945345345, 0.435394634, ..., 0.435345365], - [0.0, 0.0, ..., 0.0 ]]] # padding data - The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 - It will pad all-zero data when ids is 127. - - Case 2: - - input is a LoDTensor with 1-level LoD. padding_idx = 0 - input.lod = [[2, 3]] - input.data = [[1], [3], [2], [4], [0]] - input.shape = [5, 1] - Given size = [128, 16] - output is a LoDTensor: - out.lod = [[2, 3]] - out.shape = [5, 1, 16] - out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]], - [[0.345421456, 0.524563927, ..., 0.144534654]], - [[0.345249859, 0.124939536, ..., 0.194353745]], - [[0.945345345, 0.435394634, ..., 0.435345365]], - [[0.0, 0.0, ..., 0.0 ]]] # padding data - It will pad all-zero data when ids is 0. - - - Args: - input(Variable): A Tensor or LoDTensor with type int64, which contains the id information. - The value of the input id should satisfy :math:`0<= id < size[0]` . - size(tuple|list): The shape of lookup table parameter. It should have two elements which - indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. - is_sparse(bool): The flag indicating whether to use sparse update. 
This parameter only - affects the performance of the backwards gradient update. It is recommended to set - True because sparse update is faster. But some optimizer does not support sparse update - In these case, is_sparse must be False. Default: False. - is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used - in multi-machine distributed CPU training. Default: False. - padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). - If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted - to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup - encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. - If set None, it makes no effect to output. Default: None. - param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the - default weight parameter property is used. In addition, - user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. - The local word vector needs to be transformed into numpy format, and the shape of local word - vector should be consistent with :attr:`size` . - dtype(str): It refers to the data type of output Tensor. - It must be float32 or float64. Default: float32. - - Returns: - Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . - - Static Examples: - .. code-block:: python - - import paddle - import numpy as np - paddle.enable_static() - - x = paddle.static.data(name="x", shape = [2, 4], dtype=np.int64) - embedding = paddle.nn.Embedding(10, 3, - weight_attr=paddle.nn.initializer.Constant(value=1.0)) - adam = paddle.optimizer.SGD(parameters=[embedding.weight], learning_rate=0.01) - output = embedding(x) - m_output=paddle.mean(output) - - adam.minimize(m_output) - - place = paddle.CPUPlace() - exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) - - x = np.array([[7, 2, 4, 5],[4, 3, 2, 9]], dtype=np.int64) - - # x is a Numpy. - # x.data = [[7, 2, 4, 5], [4, 3, 2, 9]] - # x.shape = [2, 4] - - out, = exe.run(paddle.static.default_main_program(), feed={'x':x}, fetch_list=[output]) - - # out is a Numpy. - # out.data = [[1., 1., 1.], - # [1., 1., 1.], - # [1., 1., 1.], - # [1., 1., 1.]], - # - # [[1., 1., 1.], - # [1., 1., 1.], - # [1., 1., 1.], - # [0., 0., 0.]]] - # out.shape = [2, 4, 3] - - - Dygraph Examples: - .. code-block:: python - - import paddle - import numpy as np - - x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64) - - # x is a Tensor. 
- # x.data = [[3], [4], [5]] - # x.shape = [3, 1] - x = paddle.to_tensor(x_data, stop_gradient=False) - - # embedding weight shape = [10, 3] - embedding = paddle.nn.Embedding(10, 3, sparse=True) - - # embedding weight data = [10, 3] - w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32) - - # embedding.weight.shape = [10, 3] - # embedding.weight.data = - # [[2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.]] - embedding.weight.set_value(w0) - - adam = paddle.optimizer.Adam( - parameters=[embedding.weight], learning_rate=0.01) - adam.clear_grad() - - # out is Tensor - # out.shape: [3, 1, 3] - # out.layout: NCHW - # out.dtype: float - # out.data: [2 2 2 2 2 2 2 2 2] - out = embedding(x) - - out.backward() - adam.step() - - """ - - helper = LayerHelper('embedding', **locals()) - check_variable_and_dtype(input, 'input', ['int64'], 'fluid.embedding') - check_dtype( - dtype, - 'dtype', - ['float16', 'float32', 'float64', 'uint16'], - 'fluid.embedding', - ) - remote_prefetch = is_sparse and (not is_distributed) - if remote_prefetch: - assert is_sparse is True and is_distributed is False - w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False - ) - tmp = helper.create_variable_for_type_inference(dtype) - padding_idx = ( - -1 - if padding_idx is None - else padding_idx - if padding_idx >= 0 - else (size[0] + padding_idx) - ) - helper.append_op( - type='lookup_table_v2', - inputs={'Ids': input, 'W': w}, - outputs={'Out': tmp}, - attrs={ - 'is_sparse': is_sparse, - 'is_distributed': is_distributed, - 'remote_prefetch': remote_prefetch, - 'padding_idx': padding_idx, - }, - ) - return tmp diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 2816f6d4906..ab94a66953e 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -210,7 +210,7 @@ def embedding( data = fluid.data(name='x', shape=[None, 1], dtype='int64') # example 1 - emb_1 = fluid.embedding(input=data, size=[128, 64]) + emb_1 = paddle.static.nn.embedding(input=data, size=[128, 64]) # example 2: load custom or pre-trained word vectors weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 4e8f6a42506..22ae37010f7 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -119,7 +119,7 @@ def train_network( ) # embedding - q_emb = fluid.embedding( + q_emb = paddle.static.nn.embedding( input=q, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -147,7 +147,7 @@ def train_network( ) # embedding - pt_emb = fluid.embedding( + pt_emb = paddle.static.nn.embedding( input=pt, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -176,7 +176,7 @@ def train_network( ) # embedding - nt_emb = fluid.embedding( + nt_emb = paddle.static.nn.embedding( input=nt, is_distributed=is_distributed, size=[dict_dim, emb_dim], diff --git a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py index 1fc06f72e89..154a7780d68 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py @@ -196,10 +196,6 @@ class 
TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth - ) - one_hot_label = paddle.nn.functional.one_hot( fluid.dygraph.to_variable(label), depth ) @@ -208,7 +204,7 @@ class TestOneHotOpApi(unittest.TestCase): def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=depth) label_data = np.array( [np.random.randint(0, 10 - 1) for i in range(6)] @@ -239,7 +235,7 @@ class BadInputTestOnehotV2(unittest.TestCase): shape=[4], dtype="float32", ) - one_hot_label = fluid.one_hot(input=label, depth=4) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py index 7b5633c335c..dc4873500bf 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py @@ -219,13 +219,13 @@ class TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(paddle.NPUPlace(0)): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth + one_hot_label = paddle.nn.functional.one_hot( + x=fluid.dygraph.to_variable(label), num_classes=depth ) def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=depth) place = fluid.NPUPlace(0) label_data = np.array( diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py index c3d44d7b343..41fa23658c4 100644 --- a/python/paddle/fluid/tests/unittests/test_backward.py +++ b/python/paddle/fluid/tests/unittests/test_backward.py @@ -234,13 +234,13 @@ class SimpleNet(BackwardNet): ) # shared layer, the grad of 'w2v' will be summed and renamed. 
# To test _addup_repetitive_outputs_ - x_emb = fluid.embedding( + x_emb = paddle.static.nn.embedding( x, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) - x2_emb = fluid.embedding( + x2_emb = paddle.static.nn.embedding( x2, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) - x3_emb = fluid.embedding( + x3_emb = paddle.static.nn.embedding( x3, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) # merge layers @@ -331,7 +331,7 @@ class TestAppendBackwardWithError(unittest.TestCase): def build_net(self): x = fluid.data(name='x', shape=[None, 13], dtype='int64') y = fluid.data(name='y', shape=[None, 1], dtype='float32') - x_emb = fluid.embedding(x, size=[100, 256]) + x_emb = paddle.static.nn.embedding(x, size=[100, 256]) y_predict = paddle.static.nn.fc(x=x_emb, size=1, name='my_fc') loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_loss = paddle.mean(loss) diff --git a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py index d5facd83e2e..7c5776cb4ab 100644 --- a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py @@ -58,7 +58,9 @@ class TestEmbeddingIdStopGradientBase(unittest.TestCase): x.stop_gradient = stop_gradient - emb = fluid.embedding(x, size=[10, 32], dtype='float32') + emb = paddle.static.nn.embedding( + x, size=[10, 32], dtype='float32' + ) avg_cost = paddle.mean(emb, name='mean_loss') optim = fluid.optimizer.SGD(learning_rate=0.001) optim.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index abf7a95bfb7..d7a7e535991 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -60,8 +60,8 @@ class TestDygraphLoadStatic(unittest.TestCase): batchnorm_out_2 = paddle.static.nn.batch_norm(batchnorm_in) emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64') - emb_out_1 = fluid.embedding(emb_in, [1000, 100]) - emb_out_2 = fluid.embedding(emb_in, [2000, 200]) + emb_out_1 = paddle.static.nn.embedding(emb_in, [1000, 100]) + emb_out_2 = paddle.static.nn.embedding(emb_in, [2000, 200]) layernorm = fluid.data(name="ln", shape=[None, 10], dtype='float32') layernorm_1 = paddle.static.nn.layer_norm(layernorm) diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index 0f6affcd26c..f951d524242 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -105,7 +105,7 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): x = paddle.static.data( name='x', shape=[-1] + self.ids_shape, dtype='int64' ) - self.emb = fluid.input.embedding( + self.emb = paddle.static.nn.embedding( input=x, size=self.w_shape, param_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 6aea5ef118c..dc0c8f3174b 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -203,7 +203,7 @@ class TestLookupTableIsSparse(unittest.TestCase): with 
fluid.program_guard(main_program, fluid.Program()): x = paddle.static.data(name='x', shape=[-1, 5], dtype='int64') y_ = paddle.static.data(name='y_', shape=[-1, 5], dtype='float32') - emb = fluid.input.embedding( + emb = paddle.static.nn.embedding( input=x, size=[10, 16], param_attr=fluid.ParamAttr( @@ -246,7 +246,7 @@ class TestLookupTableIsSparse(unittest.TestCase): class TestLookupTableApi(unittest.TestCase): def test_api(self): x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64') - emb = fluid.embedding(input=x, size=[128, 64]) + emb = paddle.static.nn.embedding(input=x, size=[128, 64]) place = fluid.CPUPlace() x_data = np.random.randint(0, 127, [2, 20]).astype("int64") @@ -269,25 +269,29 @@ class TestEmbedOpError(unittest.TestCase): def test_Variable(): # the input type must be Variable - fluid.embedding(input=input_data, size=(10, 64)) + paddle.static.nn.embedding(input=input_data, size=(10, 64)) self.assertRaises(TypeError, test_Variable) def test_input_dtype(): # the input dtype must be int64 input = fluid.data(name='x1', shape=[4, 6], dtype='float32') - fluid.embedding(input=input, size=(10, 64)) + paddle.static.nn.embedding(input=input, size=(10, 64)) self.assertRaises(TypeError, test_input_dtype) def test_param_dtype(): # dtype must be float32 or float64 input2 = fluid.data(name='x2', shape=[4, 6], dtype='int64') - fluid.embedding(input=input2, size=(10, 64), dtype='int64') + paddle.static.nn.embedding( + input=input2, size=(10, 64), dtype='int64' + ) self.assertRaises(TypeError, test_param_dtype) input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64') - fluid.embedding(input=input3, size=(10, 64), dtype='float16') + paddle.static.nn.embedding( + input=input3, size=(10, 64), dtype='float16' + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index 85993b34a29..311ee24887b 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -188,10 +188,6 @@ class TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth - ) - one_hot_label = paddle.nn.functional.one_hot( fluid.dygraph.to_variable(label), depth ) @@ -202,7 +198,7 @@ class TestOneHotOpApi(unittest.TestCase): def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") label.desc.set_need_check_feed(False) - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=depth) place = fluid.CPUPlace() label_data = np.array( @@ -231,7 +227,9 @@ class BadInputTestOnehotV2(unittest.TestCase): dtype="float32", ) label.desc.set_need_check_feed(False) - one_hot_label = fluid.one_hot(input=label, depth=4) + one_hot_label = paddle.nn.functional.one_hot( + x=label, num_classes=4 + ) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index 35c31deb3f8..51f4bb1686f 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -463,7 +463,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): x = paddle.static.data( name=self.input_names['X'][0], shape=[-1, 5], dtype='int64' ) - emb 
= fluid.input.embedding( + emb = paddle.static.nn.embedding( input=x, size=[10, 16], param_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py index 801866c9023..f9d6b729ce4 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py @@ -200,7 +200,9 @@ class TestSGDOpWithLargeInput(unittest.TestCase): label = fluid.layers.fill_constant( shape=[1, 150], value=0.5, dtype='float32' ) - emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32') + emb = paddle.static.nn.embedding( + input=data, size=(10000000, 150), dtype='float32' + ) out = paddle.nn.functional.normalize(x=emb, axis=-1) cost = paddle.nn.functional.square_error_cost(input=out, label=label) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py index 94645bcf9b2..fcbf724ccbc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py @@ -168,7 +168,7 @@ class TestLookupTableWithTensorIdsWIsSelectedRows( class TestLookupTableApi(unittest.TestCase): def test_api(self): x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64') - emb = fluid.embedding(input=x, size=[128, 64]) + emb = paddle.static.nn.embedding(input=x, size=[128, 64]) place = paddle.XPUPlace(0) x_data = np.random.randint(0, 127, [2, 20]).astype("int64") @@ -191,25 +191,29 @@ class TestEmbedOpError(unittest.TestCase): def test_Variable(): # the input type must be Variable - fluid.embedding(input=input_data, size=(10, 64)) + paddle.static.nn.embedding(input=input_data, size=(10, 64)) self.assertRaises(TypeError, test_Variable) def test_input_dtype(): # the input dtype must be int64 input = fluid.data(name='x1', shape=[4, 6], dtype='float32') - fluid.embedding(input=input, size=(10, 64)) + paddle.static.nn.embedding(input=input, size=(10, 64)) self.assertRaises(TypeError, test_input_dtype) def test_param_dtype(): # dtype must be float32 or float64 input2 = fluid.data(name='x2', shape=[4, 6], dtype='int64') - fluid.embedding(input=input2, size=(10, 64), dtype='int64') + paddle.static.nn.embedding( + input=input2, size=(10, 64), dtype='int64' + ) self.assertRaises(TypeError, test_param_dtype) input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64') - fluid.embedding(input=input3, size=(10, 64), dtype='float16') + paddle.static.nn.embedding( + input=input3, size=(10, 64), dtype='float16' + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py index a49ad6d327d..e68b88ef860 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py @@ -162,13 +162,13 @@ class TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth + one_hot_label = paddle.nn.functional.one_hot( + x=fluid.dygraph.to_variable(label), num_classes=depth ) def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, 
num_classes=depth)
         place = fluid.XPUPlace(0)

         label_data = np.array(
@@ -196,7 +196,9 @@ class BadInputTestOnehotV2(unittest.TestCase):
                     shape=[4],
                     dtype="float32",
                 )
-                one_hot_label = fluid.one_hot(input=label, depth=4)
+                one_hot_label = paddle.nn.functional.one_hot(
+                    x=label, num_classes=4
+                )

         self.assertRaises(TypeError, test_bad_x)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
index 06abe1f76b0..b859c06b7e1 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
@@ -72,7 +72,9 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
         label = fluid.layers.fill_constant(
             shape=[1, 150], value=0.5, dtype='float32'
         )
-        emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32')
+        emb = paddle.static.nn.embedding(
+            input=data, size=(10000, 150), dtype='float32'
+        )
         out = paddle.nn.functional.normalize(x=emb, axis=-1)

         cost = paddle.nn.functional.square_error_cost(input=out, label=label)
diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py
index 53dd06bd1f5..6c528c68865 100755
--- a/python/paddle/static/nn/__init__.py
+++ b/python/paddle/static/nn/__init__.py
@@ -38,7 +38,7 @@ from .common import prelu  # noqa: F401
 from .common import layer_norm  # noqa: F401

-from ...fluid.input import embedding  # noqa: F401
+from .common import embedding  # noqa: F401
 from ...fluid.contrib.layers import sparse_embedding  # noqa: F401
 from ...fluid.layers import StaticRNN  # noqa: F401
diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py
index c7e0794430e..db68df4ea4e 100644
--- a/python/paddle/static/nn/common.py
+++ b/python/paddle/static/nn/common.py
@@ -3631,3 +3631,164 @@ def layer_norm(
     )

     return helper.append_activation(layer_norm_out)
+
+
+@static_only
+def embedding(
+    input,
+    size,
+    is_sparse=False,
+    is_distributed=False,
+    padding_idx=None,
+    param_attr=None,
+    dtype='float32',
+):
+    r"""
+    :api_attr: Static Graph
+
+    This operator looks up the embedding vectors of the ids provided by :attr:`input` .
+    It automatically constructs a 2D embedding matrix based on the
+    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
+
+    The shape of the output Tensor is generated by appending an emb_size dimension to the
+    last dimension of the input Tensor shape.
+
+    **Note:** The ids in :attr:`input` must satisfy :math:`0 <= id < size[0]` ,
+    otherwise the program will throw an exception and exit.
+
+    .. code-block:: text
+
+        Case 1:
+
+        input is a Tensor. padding_idx = -1
+            input.data = [[1, 3], [2, 4], [4, 127]]
+            input.shape = [3, 2]
+        Given size = [128, 16]
+        output is a Tensor:
+            out.shape = [3, 2, 16]
+            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
+                         [0.345421456, 0.524563927, ..., 0.144534654]],
+
+                        [[0.345249859, 0.124939536, ..., 0.194353745],
+                         [0.945345345, 0.435394634, ..., 0.435345365]],
+
+                        [[0.945345345, 0.435394634, ..., 0.435345365],
+                         [0.0, 0.0, ..., 0.0]]]  # padding data
+        Since the input padding_idx is less than 0, it is automatically
+        converted to padding_idx = -1 + 128 = 127, so the lookup returns
+        all-zero data whenever an id is 127.
+
+        Case 2:
+
+        input is a LoDTensor with 1-level LoD. padding_idx = 0
+            input.lod = [[2, 3]]
+            input.data = [[1], [3], [2], [4], [0]]
+            input.shape = [5, 1]
+        Given size = [128, 16]
+        output is a LoDTensor:
+            out.lod = [[2, 3]]
+            out.shape = [5, 1, 16]
+            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
+                        [[0.345421456, 0.524563927, ..., 0.144534654]],
+                        [[0.345249859, 0.124939536, ..., 0.194353745]],
+                        [[0.945345345, 0.435394634, ..., 0.435345365]],
+                        [[0.0, 0.0, ..., 0.0]]]  # padding data
+        The lookup returns all-zero data whenever an id is 0.
+
+
+    Args:
+        input(Tensor): A Tensor or LoDTensor with type int64, which contains the id information.
+            The value of the input id should satisfy :math:`0 <= id < size[0]` .
+        size(tuple|list): The shape of the lookup table parameter. It should have two elements, which
+            indicate the size of the dictionary of embeddings and the size of each embedding vector respectively.
+        is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
+            affects the performance of the backward gradient update. It is recommended to set it
+            to True because sparse update is faster; however, some optimizers do not support
+            sparse update, in which case is_sparse must be False. Default: False.
+        is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
+            in multi-machine distributed CPU training. Default: False.
+        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
+            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
+            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever the lookup
+            encounters :math:`padding\_idx` in an id, and the padding data will not be updated during training.
+            If set to None, it has no effect on the output. Default: None.
+        param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
+            default weight parameter property is used. In addition,
+            user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
+            The local word vectors need to be transformed into numpy format, and their shape
+            should be consistent with :attr:`size` .
+        dtype(str): The data type of the output Tensor.
+            It must be float32 or float64. Default: float32.
+
+    Returns:
+        Tensor: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
+
+    Static Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+
+            x = paddle.static.data(name="x", shape=[2, 4], dtype=np.int64)
+            output = paddle.static.nn.embedding(
+                x, (10, 3), param_attr=paddle.nn.initializer.Constant(value=1.0)
+            )
+            m_output = paddle.mean(output)
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor(place)
+            exe.run(paddle.static.default_startup_program())
+
+            x = np.array([[7, 2, 4, 5], [4, 3, 2, 9]], dtype=np.int64)
+
+            # x is a numpy array.
+            # x.data = [[7, 2, 4, 5], [4, 3, 2, 9]]
+            # x.shape = [2, 4]
+
+            out, = exe.run(paddle.static.default_main_program(), feed={'x': x}, fetch_list=[output])
+
+            # out is a numpy array.
+            # out.data = [[[1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [1., 1., 1.]],
+            #
+            #             [[1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [0., 0., 0.]]]
+            # out.shape = [2, 4, 3]
+    """
+
+    helper = LayerHelper('embedding', **locals())
+    check_variable_and_dtype(input, 'input', ['int64'], 'embedding')
+    check_dtype(
+        dtype,
+        'dtype',
+        ['float16', 'float32', 'float64', 'uint16'],
+        'embedding',
+    )
+    remote_prefetch = is_sparse and (not is_distributed)
+    if remote_prefetch:
+        assert is_sparse is True and is_distributed is False
+    w = helper.create_parameter(
+        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False
+    )
+    tmp = helper.create_variable_for_type_inference(dtype)
+    padding_idx = (
+        -1
+        if padding_idx is None
+        else padding_idx
+        if padding_idx >= 0
+        else (size[0] + padding_idx)
+    )
+    helper.append_op(
+        type='lookup_table_v2',
+        inputs={'Ids': input, 'W': w},
+        outputs={'Out': tmp},
+        attrs={
+            'is_sparse': is_sparse,
+            'is_distributed': is_distributed,
+            'remote_prefetch': remote_prefetch,
+            'padding_idx': padding_idx,
+        },
+    )
+    return tmp
-- 
GitLab
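
Migration note for downstream callers of the removed entry points, following
the replacements used throughout this patch: fluid.one_hot maps to
paddle.nn.functional.one_hot (the arguments are renamed from input/depth to
x/num_classes), and fluid.embedding / fluid.input.embedding map to
paddle.static.nn.embedding with unchanged keyword arguments. The sketch below
is illustrative only; the tensor values, shapes, and variable names are made
up for the example (borrowed from the docstrings and tests above), not
mandated by the patch:

    import numpy as np
    import paddle

    # Dygraph: fluid.one_hot(input=..., depth=4) becomes
    # paddle.nn.functional.one_hot(x, num_classes); the result is float32.
    label = paddle.to_tensor(np.array([1, 1, 3, 0], dtype=np.int64))
    one_hot_label = paddle.nn.functional.one_hot(label, num_classes=4)
    print(one_hot_label.shape)  # [4, 4]

    # Static graph: fluid.embedding / fluid.input.embedding become
    # paddle.static.nn.embedding; the keyword arguments keep their names.
    paddle.enable_static()
    x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64')
    emb = paddle.static.nn.embedding(input=x, size=[128, 64], dtype='float32')

The removed gperf hooks (start_gperf_profiler / stop_gperf_profiler) get no
direct replacement in this patch; the separate paddle.profiler module is the
supported profiling entry point in current releases.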