From 6ccc2a40aa65a3b56563ff932da77fff2005d4fe Mon Sep 17 00:00:00 2001
From: Yanxing Shi <48111042+Yanxing-Shi@users.noreply.github.com>
Date: Thu, 14 Oct 2021 19:17:04 +0800
Subject: [PATCH] add sparse_embedding doc (#36283)

* add sparse_embedding doc

* delete wrong space

* fix error for sample code

* fix error for doc compile

* delete __all__

* modify sample code
---
 python/paddle/fluid/contrib/layers/nn.py | 111 ++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py
index eb2c94b2010..99ede353c10 100644
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -971,12 +971,121 @@ def sparse_embedding(input,
                      table_class="CommonSparseTable",
                      param_attr=None,
                      dtype='float32'):
+    r"""
+    :api_attr: Static Graph
+
+    This OP serves as the Embedding Lookup layer for large-scale sparse training in
+    parameter server mode; use it there instead of paddle.nn.functional.embedding.
+
+    The operator looks up the embedding vectors of the ids provided by :attr:`input` .
+    It automatically constructs a 2D embedding matrix based on the input :attr:`size` 
+    (vocab_size, emb_size) and :attr:`dtype` .
+
+    The shape of output Tensor is generated by appending an emb_size dimension to the
+    last dimension of the input Tensor shape.
+
+    **Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` , otherwise
+    the program will throw an exception and exit.
+
+    .. code-block:: text
+
+        Case 1:
+
+        input is a Tensor. padding_idx = -1
+            input.data = [[1, 3], [2, 4], [4, 127]]
+            input.shape = [3, 2]
+        Given size = [128, 16]
+        output is a Tensor:
+            out.shape = [3, 2, 16]
+            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
+                        [0.345421456, 0.524563927, ..., 0.144534654]],
+
+                        [[0.345249859, 0.124939536, ..., 0.194353745],
+                        [0.945345345, 0.435394634, ..., 0.435345365]],
+                        
+                        [[0.945345345, 0.435394634, ..., 0.435345365],
+                        [0.0,         0.0,         ..., 0.0        ]]]  # padding data
+        Because the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
+        It will pad all-zero data when an id is 127.
+        
+        Case 2:
+
+        input is a LoDTensor with 1-level LoD. padding_idx = 0
+            input.lod = [[2, 3]]
+            input.data = [[1], [3], [2], [4], [0]]
+            input.shape = [5, 1]
+        Given size = [128, 16]
+        output is a LoDTensor:
+            out.lod = [[2, 3]]
+            out.shape = [5, 1, 16]
+            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
+                        [[0.345421456, 0.524563927, ..., 0.144534654]],
+                        [[0.345249859, 0.124939536, ..., 0.194353745]],
+                        [[0.945345345, 0.435394634, ..., 0.435345365]],
+                        [[0.0,         0.0,         ..., 0.0        ]]]  # padding data
+        It will pad all-zero data when an id is 0.
+
+    Args:
+        input(Variable): A Tensor or LoDTensor with type int64, which contains the id 
+            information. The value of the input id should satisfy :math:`0<= id < size[0]` .
+        size(tuple|list): The shape of lookup table parameter (vocab_size, emb_size). It 
+            should have two elements, which indicate the size of the dictionary of embeddings
+            and the size of each embedding vector respectively. The initial parameter size 
+            is 0 in the large-scale sparse scenario, which will gradually expand with the 
+            training. So if vocab_size is temporarily useless, its value can be any integer.
+            The emb_size is the dimensional configuration of the word embedding weight parameter.
+        padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-vocab_size, vocab_size). 
+            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
+            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever 
+            lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated 
+            while training. If set to None, it has no effect on the output. Default: None.
+        is_test(bool, optional): Training or prediction mode. In prediction mode (is_test=True),
+            the output is not initialized and created, and it is filled with 0 and returned. Default: False.
+        entry(str, optional): Entry config with parameter server whose value is ProbabilityEntry, 
+            CountFilterEntry or None. Default: None.
+        table_class(str, optional): The type of the sparse table. The value can be CommonSparseTable 
+            or SSDSparseTable. The default is CommonSparseTable.
+        param_attr(ParamAttr, optional): To specify the weight parameter property. Default: None, which means the
+            default weight parameter property is used. In addition, user-defined or pre-trained word 
+            vectors can be loaded with the :attr:`param_attr` parameter. The local word vector needs 
+            to be transformed into numpy format, and the shape of local word vector should be consistent 
+            with :attr:`size` .
+        dtype(str|core.VarDesc.VarType): It refers to the data type of output Tensor. It must be float32 or 
+            float64. Default: float32.
+            
+    Returns:
+        Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
+    
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            
+            paddle.enable_static()
+            sparse_feature_dim = 1024
+            embedding_size = 64
+
+            # A feature participates in training only after it has appeared at least 10 times.
+            entry = paddle.distributed.CountFilterEntry(10)
+
+            input = paddle.static.data(name='ins', shape=[1], dtype='int64')
+        
+            emb = paddle.static.nn.sparse_embedding(
+                input=input,
+                size=[sparse_feature_dim, embedding_size],
+                is_test=False,
+                entry=entry,
+                param_attr=paddle.ParamAttr(name="SparseFeatFactors",
+                initializer=paddle.nn.initializer.Uniform()))
+
+    """
+
     helper = LayerHelper('sparse_embedding', **locals())
 
     check_variable_and_dtype(input, 'input', ['int64'],
                              'fluid.contrib.layers.sparse_embedding')
 
-    check_dtype(dtype, 'dtype', ['float32'],
+    check_dtype(dtype, 'dtype', ['float32', 'float64'],
                 'paddle.static.nn.sparse_embedding')
 
     w = helper.create_parameter(
-- 
GitLab