diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index eb2c94b20106c5699fc62f65a815bb46bd1057e2..ded963b3a39c74a15c288dcbe4c1281678e9a3b8 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -971,12 +971,121 @@ def sparse_embedding(input, table_class="CommonSparseTable", param_attr=None, dtype='float32'): + r""" + :api_attr: Static Graph + + The OP is used as the operator of the Embedding Lookup layer in the large-scale + sparse training of the parameter server mode, instead of using the paddle.nn.functional.embedding. + + The operator is used to look up the embedding vectors of the ids provided by :attr:`input` . + It automatically constructs a 2D embedding matrix based on the input :attr:`size` + (vocab_size, emb_size) and :attr:`dtype` . + + The shape of output Tensor is generated by appending an emb_size dimension to the + last dimension of the input Tensor shape. + + **Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` , otherwise + the program will throw an exception and exit. + + .. code-block:: text + + Case 1: + + input is a Tensor. padding_idx = -1 + input.data = [[1, 3], [2, 4], [4, 127]] + input.shape = [3, 2] + Given size = [128, 16] + output is a Tensor: + out.shape = [3, 2, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654]], + + [[0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365]], + + [[0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]]] # padding data + Because the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 + It will pad all-zero data when ids is 127. + + Case 2: + + input is a LoDTensor with 1-level LoD. 
padding_idx = 0 + input.lod = [[2, 3]] + input.data = [[1], [3], [2], [4], [0]] + input.shape = [5, 1] + Given size = [128, 16] + output is a LoDTensor: + out.lod = [[2, 3]] + out.shape = [5, 1, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]], + [[0.345421456, 0.524563927, ..., 0.144534654]], + [[0.345249859, 0.124939536, ..., 0.194353745]], + [[0.945345345, 0.435394634, ..., 0.435345365]], + [[0.0, 0.0, ..., 0.0 ]]] # padding data + It will pad all-zero data when ids is 0. + + Args: + input(Variable): A Tensor or LoDTensor with type int64, which contains the id + information. The value of the input id should satisfy :math:`0<= id < size[0]` . + size(tuple|list): The shape of lookup table parameter (vocab_size, emb_size). It + should have two elements which indicate the size of the dictionary of embeddings + and the size of each embedding vector respectively. The initial parameter size + is 0 in the large-scale sparse scenario, which will gradually expand with the + training. So if vocab_size is temporarily useless, its value can be any integer. + The emb_size is the dimensional configuration of the word embedding weight parameter. + padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-vocab_size, vocab_size). + If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted + to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever + lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated + while training. If set None, it has no effect on the output. Default: None. + is_test(bool, optional): Training or prediction mode. In prediction mode (is_test=True), + the output is not initialized and created, and it is filled with 0 and returned. Default: False. + entry(str, optional): Entry config with parameter server whose value is ProbabilityEntry, + CountFilterEntry or None. Default: None. 
+ table_class(str, optional): The type of the sparse table. The value can be CommonSparseTable + or SSDSparseTable. The default is CommonSparseTable. + param_attr(ParamAttr, optional): To specify the weight parameter property. Default: None, which means the + default weight parameter property is used. In addition, user-defined or pre-trained word + vectors can be loaded with the :attr:`param_attr` parameter. The local word vector needs + to be transformed into numpy format, and the shape of local word vector should be consistent + with :attr:`size` . + dtype(str|core.VarDesc.VarType): It refers to the data type of output Tensor. It must be float32 or + float64. Default: float32. + + Returns: + Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . + + Examples: + .. code-block:: python + + import paddle + + paddle.enable_static() + sparse_feature_dim = 1024 + embedding_size = 64 + + # Only features that appear at least 10 times will participate in the training. + entry = paddle.distributed.CountFilterEntry(10) + + input = paddle.static.data(name='ins', shape=[1], dtype='int64') + + emb = paddle.static.nn.sparse_embedding( + input=input, + size=[sparse_feature_dim, embedding_size], + is_test=False, + entry=entry, + param_attr=paddle.ParamAttr(name="SparseFeatFactors", + initializer=paddle.nn.initializer.Uniform())) + + """ + helper = LayerHelper('sparse_embedding', **locals()) check_variable_and_dtype(input, 'input', ['int64'], 'fluid.contrib.layers.sparse_embedding') - check_dtype(dtype, 'dtype', ['float32'], + check_dtype(dtype, 'dtype', ['float32', 'float64'], 'paddle.static.nn.sparse_embedding') w = helper.create_parameter(