diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 1eefbe537c89aa94abfef6df5f1bd21683bf8efe..1ea6861dde0c740c1267367ec7bb51141774c86e 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -672,7 +672,7 @@ paddle.fluid.dygraph.BatchNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'inc paddle.fluid.dygraph.BatchNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.BatchNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.BatchNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.Embedding ('paddle.fluid.dygraph.nn.Embedding', ('document', 'b1b1ed9dc2125c3e16ee08113605fcb4')) +paddle.fluid.dygraph.Embedding ('paddle.fluid.dygraph.nn.Embedding', ('document', 'dec90fba70f93ff5dac2f4ed0704dbdd')) paddle.fluid.dygraph.Embedding.__init__ (ArgSpec(args=['self', 'name_scope', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Embedding.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1')) paddle.fluid.dygraph.Embedding.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995')) diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 6a4f80a5605dd10bf6e805d666f9945bcd7efe52..7af0be58387960c264b32472ddeae3ab5a7ea68b 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ 
b/python/paddle/fluid/dygraph/nn.py @@ -1357,29 +1357,72 @@ class Embedding(layers.Layer): """ **Embedding Layer** - This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in - a lookup table. The result of this lookup is the embedding of each ID in the - :attr:`input`. - All the input variables are passed in as local variables to the LayerHelper constructor + This interface is used to construct a callable object of the ``Embedding`` class. + For specific usage, refer to code examples. It implements the function of the Embedding Layer. + This layer is used to look up the embedding vectors of the ids provided by :attr:`input` . + It automatically constructs a 2D embedding matrix based on the + input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . + + This layer requires that the last dimension of the input Tensor shape be equal to 1. The shape + of output Tensor is generated by replacing the last dimension of the input Tensor shape + with emb_size. + + The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` , + otherwise the program will throw an exception and exit. + + .. code-block:: text + + Case 1: + + input is a Tensor. padding_idx = -1 + input.data = [[[1], [3]], [[2], [4]], [[4], [127]]] + input.shape = [3, 2, 1] + Given size = [128, 16] + output is a Tensor: + out.shape = [3, 2, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654]], + + [[0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365]], + + [[0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]]] # padding data + Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 + It will pad all-zero data when the id is 127. Parameters: name_scope(str): The name of this class. size(tuple|list): The shape of the look up table parameter. 
It should have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively. - is_sparse(bool): The flag indicating whether to use sparse update. Default: False - is_distributed(bool): Whether to run lookup table from remote parameter server. Default: False. - padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. - Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters - it in :attr:`input`. If :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + dim`. Default: None. - param_attr(ParamAttr): Parameters for this layer. Default: None. - dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc. Default: 'float32'. + is_sparse(bool): The flag indicating whether to use sparse update. This parameter only + affects the performance of the backwards gradient update. It is recommended to set + True because sparse update is faster. But some optimizers do not support sparse update, + such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` , + :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` , + :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` . + In these cases, is_sparse must be False. Default: False. + is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used + in multi-machine distributed CPU training. Default: False. + padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). + If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted + to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup + encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. 
+ If set to None, it has no effect on the output. Default: None. + param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the + default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, + user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. + The local word vector needs to be transformed into numpy format, and the shape of local word + vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` + is used to load custom or pre-trained word vectors. See code example 2 for details. + dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor. + It must be "float32" or "float64". Default: "float32". - Attributes: - weight (Parameter): the learnable weights of this layer. + Attribute: + **weight** (Parameter): the learnable weights of this layer. Returns: - Variable: The tensor variable storing the embeddings of the \ - supplied inputs. + Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . 
Examples: @@ -1389,6 +1432,7 @@ class Embedding(layers.Layer): import paddle.fluid.dygraph.base as base import numpy as np + # example 1 inp_word = np.array([[[1]]]).astype('int64') dict_size = 20 with fluid.dygraph.guard(): @@ -1398,6 +1442,21 @@ class Embedding(layers.Layer): param_attr='emb.w', is_sparse=False) static_rlt3 = emb(base.to_variable(inp_word)) + + # example 2: load custom or pre-trained word vectors + weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format + w_param_attrs = fluid.ParamAttr( + name="emb_weight", + learning_rate=0.5, + initializer=fluid.initializer.NumpyArrayInitializer(weight_data), + trainable=True) + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding( + name_scope='embedding', + size=[128, 100], + param_attr= w_param_attrs, + is_sparse=False) + static_rlt3 = emb(base.to_variable(inp_word)) """ def __init__(self,