Commit 52dcc167 authored by: Z zhongpu, committed by: hong

fix Embedding API for dygraph (#20358)

* fix Embedding API for dygraph, test=develop, test=document_fix

* fix dtype, test=develop, test=document_fix
Parent f855a86c
@@ -672,7 +672,7 @@ paddle.fluid.dygraph.BatchNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'inc
paddle.fluid.dygraph.BatchNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac'))
paddle.fluid.dygraph.BatchNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.BatchNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Embedding ('paddle.fluid.dygraph.nn.Embedding', ('document', 'b1b1ed9dc2125c3e16ee08113605fcb4'))
paddle.fluid.dygraph.Embedding ('paddle.fluid.dygraph.nn.Embedding', ('document', 'dec90fba70f93ff5dac2f4ed0704dbdd'))
paddle.fluid.dygraph.Embedding.__init__ (ArgSpec(args=['self', 'name_scope', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Embedding.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
paddle.fluid.dygraph.Embedding.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995'))
......
@@ -1357,29 +1357,72 @@ class Embedding(layers.Layer):
"""
**Embedding Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
a lookup table. The result of this lookup is the embedding of each ID in the
:attr:`input`.
All the input variables are passed in as local variables to the LayerHelper constructor
This interface is used to construct a callable object of the ``Embedding`` class.
For specific usage, refer to the code examples. It implements the function of the Embedding layer:
it looks up the embedding vector of each id provided by :attr:`input` .
It automatically constructs a 2D embedding matrix based on the
input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
This layer requires that the last dimension of the input Tensor shape be 1. The shape
of the output Tensor is generated by replacing the last dimension of the input Tensor shape
with emb_size.
Each id in :attr:`input` must satisfy :math:`0 \leq id < size[0]` ,
otherwise the program will throw an exception and exit.
.. code-block:: text
Case 1:
input is a Tensor. padding_idx = -1
input.data = [[[1], [3]], [[2], [4]], [[4], [127]]]
input.shape = [3, 2, 1]
Given size = [128, 16]
output is a Tensor:
out.shape = [3, 2, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745],
[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.945345345, 0.435394634, ..., 0.435345365],
[0.0, 0.0, ..., 0.0 ]]] # padding data
Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
All-zero data is then output wherever an id equals 127.
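The case above can be reproduced with a short dygraph sketch (an illustrative example, not part of this commit; the embedding weights are randomly initialized, so only the output shape and the all-zero padding row are deterministic):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.dygraph.base as base

    ids = np.array([[[1], [3]], [[2], [4]], [[4], [127]]]).astype('int64')
    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding(
            name_scope='case1',
            size=[128, 16],
            padding_idx=-1)  # converted internally to -1 + 128 = 127
        out = emb(base.to_variable(ids))
        print(out.shape)  # [3, 2, 16]
        # out.numpy()[2][1] is the all-zero padding row for id 127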
Parameters:
name_scope(str): The name of this class.
size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size
of the dictionary of embeddings and the size of each embedding vector respectively.
is_sparse(bool): The flag indicating whether to use sparse update. Default: False
is_distributed(bool): Whether to run lookup table from remote parameter server. Default: False.
padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters
it in :attr:`input`. If :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + dim`. Default: None.
param_attr(ParamAttr): Parameters for this layer. Default: None.
dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc. Default: 'float32'.
is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
affects the performance of the backward gradient update. Setting it to True is recommended,
because sparse update is faster. However, some optimizers do not support sparse update,
such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
:ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` ,
:ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` .
In these cases, is_sparse must be False; see the sketch after this parameter list. Default: False.
is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
in multi-machine distributed CPU training. Default: False.
padding_idx(int|long|None): padding_idx must be in the interval [-vocab_size, vocab_size).
If :math:`padding\_idx < 0`, :math:`padding\_idx` is automatically converted
to :math:`vocab\_size + padding\_idx` . All-zero padding data is output whenever lookup
encounters :math:`padding\_idx` in an id, and the padding data is not updated during training.
If set to None, it has no effect on the output. Default: None.
param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
Local word vectors need to be converted to numpy format, and their shape
should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
is used to load the custom or pre-trained word vectors. See code example 2 for details.
dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor.
It must be "float32" or "float64". Default: "float32".
Attributes:
weight (Parameter): the learnable weights of this layer.
Attribute:
**weight** (Parameter): the learnable weights of this layer.
Returns:
Variable: The tensor variable storing the embeddings of the \
supplied inputs.
Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
Examples:
@@ -1389,6 +1432,7 @@ class Embedding(layers.Layer):
import paddle.fluid.dygraph.base as base
import numpy as np
# example 1
inp_word = np.array([[[1]]]).astype('int64')
dict_size = 20
with fluid.dygraph.guard():
@@ -1398,6 +1442,21 @@
param_attr='emb.w',
is_sparse=False)
static_rlt3 = emb(base.to_variable(inp_word))
# example 2: load custom or pre-trained word vectors
weight_data = np.random.random(size=(128, 100))  # word vectors in numpy format
w_param_attrs = fluid.ParamAttr(
name="emb_weight",
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
trainable=True)
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding(
name_scope='embedding',
size=[128, 100],
param_attr=w_param_attrs,
is_sparse=False)
static_rlt3 = emb(base.to_variable(inp_word))
"""
def __init__(self,
......