Unverified commit 7a639606, authored by tangwei12, committed by GitHub

Release/2.0 beta fix emb (#27150)

* fix weight

* fix weight and fix doc

* fix embedding padding_idx

* add UT

* fix interval
Parent 29861846
...@@ -16,20 +16,49 @@ from __future__ import print_function
import unittest

import paddle
import paddle.nn as nn
import numpy as np

paddle.disable_static()


class EmbeddingDygraph(unittest.TestCase):
    def test_1(self):
        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
        paddle.disable_static(paddle.CPUPlace())

        x = paddle.to_tensor(x_data, stop_gradient=False)
        y = paddle.to_tensor(y_data, stop_gradient=False)

        embedding = paddle.nn.Embedding(10, 3, sparse=True)

        w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
        embedding.weight.set_value(w0)

        adam = paddle.optimizer.Adam(
            parameters=[embedding.weight], learning_rate=0.01)
        adam.clear_grad()

        out = embedding(x)
        out.backward()
        adam.step()

    def test_2(self):
        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
        paddle.disable_static(paddle.CPUPlace())

        x = paddle.to_tensor(x_data, stop_gradient=False)
        y = paddle.to_tensor(y_data, stop_gradient=False)

        with self.assertRaises(ValueError):
            embedding = paddle.nn.Embedding(10, 3, padding_idx=11, sparse=True)

        with self.assertRaises(ValueError):
            embedding = paddle.nn.Embedding(-1, 3, sparse=True)

        with self.assertRaises(ValueError):
            embedding = paddle.nn.Embedding(10, -3, sparse=True)


if __name__ == '__main__':
......
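The three assertRaises cases in test_2 map one-to-one onto the constructor checks introduced by this commit. For the padding behavior those checks guard, here is a minimal sketch of what a valid padding_idx does at lookup time, assuming the all-zero output for padded ids documented in the embedding docstring below (this snippet is illustrative, not part of the commit):

    import numpy as np
    import paddle

    paddle.disable_static(paddle.CPUPlace())

    # padding_idx=0 is inside [-10, 10), so construction succeeds; per the
    # embedding docstring, lookups of id 0 then return all-zero rows.
    emb = paddle.nn.Embedding(10, 3, padding_idx=0, sparse=True)
    ids = paddle.to_tensor(np.array([[0], [3]], dtype=np.int64))
    out = emb(ids)            # out.shape = [2, 1, 3]
    print(out.numpy()[0])     # expected: [[0. 0. 0.]]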
...@@ -73,8 +73,13 @@ class EmbeddingStatic(unittest.TestCase):
                dtype="int32")

            emb = functional.embedding(
                x=label,
                weight=weight,
                padding_idx=129,
                sparse=True,
                name="embedding")

        with self.assertRaises(ValueError):
            test_bad_x()
......
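For context on the new padding_idx=129 argument: the test expects a ValueError because, assuming the weight created earlier in this test has 128 rows (that code sits outside the hunk), 129 falls outside the valid interval [-128, 128). The arithmetic, spelled out:

    vocab_size = 128    # assumed from the surrounding test, not shown in this hunk
    padding_idx = 129
    assert not (-vocab_size <= padding_idx < vocab_size)  # out of range -> ValueError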
...@@ -113,17 +113,18 @@ def one_hot(x, num_classes, name=None):
def embedding(x, weight, padding_idx=None, sparse=False, name=None):
    """
    The operator is used to lookup embeddings vector of ids provided by :attr:`x` .
    The shape of the output Tensor is the input Tensor shape with the embedding size appended
    as the last dimension.

    **Note:** The id in :attr:`x` must satisfy :math:`0 <= id < weight.shape[0]` ,
    otherwise the program will throw an exception and exit.

    .. code-block:: text

        Case 1:

        x is a Tensor.
            padding_idx = -1
            x.data = [[1, 3], [2, 4], [4, 127]]
            x.shape = [3, 2]
...@@ -138,7 +139,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
                      [0.0, 0.0, ..., 0.0 ]]]  # padding data

    Since the input padding_idx is less than 0, it is automatically converted to
    padding_idx = -1 + 128 = 127, and all-zero data is padded whenever an id of 127 is looked up.

    Args:
        x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
...@@ -151,10 +152,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
            such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
            :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
            :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
            In these cases, sparse must be False. Default: False.
        padding_idx(int|long|None): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
            to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup
            encounters :math:`padding\_idx` in id. The padding data will not be updated while training.
            If set to None, it has no effect on the output. Default: None.
        name(str|None): For detailed information, please refer
...@@ -162,7 +163,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
            None by default.

    Returns:
        Tensor: Embedding Tensor mapped by x. The data type is the same as :attr:`weight`.

    Examples:
...@@ -209,6 +210,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
    # Validate before normalizing, so that a padding_idx below -weight.shape[0]
    # cannot alias the -1 "no padding" sentinel after conversion.
    if padding_idx is not None and (padding_idx >= weight.shape[0] or
                                    padding_idx < -weight.shape[0]):
        raise ValueError("padding_idx must be within [-{}, {})".format(
            weight.shape[0], weight.shape[0]))

    padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
        weight.shape[0] + padding_idx)

    helper.append_op(
        type='lookup_table_v2',
        inputs={'Ids': x,
......
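The validation added above can be exercised in isolation. Below is a minimal plain-Python sketch of the same normalization plus range check; normalize_padding_idx is a hypothetical stand-in, not a Paddle API:

    def normalize_padding_idx(padding_idx, vocab_size):
        # Reject values outside [-vocab_size, vocab_size) first, then map
        # negative indices to their positive equivalent. None means "no
        # padding" and becomes the -1 sentinel expected by lookup_table_v2.
        if padding_idx is None:
            return -1
        if padding_idx >= vocab_size or padding_idx < -vocab_size:
            raise ValueError("padding_idx must be within [-{}, {})".format(
                vocab_size, vocab_size))
        return padding_idx if padding_idx >= 0 else vocab_size + padding_idx

    assert normalize_padding_idx(-1, 128) == 127    # the docstring's Case 1
    assert normalize_padding_idx(None, 128) == -1   # no padding row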
...@@ -1551,22 +1551,18 @@ class CosineSimilarity(layers.Layer):
class Embedding(layers.Layer):
    """
    **Embedding Layer**

    This interface is used to construct a callable object of the ``Embedding`` class.
    For specific usage, refer to code examples. It implements the function of the Embedding Layer.
    This layer is used to lookup embeddings vector of ids provided by :attr:`x` .
    It automatically constructs a 2D embedding matrix based on the
    input :attr:`num_embeddings` and :attr:`embedding_dim`.

    The shape of the output Tensor is generated by appending an embedding_dim dimension to the
    last dimension of the input Tensor shape.

    **Note:** The id in :attr:`x` must satisfy :math:`0 <= id < num_embeddings` ,
    otherwise the program will throw an exception and exit.

    .. code-block:: text
...@@ -1594,7 +1590,7 @@ class Embedding(layers.Layer):
        num_embeddings (int): The size of the dictionary of embeddings.
        embedding_dim (int): The size of each embedding vector.
        padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
            to :math:`num\_embeddings + padding\_idx` . It will output all-zero padding data whenever lookup
            encounters :math:`padding\_idx` in id. The padding data will not be updated while training.
...@@ -1605,13 +1601,13 @@ class Embedding(layers.Layer):
            such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
            :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
            :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
            In these cases, sparse must be False. Default: False.
        weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
            default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition,
            user-defined or pre-trained word vectors can be loaded with the :attr:`weight_attr` parameter.
            The local word vector needs to be transformed into numpy format, and the shape of the local word
            vector should be consistent with :attr:`num_embeddings` and :attr:`embedding_dim` . Then
            :ref:`api_initializer_NumpyArrayInitializer` is used to load custom or pre-trained word vectors.
            See the code example for details.
        name(str|None): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name does not need to be set and is
            None by default.
...@@ -1626,20 +1622,34 @@ class Embedding(layers.Layer):
        .. code-block:: python

            import paddle
            import numpy as np

            x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
            y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)

            paddle.disable_static(paddle.CPUPlace())
            x = paddle.to_tensor(x_data, stop_gradient=False)
            y = paddle.to_tensor(y_data, stop_gradient=False)

            embedding = paddle.nn.Embedding(10, 3, sparse=True)

            w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
            embedding.weight.set_value(w0)

            adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
            adam.clear_grad()

            # weight.shape = [10, 3]
            # x.data = [[3], [4], [5]]
            # x.shape = [3, 1]
            # out.data = [[2, 2, 2], [2, 2, 2], [2, 2, 2]]
            # out.shape = [3, 1, 3]
            out = embedding(x)
            out.backward()
            adam.step()

    """
    def __init__(self,
...@@ -1656,13 +1666,24 @@ class Embedding(layers.Layer):
        self._is_distributed = False

        if self._num_embeddings <= 0:
            raise ValueError("num_embeddings must be greater than 0")

        if self._embedding_dim <= 0:
            raise ValueError("embedding_dim must be greater than 0")

        # Validate before normalizing, so that a padding_idx below
        # -num_embeddings cannot alias the -1 "no padding" sentinel.
        if padding_idx is not None and (padding_idx >= num_embeddings or
                                        padding_idx < -num_embeddings):
            raise ValueError("padding_idx must be within [-{}, {})".format(
                num_embeddings, num_embeddings))

        self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
            num_embeddings + padding_idx)

        self._dtype = self._helper.get_default_dtype()
        self._size = [self._num_embeddings, self._embedding_dim]

        self._weight_attr = weight_attr
        self._remote_prefetch = False
        self._name = name
        self.weight = self.create_parameter(
            attr=self._weight_attr,
            shape=self._size,
            dtype=self._dtype,
...@@ -1671,7 +1692,7 @@ class Embedding(layers.Layer):
    def forward(self, x):
        return F.embedding(
            x,
            weight=self.weight,
            padding_idx=self._padding_idx,
            sparse=self._sparse,
            name=self._name)
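Because the commit renames the private self._weight to the public self.weight, pre-trained vectors can now be injected directly through the attribute, as the docstring's weight_attr discussion suggests. A minimal sketch; the pretrained array here is hypothetical:

    import numpy as np
    import paddle

    paddle.disable_static(paddle.CPUPlace())

    # Hypothetical pre-trained vectors; the shape must match the layer's
    # (num_embeddings, embedding_dim) = (20, 32).
    pretrained = np.random.rand(20, 32).astype(np.float32)

    emb = paddle.nn.Embedding(20, 32)
    emb.weight.set_value(pretrained)  # possible because weight is now public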