From 5dec254be5cc33f2220932dc8410591b5348e973 Mon Sep 17 00:00:00 2001
From: tangwei12
Date: Tue, 8 Sep 2020 13:53:17 +0800
Subject: [PATCH] fix weight (#26986)

* fix weight

* fix weight and fix doc

* fix embedding padding idx

* add UT

* fix interval
---
 .../test_nn_functional_embedding_dygraph.py |  47 +++++++++---
 .../test_nn_functional_embedding_static.py  |   7 +-
 python/paddle/nn/functional/input.py        |  21 ++++--
 python/paddle/nn/layer/common.py            |  73 ++++++++++++-------
 4 files changed, 104 insertions(+), 44 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py
index e0edf90193..43a0d481b2 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py
@@ -16,20 +16,49 @@ from __future__ import print_function

 import unittest

+import paddle
+import paddle.nn as nn
+import numpy as np
+
+paddle.disable_static()
+

 class EmbeddingDygraph(unittest.TestCase):
     def test_1(self):
-        import paddle
-        import paddle.nn as nn
-        import numpy as np
-        paddle.disable_static()
+        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
+        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
+        paddle.disable_static(paddle.CPUPlace())
+        x = paddle.to_tensor(x_data, stop_gradient=False)
+        y = paddle.to_tensor(y_data, stop_gradient=False)
+
+        embedding = paddle.nn.Embedding(10, 3, sparse=True)
+
+        w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
+        embedding.weight.set_value(w0)
+
+        adam = paddle.optimizer.Adam(
+            parameters=[embedding.weight], learning_rate=0.01)
+        adam.clear_grad()
+
+        out = embedding(x)
+        out.backward()
+        adam.step()
+
+    def test_2(self):
+        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
+        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
+        paddle.disable_static(paddle.CPUPlace())
+        x = paddle.to_tensor(x_data, stop_gradient=False)
+        y = paddle.to_tensor(y_data, stop_gradient=False)
+
+        with self.assertRaises(ValueError):
+            embedding = paddle.nn.Embedding(10, 3, padding_idx=11, sparse=True)

-        # example 1
-        inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
-        inp_word.shape  # [2, 3]
-        dict_size = 20
+        with self.assertRaises(ValueError):
+            embedding = paddle.nn.Embedding(-1, 3, sparse=True)

-        emb = nn.Embedding(dict_size, 32, weight_attr='emb.w', sparse=False)
+        with self.assertRaises(ValueError):
+            embedding = paddle.nn.Embedding(10, -3, sparse=True)


 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py
index c9c91ceb39..4af0cce12b 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py
@@ -73,8 +73,13 @@ class EmbeddingStatic(unittest.TestCase):
                     dtype="int32")

                 emb = functional.embedding(
-                    x=label, weight=weight, sparse=True, name="embedding")
+                    x=label,
+                    weight=weight,
+                    padding_idx=129,
+                    sparse=True,
+                    name="embedding")

-        test_bad_x()
+        with self.assertRaises(ValueError):
+            test_bad_x()


diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py
index bc48cc21c2..0794b95c80 100644
--- a/python/paddle/nn/functional/input.py
+++ b/python/paddle/nn/functional/input.py
@@ -113,17 +113,18 @@ def one_hot(x, num_classes, name=None):


 def embedding(x, weight, padding_idx=None, sparse=False, name=None):
     """
-    The operator is used to lookup embeddings vector of ids provided by :attr:`input` .
+    This operator is used to look up the embedding vectors of the ids provided by :attr:`x` .

     The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
     with embedding size.
-    **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < weight.shape[0]` ,
+
+    **Note:** The id in :attr:`x` must satisfy :math:`0 <= id < weight.shape[0]` ,
     otherwise the program will throw an exception and exit.

     .. code-block:: text

             Case 1:
-            input is a Tensor.
+            x is a Tensor.
                 padding_idx = -1
                 x.data = [[1, 3], [2, 4], [4, 127]]
                 x.shape = [3, 2]
@@ -138,7 +139,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
                 [0.0, 0.0, ..., 0.0 ]]]  # padding data
                 The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
-                It will pad all-zero data when ids is 127.
+                It will pad all-zero data when the id is 127.

     Args:
         x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
@@ -151,10 +152,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
             such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
             :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
             :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
-            In these cases, is_sparse must be False. Default: False.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
+            In these cases, sparse must be False. Default: False.
+        padding_idx(int|long|None): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
             If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
-            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
+            to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup
             encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
             If set None, it makes no effect to output. Default: None.
         name(str|None): For detailed information, please refer
@@ -162,7 +163,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
             None by default.

     Returns:
-        Tensor: Embedding Tensor mapped by input. The data type is the same as :attr:`weight`.
+        Tensor: Embedding Tensor mapped by :attr:`x` . The data type is the same as :attr:`weight`.

     Examples:

@@ -209,6 +210,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
         padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
             weight.shape[0] + padding_idx)

+        if padding_idx >= weight.shape[0] or padding_idx < -weight.shape[0]:
+            raise ValueError("padding_idx must be within [-{}, {})".format(
+                weight.shape[0], weight.shape[0]))
+
         helper.append_op(
             type='lookup_table_v2',
             inputs={'Ids': x,
diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py
index a1923542c4..433443fee1 100644
--- a/python/paddle/nn/layer/common.py
+++ b/python/paddle/nn/layer/common.py
@@ -1564,22 +1564,18 @@ class CosineSimilarity(layers.Layer):

 class Embedding(layers.Layer):
     """
-    :alias_main: paddle.nn.Embedding
-    :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
-    :old_api: paddle.fluid.dygraph.Embedding
-
     **Embedding Layer**

     This interface is used to construct a callable object of the ``Embedding`` class.
     For specific usage, refer to code examples. It implements the function of the Embedding Layer.
-    This layer is used to lookup embeddings vector of ids provided by :attr:`input` .
+    This layer is used to look up the embedding vectors of the ids provided by :attr:`x` .
     It automatically constructs a 2D embedding matrix based on the
-    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
+    input :attr:`num_embeddings` and :attr:`embedding_dim` .

     The shape of output Tensor is generated by appending an emb_size dimension to the
     last dimension of the input Tensor shape.

-    **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` ,
+    **Note:** The id in :attr:`x` must satisfy :math:`0 <= id < num_embeddings` ,
     otherwise the program will throw an exception and exit.

     .. code-block:: text

@@ -1607,7 +1603,7 @@ class Embedding(layers.Layer):
         num_embeddings (int): Just one element which indicate the size
             of the dictionary of embeddings.
         embedding_dim:  Just one element which indicate the size of each embedding vector respectively.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
+        padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
             If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
             to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
             encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
@@ -1618,13 +1614,13 @@ class Embedding(layers.Layer):
             such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
             :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
             :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
-            In these case, is_sparse must be False. Default: False.
+            In these cases, sparse must be False. Default: False.
         weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
-            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
+            default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition,
             user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
             The local word vector needs to be transformed into numpy format, and the shape of local word
-            vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
-            is used to load custom or pre-trained word vectors. See code example 2 for details.
+            vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
+            is used to load custom or pre-trained word vectors. See code example for details.
         name(str|None): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.

@@ -1639,20 +1635,34 @@ class Embedding(layers.Layer):

         .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-            import numpy as np
-            paddle.disable_static()
+            import paddle
+            import numpy as np
+
+            x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
+            y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
+            paddle.disable_static(paddle.CPUPlace())
+            x = paddle.to_tensor(x_data, stop_gradient=False)
+            y = paddle.to_tensor(y_data, stop_gradient=False)
+
+            embedding = paddle.nn.Embedding(10, 3, sparse=True)
+
+            w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
+            embedding.weight.set_value(w0)

-            # example 1
-            inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
-            inp_word.shape  # [2, 3]
-            dict_size = 20
+            adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
+            adam.clear_grad()
+
+            # weight.shape = [10, 3]
+
+            # x.data = [[3], [4], [5]]
+            # x.shape = [3, 1]
+
+            # out.data = [[2, 2, 2], [2, 2, 2], [2, 2, 2]]
+            # out.shape = [3, 1, 3]
+            out = embedding(x)
+            out.backward()
+            adam.step()

-            emb = nn.Embedding(
-                dict_size,
-                32,
-                sparse=False)
     """

     def __init__(self,
@@ -1669,13 +1679,24 @@ class Embedding(layers.Layer):
         self._is_distributed = False
         self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
             num_embeddings + padding_idx)
+
+        if self._num_embeddings <= 0:
+            raise ValueError("num_embeddings must be greater than 0")
+
+        if self._embedding_dim <= 0:
+            raise ValueError("embedding_dim must be greater than 0")
+
+        if self._padding_idx >= num_embeddings or self._padding_idx < -num_embeddings:
+            raise ValueError("padding_idx must be within [-{}, {})".format(
+                num_embeddings, num_embeddings))
+
         self._dtype = self._helper.get_default_dtype()
         self._size = [self._num_embeddings, self._embedding_dim]

         self._weight_attr = weight_attr
         self._remote_prefetch = False
         self._name = name
-        self._weight = self.create_parameter(
+        self.weight = self.create_parameter(
             attr=self._weight_attr,
             shape=self._size,
             dtype=self._dtype,
@@ -1684,7 +1705,7 @@ class Embedding(layers.Layer):
     def forward(self, x):
         return F.embedding(
             x,
-            weight=self._weight,
+            weight=self.weight,
             padding_idx=self._padding_idx,
             sparse=self._sparse,
             name=self._name)
--
GitLab
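
The validation added by this patch can be exercised end to end with a short script.
The sketch below mirrors the unit tests in the patch; the `bad_ctors` list and the
printed messages are illustrative additions, not code taken from the patch:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static(paddle.CPUPlace())

    # Valid: ids must lie in [0, 10) and padding_idx in [-10, 10).
    # padding_idx=-1 is converted to 10 + (-1) = 9.
    embedding = paddle.nn.Embedding(10, 3, padding_idx=-1, sparse=True)

    # The parameter is now exposed as `embedding.weight` (previously the
    # private `_weight`), so it can be overwritten in place and handed
    # straight to an optimizer.
    embedding.weight.set_value(np.full((10, 3), 2).astype(np.float32))
    adam = paddle.optimizer.Adam(
        parameters=[embedding.weight], learning_rate=0.01)

    x = paddle.to_tensor(np.arange(3, 6).reshape((3, 1)).astype(np.int64))
    out = embedding(x)  # shape [3, 1, 3], every entry 2.0
    out.backward()
    adam.step()

    # Invalid constructions now fail fast with ValueError instead of
    # building a broken layer.
    bad_ctors = [
        lambda: paddle.nn.Embedding(10, 3, padding_idx=11),  # outside [-10, 10)
        lambda: paddle.nn.Embedding(-1, 3),  # num_embeddings <= 0
        lambda: paddle.nn.Embedding(10, -3),  # embedding_dim <= 0
    ]
    for ctor in bad_ctors:
        try:
            ctor()
        except ValueError as exc:
            print("rejected:", exc)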