Unverified commit 5dec254b, authored by tangwei12 and committed by GitHub

fix weight (#26986)

* fix weight

* fix weight and fix doc

* fix embedding padding idx

* add UT

* fix interval
Parent dc00bd67
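Taken together, the hunks below make the layer's `weight` a public attribute and add argument validation for `num_embeddings`, `embedding_dim`, and `padding_idx`. The following is a minimal sketch of the user-visible behavior after this commit, assuming a Paddle 2.x dygraph environment as used in the new unit tests; the tensor values are illustrative only.

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()

    embedding = paddle.nn.Embedding(10, 3, sparse=True)

    # `weight` is now public, so it can be re-initialized directly and
    # handed to an optimizer, exactly as the new unit test does.
    w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
    embedding.weight.set_value(w0)
    adam = paddle.optimizer.Adam(
        parameters=[embedding.weight], learning_rate=0.01)

    # The new checks reject out-of-range arguments with a ValueError.
    try:
        paddle.nn.Embedding(10, 3, padding_idx=11)  # outside [-10, 10)
    except ValueError as err:
        print(err)  # padding_idx must be within [-10, 10)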
@@ -16,20 +16,49 @@ from __future__ import print_function

 import unittest
 import paddle
 import paddle.nn as nn
 import numpy as np

+paddle.disable_static()
+
 class EmbeddingDygraph(unittest.TestCase):
     def test_1(self):
-        import paddle
-        import paddle.nn as nn
-        import numpy as np
-
-        paddle.disable_static()
+        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
+        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
+
+        paddle.disable_static(paddle.CPUPlace())
+        x = paddle.to_tensor(x_data, stop_gradient=False)
+        y = paddle.to_tensor(y_data, stop_gradient=False)
+
+        embedding = paddle.nn.Embedding(10, 3, sparse=True)
+
+        w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
+        embedding.weight.set_value(w0)
+
+        adam = paddle.optimizer.Adam(
+            parameters=[embedding.weight], learning_rate=0.01)
+        adam.clear_grad()
+
+        out = embedding(x)
+        out.backward()
+        adam.step()
+
+    def test_2(self):
+        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
+        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
+
+        paddle.disable_static(paddle.CPUPlace())
+        x = paddle.to_tensor(x_data, stop_gradient=False)
+        y = paddle.to_tensor(y_data, stop_gradient=False)
+
+        with self.assertRaises(ValueError):
+            embedding = paddle.nn.Embedding(10, 3, padding_idx=11, sparse=True)
-        # example 1
-        inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
-        inp_word.shape  # [2, 3]
-        dict_size = 20
+        with self.assertRaises(ValueError):
+            embedding = paddle.nn.Embedding(-1, 3, sparse=True)
-        emb = nn.Embedding(dict_size, 32, weight_attr='emb.w', sparse=False)
+        with self.assertRaises(ValueError):
+            embedding = paddle.nn.Embedding(10, -3, sparse=True)

 if __name__ == '__main__':
......
@@ -73,8 +73,13 @@ class EmbeddingStatic(unittest.TestCase):
                 dtype="int32")

             emb = functional.embedding(
-                x=label, weight=weight, sparse=True, name="embedding")
+                x=label,
+                weight=weight,
+                padding_idx=129,
+                sparse=True,
+                name="embedding")

         with self.assertRaises(ValueError):
             test_bad_x()
......
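Here `padding_idx=129` is expected to fall outside the valid interval `[-weight.shape[0], weight.shape[0])`, so the call should raise. Below is a pure-Python mirror of the normalization and range check this commit adds (see the functional hunk further down); `normalize_padding_idx` is a hypothetical helper name, and 128 is an illustrative vocabulary size taken from the docstring example.

.. code-block:: python

    def normalize_padding_idx(padding_idx, vocab_size):
        # None is mapped to the sentinel -1 (no padding); negative values
        # wrap around once, mirroring paddle.nn.functional.embedding.
        if padding_idx is None:
            return -1
        idx = padding_idx if padding_idx >= 0 else vocab_size + padding_idx
        if idx >= vocab_size or idx < -vocab_size:
            raise ValueError("padding_idx must be within [-{}, {})".format(
                vocab_size, vocab_size))
        return idx

    normalize_padding_idx(-1, 128)   # 127: wraps to vocab_size - 1
    normalize_padding_idx(129, 128)  # raises ValueError, like the test above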
@@ -113,17 +113,18 @@ def one_hot(x, num_classes, name=None):

 def embedding(x, weight, padding_idx=None, sparse=False, name=None):
     """
-    The operator is used to lookup embeddings vector of ids provided by :attr:`input` .
+    The operator is used to lookup embeddings vector of ids provided by :attr:`x` .

     The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
     with embedding size.

-    **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < weight.shape[0]` ,
+    **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < weight.shape[0]` ,
     otherwise the program will throw an exception and exit.

     .. code-block:: text

         Case 1:

-            input is a Tensor.
+            x is a Tensor.
                 padding_idx = -1
                 x.data = [[1, 3], [2, 4], [4, 127]]
                 x.shape = [3, 2]
@@ -138,7 +139,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
                           [0.0, 0.0, ..., 0.0 ]]]  # padding data

         The input padding_idx is less than 0, so it is automatically converted to padding_idx = -1 + 128 = 127.
-        It will pad all-zero data when ids is 127.
+        It will pad all-zero data when id is 127.

     Args:
         x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
@@ -151,10 +152,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
             such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
             :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
             :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
-            In these cases, is_sparse must be False. Default: False.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
+            In these cases, sparse must be False. Default: False.
+        padding_idx(int|long|None): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
             If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
-            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
+            to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup
             encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
             If set to None, it has no effect on the output. Default: None.
         name(str|None): For detailed information, please refer
@@ -162,7 +163,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
             None by default.

     Returns:
-        Tensor: Embedding Tensor mapped by input. The data type is the same as :attr:`weight`.
+        Tensor: Embedding Tensor mapped by x. The data type is the same as :attr:`weight`.

     Examples:
@@ -209,6 +210,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
         padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
             weight.shape[0] + padding_idx)

+        if padding_idx >= weight.shape[0] or padding_idx < -weight.shape[0]:
+            raise ValueError("padding_idx must be within [-{}, {})".format(
+                weight.shape[0], weight.shape[0]))
+
         helper.append_op(
             type='lookup_table_v2',
             inputs={'Ids': x,
......
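Since the docstring's Case 1 is text-only, here is a short runnable sketch of the documented `padding_idx` semantics, assuming a Paddle 2.x dygraph environment; the weight values are random and illustrative.

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()

    # A 128-row weight, so padding_idx=-1 is converted to -1 + 128 = 127.
    weight = paddle.to_tensor(
        np.random.random((128, 16)).astype(np.float32))
    x = paddle.to_tensor(
        np.array([[1, 3], [2, 4], [4, 127]]).astype(np.int64))

    # Positions whose id is 127 come back as all-zero vectors, per Case 1
    # above; out.shape is [3, 2, 16].
    out = F.embedding(x, weight, padding_idx=-1)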
@@ -1564,22 +1564,18 @@ class CosineSimilarity(layers.Layer):

 class Embedding(layers.Layer):
     """
-    :alias_main: paddle.nn.Embedding
-    :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
-    :old_api: paddle.fluid.dygraph.Embedding
-
     **Embedding Layer**

     This interface is used to construct a callable object of the ``Embedding`` class.
     For specific usage, refer to code examples. It implements the function of the Embedding Layer.
-    This layer is used to lookup embeddings vector of ids provided by :attr:`input` .
+    This layer is used to lookup embeddings vector of ids provided by :attr:`x` .
     It automatically constructs a 2D embedding matrix based on the
-    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
+    input :attr:`num_embeddings` and :attr:`embedding_dim`.

     The shape of output Tensor is generated by appending an emb_size dimension to the
     last dimension of the input Tensor shape.

-    **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` ,
+    **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < num_embeddings` ,
     otherwise the program will throw an exception and exit.

     .. code-block:: text
@@ -1607,7 +1603,7 @@ class Embedding(layers.Layer):
         num_embeddings (int): Just one element which indicates the size
             of the dictionary of embeddings.
         embedding_dim: Just one element which indicates the size of each embedding vector.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
+        padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
             If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
             to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
             encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
@@ -1618,13 +1614,13 @@ class Embedding(layers.Layer):
             such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
             :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
             :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
-            In these cases, is_sparse must be False. Default: False.
+            In these cases, sparse must be False. Default: False.
         weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
-            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
+            default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition,
             user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
             The local word vector needs to be transformed into numpy format, and the shape of the local word
-            vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
-            is used to load custom or pre-trained word vectors. See code example 2 for details.
+            vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
+            is used to load custom or pre-trained word vectors. See the code example for details.
         name(str|None): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name does not need to be set and is
             None by default.
@@ -1639,20 +1635,34 @@ class Embedding(layers.Layer):

         .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-            import numpy as np
-
-            paddle.disable_static()
+            import paddle
+            import numpy as np
+
+            x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
+            y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
+
+            paddle.disable_static(paddle.CPUPlace())
+            x = paddle.to_tensor(x_data, stop_gradient=False)
+            y = paddle.to_tensor(y_data, stop_gradient=False)
+
+            embedding = paddle.nn.Embedding(10, 3, sparse=True)
+            w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
+            embedding.weight.set_value(w0)
-
-            # example 1
-            inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
-            inp_word.shape  # [2, 3]
-            dict_size = 20
+
+            adam = paddle.optimizer.Adam(
+                parameters=[embedding.weight], learning_rate=0.01)
+            adam.clear_grad()
+
+            # weight.shape = [10, 3]
+            # x.data = [[3], [4], [5]]
+            # x.shape = [3, 1]
+            # out.data = [[2, 2, 2], [2, 2, 2], [2, 2, 2]]
+            # out.shape = [3, 1, 3]
+            out = embedding(x)
+            out.backward()
+            adam.step()
-
-            emb = nn.Embedding(
-                dict_size,
-                32,
-                sparse=False)

     """
def __init__(self,
@@ -1669,13 +1679,24 @@ class Embedding(layers.Layer):
         self._is_distributed = False
         self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
             num_embeddings + padding_idx)

+        if self._num_embeddings <= 0:
+            raise ValueError("num_embeddings must be greater than 0")
+
+        if self._embedding_dim <= 0:
+            raise ValueError("embedding_dim must be greater than 0")
+
+        if self._padding_idx >= num_embeddings or self._padding_idx < -num_embeddings:
+            raise ValueError("padding_idx must be within [-{}, {})".format(
+                num_embeddings, num_embeddings))
+
         self._dtype = self._helper.get_default_dtype()
         self._size = [self._num_embeddings, self._embedding_dim]

         self._weight_attr = weight_attr
         self._remote_prefetch = False
         self._name = name
-        self._weight = self.create_parameter(
+        self.weight = self.create_parameter(
             attr=self._weight_attr,
             shape=self._size,
             dtype=self._dtype,
@@ -1684,7 +1705,7 @@ class Embedding(layers.Layer):

     def forward(self, x):
         return F.embedding(
             x,
-            weight=self._weight,
+            weight=self.weight,
             padding_idx=self._padding_idx,
             sparse=self._sparse,
             name=self._name)
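For the layer class, the same `padding_idx` conversion applies to the constructor argument. A hedged sketch of the documented behavior, with illustrative sizes and ids:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()

    # padding_idx=-1 is converted to num_embeddings + (-1) = 9, so lookups
    # of id 9 return all-zero vectors that are not updated while training.
    embedding = paddle.nn.Embedding(10, 3, padding_idx=-1)

    x = paddle.to_tensor(np.array([[9], [3]]).astype(np.int64))
    out = embedding(x)  # out[0] is expected to be all zeros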