Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
5dec254b
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5dec254b
编写于
9月 08, 2020
作者:
T
tangwei12
提交者:
GitHub
9月 08, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix weight (#26986)
* fix weight * fix weight and fix doc * fix embeeding padding idx * add UT * fix interval
上级
dc00bd67
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
104 addition
and
44 deletion
+104
-44
python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py
...d/tests/unittests/test_nn_functional_embedding_dygraph.py
+38
-9
python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py
...id/tests/unittests/test_nn_functional_embedding_static.py
+6
-1
python/paddle/nn/functional/input.py
python/paddle/nn/functional/input.py
+13
-8
python/paddle/nn/layer/common.py
python/paddle/nn/layer/common.py
+47
-26
未找到文件。
python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py
浏览文件 @
5dec254b
...
...
@@ -16,20 +16,49 @@ from __future__ import print_function
import
unittest
import
paddle
import
paddle.nn
as
nn
import
numpy
as
np
paddle
.
disable_static
()
class
EmbeddingDygraph
(
unittest
.
TestCase
):
def
test_1
(
self
):
import
paddle
import
paddle.nn
as
nn
import
numpy
as
np
paddle
.
disable_static
()
x_data
=
np
.
arange
(
3
,
6
).
reshape
((
3
,
1
)).
astype
(
np
.
int64
)
y_data
=
np
.
arange
(
6
,
12
).
reshape
((
3
,
2
)).
astype
(
np
.
float32
)
paddle
.
disable_static
(
paddle
.
CPUPlace
())
x
=
paddle
.
to_tensor
(
x_data
,
stop_gradient
=
False
)
y
=
paddle
.
to_tensor
(
y_data
,
stop_gradient
=
False
)
embedding
=
paddle
.
nn
.
Embedding
(
10
,
3
,
sparse
=
True
)
w0
=
np
.
full
(
shape
=
(
10
,
3
),
fill_value
=
2
).
astype
(
np
.
float32
)
embedding
.
weight
.
set_value
(
w0
)
adam
=
paddle
.
optimizer
.
Adam
(
parameters
=
[
embedding
.
weight
],
learning_rate
=
0.01
)
adam
.
clear_grad
()
out
=
embedding
(
x
)
out
.
backward
()
adam
.
step
()
def
test_2
(
self
):
x_data
=
np
.
arange
(
3
,
6
).
reshape
((
3
,
1
)).
astype
(
np
.
int64
)
y_data
=
np
.
arange
(
6
,
12
).
reshape
((
3
,
2
)).
astype
(
np
.
float32
)
paddle
.
disable_static
(
paddle
.
CPUPlace
())
x
=
paddle
.
to_tensor
(
x_data
,
stop_gradient
=
False
)
y
=
paddle
.
to_tensor
(
y_data
,
stop_gradient
=
False
)
with
self
.
assertRaises
(
ValueError
):
embedding
=
paddle
.
nn
.
Embedding
(
10
,
3
,
padding_idx
=
11
,
sparse
=
True
)
# example 1
inp_word
=
np
.
array
([[
2
,
3
,
5
],
[
4
,
2
,
1
]]).
astype
(
'int64'
)
inp_word
.
shape
# [2, 3]
dict_size
=
20
with
self
.
assertRaises
(
ValueError
):
embedding
=
paddle
.
nn
.
Embedding
(
-
1
,
3
,
sparse
=
True
)
emb
=
nn
.
Embedding
(
dict_size
,
32
,
weight_attr
=
'emb.w'
,
sparse
=
False
)
with
self
.
assertRaises
(
ValueError
):
embedding
=
paddle
.
nn
.
Embedding
(
10
,
-
3
,
sparse
=
True
)
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py
浏览文件 @
5dec254b
...
...
@@ -73,8 +73,13 @@ class EmbeddingStatic(unittest.TestCase):
dtype
=
"int32"
)
emb
=
functional
.
embedding
(
x
=
label
,
weight
=
weight
,
sparse
=
True
,
name
=
"embedding"
)
x
=
label
,
weight
=
weight
,
padding_idx
=
129
,
sparse
=
True
,
name
=
"embedding"
)
with
self
.
assertRaises
(
ValueError
):
test_bad_x
()
...
...
python/paddle/nn/functional/input.py
浏览文件 @
5dec254b
...
...
@@ -113,17 +113,18 @@ def one_hot(x, num_classes, name=None):
def
embedding
(
x
,
weight
,
padding_idx
=
None
,
sparse
=
False
,
name
=
None
):
"""
The operator is used to lookup embeddings vector of ids provided by :attr:`
input
` .
The operator is used to lookup embeddings vector of ids provided by :attr:`
x
` .
The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
with embedding size.
**Note:** The id in :attr:`input` must satisfy :math:`0 =< id < weight.shape[0]` ,
**Note:** The id in :attr:`x` must satisfy :math:`0 <= id < weight.shape[0]` ,
otherwise the program will throw an exception and exit.
.. code-block:: text
Case 1:
input is a Tensor.
x is a Tensor.
padding_idx = -1
x.data = [[1, 3], [2, 4], [4, 127]]
x.shape = [3, 2]
...
...
@@ -138,7 +139,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
[0.0, 0.0, ..., 0.0 ]]] # padding data
Because the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
It will pad all-zero data when id
s
is 127.
It will pad all-zero data when id is 127.
Args:
x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
...
...
@@ -151,10 +152,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
:ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
:ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
In these cases,
is_
sparse must be False. Default: False.
padding_idx(int|long|None): padding_idx needs to be in the interval [-
vocab_size, vocab_size
).
In these cases, sparse must be False. Default: False.
padding_idx(int|long|None): padding_idx needs to be in the interval [-
weight.shape[0], weight.shape[0]
).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`
vocab\_size
+ padding\_idx` . It will output all-zero padding data whenever lookup
to :math:`
weight.shape[0]
+ padding\_idx` . It will output all-zero padding data whenever lookup
encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
If set to None, it has no effect on the output. Default: None.
name(str|None): For detailed information, please refer
...
...
@@ -162,7 +163,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
None by default.
Returns:
Tensor: Embedding Tensor mapped by
input
. The data type is the same as :attr:`weight`.
Tensor: Embedding Tensor mapped by
x
. The data type is the same as :attr:`weight`.
Examples:
...
...
@@ -209,6 +210,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
padding_idx
=
-
1
if
padding_idx
is
None
else
padding_idx
if
padding_idx
>=
0
else
(
weight
.
shape
[
0
]
+
padding_idx
)
if
padding_idx
>=
weight
.
shape
[
0
]
or
padding_idx
<
-
weight
.
shape
[
0
]:
raise
ValueError
(
"padding_idx must be within [-{}, {})"
.
format
(
weight
.
shape
[
0
],
weight
.
shape
[
0
]))
helper
.
append_op
(
type
=
'lookup_table_v2'
,
inputs
=
{
'Ids'
:
x
,
...
...
python/paddle/nn/layer/common.py
浏览文件 @
5dec254b
...
...
@@ -1564,22 +1564,18 @@ class CosineSimilarity(layers.Layer):
class
Embedding
(
layers
.
Layer
):
"""
:alias_main: paddle.nn.Embedding
:alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
:old_api: paddle.fluid.dygraph.Embedding
**Embedding Layer**
This interface is used to construct a callable object of the ``Embedding`` class.
For specific usage, refer to code examples. It implements the function of the Embedding Layer.
This layer is used to lookup embeddings vector of ids provided by :attr:`
input
` .
This layer is used to lookup embeddings vector of ids provided by :attr:`
x
` .
It automatically constructs a 2D embedding matrix based on the
input :attr:`
size` (vocab_size, emb_size) and :attr:`dtype`
.
input :attr:`
num_embeddings` and :attr:`embedding_dim`
.
The shape of output Tensor is generated by appending an emb_size dimension to the
last dimension of the input Tensor shape.
**Note:** The id in :attr:`
input` must satisfy :math:`0 =< id < size[0]
` ,
**Note:** The id in :attr:`
x` must satisfy :math:`0 <= id < num_embeddings
` ,
otherwise the program will throw an exception and exit.
.. code-block:: text
...
...
@@ -1607,7 +1603,7 @@ class Embedding(layers.Layer):
num_embeddings (int): Just one element which indicates the size
of the dictionary of embeddings.
embedding_dim: Just one element which indicates the size of each embedding vector.
padding_idx(int|long|None): padding_idx needs to be in the interval [-
vocab_size, vocab_size
).
padding_idx(int|long|None): padding_idx needs to be in the interval [-
num_embeddings, num_embeddings
).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`num\_embeddings + padding\_idx` . It will output all-zero padding data whenever lookup
encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
...
...
@@ -1618,13 +1614,13 @@ class Embedding(layers.Layer):
such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
:ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
:ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
In these case,
is_
sparse must be False. Default: False.
In these cases, sparse must be False. Default: False.
weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_
fluid_
ParamAttr` . In addition,
default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`weight_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`
size` . Then :ref:`api_fluid
_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example
2
for details.
vector should be consistent with :attr:`
num_embeddings` . Then :ref:`api
_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example for details.
name(str|None): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name does not need to be set and is
None by default.
...
...
@@ -1639,20 +1635,34 @@ class Embedding(layers.Layer):
.. code-block:: python
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
import paddle
import numpy as np
x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
paddle.disable_static(paddle.CPUPlace())
x = paddle.to_tensor(x_data, stop_gradient=False)
y = paddle.to_tensor(y_data, stop_gradient=False)
embedding = paddle.nn.Embedding(10, 3, sparse=True)
w0=np.full(shape=(10, 3), fill_value=2).astype(np.float32)
embedding.weight.set_value(w0)
# example 1
inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
inp_word.shape # [2, 3]
dict_size = 20
adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
adam.clear_grad()
# weight.shape = [10, 3]
# x.data = [[3],[4],[5]]
# x.shape = [3, 1]
# out.data = [[2,2,2], [2,2,2], [2,2,2]]
# out.shape = [3, 1, 3]
out=embedding(x)
out.backward()
adam.step()
emb = nn.Embedding(
dict_size,
32,
sparse=False)
"""
def
__init__
(
self
,
...
...
@@ -1669,13 +1679,24 @@ class Embedding(layers.Layer):
self
.
_is_distributed
=
False
self
.
_padding_idx
=
-
1
if
padding_idx
is
None
else
padding_idx
if
padding_idx
>=
0
else
(
num_embeddings
+
padding_idx
)
if
self
.
_num_embeddings
<=
0
:
raise
ValueError
(
"num_embeddings must be gather than 0"
)
if
self
.
_embedding_dim
<=
0
:
raise
ValueError
(
"embedding_dim must be gather than 0"
)
if
self
.
_padding_idx
>=
num_embeddings
or
self
.
_padding_idx
<
-
num_embeddings
:
raise
ValueError
(
"padding_idx must be within [-{}, {})"
.
format
(
num_embeddings
,
num_embeddings
))
self
.
_dtype
=
self
.
_helper
.
get_default_dtype
()
self
.
_size
=
[
self
.
_num_embeddings
,
self
.
_embedding_dim
]
self
.
_weight_attr
=
weight_attr
self
.
_remote_prefetch
=
False
self
.
_name
=
name
self
.
_
weight
=
self
.
create_parameter
(
self
.
weight
=
self
.
create_parameter
(
attr
=
self
.
_weight_attr
,
shape
=
self
.
_size
,
dtype
=
self
.
_dtype
,
...
...
@@ -1684,7 +1705,7 @@ class Embedding(layers.Layer):
def
forward
(
self
,
x
):
return
F
.
embedding
(
x
,
weight
=
self
.
_
weight
,
weight
=
self
.
weight
,
padding_idx
=
self
.
_padding_idx
,
sparse
=
self
.
_sparse
,
name
=
self
.
_name
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录