Unverified commit 6e356c68 authored by wangzhen38, committed by GitHub

[remove fluid] fluid dygraph Embedding (#48806)

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding
Parent c40122d9
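Summary: this change removes the legacy fluid.dygraph.Embedding layer and migrates every usage to paddle.nn.Embedding. As a rough guide to the replacements made throughout the diff below (a minimal sketch, not taken from any one file; the sizes are illustrative), the old list-style size, param_attr, is_sparse and dtype arguments map onto positional sizes, weight_attr, sparse and paddle.set_default_dtype:

# Old API (removed by this PR):
#   emb = fluid.dygraph.Embedding(size=[vocab_size, emb_dim],
#                                 param_attr=w_attr, is_sparse=True, dtype='float32')
# New API (used throughout the diff below):
import paddle

vocab_size, emb_dim = 128, 16          # illustrative sizes
paddle.set_default_dtype('float32')    # replaces the old dtype argument
emb = paddle.nn.Embedding(vocab_size, emb_dim, sparse=True)

ids = paddle.to_tensor([[2, 3, 5], [4, 2, 1]], dtype='int64')
out = emb(ids)                         # shape: [2, 3, 16]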
......@@ -83,9 +83,10 @@ def save_dygraph(state_dict, model_path):
.. code-block:: python
import paddle.fluid as fluid
import paddle
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
......
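For reference, fluid.load_dygraph is the pre-existing counterpart of save_dygraph and is not changed by this PR; a minimal round-trip sketch with the migrated layer (the "paddle_dy" prefix is the same illustrative name used in the docstring above):

import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = paddle.nn.Embedding(10, 10)
    state_dict = emb.state_dict()
    fluid.save_dygraph(state_dict, "paddle_dy")  # writes paddle_dy.pdparams
    # load_dygraph returns a (parameter dict, optimizer dict) pair; the optimizer
    # dict is None here because no optimizer state was saved
    para_state_dict, opt_state_dict = fluid.load_dygraph("paddle_dy")
    emb.set_state_dict(para_state_dict)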
......@@ -170,10 +170,11 @@ class PiecewiseDecay(LearningRateDecay):
.. code-block:: python
import paddle.fluid as fluid
import paddle
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( [10, 10] )
emb = paddle.nn.Embedding(10, 10)
optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
parameter_list = emb.parameters() )
......@@ -240,9 +241,10 @@ class NaturalExpDecay(LearningRateDecay):
.. code-block:: python
import paddle.fluid as fluid
import paddle
base_lr = 0.1
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.NaturalExpDecay(
learning_rate=base_lr,
......@@ -403,9 +405,10 @@ class InverseTimeDecay(LearningRateDecay):
.. code-block:: python
import paddle.fluid as fluid
import paddle
base_lr = 0.1
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.InverseTimeDecay(
learning_rate=base_lr,
......@@ -487,11 +490,12 @@ class PolynomialDecay(LearningRateDecay):
.. code-block:: python
import paddle.fluid as fluid
import paddle
start_lr = 0.01
total_step = 5000
end_lr = 0
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( [10, 10])
emb = paddle.nn.Embedding(10, 10)
optimizer = fluid.optimizer.SGD(
learning_rate = fluid.dygraph.PolynomialDecay(
start_lr, total_step, end_lr, power=1.0),
......@@ -639,10 +643,11 @@ class NoamDecay(LearningRateDecay):
.. code-block:: python
import paddle.fluid as fluid
import paddle
warmup_steps = 100
learning_rate = 0.01
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
optimizer = fluid.optimizer.SGD(
learning_rate = fluid.dygraph.NoamDecay(
1/(warmup_steps *(learning_rate ** 2)),
......
......@@ -51,7 +51,6 @@ from paddle import _C_ops, _legacy_C_ops
__all__ = [
'BatchNorm',
'Embedding',
]
......@@ -360,187 +359,6 @@ class BatchNorm(layers.Layer):
return self._helper.append_activation(batch_norm_out, self._act)
class Embedding(layers.Layer):
r"""
:alias_main: paddle.nn.Embedding
:alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
:old_api: paddle.fluid.dygraph.Embedding
**Embedding Layer**
This interface is used to construct a callable object of the ``Embedding`` class.
For specific usage, refer to code examples. It implements the function of the Embedding Layer.
This layer is used to look up the embedding vectors of the ids provided by :attr:`input` .
It automatically constructs a 2D embedding matrix based on the
input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
The shape of output Tensor is generated by appending an emb_size dimension to the
last dimension of the input Tensor shape.
**Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` ,
otherwise the program will throw an exception and exit.
.. code-block:: text
Case 1:
input is a Tensor. padding_idx = -1
input.data = [[1, 3], [2, 4], [4, 127]]
input.shape = [3, 2]
Given size = [128, 16]
output is a Tensor:
out.shape = [3, 2, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745],
[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.945345345, 0.435394634, ..., 0.435345365],
[0.0, 0.0, ..., 0.0 ]]] # padding data
Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
All-zero data is padded whenever an id equals 127.
Parameters:
size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size
of the dictionary of embeddings and the size of each embedding vector respectively.
is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
affects the performance of the backward gradient update. It is recommended to set it to
True because sparse update is faster. However, some optimizers do not support sparse update,
such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
:ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` ,
:ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` .
In these cases, is_sparse must be False. Default: False.
is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
in multi-machine distributed CPU training. Default: False.
padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
encounters :math:`padding\_idx` in the ids, and the padding data will not be updated during training.
If set to None, it has no effect on the output. Default: None.
param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example 2 for details.
dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor.
It must be "float32" or "float64". Default: "float32".
Attribute:
**weight** (Parameter): the learnable weights of this layer.
Returns:
Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid.dygraph.base as base
import numpy as np
# example 1
inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
inp_word.shape # [2, 3]
dict_size = 20
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding(
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
static_rlt3 = emb(base.to_variable(inp_word))
static_rlt3.shape # [2, 3, 32]
# example 2: load custom or pre-trained word vectors
weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format
w_param_attrs = fluid.ParamAttr(
name="emb_weight",
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
trainable=True)
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding(
size=[128, 100],
param_attr= w_param_attrs,
is_sparse=False)
static_rlt3 = emb(base.to_variable(inp_word))
"""
def __init__(
self,
size,
is_sparse=False,
is_distributed=False,
padding_idx=None,
param_attr=None,
dtype='float32',
):
super().__init__()
self._size = size
self._is_sparse = is_sparse
self._is_distributed = is_distributed
self._padding_idx = (
-1
if padding_idx is None
else padding_idx
if padding_idx >= 0
else (size[0] + padding_idx)
)
self._param_attr = param_attr
self._dtype = dtype
self._remote_prefetch = self._is_sparse and (not self._is_distributed)
if self._remote_prefetch:
assert self._is_sparse is True and self._is_distributed is False
self.weight = self.create_parameter(
attr=self._param_attr,
shape=self._size,
dtype=self._dtype,
is_bias=False,
)
def forward(self, input):
if _non_static_mode():
return _legacy_C_ops.lookup_table_v2(
self.weight,
input,
'is_sparse',
self._is_sparse,
'is_distributed',
self._is_distributed,
'remote_prefetch',
self._remote_prefetch,
'padding_idx',
self._padding_idx,
)
check_variable_and_dtype(
input,
'input',
['uint8', 'int8', 'int16', 'int32', 'int64'],
'Embedding',
)
attrs = {
'is_sparse': self._is_sparse,
'is_distributed': self._is_distributed,
'remote_prefetch': self._remote_prefetch,
'padding_idx': self._padding_idx,
}
out = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type='lookup_table_v2',
inputs={'Ids': input, 'W': self.weight},
outputs={'Out': out},
attrs=attrs,
)
return out
class RowConv(layers.Layer):
"""
***Row-convolution operator***
......
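The two usage examples from the removed docstring translate to the new layer as follows (a hedged sketch: the parameter names emb.w and emb_weight are carried over from the old examples, and paddle.nn.initializer.Assign is assumed as the counterpart of the old NumpyArrayInitializer):

import numpy as np
import paddle

# example 1: plain lookup (previously fluid.dygraph.Embedding(size=[20, 32], param_attr='emb.w'))
inp_word = paddle.to_tensor(np.array([[2, 3, 5], [4, 2, 1]], dtype='int64'))
emb = paddle.nn.Embedding(20, 32, weight_attr='emb.w', sparse=False)
out = emb(inp_word)  # shape: [2, 3, 32]

# example 2: load custom or pre-trained word vectors through weight_attr
weight_data = np.random.random(size=(128, 100)).astype('float32')
w_param_attrs = paddle.ParamAttr(
    name="emb_weight",
    learning_rate=0.5,
    initializer=paddle.nn.initializer.Assign(weight_data),
    trainable=True,
)
emb2 = paddle.nn.Embedding(128, 100, weight_attr=w_param_attrs, sparse=False)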
......@@ -723,10 +723,6 @@ class DataParallel(layers.Layer):
def check_layer_sparse(sublayer):
if isinstance(sublayer, paddle.nn.layer.common.Embedding):
return sublayer._sparse
# NOTE(shenliang03):This is for compatibility. If paddle.fluid.dygraph.Embedding
# is removed in the future, the check will also be removed here.
if isinstance(sublayer, paddle.fluid.dygraph.Embedding):
return sublayer._is_sparse
return False
is_sparse_gradient = [
......@@ -875,8 +871,8 @@ class DataParallel(layers.Layer):
dist.init_parallel_env()
emb = fluid.dygraph.Embedding([10, 10])
emb = fluid.dygraph.DataParallel(emb)
emb = paddle.nn.Embedding(10, 10)
emb = paddle.fluid.dygraph.DataParallel(emb)
state_dict = emb.state_dict()
paddle.save(state_dict, "paddle_dy.pdparams")
......@@ -910,7 +906,7 @@ class DataParallel(layers.Layer):
dist.init_parallel_env()
emb = paddle.nn.Embedding(10, 10)
emb = fluid.dygraph.DataParallel(emb)
emb = paddle.fluid.dygraph.DataParallel(emb)
state_dict = emb.state_dict()
paddle.save(state_dict, "paddle_dy.pdparams")
......
......@@ -1660,10 +1660,11 @@ class Variable(metaclass=VariableMetaClass):
# example2: return tuple of ndarray
with fluid.dygraph.guard():
embedding = fluid.dygraph.Embedding(
size=[20, 32],
param_attr='emb.w',
is_sparse=True)
embedding = paddle.nn.Embedding(
20,
32,
weight_attr='emb.w',
sparse=True)
x_data = np.arange(12).reshape(4, 3).astype('int64')
x_data = x_data.reshape((-1, 3, 1))
x = fluid.dygraph.base.to_variable(x_data)
......
......@@ -214,9 +214,10 @@ class Optimizer:
.. code-block:: python
import paddle.fluid as fluid
import paddle
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
adam = fluid.optimizer.Adam(0.001, parameter_list=emb.parameters())
state_dict = adam.state_dict()
......@@ -582,7 +583,7 @@ class Optimizer:
# example1: LearningRateDecay is not used, so the returned value is always the same
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
adam = fluid.optimizer.Adam(0.001, parameter_list = emb.parameters())
lr = adam.current_step_lr()
print(lr) # 0.001
......
......@@ -39,7 +39,6 @@ from paddle.distributed.fleet.meta_parallel.parallel_layers.pp_layers import (
from paddle.distributed.sharding.group_sharded import group_sharded_parallel
from paddle.distributed.utils.log_utils import get_logger
from paddle.fluid.dataloader.dataset import IterableDataset
from paddle.fluid.dygraph.nn import Embedding
from paddle.incubate.distributed.utils.io import save_for_auto_inference
from paddle.nn import Linear
......@@ -131,7 +130,7 @@ class MLP(fluid.Layer):
bias_attr=None,
):
super(MLP, self).__init__()
self.embedding = Embedding((embedding_size, linear_size))
self.embedding = paddle.nn.Embedding(embedding_size, linear_size)
self._linear1 = Linear(linear_size, linear_size)
self._linear2 = Linear(linear_size, linear_size)
self._linear3 = Linear(linear_size, 10)
......
......@@ -18,7 +18,6 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.fluid.dygraph.nn import Embedding
paddle.seed(123)
np.random.seed(2021)
......@@ -29,10 +28,10 @@ class SimpleNet(fluid.Layer):
super().__init__()
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.embedding = Embedding(
size=[self.vocab_size, self.hidden_size],
dtype='float32',
is_sparse=is_sparse,
self.embedding = paddle.nn.Embedding(
self.vocab_size,
self.hidden_size,
sparse=is_sparse,
)
self.lin_a = paddle.nn.Linear(self.hidden_size, self.vocab_size)
......
......@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.fluid.dygraph import Embedding, Layer, to_variable
from paddle.fluid.dygraph import Layer, to_variable
from paddle.optimizer.lr import NoamDecay
"""
......@@ -513,11 +513,11 @@ class PrepareEncoderDecoderLayer(Layer):
self._src_emb_dim = src_emb_dim
self._src_vocab_size = src_vocab_size
self._dropout_rate = dropout_rate
self._input_emb = Embedding(
size=[src_vocab_size, src_emb_dim],
is_sparse=is_sparse,
padding_idx=0,
param_attr=fluid.ParamAttr(
self._input_emb = paddle.nn.Embedding(
src_vocab_size,
src_emb_dim,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name=word_emb_param_name,
initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
),
......@@ -527,10 +527,11 @@ class PrepareEncoderDecoderLayer(Layer):
pos_inp = pos_inp1
else:
pos_inp = pos_inp2
self._pos_emb = Embedding(
size=[self._src_max_len, src_emb_dim],
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
self._pos_emb = paddle.nn.Embedding(
self._src_max_len,
src_emb_dim,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name=pos_enc_param_name,
initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
trainable=False,
......
......@@ -16,7 +16,7 @@ from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, Layer
from paddle.fluid.dygraph import Layer
from paddle.jit.api import declarative
from paddle.nn import Linear
......@@ -208,29 +208,29 @@ class BertModelLayer(Layer):
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=config['initializer_range']
)
self._src_emb = Embedding(
size=[self._voc_size, self._emb_size],
param_attr=fluid.ParamAttr(
paddle.set_default_dtype(self._dtype)
self._src_emb = paddle.nn.Embedding(
self._voc_size,
self._emb_size,
weight_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer
),
dtype=self._dtype,
)
self._pos_emb = Embedding(
size=[self._max_position_seq_len, self._emb_size],
param_attr=fluid.ParamAttr(
self._pos_emb = paddle.nn.Embedding(
self._max_position_seq_len,
self._emb_size,
weight_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer
),
dtype=self._dtype,
)
self._sent_emb = Embedding(
size=[self._sent_types, self._emb_size],
param_attr=fluid.ParamAttr(
self._sent_emb = paddle.nn.Embedding(
self._sent_types,
self._emb_size,
weight_attr=fluid.ParamAttr(
name=self._sent_emb_name, initializer=self._param_initializer
),
dtype=self._dtype,
)
self.pooled_fc = Linear(
......
......@@ -21,8 +21,8 @@ import paddle.fluid as fluid
from paddle.fluid import ParamAttr, layers
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.jit.api import declarative
from paddle.nn import Embedding
INF = 1.0 * 1e5
alpha = 0.6
......@@ -122,16 +122,18 @@ class BaseModel(fluid.dygraph.Layer):
forget_bias = 1.0
self.src_embeder = Embedding(
size=[self.src_vocab_size, self.hidden_size],
param_attr=fluid.ParamAttr(
self.src_vocab_size,
self.hidden_size,
weight_attr=fluid.ParamAttr(
initializer=uniform_initializer(init_scale)
),
)
self.tar_embeder = Embedding(
size=[self.tar_vocab_size, self.hidden_size],
is_sparse=False,
param_attr=fluid.ParamAttr(
self.tar_vocab_size,
self.hidden_size,
sparse=False,
weight_attr=fluid.ParamAttr(
initializer=uniform_initializer(init_scale)
),
)
......@@ -545,17 +547,19 @@ class AttentionModel(fluid.dygraph.Layer):
forget_bias = 1.0
self.src_embeder = Embedding(
size=[self.src_vocab_size, self.hidden_size],
param_attr=fluid.ParamAttr(
self.src_vocab_size,
self.hidden_size,
weight_attr=fluid.ParamAttr(
name='source_embedding',
initializer=uniform_initializer(init_scale),
),
)
self.tar_embeder = Embedding(
size=[self.tar_vocab_size, self.hidden_size],
is_sparse=False,
param_attr=fluid.ParamAttr(
self.tar_vocab_size,
self.hidden_size,
sparse=False,
weight_attr=fluid.ParamAttr(
name='target_embedding',
initializer=uniform_initializer(init_scale),
),
......
......@@ -17,7 +17,7 @@ from functools import reduce
import paddle
import paddle.fluid as fluid
import paddle.fluid.param_attr as attr
from paddle.fluid.dygraph import Embedding, Layer
from paddle.fluid.dygraph import Layer
from paddle.jit.api import declarative
from paddle.static import Variable
......@@ -42,11 +42,12 @@ class EmbeddingLayer:
"""
# TODO(huihuangzheng): The original code set is_sparse=True, but it
# causes a crash in dy2stat. Set it to True after fixing it.
emb = Embedding(
size=[self.dict_size, self.emb_dim],
is_sparse=True,
emb = paddle.nn.Embedding(
self.dict_size,
self.emb_dim,
sparse=True,
padding_idx=self.padding_idx,
param_attr=attr.ParamAttr(
weight_attr=attr.ParamAttr(
name=self.name, initializer=fluid.initializer.Xavier()
),
)
......
......@@ -38,11 +38,12 @@ class EmbeddingLayer:
"""
# TODO(huihuangzheng): The original code set is_sparse=True, but it
# causes a crash in dy2stat. Set it to True after fixing it.
emb = paddle.fluid.dygraph.Embedding(
size=[self.dict_size, self.emb_dim],
is_sparse=True,
emb = paddle.nn.Embedding(
self.dict_size,
self.emb_dim,
sparse=True,
padding_idx=self.padding_idx,
param_attr=paddle.ParamAttr(
weight_attr=paddle.ParamAttr(
name=self.name,
initializer=paddle.nn.initializer.XavierUniform(),
),
......
......@@ -25,7 +25,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import paddle
import paddle.fluid as fluid
from paddle import _legacy_C_ops
from paddle.fluid.dygraph import Embedding, to_variable
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.framework import _non_static_mode
from paddle.jit import ProgramTranslator
......@@ -371,10 +371,10 @@ class LexNet(fluid.dygraph.Layer):
self.bigru_num = args.bigru_num
self.init_bound = 0.1
self.word_embedding = Embedding(
size=[self.vocab_size, self.word_emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(
self.word_embedding = paddle.nn.Embedding(
self.vocab_size,
self.word_emb_dim,
weight_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
......
......@@ -21,7 +21,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.optimizer import SGDOptimizer
from paddle.jit import ProgramTranslator
from paddle.jit.api import declarative
......@@ -156,11 +155,11 @@ class PtbModel(fluid.Layer):
init_scale=init_scale,
dropout=dropout,
)
self.embedding = Embedding(
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=fluid.ParamAttr(
self.embedding = paddle.nn.Embedding(
vocab_size,
hidden_size,
sparse=False,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
......
......@@ -150,11 +150,11 @@ class PtbModel(paddle.nn.Layer):
init_scale=init_scale,
dropout=dropout,
)
self.embedding = paddle.fluid.dygraph.nn.Embedding(
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=paddle.ParamAttr(
self.embedding = paddle.nn.Embedding(
vocab_size,
hidden_size,
sparse=False,
weight_attr=paddle.ParamAttr(
name='embedding_para',
initializer=paddle.nn.initializer.Uniform(
low=-init_scale, high=init_scale
......
......@@ -20,10 +20,9 @@ from test_lac import DynamicGRU
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.jit import ProgramTranslator
from paddle.jit.api import declarative
from paddle.nn import Linear
from paddle.nn import Embedding, Linear
SEED = 2020
program_translator = ProgramTranslator()
......@@ -73,9 +72,9 @@ class CNN(fluid.dygraph.Layer):
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
is_sparse=False,
self.dict_dim + 1,
self.emb_dim,
sparse=False,
)
self._simple_conv_pool_1 = SimpleConvPool(
self.channels,
......@@ -124,9 +123,9 @@ class BOW(fluid.dygraph.Layer):
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
is_sparse=False,
self.dict_dim + 1,
self.emb_dim,
sparse=False,
)
self._fc1 = Linear(self.hid_dim, self.hid_dim)
self._fc2 = Linear(self.hid_dim, self.fc_hid_dim)
......@@ -167,10 +166,10 @@ class GRU(fluid.dygraph.Layer):
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(learning_rate=30),
is_sparse=False,
self.dict_dim + 1,
self.emb_dim,
weight_attr=fluid.ParamAttr(learning_rate=30),
sparse=False,
)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
......@@ -213,10 +212,10 @@ class BiGRU(fluid.dygraph.Layer):
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(learning_rate=30),
is_sparse=False,
self.dict_dim + 1,
self.emb_dim,
weight_attr=fluid.ParamAttr(learning_rate=30),
sparse=False,
)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
......
......@@ -20,9 +20,9 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Embedding
from paddle.jit import ProgramTranslator
from paddle.jit.api import declarative
from paddle.nn import Embedding
def fake_text():
......@@ -227,9 +227,9 @@ class SkipGram(fluid.dygraph.Layer):
self.embedding_size = embedding_size
self.embedding = Embedding(
size=[self.vocab_size, self.embedding_size],
dtype='float32',
param_attr=fluid.ParamAttr(
self.vocab_size,
self.embedding_size,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-0.5 / self.embedding_size,
......@@ -239,9 +239,9 @@ class SkipGram(fluid.dygraph.Layer):
)
self.embedding_out = Embedding(
size=[self.vocab_size, self.embedding_size],
dtype='float32',
param_attr=fluid.ParamAttr(
self.vocab_size,
self.embedding_size,
weight_attr=fluid.ParamAttr(
name='embedding_out_para',
initializer=fluid.initializer.UniformInitializer(
low=-0.5 / self.embedding_size,
......
......@@ -18,7 +18,7 @@ import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.nn.functional as F
from paddle.fluid.dygraph import Embedding, Layer, to_variable
from paddle.fluid.dygraph import Layer, to_variable
from paddle.fluid.layers.utils import map_structure
from paddle.jit.api import dygraph_to_static_func
from paddle.nn import Linear
......@@ -276,10 +276,10 @@ class Encoder(Layer):
class Embedder(Layer):
def __init__(self, vocab_size, emb_dim, bos_idx=0):
super().__init__()
self.word_embedder = Embedding(
size=[vocab_size, emb_dim],
padding_idx=bos_idx,
param_attr=fluid.ParamAttr(
self.word_embedder = paddle.nn.Embedding(
vocab_size,
emb_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5)
),
)
......@@ -311,9 +311,10 @@ class WrapEncoder(Layer):
self.emb_dropout = prepostprocess_dropout
self.emb_dim = d_model
self.word_embedder = word_embedder
self.pos_encoder = Embedding(
size=[max_length, self.emb_dim],
param_attr=fluid.ParamAttr(
self.pos_encoder = paddle.nn.Embedding(
max_length,
self.emb_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
position_encoding_init(max_length, self.emb_dim)
),
......@@ -499,9 +500,10 @@ class WrapDecoder(Layer):
self.emb_dropout = prepostprocess_dropout
self.emb_dim = d_model
self.word_embedder = word_embedder
self.pos_encoder = Embedding(
size=[max_length, self.emb_dim],
param_attr=fluid.ParamAttr(
self.pos_encoder = paddle.nn.Embedding(
max_length,
self.emb_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
position_encoding_init(max_length, self.emb_dim)
),
......
......@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.nn import Embedding
class SimpleNet(fluid.Layer):
......@@ -37,10 +37,10 @@ class SimpleNet(fluid.Layer):
self.init_scale = init_scale
self.num_steps = num_steps
self.embedding = Embedding(
size=[self.vocab_size, self.hidden_size],
dtype=dtype,
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
self.vocab_size,
self.hidden_size,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
)
......
......@@ -19,6 +19,7 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import Embedding
from paddle.tensor import random
......@@ -122,8 +123,8 @@ class AutoPruneLayer3(fluid.Layer):
class MyLayer(fluid.Layer):
def __init__(self, input_size, vocab_size, size, dtype="float32"):
super().__init__(dtype=dtype)
self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.embed0 = Embedding(vocab_size, size)
self.embed1 = Embedding(vocab_size, size)
self.linear_0 = paddle.nn.Linear(input_size, size)
self.linear_1 = paddle.nn.Linear(input_size, size)
......@@ -144,8 +145,8 @@ class MyLayer(fluid.Layer):
class MyLayer2(fluid.Layer):
def __init__(self, input_size, vocab_size, size, dtype="float32"):
super().__init__(dtype=dtype)
self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.embed0 = Embedding(vocab_size, size)
self.embed1 = Embedding(vocab_size, size)
self.linear_0 = paddle.nn.Linear(input_size, size)
self.linear_1 = paddle.nn.Linear(input_size, size)
......
......@@ -21,7 +21,7 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from paddle.fluid.dygraph.nn import BatchNorm, Embedding
from paddle.fluid.dygraph.nn import BatchNorm
from paddle.nn import Linear
......@@ -206,8 +206,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
self.batch_norm_1 = BatchNorm(10)
self.batch_norm_2 = BatchNorm(10)
self.emb1 = Embedding([1000, 100])
self.emb2 = Embedding([2000, 200])
self.emb1 = paddle.nn.Embedding(1000, 100)
self.emb2 = paddle.nn.Embedding(2000, 200)
self.layer_norm_1 = paddle.nn.LayerNorm([10])
self.layer_norm_2 = paddle.nn.LayerNorm(10)
......
......@@ -22,7 +22,6 @@ import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.optimizer import SGDOptimizer
......@@ -42,11 +41,12 @@ class SimpleNet(fluid.Layer):
self.vocab_size = vocab_size
self.init_scale = init_scale
self.num_steps = num_steps
self.embedding = Embedding(
size=[vocab_size, hidden_size],
dtype=dtype,
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
paddle.set_default_dtype(dtype)
self.embedding = paddle.nn.Embedding(
vocab_size,
hidden_size,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
......
......@@ -101,7 +101,7 @@ class TestImperativeNamedParameters(unittest.TestCase):
self.linear1 = paddle.nn.Linear(10, 10)
self.linear2 = paddle.nn.Linear(5, 5)
self.conv2d = paddle.nn.Conv2D(3, 2, 3)
self.embedding = fluid.dygraph.Embedding(size=[128, 16])
self.embedding = paddle.nn.Embedding(128, 16)
self.h_0 = fluid.dygraph.to_variable(
np.zeros([10, 10]).astype('float32')
)
......
......@@ -21,7 +21,7 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import BatchNorm, Embedding
from paddle.fluid.dygraph.nn import BatchNorm
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import Linear
......@@ -371,8 +371,8 @@ class OCRAttention(fluid.dygraph.Layer):
Config.decoder_size,
bias_attr=False,
)
self.embedding = Embedding(
[Config.num_classes + 2, Config.word_vector_dim], dtype='float32'
self.embedding = paddle.nn.Embedding(
Config.num_classes + 2, Config.word_vector_dim
)
self.gru_decoder_with_attention = GRUDecoderWithAttention(
Config.decoder_size, Config.num_classes
......
......@@ -23,9 +23,9 @@ import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.optimizer import SGDOptimizer
from paddle.nn import Embedding
class SimpleLSTMRNN(fluid.Layer):
......@@ -172,10 +172,10 @@ class PtbModel(fluid.Layer):
dropout=dropout,
)
self.embedding = Embedding(
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
vocab_size,
hidden_size,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
......
......@@ -22,9 +22,9 @@ import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.optimizer import Adam
from paddle.nn import Embedding
class SimpleLSTMRNN(fluid.Layer):
......@@ -167,10 +167,10 @@ class PtbModel(fluid.Layer):
dropout=dropout,
)
self.embedding = Embedding(
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=fluid.ParamAttr(
vocab_size,
hidden_size,
sparse=False,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
......@@ -991,7 +991,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def func_testOnlyLoadParams(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
......@@ -1011,7 +1011,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def func_test_load_compatible_with_keep_name_table(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
......
......@@ -23,8 +23,8 @@ import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import Embedding
from paddle.optimizer import Adam
......@@ -168,10 +168,10 @@ class PtbModel(fluid.Layer):
dropout=dropout,
)
self.embedding = Embedding(
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=fluid.ParamAttr(
vocab_size,
hidden_size,
sparse=False,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
......@@ -1015,7 +1015,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def func_testOnlyLoadParams(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict()
paddle.save(
state_dict,
......@@ -1028,7 +1028,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def func_test_no_state_in_input_dict(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict()
paddle.save(
state_dict,
......@@ -1044,7 +1044,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def func_test_state_shape_mismatch(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict()
paddle.save(
state_dict,
......
......@@ -27,11 +27,11 @@ from paddle.fluid.optimizer import SGDOptimizer
class SimpleNet(paddle.nn.Layer):
def __init__(self, vocab_size, hidden_size, dtype):
super().__init__()
self.emb = fluid.dygraph.Embedding(
size=[vocab_size, hidden_size],
dtype=dtype,
param_attr='emb.w',
is_sparse=True,
self.emb = paddle.nn.Embedding(
vocab_size,
hidden_size,
weight_attr='emb.w',
sparse=True,
)
def forward(self, input):
......
......@@ -22,9 +22,9 @@ import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.optimizer import SGDOptimizer
from paddle.nn import Embedding
class SimpleNet(fluid.Layer):
......@@ -42,11 +42,12 @@ class SimpleNet(fluid.Layer):
self.vocab_size = vocab_size
self.init_scale = init_scale
self.num_steps = num_steps
paddle.set_default_dtype(dtype)
self.embedding = Embedding(
size=[vocab_size, hidden_size],
dtype=dtype,
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
vocab_size,
hidden_size,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale
......
......@@ -20,7 +20,7 @@ from test_imperative_base import new_program_scope
import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.fluid import Embedding, Layer, core
from paddle.fluid import Layer, core
from paddle.fluid.dygraph import guard, to_variable
from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard
from paddle.nn import Linear
......@@ -664,11 +664,11 @@ class PrepareEncoderDecoderLayer(Layer):
self._src_emb_dim = src_emb_dim
self._src_vocab_size = src_vocab_size
self._dropout_rate = dropout_rate
self._input_emb = Embedding(
size=[src_vocab_size, src_emb_dim],
is_sparse=is_sparse,
padding_idx=0,
param_attr=fluid.ParamAttr(
self._input_emb = paddle.nn.Embedding(
src_vocab_size,
src_emb_dim,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name=word_emb_param_name,
initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
),
......@@ -678,10 +678,11 @@ class PrepareEncoderDecoderLayer(Layer):
pos_inp = pos_inp1
else:
pos_inp = pos_inp2
self._pos_emb = Embedding(
size=[self._src_max_len, src_emb_dim],
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
self._pos_emb = paddle.nn.Embedding(
self._src_max_len,
src_emb_dim,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name=pos_enc_param_name,
initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
trainable=False,
......
......@@ -26,7 +26,7 @@ import paddle.fluid.layers as layers
import paddle.fluid.nets as nets
import paddle.nn.functional as F
from paddle.fluid import core
from paddle.fluid.dygraph import base, nn, to_variable
from paddle.fluid.dygraph import base, to_variable
from paddle.fluid.framework import (
Program,
_test_eager_guard,
......@@ -732,8 +732,8 @@ class TestLayer(LayerTest):
)[0]
with self.static_graph():
data_t = layers.data(name='word', shape=[1], dtype='int64')
emb2 = nn.Embedding(
size=[dict_size, 32], param_attr='emb.w', is_sparse=False
emb2 = paddle.nn.Embedding(
dict_size, 32, weight_attr='emb.w', sparse=False
)
emb_rlt = emb2(data_t)
static_rlt2 = self.get_static_graph_result(
......@@ -741,16 +741,17 @@ class TestLayer(LayerTest):
)[0]
with self.dynamic_graph():
with _test_eager_guard():
emb2 = nn.Embedding(
size=[dict_size, 32],
param_attr='eager_emb.w',
is_sparse=False,
emb2 = paddle.nn.Embedding(
dict_size,
32,
weight_attr='eager_emb.w',
sparse=False,
)
dy_eager_rlt = emb2(base.to_variable(inp_word))
dy_eager_rlt_value = dy_eager_rlt.numpy()
emb2 = nn.Embedding(
size=[dict_size, 32], param_attr='emb.w', is_sparse=False
emb2 = paddle.nn.Embedding(
dict_size, 32, weight_attr='emb.w', sparse=False
)
dy_rlt = emb2(base.to_variable(inp_word))
dy_rlt_value = dy_rlt.numpy()
......@@ -767,11 +768,12 @@ class TestLayer(LayerTest):
custom_weight
)
)
emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
emb2 = nn.Embedding(
size=[dict_size, 32],
param_attr=weight_attr,
is_sparse=False,
emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False)
emb2 = paddle.nn.Embedding(
dict_size,
32,
weight_attr=weight_attr,
sparse=False,
)
rep1 = emb1(base.to_variable(inp_word))
rep2 = emb2(base.to_variable(inp_word))
......@@ -797,9 +799,9 @@ class TestLayer(LayerTest):
custom_weight
)
)
emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
emb2 = nn.Embedding(
size=[dict_size, 32], param_attr=weight_attr, is_sparse=False
emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False)
emb2 = paddle.nn.Embedding(
dict_size, 32, weight_attr=weight_attr, sparse=False
)
rep1 = emb1(base.to_variable(inp_word))
rep2 = emb2(base.to_variable(inp_word))
......
......@@ -689,9 +689,7 @@ class TestBeamSearch(ModuleApiTest):
beam_size=4,
max_step_num=20,
):
embedder = paddle.fluid.dygraph.Embedding(
size=[vocab_size, embed_dim], dtype="float64"
)
embedder = paddle.nn.Embedding(vocab_size, embed_dim)
output_layer = nn.Linear(hidden_size, vocab_size)
cell = nn.LSTMCell(embed_dim, hidden_size)
self.max_step_num = max_step_num
......