Unverified commit 6e356c68, authored by wangzhen38, committed by GitHub

[remove fluid] fluid dygraph Embedding (#48806)

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding

* [remove fluid] fluid dygraph Embedding
Parent c40122d9
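The change below is mechanical: every use of the deprecated `fluid.dygraph.Embedding` is rewritten to `paddle.nn.Embedding`. As a reader's aid, here is a minimal sketch of the mapping, using the `[10, 10]` sizes from the docstring examples; nothing in this block is part of the diff itself. `size=[vocab, dim]` becomes two positional arguments, `param_attr` becomes `weight_attr`, `is_sparse` becomes `sparse`, and the removed `dtype` argument is covered by the default dtype.

```python
import paddle

# Old (removed by this commit):
#   emb = fluid.dygraph.Embedding(size=[10, 10], param_attr='emb.w',
#                                 is_sparse=False, dtype='float32')
# New equivalent: positional (num_embeddings, embedding_dim); param_attr ->
# weight_attr, is_sparse -> sparse; dtype follows paddle.set_default_dtype().
paddle.set_default_dtype('float32')
emb = paddle.nn.Embedding(10, 10, weight_attr='emb.w', sparse=False)
state_dict = emb.state_dict()
```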
@@ -83,9 +83,10 @@ def save_dygraph(state_dict, model_path):
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 state_dict = emb.state_dict()
                 fluid.save_dygraph( state_dict, "paddle_dy")
...
@@ -170,10 +170,11 @@ class PiecewiseDecay(LearningRateDecay):
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             boundaries = [10000, 20000]
             values = [1.0, 0.5, 0.1]
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( [10, 10] )
+                emb = paddle.nn.Embedding(10, 10)
                 optimizer = fluid.optimizer.SGD(
                     learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
                     parameter_list = emb.parameters() )
@@ -240,9 +241,10 @@ class NaturalExpDecay(LearningRateDecay):
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             base_lr = 0.1
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 sgd_optimizer = fluid.optimizer.SGD(
                     learning_rate=fluid.dygraph.NaturalExpDecay(
                         learning_rate=base_lr,
@@ -403,9 +405,10 @@ class InverseTimeDecay(LearningRateDecay):
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             base_lr = 0.1
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 sgd_optimizer = fluid.optimizer.SGD(
                     learning_rate=fluid.dygraph.InverseTimeDecay(
                         learning_rate=base_lr,
@@ -487,11 +490,12 @@ class PolynomialDecay(LearningRateDecay):
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             start_lr = 0.01
             total_step = 5000
             end_lr = 0
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( [10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 optimizer = fluid.optimizer.SGD(
                     learning_rate = fluid.dygraph.PolynomialDecay(
                         start_lr, total_step, end_lr, power=1.0),
@@ -639,10 +643,11 @@ class NoamDecay(LearningRateDecay):
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             warmup_steps = 100
             learning_rate = 0.01
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 optimizer = fluid.optimizer.SGD(
                     learning_rate = fluid.dygraph.NoamDecay(
                         1/(warmup_steps *(learning_rate ** 2)),
...
@@ -51,7 +51,6 @@ from paddle import _C_ops, _legacy_C_ops
 __all__ = [
     'BatchNorm',
-    'Embedding',
 ]
@@ -360,187 +359,6 @@ class BatchNorm(layers.Layer):
         return self._helper.append_activation(batch_norm_out, self._act)
-class Embedding(layers.Layer):
-    r"""
-    :alias_main: paddle.nn.Embedding
-    :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
-    :old_api: paddle.fluid.dygraph.Embedding
-
-    **Embedding Layer**
-
-    This interface is used to construct a callable object of the ``Embedding`` class.
-    For specific usage, refer to the code examples. It implements the function of the Embedding Layer.
-    This layer is used to look up the embedding vectors of the ids provided by :attr:`input`.
-    It automatically constructs a 2D embedding matrix based on the
-    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype`.
-    The shape of the output Tensor is generated by appending an emb_size dimension to the
-    last dimension of the input Tensor shape.
-
-    **Note:** The ids in :attr:`input` must satisfy :math:`0 <= id < size[0]`,
-    otherwise the program will throw an exception and exit.
-
-    .. code-block:: text
-
-        Case 1:
-            input is a Tensor. padding_idx = -1
-            input.data = [[1, 3], [2, 4], [4, 127]]
-            input.shape = [3, 2]
-            Given size = [128, 16]
-            output is a Tensor:
-                out.shape = [3, 2, 16]
-                out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
-                             [0.345421456, 0.524563927, ..., 0.144534654]],
-                            [[0.345249859, 0.124939536, ..., 0.194353745],
-                             [0.945345345, 0.435394634, ..., 0.435345365]],
-                            [[0.945345345, 0.435394634, ..., 0.435345365],
-                             [0.0,         0.0,         ..., 0.0        ]]]  # padding data
-            The input padding_idx is less than 0, so it is automatically converted to padding_idx = -1 + 128 = 127.
-            All-zero padding data is output whenever an id equals 127.
-
-    Parameters:
-        size(tuple|list): The shape of the lookup table parameter. It should have two elements, which indicate the size
-            of the dictionary of embeddings and the size of each embedding vector respectively.
-        is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
-            affects the performance of the backward gradient update. It is recommended to set it
-            to True because sparse update is faster. However, some optimizers do not support sparse update,
-            such as :ref:`api_fluid_optimizer_AdadeltaOptimizer`, :ref:`api_fluid_optimizer_AdamaxOptimizer`,
-            :ref:`api_fluid_optimizer_DecayedAdagradOptimizer`, :ref:`api_fluid_optimizer_FtrlOptimizer`,
-            :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer`.
-            In these cases, is_sparse must be False. Default: False.
-        is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
-            in multi-machine distributed CPU training. Default: False.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
-            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
-            to :math:`vocab\_size + padding\_idx`. It will output all-zero padding data whenever lookup
-            encounters :math:`padding\_idx` in the ids, and the padding data will not be updated during training.
-            If set to None, it has no effect on the output. Default: None.
-        param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
-            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr`. In addition,
-            user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
-            The local word vectors need to be transformed into numpy format, and their shape
-            should be consistent with :attr:`size`. Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
-            is used to load custom or pre-trained word vectors. See code example 2 for details.
-        dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of the output Tensor.
-            It must be "float32" or "float64". Default: "float32".
-
-    Attribute:
-        **weight** (Parameter): the learnable weights of this layer.
-
-    Returns:
-        Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype`.
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import paddle.fluid.dygraph.base as base
-            import numpy as np
-
-            # example 1
-            inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
-            inp_word.shape  # [2, 3]
-            dict_size = 20
-            with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding(
-                    size=[dict_size, 32],
-                    param_attr='emb.w',
-                    is_sparse=False)
-                static_rlt3 = emb(base.to_variable(inp_word))
-                static_rlt3.shape  # [2, 3, 32]
-
-            # example 2: load custom or pre-trained word vectors
-            weight_data = np.random.random(size=(128, 100))  # word vectors in numpy format
-            w_param_attrs = fluid.ParamAttr(
-                name="emb_weight",
-                learning_rate=0.5,
-                initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
-                trainable=True)
-            with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding(
-                    size=[128, 100],
-                    param_attr=w_param_attrs,
-                    is_sparse=False)
-                static_rlt3 = emb(base.to_variable(inp_word))
-    """
-
-    def __init__(
-        self,
-        size,
-        is_sparse=False,
-        is_distributed=False,
-        padding_idx=None,
-        param_attr=None,
-        dtype='float32',
-    ):
-        super().__init__()
-        self._size = size
-        self._is_sparse = is_sparse
-        self._is_distributed = is_distributed
-        self._padding_idx = (
-            -1
-            if padding_idx is None
-            else padding_idx
-            if padding_idx >= 0
-            else (size[0] + padding_idx)
-        )
-
-        self._param_attr = param_attr
-        self._dtype = dtype
-        self._remote_prefetch = self._is_sparse and (not self._is_distributed)
-        if self._remote_prefetch:
-            assert self._is_sparse is True and self._is_distributed is False
-
-        self.weight = self.create_parameter(
-            attr=self._param_attr,
-            shape=self._size,
-            dtype=self._dtype,
-            is_bias=False,
-        )
-
-    def forward(self, input):
-        if _non_static_mode():
-            return _legacy_C_ops.lookup_table_v2(
-                self.weight,
-                input,
-                'is_sparse',
-                self._is_sparse,
-                'is_distributed',
-                self._is_distributed,
-                'remote_prefetch',
-                self._remote_prefetch,
-                'padding_idx',
-                self._padding_idx,
-            )
-
-        check_variable_and_dtype(
-            input,
-            'input',
-            ['uint8', 'int8', 'int16', 'int32', 'int64'],
-            'Embedding',
-        )
-        attrs = {
-            'is_sparse': self._is_sparse,
-            'is_distributed': self._is_distributed,
-            'remote_prefetch': self._remote_prefetch,
-            'padding_idx': self._padding_idx,
-        }
-
-        out = self._helper.create_variable_for_type_inference(self._dtype)
-        self._helper.append_op(
-            type='lookup_table_v2',
-            inputs={'Ids': input, 'W': self.weight},
-            outputs={'Out': out},
-            attrs=attrs,
-        )
-        return out
 class RowConv(layers.Layer):
     """
     ***Row-convolution operator***
...
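One behavioral detail worth noting from the removed class: `__init__` folded `padding_idx` into a single sentinel before handing it to `lookup_table_v2`. Below is a small standalone sketch of that normalization; the helper name `normalize_padding_idx` is ours, not Paddle's.

```python
def normalize_padding_idx(padding_idx, vocab_size):
    # None -> -1 sentinel (no padding); negative values wrap by vocab size,
    # mirroring the conditional expression in the removed Embedding.__init__.
    if padding_idx is None:
        return -1
    return padding_idx if padding_idx >= 0 else vocab_size + padding_idx

assert normalize_padding_idx(None, 128) == -1
assert normalize_padding_idx(5, 128) == 5
assert normalize_padding_idx(-1, 128) == 127  # the docstring's Case 1: -1 + 128
```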
@@ -723,10 +723,6 @@ class DataParallel(layers.Layer):
         def check_layer_sparse(sublayer):
             if isinstance(sublayer, paddle.nn.layer.common.Embedding):
                 return sublayer._sparse
-            # NOTE(shenliang03): This is for compatibility. If paddle.fluid.dygraph.Embedding
-            # is removed in the future, the check will also be removed here.
-            if isinstance(sublayer, paddle.fluid.dygraph.Embedding):
-                return sublayer._is_sparse
             return False
         is_sparse_gradient = [
@@ -875,8 +871,8 @@ class DataParallel(layers.Layer):
                 dist.init_parallel_env()
-                emb = fluid.dygraph.Embedding([10, 10])
-                emb = fluid.dygraph.DataParallel(emb)
+                emb = paddle.nn.Embedding(10, 10)
+                emb = paddle.fluid.dygraph.DataParallel(emb)
                 state_dict = emb.state_dict()
                 paddle.save(state_dict, "paddle_dy.pdparams")
@@ -910,7 +906,7 @@ class DataParallel(layers.Layer):
                 dist.init_parallel_env()
                 emb = paddle.nn.Embedding(10, 10)
-                emb = fluid.dygraph.DataParallel(emb)
+                emb = paddle.fluid.dygraph.DataParallel(emb)
                 state_dict = emb.state_dict()
                 paddle.save(state_dict, "paddle_dy.pdparams")
...
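After the first hunk above, DataParallel decides sparse-gradient handling from the paddle.nn layer alone. A sketch of what the check reduces to (illustrative, not the committed code verbatim):

```python
import paddle

def check_layer_sparse(sublayer):
    # Only the paddle.nn Embedding carries the `_sparse` flag now; the fluid
    # compatibility branch removed in the hunk above no longer exists.
    if isinstance(sublayer, paddle.nn.layer.common.Embedding):
        return sublayer._sparse
    return False

print(check_layer_sparse(paddle.nn.Embedding(10, 10, sparse=True)))  # True
print(check_layer_sparse(paddle.nn.Linear(4, 4)))                    # False
```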
@@ -1660,10 +1660,11 @@ class Variable(metaclass=VariableMetaClass):
                 # example2: return tuple of ndarray
                 with fluid.dygraph.guard():
-                    embedding = fluid.dygraph.Embedding(
-                        size=[20, 32],
-                        param_attr='emb.w',
-                        is_sparse=True)
+                    embedding = paddle.nn.Embedding(
+                        20,
+                        32,
+                        weight_attr='emb.w',
+                        sparse=True)
                     x_data = np.arange(12).reshape(4, 3).astype('int64')
                     x_data = x_data.reshape((-1, 3, 1))
                     x = fluid.dygraph.base.to_variable(x_data)
...
@@ -214,9 +214,10 @@ class Optimizer:
         .. code-block:: python
             import paddle.fluid as fluid
+            import paddle
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 adam = fluid.optimizer.Adam(0.001, parameter_list=emb.parameters())
                 state_dict = adam.state_dict()
@@ -582,7 +583,7 @@ class Optimizer:
             # example1: LearningRateDecay is not used, return value is all the same
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 adam = fluid.optimizer.Adam(0.001, parameter_list = emb.parameters())
                 lr = adam.current_step_lr()
                 print(lr) # 0.001
...
@@ -39,7 +39,6 @@ from paddle.distributed.fleet.meta_parallel.parallel_layers.pp_layers import (
 from paddle.distributed.sharding.group_sharded import group_sharded_parallel
 from paddle.distributed.utils.log_utils import get_logger
 from paddle.fluid.dataloader.dataset import IterableDataset
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.incubate.distributed.utils.io import save_for_auto_inference
 from paddle.nn import Linear
@@ -131,7 +130,7 @@ class MLP(fluid.Layer):
         bias_attr=None,
     ):
         super(MLP, self).__init__()
-        self.embedding = Embedding((embedding_size, linear_size))
+        self.embedding = paddle.nn.Embedding(embedding_size, linear_size)
         self._linear1 = Linear(linear_size, linear_size)
         self._linear2 = Linear(linear_size, linear_size)
         self._linear3 = Linear(linear_size, 10)
...
@@ -18,7 +18,6 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
 import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-from paddle.fluid.dygraph.nn import Embedding
 paddle.seed(123)
 np.random.seed(2021)
@@ -29,10 +28,10 @@ class SimpleNet(fluid.Layer):
         super().__init__()
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
-        self.embedding = Embedding(
-            size=[self.vocab_size, self.hidden_size],
-            dtype='float32',
-            is_sparse=is_sparse,
+        self.embedding = paddle.nn.Embedding(
+            self.vocab_size,
+            self.hidden_size,
+            sparse=is_sparse,
         )
         self.lin_a = paddle.nn.Linear(self.hidden_size, self.vocab_size)
...
@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
 import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-from paddle.fluid.dygraph import Embedding, Layer, to_variable
+from paddle.fluid.dygraph import Layer, to_variable
 from paddle.optimizer.lr import NoamDecay
 """
@@ -513,11 +513,11 @@ class PrepareEncoderDecoderLayer(Layer):
         self._src_emb_dim = src_emb_dim
         self._src_vocab_size = src_vocab_size
         self._dropout_rate = dropout_rate
-        self._input_emb = Embedding(
-            size=[src_vocab_size, src_emb_dim],
-            is_sparse=is_sparse,
-            padding_idx=0,
-            param_attr=fluid.ParamAttr(
+        self._input_emb = paddle.nn.Embedding(
+            src_vocab_size,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=word_emb_param_name,
                 initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
             ),
@@ -527,10 +527,11 @@ class PrepareEncoderDecoderLayer(Layer):
             pos_inp = pos_inp1
         else:
             pos_inp = pos_inp2
-        self._pos_emb = Embedding(
-            size=[self._src_max_len, src_emb_dim],
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+        self._pos_emb = paddle.nn.Embedding(
+            self._src_max_len,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=pos_enc_param_name,
                 initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
                 trainable=False,
...
@@ -16,7 +16,7 @@ from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph import Embedding, Layer
+from paddle.fluid.dygraph import Layer
 from paddle.jit.api import declarative
 from paddle.nn import Linear
@@ -208,29 +208,29 @@ class BertModelLayer(Layer):
         self._param_initializer = fluid.initializer.TruncatedNormal(
             scale=config['initializer_range']
         )
+        paddle.set_default_dtype(self._dtype)
-        self._src_emb = Embedding(
-            size=[self._voc_size, self._emb_size],
-            param_attr=fluid.ParamAttr(
+        self._src_emb = paddle.nn.Embedding(
+            self._voc_size,
+            self._emb_size,
+            weight_attr=fluid.ParamAttr(
                 name=self._word_emb_name, initializer=self._param_initializer
             ),
-            dtype=self._dtype,
         )
-        self._pos_emb = Embedding(
-            size=[self._max_position_seq_len, self._emb_size],
-            param_attr=fluid.ParamAttr(
+        self._pos_emb = paddle.nn.Embedding(
+            self._max_position_seq_len,
+            self._emb_size,
+            weight_attr=fluid.ParamAttr(
                 name=self._pos_emb_name, initializer=self._param_initializer
            ),
-            dtype=self._dtype,
         )
-        self._sent_emb = Embedding(
-            size=[self._sent_types, self._emb_size],
-            param_attr=fluid.ParamAttr(
+        self._sent_emb = paddle.nn.Embedding(
+            self._sent_types,
+            self._emb_size,
+            weight_attr=fluid.ParamAttr(
                 name=self._sent_emb_name, initializer=self._param_initializer
             ),
-            dtype=self._dtype,
         )
         self.pooled_fc = Linear(
...
@@ -21,8 +21,8 @@ import paddle.fluid as fluid
 from paddle.fluid import ParamAttr, layers
 from paddle.fluid.dygraph import Layer
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.jit.api import declarative
+from paddle.nn import Embedding
 INF = 1.0 * 1e5
 alpha = 0.6
@@ -122,16 +122,18 @@ class BaseModel(fluid.dygraph.Layer):
         forget_bias = 1.0
         self.src_embeder = Embedding(
-            size=[self.src_vocab_size, self.hidden_size],
-            param_attr=fluid.ParamAttr(
+            self.src_vocab_size,
+            self.hidden_size,
+            weight_attr=fluid.ParamAttr(
                 initializer=uniform_initializer(init_scale)
             ),
         )
         self.tar_embeder = Embedding(
-            size=[self.tar_vocab_size, self.hidden_size],
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            self.tar_vocab_size,
+            self.hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 initializer=uniform_initializer(init_scale)
             ),
         )
@@ -545,17 +547,19 @@ class AttentionModel(fluid.dygraph.Layer):
         forget_bias = 1.0
         self.src_embeder = Embedding(
-            size=[self.src_vocab_size, self.hidden_size],
-            param_attr=fluid.ParamAttr(
+            self.src_vocab_size,
+            self.hidden_size,
+            weight_attr=fluid.ParamAttr(
                 name='source_embedding',
                 initializer=uniform_initializer(init_scale),
             ),
         )
         self.tar_embeder = Embedding(
-            size=[self.tar_vocab_size, self.hidden_size],
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            self.tar_vocab_size,
+            self.hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='target_embedding',
                 initializer=uniform_initializer(init_scale),
             ),
...
@@ -17,7 +17,7 @@ from functools import reduce
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.param_attr as attr
-from paddle.fluid.dygraph import Embedding, Layer
+from paddle.fluid.dygraph import Layer
 from paddle.jit.api import declarative
 from paddle.static import Variable
@@ -42,11 +42,12 @@ class EmbeddingLayer:
         """
         # TODO(huihuangzheng): The original code set is_sparse=True, but it
         # causes a crash in dy2stat. Set it to True after fixing it.
-        emb = Embedding(
-            size=[self.dict_size, self.emb_dim],
-            is_sparse=True,
+        emb = paddle.nn.Embedding(
+            self.dict_size,
+            self.emb_dim,
+            sparse=True,
             padding_idx=self.padding_idx,
-            param_attr=attr.ParamAttr(
+            weight_attr=attr.ParamAttr(
                 name=self.name, initializer=fluid.initializer.Xavier()
             ),
         )
...
@@ -38,11 +38,12 @@ class EmbeddingLayer:
         """
         # TODO(huihuangzheng): The original code set is_sparse=True, but it
         # causes a crash in dy2stat. Set it to True after fixing it.
-        emb = paddle.fluid.dygraph.Embedding(
-            size=[self.dict_size, self.emb_dim],
-            is_sparse=True,
+        emb = paddle.nn.Embedding(
+            self.dict_size,
+            self.emb_dim,
+            sparse=True,
             padding_idx=self.padding_idx,
-            param_attr=paddle.ParamAttr(
+            weight_attr=paddle.ParamAttr(
                 name=self.name,
                 initializer=paddle.nn.initializer.XavierUniform(),
             ),
...
@@ -25,7 +25,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "2"
 import paddle
 import paddle.fluid as fluid
 from paddle import _legacy_C_ops
-from paddle.fluid.dygraph import Embedding, to_variable
+from paddle.fluid.dygraph import to_variable
 from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
 from paddle.fluid.framework import _non_static_mode
 from paddle.jit import ProgramTranslator
@@ -371,10 +371,10 @@ class LexNet(fluid.dygraph.Layer):
         self.bigru_num = args.bigru_num
         self.init_bound = 0.1
-        self.word_embedding = Embedding(
-            size=[self.vocab_size, self.word_emb_dim],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(
+        self.word_embedding = paddle.nn.Embedding(
+            self.vocab_size,
+            self.word_emb_dim,
+            weight_attr=fluid.ParamAttr(
                 learning_rate=self.emb_lr,
                 name="word_emb",
                 initializer=fluid.initializer.Uniform(
...
@@ -21,7 +21,6 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.optimizer import SGDOptimizer
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
@@ -156,11 +155,11 @@ class PtbModel(fluid.Layer):
             init_scale=init_scale,
             dropout=dropout,
         )
-        self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+        self.embedding = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
...
@@ -150,11 +150,11 @@ class PtbModel(paddle.nn.Layer):
             init_scale=init_scale,
             dropout=dropout,
         )
-        self.embedding = paddle.fluid.dygraph.nn.Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=paddle.ParamAttr(
+        self.embedding = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=paddle.ParamAttr(
                 name='embedding_para',
                 initializer=paddle.nn.initializer.Uniform(
                     low=-init_scale, high=init_scale
...
@@ -20,10 +20,9 @@ from test_lac import DynamicGRU
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
-from paddle.nn import Linear
+from paddle.nn import Embedding, Linear
 SEED = 2020
 program_translator = ProgramTranslator()
@@ -73,9 +72,9 @@ class CNN(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            sparse=False,
         )
         self._simple_conv_pool_1 = SimpleConvPool(
             self.channels,
@@ -124,9 +123,9 @@ class BOW(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            sparse=False,
         )
         self._fc1 = Linear(self.hid_dim, self.hid_dim)
         self._fc2 = Linear(self.hid_dim, self.fc_hid_dim)
@@ -167,10 +166,10 @@ class GRU(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(learning_rate=30),
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(learning_rate=30),
+            sparse=False,
         )
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
@@ -213,10 +212,10 @@ class BiGRU(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(learning_rate=30),
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(learning_rate=30),
+            sparse=False,
         )
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
...
@@ -20,9 +20,9 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
+from paddle.nn import Embedding
 def fake_text():
@@ -227,9 +227,9 @@ class SkipGram(fluid.dygraph.Layer):
         self.embedding_size = embedding_size
         self.embedding = Embedding(
-            size=[self.vocab_size, self.embedding_size],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(
+            self.vocab_size,
+            self.embedding_size,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-0.5 / self.embedding_size,
@@ -239,9 +239,9 @@ class SkipGram(fluid.dygraph.Layer):
         )
         self.embedding_out = Embedding(
-            size=[self.vocab_size, self.embedding_size],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(
+            self.vocab_size,
+            self.embedding_size,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_out_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-0.5 / self.embedding_size,
...
@@ -18,7 +18,7 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle.nn.functional as F
-from paddle.fluid.dygraph import Embedding, Layer, to_variable
+from paddle.fluid.dygraph import Layer, to_variable
 from paddle.fluid.layers.utils import map_structure
 from paddle.jit.api import dygraph_to_static_func
 from paddle.nn import Linear
@@ -276,10 +276,10 @@ class Encoder(Layer):
 class Embedder(Layer):
     def __init__(self, vocab_size, emb_dim, bos_idx=0):
         super().__init__()
-        self.word_embedder = Embedding(
-            size=[vocab_size, emb_dim],
-            padding_idx=bos_idx,
-            param_attr=fluid.ParamAttr(
+        self.word_embedder = paddle.nn.Embedding(
+            vocab_size,
+            emb_dim,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5)
             ),
         )
@@ -311,9 +311,10 @@ class WrapEncoder(Layer):
         self.emb_dropout = prepostprocess_dropout
         self.emb_dim = d_model
         self.word_embedder = word_embedder
-        self.pos_encoder = Embedding(
-            size=[max_length, self.emb_dim],
-            param_attr=fluid.ParamAttr(
+        self.pos_encoder = paddle.nn.Embedding(
+            max_length,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     position_encoding_init(max_length, self.emb_dim)
                 ),
@@ -499,9 +500,10 @@ class WrapDecoder(Layer):
         self.emb_dropout = prepostprocess_dropout
         self.emb_dim = d_model
         self.word_embedder = word_embedder
-        self.pos_encoder = Embedding(
-            size=[max_length, self.emb_dim],
-            param_attr=fluid.ParamAttr(
+        self.pos_encoder = paddle.nn.Embedding(
+            max_length,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     position_encoding_init(max_length, self.emb_dim)
                 ),
...
@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
+from paddle.nn import Embedding
 class SimpleNet(fluid.Layer):
@@ -37,10 +37,10 @@ class SimpleNet(fluid.Layer):
         self.init_scale = init_scale
         self.num_steps = num_steps
         self.embedding = Embedding(
-            size=[self.vocab_size, self.hidden_size],
-            dtype=dtype,
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+            self.vocab_size,
+            self.hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
                 )
...
@@ -19,6 +19,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.framework import _test_eager_guard
+from paddle.nn import Embedding
 from paddle.tensor import random
@@ -122,8 +123,8 @@ class AutoPruneLayer3(fluid.Layer):
 class MyLayer(fluid.Layer):
     def __init__(self, input_size, vocab_size, size, dtype="float32"):
         super().__init__(dtype=dtype)
-        self.embed0 = fluid.Embedding(size=(vocab_size, size))
-        self.embed1 = fluid.Embedding(size=(vocab_size, size))
+        self.embed0 = Embedding(vocab_size, size)
+        self.embed1 = Embedding(vocab_size, size)
         self.linear_0 = paddle.nn.Linear(input_size, size)
         self.linear_1 = paddle.nn.Linear(input_size, size)
@@ -144,8 +145,8 @@ class MyLayer(fluid.Layer):
 class MyLayer2(fluid.Layer):
     def __init__(self, input_size, vocab_size, size, dtype="float32"):
         super().__init__(dtype=dtype)
-        self.embed0 = fluid.Embedding(size=(vocab_size, size))
-        self.embed1 = fluid.Embedding(size=(vocab_size, size))
+        self.embed0 = Embedding(vocab_size, size)
+        self.embed1 = Embedding(vocab_size, size)
         self.linear_0 = paddle.nn.Linear(input_size, size)
         self.linear_1 = paddle.nn.Linear(input_size, size)
...
@@ -21,7 +21,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.framework as framework
-from paddle.fluid.dygraph.nn import BatchNorm, Embedding
+from paddle.fluid.dygraph.nn import BatchNorm
 from paddle.nn import Linear
@@ -206,8 +206,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
         self.batch_norm_1 = BatchNorm(10)
         self.batch_norm_2 = BatchNorm(10)
-        self.emb1 = Embedding([1000, 100])
-        self.emb2 = Embedding([2000, 200])
+        self.emb1 = paddle.nn.Embedding(1000, 100)
+        self.emb2 = paddle.nn.Embedding(2000, 200)
         self.layer_norm_1 = paddle.nn.LayerNorm([10])
         self.layer_norm_2 = paddle.nn.LayerNorm(10)
...
@@ -22,7 +22,6 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import SGDOptimizer
@@ -42,11 +41,12 @@ class SimpleNet(fluid.Layer):
         self.vocab_size = vocab_size
         self.init_scale = init_scale
         self.num_steps = num_steps
-        self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype=dtype,
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+        paddle.set_default_dtype(dtype)
+        self.embedding = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
...
@@ -101,7 +101,7 @@ class TestImperativeNamedParameters(unittest.TestCase):
         self.linear1 = paddle.nn.Linear(10, 10)
         self.linear2 = paddle.nn.Linear(5, 5)
         self.conv2d = paddle.nn.Conv2D(3, 2, 3)
-        self.embedding = fluid.dygraph.Embedding(size=[128, 16])
+        self.embedding = paddle.nn.Embedding(128, 16)
         self.h_0 = fluid.dygraph.to_variable(
             np.zeros([10, 10]).astype('float32')
         )
...
@@ -21,7 +21,7 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import BatchNorm, Embedding
+from paddle.fluid.dygraph.nn import BatchNorm
 from paddle.fluid.framework import _test_eager_guard
 from paddle.nn import Linear
@@ -371,8 +371,8 @@ class OCRAttention(fluid.dygraph.Layer):
             Config.decoder_size,
             bias_attr=False,
         )
-        self.embedding = Embedding(
-            [Config.num_classes + 2, Config.word_vector_dim], dtype='float32'
+        self.embedding = paddle.nn.Embedding(
+            Config.num_classes + 2, Config.word_vector_dim
         )
         self.gru_decoder_with_attention = GRUDecoderWithAttention(
             Config.decoder_size, Config.num_classes
...
@@ -23,9 +23,9 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import SGDOptimizer
+from paddle.nn import Embedding
 class SimpleLSTMRNN(fluid.Layer):
@@ -172,10 +172,10 @@ class PtbModel(fluid.Layer):
             dropout=dropout,
         )
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
...
@@ -22,9 +22,9 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import Adam
+from paddle.nn import Embedding
 class SimpleLSTMRNN(fluid.Layer):
@@ -167,10 +167,10 @@ class PtbModel(fluid.Layer):
             dropout=dropout,
         )
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
@@ -991,7 +991,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
     def func_testOnlyLoadParams(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
@@ -1011,7 +1011,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
     def func_test_load_compatible_with_keep_name_table(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
...
@@ -23,8 +23,8 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
+from paddle.nn import Embedding
 from paddle.optimizer import Adam
@@ -168,10 +168,10 @@ class PtbModel(fluid.Layer):
             dropout=dropout,
         )
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
@@ -1015,7 +1015,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
     def func_testOnlyLoadParams(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             paddle.save(
                 state_dict,
@@ -1028,7 +1028,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
     def func_test_no_state_in_input_dict(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             paddle.save(
                 state_dict,
@@ -1044,7 +1044,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
     def func_test_state_shape_mismatch(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             paddle.save(
                 state_dict,
...
@@ -27,11 +27,11 @@ from paddle.fluid.optimizer import SGDOptimizer
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, vocab_size, hidden_size, dtype):
         super().__init__()
-        self.emb = fluid.dygraph.Embedding(
-            size=[vocab_size, hidden_size],
-            dtype=dtype,
-            param_attr='emb.w',
-            is_sparse=True,
+        self.emb = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            weight_attr='emb.w',
+            sparse=True,
         )
     def forward(self, input):
...
@@ -22,9 +22,9 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import SGDOptimizer
+from paddle.nn import Embedding
 class SimpleNet(fluid.Layer):
@@ -42,11 +42,12 @@ class SimpleNet(fluid.Layer):
         self.vocab_size = vocab_size
         self.init_scale = init_scale
         self.num_steps = num_steps
+        paddle.set_default_dtype(dtype)
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype=dtype,
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
...
@@ -20,7 +20,7 @@ from test_imperative_base import new_program_scope
 import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-from paddle.fluid import Embedding, Layer, core
+from paddle.fluid import Layer, core
 from paddle.fluid.dygraph import guard, to_variable
 from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard
 from paddle.nn import Linear
@@ -664,11 +664,11 @@ class PrepareEncoderDecoderLayer(Layer):
         self._src_emb_dim = src_emb_dim
         self._src_vocab_size = src_vocab_size
         self._dropout_rate = dropout_rate
-        self._input_emb = Embedding(
-            size=[src_vocab_size, src_emb_dim],
-            is_sparse=is_sparse,
-            padding_idx=0,
-            param_attr=fluid.ParamAttr(
+        self._input_emb = paddle.nn.Embedding(
+            src_vocab_size,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=word_emb_param_name,
                 initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
             ),
@@ -678,10 +678,11 @@ class PrepareEncoderDecoderLayer(Layer):
             pos_inp = pos_inp1
         else:
             pos_inp = pos_inp2
-        self._pos_emb = Embedding(
-            size=[self._src_max_len, src_emb_dim],
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+        self._pos_emb = paddle.nn.Embedding(
+            self._src_max_len,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=pos_enc_param_name,
                 initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
                 trainable=False,
...
@@ -26,7 +26,7 @@ import paddle.fluid.layers as layers
 import paddle.fluid.nets as nets
 import paddle.nn.functional as F
 from paddle.fluid import core
-from paddle.fluid.dygraph import base, nn, to_variable
+from paddle.fluid.dygraph import base, to_variable
 from paddle.fluid.framework import (
     Program,
     _test_eager_guard,
@@ -732,8 +732,8 @@ class TestLayer(LayerTest):
         )[0]
         with self.static_graph():
             data_t = layers.data(name='word', shape=[1], dtype='int64')
-            emb2 = nn.Embedding(
-                size=[dict_size, 32], param_attr='emb.w', is_sparse=False
+            emb2 = paddle.nn.Embedding(
+                dict_size, 32, weight_attr='emb.w', sparse=False
             )
             emb_rlt = emb2(data_t)
             static_rlt2 = self.get_static_graph_result(
@@ -741,16 +741,17 @@ class TestLayer(LayerTest):
         )[0]
         with self.dynamic_graph():
             with _test_eager_guard():
-                emb2 = nn.Embedding(
-                    size=[dict_size, 32],
-                    param_attr='eager_emb.w',
-                    is_sparse=False,
+                emb2 = paddle.nn.Embedding(
+                    dict_size,
+                    32,
+                    weight_attr='eager_emb.w',
+                    sparse=False,
                 )
                 dy_eager_rlt = emb2(base.to_variable(inp_word))
                 dy_eager_rlt_value = dy_eager_rlt.numpy()
-            emb2 = nn.Embedding(
-                size=[dict_size, 32], param_attr='emb.w', is_sparse=False
+            emb2 = paddle.nn.Embedding(
+                dict_size, 32, weight_attr='emb.w', sparse=False
            )
             dy_rlt = emb2(base.to_variable(inp_word))
             dy_rlt_value = dy_rlt.numpy()
@@ -767,11 +768,12 @@ class TestLayer(LayerTest):
                         custom_weight
                     )
                 )
-                emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
-                emb2 = nn.Embedding(
-                    size=[dict_size, 32],
-                    param_attr=weight_attr,
-                    is_sparse=False,
+                emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False)
+                emb2 = paddle.nn.Embedding(
+                    dict_size,
+                    32,
+                    weight_attr=weight_attr,
+                    sparse=False,
                 )
                 rep1 = emb1(base.to_variable(inp_word))
                 rep2 = emb2(base.to_variable(inp_word))
@@ -797,9 +799,9 @@ class TestLayer(LayerTest):
                     custom_weight
                 )
             )
-            emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
-            emb2 = nn.Embedding(
-                size=[dict_size, 32], param_attr=weight_attr, is_sparse=False
+            emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False)
+            emb2 = paddle.nn.Embedding(
+                dict_size, 32, weight_attr=weight_attr, sparse=False
             )
             rep1 = emb1(base.to_variable(inp_word))
             rep2 = emb2(base.to_variable(inp_word))
...
@@ -689,9 +689,7 @@ class TestBeamSearch(ModuleApiTest):
         beam_size=4,
         max_step_num=20,
     ):
-        embedder = paddle.fluid.dygraph.Embedding(
-            size=[vocab_size, embed_dim], dtype="float64"
-        )
+        embedder = paddle.nn.Embedding(vocab_size, embed_dim)
         output_layer = nn.Linear(hidden_size, vocab_size)
         cell = nn.LSTMCell(embed_dim, hidden_size)
         self.max_step_num = max_step_num
...
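To close the loop, a quick check that the replacement layer reproduces the behavior the removed docstring promised: the output shape appends the embedding dim to the input shape, and a negative padding_idx wraps by the vocab size. This assumes a working Paddle install and is not part of the diff.

```python
import numpy as np
import paddle

emb = paddle.nn.Embedding(128, 16, padding_idx=-1)  # -1 wraps to 127
ids = paddle.to_tensor(np.array([[1, 3], [2, 4], [4, 127]], dtype='int64'))
out = emb(ids)
print(out.shape)          # [3, 2, 16]: emb_size appended to the input shape
print(out[2, 1].numpy())  # all zeros per the removed docstring's Case 1
```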