diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py
index 9515a8bd1704e5968d376f40caf985c2d068c40e..d3e91295d43a29654f2b21bd11382503adf21818 100644
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -83,9 +83,10 @@ def save_dygraph(state_dict, model_path):
         .. code-block:: python

             import paddle.fluid as fluid
+            import paddle

             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)

                 state_dict = emb.state_dict()
                 fluid.save_dygraph(state_dict, "paddle_dy")
diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py
index 77d4f2c2573f9407f5e3cd4d6ad2e7d2e7135e62..cf794ad4cef899d8899241ad662d3511fe50bd6a 100644
--- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py
+++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py
@@ -170,10 +170,11 @@ class PiecewiseDecay(LearningRateDecay):
         .. code-block:: python

             import paddle.fluid as fluid
+            import paddle
             boundaries = [10000, 20000]
             values = [1.0, 0.5, 0.1]
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( [10, 10] )
+                emb = paddle.nn.Embedding(10, 10)
                 optimizer = fluid.optimizer.SGD(
                     learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
                     parameter_list = emb.parameters() )
@@ -240,9 +241,10 @@ class NaturalExpDecay(LearningRateDecay):
         .. code-block:: python

             import paddle.fluid as fluid
+            import paddle
             base_lr = 0.1
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 sgd_optimizer = fluid.optimizer.SGD(
                     learning_rate=fluid.dygraph.NaturalExpDecay(
                         learning_rate=base_lr,
@@ -403,9 +405,10 @@ class InverseTimeDecay(LearningRateDecay):
         .. code-block:: python

             import paddle.fluid as fluid
+            import paddle
             base_lr = 0.1
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 sgd_optimizer = fluid.optimizer.SGD(
                     learning_rate=fluid.dygraph.InverseTimeDecay(
                         learning_rate=base_lr,
@@ -487,11 +490,12 @@ class PolynomialDecay(LearningRateDecay):
         .. code-block:: python

             import paddle.fluid as fluid
+            import paddle
             start_lr = 0.01
             total_step = 5000
             end_lr = 0
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( [10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 optimizer = fluid.optimizer.SGD(
                     learning_rate = fluid.dygraph.PolynomialDecay(
                     start_lr, total_step, end_lr, power=1.0),
@@ -639,10 +643,11 @@ class NoamDecay(LearningRateDecay):
         .. code-block:: python

             import paddle.fluid as fluid
+            import paddle
             warmup_steps = 100
             learning_rate = 0.01
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+                emb = paddle.nn.Embedding(10, 10)
                 optimizer = fluid.optimizer.SGD(
                     learning_rate = fluid.dygraph.NoamDecay(
                         1/(warmup_steps *(learning_rate ** 2)),
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index f0b761fff82905f744acda32144c11ef0ccbcc1e..77436e9293d644a954d966fe71eb47857181f34d 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -51,7 +51,6 @@ from paddle import _C_ops, _legacy_C_ops

 __all__ = [
     'BatchNorm',
-    'Embedding',
 ]

@@ -360,187 +359,6 @@ class BatchNorm(layers.Layer):
         return self._helper.append_activation(batch_norm_out, self._act)


-class Embedding(layers.Layer):
-    r"""
-    :alias_main: paddle.nn.Embedding
-    :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
-    :old_api: paddle.fluid.dygraph.Embedding
-
-    **Embedding Layer**
-
-    This interface is used to construct a callable object of the ``Embedding`` class.
-    For specific usage, refer to code examples. It implements the function of the Embedding Layer.
-    This layer is used to lookup embeddings vector of ids provided by :attr:`input` .
-    It automatically constructs a 2D embedding matrix based on the
-    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
-
-    The shape of output Tensor is generated by appending an emb_size dimension to the
-    last dimension of the input Tensor shape.
-
-    **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` ,
-    otherwise the program will throw an exception and exit.
-
-    .. code-block:: text
-
-        Case 1:
-
-        input is a Tensor. padding_idx = -1
-            input.data = [[1, 3], [2, 4], [4, 127]
-            input.shape = [3, 2]
-        Given size = [128, 16]
-        output is a Tensor:
-            out.shape = [3, 2, 16]
-            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
-                        [0.345421456, 0.524563927, ..., 0.144534654]],
-
-                        [[0.345249859, 0.124939536, ..., 0.194353745],
-                        [0.945345345, 0.435394634, ..., 0.435345365]],
-
-                        [[0.945345345, 0.435394634, ..., 0.435345365],
-                        [0.0, 0.0, ..., 0.0 ]]]  # padding data
-        The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
-        It will pad all-zero data when ids is 127.
-
-    Parameters:
-        size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size
-            of the dictionary of embeddings and the size of each embedding vector respectively.
-        is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
-            affects the performance of the backwards gradient update. It is recommended to set
-            True because sparse update is faster. But some optimizer does not support sparse update,
-            such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
-            :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` ,
-            :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` .
-            In these case, is_sparse must be False. Default: False.
-        is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
-            in multi-machine distributed CPU training. Default: False.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
-            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
-            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
-            encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
-            If set None, it makes no effect to output. Default: None.
-        param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
-            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
-            user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
-            The local word vector needs to be transformed into numpy format, and the shape of local word
-            vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
-            is used to load custom or pre-trained word vectors. See code example 2 for details.
-        dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor.
-            It must be "float32" or "float64". Default: "float32".
-
-    Attribute:
-        **weight** (Parameter): the learnable weights of this layer.
-
-    Returns:
-        Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
-
-    Examples:
-
-        .. code-block:: python
-
-          import paddle.fluid as fluid
-          import paddle.fluid.dygraph.base as base
-          import numpy as np
-
-          # example 1
-          inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
-          inp_word.shape  # [2, 3]
-          dict_size = 20
-          with fluid.dygraph.guard():
-              emb = fluid.dygraph.Embedding(
-                  size=[dict_size, 32],
-                  param_attr='emb.w',
-                  is_sparse=False)
-              static_rlt3 = emb(base.to_variable(inp_word))
-              static_rlt3.shape  # [2, 3, 32]
-
-          # example 2: load custom or pre-trained word vectors
-          weight_data = np.random.random(size=(128, 100))  # word vectors with numpy format
-          w_param_attrs = fluid.ParamAttr(
-              name="emb_weight",
-              learning_rate=0.5,
-              initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
-              trainable=True)
-          with fluid.dygraph.guard():
-              emb = fluid.dygraph.Embedding(
-                  size=[128, 100],
-                  param_attr= w_param_attrs,
-                  is_sparse=False)
-              static_rlt3 = emb(base.to_variable(inp_word))
-    """
-
-    def __init__(
-        self,
-        size,
-        is_sparse=False,
-        is_distributed=False,
-        padding_idx=None,
-        param_attr=None,
-        dtype='float32',
-    ):
-        super().__init__()
-        self._size = size
-        self._is_sparse = is_sparse
-        self._is_distributed = is_distributed
-        self._padding_idx = (
-            -1
-            if padding_idx is None
-            else padding_idx
-            if padding_idx >= 0
-            else (size[0] + padding_idx)
-        )
-
-        self._param_attr = param_attr
-        self._dtype = dtype
-        self._remote_prefetch = self._is_sparse and (not self._is_distributed)
-        if self._remote_prefetch:
-            assert self._is_sparse is True and self._is_distributed is False
-
-        self.weight = self.create_parameter(
-            attr=self._param_attr,
-            shape=self._size,
-            dtype=self._dtype,
-            is_bias=False,
-        )
-
-    def forward(self, input):
-        if _non_static_mode():
-            return _legacy_C_ops.lookup_table_v2(
-                self.weight,
-                input,
-                'is_sparse',
-                self._is_sparse,
-                'is_distributed',
-                self._is_distributed,
-                'remote_prefetch',
-                self._remote_prefetch,
-                'padding_idx',
-                self._padding_idx,
-            )
-
-        check_variable_and_dtype(
-            input,
-            'input',
-            ['uint8', 'int8', 'int16', 'int32', 'int64'],
-            'Embedding',
-        )
-        attrs = {
-            'is_sparse': self._is_sparse,
-            'is_distributed': self._is_distributed,
-            'remote_prefetch': self._remote_prefetch,
-            'padding_idx': self._padding_idx,
-        }
-
-        out = self._helper.create_variable_for_type_inference(self._dtype)
-        self._helper.append_op(
-            type='lookup_table_v2',
-            inputs={'Ids': input, 'W': self.weight},
-            outputs={'Out': out},
-            attrs=attrs,
-        )
-
-        return out
-
-
 class RowConv(layers.Layer):
     """
     ***Row-convolution operator***
diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py
index cb030f71a45bc5ef753d7978c58978de13bfa3d6..3432baf442e2adc9d6706ba98e665dbf23623818 100644
--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -723,10 +723,6 @@ class DataParallel(layers.Layer):
         def check_layer_sparse(sublayer):
             if isinstance(sublayer, paddle.nn.layer.common.Embedding):
                 return sublayer._sparse
-            # NOTE(shenliang03):This is for compatibility. If paddle.fluid.dygraph.Embedding
-            # is removed in the future, the check will also be removed here.
-            if isinstance(sublayer, paddle.fluid.dygraph.Embedding):
-                return sublayer._is_sparse
             return False

         is_sparse_gradient = [
@@ -875,8 +871,8 @@ class DataParallel(layers.Layer):

                 dist.init_parallel_env()

-                emb = fluid.dygraph.Embedding([10, 10])
-                emb = fluid.dygraph.DataParallel(emb)
+                emb = paddle.nn.Embedding(10, 10)
+                emb = paddle.fluid.dygraph.DataParallel(emb)

                 state_dict = emb.state_dict()
                 paddle.save(state_dict, "paddle_dy.pdparams")
@@ -910,7 +906,7 @@ class DataParallel(layers.Layer):
                 dist.init_parallel_env()

                 emb = paddle.nn.Embedding(10, 10)
-                emb = fluid.dygraph.DataParallel(emb)
+                emb = paddle.fluid.dygraph.DataParallel(emb)

                 state_dict = emb.state_dict()
                 paddle.save(state_dict, "paddle_dy.pdparams")
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index da4f609c401ac33783b2e7564c6b291cd74ddb9b..7393c6104f38ed64d96bcf250fe4ffdb50af9665 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1660,10 +1660,11 @@ class Variable(metaclass=VariableMetaClass):

                 # example2: return tuple of ndarray
                 with fluid.dygraph.guard():
-                    embedding = fluid.dygraph.Embedding(
-                        size=[20, 32],
-                        param_attr='emb.w',
-                        is_sparse=True)
+                    embedding = paddle.nn.Embedding(
+                        20,
+                        32,
+                        weight_attr='emb.w',
+                        sparse=True)
                     x_data = np.arange(12).reshape(4, 3).astype('int64')
                     x_data = x_data.reshape((-1, 3, 1))
                     x = fluid.dygraph.base.to_variable(x_data)
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 71167f30e026a601d3320c65c0b19d33e11e1ccb..42c57193941f30a5b07e94230bc7e389358aa9d6 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -214,9 +214,10 @@ class Optimizer:
             .. code-block:: python

                 import paddle.fluid as fluid
+                import paddle

                 with fluid.dygraph.guard():
-                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = paddle.nn.Embedding(10, 10)

                     adam = fluid.optimizer.Adam(0.001, parameter_list=emb.parameters())
                     state_dict = adam.state_dict()
@@ -582,7 +583,7 @@ class Optimizer:

                 # example1: LearningRateDecay is not used, return value is all the same
                 with fluid.dygraph.guard():
-                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = paddle.nn.Embedding(10, 10)
                     adam = fluid.optimizer.Adam(0.001, parameter_list = emb.parameters())
                     lr = adam.current_step_lr()
                     print(lr) # 0.001
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
index f63cfc089ed8a5d13eb264e883d0fcbd8960b84c..ef85aab80f6c95e864bc2ed7f4b089c9a9f3c564 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
@@ -39,7 +39,6 @@ from paddle.distributed.fleet.meta_parallel.parallel_layers.pp_layers import (
 from paddle.distributed.sharding.group_sharded import group_sharded_parallel
 from paddle.distributed.utils.log_utils import get_logger
 from paddle.fluid.dataloader.dataset import IterableDataset
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.incubate.distributed.utils.io import save_for_auto_inference
 from paddle.nn import Linear

@@ -131,7 +130,7 @@ class MLP(fluid.Layer):
         bias_attr=None,
     ):
         super(MLP, self).__init__()
-        self.embedding = Embedding((embedding_size, linear_size))
+        self.embedding = paddle.nn.Embedding(embedding_size, linear_size)
         self._linear1 = Linear(linear_size, linear_size)
         self._linear2 = Linear(linear_size, linear_size)
         self._linear3 = Linear(linear_size, 10)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py
index 48ec09552d7f98a946eaa32b080be258f0af3ec4..deaf9779d44f60e59bb27e826df9b1608dc668e7 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py
@@ -18,7 +18,6 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
 import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-from paddle.fluid.dygraph.nn import Embedding

 paddle.seed(123)
 np.random.seed(2021)
@@ -29,10 +28,10 @@ class SimpleNet(fluid.Layer):
         super().__init__()
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
-        self.embedding = Embedding(
-            size=[self.vocab_size, self.hidden_size],
-            dtype='float32',
-            is_sparse=is_sparse,
+        self.embedding = paddle.nn.Embedding(
+            self.vocab_size,
+            self.hidden_size,
+            sparse=is_sparse,
         )

         self.lin_a = paddle.nn.Linear(self.hidden_size, self.vocab_size)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
index 8d49434ac54e8bdfec82931467032d6d3f24eea7..a8ddeb0bfdbede2aa5ffbc3adc8357b7d5680383 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
 import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-from paddle.fluid.dygraph import Embedding, Layer, to_variable
+from paddle.fluid.dygraph import Layer, to_variable
 from paddle.optimizer.lr import NoamDecay

 """
@@ -513,11 +513,11 @@ class PrepareEncoderDecoderLayer(Layer):
         self._src_emb_dim = src_emb_dim
         self._src_vocab_size = src_vocab_size
         self._dropout_rate = dropout_rate
-        self._input_emb = Embedding(
-            size=[src_vocab_size, src_emb_dim],
-            is_sparse=is_sparse,
-            padding_idx=0,
-            param_attr=fluid.ParamAttr(
+        self._input_emb = paddle.nn.Embedding(
+            src_vocab_size,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=word_emb_param_name,
                 initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
             ),
@@ -527,10 +527,11 @@ class PrepareEncoderDecoderLayer(Layer):
             pos_inp = pos_inp1
         else:
             pos_inp = pos_inp2
-        self._pos_emb = Embedding(
-            size=[self._src_max_len, src_emb_dim],
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+        self._pos_emb = paddle.nn.Embedding(
+            self._src_max_len,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=pos_enc_param_name,
                 initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
                 trainable=False,
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
index 43f7f0f6d2b5e1af2b2f886cc499a46b3177ad96..a6e4f09564dfa0150bdeffb8b22eba0b62a63b09 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
@@ -16,7 +16,7 @@ from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer

 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph import Embedding, Layer
+from paddle.fluid.dygraph import Layer
 from paddle.jit.api import declarative
 from paddle.nn import Linear

@@ -208,29 +208,29 @@ class BertModelLayer(Layer):
         self._param_initializer = fluid.initializer.TruncatedNormal(
             scale=config['initializer_range']
         )
-
-        self._src_emb = Embedding(
-            size=[self._voc_size, self._emb_size],
-            param_attr=fluid.ParamAttr(
+        paddle.set_default_dtype(self._dtype)
+        self._src_emb = paddle.nn.Embedding(
+            self._voc_size,
+            self._emb_size,
+            weight_attr=fluid.ParamAttr(
                 name=self._word_emb_name, initializer=self._param_initializer
             ),
-            dtype=self._dtype,
         )

-        self._pos_emb = Embedding(
-            size=[self._max_position_seq_len, self._emb_size],
-            param_attr=fluid.ParamAttr(
+        self._pos_emb = paddle.nn.Embedding(
+            self._max_position_seq_len,
+            self._emb_size,
+            weight_attr=fluid.ParamAttr(
                 name=self._pos_emb_name, initializer=self._param_initializer
             ),
-            dtype=self._dtype,
         )

-        self._sent_emb = Embedding(
-            size=[self._sent_types, self._emb_size],
-            param_attr=fluid.ParamAttr(
+        self._sent_emb = paddle.nn.Embedding(
+            self._sent_types,
+            self._emb_size,
+            weight_attr=fluid.ParamAttr(
                 name=self._sent_emb_name, initializer=self._param_initializer
             ),
-            dtype=self._dtype,
         )

         self.pooled_fc = Linear(
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
index bbca449bde67a3fca02457c3a03264944752cb15..eceba1198fa47451f990a59f31ee267fa2d1b556 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
@@ -21,8 +21,8 @@ import paddle.fluid as fluid
 from paddle.fluid import ParamAttr, layers
 from paddle.fluid.dygraph import Layer
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.jit.api import declarative
+from paddle.nn import Embedding

 INF = 1.0 * 1e5
 alpha = 0.6
@@ -122,16 +122,18 @@ class BaseModel(fluid.dygraph.Layer):
         forget_bias = 1.0

         self.src_embeder = Embedding(
-            size=[self.src_vocab_size, self.hidden_size],
-            param_attr=fluid.ParamAttr(
+            self.src_vocab_size,
+            self.hidden_size,
+            weight_attr=fluid.ParamAttr(
                 initializer=uniform_initializer(init_scale)
             ),
         )

         self.tar_embeder = Embedding(
-            size=[self.tar_vocab_size, self.hidden_size],
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            self.tar_vocab_size,
+            self.hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 initializer=uniform_initializer(init_scale)
             ),
         )
@@ -545,17 +547,19 @@ class AttentionModel(fluid.dygraph.Layer):
         forget_bias = 1.0

         self.src_embeder = Embedding(
-            size=[self.src_vocab_size, self.hidden_size],
-            param_attr=fluid.ParamAttr(
+            self.src_vocab_size,
+            self.hidden_size,
+            weight_attr=fluid.ParamAttr(
                 name='source_embedding',
                 initializer=uniform_initializer(init_scale),
             ),
         )

         self.tar_embeder = Embedding(
-            size=[self.tar_vocab_size, self.hidden_size],
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            self.tar_vocab_size,
+            self.hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='target_embedding',
                 initializer=uniform_initializer(init_scale),
             ),
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py
index d16f07d9a2e343c258706050ba5a9199835d5777..b6baf7ddf2913c7abb1d49f2d54b03f438109396 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py
@@ -17,7 +17,7 @@ from functools import reduce
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.param_attr as attr
-from paddle.fluid.dygraph import Embedding, Layer
+from paddle.fluid.dygraph import Layer
 from paddle.jit.api import declarative
 from paddle.static import Variable

@@ -42,11 +42,12 @@ class EmbeddingLayer:
         """
         # TODO(huihuangzheng): The original code set the is_sparse=True, but it
         # causes crush in dy2stat. Set it to True after fixing it.
-        emb = Embedding(
-            size=[self.dict_size, self.emb_dim],
-            is_sparse=True,
+        emb = paddle.nn.Embedding(
+            self.dict_size,
+            self.emb_dim,
+            sparse=True,
             padding_idx=self.padding_idx,
-            param_attr=attr.ParamAttr(
+            weight_attr=attr.ParamAttr(
                 name=self.name, initializer=fluid.initializer.Xavier()
             ),
         )
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py
index 06f460912b45be6d26a7df716e5860bfeaf76798..99fe330c692410d8a9db25750866a158b169ffd9 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py
@@ -38,11 +38,12 @@ class EmbeddingLayer:
         """
         # TODO(huihuangzheng): The original code set the is_sparse=True, but it
         # causes crush in dy2stat. Set it to True after fixing it.
-        emb = paddle.fluid.dygraph.Embedding(
-            size=[self.dict_size, self.emb_dim],
-            is_sparse=True,
+        emb = paddle.nn.Embedding(
+            self.dict_size,
+            self.emb_dim,
+            sparse=True,
             padding_idx=self.padding_idx,
-            param_attr=paddle.ParamAttr(
+            weight_attr=paddle.ParamAttr(
                 name=self.name,
                 initializer=paddle.nn.initializer.XavierUniform(),
             ),
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py
index 7a5fbbc08424347710a30562dade3f3ebd5f1924..1ec320317d4c54fbab3a21fa62e53f393a74623c 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py
@@ -25,7 +25,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "2"
 import paddle
 import paddle.fluid as fluid
 from paddle import _legacy_C_ops
-from paddle.fluid.dygraph import Embedding, to_variable
+from paddle.fluid.dygraph import to_variable
 from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
 from paddle.fluid.framework import _non_static_mode
 from paddle.jit import ProgramTranslator
@@ -371,10 +371,10 @@ class LexNet(fluid.dygraph.Layer):
         self.bigru_num = args.bigru_num
         self.init_bound = 0.1

-        self.word_embedding = Embedding(
-            size=[self.vocab_size, self.word_emb_dim],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(
+        self.word_embedding = paddle.nn.Embedding(
+            self.vocab_size,
+            self.word_emb_dim,
+            weight_attr=fluid.ParamAttr(
                 learning_rate=self.emb_lr,
                 name="word_emb",
                 initializer=fluid.initializer.Uniform(
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py
index 46970eaa27bb6c7071ecf7c4aafde857c45b58f9..49e7c32d6e3186dade2435bec624c68d24c22917 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py
@@ -21,7 +21,6 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.optimizer import SGDOptimizer
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
@@ -156,11 +155,11 @@ class PtbModel(fluid.Layer):
             init_scale=init_scale,
             dropout=dropout,
         )
-        self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+        self.embedding = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py
index f589c37c2fbfd1fde1b40864bdda8ddb6472f365..60712aeda7aac930ef0950c9ccb9265afdaaa227 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py
@@ -150,11 +150,11 @@ class PtbModel(paddle.nn.Layer):
             init_scale=init_scale,
             dropout=dropout,
         )
-        self.embedding = paddle.fluid.dygraph.nn.Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=paddle.ParamAttr(
+        self.embedding = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=paddle.ParamAttr(
                 name='embedding_para',
                 initializer=paddle.nn.initializer.Uniform(
                     low=-init_scale, high=init_scale
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py
index 12e5099f257fe7e3b031efe1ecd66b0e2483d68c..d9eb993f72070731c7a1612a5282a1464f1ab46a 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py
@@ -20,10 +20,9 @@ from test_lac import DynamicGRU
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
-from paddle.nn import Linear
+from paddle.nn import Embedding, Linear

 SEED = 2020
 program_translator = ProgramTranslator()
@@ -73,9 +72,9 @@ class CNN(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            sparse=False,
         )
         self._simple_conv_pool_1 = SimpleConvPool(
             self.channels,
@@ -124,9 +123,9 @@ class BOW(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            sparse=False,
         )
         self._fc1 = Linear(self.hid_dim, self.hid_dim)
         self._fc2 = Linear(self.hid_dim, self.fc_hid_dim)
@@ -167,10 +166,10 @@ class GRU(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(learning_rate=30),
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(learning_rate=30),
+            sparse=False,
         )
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
@@ -213,10 +212,10 @@ class BiGRU(fluid.dygraph.Layer):
         self.batch_size = batch_size
         self.seq_len = seq_len
         self.embedding = Embedding(
-            size=[self.dict_dim + 1, self.emb_dim],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(learning_rate=30),
-            is_sparse=False,
+            self.dict_dim + 1,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(learning_rate=30),
+            sparse=False,
         )
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py
index 13f1a9b882fed3beb9860c1339dda363dc73fa7b..e546e26a2304f3ed0f200548ae01e6dac9190f4d 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py
@@ -20,9 +20,9 @@ import numpy as np

 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
+from paddle.nn import Embedding


 def fake_text():
@@ -227,9 +227,9 @@ class SkipGram(fluid.dygraph.Layer):
         self.embedding_size = embedding_size

         self.embedding = Embedding(
-            size=[self.vocab_size, self.embedding_size],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(
+            self.vocab_size,
+            self.embedding_size,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-0.5 / self.embedding_size,
@@ -239,9 +239,9 @@ class SkipGram(fluid.dygraph.Layer):
         )

         self.embedding_out = Embedding(
-            size=[self.vocab_size, self.embedding_size],
-            dtype='float32',
-            param_attr=fluid.ParamAttr(
+            self.vocab_size,
+            self.embedding_size,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_out_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-0.5 / self.embedding_size,
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
index 88cc415b4bbab3662692b7d3b51727146a05a586..f8641dd2ac4f3333b726a2b70338068495fcbfd6 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
@@ -18,7 +18,7 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle.nn.functional as F
-from paddle.fluid.dygraph import Embedding, Layer, to_variable
+from paddle.fluid.dygraph import Layer, to_variable
 from paddle.fluid.layers.utils import map_structure
 from paddle.jit.api import dygraph_to_static_func
 from paddle.nn import Linear
@@ -276,10 +276,10 @@ class Encoder(Layer):
 class Embedder(Layer):
     def __init__(self, vocab_size, emb_dim, bos_idx=0):
         super().__init__()
-        self.word_embedder = Embedding(
-            size=[vocab_size, emb_dim],
-            padding_idx=bos_idx,
-            param_attr=fluid.ParamAttr(
+        self.word_embedder = paddle.nn.Embedding(
+            vocab_size,
+            emb_dim,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5)
             ),
         )
@@ -311,9 +311,10 @@ class WrapEncoder(Layer):
         self.emb_dropout = prepostprocess_dropout
         self.emb_dim = d_model
         self.word_embedder = word_embedder
-        self.pos_encoder = Embedding(
-            size=[max_length, self.emb_dim],
-            param_attr=fluid.ParamAttr(
+        self.pos_encoder = paddle.nn.Embedding(
+            max_length,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     position_encoding_init(max_length, self.emb_dim)
                 ),
@@ -499,9 +500,10 @@ class WrapDecoder(Layer):
         self.emb_dropout = prepostprocess_dropout
         self.emb_dim = d_model
         self.word_embedder = word_embedder
-        self.pos_encoder = Embedding(
-            size=[max_length, self.emb_dim],
-            param_attr=fluid.ParamAttr(
+        self.pos_encoder = paddle.nn.Embedding(
+            max_length,
+            self.emb_dim,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     position_encoding_init(max_length, self.emb_dim)
                 ),
diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py
index e1103c1d595c0c1995b61d489920b9d036b051d3..11aacd02439e99fe132171a2012385eb08ba092a 100644
--- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py
+++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py
@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
+from paddle.nn import Embedding


 class SimpleNet(fluid.Layer):
@@ -37,10 +37,10 @@ class SimpleNet(fluid.Layer):
         self.init_scale = init_scale
         self.num_steps = num_steps
         self.embedding = Embedding(
-            size=[self.vocab_size, self.hidden_size],
-            dtype=dtype,
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+            self.vocab_size,
+            self.hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
                 )
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
index 5a301d3f0a5adc4688ac53da7b25b0bf201c8a98..e4dca329fe7c23fdc1c26a24334e7622841bbb10 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
@@ -19,6 +19,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.framework import _test_eager_guard
+from paddle.nn import Embedding
 from paddle.tensor import random


@@ -122,8 +123,8 @@ class AutoPruneLayer3(fluid.Layer):
 class MyLayer(fluid.Layer):
     def __init__(self, input_size, vocab_size, size, dtype="float32"):
         super().__init__(dtype=dtype)
-        self.embed0 = fluid.Embedding(size=(vocab_size, size))
-        self.embed1 = fluid.Embedding(size=(vocab_size, size))
+        self.embed0 = Embedding(vocab_size, size)
+        self.embed1 = Embedding(vocab_size, size)
         self.linear_0 = paddle.nn.Linear(input_size, size)
         self.linear_1 = paddle.nn.Linear(input_size, size)

@@ -144,8 +145,8 @@ class MyLayer(fluid.Layer):
 class MyLayer2(fluid.Layer):
     def __init__(self, input_size, vocab_size, size, dtype="float32"):
         super().__init__(dtype=dtype)
-        self.embed0 = fluid.Embedding(size=(vocab_size, size))
-        self.embed1 = fluid.Embedding(size=(vocab_size, size))
+        self.embed0 = Embedding(vocab_size, size)
+        self.embed1 = Embedding(vocab_size, size)
         self.linear_0 = paddle.nn.Linear(input_size, size)
         self.linear_1 = paddle.nn.Linear(input_size, size)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
index f864e2829046b720fa76666e8cf185b7e70f3829..2003e685327b8f03430d5c2f753b413369c64744 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
@@ -21,7 +21,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.framework as framework
-from paddle.fluid.dygraph.nn import BatchNorm, Embedding
+from paddle.fluid.dygraph.nn import BatchNorm
 from paddle.nn import Linear


@@ -206,8 +206,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
                 self.batch_norm_1 = BatchNorm(10)
                 self.batch_norm_2 = BatchNorm(10)

-                self.emb1 = Embedding([1000, 100])
-                self.emb2 = Embedding([2000, 200])
+                self.emb1 = paddle.nn.Embedding(1000, 100)
+                self.emb2 = paddle.nn.Embedding(2000, 200)

                 self.layer_norm_1 = paddle.nn.LayerNorm([10])
                 self.layer_norm_2 = paddle.nn.LayerNorm(10)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py
index fe706a78f8fe69873601986959f6c1d10e737a57..0027cbfa2a9bff3ac86d524d0c5236531365cd24 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py
@@ -22,7 +22,6 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import SGDOptimizer

@@ -42,11 +41,12 @@ class SimpleNet(fluid.Layer):
         self.vocab_size = vocab_size
         self.init_scale = init_scale
         self.num_steps = num_steps
-        self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype=dtype,
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+        paddle.set_default_dtype(dtype)
+        self.embedding = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py
index faaa02ea46a5d0a5d11c21661d92fb85bfd04c2a..0984104269c42098d2a0bf2b93d03ec94f3c04ea 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py
@@ -101,7 +101,7 @@ class TestImperativeNamedParameters(unittest.TestCase):
                 self.linear1 = paddle.nn.Linear(10, 10)
                 self.linear2 = paddle.nn.Linear(5, 5)
                 self.conv2d = paddle.nn.Conv2D(3, 2, 3)
-                self.embedding = fluid.dygraph.Embedding(size=[128, 16])
+                self.embedding = paddle.nn.Embedding(128, 16)
                 self.h_0 = fluid.dygraph.to_variable(
                     np.zeros([10, 10]).astype('float32')
                 )
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
index 1df0a4148c9c637aa77b3a8c6e587515b66bbb6d..12118beaffe3b9c2f613bd505bd263d350510c1d 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
@@ -21,7 +21,7 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import BatchNorm, Embedding
+from paddle.fluid.dygraph.nn import BatchNorm
 from paddle.fluid.framework import _test_eager_guard
 from paddle.nn import Linear

@@ -371,8 +371,8 @@ class OCRAttention(fluid.dygraph.Layer):
             Config.decoder_size,
             bias_attr=False,
         )
-        self.embedding = Embedding(
-            [Config.num_classes + 2, Config.word_vector_dim], dtype='float32'
+        self.embedding = paddle.nn.Embedding(
+            Config.num_classes + 2, Config.word_vector_dim
         )
         self.gru_decoder_with_attention = GRUDecoderWithAttention(
             Config.decoder_size, Config.num_classes
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
index 2a59dd396f000b9400f09d6f0a16bfeefee03c05..6bbf0a70c2e34739abeebd38003d37c92becad06 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
@@ -23,9 +23,9 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import SGDOptimizer
+from paddle.nn import Embedding


 class SimpleLSTMRNN(fluid.Layer):
@@ -172,10 +172,10 @@ class PtbModel(fluid.Layer):
             dropout=dropout,
         )
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 2e30ea41a18cd2a8a10227aee3fa5974309cde2f..4e30f591686dc13557eb6818eb8c958e7ae242b7 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -22,9 +22,9 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import Adam
+from paddle.nn import Embedding


 class SimpleLSTMRNN(fluid.Layer):
@@ -167,10 +167,10 @@ class PtbModel(fluid.Layer):
             dropout=dropout,
         )
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
@@ -991,7 +991,7 @@ class TestDygraphPtbRnn(unittest.TestCase):

     def func_testOnlyLoadParams(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))

@@ -1011,7 +1011,7 @@ class TestDygraphPtbRnn(unittest.TestCase):

     def func_test_load_compatible_with_keep_name_table(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
index 4a3c6c64a6f6e5dd00eab5d302a8cd3c456d2d93..a567a443e44859c8c48dcb6e60ae6045aec27065 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
@@ -23,8 +23,8 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
+from paddle.nn import Embedding
 from paddle.optimizer import Adam


@@ -168,10 +168,10 @@ class PtbModel(fluid.Layer):
             dropout=dropout,
         )
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=False,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
@@ -1015,7 +1015,7 @@ class TestDygraphPtbRnn(unittest.TestCase):

     def func_testOnlyLoadParams(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             paddle.save(
                 state_dict,
@@ -1028,7 +1028,7 @@ class TestDygraphPtbRnn(unittest.TestCase):

     def func_test_no_state_in_input_dict(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             paddle.save(
                 state_dict,
@@ -1044,7 +1044,7 @@ class TestDygraphPtbRnn(unittest.TestCase):

     def func_test_state_shape_mismatch(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding([10, 10])
+            emb = paddle.nn.Embedding(10, 10)
             state_dict = emb.state_dict()
             paddle.save(
                 state_dict,
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
index 72c77e753f54b6bcc1da74c1e8467a6ca5ce2f56..498317b2a33f9f06a34565a296e33dc7951bc840 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
@@ -27,11 +27,11 @@ from paddle.fluid.optimizer import SGDOptimizer
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, vocab_size, hidden_size, dtype):
         super().__init__()
-        self.emb = fluid.dygraph.Embedding(
-            size=[vocab_size, hidden_size],
-            dtype=dtype,
-            param_attr='emb.w',
-            is_sparse=True,
+        self.emb = paddle.nn.Embedding(
+            vocab_size,
+            hidden_size,
+            weight_attr='emb.w',
+            sparse=True,
         )

     def forward(self, input):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
index dd490e8d5553bbcd4a28257df745088ec3ae7afb..220bde8e5b235f49dc10e2f34e6fa387e2aaced2 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
@@ -22,9 +22,9 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph.nn import Embedding
 from paddle.fluid.framework import _test_eager_guard
 from paddle.fluid.optimizer import SGDOptimizer
+from paddle.nn import Embedding


 class SimpleNet(fluid.Layer):
@@ -42,11 +42,12 @@ class SimpleNet(fluid.Layer):
         self.vocab_size = vocab_size
         self.init_scale = init_scale
         self.num_steps = num_steps
+        paddle.set_default_dtype(dtype)
         self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype=dtype,
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+            vocab_size,
+            hidden_size,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
index 3cc07ee6a3378b18da823a3fe94f59f71481b6a0..f73e94363844cdd0c5d9f51e761a0dfc233da5cd 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
@@ -20,7 +20,7 @@ from test_imperative_base import new_program_scope
 import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-from paddle.fluid import Embedding, Layer, core
+from paddle.fluid import Layer, core
 from paddle.fluid.dygraph import guard, to_variable
 from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard
 from paddle.nn import Linear
@@ -664,11 +664,11 @@ class PrepareEncoderDecoderLayer(Layer):
         self._src_emb_dim = src_emb_dim
         self._src_vocab_size = src_vocab_size
         self._dropout_rate = dropout_rate
-        self._input_emb = Embedding(
-            size=[src_vocab_size, src_emb_dim],
-            is_sparse=is_sparse,
-            padding_idx=0,
-            param_attr=fluid.ParamAttr(
+        self._input_emb = paddle.nn.Embedding(
+            src_vocab_size,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=word_emb_param_name,
                 initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
             ),
@@ -678,10 +678,11 @@ class PrepareEncoderDecoderLayer(Layer):
             pos_inp = pos_inp1
         else:
             pos_inp = pos_inp2
-        self._pos_emb = Embedding(
-            size=[self._src_max_len, src_emb_dim],
-            is_sparse=is_sparse,
-            param_attr=fluid.ParamAttr(
+        self._pos_emb = paddle.nn.Embedding(
+            self._src_max_len,
+            src_emb_dim,
+            sparse=is_sparse,
+            weight_attr=fluid.ParamAttr(
                 name=pos_enc_param_name,
                 initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
                 trainable=False,
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 01a9c098b0e913d0376612cac2cc18777c0f39f0..83cec6d60443fad89d8f5d586c41904099d6d161 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -26,7 +26,7 @@ import paddle.fluid.layers as layers
 import paddle.fluid.nets as nets
 import paddle.nn.functional as F
 from paddle.fluid import core
-from paddle.fluid.dygraph import base, nn, to_variable
+from paddle.fluid.dygraph import base, to_variable
 from paddle.fluid.framework import (
     Program,
     _test_eager_guard,
@@ -732,8 +732,8 @@ class TestLayer(LayerTest):
             )[0]
         with self.static_graph():
             data_t = layers.data(name='word', shape=[1], dtype='int64')
-            emb2 = nn.Embedding(
-                size=[dict_size, 32], param_attr='emb.w', is_sparse=False
+            emb2 = paddle.nn.Embedding(
+                dict_size, 32, weight_attr='emb.w', sparse=False
             )
             emb_rlt = emb2(data_t)
             static_rlt2 = self.get_static_graph_result(
@@ -741,16 +741,17 @@ class TestLayer(LayerTest):
             )[0]
         with self.dynamic_graph():
             with _test_eager_guard():
-                emb2 = nn.Embedding(
-                    size=[dict_size, 32],
-                    param_attr='eager_emb.w',
-                    is_sparse=False,
+                emb2 = paddle.nn.Embedding(
+                    dict_size,
+                    32,
+                    weight_attr='eager_emb.w',
+                    sparse=False,
                 )
                 dy_eager_rlt = emb2(base.to_variable(inp_word))
                 dy_eager_rlt_value = dy_eager_rlt.numpy()

-            emb2 = nn.Embedding(
-                size=[dict_size, 32], param_attr='emb.w', is_sparse=False
+            emb2 = paddle.nn.Embedding(
+                dict_size, 32, weight_attr='emb.w', sparse=False
             )
             dy_rlt = emb2(base.to_variable(inp_word))
             dy_rlt_value = dy_rlt.numpy()
@@ -767,11 +768,12 @@ class TestLayer(LayerTest):
                         custom_weight
                     )
                 )
-                emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
-                emb2 = nn.Embedding(
-                    size=[dict_size, 32],
-                    param_attr=weight_attr,
-                    is_sparse=False,
+                emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False)
+                emb2 = paddle.nn.Embedding(
+                    dict_size,
+                    32,
+                    weight_attr=weight_attr,
+                    sparse=False,
                 )
                 rep1 = emb1(base.to_variable(inp_word))
                 rep2 = emb2(base.to_variable(inp_word))
@@ -797,9 +799,9 @@ class TestLayer(LayerTest):
                     custom_weight
                 )
             )
-            emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
-            emb2 = nn.Embedding(
-                size=[dict_size, 32], param_attr=weight_attr, is_sparse=False
+            emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False)
+            emb2 = paddle.nn.Embedding(
+                dict_size, 32, weight_attr=weight_attr, sparse=False
             )
             rep1 = emb1(base.to_variable(inp_word))
             rep2 = emb2(base.to_variable(inp_word))
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
index bfa6966e543b092012f8affd0517e37cc29186b1..f990c2171b92e32928c83bf8f2ddb0c664956e6c 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
@@ -689,9 +689,7 @@ class TestBeamSearch(ModuleApiTest):
         beam_size=4,
         max_step_num=20,
     ):
-        embedder = paddle.fluid.dygraph.Embedding(
-            size=[vocab_size, embed_dim], dtype="float64"
-        )
+        embedder = paddle.nn.Embedding(vocab_size, embed_dim)
        output_layer = nn.Linear(hidden_size, vocab_size)
         cell = nn.LSTMCell(embed_dim, hidden_size)
         self.max_step_num = max_step_num
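Migration note (not part of the patch): every call site above moves from the removed `paddle.fluid.dygraph.Embedding` to `paddle.nn.Embedding`. Below is a minimal before/after sketch of the rewrite, assuming Paddle 2.x semantics: the two dimensions become positional `num_embeddings` / `embedding_dim` arguments instead of a `size` list, `is_sparse` becomes `sparse`, `param_attr` becomes `weight_attr`, and there is no `dtype` argument (the new layer uses the global default dtype, which is why some files above gain a `paddle.set_default_dtype(...)` call where the old code passed a non-default dtype). The vocab/width values here are illustrative, not taken from any file in the patch:

    import numpy as np

    import paddle

    vocab_size, hidden_size = 100, 16

    # Old API, removed by this patch:
    #     emb = paddle.fluid.dygraph.Embedding(
    #         size=[vocab_size, hidden_size],
    #         dtype='float32',
    #         is_sparse=False,
    #         param_attr=paddle.ParamAttr(name='embedding_para'),
    #     )

    # New equivalent:
    emb = paddle.nn.Embedding(
        vocab_size,    # num_embeddings, was size[0]
        hidden_size,   # embedding_dim, was size[1]
        sparse=False,  # was is_sparse
        weight_attr=paddle.ParamAttr(name='embedding_para'),  # was param_attr
    )

    ids = paddle.to_tensor(np.array([[1, 3], [2, 4]], dtype='int64'))
    out = emb(ids)
    print(out.shape)  # [2, 2, 16]: embedding_dim appended to the id shape

Note that `padding_idx` is still a keyword on `paddle.nn.Embedding` (the simnet models above keep it), so only the renamed/removed parameters need touching at each call site.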