From 6e356c68c6f8902e42284565f576ff981302efea Mon Sep 17 00:00:00 2001 From: wangzhen38 <41941775+wangzhen38@users.noreply.github.com> Date: Sat, 10 Dec 2022 17:49:41 +0800 Subject: [PATCH] [remove fluid] fluid dygraph Embedding (#48806) * [remove fluid] fluid dygraph Embedding * [remove fluid] fluid dygraph Embedding * [remove fluid] fluid dygraph Embedding * [remove fluid] fluid dygraph Embedding * [remove fluid] fluid dygraph Embedding * [remove fluid] fluid dygraph Embedding --- python/paddle/fluid/dygraph/checkpoint.py | 3 +- .../fluid/dygraph/learning_rate_scheduler.py | 15 +- python/paddle/fluid/dygraph/nn.py | 182 ------------------ python/paddle/fluid/dygraph/parallel.py | 10 +- python/paddle/fluid/framework.py | 9 +- python/paddle/fluid/optimizer.py | 5 +- .../fleet/dygraph_save_for_auto_infer.py | 3 +- ...parallel_dygraph_control_flow_different.py | 9 +- .../fleet/parallel_dygraph_transformer.py | 21 +- .../dygraph_to_static/bert_dygraph_model.py | 28 +-- .../seq2seq_dygraph_model.py | 26 +-- .../dygraph_to_static/simnet_dygraph_model.py | 11 +- .../simnet_dygraph_model_v2.py | 9 +- .../unittests/dygraph_to_static/test_lac.py | 10 +- .../dygraph_to_static/test_ptb_lm.py | 11 +- .../dygraph_to_static/test_ptb_lm_v2.py | 10 +- .../dygraph_to_static/test_sentiment.py | 31 ++- .../dygraph_to_static/test_word2vec.py | 14 +- .../transformer_dygraph_model.py | 24 +-- .../parallel_dygraph_sparse_embedding.py | 10 +- .../unittests/test_imperative_auto_prune.py | 9 +- .../test_imperative_load_static_param.py | 6 +- ..._imperative_lod_tensor_to_selected_rows.py | 12 +- .../test_imperative_named_members.py | 2 +- .../test_imperative_ocr_attention_model.py | 6 +- .../unittests/test_imperative_ptb_rnn.py | 10 +- .../unittests/test_imperative_save_load.py | 14 +- .../unittests/test_imperative_save_load_v2.py | 16 +- .../test_imperative_selected_rows.py | 10 +- ..._imperative_selected_rows_to_lod_tensor.py | 11 +- ..._imperative_transformer_sorted_gradient.py | 21 +- .../fluid/tests/unittests/test_layers.py | 36 ++-- .../tests/unittests/test_rnn_decode_api.py | 4 +- 33 files changed, 214 insertions(+), 384 deletions(-) diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index 9515a8bd170..d3e91295d43 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -83,9 +83,10 @@ def save_dygraph(state_dict, model_path): .. code-block:: python import paddle.fluid as fluid + import paddle with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() fluid.save_dygraph( state_dict, "paddle_dy") diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index 77d4f2c2573..cf794ad4cef 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -170,10 +170,11 @@ class PiecewiseDecay(LearningRateDecay): .. code-block:: python import paddle.fluid as fluid + import paddle boundaries = [10000, 20000] values = [1.0, 0.5, 0.1] with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding( [10, 10] ) + emb = paddle.nn.Embedding(10, 10) optimizer = fluid.optimizer.SGD( learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0), parameter_list = emb.parameters() ) @@ -240,9 +241,10 @@ class NaturalExpDecay(LearningRateDecay): .. 
code-block:: python import paddle.fluid as fluid + import paddle base_lr = 0.1 with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.dygraph.NaturalExpDecay( learning_rate=base_lr, @@ -403,9 +405,10 @@ class InverseTimeDecay(LearningRateDecay): .. code-block:: python import paddle.fluid as fluid + import paddle base_lr = 0.1 with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.dygraph.InverseTimeDecay( learning_rate=base_lr, @@ -487,11 +490,12 @@ class PolynomialDecay(LearningRateDecay): .. code-block:: python import paddle.fluid as fluid + import paddle start_lr = 0.01 total_step = 5000 end_lr = 0 with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding( [10, 10]) + emb = paddle.nn.Embedding(10, 10) optimizer = fluid.optimizer.SGD( learning_rate = fluid.dygraph.PolynomialDecay( start_lr, total_step, end_lr, power=1.0), @@ -639,10 +643,11 @@ class NoamDecay(LearningRateDecay): .. code-block:: python import paddle.fluid as fluid + import paddle warmup_steps = 100 learning_rate = 0.01 with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) optimizer = fluid.optimizer.SGD( learning_rate = fluid.dygraph.NoamDecay( 1/(warmup_steps *(learning_rate ** 2)), diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index f0b761fff82..77436e9293d 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -51,7 +51,6 @@ from paddle import _C_ops, _legacy_C_ops __all__ = [ 'BatchNorm', - 'Embedding', ] @@ -360,187 +359,6 @@ class BatchNorm(layers.Layer): return self._helper.append_activation(batch_norm_out, self._act) -class Embedding(layers.Layer): - r""" - :alias_main: paddle.nn.Embedding - :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding - :old_api: paddle.fluid.dygraph.Embedding - - **Embedding Layer** - - This interface is used to construct a callable object of the ``Embedding`` class. - For specific usage, refer to code examples. It implements the function of the Embedding Layer. - This layer is used to lookup embeddings vector of ids provided by :attr:`input` . - It automatically constructs a 2D embedding matrix based on the - input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . - - The shape of output Tensor is generated by appending an emb_size dimension to the - last dimension of the input Tensor shape. - - **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , - otherwise the program will throw an exception and exit. - - .. code-block:: text - - Case 1: - - input is a Tensor. padding_idx = -1 - input.data = [[1, 3], [2, 4], [4, 127] - input.shape = [3, 2] - Given size = [128, 16] - output is a Tensor: - out.shape = [3, 2, 16] - out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], - [0.345421456, 0.524563927, ..., 0.144534654]], - - [[0.345249859, 0.124939536, ..., 0.194353745], - [0.945345345, 0.435394634, ..., 0.435345365]], - - [[0.945345345, 0.435394634, ..., 0.435345365], - [0.0, 0.0, ..., 0.0 ]]] # padding data - The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 - It will pad all-zero data when ids is 127. - - Parameters: - size(tuple|list): The shape of the look up table parameter. 
It should have two elements which indicate the size - of the dictionary of embeddings and the size of each embedding vector respectively. - is_sparse(bool): The flag indicating whether to use sparse update. This parameter only - affects the performance of the backwards gradient update. It is recommended to set - True because sparse update is faster. But some optimizer does not support sparse update, - such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` , - :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` , - :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` . - In these case, is_sparse must be False. Default: False. - is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used - in multi-machine distributed CPU training. Default: False. - padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). - If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted - to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup - encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. - If set None, it makes no effect to output. Default: None. - param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the - default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, - user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. - The local word vector needs to be transformed into numpy format, and the shape of local word - vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` - is used to load custom or pre-trained word vectors. See code example 2 for details. - dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor. - It must be "float32" or "float64". Default: "float32". - - Attribute: - **weight** (Parameter): the learnable weights of this layer. - - Returns: - Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . - - Examples: - - .. 
code-block:: python - - import paddle.fluid as fluid - import paddle.fluid.dygraph.base as base - import numpy as np - - # example 1 - inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64') - inp_word.shape # [2, 3] - dict_size = 20 - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding( - size=[dict_size, 32], - param_attr='emb.w', - is_sparse=False) - static_rlt3 = emb(base.to_variable(inp_word)) - static_rlt3.shape # [2, 3, 32] - - # example 2: load custom or pre-trained word vectors - weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format - w_param_attrs = fluid.ParamAttr( - name="emb_weight", - learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer(weight_data), - trainable=True) - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding( - size=[128, 100], - param_attr= w_param_attrs, - is_sparse=False) - static_rlt3 = emb(base.to_variable(inp_word)) - """ - - def __init__( - self, - size, - is_sparse=False, - is_distributed=False, - padding_idx=None, - param_attr=None, - dtype='float32', - ): - super().__init__() - self._size = size - self._is_sparse = is_sparse - self._is_distributed = is_distributed - self._padding_idx = ( - -1 - if padding_idx is None - else padding_idx - if padding_idx >= 0 - else (size[0] + padding_idx) - ) - - self._param_attr = param_attr - self._dtype = dtype - self._remote_prefetch = self._is_sparse and (not self._is_distributed) - if self._remote_prefetch: - assert self._is_sparse is True and self._is_distributed is False - - self.weight = self.create_parameter( - attr=self._param_attr, - shape=self._size, - dtype=self._dtype, - is_bias=False, - ) - - def forward(self, input): - if _non_static_mode(): - return _legacy_C_ops.lookup_table_v2( - self.weight, - input, - 'is_sparse', - self._is_sparse, - 'is_distributed', - self._is_distributed, - 'remote_prefetch', - self._remote_prefetch, - 'padding_idx', - self._padding_idx, - ) - - check_variable_and_dtype( - input, - 'input', - ['uint8', 'int8', 'int16', 'int32', 'int64'], - 'Embedding', - ) - attrs = { - 'is_sparse': self._is_sparse, - 'is_distributed': self._is_distributed, - 'remote_prefetch': self._remote_prefetch, - 'padding_idx': self._padding_idx, - } - - out = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type='lookup_table_v2', - inputs={'Ids': input, 'W': self.weight}, - outputs={'Out': out}, - attrs=attrs, - ) - - return out - - class RowConv(layers.Layer): """ ***Row-convolution operator*** diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index cb030f71a45..3432baf442e 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -723,10 +723,6 @@ class DataParallel(layers.Layer): def check_layer_sparse(sublayer): if isinstance(sublayer, paddle.nn.layer.common.Embedding): return sublayer._sparse - # NOTE(shenliang03):This is for compatibility. If paddle.fluid.dygraph.Embedding - # is removed in the future, the check will also be removed here. 
- if isinstance(sublayer, paddle.fluid.dygraph.Embedding): - return sublayer._is_sparse return False is_sparse_gradient = [ @@ -875,8 +871,8 @@ class DataParallel(layers.Layer): dist.init_parallel_env() - emb = fluid.dygraph.Embedding([10, 10]) - emb = fluid.dygraph.DataParallel(emb) + emb = paddle.nn.Embedding(10, 10) + emb = paddle.fluid.dygraph.DataParallel(emb) state_dict = emb.state_dict() paddle.save(state_dict, "paddle_dy.pdparams") @@ -910,7 +906,7 @@ class DataParallel(layers.Layer): dist.init_parallel_env() emb = paddle.nn.Embedding(10, 10) - emb = fluid.dygraph.DataParallel(emb) + emb = paddle.fluid.dygraph.DataParallel(emb) state_dict = emb.state_dict() paddle.save(state_dict, "paddle_dy.pdparams") diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index da4f609c401..7393c6104f3 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1660,10 +1660,11 @@ class Variable(metaclass=VariableMetaClass): # example2: return tuple of ndarray with fluid.dygraph.guard(): - embedding = fluid.dygraph.Embedding( - size=[20, 32], - param_attr='emb.w', - is_sparse=True) + embedding = paddle.nn.Embedding( + 20, + 32, + weight_attr='emb.w', + sparse=True) x_data = np.arange(12).reshape(4, 3).astype('int64') x_data = x_data.reshape((-1, 3, 1)) x = fluid.dygraph.base.to_variable(x_data) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 71167f30e02..42c57193941 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -214,9 +214,10 @@ class Optimizer: .. code-block:: python import paddle.fluid as fluid + import paddle with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) adam = fluid.optimizer.Adam(0.001, parameter_list=emb.parameters()) state_dict = adam.state_dict() @@ -582,7 +583,7 @@ class Optimizer: # example1: LearningRateDecay is not used, return value is all the same with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) adam = fluid.optimizer.Adam(0.001, parameter_list = emb.parameters()) lr = adam.current_step_lr() print(lr) # 0.001 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py index f63cfc089ed..ef85aab80f6 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py @@ -39,7 +39,6 @@ from paddle.distributed.fleet.meta_parallel.parallel_layers.pp_layers import ( from paddle.distributed.sharding.group_sharded import group_sharded_parallel from paddle.distributed.utils.log_utils import get_logger from paddle.fluid.dataloader.dataset import IterableDataset -from paddle.fluid.dygraph.nn import Embedding from paddle.incubate.distributed.utils.io import save_for_auto_inference from paddle.nn import Linear @@ -131,7 +130,7 @@ class MLP(fluid.Layer): bias_attr=None, ): super(MLP, self).__init__() - self.embedding = Embedding((embedding_size, linear_size)) + self.embedding = paddle.nn.Embedding(embedding_size, linear_size) self._linear1 = Linear(linear_size, linear_size) self._linear2 = Linear(linear_size, linear_size) self._linear3 = Linear(linear_size, 10) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py 
b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py index 48ec09552d7..deaf9779d44 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_different.py @@ -18,7 +18,6 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main import paddle import paddle.fluid as fluid import paddle.nn.functional as F -from paddle.fluid.dygraph.nn import Embedding paddle.seed(123) np.random.seed(2021) @@ -29,10 +28,10 @@ class SimpleNet(fluid.Layer): super().__init__() self.hidden_size = hidden_size self.vocab_size = vocab_size - self.embedding = Embedding( - size=[self.vocab_size, self.hidden_size], - dtype='float32', - is_sparse=is_sparse, + self.embedding = paddle.nn.Embedding( + self.vocab_size, + self.hidden_size, + sparse=is_sparse, ) self.lin_a = paddle.nn.Linear(self.hidden_size, self.vocab_size) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index 8d49434ac54..a8ddeb0bfdb 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main import paddle import paddle.fluid as fluid import paddle.nn.functional as F -from paddle.fluid.dygraph import Embedding, Layer, to_variable +from paddle.fluid.dygraph import Layer, to_variable from paddle.optimizer.lr import NoamDecay """ @@ -513,11 +513,11 @@ class PrepareEncoderDecoderLayer(Layer): self._src_emb_dim = src_emb_dim self._src_vocab_size = src_vocab_size self._dropout_rate = dropout_rate - self._input_emb = Embedding( - size=[src_vocab_size, src_emb_dim], - is_sparse=is_sparse, - padding_idx=0, - param_attr=fluid.ParamAttr( + self._input_emb = paddle.nn.Embedding( + src_vocab_size, + src_emb_dim, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( name=word_emb_param_name, initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), ), @@ -527,10 +527,11 @@ class PrepareEncoderDecoderLayer(Layer): pos_inp = pos_inp1 else: pos_inp = pos_inp2 - self._pos_emb = Embedding( - size=[self._src_max_len, src_emb_dim], - is_sparse=is_sparse, - param_attr=fluid.ParamAttr( + self._pos_emb = paddle.nn.Embedding( + self._src_max_len, + src_emb_dim, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( name=pos_enc_param_name, initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), trainable=False, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 43f7f0f6d2b..a6e4f09564d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -16,7 +16,7 @@ from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import Embedding, Layer +from paddle.fluid.dygraph import Layer from paddle.jit.api import declarative from paddle.nn import Linear @@ -208,29 +208,29 @@ class BertModelLayer(Layer): self._param_initializer = fluid.initializer.TruncatedNormal( scale=config['initializer_range'] ) - - self._src_emb = 
Embedding( - size=[self._voc_size, self._emb_size], - param_attr=fluid.ParamAttr( + paddle.set_default_dtype(self._dtype) + self._src_emb = paddle.nn.Embedding( + self._voc_size, + self._emb_size, + weight_attr=fluid.ParamAttr( name=self._word_emb_name, initializer=self._param_initializer ), - dtype=self._dtype, ) - self._pos_emb = Embedding( - size=[self._max_position_seq_len, self._emb_size], - param_attr=fluid.ParamAttr( + self._pos_emb = paddle.nn.Embedding( + self._max_position_seq_len, + self._emb_size, + weight_attr=fluid.ParamAttr( name=self._pos_emb_name, initializer=self._param_initializer ), - dtype=self._dtype, ) - self._sent_emb = Embedding( - size=[self._sent_types, self._emb_size], - param_attr=fluid.ParamAttr( + self._sent_emb = paddle.nn.Embedding( + self._sent_types, + self._emb_size, + weight_attr=fluid.ParamAttr( name=self._sent_emb_name, initializer=self._param_initializer ), - dtype=self._dtype, ) self.pooled_fc = Linear( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index bbca449bde6..eceba1198fa 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -21,8 +21,8 @@ import paddle.fluid as fluid from paddle.fluid import ParamAttr, layers from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding from paddle.jit.api import declarative +from paddle.nn import Embedding INF = 1.0 * 1e5 alpha = 0.6 @@ -122,16 +122,18 @@ class BaseModel(fluid.dygraph.Layer): forget_bias = 1.0 self.src_embeder = Embedding( - size=[self.src_vocab_size, self.hidden_size], - param_attr=fluid.ParamAttr( + self.src_vocab_size, + self.hidden_size, + weight_attr=fluid.ParamAttr( initializer=uniform_initializer(init_scale) ), ) self.tar_embeder = Embedding( - size=[self.tar_vocab_size, self.hidden_size], - is_sparse=False, - param_attr=fluid.ParamAttr( + self.tar_vocab_size, + self.hidden_size, + sparse=False, + weight_attr=fluid.ParamAttr( initializer=uniform_initializer(init_scale) ), ) @@ -545,17 +547,19 @@ class AttentionModel(fluid.dygraph.Layer): forget_bias = 1.0 self.src_embeder = Embedding( - size=[self.src_vocab_size, self.hidden_size], - param_attr=fluid.ParamAttr( + self.src_vocab_size, + self.hidden_size, + weight_attr=fluid.ParamAttr( name='source_embedding', initializer=uniform_initializer(init_scale), ), ) self.tar_embeder = Embedding( - size=[self.tar_vocab_size, self.hidden_size], - is_sparse=False, - param_attr=fluid.ParamAttr( + self.tar_vocab_size, + self.hidden_size, + sparse=False, + weight_attr=fluid.ParamAttr( name='target_embedding', initializer=uniform_initializer(init_scale), ), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index d16f07d9a2e..b6baf7ddf29 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -17,7 +17,7 @@ from functools import reduce import paddle import paddle.fluid as fluid import paddle.fluid.param_attr as attr -from paddle.fluid.dygraph import Embedding, Layer +from paddle.fluid.dygraph import Layer from paddle.jit.api import declarative from paddle.static import Variable @@ -42,11 
+42,12 @@ class EmbeddingLayer: """ # TODO(huihuangzheng): The original code set the is_sparse=True, but it # causes crush in dy2stat. Set it to True after fixing it. - emb = Embedding( - size=[self.dict_size, self.emb_dim], - is_sparse=True, + emb = paddle.nn.Embedding( + self.dict_size, + self.emb_dim, + sparse=True, padding_idx=self.padding_idx, - param_attr=attr.ParamAttr( + weight_attr=attr.ParamAttr( name=self.name, initializer=fluid.initializer.Xavier() ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index 06f460912b4..99fe330c692 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -38,11 +38,12 @@ class EmbeddingLayer: """ # TODO(huihuangzheng): The original code set the is_sparse=True, but it # causes crush in dy2stat. Set it to True after fixing it. - emb = paddle.fluid.dygraph.Embedding( - size=[self.dict_size, self.emb_dim], - is_sparse=True, + emb = paddle.nn.Embedding( + self.dict_size, + self.emb_dim, + sparse=True, padding_idx=self.padding_idx, - param_attr=paddle.ParamAttr( + weight_attr=paddle.ParamAttr( name=self.name, initializer=paddle.nn.initializer.XavierUniform(), ), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 7a5fbbc0842..1ec320317d4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -25,7 +25,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "2" import paddle import paddle.fluid as fluid from paddle import _legacy_C_ops -from paddle.fluid.dygraph import Embedding, to_variable +from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.framework import _non_static_mode from paddle.jit import ProgramTranslator @@ -371,10 +371,10 @@ class LexNet(fluid.dygraph.Layer): self.bigru_num = args.bigru_num self.init_bound = 0.1 - self.word_embedding = Embedding( - size=[self.vocab_size, self.word_emb_dim], - dtype='float32', - param_attr=fluid.ParamAttr( + self.word_embedding = paddle.nn.Embedding( + self.vocab_size, + self.word_emb_dim, + weight_attr=fluid.ParamAttr( learning_rate=self.emb_lr, name="word_emb", initializer=fluid.initializer.Uniform( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index 46970eaa27b..49e7c32d6e3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -21,7 +21,6 @@ import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.optimizer import SGDOptimizer from paddle.jit import ProgramTranslator from paddle.jit.api import declarative @@ -156,11 +155,11 @@ class PtbModel(fluid.Layer): init_scale=init_scale, dropout=dropout, ) - self.embedding = Embedding( - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False, - param_attr=fluid.ParamAttr( + self.embedding = paddle.nn.Embedding( + vocab_size, + hidden_size, + sparse=False, + weight_attr=fluid.ParamAttr( 
name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py index f589c37c2fb..60712aeda7a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py @@ -150,11 +150,11 @@ class PtbModel(paddle.nn.Layer): init_scale=init_scale, dropout=dropout, ) - self.embedding = paddle.fluid.dygraph.nn.Embedding( - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False, - param_attr=paddle.ParamAttr( + self.embedding = paddle.nn.Embedding( + vocab_size, + hidden_size, + sparse=False, + weight_attr=paddle.ParamAttr( name='embedding_para', initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py index 12e5099f257..d9eb993f720 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py @@ -20,10 +20,9 @@ from test_lac import DynamicGRU import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import to_variable -from paddle.fluid.dygraph.nn import Embedding from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.nn import Linear +from paddle.nn import Embedding, Linear SEED = 2020 program_translator = ProgramTranslator() @@ -73,9 +72,9 @@ class CNN(fluid.dygraph.Layer): self.batch_size = batch_size self.seq_len = seq_len self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - is_sparse=False, + self.dict_dim + 1, + self.emb_dim, + sparse=False, ) self._simple_conv_pool_1 = SimpleConvPool( self.channels, @@ -124,9 +123,9 @@ class BOW(fluid.dygraph.Layer): self.batch_size = batch_size self.seq_len = seq_len self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - is_sparse=False, + self.dict_dim + 1, + self.emb_dim, + sparse=False, ) self._fc1 = Linear(self.hid_dim, self.hid_dim) self._fc2 = Linear(self.hid_dim, self.fc_hid_dim) @@ -167,10 +166,10 @@ class GRU(fluid.dygraph.Layer): self.batch_size = batch_size self.seq_len = seq_len self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - param_attr=fluid.ParamAttr(learning_rate=30), - is_sparse=False, + self.dict_dim + 1, + self.emb_dim, + weight_attr=fluid.ParamAttr(learning_rate=30), + sparse=False, ) h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") h_0 = to_variable(h_0) @@ -213,10 +212,10 @@ class BiGRU(fluid.dygraph.Layer): self.batch_size = batch_size self.seq_len = seq_len self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - param_attr=fluid.ParamAttr(learning_rate=30), - is_sparse=False, + self.dict_dim + 1, + self.emb_dim, + weight_attr=fluid.ParamAttr(learning_rate=30), + sparse=False, ) h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") h_0 = to_variable(h_0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index 13f1a9b882f..e546e26a230 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -20,9 +20,9 @@ import numpy as np import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Embedding from paddle.jit import ProgramTranslator from paddle.jit.api import declarative +from paddle.nn import Embedding def fake_text(): @@ -227,9 +227,9 @@ class SkipGram(fluid.dygraph.Layer): self.embedding_size = embedding_size self.embedding = Embedding( - size=[self.vocab_size, self.embedding_size], - dtype='float32', - param_attr=fluid.ParamAttr( + self.vocab_size, + self.embedding_size, + weight_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-0.5 / self.embedding_size, @@ -239,9 +239,9 @@ class SkipGram(fluid.dygraph.Layer): ) self.embedding_out = Embedding( - size=[self.vocab_size, self.embedding_size], - dtype='float32', - param_attr=fluid.ParamAttr( + self.vocab_size, + self.embedding_size, + weight_attr=fluid.ParamAttr( name='embedding_out_para', initializer=fluid.initializer.UniformInitializer( low=-0.5 / self.embedding_size, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 88cc415b4bb..f8641dd2ac4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -18,7 +18,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.nn.functional as F -from paddle.fluid.dygraph import Embedding, Layer, to_variable +from paddle.fluid.dygraph import Layer, to_variable from paddle.fluid.layers.utils import map_structure from paddle.jit.api import dygraph_to_static_func from paddle.nn import Linear @@ -276,10 +276,10 @@ class Encoder(Layer): class Embedder(Layer): def __init__(self, vocab_size, emb_dim, bos_idx=0): super().__init__() - self.word_embedder = Embedding( - size=[vocab_size, emb_dim], - padding_idx=bos_idx, - param_attr=fluid.ParamAttr( + self.word_embedder = paddle.nn.Embedding( + vocab_size, + emb_dim, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5) ), ) @@ -311,9 +311,10 @@ class WrapEncoder(Layer): self.emb_dropout = prepostprocess_dropout self.emb_dim = d_model self.word_embedder = word_embedder - self.pos_encoder = Embedding( - size=[max_length, self.emb_dim], - param_attr=fluid.ParamAttr( + self.pos_encoder = paddle.nn.Embedding( + max_length, + self.emb_dim, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( position_encoding_init(max_length, self.emb_dim) ), @@ -499,9 +500,10 @@ class WrapDecoder(Layer): self.emb_dropout = prepostprocess_dropout self.emb_dim = d_model self.word_embedder = word_embedder - self.pos_encoder = Embedding( - size=[max_length, self.emb_dim], - param_attr=fluid.ParamAttr( + self.pos_encoder = paddle.nn.Embedding( + max_length, + self.emb_dim, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( position_encoding_init(max_length, self.emb_dim) ), diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index e1103c1d595..11aacd02439 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ 
-18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding +from paddle.nn import Embedding class SimpleNet(fluid.Layer): @@ -37,10 +37,10 @@ class SimpleNet(fluid.Layer): self.init_scale = init_scale self.num_steps = num_steps self.embedding = Embedding( - size=[self.vocab_size, self.hidden_size], - dtype=dtype, - is_sparse=is_sparse, - param_attr=fluid.ParamAttr( + self.vocab_size, + self.hidden_size, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 5a301d3f0a5..e4dca329fe7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Embedding from paddle.tensor import random @@ -122,8 +123,8 @@ class AutoPruneLayer3(fluid.Layer): class MyLayer(fluid.Layer): def __init__(self, input_size, vocab_size, size, dtype="float32"): super().__init__(dtype=dtype) - self.embed0 = fluid.Embedding(size=(vocab_size, size)) - self.embed1 = fluid.Embedding(size=(vocab_size, size)) + self.embed0 = Embedding(vocab_size, size) + self.embed1 = Embedding(vocab_size, size) self.linear_0 = paddle.nn.Linear(input_size, size) self.linear_1 = paddle.nn.Linear(input_size, size) @@ -144,8 +145,8 @@ class MyLayer(fluid.Layer): class MyLayer2(fluid.Layer): def __init__(self, input_size, vocab_size, size, dtype="float32"): super().__init__(dtype=dtype) - self.embed0 = fluid.Embedding(size=(vocab_size, size)) - self.embed1 = fluid.Embedding(size=(vocab_size, size)) + self.embed0 = Embedding(vocab_size, size) + self.embed1 = Embedding(vocab_size, size) self.linear_0 = paddle.nn.Linear(input_size, size) self.linear_1 = paddle.nn.Linear(input_size, size) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index f864e282904..2003e685327 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -21,7 +21,7 @@ import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -from paddle.fluid.dygraph.nn import BatchNorm, Embedding +from paddle.fluid.dygraph.nn import BatchNorm from paddle.nn import Linear @@ -206,8 +206,8 @@ class TestDygraphLoadStatic(unittest.TestCase): self.batch_norm_1 = BatchNorm(10) self.batch_norm_2 = BatchNorm(10) - self.emb1 = Embedding([1000, 100]) - self.emb2 = Embedding([2000, 200]) + self.emb1 = paddle.nn.Embedding(1000, 100) + self.emb2 = paddle.nn.Embedding(2000, 200) self.layer_norm_1 = paddle.nn.LayerNorm([10]) self.layer_norm_2 = paddle.nn.LayerNorm(10) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index fe706a78f8f..0027cbfa2a9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ 
b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -22,7 +22,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer @@ -42,11 +41,12 @@ class SimpleNet(fluid.Layer): self.vocab_size = vocab_size self.init_scale = init_scale self.num_steps = num_steps - self.embedding = Embedding( - size=[vocab_size, hidden_size], - dtype=dtype, - is_sparse=is_sparse, - param_attr=fluid.ParamAttr( + paddle.set_default_dtype(dtype) + self.embedding = paddle.nn.Embedding( + vocab_size, + hidden_size, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale diff --git a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py index faaa02ea46a..0984104269c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py @@ -101,7 +101,7 @@ class TestImperativeNamedParameters(unittest.TestCase): self.linear1 = paddle.nn.Linear(10, 10) self.linear2 = paddle.nn.Linear(5, 5) self.conv2d = paddle.nn.Conv2D(3, 2, 3) - self.embedding = fluid.dygraph.Embedding(size=[128, 16]) + self.embedding = paddle.nn.Embedding(128, 16) self.h_0 = fluid.dygraph.to_variable( np.zeros([10, 10]).astype('float32') ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 1df0a4148c9..12118beaffe 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -21,7 +21,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import BatchNorm, Embedding +from paddle.fluid.dygraph.nn import BatchNorm from paddle.fluid.framework import _test_eager_guard from paddle.nn import Linear @@ -371,8 +371,8 @@ class OCRAttention(fluid.dygraph.Layer): Config.decoder_size, bias_attr=False, ) - self.embedding = Embedding( - [Config.num_classes + 2, Config.word_vector_dim], dtype='float32' + self.embedding = paddle.nn.Embedding( + Config.num_classes + 2, Config.word_vector_dim ) self.gru_decoder_with_attention = GRUDecoderWithAttention( Config.decoder_size, Config.num_classes diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 2a59dd396f0..6bbf0a70c2e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -23,9 +23,9 @@ import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer +from paddle.nn import Embedding class SimpleLSTMRNN(fluid.Layer): @@ -172,10 +172,10 @@ class PtbModel(fluid.Layer): dropout=dropout, ) self.embedding = Embedding( - size=[vocab_size, hidden_size], - 
dtype='float32', - is_sparse=is_sparse, - param_attr=fluid.ParamAttr( + vocab_size, + hidden_size, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 2e30ea41a18..4e30f591686 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -22,9 +22,9 @@ import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay -from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import Adam +from paddle.nn import Embedding class SimpleLSTMRNN(fluid.Layer): @@ -167,10 +167,10 @@ class PtbModel(fluid.Layer): dropout=dropout, ) self.embedding = Embedding( - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False, - param_attr=fluid.ParamAttr( + vocab_size, + hidden_size, + sparse=False, + weight_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale @@ -991,7 +991,7 @@ class TestDygraphPtbRnn(unittest.TestCase): def func_testOnlyLoadParams(self): with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) @@ -1011,7 +1011,7 @@ class TestDygraphPtbRnn(unittest.TestCase): def func_test_load_compatible_with_keep_name_table(self): with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 4a3c6c64a6f..a567a443e44 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -23,8 +23,8 @@ import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay -from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Embedding from paddle.optimizer import Adam @@ -168,10 +168,10 @@ class PtbModel(fluid.Layer): dropout=dropout, ) self.embedding = Embedding( - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False, - param_attr=fluid.ParamAttr( + vocab_size, + hidden_size, + sparse=False, + weight_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale @@ -1015,7 +1015,7 @@ class TestDygraphPtbRnn(unittest.TestCase): def func_testOnlyLoadParams(self): with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() paddle.save( state_dict, @@ -1028,7 +1028,7 @@ class TestDygraphPtbRnn(unittest.TestCase): def func_test_no_state_in_input_dict(self): with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = 
paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() paddle.save( state_dict, @@ -1044,7 +1044,7 @@ class TestDygraphPtbRnn(unittest.TestCase): def func_test_state_shape_mismatch(self): with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() paddle.save( state_dict, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py index 72c77e753f5..498317b2a33 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py @@ -27,11 +27,11 @@ from paddle.fluid.optimizer import SGDOptimizer class SimpleNet(paddle.nn.Layer): def __init__(self, vocab_size, hidden_size, dtype): super().__init__() - self.emb = fluid.dygraph.Embedding( - size=[vocab_size, hidden_size], - dtype=dtype, - param_attr='emb.w', - is_sparse=True, + self.emb = paddle.nn.Embedding( + vocab_size, + hidden_size, + weight_attr='emb.w', + sparse=True, ) def forward(self, input): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index dd490e8d555..220bde8e5b2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -22,9 +22,9 @@ import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer +from paddle.nn import Embedding class SimpleNet(fluid.Layer): @@ -42,11 +42,12 @@ class SimpleNet(fluid.Layer): self.vocab_size = vocab_size self.init_scale = init_scale self.num_steps = num_steps + paddle.set_default_dtype(dtype) self.embedding = Embedding( - size=[vocab_size, hidden_size], - dtype=dtype, - is_sparse=is_sparse, - param_attr=fluid.ParamAttr( + vocab_size, + hidden_size, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 3cc07ee6a33..f73e9436384 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -20,7 +20,7 @@ from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid import paddle.nn.functional as F -from paddle.fluid import Embedding, Layer, core +from paddle.fluid import Layer, core from paddle.fluid.dygraph import guard, to_variable from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.nn import Linear @@ -664,11 +664,11 @@ class PrepareEncoderDecoderLayer(Layer): self._src_emb_dim = src_emb_dim self._src_vocab_size = src_vocab_size self._dropout_rate = dropout_rate - self._input_emb = Embedding( - size=[src_vocab_size, src_emb_dim], - is_sparse=is_sparse, - padding_idx=0, - param_attr=fluid.ParamAttr( + self._input_emb = paddle.nn.Embedding( + src_vocab_size, + src_emb_dim, + 
sparse=is_sparse, + weight_attr=fluid.ParamAttr( name=word_emb_param_name, initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), ), @@ -678,10 +678,11 @@ class PrepareEncoderDecoderLayer(Layer): pos_inp = pos_inp1 else: pos_inp = pos_inp2 - self._pos_emb = Embedding( - size=[self._src_max_len, src_emb_dim], - is_sparse=is_sparse, - param_attr=fluid.ParamAttr( + self._pos_emb = paddle.nn.Embedding( + self._src_max_len, + src_emb_dim, + sparse=is_sparse, + weight_attr=fluid.ParamAttr( name=pos_enc_param_name, initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), trainable=False, diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 01a9c098b0e..83cec6d6044 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -26,7 +26,7 @@ import paddle.fluid.layers as layers import paddle.fluid.nets as nets import paddle.nn.functional as F from paddle.fluid import core -from paddle.fluid.dygraph import base, nn, to_variable +from paddle.fluid.dygraph import base, to_variable from paddle.fluid.framework import ( Program, _test_eager_guard, @@ -732,8 +732,8 @@ class TestLayer(LayerTest): )[0] with self.static_graph(): data_t = layers.data(name='word', shape=[1], dtype='int64') - emb2 = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False + emb2 = paddle.nn.Embedding( + dict_size, 32, weight_attr='emb.w', sparse=False ) emb_rlt = emb2(data_t) static_rlt2 = self.get_static_graph_result( @@ -741,16 +741,17 @@ class TestLayer(LayerTest): )[0] with self.dynamic_graph(): with _test_eager_guard(): - emb2 = nn.Embedding( - size=[dict_size, 32], - param_attr='eager_emb.w', - is_sparse=False, + emb2 = paddle.nn.Embedding( + dict_size, + 32, + weight_attr='eager_emb.w', + sparse=False, ) dy_eager_rlt = emb2(base.to_variable(inp_word)) dy_eager_rlt_value = dy_eager_rlt.numpy() - emb2 = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False + emb2 = paddle.nn.Embedding( + dict_size, 32, weight_attr='emb.w', sparse=False ) dy_rlt = emb2(base.to_variable(inp_word)) dy_rlt_value = dy_rlt.numpy() @@ -767,11 +768,12 @@ class TestLayer(LayerTest): custom_weight ) ) - emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False) - emb2 = nn.Embedding( - size=[dict_size, 32], - param_attr=weight_attr, - is_sparse=False, + emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False) + emb2 = paddle.nn.Embedding( + dict_size, + 32, + weight_attr=weight_attr, + sparse=False, ) rep1 = emb1(base.to_variable(inp_word)) rep2 = emb2(base.to_variable(inp_word)) @@ -797,9 +799,9 @@ class TestLayer(LayerTest): custom_weight ) ) - emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False) - emb2 = nn.Embedding( - size=[dict_size, 32], param_attr=weight_attr, is_sparse=False + emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False) + emb2 = paddle.nn.Embedding( + dict_size, 32, weight_attr=weight_attr, sparse=False ) rep1 = emb1(base.to_variable(inp_word)) rep2 = emb2(base.to_variable(inp_word)) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index bfa6966e543..f990c2171b9 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -689,9 +689,7 @@ class TestBeamSearch(ModuleApiTest): beam_size=4, max_step_num=20, ): - embedder = paddle.fluid.dygraph.Embedding( - size=[vocab_size, embed_dim], 
dtype="float64" - ) + embedder = paddle.nn.Embedding(vocab_size, embed_dim) output_layer = nn.Linear(hidden_size, vocab_size) cell = nn.LSTMCell(embed_dim, hidden_size) self.max_step_num = max_step_num -- GitLab