From df7cc3a0d633ce2a7967f63bae7515f453945c99 Mon Sep 17 00:00:00 2001
From: GGBond8488 <33050871+GGBond8488@users.noreply.github.com>
Date: Thu, 23 Feb 2023 15:26:45 +0800
Subject: [PATCH] Fluid clean: move dygraph profiler, fluid.input.one_hot and
 fluid.input.embedding (#50141)

* remove dygraph.profiler

* remove fluid.input.one_hot and move embedding to paddle.static.nn

* fix unittest error

* fix type error

* fix type error

* fix xpu test error

* fix sample code error

* fix sample code error

* fix sample code error

* remove test.py

* remove Variable in docstring
---
 python/paddle/fluid/__init__.py               |   3 -
 python/paddle/fluid/dygraph/profiler.py       |  28 --
 python/paddle/fluid/input.py                  | 348 ------------------
 python/paddle/fluid/layers/nn.py              |   2 +-
 .../tests/unittests/dist_fleet_simnet_bow.py  |   6 +-
 .../unittests/mlu/test_one_hot_v2_op_mlu.py   |   8 +-
 .../unittests/npu/test_one_hot_v2_op_npu.py   |   6 +-
 .../fluid/tests/unittests/test_backward.py    |   8 +-
 .../test_embedding_id_stop_gradient.py        |   4 +-
 .../test_imperative_load_static_param.py      |   4 +-
 .../unittests/test_lookup_table_v2_bf16_op.py |   2 +-
 .../unittests/test_lookup_table_v2_op.py      |  16 +-
 .../tests/unittests/test_one_hot_v2_op.py     |  10 +-
 .../tests/unittests/test_run_program_op.py    |   2 +-
 .../fluid/tests/unittests/test_sgd_op.py      |   4 +-
 .../xpu/test_lookup_table_v2_op_xpu.py        |  14 +-
 .../unittests/xpu/test_one_hot_v2_op_xpu.py   |  10 +-
 .../tests/unittests/xpu/test_sgd_op_xpu.py    |   4 +-
 python/paddle/static/nn/__init__.py           |   2 +-
 python/paddle/static/nn/common.py             | 161 ++++++++
 20 files changed, 217 insertions(+), 425 deletions(-)
 delete mode 100644 python/paddle/fluid/dygraph/profiler.py
 delete mode 100644 python/paddle/fluid/input.py

diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 6ab933d3ea8..a7019bd9541 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -66,7 +66,6 @@ from . import average
 from . import metrics
 from . import transpiler
 from . import incubate
-from .input import embedding, one_hot
 from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
@@ -129,8 +128,6 @@ __all__ = (
     + [
         'io',
         'initializer',
-        'embedding',
-        'one_hot',
         'layers',
         'contrib',
         'data',
diff --git a/python/paddle/fluid/dygraph/profiler.py b/python/paddle/fluid/dygraph/profiler.py
deleted file mode 100644
index 8c4e07ce78d..00000000000
--- a/python/paddle/fluid/dygraph/profiler.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .. 
import core - -__all__ = [ - 'start_gperf_profiler', - 'stop_gperf_profiler', -] - - -def start_gperf_profiler(): - core.start_imperative_gperf_profiler() - - -def stop_gperf_profiler(): - core.stop_imperative_gperf_profiler() diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py deleted file mode 100644 index 255a17d6483..00000000000 --- a/python/paddle/fluid/input.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings -from .framework import Variable, _non_static_mode, static_only -from .layer_helper import LayerHelper -from .data_feeder import check_variable_and_dtype, check_dtype -from ..utils import deprecated - -__all__ = ['one_hot', 'embedding'] - - -@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') -def one_hot(input, depth, allow_out_of_range=False): - """ - :alias_main: paddle.nn.functional.one_hot - :alias: paddle.nn.functional.one_hot,paddle.nn.functional.common.one_hot - :old_api: paddle.fluid.one_hot - - The operator converts each id in the input to an one-hot vector with a - depth length. The value in the vector dimension corresponding to the id - is 1, and the value in the remaining dimension is 0. - - The shape of output Tensor or LoDTensor is generated by appending depth dimension - behind the last dimension of the input shape. - - .. code-block:: text - - Example 1 (allow_out_of_range=False): - - input: - X.shape = [4] - X.data = [1, 1, 3, 0] - depth = 4 - - output: - Out.shape = [4, 4] - Out.data = [[0., 1., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 0., 1.], - [1., 0., 0., 0.]] - - Example 2 (allow_out_of_range=True): - - input: - X.shape = [4] - X.data = [1, 1, 5, 0] - depth = 4 - allow_out_of_range = True - - output: - Out.shape = [4, 4] - Out.data = [[0., 1., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 0., 0.], # This id is 5, which goes beyond depth, so set it all-zeros data. - [1., 0., 0., 0.]] - - Example 3 (allow_out_of_range=False): - - input: - X.shape = [4] - X.data = [1, 1, 5, 0] - depth = 4 - allow_out_of_range = False - - output: Throw an exception for Illegal value - The second dimension in X is 5, which is greater than depth. - Allow_out_of_range =False means that does not allow the word id to exceed depth, - so it throws an exception. - - - Args: - input(Variable): Tensor or LoDTensor with shape :math:`[N_1, N_2, ..., N_k]` , - which contains at least one dimension. The data type is int32 or int64. - depth(int): An integer defining the depth of the one hot dimension. If input - is word id, depth is generally the dictionary size. - allow_out_of_range(bool): A bool value indicating whether the input - indices could be out of range :math:`[0, depth)` . When input indices are - out of range, exceptions :code:`Illegal value` is raised if :attr:`allow_out_of_range` - is False, or zero-filling representations is created if it is set True. - Default: False. - - Returns: - Variable: The one-hot representations of input. 
A Tensor or LoDTensor with type float32. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - - # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4]. - label = fluid.data(name="label", shape=[4], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=4) - """ - check_variable_and_dtype(input, 'input', ['int32', 'int64'], 'one_hot_v2') - helper = LayerHelper("one_hot_v2", **locals()) - - one_hot_out = helper.create_variable_for_type_inference(dtype='float32') - - if _non_static_mode(): - inputs = {'X': input} - attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range} - else: - if not isinstance(depth, Variable): - # user attribute - inputs = {'X': input} - attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range} - else: - depth.stop_gradient = True - inputs = {'X': input, 'depth_tensor': depth} - attrs = {'allow_out_of_range': allow_out_of_range} - helper.append_op( - type="one_hot_v2", - inputs=inputs, - attrs=attrs, - outputs={'Out': one_hot_out}, - stop_gradient=True, - ) - return one_hot_out - - -@static_only -@deprecated(since='2.0.0', update_to='paddle.nn.functional.embedding') -def embedding( - input, - size, - is_sparse=False, - is_distributed=False, - padding_idx=None, - param_attr=None, - dtype='float32', -): - r""" - :api_attr: Static Graph - - The operator is used to lookup embeddings vector of ids provided by :attr:`input` . - It automatically constructs a 2D embedding matrix based on the - input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . - - The shape of output Tensor is generated by appending an emb_size dimension to the - last dimension of the input Tensor shape. - - **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , - otherwise the program will throw an exception and exit. - - .. code-block:: text - - Case 1: - - input is a Tensor. padding_idx = -1 - input.data = [[1, 3], [2, 4], [4, 127]] - input.shape = [3, 2] - Given size = [128, 16] - output is a Tensor: - out.shape = [3, 2, 16] - out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], - [0.345421456, 0.524563927, ..., 0.144534654]], - - [[0.345249859, 0.124939536, ..., 0.194353745], - [0.945345345, 0.435394634, ..., 0.435345365]], - - [[0.945345345, 0.435394634, ..., 0.435345365], - [0.0, 0.0, ..., 0.0 ]]] # padding data - The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 - It will pad all-zero data when ids is 127. - - Case 2: - - input is a LoDTensor with 1-level LoD. padding_idx = 0 - input.lod = [[2, 3]] - input.data = [[1], [3], [2], [4], [0]] - input.shape = [5, 1] - Given size = [128, 16] - output is a LoDTensor: - out.lod = [[2, 3]] - out.shape = [5, 1, 16] - out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]], - [[0.345421456, 0.524563927, ..., 0.144534654]], - [[0.345249859, 0.124939536, ..., 0.194353745]], - [[0.945345345, 0.435394634, ..., 0.435345365]], - [[0.0, 0.0, ..., 0.0 ]]] # padding data - It will pad all-zero data when ids is 0. - - - Args: - input(Variable): A Tensor or LoDTensor with type int64, which contains the id information. - The value of the input id should satisfy :math:`0<= id < size[0]` . - size(tuple|list): The shape of lookup table parameter. It should have two elements which - indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. - is_sparse(bool): The flag indicating whether to use sparse update. 
This parameter only - affects the performance of the backwards gradient update. It is recommended to set - True because sparse update is faster. But some optimizer does not support sparse update - In these case, is_sparse must be False. Default: False. - is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used - in multi-machine distributed CPU training. Default: False. - padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). - If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted - to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup - encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. - If set None, it makes no effect to output. Default: None. - param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the - default weight parameter property is used. In addition, - user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. - The local word vector needs to be transformed into numpy format, and the shape of local word - vector should be consistent with :attr:`size` . - dtype(str): It refers to the data type of output Tensor. - It must be float32 or float64. Default: float32. - - Returns: - Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . - - Static Examples: - .. code-block:: python - - import paddle - import numpy as np - paddle.enable_static() - - x = paddle.static.data(name="x", shape = [2, 4], dtype=np.int64) - embedding = paddle.nn.Embedding(10, 3, - weight_attr=paddle.nn.initializer.Constant(value=1.0)) - adam = paddle.optimizer.SGD(parameters=[embedding.weight], learning_rate=0.01) - output = embedding(x) - m_output=paddle.mean(output) - - adam.minimize(m_output) - - place = paddle.CPUPlace() - exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) - - x = np.array([[7, 2, 4, 5],[4, 3, 2, 9]], dtype=np.int64) - - # x is a Numpy. - # x.data = [[7, 2, 4, 5], [4, 3, 2, 9]] - # x.shape = [2, 4] - - out, = exe.run(paddle.static.default_main_program(), feed={'x':x}, fetch_list=[output]) - - # out is a Numpy. - # out.data = [[1., 1., 1.], - # [1., 1., 1.], - # [1., 1., 1.], - # [1., 1., 1.]], - # - # [[1., 1., 1.], - # [1., 1., 1.], - # [1., 1., 1.], - # [0., 0., 0.]]] - # out.shape = [2, 4, 3] - - - Dygraph Examples: - .. code-block:: python - - import paddle - import numpy as np - - x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64) - - # x is a Tensor. 
- # x.data = [[3], [4], [5]] - # x.shape = [3, 1] - x = paddle.to_tensor(x_data, stop_gradient=False) - - # embedding weight shape = [10, 3] - embedding = paddle.nn.Embedding(10, 3, sparse=True) - - # embedding weight data = [10, 3] - w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32) - - # embedding.weight.shape = [10, 3] - # embedding.weight.data = - # [[2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.], - # [2., 2., 2.]] - embedding.weight.set_value(w0) - - adam = paddle.optimizer.Adam( - parameters=[embedding.weight], learning_rate=0.01) - adam.clear_grad() - - # out is Tensor - # out.shape: [3, 1, 3] - # out.layout: NCHW - # out.dtype: float - # out.data: [2 2 2 2 2 2 2 2 2] - out = embedding(x) - - out.backward() - adam.step() - - """ - - helper = LayerHelper('embedding', **locals()) - check_variable_and_dtype(input, 'input', ['int64'], 'fluid.embedding') - check_dtype( - dtype, - 'dtype', - ['float16', 'float32', 'float64', 'uint16'], - 'fluid.embedding', - ) - remote_prefetch = is_sparse and (not is_distributed) - if remote_prefetch: - assert is_sparse is True and is_distributed is False - w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False - ) - tmp = helper.create_variable_for_type_inference(dtype) - padding_idx = ( - -1 - if padding_idx is None - else padding_idx - if padding_idx >= 0 - else (size[0] + padding_idx) - ) - helper.append_op( - type='lookup_table_v2', - inputs={'Ids': input, 'W': w}, - outputs={'Out': tmp}, - attrs={ - 'is_sparse': is_sparse, - 'is_distributed': is_distributed, - 'remote_prefetch': remote_prefetch, - 'padding_idx': padding_idx, - }, - ) - return tmp diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 2816f6d4906..ab94a66953e 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -210,7 +210,7 @@ def embedding( data = fluid.data(name='x', shape=[None, 1], dtype='int64') # example 1 - emb_1 = fluid.embedding(input=data, size=[128, 64]) + emb_1 = paddle.static.nn.embedding(input=data, size=[128, 64]) # example 2: load custom or pre-trained word vectors weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 4e8f6a42506..22ae37010f7 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -119,7 +119,7 @@ def train_network( ) # embedding - q_emb = fluid.embedding( + q_emb = paddle.static.nn.embedding( input=q, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -147,7 +147,7 @@ def train_network( ) # embedding - pt_emb = fluid.embedding( + pt_emb = paddle.static.nn.embedding( input=pt, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -176,7 +176,7 @@ def train_network( ) # embedding - nt_emb = fluid.embedding( + nt_emb = paddle.static.nn.embedding( input=nt, is_distributed=is_distributed, size=[dict_dim, emb_dim], diff --git a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py index 1fc06f72e89..154a7780d68 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py @@ -196,10 +196,6 @@ class 
TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth - ) - one_hot_label = paddle.nn.functional.one_hot( fluid.dygraph.to_variable(label), depth ) @@ -208,7 +204,7 @@ class TestOneHotOpApi(unittest.TestCase): def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=depth) label_data = np.array( [np.random.randint(0, 10 - 1) for i in range(6)] @@ -239,7 +235,7 @@ class BadInputTestOnehotV2(unittest.TestCase): shape=[4], dtype="float32", ) - one_hot_label = fluid.one_hot(input=label, depth=4) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py index 7b5633c335c..dc4873500bf 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py @@ -219,13 +219,13 @@ class TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(paddle.NPUPlace(0)): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth + one_hot_label = paddle.nn.functional.one_hot( + x=fluid.dygraph.to_variable(label), num_classes=depth ) def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=depth) place = fluid.NPUPlace(0) label_data = np.array( diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py index c3d44d7b343..41fa23658c4 100644 --- a/python/paddle/fluid/tests/unittests/test_backward.py +++ b/python/paddle/fluid/tests/unittests/test_backward.py @@ -234,13 +234,13 @@ class SimpleNet(BackwardNet): ) # shared layer, the grad of 'w2v' will be summed and renamed. 
# To test _addup_repetitive_outputs_ - x_emb = fluid.embedding( + x_emb = paddle.static.nn.embedding( x, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) - x2_emb = fluid.embedding( + x2_emb = paddle.static.nn.embedding( x2, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) - x3_emb = fluid.embedding( + x3_emb = paddle.static.nn.embedding( x3, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) # merge layers @@ -331,7 +331,7 @@ class TestAppendBackwardWithError(unittest.TestCase): def build_net(self): x = fluid.data(name='x', shape=[None, 13], dtype='int64') y = fluid.data(name='y', shape=[None, 1], dtype='float32') - x_emb = fluid.embedding(x, size=[100, 256]) + x_emb = paddle.static.nn.embedding(x, size=[100, 256]) y_predict = paddle.static.nn.fc(x=x_emb, size=1, name='my_fc') loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_loss = paddle.mean(loss) diff --git a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py index d5facd83e2e..7c5776cb4ab 100644 --- a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py @@ -58,7 +58,9 @@ class TestEmbeddingIdStopGradientBase(unittest.TestCase): x.stop_gradient = stop_gradient - emb = fluid.embedding(x, size=[10, 32], dtype='float32') + emb = paddle.static.nn.embedding( + x, size=[10, 32], dtype='float32' + ) avg_cost = paddle.mean(emb, name='mean_loss') optim = fluid.optimizer.SGD(learning_rate=0.001) optim.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index abf7a95bfb7..d7a7e535991 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -60,8 +60,8 @@ class TestDygraphLoadStatic(unittest.TestCase): batchnorm_out_2 = paddle.static.nn.batch_norm(batchnorm_in) emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64') - emb_out_1 = fluid.embedding(emb_in, [1000, 100]) - emb_out_2 = fluid.embedding(emb_in, [2000, 200]) + emb_out_1 = paddle.static.nn.embedding(emb_in, [1000, 100]) + emb_out_2 = paddle.static.nn.embedding(emb_in, [2000, 200]) layernorm = fluid.data(name="ln", shape=[None, 10], dtype='float32') layernorm_1 = paddle.static.nn.layer_norm(layernorm) diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index 0f6affcd26c..f951d524242 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -105,7 +105,7 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): x = paddle.static.data( name='x', shape=[-1] + self.ids_shape, dtype='int64' ) - self.emb = fluid.input.embedding( + self.emb = paddle.static.nn.embedding( input=x, size=self.w_shape, param_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 6aea5ef118c..dc0c8f3174b 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -203,7 +203,7 @@ class TestLookupTableIsSparse(unittest.TestCase): with 
fluid.program_guard(main_program, fluid.Program()): x = paddle.static.data(name='x', shape=[-1, 5], dtype='int64') y_ = paddle.static.data(name='y_', shape=[-1, 5], dtype='float32') - emb = fluid.input.embedding( + emb = paddle.static.nn.embedding( input=x, size=[10, 16], param_attr=fluid.ParamAttr( @@ -246,7 +246,7 @@ class TestLookupTableIsSparse(unittest.TestCase): class TestLookupTableApi(unittest.TestCase): def test_api(self): x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64') - emb = fluid.embedding(input=x, size=[128, 64]) + emb = paddle.static.nn.embedding(input=x, size=[128, 64]) place = fluid.CPUPlace() x_data = np.random.randint(0, 127, [2, 20]).astype("int64") @@ -269,25 +269,29 @@ class TestEmbedOpError(unittest.TestCase): def test_Variable(): # the input type must be Variable - fluid.embedding(input=input_data, size=(10, 64)) + paddle.static.nn.embedding(input=input_data, size=(10, 64)) self.assertRaises(TypeError, test_Variable) def test_input_dtype(): # the input dtype must be int64 input = fluid.data(name='x1', shape=[4, 6], dtype='float32') - fluid.embedding(input=input, size=(10, 64)) + paddle.static.nn.embedding(input=input, size=(10, 64)) self.assertRaises(TypeError, test_input_dtype) def test_param_dtype(): # dtype must be float32 or float64 input2 = fluid.data(name='x2', shape=[4, 6], dtype='int64') - fluid.embedding(input=input2, size=(10, 64), dtype='int64') + paddle.static.nn.embedding( + input=input2, size=(10, 64), dtype='int64' + ) self.assertRaises(TypeError, test_param_dtype) input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64') - fluid.embedding(input=input3, size=(10, 64), dtype='float16') + paddle.static.nn.embedding( + input=input3, size=(10, 64), dtype='float16' + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index 85993b34a29..311ee24887b 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -188,10 +188,6 @@ class TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth - ) - one_hot_label = paddle.nn.functional.one_hot( fluid.dygraph.to_variable(label), depth ) @@ -202,7 +198,7 @@ class TestOneHotOpApi(unittest.TestCase): def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") label.desc.set_need_check_feed(False) - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=depth) place = fluid.CPUPlace() label_data = np.array( @@ -231,7 +227,9 @@ class BadInputTestOnehotV2(unittest.TestCase): dtype="float32", ) label.desc.set_need_check_feed(False) - one_hot_label = fluid.one_hot(input=label, depth=4) + one_hot_label = paddle.nn.functional.one_hot( + x=label, num_classes=4 + ) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index 35c31deb3f8..51f4bb1686f 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -463,7 +463,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): x = paddle.static.data( name=self.input_names['X'][0], shape=[-1, 5], dtype='int64' ) - emb 
= fluid.input.embedding( + emb = paddle.static.nn.embedding( input=x, size=[10, 16], param_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py index 801866c9023..f9d6b729ce4 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py @@ -200,7 +200,9 @@ class TestSGDOpWithLargeInput(unittest.TestCase): label = fluid.layers.fill_constant( shape=[1, 150], value=0.5, dtype='float32' ) - emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32') + emb = paddle.static.nn.embedding( + input=data, size=(10000000, 150), dtype='float32' + ) out = paddle.nn.functional.normalize(x=emb, axis=-1) cost = paddle.nn.functional.square_error_cost(input=out, label=label) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py index 94645bcf9b2..fcbf724ccbc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py @@ -168,7 +168,7 @@ class TestLookupTableWithTensorIdsWIsSelectedRows( class TestLookupTableApi(unittest.TestCase): def test_api(self): x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64') - emb = fluid.embedding(input=x, size=[128, 64]) + emb = paddle.static.nn.embedding(input=x, size=[128, 64]) place = paddle.XPUPlace(0) x_data = np.random.randint(0, 127, [2, 20]).astype("int64") @@ -191,25 +191,29 @@ class TestEmbedOpError(unittest.TestCase): def test_Variable(): # the input type must be Variable - fluid.embedding(input=input_data, size=(10, 64)) + paddle.static.nn.embedding(input=input_data, size=(10, 64)) self.assertRaises(TypeError, test_Variable) def test_input_dtype(): # the input dtype must be int64 input = fluid.data(name='x1', shape=[4, 6], dtype='float32') - fluid.embedding(input=input, size=(10, 64)) + paddle.static.nn.embedding(input=input, size=(10, 64)) self.assertRaises(TypeError, test_input_dtype) def test_param_dtype(): # dtype must be float32 or float64 input2 = fluid.data(name='x2', shape=[4, 6], dtype='int64') - fluid.embedding(input=input2, size=(10, 64), dtype='int64') + paddle.static.nn.embedding( + input=input2, size=(10, 64), dtype='int64' + ) self.assertRaises(TypeError, test_param_dtype) input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64') - fluid.embedding(input=input3, size=(10, 64), dtype='float16') + paddle.static.nn.embedding( + input=input3, size=(10, 64), dtype='float16' + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py index a49ad6d327d..e68b88ef860 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py @@ -162,13 +162,13 @@ class TestOneHotOpApi(unittest.TestCase): [np.random.randint(0, depth - 1) for i in range(6)] ).reshape([6, 1]) with fluid.dygraph.guard(): - one_hot_label = fluid.one_hot( - input=fluid.dygraph.to_variable(label), depth=depth + one_hot_label = paddle.nn.functional.one_hot( + x=fluid.dygraph.to_variable(label), num_classes=depth ) def _run(self, depth): label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=depth) + one_hot_label = paddle.nn.functional.one_hot(x=label, 
num_classes=depth)
         place = fluid.XPUPlace(0)

         label_data = np.array(
@@ -196,7 +196,9 @@ class BadInputTestOnehotV2(unittest.TestCase):
                     shape=[4],
                     dtype="float32",
                 )
-                one_hot_label = fluid.one_hot(input=label, depth=4)
+                one_hot_label = paddle.nn.functional.one_hot(
+                    x=label, num_classes=4
+                )

         self.assertRaises(TypeError, test_bad_x)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
index 06abe1f76b0..b859c06b7e1 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
@@ -72,7 +72,9 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
         label = fluid.layers.fill_constant(
             shape=[1, 150], value=0.5, dtype='float32'
         )
-        emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32')
+        emb = paddle.static.nn.embedding(
+            input=data, size=(10000, 150), dtype='float32'
+        )
         out = paddle.nn.functional.normalize(x=emb, axis=-1)

         cost = paddle.nn.functional.square_error_cost(input=out, label=label)
diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py
index 53dd06bd1f5..6c528c68865 100755
--- a/python/paddle/static/nn/__init__.py
+++ b/python/paddle/static/nn/__init__.py
@@ -38,7 +38,7 @@ from .common import prelu  # noqa: F401
 from .common import layer_norm  # noqa: F401

-from ...fluid.input import embedding  # noqa: F401
+from .common import embedding  # noqa: F401
 from ...fluid.contrib.layers import sparse_embedding  # noqa: F401
 from ...fluid.layers import StaticRNN  # noqa: F401
diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py
index c7e0794430e..db68df4ea4e 100644
--- a/python/paddle/static/nn/common.py
+++ b/python/paddle/static/nn/common.py
@@ -3631,3 +3631,164 @@ def layer_norm(
     )

     return helper.append_activation(layer_norm_out)
+
+
+@static_only
+def embedding(
+    input,
+    size,
+    is_sparse=False,
+    is_distributed=False,
+    padding_idx=None,
+    param_attr=None,
+    dtype='float32',
+):
+    r"""
+    :api_attr: Static Graph
+
+    This operator looks up the embedding vectors of the ids provided by :attr:`input` .
+    It automatically constructs a 2D embedding matrix based on the
+    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
+
+    The shape of the output Tensor is generated by appending an emb_size dimension to the
+    last dimension of the input Tensor shape.
+
+    **Note:** The ids in :attr:`input` must satisfy :math:`0 <= id < size[0]` ,
+    otherwise the program will throw an exception and exit.
+
+    .. code-block:: text
+
+        Case 1:
+
+        input is a Tensor. padding_idx = -1
+            input.data = [[1, 3], [2, 4], [4, 127]]
+            input.shape = [3, 2]
+        Given size = [128, 16]
+        output is a Tensor:
+            out.shape = [3, 2, 16]
+            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
+                         [0.345421456, 0.524563927, ..., 0.144534654]],
+
+                        [[0.345249859, 0.124939536, ..., 0.194353745],
+                         [0.945345345, 0.435394634, ..., 0.435345365]],
+
+                        [[0.945345345, 0.435394634, ..., 0.435345365],
+                         [0.0, 0.0, ..., 0.0]]]  # padding data
+        Since the input padding_idx is less than 0, it is automatically
+        converted to padding_idx = -1 + 128 = 127, so the lookup returns
+        all-zero data whenever an id is 127.
+
+        Case 2:
+
+        input is a LoDTensor with 1-level LoD. padding_idx = 0
+            input.lod = [[2, 3]]
+            input.data = [[1], [3], [2], [4], [0]]
+            input.shape = [5, 1]
+        Given size = [128, 16]
+        output is a LoDTensor:
+            out.lod = [[2, 3]]
+            out.shape = [5, 1, 16]
+            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
+                        [[0.345421456, 0.524563927, ..., 0.144534654]],
+                        [[0.345249859, 0.124939536, ..., 0.194353745]],
+                        [[0.945345345, 0.435394634, ..., 0.435345365]],
+                        [[0.0, 0.0, ..., 0.0]]]  # padding data
+        The lookup returns all-zero data whenever an id is 0.
+
+
+    Args:
+        input(Tensor): A Tensor or LoDTensor with type int64, which contains the id information.
+            The value of the input id should satisfy :math:`0 <= id < size[0]` .
+        size(tuple|list): The shape of the lookup table parameter. It should have two elements, which
+            indicate the size of the dictionary of embeddings and the size of each embedding vector respectively.
+        is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
+            affects the performance of the backward gradient update. It is recommended to set it
+            to True because sparse update is faster; however, some optimizers do not support
+            sparse update, in which case is_sparse must be False. Default: False.
+        is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
+            in multi-machine distributed CPU training. Default: False.
+        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
+            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
+            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever the lookup
+            encounters :math:`padding\_idx` in an id, and the padding data will not be updated during training.
+            If set to None, it has no effect on the output. Default: None.
+        param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
+            default weight parameter property is used. In addition,
+            user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
+            The local word vectors need to be transformed into numpy format, and their shape
+            should be consistent with :attr:`size` .
+        dtype(str): The data type of the output Tensor.
+            It must be float32 or float64. Default: float32.
+
+    Returns:
+        Tensor: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
+
+    Static Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+
+            x = paddle.static.data(name="x", shape=[2, 4], dtype=np.int64)
+            output = paddle.static.nn.embedding(
+                x, (10, 3), param_attr=paddle.nn.initializer.Constant(value=1.0)
+            )
+            m_output = paddle.mean(output)
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor(place)
+            exe.run(paddle.static.default_startup_program())
+
+            x = np.array([[7, 2, 4, 5], [4, 3, 2, 9]], dtype=np.int64)
+
+            # x is a numpy array.
+            # x.data = [[7, 2, 4, 5], [4, 3, 2, 9]]
+            # x.shape = [2, 4]
+
+            out, = exe.run(paddle.static.default_main_program(), feed={'x': x}, fetch_list=[output])
+
+            # out is a numpy array.
+            # out.data = [[[1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [1., 1., 1.]],
+            #
+            #             [[1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [1., 1., 1.],
+            #              [0., 0., 0.]]]
+            # out.shape = [2, 4, 3]
+    """
+
+    helper = LayerHelper('embedding', **locals())
+    check_variable_and_dtype(input, 'input', ['int64'], 'embedding')
+    check_dtype(
+        dtype,
+        'dtype',
+        ['float16', 'float32', 'float64', 'uint16'],
+        'embedding',
+    )
+    remote_prefetch = is_sparse and (not is_distributed)
+    if remote_prefetch:
+        assert is_sparse is True and is_distributed is False
+    w = helper.create_parameter(
+        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False
+    )
+    tmp = helper.create_variable_for_type_inference(dtype)
+    padding_idx = (
+        -1
+        if padding_idx is None
+        else padding_idx
+        if padding_idx >= 0
+        else (size[0] + padding_idx)
+    )
+    helper.append_op(
+        type='lookup_table_v2',
+        inputs={'Ids': input, 'W': w},
+        outputs={'Out': tmp},
+        attrs={
+            'is_sparse': is_sparse,
+            'is_distributed': is_distributed,
+            'remote_prefetch': remote_prefetch,
+            'padding_idx': padding_idx,
+        },
+    )
+    return tmp
-- 
GitLab
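
Migration note for downstream callers of the removed entry points, following
the replacements used throughout this patch: fluid.one_hot maps to
paddle.nn.functional.one_hot (the arguments are renamed from input/depth to
x/num_classes), and fluid.embedding / fluid.input.embedding map to
paddle.static.nn.embedding with unchanged keyword arguments. The sketch below
is illustrative only; the tensor values, shapes, and variable names are made
up for the example (borrowed from the docstrings and tests above), not
mandated by the patch:

    import numpy as np
    import paddle

    # Dygraph: fluid.one_hot(input=..., depth=4) becomes
    # paddle.nn.functional.one_hot(x, num_classes); the result is float32.
    label = paddle.to_tensor(np.array([1, 1, 3, 0], dtype=np.int64))
    one_hot_label = paddle.nn.functional.one_hot(label, num_classes=4)
    print(one_hot_label.shape)  # [4, 4]

    # Static graph: fluid.embedding / fluid.input.embedding become
    # paddle.static.nn.embedding; the keyword arguments keep their names.
    paddle.enable_static()
    x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64')
    emb = paddle.static.nn.embedding(input=x, size=[128, 64], dtype='float32')

The removed gperf hooks (start_gperf_profiler / stop_gperf_profiler) get no
direct replacement in this patch; the separate paddle.profiler module is the
supported profiling entry point in current releases.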