From 73cf08d6944e3e6890b665f310c2bb4a7fbab73b Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Wed, 9 Oct 2019 21:32:40 +0800 Subject: [PATCH] fix en_doc api of one-hot and embedding (#20187) * fix en_doc of one-hot and embedding test=develop, test=document_fix * modify into fluid.data test=develop, test=document_fix * modify api.spec test=develop, test=document_fix * fix api.spec conflict, test=develop, test=document_fix --- paddle/fluid/API.spec | 8 +- python/paddle/fluid/input.py | 188 ++++++++++++++++++++++------ python/paddle/fluid/layers/nn.py | 202 +++++++++++++++++++++++++------ 3 files changed, 320 insertions(+), 78 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index c522e63fc1..42b87093cb 100755 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -123,11 +123,11 @@ paddle.fluid.initializer.force_init_on_cpu (ArgSpec(args=[], varargs=None, keywo paddle.fluid.initializer.init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'eaa04fd68661a3af59abd0e19b3b6eda')) paddle.fluid.initializer.NumpyArrayInitializer ('paddle.fluid.initializer.NumpyArrayInitializer', ('document', '7b0c371a233f9eb6feab75bbef8a74cc')) paddle.fluid.initializer.NumpyArrayInitializer.__init__ (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', 'd4ac047e0d5e6b7b1c5ff6ef7d7cfff5')) -paddle.fluid.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'eef66730acc806088f9e8ba90252bda1')) +paddle.fluid.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, 
None, 'float32')), ('document', 'c830c324bdc58e8e023d85eb616c3940')) +paddle.fluid.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'e822420dcdc743526ab5caebd89a4b4f')) paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, None)), ('document', 'e28421f1253a3545d9bfe81a8028ea68')) paddle.fluid.layers.center_loss (ArgSpec(args=['input', 'label', 'num_classes', 'alpha', 'param_attr', 'update_center'], varargs=None, keywords=None, defaults=(True,)), ('document', '18112442f55b5862bbec8feee841c905')) -paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', 'd8e405486a1e4e189b51d6ee28d67b1e')) +paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', 'c51fcac7a4f5786ca41f27fa60bd22c5')) paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', '6d3ee14da70adfa36d85c40b18716ef2')) paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', 
None, None, None, None, None)), ('document', 'c37d51aad655c8a9f9b045c64717320a')) paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3')) @@ -192,7 +192,7 @@ paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'par paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '9461e67095a6fc5d568fb2ce8fef66ff')) paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax', 'axis'], varargs=None, keywords=None, defaults=(False, -100, True, False, -1)), ('document', '54e1675aa0364f4a78fa72804ec0f413')) paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'cbe8940643ac80ef75e1abdfbdb09e88')) -paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'ec4115591be842868c86b2e5334245c6')) +paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'cdf5dc2078f1e20dc61dd0bec7e28a29')) paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '98e7927f09ee2270535b29f048e481ec')) paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'ca73fdc4551c5765c92eb00f24874289')) 
paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbac07662a6e22e8e299ced880c7775')) diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py index 8afbd662ad..917f50d3d5 100644 --- a/python/paddle/fluid/input.py +++ b/python/paddle/fluid/input.py @@ -21,26 +21,80 @@ __all__ = ['one_hot', 'embedding'] def one_hot(input, depth, allow_out_of_range=False): """ - This layer creates the one-hot representations for input indices. + + The operator converts each id in the input to a one-hot vector with a + depth length. The value in the vector dimension corresponding to the id + is 1, and the value in the remaining dimension is 0. + + The shape of output Tensor or LoDTensor is generated by appending depth dimension + behind the last dimension of the input shape. + + .. code-block:: text + + Example 1 (allow_out_of_range=False): + + input: + X.shape = [4] + X.data = [1, 1, 3, 0] + depth = 4 + + output: + Out.shape = [4, 4] + Out.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]] + + Example 2 (allow_out_of_range=True): + + input: + X.shape = [4] + X.data = [1, 1, 5, 0] + depth = 4 + allow_out_of_range = True + + output: + Out.shape = [4, 4] + Out.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 0.], # This id is 5, which goes beyond depth, so set it all-zeros data. + [1., 0., 0., 0.]] + + Example 3 (allow_out_of_range=False): + + input: + X.shape = [4] + X.data = [1, 1, 5, 0] + depth = 4 + allow_out_of_range = False + + output: Throw an exception for Illegal value + The third element in X is 5, which is greater than depth. + allow_out_of_range = False means that the word id is not allowed to exceed depth, + so it throws an exception. + Args: - input(Variable): Input indices represent locations, which takes value 1.0 - in indices, while all other locations take value 0.
- depth(scalar): An interger defining the depth of the one-hot dimension. + input(Variable): Tensor or LoDTensor with shape :math:`[N_1, N_2, ..., N_k]` , + which contains at least one dimension. The data type is int32 or int64. + depth(int): An integer defining the depth of the one hot dimension. If input + is word id, depth is generally the dictionary size. allow_out_of_range(bool): A bool value indicating whether the input - indices could be out of range [0, depth). When input indices are - out of range, exceptions is raised if allow_out_of_range is False, - or zero-filling representations is created if it is set True + indices could be out of range :math:`[0, depth)` . When input indices are + out of range, exceptions :code:`Illegal value` is raised if :attr:`allow_out_of_range` + is False, or zero-filling representations is created if it is set True. + Default: False. Returns: - Variable: The one-hot representations of input. + Variable: The one-hot representations of input. A Tensor or LoDTensor with type float32. Examples: .. code-block:: python import paddle.fluid as fluid - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - one_hot_label = fluid.one_hot(input=label, depth=10) + # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4]. + label = fluid.data(name="label", shape=[4], dtype="int64") + one_hot_label = fluid.one_hot(input=label, depth=4) """ helper = LayerHelper("one_hot_v2", **locals()) @@ -75,43 +129,105 @@ def embedding(input, param_attr=None, dtype='float32'): """ - **Embedding Layer** - This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in - a lookup table. The result of this lookup is the embedding of each ID in the - :attr:`input`. + The operator is used to lookup embeddings vector of ids provided by :attr:`input` . + It automatically constructs a 2D embedding matrix based on the + input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . 
+ + The shape of output Tensor is generated by appending an emb_size dimension to the + last dimension of the input Tensor shape. + + **Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` , + otherwise the program will throw an exception and exit. + + .. code-block:: text + + Case 1: + + input is a Tensor. padding_idx = -1 + input.data = [[1, 3], [2, 4], [4, 127]] + input.shape = [3, 2] + Given size = [128, 16] + output is a Tensor: + out.shape = [3, 2, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654]], + + [[0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365]], + + [[0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]]] # padding data + The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 + It will pad all-zero data when ids is 127. + + Case 2: + + input is a LoDTensor with 1-level LoD. padding_idx = 0 + input.lod = [[2, 3]] + input.data = [[1], [3], [2], [4], [0]] + input.shape = [5, 1] + Given size = [128, 16] + output is a LoDTensor: + out.lod = [[2, 3]] + out.shape = [5, 1, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]], + [[0.345421456, 0.524563927, ..., 0.144534654]], + [[0.345249859, 0.124939536, ..., 0.194353745]], + [[0.945345345, 0.435394634, ..., 0.435345365]], + [[0.0, 0.0, ..., 0.0 ]]] # padding data + It will pad all-zero data when ids is 0. - All the input variables are passed in as local variables to the LayerHelper - constructor. Args: - input(Variable): Input is a Tensor Variable, which contains the IDs information. - The value of the input IDs should satisfy :math:`0<= id < size[0]`. - size(tuple|list): The shape of the look up table parameter. It should - have two elements which indicate the size of the dictionary of - embeddings and the size of each embedding vector respectively.
- is_sparse(bool): The flag indicating whether to use sparse update. - is_distributed(bool): Whether to run lookup table from remote parameter server. - padding_idx(int|long|None): It will output all-zero padding data whenever - lookup encounters :math:`padding\_idx` in Ids. If set :attr:`None`, it makes - no effect to output. If :math:`padding\_idx < 0`, the :math:`padding\_idx` - will automatically be converted to :math:`size[0] + padding\_idx` to use. - Default: None. - param_attr(ParamAttr): Parameters for this layer. - dtype(np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of output - tensor. It can be float32, float_16, int etc. + input(Variable): A Tensor or LoDTensor with type int64, which contains the id information. + The value of the input id should satisfy :math:`0<= id < size[0]` . + size(tuple|list): The shape of lookup table parameter. It should have two elements which + indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. + is_sparse(bool): The flag indicating whether to use sparse update. This parameter only + affects the performance of the backwards gradient update. It is recommended to set + True because sparse update is faster. But some optimizer does not support sparse update, + such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` , + :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` , + :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` . + In these case, is_sparse must be False. Default: False. + is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used + in multi-machine distributed CPU training. Default: False. + padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). 
+ If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted + to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup + encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. + If set None, it makes no effect to output. Default: None. + param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the + default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, + user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. + The local word vector needs to be transformed into numpy format, and the shape of local word + vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` + is used to load custom or pre-trained word vectors. See code example 2 for details. + dtype(str|core.VarDesc.VarType): It refers to the data type of output Tensor. + It must be float32 or float64. Default: float32. Returns: - Variable: The tensor variable storing the embeddings of the \ - supplied inputs. + Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . Examples: ..
code-block:: python import paddle.fluid as fluid - # [batch_size, 20] -> [batch_size, 20, 64] - data = fluid.layers.data(name='sequence', shape=[20], dtype='int64') - emb = fluid.embedding(input=data, size=[128, 64]) + import numpy as np + data = fluid.data(name='x', shape=[None, 10], dtype='int64') + + # example 1 + emb_1 = fluid.embedding(input=data, size=[128, 64]) + + # example 2: load custom or pre-trained word vectors + weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format + w_param_attrs = fluid.ParamAttr( + name="emb_weight", + learning_rate=0.5, + initializer=fluid.initializer.NumpyArrayInitializer(weight_data), + trainable=True) + emb_2 = fluid.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') """ helper = LayerHelper('embedding', **locals()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index bb4b8e707f..45fb3bc62a 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -476,42 +476,110 @@ def embedding(input, param_attr=None, dtype='float32'): """ - **Embedding Layer** - This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in - a lookup table. The result of this lookup is the embedding of each ID in the - :attr:`input`. + **WARNING:** This OP will be deprecated in a future release. This OP requires the + last dimension of Tensor shape must be equal to 1. It is recommended to use + fluid. :ref:`api_fluid_embedding` . - All the input variables are passed in as local variables to the LayerHelper - constructor. - - Args: - input(Variable): Input is a Tensor Variable, which contains the IDs information. - The value of the input IDs should satisfy :math:`0<= id < size[0]`. - size(tuple|list): The shape of the look up table parameter. It should - have two elements which indicate the size of the dictionary of - embeddings and the size of each embedding vector respectively.
- is_sparse(bool): The flag indicating whether to use sparse update. - is_distributed(bool): Whether to run lookup table from remote parameter server. - padding_idx(int|long|None): It will output all-zero padding data whenever - lookup encounters :math:`padding\_idx` in Ids. If set :attr:`None`, it makes - no effect to output. If :math:`padding\_idx < 0`, the :math:`padding\_idx` - will automatically be converted to :math:`size[0] + padding\_idx` to use. - Default: None. - param_attr(ParamAttr): Parameters for this layer. - dtype(np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of output - tensor. It can be float32, float_16, int etc. + The operator is used to lookup embeddings vector of ids provided by :attr:`input` . + It automatically constructs a 2D embedding matrix based on the + input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . - Returns: - Variable: The tensor variable storing the embeddings of the \ - supplied inputs. + This OP requires the last dimension of Tensor shape must be equal to 1. The shape + of output Tensor is generated by replacing the last dimension of the input Tensor shape + with emb_size. + + **Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` , + otherwise the program will throw an exception and exit. + + .. code-block:: text + + Case 1: + + input is a Tensor. padding_idx = -1 + input.data = [[[1], [3]], [[2], [4]], [[4], [127]]] + input.shape = [3, 2, 1] + Given size = [128, 16] + output is a Tensor: + out.shape = [3, 2, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654]], + + [[0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365]], + + [[0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]]] # padding data + The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 + It will pad all-zero data when ids is 127.
+ + Case 2: + + input is a LoDTensor with 1-level LoD. padding_idx = 0 + input.lod = [[2, 3]] + input.data = [[1], [3], [2], [4], [0]] + input.shape = [5, 1] + Given size = [128, 16] + output is a LoDTensor: + out.lod = [[2, 3]] + out.shape = [5, 16] + out.data = [[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654], + [0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]] # padding data + It will pad all-zero data when ids is 0. + + Args: + input(Variable): A Tensor or LoDTensor with type int64, which contains the id information. + The last dimension of Tensor shape must be equal to 1. The value of the input id should + satisfy :math:`0<= id < size[0]` . + size(tuple|list): The shape of lookup table parameter. It should have two elements which + indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. + is_sparse(bool): The flag indicating whether to use sparse update. This parameter only + affects the performance of the backwards gradient update. It is recommended to set + True because sparse update is faster. But some optimizer does not support sparse update, + such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` , + :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` , + :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` . + In these case, is_sparse must be False. Default: False. + is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used + in multi-machine distributed CPU training. Default: False. + padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). + If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted + to :math:`vocab\_size + padding\_idx` . 
It will output all-zero padding data whenever lookup + encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. + If set None, it makes no effect to output. Default: None. + param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the + default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, + user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. + The local word vector needs to be transformed into numpy format, and the shape of local word + vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` + is used to load custom or pre-trained word vectors. See code example 2 for details. + dtype(str|core.VarDesc.VarType): It refers to the data type of output Tensor. + It must be float32 or float64. Default: float32. + + Returns: + Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . Examples: ..
code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) - emb = fluid.layers.embedding(input=data, size=[128, 64]) + import numpy as np + data = fluid.data(name='x', shape=[None, 1], dtype='int64') + + # example 1 + emb_1 = fluid.embedding(input=data, size=[128, 64]) + + # example 2: load custom or pre-trained word vectors + weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format + w_param_attrs = fluid.ParamAttr( + name="emb_weight", + learning_rate=0.5, + initializer=fluid.initializer.NumpyArrayInitializer(weight_data), + trainable=True) + emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') """ helper = LayerHelper('embedding', **locals()) @@ -7841,25 +7909,83 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): def one_hot(input, depth, allow_out_of_range=False): """ - This layer creates the one-hot representations for input indices. - Args: - input(Variable): Input indices, last dimension must be 1. - depth(scalar): An interger defining the depth of the one-hot dimension. + **WARNING:** This OP requires the last dimension of Tensor shape must be equal to 1. + This OP will be deprecated in a future release. It is recommended to use fluid. :ref:`api_fluid_one_hot` . + + The operator converts each id in the input to a one-hot vector with a + :attr:`depth` length. The value in the vector dimension corresponding to the id + is 1, and the value in the remaining dimension is 0. + + The shape of output Tensor or LoDTensor is generated by replacing the last dimension + of the input shape (which must be 1) with :attr:`depth` . + + ..
code-block:: text + + Example 1 (allow_out_of_range=False): + + input: + X.shape = [4, 1] + X.data = [[1], [1], [3], [0]] + depth = 4 + + output: + Out.shape = [4, 4] + Out.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]] + + Example 2 (allow_out_of_range=True): + + input: + X.shape = [4, 1] + X.data = [[1], [1], [5], [0]] + depth = 4 + allow_out_of_range = True + + output: + Out.shape = [4, 4] + Out.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 0.], # This id is 5, which goes beyond depth, so set it all-zeros data. + [1., 0., 0., 0.]] + + Example 3 (allow_out_of_range=False): + + input: + X.shape = [4, 1] + X.data = [[1], [1], [5], [0]] + depth = 4 + allow_out_of_range = False + + output: Throw an exception for Illegal value + The third element in X is 5, which is greater than depth. + allow_out_of_range = False means that the word id is not allowed to exceed depth, + so it throws an exception. + + Args: + input(Variable): Tensor or LoDTensor with shape :math:`[N_1, N_2, ..., N_k, 1]` , + which contains at least one dimension and the last dimension must be 1. + The data type is int32 or int64. + depth(scalar): An integer defining the :attr:`depth` of the one hot dimension. If input + is word id, depth is generally the dictionary size. allow_out_of_range(bool): A bool value indicating whether the input - indices could be out of range [0, depth). When input indices are - out of range, exceptions is raised if allow_out_of_range is False, - or zero-filling representations is created if it is set True + indices could be out of range :math:`[0, depth)` . When input indices are + out of range, exceptions :code:`Illegal value` is raised if :attr:`allow_out_of_range` + is False, or zero-filling representations is created if it is set True. + Default: False. Returns: - Variable: The one-hot representations of input. + Variable: The one-hot representations of input. A Tensor or LoDTensor with type float32. Examples: ..
code-block:: python import paddle.fluid as fluid - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - one_hot_label = fluid.layers.one_hot(input=label, depth=10) + # Correspond to the first example above, where label.shape is [4, 1] and one_hot_label.shape is [4, 4]. + label = fluid.data(name="label", shape=[4, 1], dtype="int64") + one_hot_label = fluid.layers.one_hot(input=label, depth=4) """ helper = LayerHelper("one_hot", **locals()) -- GitLab