# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function import warnings from .framework import Variable, in_dygraph_mode from .layer_helper import LayerHelper from .data_feeder import convert_dtype __all__ = ['one_hot', 'embedding'] def one_hot(input, depth, allow_out_of_range=False): """ The operator converts each id in the input to an one-hot vector with a depth length. The value in the vector dimension corresponding to the id is 1, and the value in the remaining dimension is 0. The shape of output Tensor or LoDTensor is generated by appending depth dimension behind the last dimension of the input shape. .. code-block:: text Example 1 (allow_out_of_range=False): input: X.shape = [4] X.data = [1, 1, 3, 0] depth = 4 output: Out.shape = [4, 4] Out.data = [[0., 1., 0., 0.], [0., 1., 0., 0.], [0., 0., 0., 1.], [1., 0., 0., 0.]] Example 2 (allow_out_of_range=True): input: X.shape = [4] X.data = [1, 1, 5, 0] depth = 4 allow_out_of_range = True output: Out.shape = [4, 4] Out.data = [[0., 1., 0., 0.], [0., 1., 0., 0.], [0., 0., 0., 0.], # This id is 5, which goes beyond depth, so set it all-zeros data. [1., 0., 0., 0.]] Example 3 (allow_out_of_range=False): input: X.shape = [4] X.data = [1, 1, 5, 0] depth = 4 allow_out_of_range = False output: Throw an exception for Illegal value The second dimension in X is 5, which is greater than depth. Allow_out_of_range =False means that does not allow the word id to exceed depth, so it throws an exception. Args: input(Variable): Tensor or LoDTensor with shape :math:`[N_1, N_2, ..., N_k]` , which contains at least one dimension. The data type is int32 or int64. depth(int): An integer defining the depth of the one hot dimension. If input is word id, depth is generally the dictionary size. allow_out_of_range(bool): A bool value indicating whether the input indices could be out of range :math:`[0, depth)` . When input indices are out of range, exceptions :code:`Illegal value` is raised if :attr:`allow_out_of_range` is False, or zero-filling representations is created if it is set True. Default: False. Returns: Variable: The one-hot representations of input. A Tensor or LoDTensor with type float32. Examples: .. code-block:: python import paddle.fluid as fluid # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4]. label = fluid.data(name="label", shape=[4], dtype="int64") one_hot_label = fluid.one_hot(input=label, depth=4) """ helper = LayerHelper("one_hot_v2", **locals()) one_hot_out = helper.create_variable_for_type_inference(dtype='float32') if in_dygraph_mode(): inputs = {'X': input} attrs = {'depth': depth} else: if not isinstance(depth, Variable): # user attribute inputs = {'X': input} attrs = {'depth': depth} else: depth.stop_gradient = True inputs = {'X': input, 'depth_tensor': depth} attrs = {} helper.append_op( type="one_hot_v2", inputs=inputs, attrs=attrs, outputs={'Out': one_hot_out}, stop_gradient=True) return one_hot_out def embedding(input, size, is_sparse=False, is_distributed=False, padding_idx=None, param_attr=None, dtype='float32'): """ The operator is used to lookup embeddings vector of ids provided by :attr:`input` . It automatically constructs a 2D embedding matrix based on the input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . The shape of output Tensor is generated by appending an emb_size dimension to the last dimension of the input Tensor shape. **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , otherwise the program will throw an exception and exit. .. code-block:: text Case 1: input is a Tensor. padding_idx = -1 input.data = [[1, 3], [2, 4], [4, 127]] input.shape = [3, 2] Given size = [128, 16] output is a Tensor: out.shape = [3, 2, 16] out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], [0.345421456, 0.524563927, ..., 0.144534654]], [[0.345249859, 0.124939536, ..., 0.194353745], [0.945345345, 0.435394634, ..., 0.435345365]], [[0.945345345, 0.435394634, ..., 0.435345365], [0.0, 0.0, ..., 0.0 ]]] # padding data The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 It will pad all-zero data when ids is 127. Case 2: input is a LoDTensor with 1-level LoD. padding_idx = 0 input.lod = [[2, 3]] input.data = [[1], [3], [2], [4], [0]] input.shape = [5, 1] Given size = [128, 16] output is a LoDTensor: out.lod = [[2, 3]] out.shape = [5, 1, 16] out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]], [[0.345421456, 0.524563927, ..., 0.144534654]], [[0.345249859, 0.124939536, ..., 0.194353745]], [[0.945345345, 0.435394634, ..., 0.435345365]], [[0.0, 0.0, ..., 0.0 ]]] # padding data It will pad all-zero data when ids is 0. Args: input(Variable): A Tensor or LoDTensor with type int64, which contains the id information. The value of the input id should satisfy :math:`0<= id < size[0]` . size(tuple|list): The shape of lookup table parameter. It should have two elements which indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. is_sparse(bool): The flag indicating whether to use sparse update. This parameter only affects the performance of the backwards gradient update. It is recommended to set True because sparse update is faster. But some optimizer does not support sparse update, such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` , :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` , :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` . In these case, is_sparse must be False. Default: False. is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used in multi-machine distributed CPU training. Default: False. padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. If set None, it makes no effect to output. Default: None. param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. The local word vector needs to be transformed into numpy format, and the shape of local word vector shoud be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` is used to load custom or pre-trained word vectors. See code example 2 for details. dtype(str|core.VarDesc.VarType): It refers to the data type of output Tensor. It must be float32 or float64. Default: float32. Returns: Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . Examples: .. code-block:: python import paddle.fluid as fluid import numpy as np data = fluid.data(name='x', shape=[None, 10], dtype='int64') # exampel 1 emb_1 = fluid.embedding(input=data, size=[128, 64]) # example 2: load custom or pre-trained word vectors weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format w_param_attrs = fluid.ParamAttr( name="emb_weight", learning_rate=0.5, initializer=fluid.initializer.NumpyArrayInitializer(weight_data), trainable=True) emb_2 = fluid.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') """ helper = LayerHelper('embedding', **locals()) if not isinstance(input, Variable): raise TypeError( "The type of 'input' in fluid.embedding must be Variable, but received %s" % (type(input))) if convert_dtype(input.dtype) not in ['int64']: raise TypeError( "The data type of 'input' in fluid.embedding must be int64, but received %s." % (convert_dtype(input.dtype))) if convert_dtype(dtype) in ['float16']: warnings.warn( "The 'dtype' of fluid.embedding only support float16 in GPU now.") if convert_dtype(dtype) not in ['float16', 'float32', 'float64']: raise TypeError( "The 'dtype' of fluid.embedding must be float16, float32 or float64, but received %s." % (convert_dtype(dtype))) remote_prefetch = is_sparse and (not is_distributed) if remote_prefetch: assert is_sparse is True and is_distributed is False w = helper.create_parameter( attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) tmp = helper.create_variable_for_type_inference(dtype) padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( size[0] + padding_idx) helper.append_op( type='lookup_table_v2', inputs={'Ids': input, 'W': w}, outputs={'Out': tmp}, attrs={ 'is_sparse': is_sparse, 'is_distributed': is_distributed, 'remote_prefetch': remote_prefetch, 'padding_idx': padding_idx }) return tmp