# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from .framework import Variable, in_dygraph_mode from .layer_helper import LayerHelper __all__ = ['one_hot', 'embedding'] def one_hot(input, depth, allow_out_of_range=False): """ This layer creates the one-hot representations for input indices. Args: input(Variable): Input indices represent locations, which takes value 1.0 in indices, while all other locations take value 0. depth(scalar): An interger defining the depth of the one-hot dimension. allow_out_of_range(bool): A bool value indicating whether the input indices could be out of range [0, depth). When input indices are out of range, exceptions is raised if allow_out_of_range is False, or zero-filling representations is created if it is set True Returns: Variable: The one-hot representations of input. Examples: .. code-block:: python import paddle.fluid as fluid label = fluid.layers.data(name="label", shape=[1], dtype="int64") one_hot_label = fluid.one_hot(input=label, depth=10) """ helper = LayerHelper("one_hot_v2", **locals()) one_hot_out = helper.create_variable_for_type_inference(dtype='float32') if in_dygraph_mode(): inputs = {'X': input} attrs = {'depth': depth} else: if not isinstance(depth, Variable): # user attribute inputs = {'X': input} attrs = {'depth': depth} else: depth.stop_gradient = True inputs = {'X': input, 'depth_tensor': depth} attrs = {} helper.append_op( type="one_hot_v2", inputs=inputs, attrs=attrs, outputs={'Out': one_hot_out}, stop_gradient=True) return one_hot_out def embedding(input, size, is_sparse=False, is_distributed=False, padding_idx=None, param_attr=None, dtype='float32'): """ **Embedding Layer** This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in a lookup table. The result of this lookup is the embedding of each ID in the :attr:`input`. All the input variables are passed in as local variables to the LayerHelper constructor. Args: input(Variable): Input is a Tensor Variable, which contains the IDs information. The value of the input IDs should satisfy :math:`0<= id < size[0]`. size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively. is_sparse(bool): The flag indicating whether to use sparse update. is_distributed(bool): Whether to run lookup table from remote parameter server. padding_idx(int|long|None): It will output all-zero padding data whenever lookup encounters :math:`padding\_idx` in Ids. If set :attr:`None`, it makes no effect to output. If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted to :math:`size[0] + padding\_idx` to use. Default: None. param_attr(ParamAttr): Parameters for this layer. dtype(np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of output tensor. It can be float32, float_16, int etc. Returns: Variable: The tensor variable storing the embeddings of the \ supplied inputs. Examples: .. code-block:: python import paddle.fluid as fluid # [batch_size, 20] -> [batch_size, 20, 64] data = fluid.layers.data(name='sequence', shape=[20], dtype='int64') emb = fluid.embedding(input=data, size=[128, 64]) """ helper = LayerHelper('embedding', **locals()) remote_prefetch = is_sparse and (not is_distributed) if remote_prefetch: assert is_sparse is True and is_distributed is False w = helper.create_parameter( attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) tmp = helper.create_variable_for_type_inference(dtype) padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( size[0] + padding_idx) helper.append_op( type='lookup_table_v2', inputs={'Ids': input, 'W': w}, outputs={'Out': tmp}, attrs={ 'is_sparse': is_sparse, 'is_distributed': is_distributed, 'remote_prefetch': remote_prefetch, 'padding_idx': padding_idx }) return tmp