#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import warnings
from .framework import Variable, in_dygraph_mode, static_only
from .layer_helper import LayerHelper
from .data_feeder import check_variable_and_dtype, check_dtype
from ..utils import deprecated

__all__ = ['one_hot', 'embedding']


@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot')
def one_hot(input, depth, allow_out_of_range=False):
    """
    :alias_main: paddle.nn.functional.one_hot
    :alias: paddle.nn.functional.one_hot,paddle.nn.functional.common.one_hot
    :old_api: paddle.fluid.one_hot

    Convert every id in ``input`` into a one-hot vector of length ``depth``.
    Inside each vector the position that matches the id holds 1 and all the
    other positions hold 0.

    The output shape is the input shape with one extra trailing dimension of
    size ``depth``.

    .. code-block:: text

        Example 1 (allow_out_of_range=False):

        input:
            X.shape = [4]
            X.data = [1, 1, 3, 0]
            depth = 4

        output:
            Out.shape = [4, 4]
            Out.data = [[0., 1., 0., 0.],
                        [0., 1., 0., 0.],
                        [0., 0., 0., 1.],
                        [1., 0., 0., 0.]]

        Example 2 (allow_out_of_range=True):

        input:
            X.shape = [4]
            X.data = [1, 1, 5, 0]
            depth = 4
            allow_out_of_range = True

        output:
            Out.shape = [4, 4]
            Out.data = [[0., 1., 0., 0.],
                        [0., 1., 0., 0.],
                        [0., 0., 0., 0.], # The id 5 is beyond depth, so this row is all zeros.
                        [1., 0., 0., 0.]]

        Example 3 (allow_out_of_range=False):

        input:
            X.shape = [4]
            X.data = [1, 1, 5, 0]
            depth = 4
            allow_out_of_range = False

        output: Throw an exception for Illegal value
            X contains the id 5, which lies outside [0, depth).
            allow_out_of_range=False does not allow an id to exceed depth,
            so an exception is thrown.

    Args:
        input(Variable): Tensor or LoDTensor with shape :math:`[N_1, N_2, ..., N_k]` ,
            which contains at least one dimension. The data type is int32 or int64.
        depth(int|Variable): The size of the one-hot dimension. If input holds
            word ids, depth is generally the dictionary size. When a Variable
            is given outside dygraph mode, it is fed to the operator as the
            ``depth_tensor`` input and its ``stop_gradient`` is set to True;
            otherwise depth is passed as an operator attribute.
        allow_out_of_range(bool): Whether the input indices may fall outside
            the range :math:`[0, depth)` . For an out-of-range index, an
            :code:`Illegal value` exception is raised if :attr:`allow_out_of_range`
            is False, and an all-zero representation is produced if it is True.
            Default: False.

    Returns:
        Variable: The one-hot representations of input. A Tensor or LoDTensor with type float32.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            # Corresponds to the first example above, where label.shape is [4] and one_hot_label.shape is [4, 4].
            label = fluid.data(name="label", shape=[4], dtype="int64")
            one_hot_label = fluid.one_hot(input=label, depth=4)
    """
    check_variable_and_dtype(input, 'input', ['int32', 'int64'], 'one_hot_v2')
    # The helper receives the call arguments through locals(), so it has to
    # be created before any additional local name is bound.
    helper = LayerHelper("one_hot_v2", **locals())

    out = helper.create_variable_for_type_inference(dtype='float32')

    if in_dygraph_mode() or not isinstance(depth, Variable):
        # depth is handed to the operator as a plain attribute.
        inputs = {'X': input}
        attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range}
    else:
        # depth is a Variable in static graph mode: feed it as an op input.
        depth.stop_gradient = True
        inputs = {'X': input, 'depth_tensor': depth}
        attrs = {'allow_out_of_range': allow_out_of_range}

    helper.append_op(
        type="one_hot_v2",
        inputs=inputs,
        attrs=attrs,
        outputs={'Out': out},
        stop_gradient=True)
    return out
130 131


132
@static_only
@deprecated(since='2.0.0', update_to='paddle.nn.functional.embedding')
def embedding(input,
              size,
              is_sparse=False,
              is_distributed=False,
              padding_idx=None,
              param_attr=None,
              dtype='float32'):
    r"""
    :api_attr: Static Graph

    Look up the embedding vectors of the ids provided by :attr:`input` .
    A 2D embedding matrix is constructed automatically from the
    input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .

    The shape of the output Tensor is the shape of the input Tensor with an
    emb_size dimension appended after its last dimension.

    **Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` ,
    otherwise the program will throw an exception and exit.

    .. code-block:: text

        Case 1:

        input is a Tensor. padding_idx = -1
            input.data = [[1, 3], [2, 4], [4, 127]]
            input.shape = [3, 2]
        Given size = [128, 16]
        output is a Tensor:
            out.shape = [3, 2, 16]
            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
                        [0.345421456, 0.524563927, ..., 0.144534654]],

                        [[0.345249859, 0.124939536, ..., 0.194353745],
                        [0.945345345, 0.435394634, ..., 0.435345365]],

                        [[0.945345345, 0.435394634, ..., 0.435345365],
                        [0.0,         0.0,         ..., 0.0        ]]]  # padding data
        The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
        It will pad all-zero data when ids is 127.

        Case 2:

        input is a LoDTensor with 1-level LoD. padding_idx = 0
            input.lod = [[2, 3]]
            input.data = [[1], [3], [2], [4], [0]]
            input.shape = [5, 1]
        Given size = [128, 16]
        output is a LoDTensor:
            out.lod = [[2, 3]]
            out.shape = [5, 1, 16]
            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
                        [[0.345421456, 0.524563927, ..., 0.144534654]],
                        [[0.345249859, 0.124939536, ..., 0.194353745]],
                        [[0.945345345, 0.435394634, ..., 0.435345365]],
                        [[0.0,         0.0,         ..., 0.0        ]]]  # padding data
        It will pad all-zero data when ids is 0.


    Args:
        input(Variable): A Tensor or LoDTensor with type int64, which contains the id information.
            The value of the input id should satisfy :math:`0 <= id < size[0]` .
        size(tuple|list): The shape of the lookup table parameter. It should have two elements, which
            indicate the size of the dictionary of embeddings and the size of each embedding vector respectively.
        is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
            affects the performance of the backwards gradient update. It is recommended to set
            True because sparse update is faster. But some optimizers do not support sparse update,
            such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
            :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` ,
            :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` .
            In these cases, is_sparse must be False. Default: False.
        is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
            in multi-machine distributed CPU training. Default: False.
        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
            encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
            If set None, it has no effect on the output. Default: None.
        param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
            user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
            The local word vector needs to be transformed into numpy format, and the shape of the local word
            vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
            is used to load custom or pre-trained word vectors. See code example 2 for details.
        dtype(str|core.VarDesc.VarType): It refers to the data type of the output Tensor.
            It must be float16, float32 or float64. Default: float32.

    Returns:
        Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .

    Static Examples:
        .. code-block:: python

            import paddle
            import numpy as np
            paddle.enable_static()

            x = paddle.static.data(name="x", shape = [2, 4], dtype=np.int64)
            embedding = paddle.nn.Embedding(10, 3,
                        weight_attr=paddle.nn.initializer.Constant(value=1.0))
            sgd = paddle.optimizer.SGD(parameters=[embedding.weight], learning_rate=0.01)
            output = embedding(x)
            m_output=paddle.mean(output)

            sgd.minimize(m_output)

            place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)
            exe.run(paddle.static.default_startup_program())

            x = np.array([[7, 2, 4, 5],[4, 3, 2, 9]], dtype=np.int64)

            # x is a Numpy.
            # x.data = [[7, 2, 4, 5], [4, 3, 2, 9]]
            # x.shape = [2, 4]

            out, = exe.run(paddle.static.default_main_program(), feed={'x':x}, fetch_list=[output])

            # out is a Numpy.
            # out.data = [[[1., 1., 1.],
            #              [1., 1., 1.],
            #              [1., 1., 1.],
            #              [1., 1., 1.]],
            #
            #             [[1., 1., 1.],
            #              [1., 1., 1.],
            #              [1., 1., 1.],
            #              [0., 0., 0.]]]
            # out.shape = [2, 4, 3]


    Dygraph Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            paddle.disable_static()

            x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)

            # x is a Tensor.
            # x.data = [[3], [4], [5]]
            # x.shape = [3, 1]
            x = paddle.to_tensor(x_data, stop_gradient=False)

            # embedding weight shape = [10, 3]
            embedding = paddle.nn.Embedding(10, 3, sparse=True)

            # embedding weight data = [10, 3]
            w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)

            # embedding.weight.shape = [10, 3]
            # embedding.weight.data =
            #                        [[2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.],
            #                         [2., 2., 2.]]
            embedding.weight.set_value(w0)

            adam = paddle.optimizer.Adam(
                parameters=[embedding.weight], learning_rate=0.01)
            adam.clear_grad()

            # out is Tensor
            # out.shape: [3, 1, 3]
            # out.layout: NCHW
            # out.dtype: float
            # out.data: [2 2 2 2 2 2 2 2 2]
            out = embedding(x)

            out.backward()
            adam.step()

    """

    # The helper receives the call arguments through locals(), so it has to
    # be created before any additional local name is bound.
    helper = LayerHelper('embedding', **locals())
    check_variable_and_dtype(input, 'input', ['int64'], 'fluid.embedding')
    check_dtype(dtype, 'dtype', ['float16', 'float32', 'float64'],
                'fluid.embedding')

    remote_prefetch = is_sparse and (not is_distributed)
    if remote_prefetch:
        assert is_sparse is True and is_distributed is False

    weight = helper.create_parameter(
        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
    out = helper.create_variable_for_type_inference(dtype)

    # Normalise padding_idx for the operator: None becomes -1 and a negative
    # index is shifted by the vocabulary size (size[0]).
    if padding_idx is None:
        padding_idx = -1
    elif padding_idx < 0:
        padding_idx = size[0] + padding_idx

    op_attrs = {
        'is_sparse': is_sparse,
        'is_distributed': is_distributed,
        'remote_prefetch': remote_prefetch,
        'padding_idx': padding_idx
    }
    helper.append_op(
        type='lookup_table_v2',
        inputs={'Ids': input,
                'W': weight},
        outputs={'Out': out},
        attrs=op_attrs)
    return out