search.py 19.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
C
Chengmo 已提交
14
from __future__ import print_function
15
import numpy as np
C
Chengmo 已提交
16 17
from ..fluid.layer_helper import LayerHelper
from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
18
from ..fluid import core, layers
19

20 21 22 23 24 25 26
# TODO: define searching & indexing functions of a tensor  
from ..fluid.layers import argmin  #DEFINE_ALIAS
from ..fluid.layers import argsort  #DEFINE_ALIAS
from ..fluid.layers import has_inf  #DEFINE_ALIAS
from ..fluid.layers import has_nan  #DEFINE_ALIAS
from ..fluid.layers import topk  #DEFINE_ALIAS

27 28
# Names exported by this module. 'argmin', 'argsort', 'has_inf', 'has_nan'
# and 'topk' are re-exported aliases imported from fluid.layers; the others
# are defined in this file.
__all__ = [
    'argmax',
    'argmin',
    'argsort',
    'has_inf',
    'has_nan',
    #       'masked_select',
    'topk',
    'where',
    'index_select',
    'nonzero',
    'sort',
    'index_sample'
]

from paddle.common_ops_import import *
43 44 45 46


def argmax(input, axis=None, dtype=None, out=None, keepdims=False, name=None):
    """
    :alias_main: paddle.argmax
    :alias: paddle.argmax,paddle.tensor.argmax,paddle.tensor.search.argmax

    Compute the indices of the maximum elements of ``input`` along ``axis``.

    Args:
        input(Variable): An input N-D Tensor with type float32, float64, int16,
            int32, int64, uint8.
        axis(int, optional): Axis to compute indices along. The effective range
            is [-R, R), where R is Rank(input). When axis<0, it works the same
            way as axis+R. Default is None, in which case the last dimension
            (axis=-1) is used.
        dtype(np.dtype|core.VarDesc.VarType|str, optional): Data type of the
            output tensor, either int32 or int64. Default is None, in which
            case int64 indices are returned.
        out(Variable, optional): An already created Variable that receives the
            result. If None, a new Variable is created to store the result.
            Default is None.
        keepdims(bool, optional): Whether to keep the reduced axis in the
            output. Default is False.
        name(str, optional): The name of the output variable. Normally there is
            no need for the user to set this property. Default is None, in
            which case the framework names the output variable.

    Returns:
        Variable: A Tensor of indices with data type int64, or the type given
        by ``dtype``. Its ``stop_gradient`` attribute is set to True.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.fluid as fluid
            import numpy as np

            in1 = np.array([[[5,8,9,5],
                            [0,0,1,7],
                            [6,9,2,4]],
                            [[5,2,4,2],
                            [4,7,7,9],
                            [1,7,0,6]]])
            with fluid.dygraph.guard():
                x = fluid.dygraph.to_variable(in1)
                out1 = paddle.argmax(input=x, axis=-1)
                out2 = paddle.argmax(input=x, axis=0)
                out3 = paddle.argmax(input=x, axis=1)
                out4 = paddle.argmax(input=x, axis=2)
                out5 = paddle.argmax(input=x, axis=2, keepdims=True)
                print(out1.numpy())
                # [[2 3 1]
                #  [0 3 1]]
                print(out2.numpy())
                # [[0 0 0 0]
                #  [1 1 1 1]
                #  [0 0 0 1]]
                print(out3.numpy())
                # [[2 2 0 1]
                #  [0 1 1 1]]
                print(out4.numpy())
                # [[2 3 1]
                #  [0 3 1]]
                print(out5.numpy())
                #array([[[2],
                #        [3],
                #        [1]],
                #       [[0],
                #        [3],
                #        [1]]])
    """
    # Must stay the first statement: locals() forwards exactly the call
    # arguments to LayerHelper.
    helper = LayerHelper("arg_max", **locals())

    op_attrs = {}
    if dtype is None:
        # No explicit request: indices are produced as int64.
        index_dtype = VarDesc.VarType.INT64
    else:
        check_dtype(dtype, 'create data type', ['int32', 'int64'], 'arg_max')
        index_dtype = convert_np_dtype_to_dtype_(dtype)
        op_attrs["dtype"] = index_dtype

    result = out
    if result is None:
        result = helper.create_variable_for_type_inference(index_dtype)

    op_attrs['keepdims'] = keepdims
    # A missing axis means "last dimension".
    op_attrs['axis'] = -1 if axis is None else axis

    helper.append_op(
        type='arg_max',
        inputs={'X': input},
        outputs={'Out': [result]},
        attrs=op_attrs)
    result.stop_gradient = True
    return result
136 137


138 139
def index_select(input, index, dim=0):
    """
    :alias_main: paddle.index_select
    :alias: paddle.index_select,paddle.tensor.index_select,paddle.tensor.search.index_select

    Returns a new tensor which indexes the `input` tensor along dimension `dim` using
    the entries in `index` which is a Tensor. The returned tensor has the same number
    of dimensions as the original `input` tensor. The dim-th dimension has the same
    size as the length of `index`; other dimensions have the same size as in the `input` tensor.

    Args:
        input (Variable): The input tensor variable. In static graph mode its
            data type is checked to be float32, float64, int32 or int64.
        index (Variable): The 1-D tensor containing the indices to index. In
            static graph mode its data type is checked to be int32 or int64.
        dim (int): The dimension in which we index. Default is 0.

    Returns:
        Variable: A Tensor with same data type as `input`.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.fluid as fluid
            import numpy as np

            data = np.array([[1.0, 2.0, 3.0, 4.0],
                             [5.0, 6.0, 7.0, 8.0],
                             [9.0, 10.0, 11.0, 12.0]])
            data_index = np.array([0, 1, 1]).astype('int32')

            with fluid.dygraph.guard():
                x = fluid.dygraph.to_variable(data)
                index = fluid.dygraph.to_variable(data_index)
                out_z1 = paddle.index_select(x, index)
                print(out_z1.numpy())
                #[[1. 2. 3. 4.]
                # [5. 6. 7. 8.]
                # [5. 6. 7. 8.]]
                out_z2 = paddle.index_select(x, index, dim=1)
                print(out_z2.numpy())
                #[[ 1.  2.  2.]
                # [ 5.  6.  6.]
                # [ 9. 10. 10.]]
    """
    # Dygraph mode dispatches straight to the C++ op; no helper is needed.
    if in_dygraph_mode():
        return core.ops.index_select(input, index, 'dim', dim)

    helper = LayerHelper("index_select", **locals())

    # Report this function's own name and real argument names in type errors
    # (previously these messages pointed at index_sample and at 'x').
    check_variable_and_dtype(input, 'input',
                             ['float32', 'float64', 'int32', 'int64'],
                             'paddle.tensor.search.index_select')
    check_variable_and_dtype(index, 'index', ['int32', 'int64'],
                             'paddle.tensor.search.index_select')

    out = helper.create_variable_for_type_inference(input.dtype)

    helper.append_op(
        type='index_select',
        inputs={'X': input,
                'Index': index},
        outputs={'Out': out},
        attrs={'dim': dim})
    return out


def nonzero(input, as_tuple=False):
    """
    :alias_main: paddle.nonzero
    :alias: paddle.nonzero,paddle.tensor.nonzero,paddle.tensor.search.nonzero

    Return a tensor containing the indices of all non-zero elements of the `input`
    tensor. If as_tuple is True, return a tuple of tensors, one for each dimension
    in `input`, each containing the indices (in that dimension) of all non-zero elements
    of `input`. Given a n-Dimensional `input` tensor with shape [x_1, x_2, ..., x_n], If
    as_tuple is False, we can get a output tensor with shape [z, n], where `z` is the
    number of all non-zero elements in the `input` tensor. If as_tuple is True, we can get
    a tuple of length `n`, and the shape of each tensor in it is [z, 1].

    Args:
        input (Variable): The input tensor variable.
        as_tuple (bool): Return type, Tensor or tuple of Tensor. Default is False.

    Returns:
        Variable or tuple of Variable. The data type is int64.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.fluid as fluid
            import numpy as np

            data1 = np.array([[1.0, 0.0, 0.0],
                              [0.0, 2.0, 0.0],
                              [0.0, 0.0, 3.0]])
            data2 = np.array([0.0, 1.0, 0.0, 3.0])
            data3 = np.array([0.0, 0.0, 0.0])
            with fluid.dygraph.guard():
                x1 = fluid.dygraph.to_variable(data1)
                x2 = fluid.dygraph.to_variable(data2)
                x3 = fluid.dygraph.to_variable(data3)
                out_z1 = paddle.nonzero(x1)
                print(out_z1.numpy())
                #[[0 0]
                # [1 1]
                # [2 2]]
                out_z1_tuple = paddle.nonzero(x1, as_tuple=True)
                for out in out_z1_tuple:
                    print(out.numpy())
                #[[0]
                # [1]
                # [2]]
                #[[0]
                # [1]
                # [2]]
                out_z2 = paddle.nonzero(x2)
                print(out_z2.numpy())
                #[[1]
                # [3]]
                out_z2_tuple = paddle.nonzero(x2, as_tuple=True)
                for out in out_z2_tuple:
                    print(out.numpy())
                #[[1]
                # [3]]
                out_z3 = paddle.nonzero(x3)
                print(out_z3.numpy())
                #[]
                out_z3_tuple = paddle.nonzero(x3, as_tuple=True)
                for out in out_z3_tuple:
                    print(out.numpy())
                #[]
    """
    rank = len(input.shape)

    if in_dygraph_mode():
        outs = core.ops.where_index(input)
    else:
        outs = layers.where(input)

    if not as_tuple:
        return outs
    if rank == 1:
        # A single dimension: the [z, 1] index tensor is already the answer.
        return (outs, )

    # `outs` is 2-D with shape [z, rank] whatever the rank of `input`, so the
    # coordinates of dimension i are column i and the slice must always run
    # along axis 1. Using rank - 1 here is out of range for rank >= 3.
    return tuple(
        layers.slice(
            outs, axes=[1], starts=[i], ends=[i + 1]) for i in range(rank))


289 290
def sort(input, axis=-1, descending=False, out=None, name=None):
    """
    :alias_main: paddle.sort
    :alias: paddle.sort,paddle.tensor.sort,paddle.tensor.search.sort

    This OP sorts the input along the given axis, and returns the sorted
    output data Variable and its corresponding index Variable, both with the
    same shape as :attr:`input`.

    **NOTICE**: The sorted data Variable in the output of this OP has
    gradient. You could set Variable :attr:`stop_gradient`.

    Args:
        input(Variable): An input N-D Tensor with type float32, float64, int16,
            int32, int64, uint8.
        axis(int, optional): Axis to sort along. The effective range
            is [-R, R), where R is Rank(input). When axis<0, it works the same
            way as axis+R. Default is -1.
        descending(bool, optional): If True, sort in descending order,
            otherwise sort in ascending order. Default is False.
        out(Variable, optional): An already created Variable that receives the
            sorted data. If None, a new Variable is created to store the
            result. Default is None.
        name(str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        tuple: A tuple of sorted data Variable(with the same shape and data
        type as input) and the sorted indices(with the same shape as input's
        and with data type int64).

    Examples:
        .. code-block:: python

            import paddle
            import paddle.fluid as fluid
            import numpy as np
            in1 = np.array([[[5,8,9,5],
                            [0,0,1,7],
                            [6,9,2,4]],
                            [[5,2,4,2],
                            [4,7,7,9],
                            [1,7,0,6]]]).astype(np.float32)
            with fluid.dygraph.guard():
                x = fluid.dygraph.to_variable(in1)
                out1 = paddle.sort(input=x, axis=-1)
                out2 = paddle.sort(input=x, axis=0)
                out3 = paddle.sort(input=x, axis=1)
                print(out1[0].numpy())
                # [[[5. 5. 8. 9.]
                #   [0. 0. 1. 7.]
                #   [2. 4. 6. 9.]]
                #  [[2. 2. 4. 5.]
                #   [4. 7. 7. 9.]
                #   [0. 1. 6. 7.]]]
                print(out1[1].numpy())
                # [[[0 3 1 2]
                #   [0 1 2 3]
                #   [2 3 0 1]]
                #  [[1 3 2 0]
                #   [0 1 2 3]
                #   [2 0 3 1]]]
                print(out2[0].numpy())
                # [[[5. 2. 4. 2.]
                #   [0. 0. 1. 7.]
                #   [1. 7. 0. 4.]]
                #  [[5. 8. 9. 5.]
                #   [4. 7. 7. 9.]
                #   [6. 9. 2. 6.]]]
                print(out3[0].numpy())
                # [[[0. 0. 1. 4.]
                #   [5. 8. 2. 5.]
                #   [6. 9. 9. 7.]]
                #  [[1. 2. 0. 2.]
                #   [4. 7. 4. 6.]
                #   [5. 7. 7. 9.]]]
    """
    # Must stay the first statement: locals() forwards exactly the call
    # arguments to LayerHelper.
    helper = LayerHelper("sort", **locals())

    sorted_values = out
    if sorted_values is None:
        sorted_values = helper.create_variable_for_type_inference(
            dtype=input.dtype, stop_gradient=False)
    # The index output never carries gradient.
    sorted_indices = helper.create_variable_for_type_inference(
        VarDesc.VarType.INT64, stop_gradient=True)

    op_outputs = {'Out': sorted_values, 'Indices': sorted_indices}
    op_attrs = {'axis': axis, 'descending': descending}
    # Sorting is delegated to the 'argsort' op, which emits both the sorted
    # data and the indices.
    helper.append_op(
        type='argsort',
        inputs={'X': input},
        outputs=op_outputs,
        attrs=op_attrs)
    return sorted_values, sorted_indices
C
Chengmo 已提交
379 380


381
def where(condition, x, y, name=None):
    """
    :alias_main: paddle.where
    :alias: paddle.where,paddle.tensor.where,paddle.tensor.search.where

    Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.

    .. math::

      out_i =
      \\begin{cases}
      x_i, \\quad  \\text{if}  \\ condition_i \\  is \\ True \\\\
      y_i, \\quad  \\text{if}  \\ condition_i \\  is \\ False \\\\
      \\end{cases}


    Args:
        condition(Variable): The condition to choose x or y. In static graph
            mode its data type is checked to be bool.
        x(Variable): x is a Tensor Variable with data type float32, float64, int32, int64.
        y(Variable): y is a Tensor Variable with data type float32, float64, int32, int64.

        name(str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        Variable: A Tensor with the same data type as x.

    Examples:
        .. code-block:: python

          import paddle
          import numpy as np
          import paddle.fluid as fluid

          x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float32")
          y_i = np.array([1.0, 1.0, 1.0, 1.0]).astype("float32")

          with fluid.dygraph.guard():
              x = fluid.dygraph.to_variable(x_i)
              y = fluid.dygraph.to_variable(y_i)
              out = paddle.where(x>1, x, y)

              print(out.numpy())
              #out: [1.0, 1.0, 3.2, 1.2]
    """
    # Argument validation only runs when building a static graph.
    if not in_dygraph_mode():
        check_variable_and_dtype(condition, 'condition', ['bool'], 'where')
        check_variable_and_dtype(
            x, 'x', ['float32', 'float64', 'int32', 'int64'], 'where')
        check_variable_and_dtype(
            y, 'y', ['float32', 'float64', 'int32', 'int64'], 'where')

    x_shape = list(x.shape)
    y_shape = list(y.shape)
    if x_shape == y_shape:
        # Identical shapes: use the dedicated 'where' op directly.
        if in_dygraph_mode():
            return core.ops.where(condition, x, y)
        else:
            helper = LayerHelper("where", **locals())
            out = helper.create_variable_for_type_inference(dtype=x.dtype)

            helper.append_op(
                type='where',
                inputs={'Condition': condition,
                        'X': x,
                        'Y': y},
                outputs={'Out': [out]})
            return out
    else:
        # Shapes differ: emulate the selection arithmetically as
        # x * cond + y * (not cond), with the masks cast to x's dtype.
        # NOTE(review): presumably this relies on the elementwise ops to
        # broadcast the operands - confirm against their shape rules.
        cond_int = layers.cast(condition, x.dtype)
        cond_not_int = layers.cast(layers.logical_not(condition), x.dtype)
        out1 = layers.elementwise_mul(x, cond_int)
        out2 = layers.elementwise_mul(y, cond_not_int)
        out = layers.elementwise_add(out1, out2)
        return out


C
Chengmo 已提交
459 460
def index_sample(x, index):
    """
    :alias_main: paddle.index_sample
    :alias: paddle.index_sample,paddle.tensor.index_sample,paddle.tensor.search.index_sample

    **IndexSample Layer**

    For every row of ``x``, pick the elements at the positions listed in the
    matching row of ``index``.

    .. code-block:: text


                Given:

                X = [[1, 2, 3, 4, 5],
                     [6, 7, 8, 9, 10]]

                Index = [[0, 1, 3],
                         [0, 2, 4]]

                Then:

                Out = [[1, 2, 4],
                       [6, 8, 10]]

    Args:
        x (Variable): The source input tensor with 2-D shape. Supported data type is
            int32, int64, float32, float64.
        index (Variable): The index input tensor with 2-D shape, first dimension should be same with X.
            Data type is int32 or int64.

    Returns:
        output (Variable): The output is a tensor with the same shape as index
        and the same data type as x.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.fluid as fluid
            import numpy as np

            data = np.array([[1.0, 2.0, 3.0, 4.0],
                                [5.0, 6.0, 7.0, 8.0],
                                [9.0, 10.0, 11.0, 12.0]]).astype('float32')

            data_index = np.array([[0, 1, 2],
                                    [1, 2, 3],
                                    [0, 0, 0]]).astype('int32')

            target_data = np.array([[100, 200, 300, 400],
                                    [500, 600, 700, 800],
                                    [900, 1000, 1100, 1200]]).astype('int32')

            with fluid.dygraph.guard():
                x = fluid.dygraph.to_variable(data)
                index = fluid.dygraph.to_variable(data_index)
                target = fluid.dygraph.to_variable(target_data)

                out_z1 = paddle.index_sample(x, index)
                print(out_z1.numpy())
                #[[1. 2. 3.]
                # [6. 7. 8.]
                # [9. 9. 9.]]

                # Use the index of the maximum value by topk op
                # get the value of the element of the corresponding index in other tensors
                top_value, top_index = fluid.layers.topk(x, k=2)
                out_z2 = paddle.index_sample(target, top_index)
                print(top_value.numpy())
                #[[ 4.  3.]
                # [ 8.  7.]
                # [12. 11.]]

                print(top_index.numpy())
                #[[3 2]
                # [3 2]
                # [3 2]]

                print(out_z2.numpy())
                #[[ 400  300]
                # [ 800  700]
                # [1200 1100]]

    """
    # Must stay the first statement: locals() forwards exactly the call
    # arguments to LayerHelper.
    helper = LayerHelper("index_sample", **locals())

    # Name reported in type-check error messages.
    api_name = 'paddle.tensor.search.index_sample'
    check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'],
                             api_name)
    check_variable_and_dtype(index, 'index', ['int32', 'int64'], api_name)

    sampled = helper.create_variable_for_type_inference(dtype=x.dtype)
    op_inputs = {'X': x, 'Index': index}
    helper.append_op(
        type='index_sample', inputs=op_inputs, outputs={'Out': sampled})
    return sampled