stat.py 19.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: define statistical functions of a tensor  
16

17
import numpy as np
Z
zhiboniu 已提交
18
from ..static import Variable
19
from ..fluid.layer_helper import LayerHelper
Z
zhiboniu 已提交
20
from ..framework import core
21
from .search import where
L
Liufang Sang 已提交
22
from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
23
import paddle
W
wanghuancoder 已提交
24
from paddle import _C_ops
25

26 27
__all__ = []

28 29 30 31 32 33

def mean(x, axis=None, keepdim=False, name=None):
    """
    Computes the mean of the input tensor's elements along ``axis``.

    Args:
34
        x (Tensor): The input Tensor with data type float32, float64.
35 36 37 38 39 40 41
        axis (int|list|tuple, optional): The axis along which to perform mean
            calculations. ``axis`` should be int, list(int) or tuple(int). If
            ``axis`` is a list/tuple of dimension(s), mean is calculated along
            all element(s) of ``axis`` . ``axis`` or element(s) of ``axis``
            should be in range [-D, D), where D is the dimensions of ``x`` . If
            ``axis`` or element(s) of ``axis`` is less than 0, it works the
            same way as :math:`axis + D` . If ``axis`` is None, mean is
42
            calculated over all elements of ``x``. Default is None.
43
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
44
            in the output Tensor. If ``keepdim`` is True, the dimensions of
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of average along ``axis`` of ``x``, with the same data
        type as ``x``.

    Examples:
        .. code-block:: python

            import paddle

Z
zhupengyang 已提交
60 61 62 63 64 65
            x = paddle.to_tensor([[[1., 2., 3., 4.],
                                   [5., 6., 7., 8.],
                                   [9., 10., 11., 12.]],
                                  [[13., 14., 15., 16.],
                                   [17., 18., 19., 20.],
                                   [21., 22., 23., 24.]]])
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
            out1 = paddle.mean(x)
            # [12.5]
            out2 = paddle.mean(x, axis=-1)
            # [[ 2.5  6.5 10.5]
            #  [14.5 18.5 22.5]]
            out3 = paddle.mean(x, axis=-1, keepdim=True)
            # [[[ 2.5]
            #   [ 6.5]
            #   [10.5]]
            #  [[14.5]
            #   [18.5]
            #   [22.5]]]
            out4 = paddle.mean(x, axis=[0, 2])
            # [ 8.5 12.5 16.5]
    """

    if isinstance(axis, int):
        axis = [axis]
    reduce_all = True if axis is None \
        or len(axis)==0 \
        or len(axis) == len(x.shape) else False
    if axis is None or len(axis) == 0:
        axis = [0]

Z
zhiboniu 已提交
90
    if paddle.in_dynamic_mode():
W
wanghuancoder 已提交
91 92
        return _C_ops.reduce_mean(x, 'dim', axis, 'keep_dim', keepdim,
                                  'reduce_all', reduce_all)
93

S
sneaxiy 已提交
94 95
    check_variable_and_dtype(x, 'x/input',
                             ['uint16', 'float16', 'float32', 'float64'],
96
                             'mean/reduce_mean')
97 98 99 100
    check_type(axis, 'axis/dim', (int, list, tuple), 'mean/reduce_mean')
    if isinstance(axis, (list, tuple)):
        for item in axis:
            check_type(item, 'elements of axis/dim', (int), 'mean/reduce_mean')
101 102 103 104 105 106 107

    helper = LayerHelper('mean', **locals())
    attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}
    out = helper.create_variable_for_type_inference(x.dtype)
    helper.append_op(
        type='reduce_mean', inputs={'X': x}, outputs={'Out': out}, attrs=attrs)
    return out
108 109


110
def var(x, axis=None, unbiased=True, keepdim=False, name=None):
111
    """
112
    Computes the variance of ``x`` along ``axis`` .
113 114

    Args:
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
        x (Tensor): The input Tensor with data type float32, float64.
        axis (int|list|tuple, optional): The axis along which to perform
            variance calculations. ``axis`` should be int, list(int) or
            tuple(int). If ``axis`` is a list/tuple of dimension(s), variance
            is calculated along all element(s) of ``axis`` . ``axis`` or
            element(s) of ``axis`` should be in range [-D, D), where D is the
            dimensions of ``x`` . If ``axis`` or element(s) of ``axis`` is less
            than 0, it works the same way as :math:`axis + D` . If ``axis`` is
            None, variance is calculated over all elements of ``x``. Default
            is None.
        unbiased (bool, optional): Whether to use the unbiased estimation. If
            ``unbiased`` is True, the divisor used in the computation is
            :math:`N - 1`, where :math:`N` represents the number of elements
            along ``axis`` , otherwise the divisor is :math:`N`. Default is True.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.
136 137

    Returns:
138 139
        Tensor, results of variance along ``axis`` of ``x``, with the same data
        type as ``x``.
140 141 142 143 144

    Examples:
        .. code-block:: python

            import paddle
145

Z
zhupengyang 已提交
146
            x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
147 148 149 150
            out1 = paddle.var(x)
            # [2.66666667]
            out2 = paddle.var(x, axis=1)
            # [1.         4.33333333]
151
    """
Z
zhiboniu 已提交
152
    if not paddle.in_dynamic_mode():
153 154 155 156
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'var')

    u = mean(x, axis, True, name)
    out = paddle.sum((x - u)**2, axis, keepdim=keepdim, name=name)
157

158 159
    n = paddle.cast(paddle.numel(x), x.dtype) \
        / paddle.cast(paddle.numel(out), x.dtype)
160
    if unbiased:
161 162 163 164 165
        one_const = paddle.ones([1], x.dtype)
        n = where(n > one_const, n - 1., one_const)
    out /= n
    return out

S
swtkiwi 已提交
166

167 168 169
def std(x, axis=None, unbiased=True, keepdim=False, name=None):
    """
    Computes the standard-deviation of ``x`` along ``axis`` .
L
Liufang Sang 已提交
170 171

    Args:
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
        x (Tensor): The input Tensor with data type float32, float64.
        axis (int|list|tuple, optional): The axis along which to perform
            standard-deviation calculations. ``axis`` should be int, list(int)
            or tuple(int). If ``axis`` is a list/tuple of dimension(s),
            standard-deviation is calculated along all element(s) of ``axis`` .
            ``axis`` or element(s) of ``axis`` should be in range [-D, D),
            where D is the dimensions of ``x`` . If ``axis`` or element(s) of
            ``axis`` is less than 0, it works the same way as :math:`axis + D` .
            If ``axis`` is None, standard-deviation is calculated over all
            elements of ``x``. Default is None.
        unbiased (bool, optional): Whether to use the unbiased estimation. If
            ``unbiased`` is True, the standard-deviation is calculated via the
            unbiased estimator. If ``unbiased`` is True,  the divisor used in
            the computation is :math:`N - 1`, where :math:`N` represents the
            number of elements along ``axis`` , otherwise the divisor is
            :math:`N`. Default is True.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.
L
Liufang Sang 已提交
195 196

    Returns:
197 198 199
        Tensor, results of standard-deviation along ``axis`` of ``x``, with the
        same data type as ``x``.

L
Liufang Sang 已提交
200 201 202 203
    Examples:
        .. code-block:: python

            import paddle
204

Z
zhupengyang 已提交
205
            x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
206 207 208 209
            out1 = paddle.std(x)
            # [1.63299316]
            out2 = paddle.std(x, axis=1)
            # [1.       2.081666]
L
Liufang Sang 已提交
210
    """
Z
zhiboniu 已提交
211
    if not paddle.in_dynamic_mode():
212 213 214 215
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'std')

    out = var(**locals())
    return paddle.sqrt(out)
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231


def numel(x, name=None):
    """
    Returns the number of elements for a tensor, which is a int64 Tensor with shape [1] in static mode
    or a scalar value in imperative mode

    Args:
        x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64.

    Returns:
        Tensor: The number of elements for the input Tensor.

    Examples:
        .. code-block:: python

232 233 234 235
            import paddle
            
            x = paddle.full(shape=[4, 5, 7], fill_value=0, dtype='int32')
            numel = paddle.numel(x) # 140
236 237 238


    """
Z
zhiboniu 已提交
239
    if paddle.in_dynamic_mode():
W
wanghuancoder 已提交
240
        return _C_ops.size(x)
241 242 243 244 245 246 247 248

    if not isinstance(x, Variable):
        raise TypeError("x must be a Tensor in numel")
    helper = LayerHelper('numel', **locals())
    out = helper.create_variable_for_type_inference(
        dtype=core.VarDesc.VarType.INT64)
    helper.append_op(type='size', inputs={'Input': x}, outputs={'Out': out})
    return out
Z
zhulei 已提交
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323


def median(x, axis=None, keepdim=False, name=None):
    """
    Compute the median along the specified axis.

    Args:
        x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64.
        axis (int, optional): The axis along which to perform median calculations ``axis`` should be int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of median along ``axis`` of ``x``. If data type of ``x`` is float64, data type of results will be float64, otherwise data type will be float32.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.arange(12).reshape([3, 4])
            # x is [[0 , 1 , 2 , 3 ],
            #       [4 , 5 , 6 , 7 ],
            #       [8 , 9 , 10, 11]]

            y1 = paddle.median(x)
            # y1 is [5.5]

            y2 = paddle.median(x, axis=0)
            # y2 is [4., 5., 6., 7.]

            y3 = paddle.median(x, axis=1)
            # y3 is [1.5, 5.5, 9.5]

            y4 = paddle.median(x, axis=0, keepdim=True)
            # y4 is [[4., 5., 6., 7.]]

    """
    if not isinstance(x, Variable):
        raise TypeError("In median, the input x should be a Tensor.")
    is_flatten = axis is None
    dims = len(x.shape)
    if is_flatten:
        x = paddle.flatten(x)
        axis = 0
    else:
        if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
            raise ValueError(
                "In median, axis should be none or an integer in range [-rank(x), rank(x))."
            )
        if axis < 0:
            axis += dims
    sz = x.shape[axis]
    kth = sz >> 1
    tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
    dtype = 'float64' if x.dtype == core.VarDesc.VarType.FP64 else 'float32'
    if sz & 1 == 0:
        out_tensor = paddle.slice(
            tensor_topk, axes=[axis], starts=[kth - 1],
            ends=[kth]) + paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1])
        out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
    else:
        out_tensor = paddle.cast(
            paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]),
            dtype=dtype)
324 325 326
    out_tensor = out_tensor + paddle.sum(
        paddle.cast(
            paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True)
Z
zhulei 已提交
327 328 329 330 331 332 333 334 335 336 337
    if not keepdim or is_flatten:
        if not is_flatten:
            newshape = x.shape[:axis] + x.shape[axis + 1:]
        elif not keepdim:
            newshape = [1]
        else:
            newshape = [1] * dims
    else:
        newshape = out_tensor.shape
    out_tensor = out_tensor.reshape(newshape, name=name)
    return out_tensor
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389


def quantile(x, q, axis=None, keepdim=False):
    """
    Compute the quantile of the input along the specified axis.

    Args:
        x (Tensor): The input Tensor, it's data type can be float32, float64.
        q (int|float|list): The q for calculate quantile, which should be in range [0, 1]. If q is a list, 
            each q will be calculated and the first dimension of output is same to the number of ``q`` .
        axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is a list, quantile is calculated over all elements of given axises.
            If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of quantile along ``axis`` of ``x``. If data type of ``x`` is float64, data type of results will be float64, otherwise data type will be float32.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.randn((2,3))
            #[[-1.28740597,  0.49533170, -1.00698614],
            # [-1.11656201, -1.01010525, -2.23457789]])

            y1 = paddle.quantile(x, q=0.5, axis=[0, 1])
            # y1 = -1.06333363

            y2 = paddle.quantile(x, q=0.5, axis=1)
            # y2 = [-1.00698614, -1.11656201]

            y3 = paddle.quantile(x, q=[0.3, 0.5], axis=1)
            # y3 =[[-1.11915410, -1.56376839],
            #      [-1.00698614, -1.11656201]]

            y4 = paddle.quantile(x, q=0.8, axis=1, keepdim=True)
            # y4 = [[-0.10559537],
            #       [-1.05268800]])
    """
    if not isinstance(x, Variable):
        raise TypeError("input x should be a Tensor.")
    dims = len(x.shape)
390
    out_shape = list(x.shape)
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
    if axis is None:
        x = paddle.flatten(x)
        axis = 0
        out_shape = [1] * dims
    else:
        if isinstance(axis, list):
            if (len(axis) <= 0):
                raise ValueError("axis should not be empty")
            axis_src, axis_dst = [], []
            for axis_single in axis:
                if not isinstance(axis_single, int) or not (
                        axis_single < dims and axis_single >= -dims):
                    raise ValueError(
                        "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
                    )
                if axis_single < 0:
                    axis_single = axis_single + dims
                axis_src.append(axis_single)
                out_shape[axis_single] = 1
            axis_dst = list(range(-len(axis), 0))
            x = paddle.moveaxis(x, axis_src, axis_dst)
            x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
            axis = axis_dst[0]
        else:
            if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
                raise ValueError(
                    "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
                )
            if axis < 0:
                axis += dims
            out_shape[axis] = 1
    indices = []
    if isinstance(q, (int, float)):
        if q < 0 or q > 1:
            raise ValueError("q should be in range [0, 1]")
        indices.append(q * (x.shape[axis] - 1))
    elif isinstance(q, (list, tuple)):
        if len(q) <= 0:
            raise ValueError("q should not be empty")
        for q_num in q:
            if q_num < 0 or q_num > 1:
                raise ValueError("q should be in range [0, 1]")
            indices.append(q_num * (x.shape[axis] - 1))
    else:
        raise TypeError("Type of q should be int, float, list or tuple.")
    sorted_tensor = paddle.sort(x, axis)
437 438 439
    indices_tensor = paddle.assign(indices).astype(paddle.float32)
    indices_below = paddle.floor(indices_tensor).astype(paddle.int32)
    indices_upper = paddle.ceil(indices_tensor).astype(paddle.int32)
440 441
    outputs = []

442 443 444 445 446 447 448
    def expand_dim(indices, sorted_tensor_shape, axis):
        assert axis < len(list(sorted_tensor_shape))
        expanded_shape = [1] * len(list(sorted_tensor_shape))
        expanded_shape = tuple(expanded_shape)
        indices = indices.reshape(expanded_shape)
        return indices

449 450 451
    # TODO(chenjianye): replace the for-loop to directly take elements.
    for i in range(len(indices)):
        if (indices_upper[i] != indices_below[i]):
452 453 454 455 456 457
            tensor_below = paddle.take_along_axis(
                sorted_tensor,
                expand_dim(indices_below[i], sorted_tensor.shape, axis), axis)
            tensor_upper = paddle.take_along_axis(
                sorted_tensor,
                expand_dim(indices_upper[i], sorted_tensor.shape, axis), axis)
458 459 460
            weights = (indices[i] - indices_below[i]).astype(x.dtype)
            out = paddle.lerp(tensor_below, tensor_upper, weights)
        else:
461 462 463
            out = paddle.take_along_axis(
                sorted_tensor,
                expand_dim(indices_below[i], sorted_tensor.shape, axis), axis)
464 465 466 467 468 469 470 471 472
        if not keepdim:
            out = paddle.squeeze(out, axis=axis)
        else:
            out = out.reshape(out_shape)
        outputs.append(out)
    if isinstance(q, (list, tuple)):
        return paddle.stack(outputs, 0)
    else:
        return outputs[0]