# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numbers

import paddle
from paddle.distribution import dirichlet, exponential_family


class Beta(exponential_family.ExponentialFamily):
    r"""
    Beta distribution parameterized by alpha and beta.

    In probability theory and statistics, the beta distribution is a family of
    continuous probability distributions defined on the interval [0, 1]
    parameterized by two positive shape parameters, denoted by alpha and beta,
    that appear as exponents of the random variable and control the shape of
    the distribution. The generalization to multiple variables is called a
    Dirichlet distribution.

    The probability density function (pdf) is

    .. math::

        f(x; \alpha, \beta) = \frac{1}{B(\alpha, \beta)}x^{\alpha-1}(1-x)^{\beta-1}

    where the normalization, B, is the beta function,

    .. math::

        B(\alpha, \beta) = \int_{0}^{1} t^{\alpha - 1} (1-t)^{\beta - 1}\mathrm{d}t


    Args:
        alpha (float|Tensor): Alpha parameter. It supports broadcast semantics.
            The value of alpha must be positive (>0). When the parameter is a
            tensor, it represents multiple independent distributions with
            a batch_shape (refer to ``Distribution``).
        beta (float|Tensor): Beta parameter. It supports broadcast semantics.
            The value of beta must be positive (>0). When the parameter is a
            tensor, it represents multiple independent distributions with
            a batch_shape (refer to ``Distribution``).

    Examples:

        .. code-block:: python

            import paddle

            # scalar input
            beta = paddle.distribution.Beta(alpha=0.5, beta=0.5)
            print(beta.mean)
            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.50000000])
            print(beta.variance)
            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.12500000])
            print(beta.entropy())
            # Approximately -0.24156 (= ln(pi) - psi(1/2) + psi(1)); the
            # trailing digits printed depend on float32 rounding.

            # tensor input with broadcast
            beta = paddle.distribution.Beta(alpha=paddle.to_tensor([0.2, 0.4]), beta=0.6)
            print(beta.mean)
            # Tensor(shape=[2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.25000000, 0.40000001])
            print(beta.variance)
            # Tensor(shape=[2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.10416666, 0.12000000])
            print(beta.entropy())
            # Tensor(shape=[2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [-1.91923141, -0.38095069])
    """

    def __init__(self, alpha, beta):
        # Promote plain Python numbers to one-element tensors so that both
        # parameters can go through tensor broadcasting below.
        if isinstance(alpha, numbers.Real):
            alpha = paddle.full(shape=[1], fill_value=alpha)

        if isinstance(beta, numbers.Real):
            beta = paddle.full(shape=[1], fill_value=beta)

        self.alpha, self.beta = paddle.broadcast_tensors([alpha, beta])

        # All density, sampling and entropy computations are delegated to a
        # Dirichlet whose concentration stacks (alpha, beta) on a new last
        # axis.
        self._dirichlet = dirichlet.Dirichlet(
            paddle.stack([self.alpha, self.beta], -1)
        )

        super().__init__(self._dirichlet._batch_shape)

    @property
    def mean(self):
        """Mean of beta distribution."""
        return self.alpha / (self.alpha + self.beta)

    @property
    def variance(self):
        """Variance of beta distribution."""
        # Named ``total`` rather than ``sum`` to avoid shadowing the builtin.
        total = self.alpha + self.beta
        return self.alpha * self.beta / (total.pow(2) * (total + 1))

    def prob(self, value):
        """Probability density function evaluated at value.

        Args:
            value (Tensor): Value to be evaluated.

        Returns:
            Tensor: Probability.
        """
        return paddle.exp(self.log_prob(value))

    def log_prob(self, value):
        """Log probability density function evaluated at value.

        Args:
            value (Tensor): Value to be evaluated.

        Returns:
            Tensor: Log probability.
        """
        # Evaluate the underlying Dirichlet at the pair (value, 1 - value)
        # stacked on a new last axis.
        return self._dirichlet.log_prob(paddle.stack([value, 1.0 - value], -1))

    def sample(self, shape=()):
        """Sample from beta distribution with sample shape.

        Args:
            shape (Sequence[int], optional): Sample shape.

        Returns:
            Sampled data with shape `sample_shape` + `batch_shape` + `event_shape`.
        """
        shape = shape if isinstance(shape, tuple) else tuple(shape)
        # Keep only the first component of each Dirichlet draw.
        # NOTE(review): squeeze(axis=-1) removes the last dimension whenever
        # its size is 1, which includes the [1] batch created for scalar
        # parameters -- confirm that this matches the documented return shape.
        return paddle.squeeze(self._dirichlet.sample(shape)[..., 0], axis=-1)

    def entropy(self):
        """Entropy of beta distribution.

        Returns:
            Tensor: Entropy.
        """
        return self._dirichlet.entropy()

    @property
    def _natural_parameters(self):
        """Tuple ``(alpha, beta)`` of the broadcast parameter tensors."""
        return (self.alpha, self.beta)

    def _log_normalizer(self, x, y):
        """Return ``lgamma(x) + lgamma(y) - lgamma(x + y)``."""
        return paddle.lgamma(x) + paddle.lgamma(y) - paddle.lgamma(x + y)