# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numbers

import paddle
from paddle.distribution import dirichlet, exponential_family


class Beta(exponential_family.ExponentialFamily):
    r"""
    Beta distribution parameterized by alpha and beta.

    In probability theory and statistics, the beta distribution is a family of
    continuous probability distributions defined on the interval [0, 1]
    parameterized by two positive shape parameters, denoted by alpha and beta,
    that appear as exponents of the random variable and control the shape of
    the distribution. The generalization to multiple variables is called a
    Dirichlet distribution.

    The probability density function (pdf) is

    .. math::

        f(x; \alpha, \beta) = \frac{1}{B(\alpha, \beta)}x^{\alpha-1}(1-x)^{\beta-1}

    where the normalization, B, is the beta function,

    .. math::

        B(\alpha, \beta) = \int_{0}^{1} t^{\alpha - 1} (1-t)^{\beta - 1}\mathrm{d}t


    Args:
        alpha (float|Tensor): Alpha parameter. It supports broadcast semantics.
            The value of alpha must be positive. When the parameter is a tensor,
            it represents multiple independent distributions with
            a batch_shape (refer to ``Distribution``).
        beta (float|Tensor): Beta parameter. It supports broadcast semantics.
            The value of beta must be positive (>0). When the parameter is a
            tensor, it represents multiple independent distributions with
            a batch_shape (refer to ``Distribution``).

    Examples:

        .. code-block:: python

            import paddle

            # scale input
            beta = paddle.distribution.Beta(alpha=0.5, beta=0.5)
            print(beta.mean)
            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.50000000])
            print(beta.variance)
            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.12500000])
            print(beta.entropy())
            # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.12500000])

            # tensor input with broadcast
            beta = paddle.distribution.Beta(alpha=paddle.to_tensor([0.2, 0.4]), beta=0.6)
            print(beta.mean)
            # Tensor(shape=[2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.25000000, 0.40000001])
            print(beta.variance)
            # Tensor(shape=[2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [0.10416666, 0.12000000])
            print(beta.entropy())
            # Tensor(shape=[2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
            #        [-1.91923141, -0.38095069])
    """

    def __init__(self, alpha, beta):
        # Promote python scalars to rank-1 tensors so that
        # ``paddle.broadcast_tensors`` below always receives tensors.
        if isinstance(alpha, numbers.Real):
            alpha = paddle.full(shape=[1], fill_value=alpha)

        if isinstance(beta, numbers.Real):
            beta = paddle.full(shape=[1], fill_value=beta)

        self.alpha, self.beta = paddle.broadcast_tensors([alpha, beta])

        # Beta(alpha, beta) is the first coordinate of Dirichlet([alpha, beta]);
        # log_prob, sample and entropy below delegate to this equivalent
        # two-dimensional Dirichlet.
        self._dirichlet = dirichlet.Dirichlet(
            paddle.stack([self.alpha, self.beta], -1))

        super(Beta, self).__init__(self._dirichlet._batch_shape)

    @property
    def mean(self):
        """Mean of beta distribution."""
        return self.alpha / (self.alpha + self.beta)

    @property
    def variance(self):
        """Variance of beta distribution."""
        # Var = alpha * beta / ((alpha + beta)^2 * (alpha + beta + 1)).
        # Named ``total`` rather than ``sum`` to avoid shadowing the builtin.
        total = self.alpha + self.beta
        return self.alpha * self.beta / (total.pow(2) * (total + 1))

    def prob(self, value):
        """Probability density function evaluated at value.

        Args:
            value (Tensor): Value to be evaluated.

        Returns:
            Tensor: Probability.
        """
        return paddle.exp(self.log_prob(value))

    def log_prob(self, value):
        """Log probability density function evaluated at value.

        Args:
            value (Tensor): Value to be evaluated.

        Returns:
            Tensor: Log probability.
        """
        # The Beta density at x equals the Dirichlet density at [x, 1 - x].
        return self._dirichlet.log_prob(paddle.stack([value, 1.0 - value], -1))

    def sample(self, shape=()):
        """Sample from beta distribution with sample shape.

        Args:
            shape (Sequence[int], optional): Sample shape.

        Returns:
            Sampled data with shape `sample_shape` + `batch_shape` + `event_shape`.
        """
        shape = shape if isinstance(shape, tuple) else tuple(shape)
        # Draw from the equivalent Dirichlet and keep the first coordinate.
        # NOTE(review): squeeze(axis=-1) after indexing with [..., 0] also
        # drops a trailing size-1 batch dimension -- confirm this matches the
        # documented output shape for batched parameters.
        return paddle.squeeze(self._dirichlet.sample(shape)[..., 0], axis=-1)

    def entropy(self):
        """Entropy of beta distribution.

        Returns:
            Tensor: Entropy.
        """
        # The entropy of Beta(alpha, beta) equals the entropy of the
        # equivalent Dirichlet([alpha, beta]).
        return self._dirichlet.entropy()

    @property
    def _natural_parameters(self):
        # Natural parameters of the exponential-family form of the Beta.
        return (self.alpha, self.beta)

    def _log_normalizer(self, x, y):
        # log B(x, y) = lgamma(x) + lgamma(y) - lgamma(x + y).
        return paddle.lgamma(x) + paddle.lgamma(y) - paddle.lgamma(x + y)