Unverified · Commit 2963e6a0 authored by Infinity_lee, committed by GitHub

[docs] fix some format issue (#45752)

* fix some error

* fix

* fix some error

* fix bugs

* fix some errors

* fix

* Update transform.py

fix some docs errors

* Update normal.py

fix some doc errors

* Update uniform.py

fix some docs errors

* Update kl.py

fix some docs errors

* Update math.py

fix some docs error

* Update math.py

fix heaviside links

* Update loss.py

fix

* Update transform.py

fix bugs

* Update math.py

fix

* fix some format issue

* Update normal.py

* fix missing np

* order imports

* fix some flake8 warning

* Update python/paddle/tensor/math.py

* fix OP-->API

* fix op

* fix grid_sample format

* trim trailing whitespace

* empty commit, test=document_fix

* empty commit
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com>
Parent 11002430
......@@ -38,11 +38,11 @@ def kl_divergence(p, q):
KL(p||q) = \int p(x)log\frac{p(x)}{q(x)} \mathrm{d}x
Args:
p (Distribution): ``Distribution`` object.
q (Distribution): ``Distribution`` object.
p (Distribution): ``Distribution`` object. Inherits from the ``Distribution`` base class.
q (Distribution): ``Distribution`` object. Inherits from the ``Distribution`` base class.
Returns:
Tensor: Batchwise KL-divergence between distribution p and q.
Tensor, Batchwise KL-divergence between distribution p and q.
Examples:
......@@ -71,8 +71,8 @@ def register_kl(cls_p, cls_q):
implementation function by the decorator.
Args:
cls_p(Distribution): Subclass derived from ``Distribution``.
cls_q(Distribution): Subclass derived from ``Distribution``.
cls_p (Distribution): The Distribution type of instance p. Subclass derived from ``Distribution``.
cls_q (Distribution): The Distribution type of instance q. Subclass derived from ``Distribution``.
Examples:
.. code-block:: python
......
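A minimal sketch of how ``kl_divergence`` and ``register_kl`` fit together, assuming the standard ``paddle.distribution`` imports; the subclass and its KL implementation below are hypothetical placeholders, not part of this patch:

.. code-block:: python

    import paddle
    from paddle.distribution import Normal, kl_divergence, register_kl

    p = Normal(loc=0., scale=1.)
    q = Normal(loc=1., scale=2.)
    # Batchwise KL-divergence between two registered distributions.
    print(kl_divergence(p, q))

    # Hypothetical subclass, used only to show pairwise registration.
    class MyNormal(Normal):
        pass

    @register_kl(MyNormal, MyNormal)
    def kl_mynormal_mynormal(p, q):
        # Placeholder body: delegate to the built-in Normal implementation.
        return kl_divergence(Normal(p.loc, p.scale), Normal(q.loc, q.scale))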
......@@ -36,7 +36,7 @@ class Normal(distribution.Distribution):
.. math::
pdf(x; \mu, \sigma) = \\frac{1}{Z}e^{\\frac {-0.5 (x - \mu)^2} {\sigma^2} }
pdf(x; \mu, \sigma) = \frac{1}{Z}e^{\frac {-0.5 (x - \mu)^2} {\sigma^2} }
.. math::
......@@ -49,43 +49,43 @@ class Normal(distribution.Distribution):
* :math:`Z`: is the normalization constant.
Args:
loc(int|float|list|tuple|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor.
scale(int|float|list|tuple|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor.
loc(int|float|list|tuple|numpy.ndarray|Tensor): The mean of normal distribution. The data type is float32 or float64.
scale(int|float|list|tuple|numpy.ndarray|Tensor): The std of normal distribution. The data type is float32 or float64.
name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Examples:
.. code-block:: python
import paddle
from paddle.distribution import Normal
# Define a single scalar Normal distribution.
dist = Normal(loc=0., scale=3.)
# Define a batch of two scalar valued Normals.
# The first has mean 1 and standard deviation 11, the second 2 and 22.
dist = Normal(loc=[1., 2.], scale=[11., 22.])
# Get 3 samples, returning a 3 x 2 tensor.
dist.sample([3])
# Define a batch of two scalar valued Normals.
# Both have mean 1, but different standard deviations.
dist = Normal(loc=1., scale=[11., 22.])
# Complete example
value_tensor = paddle.to_tensor([0.8], dtype="float32")
normal_a = Normal([0.], [1.])
normal_b = Normal([0.5], [2.])
sample = normal_a.sample([2])
# a random tensor created by normal distribution with shape: [2, 1]
entropy = normal_a.entropy()
# [1.4189385] with shape: [1]
lp = normal_a.log_prob(value_tensor)
# [-1.2389386] with shape: [1]
p = normal_a.probs(value_tensor)
# [0.28969154] with shape: [1]
kl = normal_a.kl_divergence(normal_b)
# [0.34939718] with shape: [1]
import paddle
from paddle.distribution import Normal
# Define a single scalar Normal distribution.
dist = Normal(loc=0., scale=3.)
# Define a batch of two scalar valued Normals.
# The first has mean 1 and standard deviation 11, the second 2 and 22.
dist = Normal(loc=[1., 2.], scale=[11., 22.])
# Get 3 samples, returning a 3 x 2 tensor.
dist.sample([3])
# Define a batch of two scalar valued Normals.
# Both have mean 1, but different standard deviations.
dist = Normal(loc=1., scale=[11., 22.])
# Complete example
value_tensor = paddle.to_tensor([0.8], dtype="float32")
normal_a = Normal([0.], [1.])
normal_b = Normal([0.5], [2.])
sample = normal_a.sample([2])
# a random tensor created by normal distribution with shape: [2, 1]
entropy = normal_a.entropy()
# [1.4189385] with shape: [1]
lp = normal_a.log_prob(value_tensor)
# [-1.2389386] with shape: [1]
p = normal_a.probs(value_tensor)
# [0.28969154] with shape: [1]
kl = normal_a.kl_divergence(normal_b)
# [0.34939718] with shape: [1]
"""
def __init__(self, loc, scale, name=None):
......@@ -132,11 +132,11 @@ class Normal(distribution.Distribution):
"""Generate samples of the specified shape.
Args:
shape (list): 1D `int32`. Shape of the generated samples.
seed (int): Python integer number.
shape (list): 1D `int32`. Shape of the generated samples.
seed (int): Python integer number.
Returns:
Tensor: A tensor with prepended dimensions shape.The data type is float32.
Tensor, A tensor with prepended dimensions shape. The data type is float32.
"""
if not _non_static_mode():
......@@ -177,14 +177,14 @@ class Normal(distribution.Distribution):
.. math::
entropy(\sigma) = 0.5 \\log (2 \pi e \sigma^2)
entropy(\sigma) = 0.5 \log (2 \pi e \sigma^2)
In the above equation:
* :math:`scale = \sigma`: is the std.
Returns:
Tensor: Shannon entropy of normal distribution.The data type is float32.
Tensor, Shannon entropy of normal distribution. The data type is float32.
"""
name = self.name + '_entropy'
......@@ -221,10 +221,10 @@ class Normal(distribution.Distribution):
"""Probability density/mass function.
Args:
value (Tensor): The input tensor.
value (Tensor): The input tensor.
Returns:
Tensor: probability.The data type is same with value.
Tensor, probability. The data type is the same as ``value``.
"""
name = self.name + '_probs'
......@@ -243,11 +243,11 @@ class Normal(distribution.Distribution):
.. math::
KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\\frac{diff}{\sigma_1})^2 - 1 - 2 \\ln {ratio})
KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\frac{diff}{\sigma_1})^2 - 1 - 2 \ln {ratio})
.. math::
ratio = \\frac{\sigma_0}{\sigma_1}
ratio = \frac{\sigma_0}{\sigma_1}
.. math::
......@@ -266,7 +266,7 @@ class Normal(distribution.Distribution):
other (Normal): instance of Normal.
Returns:
Tensor: kl-divergence between two normal distributions.The data type is float32.
Tensor, kl-divergence between two normal distributions. The data type is float32.
"""
if not _non_static_mode():
......
......@@ -58,7 +58,7 @@ class Transform(object):
Suppose :math:`X` is a K-dimensional random variable with probability
density function :math:`p_X(x)`. A new random variable :math:`Y = f(X)` may
be defined by transforming :math:`X` with a suitably well-behaved function
:math:`f`. It suffices for what follows to note that if f is one-to-one and
:math:`f`. It suffices for what follows to note that if `f` is one-to-one and
its inverse :math:`f^{-1}` has a well-defined Jacobian, then the density of
:math:`Y` is
......@@ -1001,8 +1001,9 @@ class StackTransform(Transform):
specific axis.
Args:
transforms(Sequence[Transform]): The sequence of transformations.
axis(int): The axis along which will be transformed.
transforms (Sequence[Transform]): The sequence of transformations.
axis (int, optional): The axis along which to transform. Default value is 0.
Examples:
......@@ -1010,7 +1011,6 @@ class StackTransform(Transform):
import paddle
x = paddle.stack(
(paddle.to_tensor([1., 2., 3.]), paddle.to_tensor([1, 2., 3.])), 1)
t = paddle.distribution.StackTransform(
......@@ -1023,11 +1023,13 @@ class StackTransform(Transform):
# [[2.71828175 , 1. ],
# [7.38905621 , 4. ],
# [20.08553696, 9. ]])
print(t.inverse(t.forward(x)))
# Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[1., 1.],
# [2., 2.],
# [3., 3.]])
print(t.forward_log_det_jacobian(x))
# Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[1. , 0.69314718],
......
......@@ -37,7 +37,7 @@ class Uniform(distribution.Distribution):
.. math::
pdf(x; a, b) = \\frac{1}{Z}, \ a <=x <b
pdf(x; a, b) = \frac{1}{Z}, \ a <= x < b
.. math::
......@@ -50,43 +50,45 @@ class Uniform(distribution.Distribution):
* :math:`Z`: is the normalizing constant.
The parameters `low` and `high` must be shaped in a way that supports
[broadcasting](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/beginners_guide/basic_concept/broadcasting_en.html) (e.g., `high - low` is a valid operation).
:ref:`user_guide_broadcasting` (e.g., `high - low` is a valid operation).
Args:
low(int|float|list|tuple|numpy.ndarray|Tensor): The lower boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor
high(int|float|list|tuple|numpy.ndarray|Tensor): The higher boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor
name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
low(int|float|list|tuple|numpy.ndarray|Tensor): The lower boundary of
uniform distribution. The data type is float32 or float64.
high(int|float|list|tuple|numpy.ndarray|Tensor): The higher boundary
of uniform distribution. The data type is float32 or float64.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Examples:
.. code-block:: python
import paddle
from paddle.distribution import Uniform
# Without broadcasting, a single uniform distribution [3, 4]:
u1 = Uniform(low=3.0, high=4.0)
# 2 distributions [1, 3], [2, 4]
u2 = Uniform(low=[1.0, 2.0], high=[3.0, 4.0])
# 4 distributions
u3 = Uniform(low=[[1.0, 2.0], [3.0, 4.0]],
high=[[1.5, 2.5], [3.5, 4.5]])
# With broadcasting:
u4 = Uniform(low=3.0, high=[5.0, 6.0, 7.0])
# Complete example
value_tensor = paddle.to_tensor([0.8], dtype="float32")
uniform = Uniform([0.], [2.])
sample = uniform.sample([2])
# a random tensor created by uniform distribution with shape: [2, 1]
entropy = uniform.entropy()
# [0.6931472] with shape: [1]
lp = uniform.log_prob(value_tensor)
# [-0.6931472] with shape: [1]
p = uniform.probs(value_tensor)
# [0.5] with shape: [1]
import paddle
from paddle.distribution import Uniform
# Without broadcasting, a single uniform distribution [3, 4]:
u1 = Uniform(low=3.0, high=4.0)
# 2 distributions [1, 3], [2, 4]
u2 = Uniform(low=[1.0, 2.0], high=[3.0, 4.0])
# 4 distributions
u3 = Uniform(low=[[1.0, 2.0], [3.0, 4.0]],
high=[[1.5, 2.5], [3.5, 4.5]])
# With broadcasting:
u4 = Uniform(low=3.0, high=[5.0, 6.0, 7.0])
# Complete example
value_tensor = paddle.to_tensor([0.8], dtype="float32")
uniform = Uniform([0.], [2.])
sample = uniform.sample([2])
# a random tensor created by uniform distribution with shape: [2, 1]
entropy = uniform.entropy()
# [0.6931472] with shape: [1]
lp = uniform.log_prob(value_tensor)
# [-0.6931472] with shape: [1]
p = uniform.probs(value_tensor)
# [0.5] with shape: [1]
"""
def __init__(self, low, high, name=None):
......@@ -132,11 +134,11 @@ class Uniform(distribution.Distribution):
"""Generate samples of the specified shape.
Args:
shape (list): 1D `int32`. Shape of the generated samples.
seed (int): Python integer number.
shape (list): 1D `int32`. Shape of the generated samples.
seed (int): Python integer number.
Returns:
Tensor: A tensor with prepended dimensions shape.The data type is float32.
Tensor, A tensor with prepended dimensions shape. The data type is float32.
"""
if not _non_static_mode():
......@@ -179,10 +181,10 @@ class Uniform(distribution.Distribution):
"""Log probability density/mass function.
Args:
value (Tensor): The input tensor.
value (Tensor): The input tensor.
Returns:
Tensor: log probability.The data type is same with value.
Tensor, log probability. The data type is the same as ``value``.
"""
value = self._check_values_dtype_in_probs(self.low, value)
......@@ -216,10 +218,10 @@ class Uniform(distribution.Distribution):
"""Probability density/mass function.
Args:
value (Tensor): The input tensor.
value (Tensor): The input tensor.
Returns:
Tensor: probability.The data type is same with value.
Tensor, probability. The data type is the same as ``value``.
"""
value = self._check_values_dtype_in_probs(self.low, value)
......@@ -256,7 +258,7 @@ class Uniform(distribution.Distribution):
entropy(low, high) = \\log (high - low)
Returns:
Tensor: Shannon entropy of uniform distribution.The data type is float32.
Tensor, Shannon entropy of uniform distribution. The data type is float32.
"""
name = self.name + '_entropy'
......
......@@ -44,8 +44,7 @@ def celu(x, alpha=1.0, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
alpha (float, optional): The 'alpha' value of the CELU formulation. Default is 1.0.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -95,8 +94,7 @@ def elu(x, alpha=1.0, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
alpha (float, optional): The 'alpha' value of the ELU formulation. Default is 1.0.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -145,6 +143,8 @@ def gelu(x, approximate=False, name=None):
r"""
gelu activation.
The activation function of Gelu is calculated element by element. For more information, please refer to `Gaussian Error Linear Units <https://arxiv.org/abs/1606.08415>`_.
if approximate is True
.. math::
......@@ -159,9 +159,8 @@ def gelu(x, approximate=False, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
approximate (bool, optional): Wether to enable approximation. Default is False.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
approximate (bool, optional): Whether to enable approximation. Default is False.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
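A short hedged sketch of calling ``gelu`` in both formulations (the input values are illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1., 0.5, 1.])
    out = F.gelu(x)                           # erf-based formulation
    out_approx = F.gelu(x, approximate=True)  # tanh approximation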
......@@ -214,9 +213,8 @@ def hardshrink(x, threshold=0.5, name=None):
Args:
x (Tensor): The input Tensor with data type float32, float64.
threshold (float, optional): The value of threshold for hardthrink. Default is 0.5
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
threshold (float, optional): The value of threshold for hardshrink. Default is 0.5.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -267,8 +265,7 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
x (Tensor): The input Tensor with data type float32, float64.
min (float, optional): The minimum value of the linear region range. Default is -1.
max (float, optional): The maximum value of the linear region range. Default is 1.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -327,8 +324,7 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
x (Tensor): The input Tensor with data type float32, float64.
slope (float, optional): The slope of hardsigmoid function. Default is 0.1666667.
offset (float, optional): The offset of hardsigmoid function. Default is 0.5.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -385,8 +381,7 @@ def hardswish(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -432,8 +427,7 @@ def leaky_relu(x, negative_slope=0.01, name=None):
x (Tensor): The input Tensor with data type float32, float64.
negative_slope (float, optional): Slope of the activation function at
:math:`x < 0` . Default is 0.01.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -479,8 +473,7 @@ def prelu(x, weight, data_format="NCHW", name=None):
x (Tensor): The input Tensor with data type float32, float64.
weight (Tensor): The learnable parameter with data type same as ``x``.
The weight shape is [1] or [in], where `in` is the input channel of ``x``.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
data_format(str, optional): Data format that specifies the layout of input.
It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW".
......@@ -607,8 +600,7 @@ def rrelu(x, lower=1. / 8., upper=1. / 3., training=True, name=None):
lower (float, optional): The lower bound of uniform distribution. Default: 0.125.
upper (float, optional): The upper bound of uniform distribution. Default: 0.333.
training (bool, optional): Current mode is in training or others. Default is True.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -691,8 +683,7 @@ def relu(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -742,8 +733,7 @@ def log_sigmoid(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -803,8 +793,7 @@ def maxout(x, groups, axis=1, name=None):
is NHWC. If ``axis`` < 0, it works the same way as :math:`axis + D` ,
where D is the dimensions of ``x`` . ``axis`` only supports 1, 3 or -1.
Default is 1.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type as ``x`` .
......@@ -861,8 +850,7 @@ def relu6(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -915,8 +903,7 @@ def selu(x,
x (Tensor): The input Tensor with data type float32, float64.
scale (float, optional): The value of scale(must be greater than 1.0) for selu. Default is 1.0507009873554804934193349852946
alpha (float, optional): The value of alpha(must be no less than zero) for selu. Default is 1.6732632423543772848170429916717
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -968,8 +955,7 @@ def silu(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1079,8 +1065,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
dimensions of ``x`` . If ``axis`` < 0, it works the same way as
:math:`axis + D` . Default is -1.
dtype (str, optional): The data type of the output tensor, can be float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same shape and data type (use ``dtype`` if it is
......@@ -1194,8 +1179,7 @@ def softplus(x, beta=1, threshold=20, name=None):
x (Tensor): The input Tensor with data type float32, float64.
beta (float, optional): The value of beta for softplus. Default is 1
threshold (float, optional): The value of threshold for softplus. Default is 20
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1249,8 +1233,7 @@ def softshrink(x, threshold=0.5, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
threshold (float, optional): The value of threshold (must be no less than zero) for softshrink. Default is 0.5.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1296,8 +1279,7 @@ def softsign(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1335,8 +1317,7 @@ def swish(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1381,8 +1362,7 @@ def mish(x, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1418,8 +1398,7 @@ def tanhshrink(x, name=None):
Args:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1466,8 +1445,7 @@ def thresholded_relu(x, threshold=1.0, name=None):
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
threshold (float, optional): The value of threshold for thresholded_relu. Default is 1.0
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type and shape as ``x`` .
......@@ -1524,8 +1502,7 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
preventing data type overflows. Supported dtype: float32, float64.
If ``dtype`` is None, the output Tensor has the same dtype as x.
Default is None.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same shape and data type (use ``dtype`` if it is
......@@ -1615,8 +1592,7 @@ def glu(x, axis=-1, name=None):
should be in range [-D, D), where D is the dimensions of ``x`` .
If ``axis`` < 0, it works the same way as :math:`axis + D` .
Default is -1.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
A Tensor with the same data type as x. The size of the given axis is
......@@ -1678,8 +1654,7 @@ def gumbel_softmax(x, temperature=1.0, hard=False, axis=-1, name=None):
in autograd. Default is False.
axis (int, optional): The axis along will be calculated softmax value.
Default is -1.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
Sampled tensor of same shape as ``x`` from the Gumbel-Softmax distribution.
......
......@@ -176,6 +176,7 @@ def interpolate(x,
"""
This API resizes a batch of images.
The input must be a 3-D Tensor of the shape (num_batches, channels, in_w)
or 4-D (num_batches, channels, in_h, in_w), or a 5-D Tensor of the shape
(num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels),
......@@ -184,12 +185,13 @@ def interpolate(x,
and the resizing only applies on the three dimensions(depth, height and width).
Supporting resample methods:
'linear' : Linear interpolation
'bilinear' : Bilinear interpolation
'trilinear' : Trilinear interpolation
'nearest' : Nearest neighbor interpolation
'bicubic' : Bicubic interpolation
'area': Area interpolation
- 'linear' : Linear interpolation
- 'bilinear' : Bilinear interpolation
- 'trilinear' : Trilinear interpolation
- 'nearest' : Nearest neighbor interpolation
- 'bicubic' : Bicubic interpolation
- 'area': Area interpolation
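Before the per-mode details that follow, a minimal hedged usage sketch (tensor shapes and sizes are illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([1, 3, 32, 32])  # (num_batches, channels, in_h, in_w)
    # Resize to an explicit output size with bilinear interpolation.
    out = F.interpolate(x, size=[64, 64], mode='bilinear', align_corners=False)
    # Or resize by a scale factor with nearest neighbor interpolation.
    out2 = F.interpolate(x, scale_factor=2, mode='nearest')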
Linear interpolation is the method of using a line connecting two known quantities
to determine the value of an unknown quantity between the two known quantities.
......@@ -226,13 +228,13 @@ def interpolate(x,
.. code-block:: text
For scale_factor:
# For scale_factor:
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
Linear interpolation:
# Linear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,W_in)
......@@ -243,7 +245,7 @@ def interpolate(x,
output: (N,C,W_out) where:
W_out = W_{in} * scale_{factor}
Nearest neighbor interpolation:
# Nearest neighbor interpolation:
align_corners = False
input : (N,C,H_in,W_in)
......@@ -251,7 +253,7 @@ def interpolate(x,
H_out = floor (H_{in} * scale_{factor})
W_out = floor (W_{in} * scale_{factor})
Bilinear interpolation:
# Bilinear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
......@@ -264,7 +266,7 @@ def interpolate(x,
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
Bicubic interpolation:
# Bicubic interpolation:
if:
align_corners = False
input : (N,C,H_in,W_in)
......@@ -277,7 +279,7 @@ def interpolate(x,
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
Trilinear interpolation:
# Trilinear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,D_in,H_in,W_in)
......@@ -907,15 +909,16 @@ def dropout(x,
training (bool, optional): A flag indicating whether it is in train phase or not. Default True.
mode(str, optional): ['upscale_in_train'(default) | 'downscale_in_infer'].
1. upscale_in_train(default), upscale the output at training time
1. upscale_in_train(default), upscale the output at training time
- train: out = input * mask / ( 1.0 - dropout_prob )
- inference: out = input
- train: out = input * mask / ( 1.0 - dropout_prob )
- inference: out = input
2. downscale_in_infer, downscale the output at inference
2. downscale_in_infer, downscale the output at inference
- train: out = input * mask
- inference: out = input * (1.0 - dropout_prob)
- train: out = input * mask
- inference: out = input * (1.0 - dropout_prob)
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
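A minimal hedged sketch contrasting the two modes (the dropout probability is illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.ones([2, 3])
    # upscale_in_train: kept values are scaled by 1/(1 - p) at train time...
    y_train = F.dropout(x, p=0.5, training=True, mode='upscale_in_train')
    # ...so inference is the identity.
    y_infer = F.dropout(x, p=0.5, training=False, mode='upscale_in_train')  # equals x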
......@@ -1776,12 +1779,12 @@ def linear(x, weight, bias=None, name=None):
def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
r"""
Label smoothing is a mechanism to regularize the classifier layer and is called
label-smoothing regularization (LSR).
label-smoothing regularization (LSR). Label smoothing is proposed to encourage
the model to be less confident, since optimizing the log-likelihood of the
correct label directly may cause overfitting and reduce the ability of the
model to adapt.
Label smoothing is proposed to encourage the model to be less confident,
since optimizing the log-likelihood of the correct label directly may
cause overfitting and reduce the ability of the model to adapt. Label
smoothing replaces the ground-truth label :math:`y` with the weighted sum
Label smoothing replaces the ground-truth label :math:`y` with the weighted sum
of itself and some fixed distribution :math:`\mu`. For class :math:`k`,
i.e.
......
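A hedged sketch of the smoothing described above; when ``prior_dist`` is left as None, a uniform distribution over classes is assumed:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    # One-hot labels for 3 samples over 4 classes.
    label = paddle.to_tensor([[0., 1., 0., 0.],
                              [1., 0., 0., 0.],
                              [0., 0., 0., 1.]])
    smoothed = F.label_smooth(label, epsilon=0.1)
    # Each row now equals (1 - epsilon) * label + epsilon / num_classes.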
......@@ -854,15 +854,18 @@ def hsigmoid_loss(input,
"""
The hierarchical sigmoid organizes the classes into a complete binary tree to reduce the computational complexity
and speed up the model training, especially the training of language models.
Each leaf node of the complete binary tree represents a class(word) and each non-leaf node acts as a binary classifier.
For each class(word), there's a unique path from root to itself; hsigmoid calculates the cost for each non-leaf node on
the path, and sum them to get a total cost.
Comparing to softmax, the OP can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N`
Compared to softmax, hsigmoid can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N`
represents the number of classes or the size of word dict.
The OP supports default tree and custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural
Network Language Model <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`_. For the custom
tree, you need to set :attr:`is_custom` to True, and do the following steps (take the language model as an example):
The API supports default tree and custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural
Network Language Model <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`_.
For the custom tree, you need to set :attr:`is_custom` to True, and do the following steps (take the language model as an example):
1. Using a custom word dict to build a binary tree, each leaf node should be a word in the word dict.
2. Creating a dict that maps word_id -> path from the word to the root node; we call it path_table.
......@@ -1732,9 +1735,7 @@ def margin_cross_entropy(logits,
.. hint::
The API supports single GPU and multi GPU, and does not support CPU.
For data parallel mode, set ``group=False``.
For model parallel mode, set ``group=None`` or the group instance returned by paddle.distributed.new_group.
And logits.shape[-1] can be different at each rank.
......@@ -1757,12 +1758,12 @@ def margin_cross_entropy(logits,
Default value is `'mean'`.
Returns:
``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
`return_softmax` is False, otherwise the tuple \
(loss, softmax), softmax is shard_softmax when \
using model parallel, otherwise softmax is in \
the same shape with input logits. If ``reduction == None``, \
the shape of loss is ``[N, 1]``, otherwise the shape is ``[1]``.
Tensor|tuple[Tensor, Tensor], return the cross entropy loss if
`return_softmax` is False, otherwise the tuple (loss, softmax),
softmax is shard_softmax when using model parallel, otherwise
softmax is in the same shape with input logits. If
``reduction == None``, the shape of loss is ``[N, 1]``, otherwise
the shape is ``[1]``.
Examples:
......
......@@ -31,7 +31,7 @@ __all__ = []
def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
r"""
This op normalizes ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes
Normalize ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes
.. math::
......@@ -45,7 +45,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
Parameters:
x (Tensor): The input tensor could be N-D tensor, and the input data type could be float32 or float64.
p (float|int, optional): The exponent value in the norm formulation. Default: 2
p (float|int, optional): The exponent value in the norm formulation. Default: 2.
axis (int, optional): The axis on which to apply normalization. If `axis < 0`, the dimension to normalization is `x.ndim + axis`. -1 is the last dimension.
epsilon (float, optional): Small float added to denominator to avoid dividing by zero. Default is 1e-12.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
......
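A small hedged sketch of row-wise L2 normalization (the input values are illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([[3., 4.], [0., 5.]])
    # L2-normalize along axis 1; the row [3, 4] becomes [0.6, 0.8].
    out = F.normalize(x, p=2, axis=1)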
......@@ -124,7 +124,7 @@ def grid_sample(x,
align_corners=True,
name=None):
"""
This operation samples input X by using bilinear interpolation or
Sample input X by using bilinear interpolation or
nearest interpolation based on flow field grid, which is usually
generated by :code:`affine_grid` . When the input X is 4-D Tensor,
the grid of shape [N, H, W, 2] is the concatenation of (x, y)
......@@ -209,6 +209,7 @@ def grid_sample(x,
None by default.
Returns:
Tensor, The shape of output is [N, C, grid_H, grid_W] or [N, C, grid_D, grid_H, grid_W] in which `grid_D` is the depth of grid,
`grid_H` is the height of grid and `grid_W` is the width of grid. The data type is the same as the input tensor.
......
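A minimal hedged sketch of sampling with a flow-field grid (shapes and values are illustrative; grid coordinates live in [-1, 1]):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([1, 1, 3, 3])              # input feature map [N, C, H, W]
    grid = paddle.rand([1, 4, 4, 2]) * 2 - 1   # sampling locations in [-1, 1]
    out = F.grid_sample(x, grid, mode='bilinear', padding_mode='zeros',
                        align_corners=True)
    # out has shape [N, C, grid_H, grid_W] = [1, 1, 4, 4]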
......@@ -132,12 +132,12 @@ def make_scheduler(*,
skip_first(int, optional): The number of first steps to drop, which do not participate in the state transform and stay at ProfilerState.CLOSED state. Default value is 0.
Returns:
A scheduler function, conforms to above state transform setting. The function will takes one parameter step_num, and returns corresponding ProfilerState.
A scheduler function, conforms to above state transform setting. The function takes one parameter `step_num` and returns the corresponding ProfilerState.
Examples:
1. profiling range [2, 5]
1. profiling range [2, 5].
Assume batch 0: closed, batch 1: ready, batch [2, 5] record
Assume batch 0: closed, batch 1: ready, batch [2, 5] record.
.. code-block:: python
:name: code-example1
......@@ -146,9 +146,9 @@ def make_scheduler(*,
profiler.make_scheduler(closed=1, ready=1, record=4, repeat=1)
2. profiling range [3,6], [9,12], [15,18]...
2. profiling range [3,6], [9,12], [15,18].
Assume batch 0: skiped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat
Assume batch 0: skipped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat.
.. code-block:: python
:name: code-example2
......@@ -196,12 +196,12 @@ def export_chrome_tracing(dir_name: str,
worker_name: Optional[str] = None) -> Callable:
r"""
Return a callable, used for outputting tracing data to chrome tracing format file.
The output file will be saved in directory ``dir_name``, and file name will be set as worker_name.
if worker_name is not set, the default name is [hostname]_[pid].
The output file will be saved in directory ``dir_name``, and file name will be set as `worker_name`.
If `worker_name` is not set, the default name is `[hostname]_[pid]`.
Args:
dir_name(str): Directory to save profiling data.
worker_name(str, optional): Prefix of the file name saved, default is [hostname]_[pid].
worker_name(str, optional): Prefix of the file name saved, default is `[hostname]_[pid]`.
Returns:
A callable, which takes a Profiler object as parameter and calls its export method to save data to chrome tracing format file.
......@@ -246,12 +246,12 @@ def export_protobuf(dir_name: str,
worker_name: Optional[str] = None) -> Callable:
r"""
Return a callable, used for outputting tracing data to protobuf file.
The output file will be saved in directory ``dir_name``, and file name will be set as worker_name.
if worker_name is not set, the default name is [hostname]_[pid].
The output file will be saved in directory ``dir_name``, and file name will be set as ``worker_name``.
If ``worker_name`` is not set, the default name is `[hostname]_[pid]`.
Args:
dir_name(str): Directory to save profiling data.
worker_name(str, optional): Prefix of the file name saved, default is [hostname]_[pid].
worker_name(str, optional): Prefix of the file name saved, default is `[hostname]_[pid]`.
Returns:
A callable, which takes a Profiler object as parameter and calls its export method to save data to protobuf file.
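A minimal hedged sketch of wiring such a callback into a profiler (the output directory and batch range are illustrative):

.. code-block:: python

    import paddle.profiler as profiler

    p = profiler.Profiler(
        targets=[profiler.ProfilerTarget.CPU],
        scheduler=(2, 5),
        on_trace_ready=profiler.export_protobuf('./profiler_log'))
    p.start()
    for step in range(8):
        # train one batch here
        p.step()
    p.stop()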
......@@ -317,7 +317,7 @@ class Profiler:
If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch,
which means profiling range [start_batch, end_batch).
on_trace_ready (Callable, optional): Callable object, serves as callback function, and takes the Profiler object as parameter, which provides a way for users to do post-processing.
This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing <api_paddle_profiler_export_chrome_tracing>` (./profiler_log/).
This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing <api_paddle_profiler_export_chrome_tracing>`.
timer_only (bool, optional): If it is True, the cost of Dataloader and every step of the model will be counted without profiling. Otherwise, the model will
be timed and profiled. Default: False.
record_shapes (bool, optional): If it is True, collect op's input shape information. Default: False.
......@@ -339,7 +339,7 @@ class Profiler:
#train()
p.step()
2. profiling range [2,4], [7, 9], [11,13]
2. profiling range [2,4], [7, 9], [11,13].
.. code-block:: python
:name: code-example2
......@@ -354,7 +354,7 @@ class Profiler:
#train()
p.step()
3. Use profiler without context manager, and use default parameters
3. Use profiler without context manager, and use default parameters.
.. code-block:: python
:name: code-example3
......@@ -369,7 +369,7 @@ class Profiler:
p.stop()
p.summary()
4. Use profiler to get throughput and cost of the model
4. Use profiler to get throughput and cost of the model.
.. code-block:: python
:name: code-example-timer1
......@@ -399,8 +399,7 @@ class Profiler:
dataset = RandomDataset(20 * 4)
simple_net = SimpleNet()
opt = paddle.optimizer.SGD(learning_rate=1e-3,
parameters=simple_net.parameters())
opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=simple_net.parameters())
BATCH_SIZE = 4
loader = paddle.io.DataLoader(
dataset,
......@@ -531,7 +530,7 @@ class Profiler:
prof.stop()
'''
# Timing only without profiling
# Timing only without profiling.
benchmark().begin()
if not self.timer_only or self.emit_nvtx:
utils._is_profiler_used = True
......@@ -584,7 +583,7 @@ class Profiler:
if self.profile_memory:
disable_memory_recorder()
# self.current_state -> CLOSED
# In this situation, RECORD state is regarded as RECORD_AND_RETURN
# In this situation, RECORD state is regarded as RECORD_AND_RETURN.
if self.record_event:
self.record_event.end()
self.record_event = None
......@@ -607,7 +606,7 @@ class Profiler:
Args:
num_samples (int|None, optional): Specifies the batch size of every step of the model
that is used to compute throughput when timer_only is True. Default: None.
that is used to compute throughput when `timer_only` is True. Default: None.
Examples:
.. code-block:: python
......@@ -645,7 +644,7 @@ class Profiler:
r"""
Get statistics for current step. If the function is called at certain iteration
intervals, the result is the average of all steps between the previous call and
this call. Statistics are as follows
this call. Statistics are as follows:
1. reader_cost: the cost of loading data measured in seconds.
......@@ -751,7 +750,7 @@ class Profiler:
Args:
path(str): file path of the output.
format(str, optional): output format, can be chosen from ['json', 'pb], 'json' for chrome tracing and 'pb' for protobuf, default value is "json".
format(str, optional): output format, can be chosen from ['json', 'pb'], 'json' for chrome tracing and 'pb' for protobuf, default value is 'json'.
Examples:
......
......@@ -36,8 +36,10 @@ class RecordEvent(ContextDecorator):
Interface for recording a user-defined time range.
Args:
name(str): Name of the record event
event_type(TracerEventType, optional): Optional, default value is TracerEventType.PythonUserDefined. It is reserved for internal purpose, and it is better not to specify this parameter.
name (str): Name of the record event.
event_type (TracerEventType, optional): Optional, default value is
`TracerEventType.PythonUserDefined`. It is reserved for internal
purpose, and it is better not to specify this parameter.
Examples:
.. code-block:: python
......@@ -59,7 +61,7 @@ class RecordEvent(ContextDecorator):
record_event.end()
**Note**:
RecordEvent will take effect only when :ref:`Profiler <api_paddle_profiler_Profiler>` is on and at the state of RECORD.
RecordEvent will take effect only when :ref:`Profiler <api_paddle_profiler_Profiler>` is on and at the state of `RECORD`.
"""
def __init__(
......@@ -134,7 +136,7 @@ def load_profiler_result(filename: str):
filename(str): Name of the exported protobuf file of profiler data.
Returns:
ProfilerResult object, which stores profiling data.
``ProfilerResult`` object, which stores profiling data.
Examples:
.. code-block:: python
......
......@@ -4119,9 +4119,8 @@ def lerp_(x, y, weight, name=None):
def erfinv(x, name=None):
r"""
The inverse error function of x.
The inverse error function of x. Please refer to :ref:`api_paddle_erf`.
Equation:
.. math::
erfinv(erf(x)) = x.
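A small hedged sketch; the inputs are chosen inside the valid domain (-1, 1):

.. code-block:: python

    import paddle

    x = paddle.to_tensor([0., 0.5, -0.9])
    out = paddle.erfinv(x)
    # erf(erfinv(x)) recovers x up to floating-point error.
    print(paddle.erf(out))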
......@@ -4234,7 +4233,6 @@ def deg2rad(x, name=None):
r"""
Convert each of the elements of input x from degrees to angles in radians.
Equation:
.. math::
deg2rad(x)=\pi * x / 180
......@@ -4250,7 +4248,6 @@ def deg2rad(x, name=None):
.. code-block:: python
import paddle
import numpy as np
x1 = paddle.to_tensor([180.0, -180.0, 360.0, -360.0, 90.0, -90.0])
result1 = paddle.deg2rad(x1)
......@@ -4676,18 +4673,18 @@ def angle(x, name=None):
return out
def heaviside(x, y, name=None):
"""
r"""
Computes the Heaviside step function determined by the corresponding element in y for each element in x. The equation is
.. math::
heaviside(x, y)=
\left\{
\\begin{array}{lcl}
0,& &\\text{if} \ x < 0, \\\\
y,& &\\text{if} \ x = 0, \\\\
1,& &\\text{if} \ x > 0.
\begin{array}{lcl}
0,& &\text{if} \ x < 0, \\
y,& &\text{if} \ x = 0, \\
1,& &\text{if} \ x > 0.
\end{array}
\\right.
\right.
Note:
``paddle.heaviside`` supports broadcasting. If you want to know more about broadcasting, please refer to :ref:`user_guide_broadcasting`.
......@@ -4713,7 +4710,7 @@ def heaviside(x, y, name=None):
paddle.heaviside(x, y)
# [[0. , 0.20000000, 1. ],
# [0. , 1. , 0.30000001]]
"""
"""
op_type = 'elementwise_heaviside'
axis = -1
act = None
......
......@@ -293,7 +293,7 @@ def CUDAExtension(sources, *args, **kwargs):
**kwargs(dict[option], optional): Specify other arguments same as ``setuptools.Extension`` .
Returns:
setuptools.Extension: An instance of setuptools.Extension
setuptools.Extension: An instance of setuptools.Extension.
"""
kwargs = normalize_extension_kwargs(kwargs, use_cuda=True)
# Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will
......
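A hedged sketch of a ``setup.py`` built on ``CUDAExtension`` (the package name and source file names are hypothetical):

.. code-block:: python

    from paddle.utils.cpp_extension import CUDAExtension, setup

    setup(
        name='custom_relu_jit',
        ext_modules=CUDAExtension(
            sources=['custom_relu_op.cc', 'custom_relu_op.cu']))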