Unverified commit 420570c9 authored by S sunzhongkai588, committed by GitHub

paddle/nn/functional docs' bug fix (#34580)

* fix paddle.optimizer test=document_fix

* fix paddle.optimizer test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix
Parent d9e63a81
......@@ -7097,9 +7097,9 @@ def dice_loss(input, label, epsilon=0.00001, name=None):
.. math::
dice\_loss &= 1 - \\frac{2 * intersection\_area}{total\_area} \\\\
&= \\frac{(total\_area - intersection\_area) - intersection\_area}{total\_area} \\\\
&= \\frac{(union\_area - intersection\_area)}{total\_area}
dice\_loss &= 1 - \frac{2 * intersection\_area}{total\_area} \\
&= \frac{(total\_area - intersection\_area) - intersection\_area}{total\_area} \\
&= \frac{(union\_area - intersection\_area)}{total\_area}
Parameters:
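As a reading aid for the formula above, here is a minimal NumPy sketch of the Dice loss on a pair of flat binary masks. It only illustrates the math block; ``dice_loss_ref``, the sample mask values, and the way ``epsilon`` guards the denominator are assumptions for this example, not the Paddle operator's exact input conventions.

.. code-block:: python

    import numpy as np

    def dice_loss_ref(pred, target, epsilon=1e-5):
        # intersection_area: overlap between prediction and target
        intersection = np.sum(pred * target)
        # total_area: sum of both regions (= union_area + intersection_area)
        total = np.sum(pred) + np.sum(target)
        # dice_loss = 1 - 2 * intersection_area / total_area
        return 1.0 - (2.0 * intersection) / (total + epsilon)

    pred = np.array([1.0, 1.0, 0.0, 0.0])
    target = np.array([1.0, 0.0, 0.0, 1.0])
    print(dice_loss_ref(pred, target))  # ~0.5: one overlapping element out of a total area of 4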
......@@ -13065,8 +13065,8 @@ def log_loss(input, label, epsilon=1e-4, name=None):
.. math::
Out = -label * \\log{(input + \\epsilon)}
- (1 - label) * \\log{(1 - input + \\epsilon)}
Out = -label * \log{(input + \epsilon)}
- (1 - label) * \log{(1 - input + \epsilon)}
Args:
input (Tensor|list): A 2-D tensor with shape [N x 1], where N is the
......@@ -14500,17 +14500,17 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
.. math::
dkernel[0] &= dilations[0] \\times (kernel\_sizes[0] - 1) + 1
dkernel[0] &= dilations[0] \times (kernel\_sizes[0] - 1) + 1
dkernel[1] &= dilations[1] \\times (kernel\_sizes[1] - 1) + 1
dkernel[1] &= dilations[1] \times (kernel\_sizes[1] - 1) + 1
hout &= \\frac{H + paddings[0] + paddings[2] - dkernel[0]}{strides[0]} + 1
hout &= \frac{H + paddings[0] + paddings[2] - dkernel[0]}{strides[0]} + 1
wout &= \\frac{W + paddings[1] + paddings[3] - dkernel[1]}{strides[1]} + 1
wout &= \frac{W + paddings[1] + paddings[3] - dkernel[1]}{strides[1]} + 1
Cout &= C \\times kernel\_sizes[0] \\times kernel\_sizes[1]
Cout &= C \times kernel\_sizes[0] \times kernel\_sizes[1]
Lout &= hout \\times wout
Lout &= hout \times wout
Parameters:
......
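To make the shape arithmetic in the ``unfold`` formulas concrete, the sketch below evaluates the same expressions for an assumed input; the sizes are arbitrary example values, not numbers taken from the docstring.

.. code-block:: python

    # Worked example of the unfold output-shape formulas for an assumed
    # input of shape [N, C, H, W] = [1, 3, 32, 32].
    C, H, W = 3, 32, 32
    kernel_sizes = [3, 3]
    strides = [1, 1]
    paddings = [1, 1, 1, 1]   # indices [0], [2] pad H; [1], [3] pad W, as in the formulas
    dilations = [1, 1]

    dkernel_h = dilations[0] * (kernel_sizes[0] - 1) + 1
    dkernel_w = dilations[1] * (kernel_sizes[1] - 1) + 1
    hout = (H + paddings[0] + paddings[2] - dkernel_h) // strides[0] + 1
    wout = (W + paddings[1] + paddings[3] - dkernel_w) // strides[1] + 1
    Cout = C * kernel_sizes[0] * kernel_sizes[1]
    Lout = hout * wout
    print(Cout, Lout)  # 27 1024 -> output tensor shape [N, 27, 1024]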
......@@ -37,7 +37,7 @@ def elu(x, alpha=1.0, name=None):
.. math::
elu(x) = max(0, x) + min(0, \\alpha * (e^{x}-1))
elu(x) = max(0, x) + min(0, \alpha * (e^{x}-1))
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
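A short usage sketch of the ``elu`` formula just shown, assuming a Paddle 2.x environment; the input values are arbitrary.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-2.0, -0.5, 0.0, 1.5])
    # elu(x) = max(0, x) + min(0, alpha * (e^x - 1)), applied elementwise
    out = F.elu(x, alpha=1.0)
    print(out.numpy())  # negative inputs saturate toward -alpha, positive inputs pass through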
......@@ -91,13 +91,13 @@ def gelu(x, approximate=False, name=None):
.. math::
gelu(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3})))
gelu(x) = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3})))
else
.. math::
gelu(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}}))
gelu(x) = 0.5 * x * (1 + erf(\frac{x}{\sqrt{2}}))
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
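The two math blocks above (the tanh approximation and the exact erf form) can be compared numerically with the standard library; this is only a check of the formulas, not Paddle code.

.. code-block:: python

    import math

    def gelu_erf(x):
        # exact form: 0.5 * x * (1 + erf(x / sqrt(2)))
        return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

    def gelu_tanh(x):
        # approximate form: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

    for v in (-2.0, -0.5, 0.5, 2.0):
        print(v, gelu_erf(v), gelu_tanh(v))  # the approximation tracks the exact form closely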
......@@ -144,13 +144,13 @@ def hardshrink(x, threshold=0.5, name=None):
.. math::
hardshrink(x)=
\\left\\{
\\begin{aligned}
&x, & & if \\ x > threshold \\\\
&x, & & if \\ x < -threshold \\\\
&0, & & if \\ others
\\end{aligned}
\\right.
\left\{
\begin{array}{rcl}
x,& &if \ {x > threshold} \\
x,& &if \ {x < -threshold} \\
0,& &if \ {others} &
\end{array}
\right.
Args:
x (Tensor): The input Tensor with data type float32, float64.
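A usage sketch for the piecewise ``hardshrink`` definition above, assuming Paddle 2.x; the expected values in the comment follow directly from the formula with the default threshold of 0.5.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 0.3, 2.5])
    out = F.hardshrink(x, threshold=0.5)
    # |-1.0| and |2.5| exceed the threshold and pass through; 0.3 is zeroed
    print(out.numpy())  # [-1.   0.   2.5]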
......@@ -192,11 +192,14 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
.. math::
hardtanh(x)= \\begin{cases}
max, \\text{if } x > max \\\\
min, \\text{if } x < min \\\\
x, \\text{otherwise}
\\end{cases}
hardtanh(x)=
\left\{
\begin{array}{cll}
max,& & \text{if } x > max \\
min,& & \text{if } x < min \\
x,& & \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
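The piecewise ``hardtanh`` definition above is simply a clamp to [min, max]; a one-line NumPy sketch for illustration:

.. code-block:: python

    import numpy as np

    def hardtanh_ref(x, min_val=-1.0, max_val=1.0):
        # values below min are set to min, above max to max, the rest pass through
        return np.clip(x, min_val, max_val)

    print(hardtanh_ref(np.array([-1.5, -0.3, 0.3, 2.5])))  # [-1.  -0.3  0.3  1. ]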
......@@ -246,13 +249,13 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
.. math::
hardsigmoid(x)=
\\left\\{
\\begin{aligned}
&0, & & \\text{if } x \\leq -3 \\\\
&1, & & \\text{if } x \\geq 3 \\\\
&slope * x + offset, & & \\text{otherwise}
\\end{aligned}
\\right.
\left\{
\begin{array}{lcl}
0, & &\text{if } \ x \leq -3 \\
1, & &\text{if } \ x \geq 3 \\
slope * x + offset, & &\text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......@@ -302,13 +305,13 @@ def hardswish(x, name=None):
.. math::
hardswish(x)=
\\left\\{
\\begin{aligned}
&0, & & \\text{if } x \\leq -3 \\\\
&x, & & \\text{if } x \\geq 3 \\\\
&\\frac{x(x+3)}{6}, & & \\text{otherwise}
\\end{aligned}
\\right.
\left\{
\begin{array}{cll}
0 &, & \text{if } x \leq -3 \\
x &, & \text{if } x \geq 3 \\
\frac{x(x+3)}{6} &, & \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
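A NumPy sketch of the three ``hardswish`` branches above (illustrative only; ``hardswish_ref`` is not the Paddle kernel):

.. code-block:: python

    import numpy as np

    def hardswish_ref(x):
        # 0 for x <= -3, x for x >= 3, otherwise x * (x + 3) / 6
        return np.where(x <= -3, 0.0, np.where(x >= 3, x, x * (x + 3) / 6.0))

    print(hardswish_ref(np.array([-4.0, -1.0, 0.0, 1.0, 4.0])))
    # approximately [0., -0.3333, 0., 0.6667, 4.]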
......@@ -345,13 +348,13 @@ def leaky_relu(x, negative_slope=0.01, name=None):
leaky_relu activation
.. math::
leaky\\_relu(x)=
\\left\\{
\\begin{aligned}
&x, & & if \\ x >= 0 \\\\
&negative\_slope * x, & & otherwise \\\\
\\end{aligned}
\\right. \\\\
leaky\_relu(x)=
\left\{
\begin{array}{rcl}
x, & & if \ x >= 0 \\
negative\_slope * x, & & otherwise \\
\end{array}
\right.
Args:
x (Tensor): The input Tensor with data type float32, float64.
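A usage sketch under a Paddle 2.x assumption; the expected values follow from the piecewise ``leaky_relu`` formula with the default negative_slope of 0.01.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-2.0, 0.0, 1.0])
    out = F.leaky_relu(x, negative_slope=0.01)
    print(out.numpy())  # [-0.02  0.    1.  ]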
......@@ -513,7 +516,7 @@ def log_sigmoid(x, name=None):
.. math::
log\\_sigmoid(x) = log \\frac{1}{1 + e^{-x}}
log\_sigmoid(x) = log \frac{1}{1 + e^{-x}}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......@@ -554,12 +557,15 @@ def maxout(x, groups, axis=1, name=None):
.. math::
&out_{si+j} = \\max_{k} x_{gsi + sk + j} \\\\
&g = groups \\\\
&s = \\frac{input.size}{num\\_channels} \\\\
&0 \\le i < \\frac{num\\_channels}{groups} \\\\
&0 \\le j < s \\\\
&0 \\le k < groups
\begin{array}{l}
&out_{si+j} = \max_{k} x_{gsi + sk + j} \\
&g = groups \\
&s = \frac{input.size}{num\_channels} \\
&0 \le i < \frac{num\_channels}{groups} \\
&0 \le j < s \\
&0 \le k < groups
\end{array}
Parameters:
x (Tensor): The input is 4-D Tensor with shape [N, C, H, W] or [N, H, W, C], the data type
......@@ -670,10 +676,12 @@ def selu(x,
.. math::
selu(x)= scale *
\\begin{cases}
x, \\text{if } x > 0 \\\\
alpha * e^{x} - alpha, \\text{if } x <= 0
\\end{cases}
\left\{
\begin{array}{lcl}
x,& &\text{if } \ x > 0 \\
alpha * e^{x} - alpha,& &\text{if } \ x <= 0
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......@@ -719,9 +727,11 @@ def selu(x,
def silu(x, name=None):
"""
silu activation.
.. math:
r"""
silu activation
.. math::
silu(x) = \frac{x}{1 + e^{-x}}
Parameters:
......@@ -734,6 +744,7 @@ def silu(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
......@@ -778,7 +789,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
.. math::
softmax[i, j] = \\frac{\\exp(x[i, j])}{\\sum_j(exp(x[i, j])}
softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j(exp(x[i, j])}
Example:
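The docstring's own Example section is collapsed by the diff here; as a separate illustration, a minimal NumPy sketch of the softmax formula above (the max-shift is only for numerical stability and does not change the result):

.. code-block:: python

    import numpy as np

    def softmax_ref(x, axis=-1):
        # softmax[i, j] = exp(x[i, j]) / sum_j exp(x[i, j])
        e = np.exp(x - np.max(x, axis=axis, keepdims=True))  # stability shift, cancels out
        return e / np.sum(e, axis=axis, keepdims=True)

    x = np.array([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]])
    print(softmax_ref(x))  # each row sums to 1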
......@@ -923,8 +934,8 @@ def softplus(x, beta=1, threshold=20, name=None):
.. math::
softplus(x) = \\frac{1}{beta} * \\log(1 + e^{beta * x}) \\\\
\\text{For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.}
softplus(x) = \frac{1}{beta} * \log(1 + e^{beta * x}) \\
\text{For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
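A NumPy sketch of the ``softplus`` formula, including the linear fallback that the note above describes; the threshold switch here is an illustrative reading of that note, not the exact kernel.

.. code-block:: python

    import numpy as np

    def softplus_ref(x, beta=1.0, threshold=20.0):
        # softplus(x) = (1 / beta) * log(1 + exp(beta * x)),
        # reverting to the linear function x when beta * x > threshold
        bx = beta * x
        return np.where(bx > threshold, x, np.log1p(np.exp(np.minimum(bx, threshold))) / beta)

    print(softplus_ref(np.array([-30.0, 0.0, 30.0])))  # approximately [0., 0.6931, 30.]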
......@@ -968,11 +979,14 @@ def softshrink(x, threshold=0.5, name=None):
.. math::
softshrink(x)= \\begin{cases}
x - threshold, \\text{if } x > threshold \\\\
x + threshold, \\text{if } x < -threshold \\\\
0, \\text{otherwise}
\\end{cases}
softshrink(x)=
\left\{
\begin{array}{rcl}
x - threshold,& & \text{if } x > threshold \\
x + threshold,& & \text{if } x < -threshold \\
0,& & \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......@@ -1019,7 +1033,7 @@ def softsign(x, name=None):
.. math::
softsign(x) = \\frac{x}{1 + |x|}
softsign(x) = \frac{x}{1 + |x|}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......@@ -1056,7 +1070,7 @@ def swish(x, name=None):
.. math::
swish(x) = \\frac{x}{1 + e^{-x}}
swish(x) = \frac{x}{1 + e^{-x}}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......@@ -1134,10 +1148,14 @@ def thresholded_relu(x, threshold=1.0, name=None):
.. math::
thresholded\\_relu(x) = \\begin{cases}
x, \\text{if } x > threshold \\\\
0, \\text{otherwise}
\\end{cases}
thresholded\_relu(x) =
\left\{
\begin{array}{rl}
x,& \text{if } \ x > threshold \\
0,& \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
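A one-line NumPy sketch of the piecewise ``thresholded_relu`` rule above (illustrative):

.. code-block:: python

    import numpy as np

    def thresholded_relu_ref(x, threshold=1.0):
        # keep x where x > threshold, otherwise output 0
        return np.where(x > threshold, x, 0.0)

    print(thresholded_relu_ref(np.array([0.5, 1.0, 2.0])))  # [0. 0. 2.]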
......@@ -1181,10 +1199,10 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
.. math::
\\begin{aligned}
log\\_softmax[i, j] &= log(softmax(x)) \\\\
&= log(\\frac{\\exp(X[i, j])}{\\sum_j(\\exp(X[i, j])})
\\end{aligned}
\begin{aligned}
log\_softmax[i, j] &= log(softmax(x)) \\
&= log(\frac{\exp(X[i, j])}{\sum_j(\exp(X[i, j])})
\end{aligned}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
......
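A NumPy sketch of the ``log_softmax`` identity above, i.e. taking the log of the softmax along the chosen axis (the max-shift is only for numerical stability and cancels in the result):

.. code-block:: python

    import numpy as np

    def log_softmax_ref(x, axis=-1):
        # log_softmax[i, j] = x[i, j] - log(sum_j exp(x[i, j]))
        shifted = x - np.max(x, axis=axis, keepdims=True)  # stability shift, cancels out
        return shifted - np.log(np.sum(np.exp(shifted), axis=axis, keepdims=True))

    x = np.array([[1.0, 2.0, 3.0]])
    print(np.exp(log_softmax_ref(x)).sum())  # exponentiating recovers softmax, which sums to 1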
......@@ -180,18 +180,18 @@ def binary_cross_entropy_with_logits(logit,
First this operator calculate loss function as follows:
.. math::
Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit))
Out = -Labels * \log(\sigma(Logit)) - (1 - Labels) * \log(1 - \sigma(Logit))
We know that :math:`\\sigma(Logit) = \\frac{1}{1 + e^{-Logit}}`. By substituting this we get:
We know that :math:`\sigma(Logit) = \frac{1}{1 + e^{-Logit}}`. By substituting this we get:
.. math::
Out = Logit - Logit * Labels + \\log(1 + e^{-Logit})
Out = Logit - Logit * Labels + \log(1 + e^{-Logit})
For stability and to prevent overflow of :math:`e^{-Logit}` when Logit < 0,
we reformulate the loss as follows:
.. math::
Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + e^{-\|Logit\|})
Out = \max(Logit, 0) - Logit * Labels + \log(1 + e^{-\|Logit\|})
Then, if ``weight`` or ``pos_weight`` is not None, this operator multiply the
weight tensor on the loss `Out`. The ``weight`` tensor will attach different
......@@ -450,17 +450,17 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
.. math::
loss(x,y) = \\frac{1}{n}\\sum_{i}z_i
loss(x,y) = \frac{1}{n}\sum_{i}z_i
where z_i is given by:
.. math::
\\mathop{z_i} = \\left\\{\\begin{array}{rcl}
0.5(x_i - y_i)^2 & & {if |x_i - y_i| < delta} \\\\
\mathop{z_i} = \left\{\begin{array}{rcl}
0.5(x_i - y_i)^2 & & {if |x_i - y_i| < delta} \\
delta * |x_i - y_i| - 0.5 * delta^2 & & {otherwise}
\\end{array} \\right.
\end{array} \right.
Parameters:
input (Tensor): Input tensor, the data type is float32 or float64. Shape is
......@@ -631,17 +631,17 @@ def l1_loss(input, label, reduction='mean', name=None):
If `reduction` set to ``'none'``, the loss is:
.. math::
Out = \\lvert input - label \\rvert
Out = \lvert input - label \rvert
If `reduction` set to ``'mean'``, the loss is:
.. math::
Out = MEAN(\\lvert input - label \\rvert)
Out = MEAN(\lvert input - label \rvert)
If `reduction` set to ``'sum'``, the loss is:
.. math::
Out = SUM(\\lvert input - label\\rvert)
Out = SUM(\lvert input - label \rvert)
Parameters:
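A NumPy sketch of the three ``l1_loss`` reduction modes described above (illustrative only):

.. code-block:: python

    import numpy as np

    def l1_loss_ref(input, label, reduction='mean'):
        out = np.abs(input - label)   # 'none': elementwise |input - label|
        if reduction == 'mean':
            return out.mean()
        if reduction == 'sum':
            return out.sum()
        return out

    x = np.array([1.0, 2.0, 3.0])
    y = np.array([1.5, 2.0, 1.0])
    print(l1_loss_ref(x, y), l1_loss_ref(x, y, 'sum'))  # 0.8333...  2.5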
......@@ -1563,15 +1563,15 @@ def sigmoid_focal_loss(logit,
This operator measures focal loss function as follows:
.. math::
Out = -Labels * alpha * {(1 - \\sigma(Logit))}^{gamma}\\log(\\sigma(Logit)) - (1 - Labels) * (1 - alpha) * {\\sigma(Logit)}^{gamma}\\log(1 - \\sigma(Logit))
Out = -Labels * alpha * {(1 - \sigma(Logit))}^{gamma}\log(\sigma(Logit)) - (1 - Labels) * (1 - alpha) * {\sigma(Logit)}^{gamma}\log(1 - \sigma(Logit))
We know that :math:`\\sigma(Logit) = \\frac{1}{1 + \\exp(-Logit)}`.
We know that :math:`\sigma(Logit) = \frac{1}{1 + \exp(-Logit)}`.
Then, if :attr:`normalizer` is not None, this operator divides the
normalizer tensor on the loss `Out`:
.. math::
Out = \\frac{Out}{normalizer}
Out = \frac{Out}{normalizer}
Finally, this operator applies reduce operation on the loss.
If :attr:`reduction` set to ``'none'``, the operator will return the original loss `Out`.
......
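A NumPy sketch of the focal-loss formula quoted above, before the optional normalizer and reduction steps; sigma is the sigmoid, and the ``alpha``/``gamma`` values and inputs here are arbitrary example choices.

.. code-block:: python

    import numpy as np

    def sigmoid_focal_loss_ref(logit, label, alpha=0.25, gamma=2.0):
        p = 1.0 / (1.0 + np.exp(-logit))  # sigma(Logit)
        # -label * alpha * (1 - p)^gamma * log(p)
        #   - (1 - label) * (1 - alpha) * p^gamma * log(1 - p)
        return (-label * alpha * (1 - p) ** gamma * np.log(p)
                - (1 - label) * (1 - alpha) * p ** gamma * np.log(1 - p))

    print(sigmoid_focal_loss_ref(np.array([2.0, -1.0]), np.array([1.0, 0.0])))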
......@@ -34,12 +34,12 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
.. math::
y = \\frac{x}{ \\max\\left( \\lvert \\lvert x \\rvert \\rvert_p, epsilon\\right) }
y = \frac{x}{ \max\left( \lvert \lvert x \rvert \rvert_p, epsilon\right) }
.. math::
\\lvert \\lvert x \\rvert \\rvert_p = \\left( \\sum_i {\\lvert x_i \\rvert^p} \\right)^{1/p}
\lvert \lvert x \rvert \rvert_p = \left( \sum_i {\lvert x_i \rvert^p} \right)^{1/p}
where, :math:`\\sum_i{\\lvert x_i \\rvert^p}` is calculated along the ``axis`` dimension.
where, :math:`\sum_i{\lvert x_i \rvert^p}` is calculated along the ``axis`` dimension.
Parameters:
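A NumPy sketch of the p-norm formula above along a chosen axis (illustrative; ``normalize_ref`` is not the Paddle kernel):

.. code-block:: python

    import numpy as np

    def normalize_ref(x, p=2, axis=1, epsilon=1e-12):
        # ||x||_p = (sum_i |x_i|^p)^(1/p), computed along `axis`
        norm = np.sum(np.abs(x) ** p, axis=axis, keepdims=True) ** (1.0 / p)
        # y = x / max(||x||_p, epsilon)
        return x / np.maximum(norm, epsilon)

    x = np.array([[3.0, 4.0], [0.0, 0.0]])
    print(normalize_ref(x))  # first row becomes [0.6, 0.8]; the zero row stays zero (epsilon guard)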
......@@ -432,7 +432,7 @@ def local_response_norm(x,
.. math::
Output(i, x, y) = Input(i, x, y) / \\left(k + \\alpha \\sum\\limits^{\\min(C-1, i + size/2)}_{j = \\max(0, i - size/2)}(Input(j, x, y))^2\\right)^{\\beta}
Output(i, x, y) = Input(i, x, y) / \left(k + \alpha \sum\limits^{\min(C-1, i + size/2)}_{j = \max(0, i - size/2)}(Input(j, x, y))^2\right)^{\beta}
In the above equation:
......