Unverified commit 420570c9, authored by S sunzhongkai588, committed by GitHub

paddle/nn/functional docs' bug fix (#34580)

* fix paddle.optimizer test=document_fix

* fix paddle.optimizer test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix

* fix bugs in paddle.nn.functional document test=document_fix
Parent d9e63a81
@@ -7097,9 +7097,9 @@ def dice_loss(input, label, epsilon=0.00001, name=None):
.. math::
dice\_loss &= 1 - \\frac{2 * intersection\_area}{total\_area} \\\\
&= \\frac{(total\_area - intersection\_area) - intersection\_area}{total\_area} \\\\
&= \\frac{(union\_area - intersection\_area)}{total\_area}
dice\_loss &= 1 - \frac{2 * intersection\_area}{total\_area} \\
&= \frac{(total\_area - intersection\_area) - intersection\_area}{total\_area} \\
&= \frac{(union\_area - intersection\_area)}{total\_area}
Parameters:
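For reference, a minimal sketch of the corrected formula in use, assuming the function is also exposed as ``paddle.nn.functional.dice_loss`` and that ``input`` holds per-class probabilities while ``label`` holds integer class ids; shapes and values are illustrative:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    # 4 samples, 3 classes: probabilities from a softmax, integer labels with a trailing dim of 1
    probs = F.softmax(paddle.rand([4, 3]), axis=-1)
    label = paddle.randint(0, 3, [4, 1], dtype='int64')
    loss = F.dice_loss(probs, label)  # 1 - 2 * intersection_area / total_area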
@@ -13065,8 +13065,8 @@ def log_loss(input, label, epsilon=1e-4, name=None):
.. math::
Out = -label * \\log{(input + \\epsilon)}
- (1 - label) * \\log{(1 - input + \\epsilon)}
Out = -label * \log{(input + \epsilon)}
- (1 - label) * \log{(1 - input + \epsilon)}
Args:
input (Tensor|list): A 2-D tensor with shape [N x 1], where N is the
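A minimal sketch of the formula above, assuming the function is also exposed as ``paddle.nn.functional.log_loss`` and takes probabilities in (0, 1) and 0/1 targets, both shaped [N, 1]:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    prob = paddle.to_tensor([[0.1], [0.7], [0.9]], dtype='float32')   # predicted probabilities, [N, 1]
    label = paddle.to_tensor([[0.0], [1.0], [1.0]], dtype='float32')  # binary targets, [N, 1]
    out = F.log_loss(prob, label)  # elementwise: -label*log(p + eps) - (1 - label)*log(1 - p + eps)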
@@ -14500,17 +14500,17 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
.. math::
dkernel[0] &= dilations[0] \\times (kernel\_sizes[0] - 1) + 1
dkernel[0] &= dilations[0] \times (kernel\_sizes[0] - 1) + 1
dkernel[1] &= dilations[1] \\times (kernel\_sizes[1] - 1) + 1
dkernel[1] &= dilations[1] \times (kernel\_sizes[1] - 1) + 1
hout &= \\frac{H + paddings[0] + paddings[2] - dkernel[0]}{strides[0]} + 1
hout &= \frac{H + paddings[0] + paddings[2] - dkernel[0]}{strides[0]} + 1
wout &= \\frac{W + paddings[1] + paddings[3] - dkernel[1]}{strides[1]} + 1
wout &= \frac{W + paddings[1] + paddings[3] - dkernel[1]}{strides[1]} + 1
Cout &= C \\times kernel\_sizes[0] \\times kernel\_sizes[1]
Cout &= C \times kernel\_sizes[0] \times kernel\_sizes[1]
Lout &= hout \\times wout
Lout &= hout \times wout
Parameters:
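A short sketch of the shape arithmetic above: with H = W = 4, a 2x2 kernel, stride 1 and no padding, dkernel = 2, hout = wout = (4 - 2)/1 + 1 = 3, Cout = C * 2 * 2 and Lout = 9 (shapes are illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([2, 1, 4, 4])            # [N, C, H, W]
    out = F.unfold(x, kernel_sizes=[2, 2])   # strides=1, paddings=0, dilations=1
    print(out.shape)                         # [2, 4, 9] = [N, Cout, Lout]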
@@ -37,7 +37,7 @@ def elu(x, alpha=1.0, name=None):
.. math::
elu(x) = max(0, x) + min(0, \\alpha * (e^{x}-1))
elu(x) = max(0, x) + min(0, \alpha * (e^{x}-1))
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
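A minimal usage sketch of ``F.elu`` following the formula above (input values are illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-2.0, 0.0, 1.0])
    out = F.elu(x, alpha=1.0)  # alpha * (e^x - 1) for x < 0, x otherwise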
@@ -91,13 +91,13 @@ def gelu(x, approximate=False, name=None):
.. math::
gelu(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3})))
gelu(x) = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3})))
else
.. math::
gelu(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}}))
gelu(x) = 0.5 * x * (1 + erf(\frac{x}{\sqrt{2}}))
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
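A minimal sketch of both branches above; ``approximate`` selects the tanh form, otherwise the erf form is used:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 0.0, 1.0])
    out_erf = F.gelu(x)                     # 0.5 * x * (1 + erf(x / sqrt(2)))
    out_tanh = F.gelu(x, approximate=True)  # tanh approximation shown above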
@@ -144,13 +144,13 @@ def hardshrink(x, threshold=0.5, name=None):
.. math::
hardshrink(x)=
\\left\\{
\\begin{aligned}
&x, & & if \\ x > threshold \\\\
&x, & & if \\ x < -threshold \\\\
&0, & & if \\ others
\\end{aligned}
\\right.
\left\{
\begin{array}{rcl}
x,& &if \ {x > threshold} \\
x,& &if \ {x < -threshold} \\
0,& &if \ {others} &
\end{array}
\right.
Args:
x (Tensor): The input Tensor with data type float32, float64.
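A minimal sketch of the piecewise definition above; values within [-threshold, threshold] are zeroed and the rest pass through:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 0.3, 2.5])
    out = F.hardshrink(x)  # threshold=0.5 -> [-1.0, 0.0, 2.5]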
@@ -192,11 +192,14 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
.. math::
hardtanh(x)= \\begin{cases}
max, \\text{if } x > max \\\\
min, \\text{if } x < min \\\\
x, \\text{otherwise}
\\end{cases}
hardtanh(x)=
\left\{
\begin{array}{cll}
max,& & \text{if } x > max \\
min,& & \text{if } x < min \\
x,& & \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
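A minimal sketch of the clipping behaviour above with the default bounds:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-2.0, 0.5, 2.0])
    out = F.hardtanh(x)  # min=-1.0, max=1.0 -> [-1.0, 0.5, 1.0]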
@@ -246,13 +249,13 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
.. math::
hardsigmoid(x)=
\\left\\{
\\begin{aligned}
&0, & & \\text{if } x \\leq -3 \\\\
&1, & & \\text{if } x \\geq 3 \\\\
&slope * x + offset, & & \\text{otherwise}
\\end{aligned}
\\right.
\left\{
\begin{array}{lcl}
0, & &\text{if } \ x \leq -3 \\
1, & &\text{if } \ x \geq 3 \\
slope * x + offset, & &\text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
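A minimal sketch of the three branches above with the default slope and offset:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-4.0, 0.0, 6.0])
    out = F.hardsigmoid(x)  # -> [0.0, 0.5, 1.0]: 0 below -3, 1 above 3, slope*x + offset in between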
@@ -302,13 +305,13 @@ def hardswish(x, name=None):
.. math::
hardswish(x)=
\\left\\{
\\begin{aligned}
&0, & & \\text{if } x \\leq -3 \\\\
&x, & & \\text{if } x \\geq 3 \\\\
&\\frac{x(x+3)}{6}, & & \\text{otherwise}
\\end{aligned}
\\right.
\left\{
\begin{array}{cll}
0 &, & \text{if } x \leq -3 \\
x &, & \text{if } x \geq 3 \\
\frac{x(x+3)}{6} &, & \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
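A minimal sketch of the three branches above; the middle case follows x(x+3)/6:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-4.0, 1.0, 5.0])
    out = F.hardswish(x)  # approx [0.0, 0.6667, 5.0]: 0 below -3, x above 3, x*(x+3)/6 in between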
@@ -345,13 +348,13 @@ def leaky_relu(x, negative_slope=0.01, name=None):
leaky_relu activation
.. math::
leaky\\_relu(x)=
\\left\\{
\\begin{aligned}
&x, & & if \\ x >= 0 \\\\
&negative\_slope * x, & & otherwise \\\\
\\end{aligned}
\\right. \\\\
leaky\_relu(x)=
\left\{
\begin{array}{rcl}
x, & & if \ x >= 0 \\
negative\_slope * x, & & otherwise \\
\end{array}
\right.
Args:
x (Tensor): The input Tensor with data type float32, float64.
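A minimal sketch of the two branches above with the default slope:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-2.0, 0.0, 1.0])
    out = F.leaky_relu(x)  # negative_slope=0.01 -> [-0.02, 0.0, 1.0]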
@@ -513,7 +516,7 @@ def log_sigmoid(x, name=None):
.. math::
log\\_sigmoid(x) = log \\frac{1}{1 + e^{-x}}
log\_sigmoid(x) = log \frac{1}{1 + e^{-x}}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
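A minimal sketch of the formula above; outputs are always non-positive since the sigmoid is at most 1:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([1.0, 2.0, 3.0])
    out = F.log_sigmoid(x)  # log(1 / (1 + e^{-x}))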
@@ -554,12 +557,15 @@ def maxout(x, groups, axis=1, name=None):
.. math::
&out_{si+j} = \\max_{k} x_{gsi + sk + j} \\\\
&g = groups \\\\
&s = \\frac{input.size}{num\\_channels} \\\\
&0 \\le i < \\frac{num\\_channels}{groups} \\\\
&0 \\le j < s \\\\
&0 \\le k < groups
\begin{array}{l}
&out_{si+j} = \max_{k} x_{gsi + sk + j} \\
&g = groups \\
&s = \frac{input.size}{num\_channels} \\
&0 \le i < \frac{num\_channels}{groups} \\
&0 \le j < s \\
&0 \le k < groups
\end{array}
Parameters:
x (Tensor): The input is 4-D Tensor with shape [N, C, H, W] or [N, H, W, C], the data type
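A minimal shape sketch of the grouping above along the default channel axis: C is split into ``groups`` slices and the elementwise maximum is taken across each slice, so C = 6 with groups = 3 yields 2 output channels:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([1, 6, 4, 4])   # [N, C, H, W]
    out = F.maxout(x, groups=3)     # channel dim shrinks: out.shape == [1, 2, 4, 4]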
@@ -670,10 +676,12 @@ def selu(x,
.. math::
selu(x)= scale *
\\begin{cases}
x, \\text{if } x > 0 \\\\
alpha * e^{x} - alpha, \\text{if } x <= 0
\\end{cases}
\left\{
\begin{array}{lcl}
x,& &\text{if } \ x > 0 \\
alpha * e^{x} - alpha,& &\text{if } \ x <= 0
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
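A minimal sketch of the two branches above with the default ``scale`` and ``alpha``:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 0.0, 2.0])
    out = F.selu(x)  # scale * x for x > 0, scale * alpha * (e^x - 1) for x <= 0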
@@ -719,9 +727,11 @@ def selu(x,
def silu(x, name=None):
"""
silu activation.
.. math:
r"""
silu activation
.. math::
silu(x) = \frac{x}{1 + e^{-x}}
Parameters:
@@ -734,11 +744,12 @@ def silu(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = F.silu(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ]
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = F.silu(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ]
"""
if in_dygraph_mode():
@@ -778,7 +789,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
.. math::
softmax[i, j] = \\frac{\\exp(x[i, j])}{\\sum_j(exp(x[i, j])}
softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j(exp(x[i, j])}
Example:
@@ -923,8 +934,8 @@ def softplus(x, beta=1, threshold=20, name=None):
.. math::
softplus(x) = \\frac{1}{beta} * \\log(1 + e^{beta * x}) \\\\
\\text{For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.}
softplus(x) = \frac{1}{beta} * \log(1 + e^{beta * x}) \\
\text{For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
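A minimal sketch of the formula above with the default ``beta`` and ``threshold``; softplus(0) equals log(2):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 0.0, 1.0])
    out = F.softplus(x)  # (1/beta) * log(1 + e^{beta * x}); linear once beta * x > threshold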
@@ -968,11 +979,14 @@ def softshrink(x, threshold=0.5, name=None):
.. math::
softshrink(x)= \\begin{cases}
x - threshold, \\text{if } x > threshold \\\\
x + threshold, \\text{if } x < -threshold \\\\
0, \\text{otherwise}
\\end{cases}
softshrink(x)=
\left\{
\begin{array}{rcl}
x - threshold,& & \text{if } x > threshold \\
x + threshold,& & \text{if } x < -threshold \\
0,& & \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
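A minimal sketch of the three branches above with the default threshold:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 0.3, 2.5])
    out = F.softshrink(x)  # threshold=0.5 -> [-0.5, 0.0, 2.0]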
@@ -1019,7 +1033,7 @@ def softsign(x, name=None):
.. math::
softsign(x) = \\frac{x}{1 + |x|}
softsign(x) = \frac{x}{1 + |x|}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
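A minimal sketch of the formula above:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-4.0, 0.0, 1.0])
    out = F.softsign(x)  # x / (1 + |x|) -> [-0.8, 0.0, 0.5]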
@@ -1056,7 +1070,7 @@ def swish(x, name=None):
.. math::
swish(x) = \\frac{x}{1 + e^{-x}}
swish(x) = \frac{x}{1 + e^{-x}}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
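A minimal sketch of the formula above, which is x multiplied by the sigmoid of x:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-2.0, 0.0, 1.0])
    out = F.swish(x)  # x / (1 + e^{-x}); swish(0) = 0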
@@ -1134,10 +1148,14 @@ def thresholded_relu(x, threshold=1.0, name=None):
.. math::
thresholded\\_relu(x) = \\begin{cases}
x, \\text{if } x > threshold \\\\
0, \\text{otherwise}
\\end{cases}
thresholded\_relu(x) =
\left\{
\begin{array}{rl}
x,& \text{if } \ x > threshold \\
0,& \text{otherwise}
\end{array}
\right.
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
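A minimal sketch of the two branches above with the default threshold:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([2.0, 0.5, -1.0])
    out = F.thresholded_relu(x)  # threshold=1.0 -> [2.0, 0.0, 0.0]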
@@ -1181,10 +1199,10 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
.. math::
\\begin{aligned}
log\\_softmax[i, j] &= log(softmax(x)) \\\\
&= log(\\frac{\\exp(X[i, j])}{\\sum_j(\\exp(X[i, j])})
\\end{aligned}
\begin{aligned}
log\_softmax[i, j] &= log(softmax(x)) \\
&= log(\frac{\exp(X[i, j])}{\sum_j(\exp(X[i, j])})
\end{aligned}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
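A minimal sketch of the identity above; ``log_softmax`` matches taking the log of ``softmax`` but is more numerically stable:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([[1.0, 2.0, 3.0]])
    out = F.log_softmax(x, axis=-1)
    ref = paddle.log(F.softmax(x, axis=-1))  # same values up to numerical precision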
@@ -180,18 +180,18 @@ def binary_cross_entropy_with_logits(logit,
First this operator calculate loss function as follows:
.. math::
Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit))
Out = -Labels * \log(\sigma(Logit)) - (1 - Labels) * \log(1 - \sigma(Logit))
We know that :math:`\\sigma(Logit) = \\frac{1}{1 + e^{-Logit}}`. By substituting this we get:
We know that :math:`\sigma(Logit) = \frac{1}{1 + e^{-Logit}}`. By substituting this we get:
.. math::
Out = Logit - Logit * Labels + \\log(1 + e^{-Logit})
Out = Logit - Logit * Labels + \log(1 + e^{-Logit})
For stability and to prevent overflow of :math:`e^{-Logit}` when Logit < 0,
we reformulate the loss as follows:
.. math::
Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + e^{-\|Logit\|})
Out = \max(Logit, 0) - Logit * Labels + \log(1 + e^{-\|Logit\|})
Then, if ``weight`` or ``pos_weight`` is not None, this operator multiply the
weight tensor on the loss `Out`. The ``weight`` tensor will attach different
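A minimal sketch of the stabilized formula, assuming float 0/1 labels of the same shape as ``logit`` and the default ``'mean'`` reduction:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    logit = paddle.to_tensor([2.0, -1.0, 0.5])
    label = paddle.to_tensor([1.0, 0.0, 1.0])
    loss = F.binary_cross_entropy_with_logits(logit, label)
    # mean of: max(logit, 0) - logit * label + log(1 + e^{-|logit|})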
@@ -450,17 +450,17 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
.. math::
loss(x,y) = \\frac{1}{n}\\sum_{i}z_i
loss(x,y) = \frac{1}{n}\sum_{i}z_i
where z_i is given by:
.. math::
\\mathop{z_i} = \\left\\{\\begin{array}{rcl}
0.5(x_i - y_i)^2 & & {if |x_i - y_i| < delta} \\\\
\mathop{z_i} = \left\{\begin{array}{rcl}
0.5(x_i - y_i)^2 & & {if |x_i - y_i| < delta} \\
delta * |x_i - y_i| - 0.5 * delta^2 & & {otherwise}
\\end{array} \\right.
\end{array} \right.
Parameters:
input (Tensor): Input tensor, the data type is float32 or float64. Shape is
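A minimal sketch of the piecewise definition above; differences smaller than ``delta`` are penalized quadratically, larger ones linearly (values are illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    input = paddle.to_tensor([[1.0, 2.0], [4.0, 1.0]])
    label = paddle.to_tensor([[1.2, 2.0], [1.0, 1.0]])
    loss = F.smooth_l1_loss(input, label)  # delta=1.0, reduced with 'mean' by default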
@@ -631,17 +631,17 @@ def l1_loss(input, label, reduction='mean', name=None):
If `reduction` set to ``'none'``, the loss is:
.. math::
Out = \\lvert input - label \\rvert
Out = \lvert input - label \rvert
If `reduction` set to ``'mean'``, the loss is:
.. math::
Out = MEAN(\\lvert input - label \\rvert)
Out = MEAN(\lvert input - label \rvert)
If `reduction` set to ``'sum'``, the loss is:
.. math::
Out = SUM(\\lvert input - label\\rvert)
Out = SUM(\lvert input - label \rvert)
Parameters:
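A minimal sketch of the three reduction modes above:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    input = paddle.to_tensor([1.0, 2.0, 3.0])
    label = paddle.to_tensor([1.5, 2.0, 2.0])
    loss_none = F.l1_loss(input, label, reduction='none')  # |input - label| = [0.5, 0.0, 1.0]
    loss_mean = F.l1_loss(input, label)                    # MEAN(...) = 0.5
    loss_sum = F.l1_loss(input, label, reduction='sum')    # SUM(...)  = 1.5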
@@ -1563,15 +1563,15 @@ def sigmoid_focal_loss(logit,
This operator measures focal loss function as follows:
.. math::
Out = -Labels * alpha * {(1 - \\sigma(Logit))}^{gamma}\\log(\\sigma(Logit)) - (1 - Labels) * (1 - alpha) * {\\sigma(Logit)}^{gamma}\\log(1 - \\sigma(Logit))
Out = -Labels * alpha * {(1 - \sigma(Logit))}^{gamma}\log(\sigma(Logit)) - (1 - Labels) * (1 - alpha) * {\sigma(Logit)}^{gamma}\log(1 - \sigma(Logit))
We know that :math:`\\sigma(Logit) = \\frac{1}{1 + \\exp(-Logit)}`.
We know that :math:`\sigma(Logit) = \frac{1}{1 + \exp(-Logit)}`.
Then, if :attr:`normalizer` is not None, this operator divides the
normalizer tensor on the loss `Out`:
.. math::
Out = \\frac{Out}{normalizer}
Out = \frac{Out}{normalizer}
Finally, this operator applies reduce operation on the loss.
If :attr:`reduction` set to ``'none'``, the operator will return the original loss `Out`.
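A minimal sketch of the focal-loss formula, assuming float 0/1 labels of the same shape as ``logit`` and the default ``alpha`` and ``gamma``:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    logit = paddle.to_tensor([[0.97], [0.91], [0.03]], dtype='float32')
    label = paddle.to_tensor([[1.0], [1.0], [0.0]], dtype='float32')
    loss = F.sigmoid_focal_loss(logit, label)  # focal-weighted BCE, reduced according to `reduction`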
@@ -34,12 +34,12 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
.. math::
y = \\frac{x}{ \\max\\left( \\lvert \\lvert x \\rvert \\rvert_p, epsilon\\right) }
y = \frac{x}{ \max\left( \lvert \lvert x \rvert \rvert_p, epsilon\right) }
.. math::
\\lvert \\lvert x \\rvert \\rvert_p = \\left( \\sum_i {\\lvert x_i \\rvert^p} \\right)^{1/p}
\lvert \lvert x \rvert \rvert_p = \left( \sum_i {\lvert x_i \rvert^p} \right)^{1/p}
where, :math:`\\sum_i{\\lvert x_i \\rvert^p}` is calculated along the ``axis`` dimension.
where, :math:`\sum_i{\lvert x_i \rvert^p}` is calculated along the ``axis`` dimension.
Parameters:
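A minimal sketch of the formula above along ``axis=1``; the epsilon keeps an all-zero row from dividing by zero:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([[3.0, 4.0], [0.0, 0.0]])
    out = F.normalize(x, p=2, axis=1)  # first row -> [0.6, 0.8]; the zero row stays zero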
@@ -432,7 +432,7 @@ def local_response_norm(x,
.. math::
Output(i, x, y) = Input(i, x, y) / \\left(k + \\alpha \\sum\\limits^{\\min(C-1, i + size/2)}_{j = \\max(0, i - size/2)}(Input(j, x, y))^2\\right)^{\\beta}
Output(i, x, y) = Input(i, x, y) / \left(k + \alpha \sum\limits^{\min(C-1, i + size/2)}_{j = \max(0, i - size/2)}(Input(j, x, y))^2\right)^{\beta}
In the above equation:
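A minimal shape-preserving sketch of the formula above, assuming the cross-channel window is passed as ``size`` (the value here is illustrative):

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([2, 8, 16, 16])          # [N, C, H, W]
    out = F.local_response_norm(x, size=5)   # each channel divided by a window of neighboring channels
    print(out.shape)                         # [2, 8, 16, 16]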