MegEngine 天元 / MegEngine
Commit f33a92d6
Authored Apr 03, 2020 by Megvii Engine Team

docs(mge): fix api docs of loss and tranform

GitOrigin-RevId: 4d4d32b3c4536c3d1ba993405fdde6bc5065af5b
Parent: fd83e025
Showing 3 changed files with 60 additions and 56 deletions (+60 −56)
python_module/megengine/data/dataset/vision/imagenet.py      +12 −10
python_module/megengine/data/transform/vision/transform.py   +19 −19
python_module/megengine/functional/loss.py                   +29 −27
python_module/megengine/data/dataset/vision/imagenet.py
...
...
@@ -33,16 +33,18 @@ class ImageNet(ImageFolder):
    r"""
    Load ImageNet from raw files or folder, expected folder looks like
-    ${root}/
-    | [REQUIRED TAR FILES]
-    |- ILSVRC2012_img_train.tar
-    |- ILSVRC2012_img_val.tar
-    |- ILSVRC2012_devkit_t12.tar.gz
-    | [OPTIONAL IMAGE FOLDERS]
-    |- train/cls/xxx.${img_ext}
-    |- val/cls/xxx.${img_ext}
-    |- ILSVRC2012_devkit_t12/data/meta.mat
-    |- ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt
+    .. code-block:: bash
+
+        ${root}/
+        | [REQUIRED TAR FILES]
+        |- ILSVRC2012_img_train.tar
+        |- ILSVRC2012_img_val.tar
+        |- ILSVRC2012_devkit_t12.tar.gz
+        | [OPTIONAL IMAGE FOLDERS]
+        |- train/cls/xxx.${img_ext}
+        |- val/cls/xxx.${img_ext}
+        |- ILSVRC2012_devkit_t12/data/meta.mat
+        |- ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt
    If the image folders don't exist, raw tar files are required to get extracted and processed.
    """
...
...
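For orientation, a minimal usage sketch of the dataset whose docstring is reformatted above. It is a hedged illustration only: the import path and the root/train arguments are assumed from common MegEngine conventions and are not confirmed by this diff.

    # Hypothetical sketch: load ImageNet from the folder layout shown in the docstring.
    from megengine.data.dataset import ImageNet

    root = "/path/to/imagenet"              # holds the required tar files or the train/ and val/ folders
    train_set = ImageNet(root, train=True)  # raw tars are extracted and processed if the folders are missing
    val_set = ImageNet(root, train=False)
    print(len(train_set), len(val_set))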
python_module/megengine/data/transform/vision/transform.py
...
...
@@ -159,7 +159,7 @@ class ToMode(VisionTransform):
    while the Neural Network might use CHW mode input tensor
    :param mode: Output mode of input. Use "CHW" mode by default.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, mode="CHW", *, order=None):
...
...
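To make the HWC-to-CHW conversion documented for ToMode concrete, a small hedged sketch follows; the apply() call is assumed from the VisionTransform interface and is not part of this diff.

    # Hypothetical sketch: convert an OpenCV-style HWC image into the CHW layout
    # that network input tensors usually expect.
    import numpy as np
    from megengine.data.transform import ToMode

    image_hwc = np.random.randint(0, 256, size=(224, 224, 3), dtype=np.uint8)
    image_chw = ToMode("CHW").apply(image_hwc)   # apply() assumed from VisionTransform
    print(image_hwc.shape, image_chw.shape)      # (224, 224, 3) (3, 224, 224)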
@@ -185,14 +185,14 @@ class Compose(VisionTransform):
    r"""
    Composes several transforms together.
-    :param transforms: List of ``VisionTransform`` to compose.
+    :param transforms: List of :class:`VisionTransform` to compose.
    :param batch_compose: Whether use shuffle_indices for batch data or not.
        If True, use original input sequence.
        Otherwise, the shuffle_indices will be used for transforms.
    :param shuffle_indices: Indices used for random shuffle, start at 1.
        For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform
        will be random shuffled, the 2nd and 4th transform will also be shuffled.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    Example:
...
...
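The shuffle_indices behaviour described in the Compose docstring can be sketched as follows; the transform names and constructor call are assumptions based on the classes touched later in this diff, not a verified snippet.

    # Hypothetical sketch: the (1st, 3rd) and (2nd, 4th) transforms are shuffled
    # among themselves on each call, as the docstring's example describes.
    from megengine.data.transform import (
        Compose,
        BrightnessTransform,
        ContrastTransform,
        SaturationTransform,
        HueTransform,
    )

    transform = Compose(
        [
            BrightnessTransform(0.1),   # index 1
            ContrastTransform(0.1),     # index 2
            SaturationTransform(0.1),   # index 3
            HueTransform(0.1),          # index 4
        ],
        shuffle_indices=[(1, 3), (2, 4)],
    )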
@@ -265,7 +265,7 @@ class TorchTransformCompose(VisionTransform):
    such as Normalize and ToTensor in torchvision.
    :param transforms: The same with ``Compose``
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, transforms, *, order=None):
...
...
@@ -312,7 +312,7 @@ class Pad(VisionTransform):
    :param value: Padding value of image, could be a sequence of int or float.
        if it's float value, the dtype of image will be casted to float32 also.
    :param mask_value: Padding value of segmentation map.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, size=0, value=0, mask_value=0, *, order=None):
...
...
@@ -358,7 +358,7 @@ class Resize(VisionTransform):
    * cv2.INTER_AREA – resampling using pixel area relation.
    * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood.
    * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None):
...
...
@@ -477,7 +477,7 @@ class RandomResize(VisionTransform):
    r"""Resize the input data randomly.
    :param scale_range: .
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None):
...
...
@@ -525,7 +525,7 @@ class RandomCrop(VisionTransform):
    :param output_size: Target size of output image, with (height, width) shape.
    :param padding_size: The same with `size` in ``Pad``
    :param padding_value: The same with `value` in ``Pad``
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(
...
...
@@ -583,7 +583,7 @@ class RandomResizedCrop(VisionTransform):
    :param output_size: Target size of output image, with (height, width) shape.
    :param scale_range: Range of size of the origin size cropped. Default: (0.08, 1.0)
    :param ratio_range: Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(
...
...
@@ -667,7 +667,7 @@ class CenterCrop(VisionTransform):
    r"""Crops the given the input data at the center.
    :param output_size: Target size of output image, with (height, width) shape.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, output_size, *, order=None):
...
...
@@ -710,7 +710,7 @@ class RandomHorizontalFlip(VisionTransform):
    r"""Horizontally flip the input data randomly with a given probability.
    :param p: probability of the input data being flipped. Default: 0.5
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, prob: float = 0.5, *, order=None):
...
...
@@ -742,7 +742,7 @@ class RandomVerticalFlip(VisionTransform):
    r"""Vertically flip the input data randomly with a given probability.
    :param p: probability of the input data being flipped. Default: 0.5
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, prob: float = 0.5, *, order=None):
...
...
@@ -778,7 +778,7 @@ class Normalize(VisionTransform):
    :param mean: Sequence of means for each channel.
    :param std: Sequence of standard deviations for each channel.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, mean=0.0, std=1.0, *, order=None):
...
...
@@ -802,7 +802,7 @@ class GaussianNoise(VisionTransform):
    :param mean: Gaussian mean used to generate noise.
    :param std: Gaussian standard deviation used to generate noise.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, mean=0.0, std=1.0, *, order=None):
...
...
@@ -828,7 +828,7 @@ class BrightnessTransform(VisionTransform):
    :param value: How much to adjust the brightness. Can be any
        non negative number. 0 gives the original image
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, value, *, order=None):
...
...
@@ -859,7 +859,7 @@ class ContrastTransform(VisionTransform):
    :param value: How much to adjust the contrast. Can be any
        non negative number. 0 gives the original image
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, value, *, order=None):
...
...
@@ -890,7 +890,7 @@ class SaturationTransform(VisionTransform):
    :param value: How much to adjust the saturation. Can be any
        non negative number. 0 gives the original image
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, value, *, order=None):
...
...
@@ -921,7 +921,7 @@ class HueTransform(VisionTransform):
    :param value: How much to adjust the hue. Can be any number
        between 0 and 0.5, 0 gives the original image
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, value, *, order=None):
...
...
@@ -969,7 +969,7 @@ class ColorJitter(VisionTransform):
    :param hue: How much to jitter hue.
        Chosen uniformly from [-hue, hue] or the given [min, max].
        Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
-    :param order: The same with ``VisionTransform``
+    :param order: The same with :class:`VisionTransform`
    """
    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None):
...
...
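Taken together, the transforms above are typically chained into one augmentation pipeline. The sketch below is a hedged illustration of such a pipeline; parameter values (jitter strengths, normalization statistics) are placeholders, not values taken from this commit.

    # Hypothetical sketch: a training-time augmentation pipeline built from the
    # transforms whose docstrings are touched in this diff.
    from megengine.data.transform import (
        Compose,
        RandomResizedCrop,
        RandomHorizontalFlip,
        ColorJitter,
        Normalize,
        ToMode,
    )

    augmentation = Compose(
        [
            RandomResizedCrop(224),            # random area/aspect-ratio crop, resized to 224x224
            RandomHorizontalFlip(),            # flip with probability 0.5
            ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
            Normalize(mean=128.0, std=64.0),   # placeholder statistics
            ToMode("CHW"),                     # HWC -> CHW for the network input
        ]
    )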
python_module/megengine/functional/loss.py
...
...
@@ -21,12 +21,12 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
    The mean absolute error can be described as:
-    .. math::
-        \ell(x,y) = mean\left(L \right)
+    .. math:: \ell(x,y) = mean\left(L \right)
    where
    .. math::
        L = \{l_1,\dots,l_N\}, \quad
        l_n = \left| x_n - y_n \right|,
...
...
@@ -36,11 +36,6 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
    :param pred: The predicted result from model.
    :param label: The ground truth to compare.
-    Shape:
-        - pred: :math:`(N, *)` where :math:`*` means any number of additional
-          dimensions
-        - label: :math:`(N, *)`. Same shape as ``pred``
    Examples:
    .. testcode::
...
...
@@ -72,12 +67,12 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor:
    The mean squared error can be described as:
-    .. math::
-        \ell(x, y) = mean\left( L \right)
+    .. math:: \ell(x, y) = mean\left( L \right)
    where
    .. math::
        L = \{l_1,\dots,l_N\}, \quad
        l_n = \left( x_n - y_n \right)^2,
...
...
@@ -100,10 +95,10 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor:
def cross_entropy(inp: Tensor, target: Tensor, axis: int = 1, ignore_index: int = -1) -> Tensor:
-    r"""Returns the cross entropy loss in a classification problem.
+    r"""
+    Returns the cross entropy loss in a classification problem.
-    .. math::
-        \textrm{CrossEntropy}(x, y) = - \sum_{i} y_i\log(x_i)
+    .. math:: \textrm{CrossEntropy}(x, y) = - \sum_{i} y_i\log(x_i)
    :param inp: The input tensor representing the predicted probability.
    :param label: The input tensor representing the classification label.
...
...
@@ -118,17 +113,19 @@ def cross_entropy(
        from megengine import tensor
        import megengine.functional as F
        data_shape = (1, 2)
        label_shape = (1, )
-        pred = tensor(
-            np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape)
-        )
-        label = tensor(
-            np.ones(label_shape, dtype=np.int32)
-        )
+        pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape))
+        label = tensor(np.ones(label_shape, dtype=np.int32))
        loss = F.cross_entropy(pred, label)
+        print(loss.numpy())
+    Outputs:
+    .. testoutput::
+        [0.69]
    """
    n0 = inp.ndim
...
...
@@ -157,8 +154,7 @@ def cross_entropy_with_softmax(
    When using label smoothing, the label distribution is as follows:
-    .. math::
-        y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K
+    .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K
    where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively.
    k is the index of label distribution. :math:`\alpha` is label_smooth and :math:`K` is the number of classes.
...
...
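As a worked example of the label-smoothing formula above (an illustration, not part of the diff): with K = 4 classes, alpha = 0.1 and a one-hot label for class 0, every entry becomes y_k * (1 - alpha) + alpha / K.

    # Worked example of y^{LS}_k = y_k * (1 - alpha) + alpha / K with K = 4, alpha = 0.1.
    import numpy as np

    K, alpha = 4, 0.1
    y = np.array([1.0, 0.0, 0.0, 0.0])   # one-hot label for class 0
    y_ls = y * (1 - alpha) + alpha / K
    print(y_ls)                           # [0.925 0.025 0.025 0.025]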
@@ -199,6 +195,7 @@ def triplet_margin_loss(
    Creates a criterion that measures the triplet loss given an input tensors.
    .. math::
        L(a, p, n) = max\left\{d\left(a_{i},p_{i}\right)-d\left(a_{i}, n_{i}\right)+margin, 0\right\},\
        d\left(x_{i},y_{i}\right)=\left\|x_{i}-y_{i}\right\|_{p}
...
...
@@ -256,18 +253,16 @@ def nll_loss(
    r"""
    The negative log likelihood loss.
-    Shape:
-        - pred: :math:`(N, *)` where :math:`*` means any number of additional
-          dimensions
-        - label: :math:`(N, *)`. Same shape as ``pred``
    :param pred: The predicted result from model.
    :param label: The ground truth to compare.
    Examples:
    .. testcode::
        import numpy as np
        from megengine import tensor
        import megengine.functional as F
-        from megengine.test.utils import assertTensorClose
        data_shape = (2, 2)
        label_shape = (2, )
...
...
@@ -280,7 +275,14 @@ def nll_loss(
        pred = F.log(F.softmax(data))
        loss1 = F.nll_loss(pred, label)
        loss2 = F.cross_entropy_with_softmax(data, label)
-        assertTensorClose(loss1.numpy(), loss2.numpy(), max_err=5e-6)
+        print(loss1.numpy(), loss2.numpy())
+    Outputs:
+    .. testoutput::
+        [0.6576154] [0.6576154]
    """
    n0 = pred.ndim
    n1 = label.ndim
...
...