add input and output description docs for vision transform (#34926)

* add input and output docs for vision transform

add input and output description docs for vision transform (#34926)
* add input and output docs for vision transform
4f54891c · LielinJiang · GitHub · 5eefc8c7 · 4f54891c
显示空白变更内容
内联并排

Showing with 128 additions and 11 deletions

python/paddle/vision/transforms/transforms.py python/paddle/vision/transforms/transforms.py +128 -11

未找到文件。
--- a/python/paddle/vision/transforms/transforms.py
+++ b/python/paddle/vision/transforms/transforms.py
@@ -310,6 +310,13 @@ class ToTensor(BaseTransform):
            'CHW'. Default: 'CHW'.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray): The input image with shape (H x W x C).
+        - output(np.ndarray): A tensor with shape (C x H x W) or (H x W x C) according to the option data_format.
+    Returns:
+        A callable object of ToTensor.
    Examples:
        .. code-block:: python
@@ -368,6 +375,13 @@ class Resize(BaseTransform):
            - "lanczos": cv2.INTER_LANCZOS4
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A resized image.
+    Returns:
+        A callable object of Resize.
    Examples:
        .. code-block:: python
@@ -422,6 +436,13 @@ class RandomResizedCrop(BaseTransform):
            - "lanczos": cv2.INTER_LANCZOS4
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A cropped image.
+    Returns:
+        A callable object of RandomResizedCrop.
    Examples:
        .. code-block:: python
@@ -503,6 +524,13 @@ class CenterCrop(BaseTransform):
        size (int|list|tuple): Target size of output image, with (height, width) shape.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A cropped image.
+    Returns:
+        A callable object of CenterCrop.
    Examples:
        .. code-block:: python
@@ -537,6 +565,13 @@ class RandomHorizontalFlip(BaseTransform):
        prob (float, optional): Probability of the input data being flipped. Should be in [0, 1]. Default: 0.5
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A horizontally flipped image.
+    Returns:
+        A callable object of RandomHorizontalFlip.
    Examples:
        .. code-block:: python
@@ -571,6 +606,13 @@ class RandomVerticalFlip(BaseTransform):
        prob (float, optional): Probability of the input data being flipped. Default: 0.5
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A vertically flipped image.
+    Returns:
+        A callable object of RandomVerticalFlip.
    Examples:
        .. code-block:: python
@@ -579,7 +621,7 @@ class RandomVerticalFlip(BaseTransform):
            from PIL import Image
            from paddle.vision.transforms import RandomVerticalFlip
-            transform = RandomVerticalFlip(224)
+            transform = RandomVerticalFlip()
            fake_img = Image.fromarray((np.random.rand(300, 320, 3) * 255.).astype(np.uint8))
@@ -613,6 +655,13 @@ class Normalize(BaseTransform):
        to_rgb (bool, optional): Whether to convert to rgb. Default: False.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A normalized array or tensor.
+    Returns:
+        A callable object of Normalize.
    Examples:
        .. code-block:: python
@@ -666,6 +715,14 @@ class Transpose(BaseTransform):
        order (list|tuple, optional): Target order of input data. Default: (2, 0, 1).
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(np.ndarray|Paddle.Tensor): A transposed array or tensor. If input 
+            is a PIL.Image, output will be converted to np.ndarray automatically.
+    Returns:
+        A callable object of Transpose.
    Examples:
        .. code-block:: python
@@ -707,6 +764,13 @@ class BrightnessTransform(BaseTransform):
            non negative number. 0 gives the original image
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): An image with a transform in brightness.
+    Returns:
+        A callable object of BrightnessTransform.
    Examples:
        .. code-block:: python
@@ -743,6 +807,13 @@ class ContrastTransform(BaseTransform):
            non negative number. 0 gives the original image
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): An image with a transform in contrast.
+    Returns:
+        A callable object of ContrastTransform.
    Examples:
        .. code-block:: python
@@ -781,6 +852,13 @@ class SaturationTransform(BaseTransform):
            non negative number. 0 gives the original image
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): An image with a transform in saturation.
+    Returns:
+        A callable object of SaturationTransform.
    Examples:
        .. code-block:: python
@@ -817,6 +895,13 @@ class HueTransform(BaseTransform):
            between 0 and 0.5, 0 gives the original image
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): An image with a transform in hue.
+    Returns:
+        A callable object of HueTransform.
    Examples:
        .. code-block:: python
@@ -860,6 +945,13 @@ class ColorJitter(BaseTransform):
            Chosen uniformly from [-hue, hue]. Should have 0<= hue <= 0.5.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A color jittered image.
+    Returns:
+        A callable object of ColorJitter.
    Examples:
        .. code-block:: python
@@ -939,6 +1031,13 @@ class RandomCrop(BaseTransform):
            desired size to avoid raising an exception. Default: False.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A randomly cropped image.
+    Returns:
+        A callable object of RandomCrop.
    Examples:
        .. code-block:: python
@@ -1041,6 +1140,13 @@ class Pad(BaseTransform):
            will result in ``[2, 1, 1, 2, 3, 4, 4, 3]``.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A padded image.
+    Returns:
+        A callable object of Pad.
    Examples:
        .. code-block:: python
@@ -1114,6 +1220,13 @@ class RandomRotation(BaseTransform):
            Default is the center of the image.
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+    Shape:
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): A rotated image.
+    Returns:
+        A callable object of RandomRotation.
    Examples:
        .. code-block:: python
@@ -1181,11 +1294,15 @@ class Grayscale(BaseTransform):
        num_output_channels (int): (1 or 3) number of channels desired for output image
        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
-    Returns:
+    Shape:
-        CV Image: Grayscale version of the input.
+        - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
+        - output(PIL.Image|np.ndarray|Paddle.Tensor): Grayscale version of the input image. 
            - If num_output_channels == 1 : returned image is single channel
            - If num_output_channels == 3 : returned image is 3 channel with r == g == b
+    Returns:
+        A callable object of Grayscale.
    Examples:
        .. code-block:: python