diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py
index b5662f9ecf4f9df055b02117288fcdff57855d93..31a430789d636b35edbf833ac105236834c47e43 100644
--- a/python/paddle/hapi/model.py
+++ b/python/paddle/hapi/model.py
@@ -1076,7 +1076,6 @@ class Model(object):
         Examples:
 
             .. code-block:: python
-              :name: code-example-train-batch
             
                 import paddle
                 import paddle.nn as nn
@@ -1128,7 +1127,6 @@ class Model(object):
         Examples:
 
             .. code-block:: python
-              :name: code-example-eval-batch
 
                 import paddle
                 import paddle.nn as nn
@@ -1176,7 +1174,6 @@ class Model(object):
         Examples:
 
             .. code-block:: python
-              :name: code-example-predict-batch
 
                 import paddle
                 import paddle.nn as nn
@@ -1236,7 +1233,6 @@ class Model(object):
         Examples:
 
             .. code-block:: python
-              :name: code-example-save
 
                 import paddle
                 import paddle.nn as nn
@@ -1317,7 +1313,6 @@ class Model(object):
         Examples:
 
             .. code-block:: python
-              :name: code-example-load
 
                 import paddle
                 import paddle.nn as nn
@@ -1404,7 +1399,6 @@ class Model(object):
         Examples:
 
             .. code-block:: python
-              :name: code-example-parameters
             
                 import paddle
                 import paddle.nn as nn
@@ -1648,7 +1642,7 @@ class Model(object):
                How to make a batch is done internally.
 
             .. code-block:: python
-              :name: code-example-fit-1
+              :name: code-example1
 
                 import paddle
                 import paddle.vision.transforms as T
@@ -1688,7 +1682,7 @@ class Model(object):
                DataLoader.
 
             .. code-block:: python
-              :name: code-example-fit-2
+              :name: code-example2
 
                 import paddle
                 import paddle.vision.transforms as T
@@ -1844,7 +1838,6 @@ class Model(object):
         Examples:
 
           .. code-block:: python
-            :name: code-example-evaluate
 
                 import paddle
                 import paddle.vision.transforms as T
@@ -1946,7 +1939,6 @@ class Model(object):
         Examples:
 
           .. code-block:: python
-            :name: code-example-predict
 
                 import numpy as np
                 import paddle
@@ -2179,7 +2171,6 @@ class Model(object):
 
         Examples:
             .. code-block:: python
-              :name: code-example-summary
 
                 import paddle
                 from paddle.static import InputSpec
diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py
index f31aab9eccf267225c0754840602448a842c5cad..f083d01c5a8cc2f5988fddeeb3dcb8b07614efb5 100644
--- a/python/paddle/vision/datasets/cifar.py
+++ b/python/paddle/vision/datasets/cifar.py
@@ -46,54 +46,63 @@ class Cifar10(Dataset):
     dataset, which has 10 categories.
 
     Args:
-        data_file(str): path to data file, can be set None if
+        data_file (str, optional): Path to data file, can be set None if
             :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar
-        mode(str): 'train', 'test' mode. Default 'train'.
-        transform(callable): transform to perform on image, None for no transform.
-        download(bool): download dataset automatically if :attr:`data_file` is None. Default True
-        backend(str, optional): Specifies which type of image to be returned: 
-            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. 
-            If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
+        mode (str, optional): Either train or test mode. Default 'train'.
+        transform (Callable, optional): transform to perform on image, None for no transform. Default: None.
+        download (bool, optional): download dataset automatically if :attr:`data_file` is None. Default True.
+        backend (str, optional): Specifies which type of image to be returned:
+            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
+            If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
             default backend is 'pil'. Default: None.
 
     Returns:
-        Dataset: instance of cifar-10 dataset
+        :ref:`api_paddle_io_Dataset`. An instance of Cifar10 dataset.
 
     Examples:
 
         .. code-block:: python
 
-            import paddle
-            import paddle.nn as nn
+            import itertools
+            import paddle.vision.transforms as T
             from paddle.vision.datasets import Cifar10
-            from paddle.vision.transforms import Normalize
 
-            class SimpleNet(paddle.nn.Layer):
-                def __init__(self):
-                    super(SimpleNet, self).__init__()
-                    self.fc = nn.Sequential(
-                        nn.Linear(3072, 10),
-                        nn.Softmax())
-
-                def forward(self, image, label):
-                    image = paddle.reshape(image, (1, -1))
-                    return self.fc(image), label
-
-
-            normalize = Normalize(mean=[0.5, 0.5, 0.5],
-                                  std=[0.5, 0.5, 0.5],
-                                  data_format='HWC')
-            cifar10 = Cifar10(mode='train', transform=normalize)
-
-            for i in range(10):
-                image, label = cifar10[i]
-                image = paddle.to_tensor(image)
-                label = paddle.to_tensor(label)
-
-                model = SimpleNet()
-                image, label = model(image, label)
-                print(image.numpy().shape, label.numpy().shape)
 
+            cifar10 = Cifar10()
+            print(len(cifar10))
+            # 50000
+
+            for i in range(5):  # only show first 5 images
+                img, label = cifar10[i]
+                # do something with img and label
+                print(type(img), img.size, label)
+                # <class 'PIL.Image.Image'> (32, 32) 6
+
+
+            transform = T.Compose(
+                [
+                    T.Resize(64),
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5],
+                        to_rgb=True,
+                    ),
+                ]
+            )
+
+            cifar10_test = Cifar10(
+                mode="test",
+                transform=transform,  # apply transform to every image
+                backend="cv2",  # use OpenCV as image transform backend
+            )
+            print(len(cifar10_test))
+            # 10000
+
+            for img, label in itertools.islice(iter(cifar10_test), 5):  # only show first 5 images
+                # do something with img and label
+                print(type(img), img.shape, label)
+                # <class 'paddle.Tensor'> [3, 64, 64] 3
     """
 
     def __init__(self,
@@ -179,54 +188,63 @@ class Cifar100(Cifar10):
     dataset, which has 100 categories.
 
     Args:
-        data_file(str): path to data file, can be set None if
-            :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar
-        mode(str): 'train', 'test' mode. Default 'train'.
-        transform(callable): transform to perform on image, None for no transform.
-        download(bool): download dataset automatically if :attr:`data_file` is None. Default True
-        backend(str, optional): Specifies which type of image to be returned: 
-            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. 
-            If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
+        data_file (str, optional): path to data file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/cifar
+        mode (str, optional): Either train or test mode. Default 'train'.
+        transform (Callable, optional): transform to perform on image, None for no transform. Default: None.
+        download (bool, optional): download dataset automatically if :attr:`data_file` is None. Default True.
+        backend (str, optional): Specifies which type of image to be returned:
+            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
+            If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
             default backend is 'pil'. Default: None.
 
     Returns:
-        Dataset: instance of cifar-100 dataset
+        :ref:`api_paddle_io_Dataset`. An instance of Cifar100 dataset.
 
     Examples:
 
         .. code-block:: python
 
-            import paddle
-            import paddle.nn as nn
+            import itertools
+            import paddle.vision.transforms as T
             from paddle.vision.datasets import Cifar100
-            from paddle.vision.transforms import Normalize
-
-            class SimpleNet(paddle.nn.Layer):
-                def __init__(self):
-                    super(SimpleNet, self).__init__()
-                    self.fc = nn.Sequential(
-                        nn.Linear(3072, 10),
-                        nn.Softmax())
-
-                def forward(self, image, label):
-                    image = paddle.reshape(image, (1, -1))
-                    return self.fc(image), label
-
-
-            normalize = Normalize(mean=[0.5, 0.5, 0.5],
-                                  std=[0.5, 0.5, 0.5],
-                                  data_format='HWC')
-            cifar100 = Cifar100(mode='train', transform=normalize)
-
-            for i in range(10):
-                image, label = cifar100[i]
-                image = paddle.to_tensor(image)
-                label = paddle.to_tensor(label)
 
-                model = SimpleNet()
-                image, label = model(image, label)
-                print(image.numpy().shape, label.numpy().shape)
 
+            cifar100 = Cifar100()
+            print(len(cifar100))
+            # 50000
+
+            for i in range(5):  # only show first 5 images
+                img, label = cifar100[i]
+                # do something with img and label
+                print(type(img), img.size, label)
+                # <class 'PIL.Image.Image'> (32, 32) 19
+
+
+            transform = T.Compose(
+                [
+                    T.Resize(64),
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5],
+                        to_rgb=True,
+                    ),
+                ]
+            )
+
+            cifar100_test = Cifar100(
+                mode="test",
+                transform=transform,  # apply transform to every image
+                backend="cv2",  # use OpenCV as image transform backend
+            )
+            print(len(cifar100_test))
+            # 10000
+
+            for img, label in itertools.islice(iter(cifar100_test), 5):  # only show first 5 images
+                # do something with img and label
+                print(type(img), img.shape, label)
+                # <class 'paddle.Tensor'> [3, 64, 64] 49
     """
 
     def __init__(self,
diff --git a/python/paddle/vision/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py
index ef59d24ed6451f7902768a7a68fb970fa3f7ed91..722f52acf69423db52e7c2c73edcb03afde0c683 100644
--- a/python/paddle/vision/datasets/flowers.py
+++ b/python/paddle/vision/datasets/flowers.py
@@ -42,36 +42,71 @@ MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': 'valid'}
 
 class Flowers(Dataset):
     """
-    Implementation of `Flowers <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_
-    dataset
+    Implementation of `Flowers102 <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_
+    dataset.
 
     Args:
-        data_file(str): path to data file, can be set None if
-            :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/flowers/
-        label_file(str): path to label file, can be set None if
-            :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/flowers/
-        setid_file(str): path to subset index file, can be set
-            None if :attr:`download` is True. Default None
-        mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
-        transform(callable): transform to perform on image, None for no transform.
-        download(bool): download dataset automatically if :attr:`data_file` is None. Default True
-        backend(str, optional): Specifies which type of image to be returned: 
-            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. 
-            If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
+        data_file (str, optional): Path to data file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
+        label_file (str, optional): Path to label file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
+        setid_file (str, optional): Path to subset index file, can be set
+            None if :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
+        mode (str, optional): One of 'train', 'valid' or 'test' mode. Default 'train'.
+        transform (Callable, optional): transform to perform on image, None for no transform. Default: None.
+        download (bool, optional): download dataset automatically if :attr:`data_file` is None. Default: True.
+        backend (str, optional): Specifies which type of image to be returned:
+            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
+            If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
             default backend is 'pil'. Default: None.
 
+    Returns:
+        :ref:`api_paddle_io_Dataset`. An instance of Flowers dataset.
+
     Examples:
         
         .. code-block:: python
 
+            import itertools
+            import paddle.vision.transforms as T
             from paddle.vision.datasets import Flowers
 
-            flowers = Flowers(mode='test')
-
-            for i in range(len(flowers)):
-                sample = flowers[i]
-                print(sample[0].size, sample[1])
 
+            flowers = Flowers()
+            print(len(flowers))
+            # 6149
+
+            for i in range(5):  # only show first 5 images
+                img, label = flowers[i]
+                # do something with img and label
+                print(type(img), img.size, label)
+                # <class 'PIL.JpegImagePlugin.JpegImageFile'> (523, 500) [1]
+
+
+            transform = T.Compose(
+                [
+                    T.Resize(64),
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5],
+                        to_rgb=True,
+                    ),
+                ]
+            )
+
+            flowers_test = Flowers(
+                mode="test",
+                transform=transform,  # apply transform to every image
+                backend="cv2",  # use OpenCV as image transform backend
+            )
+            print(len(flowers_test))
+            # 1020
+
+            for img, label in itertools.islice(iter(flowers_test), 5):  # only show first 5 images
+                # do something with img and label
+                print(type(img), img.shape, label)
+                # <class 'paddle.Tensor'> [3, 64, 96] [1]
     """
 
     def __init__(self,
diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py
index c3f1b61f30ed9adf344c757bce6ff01f33dfd265..0d874765729ab7b2f9b5bcba585588f25012800e 100644
--- a/python/paddle/vision/datasets/folder.py
+++ b/python/paddle/vision/datasets/folder.py
@@ -65,6 +65,8 @@ def make_dataset(dir, class_to_idx, extensions, is_valid_file=None):
 class DatasetFolder(Dataset):
     """A generic data loader where the samples are arranged in this way:
 
+    .. code-block:: text
+
         root/class_a/1.ext
         root/class_a/2.ext
         root/class_a/3.ext
@@ -74,55 +76,127 @@ class DatasetFolder(Dataset):
         root/class_b/789.ext
 
     Args:
-        root (string): Root directory path.
-        loader (callable|optional): A function to load a sample given its path.
-        extensions (list[str]|tuple[str]|optional): A list of allowed extensions.
-            both extensions and is_valid_file should not be passed.
-        transform (callable|optional): A function/transform that takes in
-            a sample and returns a transformed version.
-        is_valid_file (callable|optional): A function that takes path of a file
-            and check if the file is a valid file (used to check of corrupt files)
-            both extensions and is_valid_file should not be passed.
-
-     Attributes:
-        classes (list): List of the class names.
-        class_to_idx (dict): Dict with items (class_name, class_index).
-        samples (list): List of (sample path, class_index) tuples
-        targets (list): The class_index value for each image in the dataset
+        root (str): Root directory path.
+        loader (Callable, optional): A function to load a sample given its path. Default: None.
+        extensions (list[str]|tuple[str], optional): A list of allowed extensions.
+            :attr:`extensions` and :attr:`is_valid_file` should not be passed at the same time.
+            If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
+            '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
+        transform (Callable, optional): A function/transform that takes in
+            a sample and returns a transformed version. Default: None.
+        is_valid_file (Callable, optional): A function that takes the path of a file
+            and checks whether the file is valid. :attr:`extensions` and
+            :attr:`is_valid_file` should not be passed at the same time. Default: None.
+
+    Returns:
+        :ref:`api_paddle_io_Dataset`. An instance of DatasetFolder.
+
+    Attributes:
+        classes (list[str]): List of the class names.
+        class_to_idx (dict[str, int]): Dict with items (class_name, class_index).
+        samples (list[tuple[str, int]]): List of (sample_path, class_index) tuples.
+        targets (list[int]): The class_index value for each image in the dataset.
 
     Example:
 
         .. code-block:: python
 
-            import os
-            import cv2
-            import tempfile
             import shutil
+            import tempfile
+            import cv2
             import numpy as np
+            import paddle.vision.transforms as T
+            from pathlib import Path
             from paddle.vision.datasets import DatasetFolder
 
-            def make_fake_dir():
-                data_dir = tempfile.mkdtemp()
-
-                for i in range(2):
-                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
-                    if not os.path.exists(sub_dir):
-                        os.makedirs(sub_dir)
-                    for j in range(2):
-                        fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
-                        cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
-                return data_dir
-
-            temp_dir = make_fake_dir()
-            # temp_dir is root dir
-            # temp_dir/class_1/img1_1.jpg
-            # temp_dir/class_2/img2_1.jpg
-            data_folder = DatasetFolder(temp_dir)
-
-            for items in data_folder:
-                break
-                
-            shutil.rmtree(temp_dir)
+
+            def make_fake_file(img_path: str):
+                if img_path.endswith((".jpg", ".png", ".jpeg")):
+                    fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
+                    cv2.imwrite(img_path, fake_img)
+                elif img_path.endswith(".txt"):
+                    with open(img_path, "w") as f:
+                        f.write("This is a fake file.")
+
+            def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
+                root = Path(root)
+                root.mkdir(parents=True, exist_ok=True)
+                for subpath in directory_hierarchy:
+                    if isinstance(subpath, str):
+                        filepath = root / subpath
+                        file_maker(str(filepath))
+                    else:
+                        dirname = list(subpath.keys())[0]
+                        make_directory(root / dirname, subpath[dirname])
+
+            directory_hierarchy = [
+                {"class_0": [
+                    "abc.jpg",
+                    "def.png"]},
+                {"class_1": [
+                    "ghi.jpeg",
+                    "jkl.png",
+                    {"mno": [
+                        "pqr.jpeg",
+                        "stu.jpg"]}]},
+                "this_will_be_ignored.txt",
+            ]
+
+            # You can replace this with any directory to explore the structure
+            # of generated data. e.g. fake_data_dir = "./temp_dir"
+            fake_data_dir = tempfile.mkdtemp()
+            make_directory(fake_data_dir, directory_hierarchy)
+            data_folder_1 = DatasetFolder(fake_data_dir)
+            print(data_folder_1.classes)
+            # ['class_0', 'class_1']
+            print(data_folder_1.class_to_idx)
+            # {'class_0': 0, 'class_1': 1}
+            print(data_folder_1.samples)
+            # [('./temp_dir/class_0/abc.jpg', 0), ('./temp_dir/class_0/def.png', 0),
+            #  ('./temp_dir/class_1/ghi.jpeg', 1), ('./temp_dir/class_1/jkl.png', 1),
+            #  ('./temp_dir/class_1/mno/pqr.jpeg', 1), ('./temp_dir/class_1/mno/stu.jpg', 1)]
+            print(data_folder_1.targets)
+            # [0, 0, 1, 1, 1, 1]
+            print(len(data_folder_1))
+            # 6
+
+            for i in range(len(data_folder_1)):
+                img, label = data_folder_1[i]
+                # do something with img and label
+                print(type(img), img.size, label)
+                # <class 'PIL.Image.Image'> (32, 32) 0
+
+
+            transform = T.Compose(
+                [
+                    T.Resize(64),
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5],
+                        to_rgb=True,
+                    ),
+                ]
+            )
+
+            data_folder_2 = DatasetFolder(
+                fake_data_dir,
+                loader=lambda x: cv2.imread(x),  # load image with OpenCV
+                extensions=(".jpg",),  # only load *.jpg files
+                transform=transform,  # apply transform to every image
+            )
+
+            print([img_path for img_path, label in data_folder_2.samples])
+            # ['./temp_dir/class_0/abc.jpg', './temp_dir/class_1/mno/stu.jpg']
+            print(len(data_folder_2))
+            # 2
+
+            for img, label in iter(data_folder_2):
+                # do something with img and label
+                print(type(img), img.shape, label)
+                # <class 'paddle.Tensor'> [3, 64, 64] 0
+
+            shutil.rmtree(fake_data_dir)
     """
 
     def __init__(self,
@@ -223,54 +297,121 @@ def default_loader(path):
 class ImageFolder(Dataset):
     """A generic data loader where the samples are arranged in this way:
 
+    .. code-block:: text
+
         root/1.ext
         root/2.ext
         root/sub_dir/3.ext
 
     Args:
-        root (string): Root directory path.
-        loader (callable, optional): A function to load a sample given its path.
+        root (str): Root directory path.
+        loader (Callable, optional): A function to load a sample given its path. Default: None.
         extensions (list[str]|tuple[str], optional): A list of allowed extensions.
-            both extensions and is_valid_file should not be passed.
-        transform (callable, optional): A function/transform that takes in
-            a sample and returns a transformed version.
-        is_valid_file (callable, optional): A function that takes path of a file
-            and check if the file is a valid file (used to check of corrupt files)
-            both extensions and is_valid_file should not be passed.
+            :attr:`extensions` and :attr:`is_valid_file` should not be passed at the same time.
+            If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
+            '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
+        transform (Callable, optional): A function/transform that takes in
+            a sample and returns a transformed version. Default: None.
+        is_valid_file (Callable, optional): A function that takes the path of a file
+            and checks whether the file is valid. :attr:`extensions` and
+            :attr:`is_valid_file` should not be passed at the same time. Default: None.
+
+    Returns:
+        :ref:`api_paddle_io_Dataset`. An instance of ImageFolder.
 
-     Attributes:
-        samples (list): List of sample path
+    Attributes:
+        samples (list[str]): List of sample paths.
 
     Example:
 
         .. code-block:: python
 
-            import os
-            import cv2
-            import tempfile
             import shutil
+            import tempfile
+            import cv2
             import numpy as np
+            import paddle.vision.transforms as T
+            from pathlib import Path
             from paddle.vision.datasets import ImageFolder
 
-            def make_fake_dir():
-                data_dir = tempfile.mkdtemp()
-
-                for i in range(2):
-                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
-                    if not os.path.exists(sub_dir):
-                        os.makedirs(sub_dir)
-                    for j in range(2):
-                        fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
-                        cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
-                return data_dir
-
-            temp_dir = make_fake_dir()
-            data_folder = ImageFolder(temp_dir)
-
-            for items in data_folder:
-                break
-                
-            shutil.rmtree(temp_dir)
+
+            def make_fake_file(img_path: str):
+                if img_path.endswith((".jpg", ".png", ".jpeg")):
+                    fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
+                    cv2.imwrite(img_path, fake_img)
+                elif img_path.endswith(".txt"):
+                    with open(img_path, "w") as f:
+                        f.write("This is a fake file.")
+
+            def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
+                root = Path(root)
+                root.mkdir(parents=True, exist_ok=True)
+                for subpath in directory_hierarchy:
+                    if isinstance(subpath, str):
+                        filepath = root / subpath
+                        file_maker(str(filepath))
+                    else:
+                        dirname = list(subpath.keys())[0]
+                        make_directory(root / dirname, subpath[dirname])
+
+            directory_hierarchy = [
+                "abc.jpg",
+                "def.png",
+                {"ghi": [
+                    "jkl.jpeg",
+                    {"mno": [
+                        "pqr.jpg"]}]},
+                "this_will_be_ignored.txt",
+            ]
+
+            # You can replace this with any directory to explore the structure
+            # of generated data. e.g. fake_data_dir = "./temp_dir"
+            fake_data_dir = tempfile.mkdtemp()
+            make_directory(fake_data_dir, directory_hierarchy)
+            image_folder_1 = ImageFolder(fake_data_dir)
+            print(image_folder_1.samples)
+            # ['./temp_dir/abc.jpg', './temp_dir/def.png',
+            #  './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
+            print(len(image_folder_1))
+            # 4
+
+            for i in range(len(image_folder_1)):
+                (img,) = image_folder_1[i]
+                # do something with img
+                print(type(img), img.size)
+                # <class 'PIL.Image.Image'> (32, 32)
+
+
+            transform = T.Compose(
+                [
+                    T.Resize(64),
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5],
+                        to_rgb=True,
+                    ),
+                ]
+            )
+
+            image_folder_2 = ImageFolder(
+                fake_data_dir,
+                loader=lambda x: cv2.imread(x),  # load image with OpenCV
+                extensions=(".jpg",),  # only load *.jpg files
+                transform=transform,  # apply transform to every image
+            )
+
+            print(image_folder_2.samples)
+            # ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
+            print(len(image_folder_2))
+            # 2
+
+            for (img,) in iter(image_folder_2):
+                # do something with img
+                print(type(img), img.shape)
+                # <class 'paddle.Tensor'> [3, 64, 64]
+
+            shutil.rmtree(fake_data_dir)
      """
 
     def __init__(self,
diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py
index 703a4f64cf44e468bf137e502585f2239bb748fc..34049ed2f72b59b1a6b0503b2778f0fe57a9f012 100644
--- a/python/paddle/vision/datasets/mnist.py
+++ b/python/paddle/vision/datasets/mnist.py
@@ -29,36 +29,67 @@ __all__ = []
 
 class MNIST(Dataset):
     """
-    Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset
+    Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset.
 
     Args:
-        image_path(str): path to image file, can be set None if
-            :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/mnist
-        label_path(str): path to label file, can be set None if
-            :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/mnist
-        mode(str): 'train' or 'test' mode. Default 'train'.
-        download(bool): download dataset automatically if
-            :attr:`image_path` :attr:`label_path` is not set. Default True
-        backend(str, optional): Specifies which type of image to be returned: 
-            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. 
-            If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
+        image_path (str, optional): Path to image file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/mnist.
+        label_path (str, optional): Path to label file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/mnist.
+        mode (str, optional): Either 'train' or 'test' mode. Default: 'train'.
+        transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
+        download (bool, optional): Download dataset automatically if
+            :attr:`image_path` :attr:`label_path` is not set. Default: True.
+        backend (str, optional): Specifies which type of image to be returned:
+            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
+            If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
             default backend is 'pil'. Default: None.
             
     Returns:
-        Dataset: MNIST Dataset.
+        :ref:`api_paddle_io_Dataset`. An instance of MNIST dataset.
 
     Examples:
         
         .. code-block:: python
 
+            import itertools
+            import paddle.vision.transforms as T
             from paddle.vision.datasets import MNIST
 
-            mnist = MNIST(mode='test')
-
-            for i in range(len(mnist)):
-                sample = mnist[i]
-                print(sample[0].size, sample[1])
 
+            mnist = MNIST()
+            print(len(mnist))
+            # 60000
+
+            for i in range(5):  # only show first 5 images
+                img, label = mnist[i]
+                # do something with img and label
+                print(type(img), img.size, label)
+                # <class 'PIL.Image.Image'> (28, 28) [5]
+
+
+            transform = T.Compose(
+                [
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[127.5],
+                        std=[127.5],
+                    ),
+                ]
+            )
+
+            mnist_test = MNIST(
+                mode="test",
+                transform=transform,  # apply transform to every image
+                backend="cv2",  # use OpenCV as image transform backend
+            )
+            print(len(mnist_test))
+            # 10000
+
+            for img, label in itertools.islice(iter(mnist_test), 5):  # only show first 5 images
+                # do something with img and label
+                print(type(img), img.shape, label)
+                # <class 'paddle.Tensor'> [1, 28, 28] [7]
     """
     NAME = 'mnist'
     URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
@@ -180,35 +211,67 @@ class MNIST(Dataset):
 
 class FashionMNIST(MNIST):
     """
-    Implementation `Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ dataset.
+    Implementation of `Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ dataset.
 
     Args:
-        image_path(str): path to image file, can be set None if
-            :attr:`download` is True. Default None
-        label_path(str): path to label file, can be set None if
-            :attr:`download` is True. Default None
-        mode(str): 'train' or 'test' mode. Default 'train'.
-        download(bool): whether to download dataset automatically if
-            :attr:`image_path` :attr:`label_path` is not set. Default True
-        backend(str, optional): Specifies which type of image to be returned: 
-            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. 
-            If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
+        image_path (str, optional): Path to image file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/fashion-mnist.
+        label_path (str, optional): Path to label file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/fashion-mnist.
+        mode (str, optional): Either 'train' or 'test' mode. Default: 'train'.
+        transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
+        download (bool, optional): Whether to download dataset automatically if
+            :attr:`image_path` :attr:`label_path` is not set. Default: True.
+        backend (str, optional): Specifies which type of image to be returned:
+            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
+            If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
             default backend is 'pil'. Default: None.
             
     Returns:
-        Dataset: Fashion-MNIST Dataset.
+        :ref:`api_paddle_io_Dataset`. An instance of FashionMNIST dataset.
 
     Examples:
         
         .. code-block:: python
 
+            import itertools
+            import paddle.vision.transforms as T
             from paddle.vision.datasets import FashionMNIST
 
-            mnist = FashionMNIST(mode='test')
 
-            for i in range(len(mnist)):
-                sample = mnist[i]
-                print(sample[0].size, sample[1])
+            fashion_mnist = FashionMNIST()
+            print(len(fashion_mnist))
+            # 60000
+
+            for i in range(5):  # only show first 5 images
+                img, label = fashion_mnist[i]
+                # do something with img and label
+                print(type(img), img.size, label)
+                # <class 'PIL.Image.Image'> (28, 28) [9]
+
+
+            transform = T.Compose(
+                [
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[127.5],
+                        std=[127.5],
+                    ),
+                ]
+            )
+
+            fashion_mnist_test = FashionMNIST(
+                mode="test",
+                transform=transform,  # apply transform to every image
+                backend="cv2",  # use OpenCV as image transform backend
+            )
+            print(len(fashion_mnist_test))
+            # 10000
+
+            for img, label in itertools.islice(iter(fashion_mnist_test), 5):  # only show first 5 images
+                # do something with img and label
+                print(type(img), img.shape, label)
+                # <class 'paddle.Tensor'> [1, 28, 28] [9]
     """
 
     NAME = 'fashion-mnist'
diff --git a/python/paddle/vision/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py
index cd9ff70ca1e5074b4c43928fa46e4db77939e288..2d65b16550bad1c9d7b8ac2cba56009c74a81bd8 100644
--- a/python/paddle/vision/datasets/voc2012.py
+++ b/python/paddle/vision/datasets/voc2012.py
@@ -39,51 +39,69 @@ MODE_FLAG_MAP = {'train': 'trainval', 'test': 'train', 'valid': "val"}
 
 class VOC2012(Dataset):
     """
-    Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset
-
-    To speed up the download, we put the data on https://dataset.bj.bcebos.com/voc/VOCtrainval_11-May-2012.tar. 
-    Original data can get from http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar.
+    Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset.
 
     Args:
-        data_file(str): path to data file, can be set None if
-            :attr:`download` is True. Default None,  default data path: ~/.cache/paddle/dataset/voc2012
-        mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
-        download(bool): download dataset automatically if :attr:`data_file` is None. Default True
-        backend(str, optional): Specifies which type of image to be returned: 
-            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. 
-            If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
+        data_file (str, optional): Path to data file, can be set None if
+            :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/voc2012.
+        mode (str, optional): 'train', 'valid' or 'test' mode. Default: 'train'.
+        transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
+        download (bool, optional): Download dataset automatically if :attr:`data_file` is None. Default: True.
+        backend (str, optional): Specifies which type of image to be returned:
+            PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
+            If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
             default backend is 'pil'. Default: None.
 
+    Returns:
+        :ref:`api_paddle_io_Dataset`. An instance of VOC2012 dataset.
+
     Examples:
 
         .. code-block:: python
 
-            import paddle
+            import itertools
+            import paddle.vision.transforms as T
             from paddle.vision.datasets import VOC2012
-            from paddle.vision.transforms import Normalize
-
-            class SimpleNet(paddle.nn.Layer):
-                def __init__(self):
-                    super(SimpleNet, self).__init__()
-
-                def forward(self, image, label):
-                    return paddle.sum(image), label
-
-
-            normalize = Normalize(mean=[0.5, 0.5, 0.5],
-                                  std=[0.5, 0.5, 0.5],
-                                  data_format='HWC')
-            voc2012 = VOC2012(mode='train', transform=normalize, backend='cv2')
-
-            for i in range(10):
-                image, label= voc2012[i]
-                image = paddle.cast(paddle.to_tensor(image), 'float32')
-                label = paddle.to_tensor(label)
 
-                model = SimpleNet()
-                image, label= model(image, label)
-                print(image.numpy().shape, label.numpy().shape)
 
+            voc2012 = VOC2012()
+            print(len(voc2012))
+            # 2913
+
+            for i in range(5):  # only show first 5 images
+                img, label = voc2012[i]
+                # do something with img and label
+                print(type(img), img.size)
+                # <class 'PIL.JpegImagePlugin.JpegImageFile'> (500, 281)
+                print(type(label), label.size)
+                # <class 'PIL.PngImagePlugin.PngImageFile'> (500, 281)
+
+
+            transform = T.Compose(
+                [
+                    T.ToTensor(),
+                    T.Normalize(
+                        mean=[0.5, 0.5, 0.5],
+                        std=[0.5, 0.5, 0.5],
+                        to_rgb=True,
+                    ),
+                ]
+            )
+
+            voc2012_test = VOC2012(
+                mode="test",
+                transform=transform,  # apply transform to every image
+                backend="cv2",  # use OpenCV as image transform backend
+            )
+            print(len(voc2012_test))
+            # 1464
+
+            for img, label in itertools.islice(iter(voc2012_test), 5):  # only show first 5 images
+                # do something with img and label
+                print(type(img), img.shape)
+                # <class 'paddle.Tensor'> [3, 281, 500]
+                print(type(label), label.shape)
+                # <class 'numpy.ndarray'> (281, 500)
     """
 
     def __init__(self,