未验证 提交 1fca8f33 编写于 作者: N Nyakku Shigure 提交者: GitHub

fix paddle.vision.datasets.* en docs (#43649)

* rewrite all code examples, test=document_fix

* refine arguments, test=document_fix

* fix desc format error, test=document_fix

* capitalize the first letter, test=document_fix

* refine api desc, test=document_fix

* fix wrong COPY-FROM label in Model docs, test=document_fix

* refine returns, test=document_fix

* refine returns, test=document_fix

* add a blank line in code block, test=document_fix
上级 fdcb57fb
...@@ -1076,7 +1076,6 @@ class Model(object): ...@@ -1076,7 +1076,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-train-batch
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1128,7 +1127,6 @@ class Model(object): ...@@ -1128,7 +1127,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-eval-batch
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1176,7 +1174,6 @@ class Model(object): ...@@ -1176,7 +1174,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-predict-batch
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1236,7 +1233,6 @@ class Model(object): ...@@ -1236,7 +1233,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-save
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1317,7 +1313,6 @@ class Model(object): ...@@ -1317,7 +1313,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-load
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1404,7 +1399,6 @@ class Model(object): ...@@ -1404,7 +1399,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-parameters
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1648,7 +1642,7 @@ class Model(object): ...@@ -1648,7 +1642,7 @@ class Model(object):
How to make a batch is done internally. How to make a batch is done internally.
.. code-block:: python .. code-block:: python
:name: code-example-fit-1 :name: code-example1
import paddle import paddle
import paddle.vision.transforms as T import paddle.vision.transforms as T
...@@ -1688,7 +1682,7 @@ class Model(object): ...@@ -1688,7 +1682,7 @@ class Model(object):
DataLoader. DataLoader.
.. code-block:: python .. code-block:: python
:name: code-example-fit-2 :name: code-example2
import paddle import paddle
import paddle.vision.transforms as T import paddle.vision.transforms as T
...@@ -1844,7 +1838,6 @@ class Model(object): ...@@ -1844,7 +1838,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-evaluate
import paddle import paddle
import paddle.vision.transforms as T import paddle.vision.transforms as T
...@@ -1946,7 +1939,6 @@ class Model(object): ...@@ -1946,7 +1939,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-predict
import numpy as np import numpy as np
import paddle import paddle
...@@ -2179,7 +2171,6 @@ class Model(object): ...@@ -2179,7 +2171,6 @@ class Model(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
:name: code-example-summary
import paddle import paddle
from paddle.static import InputSpec from paddle.static import InputSpec
......
...@@ -46,54 +46,63 @@ class Cifar10(Dataset): ...@@ -46,54 +46,63 @@ class Cifar10(Dataset):
dataset, which has 10 categories. dataset, which has 10 categories.
Args: Args:
data_file(str): path to data file, can be set None if data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar :attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar
mode(str): 'train', 'test' mode. Default 'train'. mode (str, optional): Either train or test mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform. transform (Callable, optional): transform to perform on image, None for no transform. Default: None.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True download (bool, optional): download dataset automatically if :attr:`data_file` is None. Default True.
backend(str, optional): Specifies which type of image to be returned: backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` , If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None. default backend is 'pil'. Default: None.
Returns: Returns:
Dataset: instance of cifar-10 dataset :ref:`api_paddle_io_Dataset`. An instance of Cifar10 dataset.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import itertools
import paddle.nn as nn import paddle.vision.transforms as T
from paddle.vision.datasets import Cifar10 from paddle.vision.datasets import Cifar10
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(3072, 10),
nn.Softmax())
def forward(self, image, label):
image = paddle.reshape(image, (1, -1))
return self.fc(image), label
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
data_format='HWC')
cifar10 = Cifar10(mode='train', transform=normalize)
for i in range(10):
image, label = cifar10[i]
image = paddle.to_tensor(image)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(image, label)
print(image.numpy().shape, label.numpy().shape)
cifar10 = Cifar10()
print(len(cifar10))
# 50000
for i in range(5): # only show first 5 images
img, label = cifar10[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (32, 32) 6
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
cifar10_test = Cifar10(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(cifar10_test))
# 10000
for img, label in itertools.islice(iter(cifar10_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 64] 3
""" """
def __init__(self, def __init__(self,
...@@ -179,54 +188,63 @@ class Cifar100(Cifar10): ...@@ -179,54 +188,63 @@ class Cifar100(Cifar10):
dataset, which has 100 categories. dataset, which has 100 categories.
Args: Args:
data_file(str): path to data file, can be set None if data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/cifar
mode(str): 'train', 'test' mode. Default 'train'. mode (str, optional): Either train or test mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform. transform (Callable, optional): transform to perform on image, None for no transform. Default: None.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True download (bool, optional): download dataset automatically if :attr:`data_file` is None. Default True.
backend(str, optional): Specifies which type of image to be returned: backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` , If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None. default backend is 'pil'. Default: None.
Returns: Returns:
Dataset: instance of cifar-100 dataset :ref:`api_paddle_io_Dataset`. An instance of Cifar100 dataset.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import itertools
import paddle.nn as nn import paddle.vision.transforms as T
from paddle.vision.datasets import Cifar100 from paddle.vision.datasets import Cifar100
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(3072, 10),
nn.Softmax())
def forward(self, image, label):
image = paddle.reshape(image, (1, -1))
return self.fc(image), label
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
data_format='HWC')
cifar100 = Cifar100(mode='train', transform=normalize)
for i in range(10):
image, label = cifar100[i]
image = paddle.to_tensor(image)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(image, label)
print(image.numpy().shape, label.numpy().shape)
cifar100 = Cifar100()
print(len(cifar100))
# 50000
for i in range(5): # only show first 5 images
img, label = cifar100[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (32, 32) 19
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
cifar100_test = Cifar100(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(cifar100_test))
# 10000
for img, label in itertools.islice(iter(cifar100_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 64] 49
""" """
def __init__(self, def __init__(self,
......
...@@ -42,36 +42,71 @@ MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': 'valid'} ...@@ -42,36 +42,71 @@ MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': 'valid'}
class Flowers(Dataset): class Flowers(Dataset):
""" """
Implementation of `Flowers <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_ Implementation of `Flowers102 <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_
dataset dataset.
Args: Args:
data_file(str): path to data file, can be set None if data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/flowers/ :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
label_file(str): path to label file, can be set None if label_file (str, optional): Path to label file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/flowers/ :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
setid_file(str): path to subset index file, can be set setid_file (str, optional): Path to subset index file, can be set
None if :attr:`download` is True. Default None None if :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
mode(str): 'train', 'valid' or 'test' mode. Default 'train'. mode (str, optional): Either train, valid or test mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform. transform (Callable, optional): transform to perform on image, None for no transform. Default: None.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True download (bool, optional): download dataset automatically if :attr:`data_file` is None. Default: True.
backend(str, optional): Specifies which type of image to be returned: backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` , If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None. default backend is 'pil'. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of Flowers dataset.
Examples: Examples:
.. code-block:: python .. code-block:: python
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import Flowers from paddle.vision.datasets import Flowers
flowers = Flowers(mode='test')
for i in range(len(flowers)):
sample = flowers[i]
print(sample[0].size, sample[1])
flowers = Flowers()
print(len(flowers))
# 6149
for i in range(5): # only show first 5 images
img, label = flowers[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.JpegImagePlugin.JpegImageFile'> (523, 500) [1]
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
flowers_test = Flowers(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(flowers_test))
# 1020
for img, label in itertools.islice(iter(flowers_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 96] [1]
""" """
def __init__(self, def __init__(self,
......
...@@ -65,6 +65,8 @@ def make_dataset(dir, class_to_idx, extensions, is_valid_file=None): ...@@ -65,6 +65,8 @@ def make_dataset(dir, class_to_idx, extensions, is_valid_file=None):
class DatasetFolder(Dataset): class DatasetFolder(Dataset):
"""A generic data loader where the samples are arranged in this way: """A generic data loader where the samples are arranged in this way:
.. code-block:: text
root/class_a/1.ext root/class_a/1.ext
root/class_a/2.ext root/class_a/2.ext
root/class_a/3.ext root/class_a/3.ext
...@@ -74,55 +76,127 @@ class DatasetFolder(Dataset): ...@@ -74,55 +76,127 @@ class DatasetFolder(Dataset):
root/class_b/789.ext root/class_b/789.ext
Args: Args:
root (string): Root directory path. root (str): Root directory path.
loader (callable|optional): A function to load a sample given its path. loader (Callable, optional): A function to load a sample given its path. Default: None.
extensions (list[str]|tuple[str]|optional): A list of allowed extensions. extensions (list[str]|tuple[str], optional): A list of allowed extensions.
both extensions and is_valid_file should not be passed. :attr:`extensions` and :attr:`is_valid_file` should not both be passed.
transform (callable|optional): A function/transform that takes in If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
a sample and returns a transformed version. '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
is_valid_file (callable|optional): A function that takes path of a file transform (Callable, optional): A function/transform that takes in
and check if the file is a valid file (used to check of corrupt files) a sample and returns a transformed version. Default: None.
both extensions and is_valid_file should not be passed. is_valid_file (Callable, optional): A function that takes path of a file
and checks if the file is a valid file. Both :attr:`extensions` and
Attributes: :attr:`is_valid_file` should not be passed. Default: None.
classes (list): List of the class names.
class_to_idx (dict): Dict with items (class_name, class_index). Returns:
samples (list): List of (sample path, class_index) tuples :ref:`api_paddle_io_Dataset`. An instance of DatasetFolder.
targets (list): The class_index value for each image in the dataset
Attributes:
classes (list[str]): List of the class names.
class_to_idx (dict[str, int]): Dict with items (class_name, class_index).
samples (list[tuple[str, int]]): List of (sample_path, class_index) tuples.
targets (list[int]): The class_index value for each image in the dataset.
Example: Example:
.. code-block:: python .. code-block:: python
import os
import cv2
import tempfile
import shutil import shutil
import tempfile
import cv2
import numpy as np import numpy as np
import paddle.vision.transforms as T
from pathlib import Path
from paddle.vision.datasets import DatasetFolder from paddle.vision.datasets import DatasetFolder
def make_fake_dir():
data_dir = tempfile.mkdtemp() def make_fake_file(img_path: str):
if img_path.endswith((".jpg", ".png", ".jpeg")):
for i in range(2): fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
sub_dir = os.path.join(data_dir, 'class_' + str(i)) cv2.imwrite(img_path, fake_img)
if not os.path.exists(sub_dir): elif img_path.endswith(".txt"):
os.makedirs(sub_dir) with open(img_path, "w") as f:
for j in range(2): f.write("This is a fake file.")
fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img) def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
return data_dir root = Path(root)
root.mkdir(parents=True, exist_ok=True)
temp_dir = make_fake_dir() for subpath in directory_hierarchy:
# temp_dir is root dir if isinstance(subpath, str):
# temp_dir/class_1/img1_1.jpg filepath = root / subpath
# temp_dir/class_2/img2_1.jpg file_maker(str(filepath))
data_folder = DatasetFolder(temp_dir) else:
dirname = list(subpath.keys())[0]
for items in data_folder: make_directory(root / dirname, subpath[dirname])
break
directory_hirerarchy = [
shutil.rmtree(temp_dir) {"class_0": [
"abc.jpg",
"def.png"]},
{"class_1": [
"ghi.jpeg",
"jkl.png",
{"mno": [
"pqr.jpeg",
"stu.jpg"]}]},
"this_will_be_ignored.txt",
]
# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
fake_data_dir = tempfile.mkdtemp()
make_directory(fake_data_dir, directory_hirerarchy)
data_folder_1 = DatasetFolder(fake_data_dir)
print(data_folder_1.classes)
# ['class_0', 'class_1']
print(data_folder_1.class_to_idx)
# {'class_0': 0, 'class_1': 1}
print(data_folder_1.samples)
# [('./temp_dir/class_0/abc.jpg', 0), ('./temp_dir/class_0/def.png', 0),
# ('./temp_dir/class_1/ghi.jpeg', 1), ('./temp_dir/class_1/jkl.png', 1),
# ('./temp_dir/class_1/mno/pqr.jpeg', 1), ('./temp_dir/class_1/mno/stu.jpg', 1)]
print(data_folder_1.targets)
# [0, 0, 1, 1, 1, 1]
print(len(data_folder_1))
# 6
for i in range(len(data_folder_1)):
img, label = data_folder_1[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (32, 32) 0
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
data_folder_2 = DatasetFolder(
fake_data_dir,
loader=lambda x: cv2.imread(x), # load image with OpenCV
extensions=(".jpg",), # only load *.jpg files
transform=transform, # apply transform to every image
)
print([img_path for img_path, label in data_folder_2.samples])
# ['./temp_dir/class_0/abc.jpg', './temp_dir/class_1/mno/stu.jpg']
print(len(data_folder_2))
# 2
for img, label in iter(data_folder_2):
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 64] 0
shutil.rmtree(fake_data_dir)
""" """
def __init__(self, def __init__(self,
...@@ -223,54 +297,121 @@ def default_loader(path): ...@@ -223,54 +297,121 @@ def default_loader(path):
class ImageFolder(Dataset): class ImageFolder(Dataset):
"""A generic data loader where the samples are arranged in this way: """A generic data loader where the samples are arranged in this way:
.. code-block:: text
root/1.ext root/1.ext
root/2.ext root/2.ext
root/sub_dir/3.ext root/sub_dir/3.ext
Args: Args:
root (string): Root directory path. root (str): Root directory path.
loader (callable, optional): A function to load a sample given its path. loader (Callable, optional): A function to load a sample given its path. Default: None.
extensions (list[str]|tuple[str], optional): A list of allowed extensions. extensions (list[str]|tuple[str], optional): A list of allowed extensions.
both extensions and is_valid_file should not be passed. :attr:`extensions` and :attr:`is_valid_file` should not both be passed.
transform (callable, optional): A function/transform that takes in If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
a sample and returns a transformed version. '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
is_valid_file (callable, optional): A function that takes path of a file transform (Callable, optional): A function/transform that takes in
and check if the file is a valid file (used to check of corrupt files) a sample and returns a transformed version. Default: None.
both extensions and is_valid_file should not be passed. is_valid_file (Callable, optional): A function that takes path of a file
and checks if the file is a valid file. Both :attr:`extensions` and
:attr:`is_valid_file` should not be passed. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of ImageFolder.
Attributes: Attributes:
samples (list): List of sample path samples (list[str]): List of sample path.
Example: Example:
.. code-block:: python .. code-block:: python
import os
import cv2
import tempfile
import shutil import shutil
import tempfile
import cv2
import numpy as np import numpy as np
import paddle.vision.transforms as T
from pathlib import Path
from paddle.vision.datasets import ImageFolder from paddle.vision.datasets import ImageFolder
def make_fake_dir():
data_dir = tempfile.mkdtemp() def make_fake_file(img_path: str):
if img_path.endswith((".jpg", ".png", ".jpeg")):
for i in range(2): fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
sub_dir = os.path.join(data_dir, 'class_' + str(i)) cv2.imwrite(img_path, fake_img)
if not os.path.exists(sub_dir): elif img_path.endswith(".txt"):
os.makedirs(sub_dir) with open(img_path, "w") as f:
for j in range(2): f.write("This is a fake file.")
fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img) def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
return data_dir root = Path(root)
root.mkdir(parents=True, exist_ok=True)
temp_dir = make_fake_dir() for subpath in directory_hierarchy:
data_folder = ImageFolder(temp_dir) if isinstance(subpath, str):
filepath = root / subpath
for items in data_folder: file_maker(str(filepath))
break else:
dirname = list(subpath.keys())[0]
shutil.rmtree(temp_dir) make_directory(root / dirname, subpath[dirname])
directory_hirerarchy = [
"abc.jpg",
"def.png",
{"ghi": [
"jkl.jpeg",
{"mno": [
"pqr.jpg"]}]},
"this_will_be_ignored.txt",
]
# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
fake_data_dir = tempfile.mkdtemp()
make_directory(fake_data_dir, directory_hirerarchy)
image_folder_1 = ImageFolder(fake_data_dir)
print(image_folder_1.samples)
# ['./temp_dir/abc.jpg', './temp_dir/def.png',
# './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
print(len(image_folder_1))
# 4
for i in range(len(image_folder_1)):
(img,) = image_folder_1[i]
# do something with img
print(type(img), img.size)
# <class 'PIL.Image.Image'> (32, 32)
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
image_folder_2 = ImageFolder(
fake_data_dir,
loader=lambda x: cv2.imread(x), # load image with OpenCV
extensions=(".jpg",), # only load *.jpg files
transform=transform, # apply transform to every image
)
print(image_folder_2.samples)
# ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
print(len(image_folder_2))
# 2
for (img,) in iter(image_folder_2):
# do something with img
print(type(img), img.shape)
# <class 'paddle.Tensor'> [3, 64, 64]
shutil.rmtree(fake_data_dir)
""" """
def __init__(self, def __init__(self,
......
...@@ -29,36 +29,67 @@ __all__ = [] ...@@ -29,36 +29,67 @@ __all__ = []
class MNIST(Dataset): class MNIST(Dataset):
""" """
Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset.
Args: Args:
image_path(str): path to image file, can be set None if image_path (str, optional): Path to image file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/mnist :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/mnist.
label_path(str): path to label file, can be set None if label_path (str, optional): Path to label file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/mnist :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/mnist.
mode(str): 'train' or 'test' mode. Default 'train'. mode (str, optional): Either train or test mode. Default 'train'.
download(bool): download dataset automatically if transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
:attr:`image_path` :attr:`label_path` is not set. Default True download (bool, optional): Download dataset automatically if
backend(str, optional): Specifies which type of image to be returned: :attr:`image_path` :attr:`label_path` is not set. Default: True.
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. backend (str, optional): Specifies which type of image to be returned:
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` , PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None. default backend is 'pil'. Default: None.
Returns: Returns:
Dataset: MNIST Dataset. :ref:`api_paddle_io_Dataset`. An instance of MNIST dataset.
Examples: Examples:
.. code-block:: python .. code-block:: python
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import MNIST from paddle.vision.datasets import MNIST
mnist = MNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].size, sample[1])
mnist = MNIST()
print(len(mnist))
# 60000
for i in range(5): # only show first 5 images
img, label = mnist[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (28, 28) [5]
transform = T.Compose(
[
T.ToTensor(),
T.Normalize(
mean=[127.5],
std=[127.5],
),
]
)
mnist_test = MNIST(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(mnist_test))
# 10000
for img, label in itertools.islice(iter(mnist_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [1, 28, 28] [7]
""" """
NAME = 'mnist' NAME = 'mnist'
URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/' URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
...@@ -180,35 +211,67 @@ class MNIST(Dataset): ...@@ -180,35 +211,67 @@ class MNIST(Dataset):
class FashionMNIST(MNIST): class FashionMNIST(MNIST):
""" """
Implementation `Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ dataset. Implementation of `Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ dataset.
Args: Args:
image_path(str): path to image file, can be set None if image_path (str, optional): Path to image file, can be set None if
:attr:`download` is True. Default None :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/fashion-mnist.
label_path(str): path to label file, can be set None if label_path (str, optional): Path to label file, can be set None if
:attr:`download` is True. Default None :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/fashion-mnist.
mode(str): 'train' or 'test' mode. Default 'train'. mode (str, optional): Either train or test mode. Default 'train'.
download(bool): whether to download dataset automatically if transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
:attr:`image_path` :attr:`label_path` is not set. Default True download (bool, optional): Whether to download dataset automatically if
backend(str, optional): Specifies which type of image to be returned: :attr:`image_path` :attr:`label_path` is not set. Default: True.
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. backend (str, optional): Specifies which type of image to be returned:
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` , PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None. default backend is 'pil'. Default: None.
Returns: Returns:
Dataset: Fashion-MNIST Dataset. :ref:`api_paddle_io_Dataset`. An instance of FashionMNIST dataset.
Examples: Examples:
.. code-block:: python .. code-block:: python
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import FashionMNIST from paddle.vision.datasets import FashionMNIST
mnist = FashionMNIST(mode='test')
for i in range(len(mnist)): fashion_mnist = FashionMNIST()
sample = mnist[i] print(len(fashion_mnist))
print(sample[0].size, sample[1]) # 60000
for i in range(5): # only show first 5 images
img, label = fashion_mnist[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (28, 28) [9]
transform = T.Compose(
[
T.ToTensor(),
T.Normalize(
mean=[127.5],
std=[127.5],
),
]
)
fashion_mnist_test = FashionMNIST(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(fashion_mnist_test))
# 10000
for img, label in itertools.islice(iter(fashion_mnist_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [1, 28, 28] [9]
""" """
NAME = 'fashion-mnist' NAME = 'fashion-mnist'
......
...@@ -39,51 +39,69 @@ MODE_FLAG_MAP = {'train': 'trainval', 'test': 'train', 'valid': "val"} ...@@ -39,51 +39,69 @@ MODE_FLAG_MAP = {'train': 'trainval', 'test': 'train', 'valid': "val"}
class VOC2012(Dataset): class VOC2012(Dataset):
""" """
Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset.
To speed up the download, we put the data on https://dataset.bj.bcebos.com/voc/VOCtrainval_11-May-2012.tar.
Original data can get from http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar.
Args: Args:
data_file(str): path to data file, can be set None if data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/voc2012 :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/voc2012.
mode(str): 'train', 'valid' or 'test' mode. Default 'train'. mode (str, optional): Either train, valid or test mode. Default 'train'.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
backend(str, optional): Specifies which type of image to be returned: download (bool, optional): Download dataset automatically if :attr:`data_file` is None. Default: True.
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}. backend (str, optional): Specifies which type of image to be returned:
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` , PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None. default backend is 'pil'. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of VOC2012 dataset.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import VOC2012 from paddle.vision.datasets import VOC2012
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, image, label):
return paddle.sum(image), label
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
data_format='HWC')
voc2012 = VOC2012(mode='train', transform=normalize, backend='cv2')
for i in range(10):
image, label= voc2012[i]
image = paddle.cast(paddle.to_tensor(image), 'float32')
label = paddle.to_tensor(label)
model = SimpleNet()
image, label= model(image, label)
print(image.numpy().shape, label.numpy().shape)
voc2012 = VOC2012()
print(len(voc2012))
# 2913
for i in range(5): # only show first 5 images
img, label = voc2012[i]
# do something with img and label
print(type(img), img.size)
# <class 'PIL.JpegImagePlugin.JpegImageFile'> (500, 281)
print(type(label), label.size)
# <class 'PIL.PngImagePlugin.PngImageFile'> (500, 281)
transform = T.Compose(
[
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
voc2012_test = VOC2012(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(voc2012_test))
# 1464
for img, label in itertools.islice(iter(voc2012_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape)
# <class 'paddle.Tensor'> [3, 281, 500]
print(type(label), label.shape)
# <class 'numpy.ndarray'> (281, 500)
""" """
def __init__(self, def __init__(self,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册