Unverified commit 1fca8f33, authored by Nyakku Shigure, committed by GitHub

fix paddle.vision.datasets.* en docs (#43649)

* rewrite all code examples, test=document_fix

* refine arguments, test=document_fix

* fix desc format error, test=document_fix

* capitalize the first letter, test=document_fix

* refine api desc, test=document_fix

* fix wrong COPY-FROM label in Model docs, test=document_fix

* refine returns, test=document_fix

* refine returns, test=document_fix

* add a blank line in code block, test=document_fix
Parent fdcb57fb
@@ -1076,7 +1076,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-train-batch
import paddle
import paddle.nn as nn
@@ -1128,7 +1127,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-eval-batch
import paddle
import paddle.nn as nn
@@ -1176,7 +1174,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-predict-batch
import paddle
import paddle.nn as nn
@@ -1236,7 +1233,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-save
import paddle
import paddle.nn as nn
@@ -1317,7 +1313,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-load
import paddle
import paddle.nn as nn
@@ -1404,7 +1399,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-parameters
import paddle
import paddle.nn as nn
@@ -1648,7 +1642,7 @@ class Model(object):
How to make a batch is done internally.
.. code-block:: python
:name: code-example-fit-1
:name: code-example1
import paddle
import paddle.vision.transforms as T
@@ -1688,7 +1682,7 @@ class Model(object):
DataLoader.
.. code-block:: python
:name: code-example-fit-2
:name: code-example2
import paddle
import paddle.vision.transforms as T
@@ -1844,7 +1838,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-evaluate
import paddle
import paddle.vision.transforms as T
@@ -1946,7 +1939,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-predict
import numpy as np
import paddle
@@ -2179,7 +2171,6 @@ class Model(object):
Examples:
.. code-block:: python
:name: code-example-summary
import paddle
from paddle.static import InputSpec
@@ -46,54 +46,63 @@ class Cifar10(Dataset):
dataset, which has 10 categories.
Args:
data_file(str): path to data file, can be set None if
data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar
mode(str): 'train', 'test' mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True
backend(str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
mode (str, optional): Either train or test mode. Default 'train'.
transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
download (bool, optional): Download dataset automatically if :attr:`data_file` is None. Default: True.
backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None.
Returns:
Dataset: instance of cifar-10 dataset
:ref:`api_paddle_io_Dataset`. An instance of Cifar10 dataset.
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import Cifar10
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(3072, 10),
nn.Softmax())
def forward(self, image, label):
image = paddle.reshape(image, (1, -1))
return self.fc(image), label
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
data_format='HWC')
cifar10 = Cifar10(mode='train', transform=normalize)
for i in range(10):
image, label = cifar10[i]
image = paddle.to_tensor(image)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(image, label)
print(image.numpy().shape, label.numpy().shape)
cifar10 = Cifar10()
print(len(cifar10))
# 50000
for i in range(5): # only show first 5 images
img, label = cifar10[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (32, 32) 6
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
cifar10_test = Cifar10(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(cifar10_test))
# 10000
for img, label in itertools.islice(iter(cifar10_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 64] 3
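Since ``Cifar10`` is a ``paddle.io.Dataset``, it can also be wrapped in a ``paddle.io.DataLoader`` for batched iteration. A minimal sketch, assuming a transform that turns every sample into a fixed-shape tensor:

.. code-block:: python

    import paddle.vision.transforms as T
    from paddle.io import DataLoader
    from paddle.vision.datasets import Cifar10

    # a tensor-producing transform so that samples can be stacked into batches
    transform = T.Compose([T.Resize(64), T.ToTensor()])
    cifar10_test = Cifar10(mode="test", transform=transform, backend="cv2")

    loader = DataLoader(cifar10_test, batch_size=64, shuffle=False)
    for images, labels in loader:
        # images is a [batch_size, 3, 64, 64] Tensor, labels holds the class ids
        print(images.shape, labels.shape)
        break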
"""
def __init__(self,
@@ -179,54 +188,63 @@ class Cifar100(Cifar10):
dataset, which has 100 categories.
Args:
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/cifar
mode(str): 'train', 'test' mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True
backend(str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
data_file (str, optional): path to data file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/cifar
mode (str, optional): Either train or test mode. Default 'train'.
transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
download (bool, optional): Download dataset automatically if :attr:`data_file` is None. Default: True.
backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None.
Returns:
Dataset: instance of cifar-100 dataset
:ref:`api_paddle_io_Dataset`. An instance of Cifar100 dataset.
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import Cifar100
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(3072, 10),
nn.Softmax())
def forward(self, image, label):
image = paddle.reshape(image, (1, -1))
return self.fc(image), label
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
data_format='HWC')
cifar100 = Cifar100(mode='train', transform=normalize)
for i in range(10):
image, label = cifar100[i]
image = paddle.to_tensor(image)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(image, label)
print(image.numpy().shape, label.numpy().shape)
cifar100 = Cifar100()
print(len(cifar100))
# 50000
for i in range(5): # only show first 5 images
img, label = cifar100[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (32, 32) 19
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
cifar100_test = Cifar100(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(cifar100_test))
# 10000
for img, label in itertools.islice(iter(cifar100_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 64] 49
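To carve a validation split out of the training data, the dataset can be divided with ``paddle.io.random_split``; a minimal sketch, assuming ``paddle.io.random_split`` is available in your Paddle version:

.. code-block:: python

    import paddle
    from paddle.vision.datasets import Cifar100

    cifar100 = Cifar100(mode="train")
    # hold out 5000 of the 50000 training images for validation
    train_set, val_set = paddle.io.random_split(cifar100, [45000, 5000])
    print(len(train_set), len(val_set))
    # 45000 5000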
"""
def __init__(self,
@@ -42,36 +42,71 @@ MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': 'valid'}
class Flowers(Dataset):
"""
Implementation of `Flowers <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_
dataset
Implementation of `Flowers102 <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_
dataset.
Args:
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/flowers/
label_file(str): path to label file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/flowers/
setid_file(str): path to subset index file, can be set
None if :attr:`download` is True. Default None
mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True
backend(str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
label_file (str, optional): Path to label file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
setid_file (str, optional): Path to subset index file, can be set
None if :attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/flowers/.
mode (str, optional): Either train, valid or test mode. Default 'train'.
transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
download (bool, optional): Download dataset automatically if :attr:`data_file` is None. Default: True.
backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of Flowers dataset.
Examples:
.. code-block:: python
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import Flowers
flowers = Flowers(mode='test')
for i in range(len(flowers)):
sample = flowers[i]
print(sample[0].size, sample[1])
flowers = Flowers()
print(len(flowers))
# 6149
for i in range(5): # only show first 5 images
img, label = flowers[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.JpegImagePlugin.JpegImageFile'> (523, 500) [1]
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
flowers_test = Flowers(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(flowers_test))
# 1020
for img, label in itertools.islice(iter(flowers_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 96] [1]
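Besides the train and test splits, a ``valid`` split is also available; a minimal sketch:

.. code-block:: python

    from paddle.vision.datasets import Flowers

    flowers_valid = Flowers(mode="valid")
    img, label = flowers_valid[0]
    # img is a PIL image by default, label is the flower category
    print(type(img), img.size, label)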
"""
def __init__(self,
@@ -65,6 +65,8 @@ def make_dataset(dir, class_to_idx, extensions, is_valid_file=None):
class DatasetFolder(Dataset):
"""A generic data loader where the samples are arranged in this way:
.. code-block:: text
root/class_a/1.ext
root/class_a/2.ext
root/class_a/3.ext
@@ -74,55 +76,127 @@ class DatasetFolder(Dataset):
root/class_b/789.ext
Args:
root (string): Root directory path.
loader (callable|optional): A function to load a sample given its path.
extensions (list[str]|tuple[str]|optional): A list of allowed extensions.
both extensions and is_valid_file should not be passed.
transform (callable|optional): A function/transform that takes in
a sample and returns a transformed version.
is_valid_file (callable|optional): A function that takes path of a file
and check if the file is a valid file (used to check of corrupt files)
both extensions and is_valid_file should not be passed.
Attributes:
classes (list): List of the class names.
class_to_idx (dict): Dict with items (class_name, class_index).
samples (list): List of (sample path, class_index) tuples
targets (list): The class_index value for each image in the dataset
root (str): Root directory path.
loader (Callable, optional): A function to load a sample given its path. Default: None.
extensions (list[str]|tuple[str], optional): A list of allowed extensions.
:attr:`extensions` and :attr:`is_valid_file` should not both be passed.
If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
'.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
transform (Callable, optional): A function/transform that takes in
a sample and returns a transformed version. Default: None.
is_valid_file (Callable, optional): A function that takes the path of a file
and checks if the file is a valid file. :attr:`extensions` and
:attr:`is_valid_file` should not both be passed. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of DatasetFolder.
Attributes:
classes (list[str]): List of the class names.
class_to_idx (dict[str, int]): Dict with items (class_name, class_index).
samples (list[tuple[str, int]]): List of (sample_path, class_index) tuples.
targets (list[int]): The class_index value for each image in the dataset.
Example:
.. code-block:: python
import os
import cv2
import tempfile
import shutil
import tempfile
import cv2
import numpy as np
import paddle.vision.transforms as T
from pathlib import Path
from paddle.vision.datasets import DatasetFolder
def make_fake_dir():
data_dir = tempfile.mkdtemp()
for i in range(2):
sub_dir = os.path.join(data_dir, 'class_' + str(i))
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
for j in range(2):
fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
return data_dir
temp_dir = make_fake_dir()
# temp_dir is root dir
# temp_dir/class_1/img1_1.jpg
# temp_dir/class_2/img2_1.jpg
data_folder = DatasetFolder(temp_dir)
for items in data_folder:
break
shutil.rmtree(temp_dir)
def make_fake_file(img_path: str):
if img_path.endswith((".jpg", ".png", ".jpeg")):
fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
cv2.imwrite(img_path, fake_img)
elif img_path.endswith(".txt"):
with open(img_path, "w") as f:
f.write("This is a fake file.")
def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
root = Path(root)
root.mkdir(parents=True, exist_ok=True)
for subpath in directory_hierarchy:
if isinstance(subpath, str):
filepath = root / subpath
file_maker(str(filepath))
else:
dirname = list(subpath.keys())[0]
make_directory(root / dirname, subpath[dirname])
directory_hierarchy = [
{"class_0": [
"abc.jpg",
"def.png"]},
{"class_1": [
"ghi.jpeg",
"jkl.png",
{"mno": [
"pqr.jpeg",
"stu.jpg"]}]},
"this_will_be_ignored.txt",
]
# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
fake_data_dir = tempfile.mkdtemp()
make_directory(fake_data_dir, directory_hierarchy)
data_folder_1 = DatasetFolder(fake_data_dir)
print(data_folder_1.classes)
# ['class_0', 'class_1']
print(data_folder_1.class_to_idx)
# {'class_0': 0, 'class_1': 1}
print(data_folder_1.samples)
# [('./temp_dir/class_0/abc.jpg', 0), ('./temp_dir/class_0/def.png', 0),
# ('./temp_dir/class_1/ghi.jpeg', 1), ('./temp_dir/class_1/jkl.png', 1),
# ('./temp_dir/class_1/mno/pqr.jpeg', 1), ('./temp_dir/class_1/mno/stu.jpg', 1)]
print(data_folder_1.targets)
# [0, 0, 1, 1, 1, 1]
print(len(data_folder_1))
# 6
for i in range(len(data_folder_1)):
img, label = data_folder_1[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (32, 32) 0
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
data_folder_2 = DatasetFolder(
fake_data_dir,
loader=lambda x: cv2.imread(x), # load image with OpenCV
extensions=(".jpg",), # only load *.jpg files
transform=transform, # apply transform to every image
)
print([img_path for img_path, label in data_folder_2.samples])
# ['./temp_dir/class_0/abc.jpg', './temp_dir/class_1/mno/stu.jpg']
print(len(data_folder_2))
# 2
for img, label in iter(data_folder_2):
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [3, 64, 64] 0
shutil.rmtree(fake_data_dir)
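Instead of :attr:`extensions`, a custom :attr:`is_valid_file` callable can decide which files are collected (only one of the two may be passed). A minimal sketch, assuming the ``fake_data_dir`` built above still exists (i.e. run before the final ``shutil.rmtree``):

.. code-block:: python

    from paddle.vision.datasets import DatasetFolder

    # keep only files whose name ends with ".png"
    data_folder_3 = DatasetFolder(
        fake_data_dir,
        is_valid_file=lambda path: path.endswith(".png"),
    )
    print([img_path for img_path, label in data_folder_3.samples])
    # only the *.png files under fake_data_dir are listed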
"""
def __init__(self,
@@ -223,54 +297,121 @@ def default_loader(path):
class ImageFolder(Dataset):
"""A generic data loader where the samples are arranged in this way:
.. code-block:: text
root/1.ext
root/2.ext
root/sub_dir/3.ext
Args:
root (string): Root directory path.
loader (callable, optional): A function to load a sample given its path.
root (str): Root directory path.
loader (Callable, optional): A function to load a sample given its path. Default: None.
extensions (list[str]|tuple[str], optional): A list of allowed extensions.
both extensions and is_valid_file should not be passed.
transform (callable, optional): A function/transform that takes in
a sample and returns a transformed version.
is_valid_file (callable, optional): A function that takes path of a file
and check if the file is a valid file (used to check of corrupt files)
both extensions and is_valid_file should not be passed.
:attr:`extensions` and :attr:`is_valid_file` should not both be passed.
If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
'.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
transform (Callable, optional): A function/transform that takes in
a sample and returns a transformed version. Default: None.
is_valid_file (Callable, optional): A function that takes the path of a file
and checks if the file is a valid file. :attr:`extensions` and
:attr:`is_valid_file` should not both be passed. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of ImageFolder.
Attributes:
samples (list): List of sample path
Attributes:
samples (list[str]): List of sample path.
Example:
.. code-block:: python
import os
import cv2
import tempfile
import shutil
import tempfile
import cv2
import numpy as np
import paddle.vision.transforms as T
from pathlib import Path
from paddle.vision.datasets import ImageFolder
def make_fake_dir():
data_dir = tempfile.mkdtemp()
for i in range(2):
sub_dir = os.path.join(data_dir, 'class_' + str(i))
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
for j in range(2):
fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
return data_dir
temp_dir = make_fake_dir()
data_folder = ImageFolder(temp_dir)
for items in data_folder:
break
shutil.rmtree(temp_dir)
def make_fake_file(img_path: str):
if img_path.endswith((".jpg", ".png", ".jpeg")):
fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
cv2.imwrite(img_path, fake_img)
elif img_path.endswith(".txt"):
with open(img_path, "w") as f:
f.write("This is a fake file.")
def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
root = Path(root)
root.mkdir(parents=True, exist_ok=True)
for subpath in directory_hierarchy:
if isinstance(subpath, str):
filepath = root / subpath
file_maker(str(filepath))
else:
dirname = list(subpath.keys())[0]
make_directory(root / dirname, subpath[dirname])
directory_hierarchy = [
"abc.jpg",
"def.png",
{"ghi": [
"jkl.jpeg",
{"mno": [
"pqr.jpg"]}]},
"this_will_be_ignored.txt",
]
# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
fake_data_dir = tempfile.mkdtemp()
make_directory(fake_data_dir, directory_hierarchy)
image_folder_1 = ImageFolder(fake_data_dir)
print(image_folder_1.samples)
# ['./temp_dir/abc.jpg', './temp_dir/def.png',
# './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
print(len(image_folder_1))
# 4
for i in range(len(image_folder_1)):
(img,) = image_folder_1[i]
# do something with img
print(type(img), img.size)
# <class 'PIL.Image.Image'> (32, 32)
transform = T.Compose(
[
T.Resize(64),
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
image_folder_2 = ImageFolder(
fake_data_dir,
loader=lambda x: cv2.imread(x), # load image with OpenCV
extensions=(".jpg",), # only load *.jpg files
transform=transform, # apply transform to every image
)
print(image_folder_2.samples)
# ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
print(len(image_folder_2))
# 2
for (img,) in iter(image_folder_2):
# do something with img
print(type(img), img.shape)
# <class 'paddle.Tensor'> [3, 64, 64]
shutil.rmtree(fake_data_dir)
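For batched inference over the unlabeled samples, the folder can be wrapped in a ``paddle.io.DataLoader``; a minimal sketch, assuming ``image_folder_2`` from above (created before the final ``shutil.rmtree``):

.. code-block:: python

    from paddle.io import DataLoader

    loader = DataLoader(image_folder_2, batch_size=2)
    for batch in loader:
        images = batch[0]     # each sample has a single field, stacked into one Tensor
        print(images.shape)   # e.g. [2, 3, 64, 64]
        break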
"""
def __init__(self,
@@ -29,36 +29,67 @@ __all__ = []
class MNIST(Dataset):
"""
Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset
Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset.
Args:
image_path(str): path to image file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/mnist
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/mnist
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): download dataset automatically if
:attr:`image_path` :attr:`label_path` is not set. Default True
backend(str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
image_path (str, optional): Path to image file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/mnist.
label_path (str, optional): Path to label file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/mnist.
mode (str, optional): Either train or test mode. Default 'train'.
transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
download (bool, optional): Download dataset automatically if
:attr:`image_path` :attr:`label_path` is not set. Default: True.
backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None.
Returns:
Dataset: MNIST Dataset.
:ref:`api_paddle_io_Dataset`. An instance of MNIST dataset.
Examples:
.. code-block:: python
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import MNIST
mnist = MNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].size, sample[1])
mnist = MNIST()
print(len(mnist))
# 60000
for i in range(5): # only show first 5 images
img, label = mnist[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (28, 28) [5]
transform = T.Compose(
[
T.ToTensor(),
T.Normalize(
mean=[127.5],
std=[127.5],
),
]
)
mnist_test = MNIST(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(mnist_test))
# 10000
for img, label in itertools.islice(iter(mnist_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [1, 28, 28] [7]
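Because ``MNIST`` is a ``paddle.io.Dataset``, it can be passed directly to the high-level ``paddle.Model`` API; a minimal training sketch, using ``paddle.vision.models.LeNet`` for the single-channel ``[1, 28, 28]`` inputs:

.. code-block:: python

    import paddle
    import paddle.vision.transforms as T
    from paddle.vision.datasets import MNIST

    transform = T.Compose([T.ToTensor(), T.Normalize(mean=[127.5], std=[127.5])])
    mnist_train = MNIST(mode="train", transform=transform, backend="cv2")

    model = paddle.Model(paddle.vision.models.LeNet())
    model.prepare(
        paddle.optimizer.Adam(parameters=model.parameters()),
        paddle.nn.CrossEntropyLoss(),
        paddle.metric.Accuracy(),
    )
    model.fit(mnist_train, epochs=1, batch_size=64, verbose=1)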
"""
NAME = 'mnist'
URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
@@ -180,35 +211,67 @@ class MNIST(Dataset):
class FashionMNIST(MNIST):
"""
Implementation `Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ dataset.
Implementation of `Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ dataset.
Args:
image_path(str): path to image file, can be set None if
:attr:`download` is True. Default None
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether to download dataset automatically if
:attr:`image_path` :attr:`label_path` is not set. Default True
backend(str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
image_path (str, optional): Path to image file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/fashion-mnist.
label_path (str, optional): Path to label file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/fashion-mnist.
mode (str, optional): Either train or test mode. Default 'train'.
transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
download (bool, optional): Whether to download dataset automatically if
:attr:`image_path` :attr:`label_path` is not set. Default: True.
backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None.
Returns:
Dataset: Fashion-MNIST Dataset.
:ref:`api_paddle_io_Dataset`. An instance of FashionMNIST dataset.
Examples:
.. code-block:: python
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import FashionMNIST
mnist = FashionMNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].size, sample[1])
fashion_mnist = FashionMNIST()
print(len(fashion_mnist))
# 60000
for i in range(5): # only show first 5 images
img, label = fashion_mnist[i]
# do something with img and label
print(type(img), img.size, label)
# <class 'PIL.Image.Image'> (28, 28) [9]
transform = T.Compose(
[
T.ToTensor(),
T.Normalize(
mean=[127.5],
std=[127.5],
),
]
)
fashion_mnist_test = FashionMNIST(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(fashion_mnist_test))
# 10000
for img, label in itertools.islice(iter(fashion_mnist_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape, label)
# <class 'paddle.Tensor'> [1, 28, 28] [9]
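The integer labels can be mapped to the usual Fashion-MNIST category names; a minimal sketch (the name list below is conventional and not provided by the API):

.. code-block:: python

    from paddle.vision.datasets import FashionMNIST

    CLASS_NAMES = [
        "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
        "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
    ]

    fashion_mnist = FashionMNIST(mode="test")
    img, label = fashion_mnist[0]
    label_id = int(label)                 # label is a 1-element array
    print(label_id, CLASS_NAMES[label_id])
    # e.g. 9 Ankle boot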
"""
NAME = 'fashion-mnist'
@@ -39,51 +39,69 @@ MODE_FLAG_MAP = {'train': 'trainval', 'test': 'train', 'valid': "val"}
class VOC2012(Dataset):
"""
Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset
To speed up the download, we put the data on https://dataset.bj.bcebos.com/voc/VOCtrainval_11-May-2012.tar.
Original data can get from http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar.
Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset.
Args:
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None, default data path: ~/.cache/paddle/dataset/voc2012
mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
download(bool): download dataset automatically if :attr:`data_file` is None. Default True
backend(str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from ``paddle.vsion.get_image_backend`` ,
data_file (str, optional): Path to data file, can be set None if
:attr:`download` is True. Default: None, default data path: ~/.cache/paddle/dataset/voc2012.
mode (str, optional): Either train, valid or test mode. Default 'train'.
transform (Callable, optional): Transform to perform on image, None for no transform. Default: None.
download (bool, optional): Download dataset automatically if :attr:`data_file` is None. Default: True.
backend (str, optional): Specifies which type of image to be returned:
PIL.Image or numpy.ndarray. Should be one of {'pil', 'cv2'}.
If this option is not set, will get backend from :ref:`paddle.vision.get_image_backend <api_vision_image_get_image_backend>`,
default backend is 'pil'. Default: None.
Returns:
:ref:`api_paddle_io_Dataset`. An instance of VOC2012 dataset.
Examples:
.. code-block:: python
import paddle
import itertools
import paddle.vision.transforms as T
from paddle.vision.datasets import VOC2012
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, image, label):
return paddle.sum(image), label
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
data_format='HWC')
voc2012 = VOC2012(mode='train', transform=normalize, backend='cv2')
for i in range(10):
image, label= voc2012[i]
image = paddle.cast(paddle.to_tensor(image), 'float32')
label = paddle.to_tensor(label)
model = SimpleNet()
image, label= model(image, label)
print(image.numpy().shape, label.numpy().shape)
voc2012 = VOC2012()
print(len(voc2012))
# 2913
for i in range(5): # only show first 5 images
img, label = voc2012[i]
# do something with img and label
print(type(img), img.size)
# <class 'PIL.JpegImagePlugin.JpegImageFile'> (500, 281)
print(type(label), label.size)
# <class 'PIL.PngImagePlugin.PngImageFile'> (500, 281)
transform = T.Compose(
[
T.ToTensor(),
T.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5],
to_rgb=True,
),
]
)
voc2012_test = VOC2012(
mode="test",
transform=transform, # apply transform to every image
backend="cv2", # use OpenCV as image transform backend
)
print(len(voc2012_test))
# 1464
for img, label in itertools.islice(iter(voc2012_test), 5): # only show first 5 images
# do something with img and label
print(type(img), img.shape)
# <class 'paddle.Tensor'> [3, 281, 500]
print(type(label), label.shape)
# <class 'numpy.ndarray'> (281, 500)
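Each sample is an (image, label) pair in which the label is a segmentation mask holding one class index per pixel; a minimal sketch for inspecting which classes appear in a mask, assuming the default ``pil`` backend:

.. code-block:: python

    import numpy as np
    from paddle.vision.datasets import VOC2012

    voc2012 = VOC2012(mode="train")
    img, label = voc2012[0]
    mask = np.array(label)                 # palette PNG -> (H, W) array of class ids
    print(mask.shape, np.unique(mask))     # 255 marks void/border pixels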
"""
def __init__(self,