未验证 提交 95b18683 编写于 作者: K Kaipeng Deng 提交者: GitHub

update DataLoader doc (#28290)

* update DataLoader doc. test=develop
上级 71d62207
......@@ -153,18 +153,22 @@ class DataLoader(object):
multi-process workers will be used to load data asynchronously if
:attr:`num_workers` is set as a positive number.
DataLoader only supports map-style dataset(can get a sample from
dataset with a given index) currently, for a map-style dataset,
please see :code:`paddle.io.Dataset`.
DataLoader supports map-style dataset and iterable-style dataset.
batch_sampler please see :code:`paddle.io.BatchSampler`
For map-style datast(can get a sample from dataset with a given
index), please see :code:`paddle.io.Dataset`.
For iterable-style datast(get samples from dataset iteratively,
like a Python iterator), please see :code:`paddle.io.IterableDataset`.
For :code:`batch_sampler` please see :code:`paddle.io.BatchSampler`
Args:
dataset(Dataset): the dataset to load data from, should be an
instance of subclass of :code:`paddle.io.Dataset` or
:code:`paddle.io.IterableDataset`.
feed_list (list(Tensor)|tuple(Tensor)): feed variable list.
The variables should be created by :code:`fluid.data()`.
The variables should be created by :code:`paddle.static.data()`.
:attr:`feed_list` must be set if :attr:`return_list` is
False. Default None.
places(list(Place)|tuple(Place)|optional): a list of Place,
......@@ -173,10 +177,10 @@ class DataLoader(object):
will be used. Default None.
return_list (bool): whether the return value on each device is
presented as a list. If :attr:`return_list=False`, the return
value on each device would be a dict of str -> LoDTensor, where
value on each device would be a dict of str -> Tensor, where
the key of the dict is the name of each fed variables. If
:attr:`return_list=True`, the return value on each device would
be a list(LoDTensor). :attr:`return_list` can only be True
be a list(Tensor). :attr:`return_list` can only be True
in dynamic graph mode. Default False.
batch_sampler(BatchSampler): an instance of `paddle.io.BatchSampler`
to generate batch indices to draw samples from :attr:`dataset`
......@@ -224,7 +228,8 @@ class DataLoader(object):
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.io import Dataset, BatchSampler, DataLoader
BATCH_NUM = 20
......@@ -234,8 +239,6 @@ class DataLoader(object):
IMAGE_SIZE = 784
CLASS_NUM = 10
USE_GPU = False # whether use GPU to run model
# define a random dataset
class RandomDataset(Dataset):
def __init__(self, num_samples):
......@@ -251,78 +254,34 @@ class DataLoader(object):
dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
# get places
places = fluid.cuda_places() if USE_GPU else fluid.cpu_places()
# --------------------- dygraph mode --------------------
class SimpleNet(fluid.dygraph.Layer):
class SimpleNet(nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = fluid.dygraph.nn.Linear(IMAGE_SIZE, CLASS_NUM, act='softmax')
self.fc = nn.Linear(IMAGE_SIZE, CLASS_NUM)
def forward(self, image, label=None):
return self.fc(image)
with fluid.dygraph.guard(places[0]):
simple_net = SimpleNet()
opt = fluid.optimizer.SGD(learning_rate=1e-3,
parameter_list=simple_net.parameters())
loader = DataLoader(dataset,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True,
num_workers=2)
for e in range(EPOCH_NUM):
for i, (image, label) in enumerate(loader()):
out = simple_net(image)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.reduce_mean(loss)
avg_loss.backward()
opt.minimize(avg_loss)
simple_net.clear_gradients()
print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy())))
# -------------------------------------------------------
# -------------------- static graph ---------------------
paddle.enable_static()
def simple_net(image, label):
fc_tmp = fluid.layers.fc(image, size=CLASS_NUM, act='softmax')
cross_entropy = fluid.layers.softmax_with_cross_entropy(image, label)
loss = fluid.layers.reduce_mean(cross_entropy)
sgd = fluid.optimizer.SGD(learning_rate=1e-3)
sgd.minimize(loss)
return loss
image = fluid.data(name='image', shape=[None, IMAGE_SIZE], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
loss = simple_net(image, label)
exe = fluid.Executor(places[0])
exe.run(fluid.default_startup_program())
prog = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(loss_name=loss.name)
simple_net = SimpleNet()
opt = paddle.optimizer.SGD(learning_rate=1e-3,
parameters=simple_net.parameters())
loader = DataLoader(dataset,
feed_list=[image, label],
batch_size=BATCH_SIZE,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True,
num_workers=2)
for e in range(EPOCH_NUM):
for i, data in enumerate(loader()):
l = exe.run(prog, feed=data, fetch_list=[loss], return_numpy=True)
print("Epoch {} batch {}: loss = {}".format(e, i, l[0][0]))
for i, (image, label) in enumerate(loader()):
out = simple_net(image)
loss = F.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
opt.minimize(avg_loss)
simple_net.clear_gradients()
print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy())))
# -------------------------------------------------------
.. note::
For reading iterable dataset with multiprocess Dataloader,
......@@ -439,6 +398,10 @@ class DataLoader(object):
use_multiprocess=False,
drop_last=True):
"""
.. warning::
This API will be deprecated in the future, it is recommended to use
:code:`paddle.io.DataLoader` which supports multi-processes acceleration.
.. note::
**The framework ensures that the data loading order of DataLoader is exactly the same as the user-defined data source.**
......@@ -684,6 +647,10 @@ class DataLoader(object):
@staticmethod
def from_dataset(dataset, places, drop_last=True):
"""
.. warning::
This API will be deprecated in the future, it is recommended to use
:code:`paddle.io.DataLoader` which supports multi-processes acceleration.
Create an iterable DataLoader object for loading data from Dataset.
Dataset is only supported in Linux system currently.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册