未验证 提交 a37963b8 编写于 作者: M mls1999725 提交者: GitHub

Update APIs in text/datasets and dataloader (#29219)

* Update IterableDataset API

* Update TensorDataset API

* Update APIs in paddle/text/datasets

* Update dataset.py
上级 493568b0
......@@ -126,8 +126,8 @@ class IterableDataset(Dataset):
.. code-block:: python
import math
import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.io import IterableDataset, DataLoader, get_worker_info
class SplitedIterableDataset(IterableDataset):
......@@ -151,17 +151,15 @@ class IterableDataset(Dataset):
for i in range(iter_start, iter_end):
yield np.array([i])
place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
dataset = SplitedIterableDataset(start=2, end=9)
dataloader = DataLoader(
dataset,
places=place,
num_workers=2,
batch_size=1,
drop_last=True)
print(list(dataloader))
dataset = SplitedIterableDataset(start=2, end=9)
dataloader = DataLoader(
dataset,
num_workers=2,
batch_size=1,
drop_last=True)
for data in dataloader:
print(data)
# outputs: [2, 5, 3, 6, 4, 7]
Example 2: splitting data copy in each worker by :code:`worker_init_fn`
......@@ -169,8 +167,8 @@ class IterableDataset(Dataset):
.. code-block:: python
import math
import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.io import IterableDataset, DataLoader, get_worker_info
class RangeIterableDataset(IterableDataset):
......@@ -182,33 +180,31 @@ class IterableDataset(Dataset):
for i in range(self.start, self.end):
yield np.array([i])
place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
dataset = RangeIterableDataset(start=2, end=9)
dataset = RangeIterableDataset(start=2, end=9)
def worker_init_fn(worker_id):
worker_info = get_worker_info()
def worker_init_fn(worker_id):
worker_info = get_worker_info()
dataset = worker_info.dataset
start = dataset.start
end = dataset.end
num_per_worker = int(
math.ceil((end - start) / float(worker_info.num_workers)))
worker_id = worker_info.id
dataset.start = start + worker_id * num_per_worker
dataset.end = min(dataset.start + num_per_worker, end)
dataloader = DataLoader(
dataset,
places=place,
num_workers=2,
batch_size=1,
drop_last=True,
worker_init_fn=worker_init_fn)
print(list(dataloader))
# outputs: [2, 5, 3, 6, 4, 7]
dataset = worker_info.dataset
start = dataset.start
end = dataset.end
num_per_worker = int(
math.ceil((end - start) / float(worker_info.num_workers)))
worker_id = worker_info.id
dataset.start = start + worker_id * num_per_worker
dataset.end = min(dataset.start + num_per_worker, end)
dataloader = DataLoader(
dataset,
num_workers=2,
batch_size=1,
drop_last=True,
worker_init_fn=worker_init_fn)
for data in dataloader:
print(data)
# outputs: [2, 5, 3, 6, 4, 7]
"""
......@@ -250,7 +246,6 @@ class TensorDataset(Dataset):
import paddle
from paddle.io import TensorDataset
paddle.disable_static()
input_np = np.random.random([2, 3, 4]).astype('float32')
input = paddle.to_tensor(input_np)
......
......@@ -81,7 +81,6 @@ class Conll05st(Dataset):
def forward(self, pred_idx, mark, label):
return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label)
paddle.disable_static()
conll05st = Conll05st()
......
......@@ -59,7 +59,6 @@ class Imdb(Dataset):
def forward(self, doc, label):
return paddle.sum(doc), label
paddle.disable_static()
imdb = Imdb(mode='train')
......
......@@ -59,7 +59,6 @@ class Imikolov(Dataset):
def forward(self, src, trg):
return paddle.sum(src), paddle.sum(trg)
paddle.disable_static()
imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2)
......
......@@ -116,7 +116,6 @@ class Movielens(Dataset):
def forward(self, category, title, rating):
return paddle.sum(category), paddle.sum(title), paddle.sum(rating)
paddle.disable_static()
movielens = Movielens(mode='train')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册