未验证 提交 a37963b8 编写于 作者: M mls1999725 提交者: GitHub

Update APIs in text/datasets and dataloader (#29219)

* Update IterableDataset API

* Update TensorDataset API

* Update APIs in paddle/text/datasets

* Update dataset.py
上级 493568b0
...@@ -126,8 +126,8 @@ class IterableDataset(Dataset): ...@@ -126,8 +126,8 @@ class IterableDataset(Dataset):
.. code-block:: python .. code-block:: python
import math import math
import paddle
import numpy as np import numpy as np
import paddle.fluid as fluid
from paddle.io import IterableDataset, DataLoader, get_worker_info from paddle.io import IterableDataset, DataLoader, get_worker_info
class SplitedIterableDataset(IterableDataset): class SplitedIterableDataset(IterableDataset):
...@@ -151,17 +151,15 @@ class IterableDataset(Dataset): ...@@ -151,17 +151,15 @@ class IterableDataset(Dataset):
for i in range(iter_start, iter_end): for i in range(iter_start, iter_end):
yield np.array([i]) yield np.array([i])
place = fluid.CPUPlace() dataset = SplitedIterableDataset(start=2, end=9)
with fluid.dygraph.guard(place): dataloader = DataLoader(
dataset = SplitedIterableDataset(start=2, end=9) dataset,
dataloader = DataLoader( num_workers=2,
dataset, batch_size=1,
places=place, drop_last=True)
num_workers=2,
batch_size=1, for data in dataloader:
drop_last=True) print(data)
print(list(dataloader))
# outputs: [2, 5, 3, 6, 4, 7] # outputs: [2, 5, 3, 6, 4, 7]
Example 2: splitting data copy in each worker by :code:`worker_init_fn` Example 2: splitting data copy in each worker by :code:`worker_init_fn`
...@@ -169,8 +167,8 @@ class IterableDataset(Dataset): ...@@ -169,8 +167,8 @@ class IterableDataset(Dataset):
.. code-block:: python .. code-block:: python
import math import math
import paddle
import numpy as np import numpy as np
import paddle.fluid as fluid
from paddle.io import IterableDataset, DataLoader, get_worker_info from paddle.io import IterableDataset, DataLoader, get_worker_info
class RangeIterableDataset(IterableDataset): class RangeIterableDataset(IterableDataset):
...@@ -182,33 +180,31 @@ class IterableDataset(Dataset): ...@@ -182,33 +180,31 @@ class IterableDataset(Dataset):
for i in range(self.start, self.end): for i in range(self.start, self.end):
yield np.array([i]) yield np.array([i])
place = fluid.CPUPlace() dataset = RangeIterableDataset(start=2, end=9)
with fluid.dygraph.guard(place):
dataset = RangeIterableDataset(start=2, end=9)
def worker_init_fn(worker_id): def worker_init_fn(worker_id):
worker_info = get_worker_info() worker_info = get_worker_info()
dataset = worker_info.dataset dataset = worker_info.dataset
start = dataset.start start = dataset.start
end = dataset.end end = dataset.end
num_per_worker = int( num_per_worker = int(
math.ceil((end - start) / float(worker_info.num_workers))) math.ceil((end - start) / float(worker_info.num_workers)))
worker_id = worker_info.id worker_id = worker_info.id
dataset.start = start + worker_id * num_per_worker dataset.start = start + worker_id * num_per_worker
dataset.end = min(dataset.start + num_per_worker, end) dataset.end = min(dataset.start + num_per_worker, end)
dataloader = DataLoader( dataloader = DataLoader(
dataset, dataset,
places=place, num_workers=2,
num_workers=2, batch_size=1,
batch_size=1, drop_last=True,
drop_last=True, worker_init_fn=worker_init_fn)
worker_init_fn=worker_init_fn)
for data in dataloader:
print(list(dataloader)) print(data)
# outputs: [2, 5, 3, 6, 4, 7] # outputs: [2, 5, 3, 6, 4, 7]
""" """
...@@ -250,7 +246,6 @@ class TensorDataset(Dataset): ...@@ -250,7 +246,6 @@ class TensorDataset(Dataset):
import paddle import paddle
from paddle.io import TensorDataset from paddle.io import TensorDataset
paddle.disable_static()
input_np = np.random.random([2, 3, 4]).astype('float32') input_np = np.random.random([2, 3, 4]).astype('float32')
input = paddle.to_tensor(input_np) input = paddle.to_tensor(input_np)
......
...@@ -81,7 +81,6 @@ class Conll05st(Dataset): ...@@ -81,7 +81,6 @@ class Conll05st(Dataset):
def forward(self, pred_idx, mark, label): def forward(self, pred_idx, mark, label):
return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label) return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label)
paddle.disable_static()
conll05st = Conll05st() conll05st = Conll05st()
......
...@@ -59,7 +59,6 @@ class Imdb(Dataset): ...@@ -59,7 +59,6 @@ class Imdb(Dataset):
def forward(self, doc, label): def forward(self, doc, label):
return paddle.sum(doc), label return paddle.sum(doc), label
paddle.disable_static()
imdb = Imdb(mode='train') imdb = Imdb(mode='train')
......
...@@ -59,7 +59,6 @@ class Imikolov(Dataset): ...@@ -59,7 +59,6 @@ class Imikolov(Dataset):
def forward(self, src, trg): def forward(self, src, trg):
return paddle.sum(src), paddle.sum(trg) return paddle.sum(src), paddle.sum(trg)
paddle.disable_static()
imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2) imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2)
......
...@@ -116,7 +116,6 @@ class Movielens(Dataset): ...@@ -116,7 +116,6 @@ class Movielens(Dataset):
def forward(self, category, title, rating): def forward(self, category, title, rating):
return paddle.sum(category), paddle.sum(title), paddle.sum(rating) return paddle.sum(category), paddle.sum(title), paddle.sum(rating)
paddle.disable_static()
movielens = Movielens(mode='train') movielens = Movielens(mode='train')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册