未验证 提交 163c6a9e 编写于 作者: W wuhuachaocoding 提交者: GitHub

update reader in sharding unit test. (#49652)

上级 a000e9b8
......@@ -54,14 +54,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_pure_fp16, opt_group=False):
......@@ -125,18 +129,15 @@ def train_mlp(
)
return
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
if sharding_stage == 2:
model.to(device="gpu")
......
......@@ -50,14 +50,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_multi_precision, opt_group=False):
......@@ -92,18 +96,15 @@ def train_mlp(
model=model, optimizer=optimizer, level=shard_level, scaler=scaler
)
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
for eop in range(epoch):
model.train()
......
......@@ -48,14 +48,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_multi_precision, opt_group=False):
......@@ -103,18 +107,15 @@ def train_mlp(
if shard_level == "os_g":
optimizer.set_lr(optimizer.get_lr())
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
for eop in range(epoch):
model.train()
......
......@@ -53,14 +53,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_pure_fp16, opt_group=False):
......@@ -122,18 +126,15 @@ def train_mlp(
)
return
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
if sharding_stage == 2:
model.to(device="gpu")
......
......@@ -53,14 +53,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_pure_fp16, opt_group=False):
......@@ -124,18 +128,15 @@ def train_mlp(
)
return
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
if sharding_stage == 2:
model.to(device="gpu")
......
......@@ -15,11 +15,7 @@
# limitations under the License.
import numpy as np
# Reconstructed post-diff import: the diff residue kept both the old
# multi-line import (which pulls in the now-deleted `reader_decorator`)
# and the new one-line import.  Only the new form is valid.
from dygraph_group_sharded_stage2 import MLP, RandomDataset, optimizer_setting
import paddle
from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_optimizer_stage2 import (
......@@ -53,18 +49,15 @@ def train_mlp(model, offload=False):
)
model = GroupShardedStage2(model, optimizer, buffer_max_size=2**21)
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
for eop in range(epoch):
model.train()
......
......@@ -44,7 +44,7 @@ l2_decay = 1e-4
class MLP(paddle.nn.Layer):
def __init__(self, linear_size=1000, param_attr=None, bias_attr=None):
def __init__(self, linear_size=1024, param_attr=None, bias_attr=None):
super().__init__()
self._linear1 = Linear(linear_size, linear_size)
......@@ -102,14 +102,18 @@ class SpecialModel(paddle.nn.Layer):
return x
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    Note this file's default ``linear_size`` is 1024, matching the MLP
    in this test module.
    """

    def __init__(self, num_samples=2000, linear_size=1024):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_pure_fp16, opt_group=False):
......@@ -181,21 +185,16 @@ def train_mlp(
)
return
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
for eop in range(epoch):
model.train()
for batch_id, data in enumerate(train_loader()):
......
......@@ -52,14 +52,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_pure_fp16, opt_group=False):
......@@ -103,18 +107,15 @@ def train_mlp(
segment_size=2**15,
)
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
for eop in range(epoch):
model.train()
......
......@@ -63,14 +63,18 @@ class MLP(fluid.Layer):
return y
class RandomDataset(paddle.io.Dataset):
    """Map-style dataset of random float32 vectors with constant labels.

    Replaces the old generator-based ``reader_decorator`` helper whose
    deleted lines were interleaved into this span by the diff rendering.
    """

    def __init__(self, num_samples=2000, linear_size=1000):
        self.num_samples = num_samples  # length reported by __len__
        self.linear_size = linear_size  # feature dimension of each sample

    def __getitem__(self, idx):
        # idx is intentionally ignored: every access draws a fresh random
        # vector, so determinism relies on the caller seeding np.random.
        img = np.random.rand(self.linear_size).astype('float32')
        label = np.ones(1).astype('int64')
        return img, label

    def __len__(self):
        return self.num_samples
def optimizer_setting(model, use_pure_fp16, opt_group=False):
......@@ -141,18 +145,15 @@ def train_mlp(
)
return
# Reconstructed post-diff loader setup: the dead `paddle.batch` /
# `DataLoader.from_generator` / `set_sample_list_generator` residue is
# removed.  Seed paddle and numpy so every rank draws identical synthetic
# batches; num_workers=0 keeps sampling in-process and deterministic.
paddle.seed(2023)
np.random.seed(2023)
train_loader = paddle.io.DataLoader(
    RandomDataset(),
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)
for eop in range(epoch):
model.train()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册