未验证 提交 e2e0d296 编写于 作者: J JYChen 提交者: GitHub

remove old dataloader & generator from quantilization (#55754)

* remove old dataloader & generator from quantilization

* fix ut test_post_training_quantization_mnist
上级 3e2c6a56
......@@ -23,12 +23,10 @@ try:
except:
from .utils import tqdm
from inspect import isgeneratorfunction
from paddle.fluid.framework import IrGraph, _get_var
from ... import io, static
from ...fluid import reader
from ...framework import core
from ...utils import unique_name
from ..log_helper import get_logger
......@@ -171,16 +169,16 @@ class PostTrainingQuantization:
When all parameters were saved in a single binary file, set it
as the real filename. If parameters were saved in separate files,
set it as 'None'. Default is 'None'.
batch_generator(Python Generator): The batch generator provides
batch_generator(Python Generator, deprecated): The batch generator provides
calibration data for DataLoader, and it returns a batch every
time. Note that, sample_generator and batch_generator, only one
should be set. Besides, batch_generator supports lod tensor.
sample_generator(Python Generator): The sample generator provides
sample_generator(Python Generator, deprecated): The sample generator provides
calibration data for DataLoader, and it only returns a sample every
time. Note that, sample_generator and batch_generator, only one
should be set. Besides, sample_generator does not support lod tensor.
data_loader(Python Generator, Paddle.io.DataLoader, optional): The
Generator or Dataloader provides calibrate data, and it could
data_loader(paddle.io.DataLoader): The
DataLoader provides calibration data, and it could
return a batch every time.
batch_size(int, optional): The batch size of DataLoader. Default is 10.
batch_nums(int, optional): If batch_nums is not None, the number of
......@@ -309,22 +307,12 @@ class PostTrainingQuantization:
# Check inputs
assert executor is not None, "The executor cannot be None."
assert any(
[gen is not None]
for gen in [sample_generator, batch_generator, data_loader]
), (
"The sample_generator, batch_generator "
"and data_loader cannot be None in the same time."
)
if data_loader is not None:
assert isinstance(
data_loader,
(
io.DataLoader,
type(isgeneratorfunction),
reader.GeneratorLoader,
),
), "data_loader only accepts `paddle.io.DataLoader` or Generator instance."
assert data_loader is not None, "data_loader cannot be None."
assert isinstance(
data_loader, io.DataLoader
), "data_loader only accepts `paddle.io.DataLoader`."
assert batch_size > 0, "The batch_size should be greater than 0."
assert (
algo in self._support_algo_type
......@@ -615,29 +603,8 @@ class PostTrainingQuantization:
for var_name in self._feed_list
]
if self._data_loader is not None:
self._batch_nums = (
self._batch_nums if self._batch_nums else len(self._data_loader)
)
return
self._data_loader = reader.DataLoader.from_generator(
feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True
)
if self._sample_generator is not None:
self._data_loader.set_sample_generator(
self._sample_generator,
batch_size=self._batch_size,
drop_last=True,
places=self._place,
)
elif self._batch_generator is not None:
self._data_loader.set_batch_generator(
self._batch_generator, places=self._place
)
self._batch_nums = (
self._batch_nums
if self._batch_nums
else len(list(self._data_loader))
self._batch_nums if self._batch_nums else len(self._data_loader)
)
def _optimize_fp32_model(self):
......
......@@ -30,6 +30,23 @@ random.seed(0)
np.random.seed(0)
class TransedMnistDataSet(paddle.io.Dataset):
    """Dataset adapter that turns items of a wrapped dataset into feed dicts.

    Each item of the wrapped dataset is expected to hold the image in
    position 0. The image is converted to a float32 array, reshaped to
    (1, 28, 28), rescaled with ``x / 127.5 - 1.0`` and returned under the
    ``"img"`` key.
    """

    def __init__(self, mnist_data):
        # Keep a reference to the wrapped dataset; nothing is copied.
        self.mnist_data = mnist_data

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.mnist_data)

    def __getitem__(self, idx):
        """Return ``{"img": array}`` for the item at position ``idx``."""
        sample = self.mnist_data[idx]
        # Only the first element of the item (the image) is used.
        pixels = np.array(sample[0]).astype('float32')
        pixels = pixels.reshape(1, 28, 28)
        # NOTE(review): presumably maps 0..255 pixel values into [-1, 1];
        # confirm the value range of the wrapped dataset.
        scaled = pixels / 127.5 - 1.0
        return {"img": scaled}
class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self):
self.root_path = tempfile.TemporaryDirectory()
......@@ -217,14 +234,27 @@ class TestPostTrainingQuantization(unittest.TestCase):
):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
val_reader = paddle.dataset.mnist.train()
train_dataset = paddle.vision.datasets.MNIST(
mode='train', transform=None
)
train_dataset = TransedMnistDataSet(train_dataset)
BatchSampler = paddle.io.BatchSampler(
train_dataset, batch_size=batch_size
)
val_data_generator = paddle.io.DataLoader(
train_dataset,
batch_sampler=BatchSampler,
places=paddle.static.cpu_places(),
)
ptq = PostTrainingQuantization(
executor=exe,
model_dir=model_path,
model_filename=model_filename,
params_filename=params_filename,
sample_generator=val_reader,
sample_generator=None,
data_loader=val_data_generator,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo,
......
......@@ -29,6 +29,23 @@ random.seed(0)
np.random.seed(0)
class TransedMnistDataSet(paddle.io.Dataset):
    """Dataset adapter that turns items of a wrapped dataset into feed dicts.

    Each item of the wrapped dataset is expected to hold the image in
    position 0. The image is converted to a float32 array, reshaped to
    (1, 28, 28), rescaled with ``x / 127.5 - 1.0`` and returned under the
    ``"x"`` key.
    """

    def __init__(self, mnist_data):
        # Keep a reference to the wrapped dataset; nothing is copied.
        self.mnist_data = mnist_data

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.mnist_data)

    def __getitem__(self, idx):
        """Return ``{"x": array}`` for the item at position ``idx``."""
        sample = self.mnist_data[idx]
        # Only the first element of the item (the image) is used.
        pixels = np.array(sample[0]).astype('float32')
        pixels = pixels.reshape(1, 28, 28)
        # NOTE(review): presumably maps 0..255 pixel values into [-1, 1];
        # confirm the value range of the wrapped dataset.
        scaled = pixels / 127.5 - 1.0
        return {"x": scaled}
class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self):
self.download_path = 'int8/download'
......@@ -132,28 +149,30 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_optimize_model=False,
batch_size=10,
batch_nums=10,
is_data_loader=False,
):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
val_reader = paddle.dataset.mnist.train()
def val_data_generator():
batches = []
for data in val_reader():
batches.append(data[0].reshape(1, 28, 28))
if len(batches) == batch_size:
batches = np.asarray(batches)
yield {"x": batches}
batches = []
train_dataset = paddle.vision.datasets.MNIST(
mode='train', transform=None
)
train_dataset = TransedMnistDataSet(train_dataset)
BatchSampler = paddle.io.BatchSampler(
train_dataset, batch_size=batch_size
)
val_data_generator = paddle.io.DataLoader(
train_dataset,
batch_sampler=BatchSampler,
places=paddle.static.cpu_places(),
)
ptq = PostTrainingQuantization(
executor=exe,
model_dir=model_path,
model_filename='model.pdmodel',
params_filename='model.pdiparams',
sample_generator=val_reader if not is_data_loader else None,
data_loader=val_data_generator if is_data_loader else None,
sample_generator=None,
data_loader=val_data_generator,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo,
......@@ -183,7 +202,6 @@ class TestPostTrainingQuantization(unittest.TestCase):
batch_size=10,
infer_iterations=10,
quant_iterations=5,
is_data_loader=False,
):
origin_model_path = self.download_model(data_url, data_md5, model_name)
......@@ -210,7 +228,6 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_optimize_model,
batch_size,
quant_iterations,
is_data_loader=is_data_loader,
)
print(
......@@ -442,7 +459,6 @@ class TestPostTrainingAbsMaxForWhile(TestPostTrainingQuantization):
batch_size,
infer_iterations,
quant_iterations,
is_data_loader=True,
)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册