未验证 提交 e2e0d296 编写于 作者: J JYChen 提交者: GitHub

remove old dataloader & generator from quantization (#55754)

* remove old dataloader & generator from quantization

* fix ut test_post_training_quantization_mnist
上级 3e2c6a56
...@@ -23,12 +23,10 @@ try: ...@@ -23,12 +23,10 @@ try:
except: except:
from .utils import tqdm from .utils import tqdm
from inspect import isgeneratorfunction
from paddle.fluid.framework import IrGraph, _get_var from paddle.fluid.framework import IrGraph, _get_var
from ... import io, static from ... import io, static
from ...fluid import reader
from ...framework import core from ...framework import core
from ...utils import unique_name from ...utils import unique_name
from ..log_helper import get_logger from ..log_helper import get_logger
...@@ -171,16 +169,16 @@ class PostTrainingQuantization: ...@@ -171,16 +169,16 @@ class PostTrainingQuantization:
When all parameters were saved in a single binary file, set it When all parameters were saved in a single binary file, set it
as the real filename. If parameters were saved in separate files, as the real filename. If parameters were saved in separate files,
set it as 'None'. Default is 'None'. set it as 'None'. Default is 'None'.
batch_generator(Python Generator): The batch generator provides batch_generator(Python Generator, deprecated): The batch generator provides
calibrate data for DataLoader, and it returns a batch every calibrate data for DataLoader, and it returns a batch every
time. Note that, sample_generator and batch_generator, only one time. Note that, sample_generator and batch_generator, only one
should be set. Besides, batch_generator supports lod tensor. should be set. Besides, batch_generator supports lod tensor.
sample_generator(Python Generator): The sample generator provides sample_generator(Python Generator, deprecated): The sample generator provides
calibrate data for DataLoader, and it only returns a sample every calibrate data for DataLoader, and it only returns a sample every
time. Note that, sample_generator and batch_generator, only one time. Note that, sample_generator and batch_generator, only one
should be set. Besides, sample_generator does not support lod tensor. should be set. Besides, sample_generator does not support lod tensor.
data_loader(Python Generator, Paddle.io.DataLoader, optional): The data_loader(Paddle.io.DataLoader): The
Generator or Dataloader provides calibrate data, and it could Dataloader provides calibrate data, and it could
return a batch every time. return a batch every time.
batch_size(int, optional): The batch size of DataLoader. Default is 10. batch_size(int, optional): The batch size of DataLoader. Default is 10.
batch_nums(int, optional): If batch_nums is not None, the number of batch_nums(int, optional): If batch_nums is not None, the number of
...@@ -309,22 +307,12 @@ class PostTrainingQuantization: ...@@ -309,22 +307,12 @@ class PostTrainingQuantization:
# Check inputs # Check inputs
assert executor is not None, "The executor cannot be None." assert executor is not None, "The executor cannot be None."
assert any( assert data_loader is not None, "data_loader cannot be None."
[gen is not None]
for gen in [sample_generator, batch_generator, data_loader] assert isinstance(
), ( data_loader, io.DataLoader
"The sample_generator, batch_generator " ), "data_loader only accepts `paddle.io.DataLoader`."
"and data_loader cannot be None in the same time."
)
if data_loader is not None:
assert isinstance(
data_loader,
(
io.DataLoader,
type(isgeneratorfunction),
reader.GeneratorLoader,
),
), "data_loader only accepts `paddle.io.DataLoader` or Generator instance."
assert batch_size > 0, "The batch_size should be greater than 0." assert batch_size > 0, "The batch_size should be greater than 0."
assert ( assert (
algo in self._support_algo_type algo in self._support_algo_type
...@@ -615,29 +603,8 @@ class PostTrainingQuantization: ...@@ -615,29 +603,8 @@ class PostTrainingQuantization:
for var_name in self._feed_list for var_name in self._feed_list
] ]
if self._data_loader is not None:
self._batch_nums = (
self._batch_nums if self._batch_nums else len(self._data_loader)
)
return
self._data_loader = reader.DataLoader.from_generator(
feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True
)
if self._sample_generator is not None:
self._data_loader.set_sample_generator(
self._sample_generator,
batch_size=self._batch_size,
drop_last=True,
places=self._place,
)
elif self._batch_generator is not None:
self._data_loader.set_batch_generator(
self._batch_generator, places=self._place
)
self._batch_nums = ( self._batch_nums = (
self._batch_nums self._batch_nums if self._batch_nums else len(self._data_loader)
if self._batch_nums
else len(list(self._data_loader))
) )
def _optimize_fp32_model(self): def _optimize_fp32_model(self):
......
...@@ -30,6 +30,23 @@ random.seed(0) ...@@ -30,6 +30,23 @@ random.seed(0)
np.random.seed(0) np.random.seed(0)
class TransedMnistDataSet(paddle.io.Dataset):
    """Map-style dataset that exposes each wrapped sample as an ``{"img": array}`` dict.

    Only element ``[0]`` of every wrapped sample is used; any remaining
    elements of the sample are ignored.
    """

    def __init__(self, mnist_data):
        # Indexable, sized collection of samples; only sample[0] is read later.
        self.mnist_data = mnist_data

    def __len__(self):
        """Return the number of samples in the wrapped collection."""
        return len(self.mnist_data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a feed dict keyed by ``"img"``.

        The first element of the sample is converted to a float32 array,
        reshaped to ``(1, 28, 28)`` and transformed with ``x / 127.5 - 1.0``
        (which lands in [-1, 1] if the inputs are 8-bit pixel values).
        """
        raw = np.array(self.mnist_data[idx][0])
        pixels = raw.astype('float32').reshape(1, 28, 28)
        scaled = pixels / 127.5 - 1.0
        return {"img": scaled}
class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self): def setUp(self):
self.root_path = tempfile.TemporaryDirectory() self.root_path = tempfile.TemporaryDirectory()
...@@ -217,14 +234,27 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -217,14 +234,27 @@ class TestPostTrainingQuantization(unittest.TestCase):
): ):
place = paddle.CPUPlace() place = paddle.CPUPlace()
exe = paddle.static.Executor(place) exe = paddle.static.Executor(place)
val_reader = paddle.dataset.mnist.train()
train_dataset = paddle.vision.datasets.MNIST(
mode='train', transform=None
)
train_dataset = TransedMnistDataSet(train_dataset)
BatchSampler = paddle.io.BatchSampler(
train_dataset, batch_size=batch_size
)
val_data_generator = paddle.io.DataLoader(
train_dataset,
batch_sampler=BatchSampler,
places=paddle.static.cpu_places(),
)
ptq = PostTrainingQuantization( ptq = PostTrainingQuantization(
executor=exe, executor=exe,
model_dir=model_path, model_dir=model_path,
model_filename=model_filename, model_filename=model_filename,
params_filename=params_filename, params_filename=params_filename,
sample_generator=val_reader, sample_generator=None,
data_loader=val_data_generator,
batch_size=batch_size, batch_size=batch_size,
batch_nums=batch_nums, batch_nums=batch_nums,
algo=algo, algo=algo,
......
...@@ -29,6 +29,23 @@ random.seed(0) ...@@ -29,6 +29,23 @@ random.seed(0)
np.random.seed(0) np.random.seed(0)
class TransedMnistDataSet(paddle.io.Dataset):
    """Dataset adapter that yields one ``{"x": array}`` feed dict per wrapped sample.

    The wrapped collection must support ``len()`` and integer indexing; only
    the first element of each sample is consumed.
    """

    def __init__(self, mnist_data):
        # Keep a reference to the underlying samples; nothing is copied here.
        self.mnist_data = mnist_data

    def __len__(self):
        """Return how many samples the wrapped collection holds."""
        return len(self.mnist_data)

    def __getitem__(self, idx):
        """Build the feed dict for sample ``idx``.

        Element ``[0]`` of the sample is cast to float32, reshaped to
        ``(1, 28, 28)`` and rescaled via ``x / 127.5 - 1.0`` before being
        returned under the key ``"x"``.
        """
        first_field = self.mnist_data[idx][0]
        as_float = np.array(first_field).astype('float32')
        image = as_float.reshape(1, 28, 28)
        normalized = image / 127.5 - 1.0
        return {"x": normalized}
class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self): def setUp(self):
self.download_path = 'int8/download' self.download_path = 'int8/download'
...@@ -132,28 +149,30 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -132,28 +149,30 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_optimize_model=False, is_optimize_model=False,
batch_size=10, batch_size=10,
batch_nums=10, batch_nums=10,
is_data_loader=False,
): ):
place = paddle.CPUPlace() place = paddle.CPUPlace()
exe = paddle.static.Executor(place) exe = paddle.static.Executor(place)
val_reader = paddle.dataset.mnist.train()
def val_data_generator(): train_dataset = paddle.vision.datasets.MNIST(
batches = [] mode='train', transform=None
for data in val_reader(): )
batches.append(data[0].reshape(1, 28, 28)) train_dataset = TransedMnistDataSet(train_dataset)
if len(batches) == batch_size: BatchSampler = paddle.io.BatchSampler(
batches = np.asarray(batches) train_dataset, batch_size=batch_size
yield {"x": batches} )
batches = [] val_data_generator = paddle.io.DataLoader(
train_dataset,
batch_sampler=BatchSampler,
places=paddle.static.cpu_places(),
)
ptq = PostTrainingQuantization( ptq = PostTrainingQuantization(
executor=exe, executor=exe,
model_dir=model_path, model_dir=model_path,
model_filename='model.pdmodel', model_filename='model.pdmodel',
params_filename='model.pdiparams', params_filename='model.pdiparams',
sample_generator=val_reader if not is_data_loader else None, sample_generator=None,
data_loader=val_data_generator if is_data_loader else None, data_loader=val_data_generator,
batch_size=batch_size, batch_size=batch_size,
batch_nums=batch_nums, batch_nums=batch_nums,
algo=algo, algo=algo,
...@@ -183,7 +202,6 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -183,7 +202,6 @@ class TestPostTrainingQuantization(unittest.TestCase):
batch_size=10, batch_size=10,
infer_iterations=10, infer_iterations=10,
quant_iterations=5, quant_iterations=5,
is_data_loader=False,
): ):
origin_model_path = self.download_model(data_url, data_md5, model_name) origin_model_path = self.download_model(data_url, data_md5, model_name)
...@@ -210,7 +228,6 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -210,7 +228,6 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_optimize_model, is_optimize_model,
batch_size, batch_size,
quant_iterations, quant_iterations,
is_data_loader=is_data_loader,
) )
print( print(
...@@ -442,7 +459,6 @@ class TestPostTrainingAbsMaxForWhile(TestPostTrainingQuantization): ...@@ -442,7 +459,6 @@ class TestPostTrainingAbsMaxForWhile(TestPostTrainingQuantization):
batch_size, batch_size,
infer_iterations, infer_iterations,
quant_iterations, quant_iterations,
is_data_loader=True,
) )
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册