From 5246f5df40dce0c5d8cf0df9cb24ff7c07cb2379 Mon Sep 17 00:00:00 2001
From: WangXi
Date: Fri, 31 Jul 2020 16:54:24 +0800
Subject: [PATCH] Speedup reader check_input_array when item is array (#25395) (#25848)

---
Reviewer notes (this area is ignored when the patch is applied):
  * Hoists the duplicated `_check_input_array` classmethod out of
    DygraphGeneratorLoader and GeneratorLoader into DataLoaderBase.
  * The shared helper uses np.asarray instead of np.array, so an input that
    is already an ndarray is passed through without being copied.
  * The shared helper returns the converted array; the DygraphGeneratorLoader
    call site now feeds that returned array to LoDTensor.set() instead of
    discarding it (the removed Dygraph variant returned nothing).
  * NOTE(review): the `np.object` alias is deprecated in newer NumPy releases
    and the error text contains the typo "Faild"; both are carried over
    unchanged from the removed code and are candidates for a follow-up.

 python/paddle/fluid/reader.py | 38 ++++++++++++----------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py
index 4c1f0f1b382..6f14c895a0a 100644
--- a/python/paddle/fluid/reader.py
+++ b/python/paddle/fluid/reader.py
@@ -96,6 +96,18 @@ class DataLoaderBase(object):
     def __next__(self):
         raise NotImplementedError()
 
+    @classmethod
+    def _check_input_array(cls, item):
+        arr = np.asarray(item)
+        if arr.dtype == np.object:
+            raise TypeError(
+                "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
+                "this means the input data contains nested lists with different lengths. "
+                "\n\t* Check the reader function passed to 'decorate_batch_generator'"
+                " to locate the data causes this issue.\n\t* Please consider using "
+                "'fluid.create_lod_tensor' to convert it to a LoD-Tensor.")
+        return arr
+
 
 class DataLoader(object):
     """
@@ -806,17 +818,6 @@ class DygraphGeneratorLoader(DataLoaderBase):
             self._reset()
             six.reraise(*sys.exc_info())
 
-    @classmethod
-    def _check_input_array(cls, item):
-        arr = np.array(item)
-        if arr.dtype == np.object:
-            raise TypeError(
-                "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
-                "this means the input data contains nested lists with different lengths. "
-                "\n\t* Check the reader function passed to 'decorate_batch_generator'"
-                " to locate the data causes this issue.\n\t* Please consider using "
-                "'fluid.create_lod_tensor' to convert it to a LoD-Tensor.")
-
     def _exit_thread_expectedly(self):
         self._thread_done_event.set()
         self._blocking_queue.close()
@@ -893,7 +894,7 @@ class DygraphGeneratorLoader(DataLoaderBase):
                 array = core.LoDTensorArray()
                 for item in sample:
                     if not isinstance(item, core.LoDTensor):
-                        self._check_input_array(item)
+                        item = self._check_input_array(item)
                         tmp = core.LoDTensor()
                         tmp.set(item, core.CPUPlace())
                         item = tmp
@@ -1114,19 +1115,6 @@ class GeneratorLoader(DataLoaderBase):
         assert not self._iterable, "reset() cannot be called when DataLoader is iterable"
         self._reset()
 
-    @classmethod
-    def _check_input_array(cls, item):
-        arr = np.array(item)
-        if arr.dtype == np.object:
-            raise TypeError((
-                "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
-                "this means the input data contains nested lists with different lengths. "
-                "\n\t* Check the reader function passed to 'decorate_batch_generator'"
-                " to locate the data causes this issue.\n\t* Please consider using "
-                "'fluid.create_lod_tensor' to convert it to a LoD-Tensor."))
-
-        return arr
-
     def _start(self):
         def __thread_main__():
             try:
-- 
GitLab