From 1c7215ace4b51491efe1c379d70c821559f9ebae Mon Sep 17 00:00:00 2001 From: WangXi Date: Wed, 8 Jul 2020 10:58:54 +0800 Subject: [PATCH] Speedup reader check_input_array when item is array (#25395) --- python/paddle/fluid/reader.py | 38 ++++++++++++----------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 06d74386b8..ebe16a8bbb 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -97,6 +97,18 @@ class DataLoaderBase(object): def __next__(self): raise NotImplementedError() + @classmethod + def _check_input_array(cls, item): + arr = np.asarray(item) + if arr.dtype == np.object: + raise TypeError( + "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually " + "this means the input data contains nested lists with different lengths. " + "\n\t* Check the reader function passed to 'decorate_batch_generator'" + " to locate the data causes this issue.\n\t* Please consider using " + "'fluid.create_lod_tensor' to convert it to a LoD-Tensor.") + return arr + class DataLoader(object): """ @@ -807,17 +819,6 @@ class DygraphGeneratorLoader(DataLoaderBase): self._reset() six.reraise(*sys.exc_info()) - @classmethod - def _check_input_array(cls, item): - arr = np.array(item) - if arr.dtype == np.object: - raise TypeError( - "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually " - "this means the input data contains nested lists with different lengths. " - "\n\t* Check the reader function passed to 'decorate_batch_generator'" - " to locate the data causes this issue.\n\t* Please consider using " - "'fluid.create_lod_tensor' to convert it to a LoD-Tensor.") - def _exit_thread_expectedly(self): self._thread_done_event.set() self._blocking_queue.close() @@ -894,7 +895,7 @@ class DygraphGeneratorLoader(DataLoaderBase): array = core.LoDTensorArray() for item in sample: if not isinstance(item, core.LoDTensor): - self._check_input_array(item) + item = self._check_input_array(item) tmp = core.LoDTensor() tmp.set(item, core.CPUPlace()) item = tmp @@ -1115,19 +1116,6 @@ class GeneratorLoader(DataLoaderBase): assert not self._iterable, "reset() cannot be called when DataLoader is iterable" self._reset() - @classmethod - def _check_input_array(cls, item): - arr = np.array(item) - if arr.dtype == np.object: - raise TypeError(( - "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually " - "this means the input data contains nested lists with different lengths. " - "\n\t* Check the reader function passed to 'decorate_batch_generator'" - " to locate the data causes this issue.\n\t* Please consider using " - "'fluid.create_lod_tensor' to convert it to a LoD-Tensor.")) - - return arr - def _start(self): def __thread_main__(): try: -- GitLab