diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py
index 24abc3dd731e42ccfb4b657b4744f98c0e778759..94a5dfb8110f811ec1518f9e17e3dea23c590264 100644
--- a/python/paddle/fluid/dataloader/dataloader_iter.py
+++ b/python/paddle/fluid/dataloader/dataloader_iter.py
@@ -320,7 +320,6 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
                 array = core.LoDTensorArray()
                 for slot in batch:
                     if not isinstance(slot, core.LoDTensor):
-                        self._check_input_array(slot)
                         # FIXME(dkp): blocking_queue only support
                         #             core.LoDTensorArray as input now, read
                         #             numpy data into a LoDTensorArray here,
@@ -346,19 +345,6 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
             logging.warning("DataLoader reader thread raised an exception.")
             six.reraise(*sys.exc_info())
 
-    @classmethod
-    def _check_input_array(cls, item):
-        if isinstance(item, paddle.Tensor):
-            return
-        arr = np.array(item)
-        if arr.dtype == np.object:
-            raise TypeError((
-                "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
-                "this means the input data contains nested lists with different lengths. "
-                "\n\t* Check the reader function passed to 'decorate_batch_generator'"
-                " to locate the data causes this issue.\n\t* Please consider using "
-                "'fluid.create_lod_tensor' to convert it to a LoD-Tensor."))
-
     def __next__(self):
         try:
             if in_dygraph_mode():
@@ -454,11 +440,16 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
             if use_shared_memory:
                 # FIXME(dkp): _convert_to_tensor_list only support np.array
                 #             list now, should support paddle.Tensor list
-                if isinstance(batch[0][0], paddle.Tensor):
-                    np_batch = []
-                    for sample in batch:
-                        np_batch.append([s.numpy() for s in sample])
-                    batch = np_batch
+                new_batch = []
+                for sample in batch:
+                    new_sample = []
+                    for s in sample:
+                        if isinstance(s, paddle.Tensor):
+                            new_sample.append(s.numpy())
+                        else:
+                            new_sample.append(s)
+                    new_batch.append(new_sample)
+                batch = new_batch
 
                 tensor_list = core._convert_to_tensor_list(batch)
                 out_queue.put((idx, tensor_list))
diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py
index 4ff9b73421a73e93ac5d7cc22eea7bc7e78fd32c..c89cd0ecdfb9b94127ebb49f552ad82c42c393ef 100644
--- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py
+++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py
@@ -142,5 +142,43 @@ class TestChainDataset(unittest.TestCase):
             self.run_main(num_workers=0, places=p)
 
 
+class NumpyMixTensorDataset(Dataset):
+    def __init__(self, sample_num):
+        self.sample_num = sample_num
+
+    def __len__(self):
+        return self.sample_num
+
+    def __getitem__(self, idx):
+        np.random.seed(idx)
+        image = np.random.random([IMAGE_SIZE]).astype('float32')
+        label = np.random.randint(0, 9, (1, )).astype('int64')
+        return paddle.to_tensor(image, place=paddle.CPUPlace()), label
+
+
+class TestNumpyMixTensorDataset(TestTensorDataset):
+    def run_main(self, num_workers, places):
+        paddle.static.default_startup_program().random_seed = 1
+        paddle.static.default_main_program().random_seed = 1
+        place = paddle.CPUPlace()
+        with fluid.dygraph.guard(place):
+            dataset = NumpyMixTensorDataset(16)
+            assert len(dataset) == 16
+            dataloader = DataLoader(
+                dataset,
+                places=place,
+                num_workers=num_workers,
+                batch_size=1,
+                drop_last=True)
+
+            for i, (input, label) in enumerate(dataloader()):
+                assert len(input) == 1
+                assert len(label) == 1
+                assert input.shape == [1, IMAGE_SIZE]
+                assert label.shape == [1, 1]
+                assert isinstance(input, paddle.Tensor)
+                assert isinstance(label, paddle.Tensor)
+
+
 if __name__ == '__main__':
     unittest.main()
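
Note (not part of the patch): a minimal usage sketch of the behavior this change enables, assuming paddle.io.Dataset/DataLoader as used in the test above. The class name MixedFieldDataset and the feature size 32 are illustrative, not from the PR. With num_workers > 0, batches pass through _worker_loop, where each paddle.Tensor field is now converted to numpy before core._convert_to_tensor_list, so __getitem__ may freely mix paddle.Tensor and numpy fields.

    import numpy as np
    import paddle
    from paddle.io import Dataset, DataLoader

    class MixedFieldDataset(Dataset):
        # Illustrative dataset: the image field is a paddle.Tensor, the label
        # a numpy array, mirroring NumpyMixTensorDataset from the new test.
        def __init__(self, sample_num):
            self.sample_num = sample_num

        def __len__(self):
            return self.sample_num

        def __getitem__(self, idx):
            image = np.random.random([32]).astype('float32')
            label = np.random.randint(0, 9, (1, )).astype('int64')
            return paddle.to_tensor(image, place=paddle.CPUPlace()), label

    if __name__ == '__main__':
        # Assumes dynamic (imperative) mode, the default in paddle 2.x.
        # num_workers > 0 exercises the shared-memory worker path patched above.
        loader = DataLoader(MixedFieldDataset(16), batch_size=4, num_workers=2)
        for image, label in loader():
            print(image.shape, label.shape)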