BaiXuePrincess / Paddle
Forked from PaddlePaddle / Paddle
Unverified commit 7051bbc2
Authored Aug 16, 2020 by Kaipeng Deng
Committed by GitHub on Aug 16, 2020
fix test_multiprocess_dataloader unittest. test=develop (#26241)
* fix test_multiprocess_dataloader unittest. test=develop
Parent: 9a6a4fbc
Showing 1 changed file with 27 additions and 14 deletions (+27 −14)
python/paddle/fluid/dataloader/dataloader_iter.py (+27 −14)
```diff
@@ -359,6 +359,9 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
         self._outstanding_capacity = 2 * max(self._num_workers,
                                              len(self._places))
 
+        # see _try_put_indices
+        self._thread_lock = threading.Lock()
+
         # init workers and indices queues and put 2 indices in each indices queue
         self._init_workers()
         for _ in range(self._outstanding_capacity):
```
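For context, the capacity rule above keeps up to two batches outstanding per worker (or per device place, whichever is larger), which is why the constructor can prefill every indices queue without growing unboundedly. A minimal sketch of the arithmetic, with hypothetical values not taken from the commit:

```python
# Hypothetical setup: 4 loader workers feeding 2 device places.
num_workers = 4
places = ["gpu:0", "gpu:1"]

# Same rule as _outstanding_capacity above: at most 2 batches in
# flight per worker (or per place, whichever is larger).
outstanding_capacity = 2 * max(num_workers, len(places))
print(outstanding_capacity)  # 8
```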
```diff
@@ -660,22 +663,32 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
     def _try_put_indices(self):
         assert self._batches_outstanding <= self._outstanding_capacity, \
                     "too many indices have been put to queue"
-        try:
-            indices = next(self._sampler_iter)
-        except StopIteration:
-            return
-
-        for i in range(self._num_workers):
-            worker_idx = next(self._workers_idx_cycle)
-            if self._worker_status[worker_idx]:
-                break
-        else:
-            return
-
-        self._indices_queues[worker_idx].put((self._send_idx, indices))
-        self._task_infos[self._send_idx] = (worker_idx, )
-        self._batches_outstanding += 1
-        self._send_idx += 1
+        # In multi-process mode for IterableDataset, _try_put_indices will
+        # be called both in main process(for our implement has blocking queue,
+        # and blocking queue read is in main process) and thread, which may
+        # cause error following error
+        # 1. "ValueError: generator already executing" in next(self._sampler_iter)
+        # 2. re-enter in increase _send_idx
+        # add a lock for threading save, for _try_put_indices is only a slight
+        # function which is not in data reading pipeline, this lock almost no
+        # influence on performance
+        with self._thread_lock:
+            try:
+                indices = next(self._sampler_iter)
+            except StopIteration:
+                return
+
+            for i in range(self._num_workers):
+                worker_idx = next(self._workers_idx_cycle)
+                if self._worker_status[worker_idx]:
+                    break
+            else:
+                return
+
+            self._indices_queues[worker_idx].put((self._send_idx, indices))
+            self._task_infos[self._send_idx] = (worker_idx, )
+            self._batches_outstanding += 1
+            self._send_idx += 1
 
     def __del__(self):
         self._try_shutdown_all()
```
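The comment block in the hunk above names the race this lock closes: with an IterableDataset, `_try_put_indices` can run concurrently in the main process and in the data-reading thread, and calling `next()` on one generator from two threads raises `ValueError: generator already executing`. Here is a minimal standalone sketch of that failure mode and the fix, independent of Paddle (all names here merely mirror the fields in the diff):

```python
import threading

def index_sampler():
    # Stands in for self._sampler_iter: a plain generator yielding
    # batches of indices. Python generators are not thread-safe.
    i = 0
    while True:
        yield list(range(i, i + 4))
        i += 4

sampler_iter = index_sampler()
thread_lock = threading.Lock()  # plays the role of self._thread_lock

def try_put_indices():
    # Without the lock, two threads entering next(sampler_iter) at the
    # same time can raise "ValueError: generator already executing".
    with thread_lock:
        indices = next(sampler_iter)
        # ... hand `indices` to a worker's indices queue here ...

threads = [threading.Thread(target=try_put_indices) for _ in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```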
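The `for`/`else` loop inside the lock cycles through worker slots and hands the batch to the first worker still alive; if a full cycle finds none, the `else` branch gives up. A standalone sketch of that selection pattern, assuming a hypothetical status list in place of the real worker bookkeeping:

```python
import itertools

num_workers = 4
worker_status = [True, False, True, True]  # True = worker process alive
workers_idx_cycle = itertools.cycle(range(num_workers))

def pick_alive_worker():
    # Mirrors the for/else above: try each worker slot once per call,
    # resuming where the previous call left off.
    for _ in range(num_workers):
        worker_idx = next(workers_idx_cycle)
        if worker_status[worker_idx]:
            return worker_idx
    return None  # every worker has exited; stop dispatching batches

print(pick_alive_worker())  # 0
print(pick_alive_worker())  # 2 (slot 1 is dead, so it is skipped)
```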
登录