From f4c42389f4a12b7c37ab7ed86d3907d81f2be156 Mon Sep 17 00:00:00 2001
From: Zhang Ting
Date: Wed, 22 Jun 2022 13:51:16 +0800
Subject: [PATCH] fix the bug that _DataLoaderIterMultiProcess use time to
 generate the seed (#43318) (#43702)

fix the bug that _DataLoaderIterMultiProcess use time to generate the seed

cherry-pick #43318
---
 python/paddle/fluid/dataloader/dataloader_iter.py |  5 ++++-
 python/paddle/fluid/dataloader/worker.py          | 13 ++++++++++---
 .../test_multiprocess_dataloader_exception.py     |  6 ++++--
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py
index 03dacb0396c..3deff6e2d40 100644
--- a/python/paddle/fluid/dataloader/dataloader_iter.py
+++ b/python/paddle/fluid/dataloader/dataloader_iter.py
@@ -374,6 +374,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
         # see _try_put_indices
         self._thread_lock = threading.Lock()
 
+        self._base_seed = np.random.randint(low=0, high=sys.maxsize)
+
         # init workers and indices queues and put 2 indices in each indices queue
         self._init_workers()
         for _ in range(self._outstanding_capacity):
@@ -406,7 +408,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
                       self._data_queue, self._workers_done_event,
                       self._auto_collate_batch, self._collate_fn,
                       self._drop_last, self._worker_init_fn, i,
-                      self._num_workers, self._use_shared_memory))
+                      self._num_workers, self._use_shared_memory,
+                      self._base_seed))
             worker.daemon = True
             worker.start()
             self._workers.append(worker)
diff --git a/python/paddle/fluid/dataloader/worker.py b/python/paddle/fluid/dataloader/worker.py
index 6dc3813fa6d..2e7cb1d3bc0 100644
--- a/python/paddle/fluid/dataloader/worker.py
+++ b/python/paddle/fluid/dataloader/worker.py
@@ -254,7 +254,7 @@ def _generate_states(base_seed=0, worker_id=0):
 
 def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
                  auto_collate_batch, collate_fn, drop_last, init_fn, worker_id,
-                 num_workers, use_shared_memory):
+                 num_workers, use_shared_memory, base_seed):
     try:
         # NOTE: [ mmap files clear ] When the child process exits unexpectedly,
         # some shared memory objects may have been applied for but have not yet
@@ -269,14 +269,21 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
         try:
             import numpy as np
             import time
+            import random
        except ImportError:
             pass
         else:
-            np.random.seed(_generate_states(int(time.time()), worker_id))
+            seed = base_seed + worker_id
+            random.seed(seed)
+            paddle.seed(seed)
+            np.random.seed(_generate_states(base_seed, worker_id))
 
         global _worker_info
         _worker_info = WorkerInfo(
-            id=worker_id, num_workers=num_workers, dataset=dataset)
+            id=worker_id,
+            num_workers=num_workers,
+            dataset=dataset,
+            seed=base_seed)
 
         init_exception = None
         try:
diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py
index 52f4c256773..bf6e7aaf3b2 100644
--- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py
+++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py
@@ -178,10 +178,11 @@ class TestDataLoaderWorkerLoop(unittest.TestCase):
             for i in range(10):
                 indices_queue.put([i, i + 10])
             indices_queue.put(None)
+            base_seed = 1234
             _worker_loop(loader._dataset, 0, indices_queue,
                          loader._data_queue, loader._workers_done_event,
                          True, _collate_fn, True, _init_fn, 0, 1,
-                         loader._use_shared_memory)
+                         loader._use_shared_memory, base_seed)
             self.assertTrue(False)
         except AssertionError:
             pass
@@ -222,10 +223,11 @@ class TestDataLoaderWorkerLoop(unittest.TestCase):
                 indices_queue.put([i, i + 10])
             indices_queue.put(None)
             loader._workers_done_event.set()
+            base_seed = 1234
             _worker_loop(loader._dataset, 0, indices_queue,
                          loader._data_queue, loader._workers_done_event,
                          True, _collate_fn, True, _init_fn, 0, 1,
-                         loader._use_shared_memory)
+                         loader._use_shared_memory, base_seed)
             self.assertTrue(True)
         except AssertionError:
             pass
-- 
GitLab
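
For context, the patch replaces the old time-based worker seed with a single base seed drawn once in the parent process via np.random.randint(); each worker then seeds Python's random, Paddle, and NumPy from base_seed + worker_id (NumPy via _generate_states). The snippet below is a usage sketch, not part of the patch: the dataset class, the seed values, and the expectation that worker-side randomness becomes repeatable once the parent RNGs are seeded before iteration are assumptions about the patched behavior, not code from this PR.

# Usage sketch (assumptions, not code from this PR): because _base_seed is now
# drawn from NumPy's RNG instead of time.time(), seeding the parent process
# before iterating should make the per-worker seeds, and hence the randomness
# used inside worker processes, repeatable across runs.
import numpy as np
import paddle
from paddle.io import DataLoader, Dataset


class NoisyDataset(Dataset):
    """Toy map-style dataset whose __getitem__ uses NumPy randomness,
    standing in for augmentation performed inside worker processes."""

    def __len__(self):
        return 8

    def __getitem__(self, idx):
        image = np.random.rand(3).astype('float32')   # drawn with the per-worker seed
        label = np.array([idx], dtype='int64')
        return image, label


if __name__ == '__main__':
    np.random.seed(2022)   # fixes the np.random.randint() draw of _base_seed
    paddle.seed(2022)

    loader = DataLoader(NoisyDataset(), batch_size=2, num_workers=2)
    for image, label in loader:
        print(label.numpy().flatten(), image.numpy()[0])

This only addresses the worker seeding changed by the patch; whether end-to-end results are bitwise identical still depends on other sources of nondeterminism (e.g. parallel kernels).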