From dbc54d2d39ef4a97790b40eb733986d6718c25df Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Mon, 26 Jul 2021 15:18:25 +0800 Subject: [PATCH] [cherry pick] fix DataLoader memory leak (#34301) * fix DataLoader memory leak. test=develop * fix unittest abort. test=develop * enable test_dataloader_dataset & skip GPU tensor break. test=develop --- .../fluid/dataloader/dataloader_iter.py | 22 +------------------ .../unittests/test_dataloader_dataset.py | 7 +++++- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index 1f928bfc8a6..ed97987ae1b 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -125,13 +125,6 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): self._init_thread() - # if user exit python program when dataloader is still - # iterating, resource may no release safely, so we - # add __del__ function to to CleanupFuncRegistrar - # to make sure __del__ is always called when program - # exit for resoure releasing safely - CleanupFuncRegistrar.register(self.__del__) - def _init_thread(self): self._var_names = [v.name for v in self._feed_list] self._shapes = [v.shape for v in self._feed_list] @@ -229,9 +222,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): def _shutdown_thread(self): if self._thread: self._thread_done_event.set() - if self._thread is not threading.current_thread(): - self._thread.join() - self._thread = None + self._thread = None # python2 compatibility def next(self): @@ -287,17 +278,6 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._init_thread() self._shutdown = False - # if user exit python program when dataloader is still - # iterating, resource may no release safely, so we - # add _shutdown_on_exit function to to CleanupFuncRegistrar - # to make sure _try_shutdown_all is always called when program - # exit for resoure releasing safely - # worker join may hang for in _try_shutdown_all call in atexit - # for main process is in atexit state in some OS, so we add - # timeout=1 for shutdown function call in atexit, for shutdown - # function call in __del__, we keep it as it is - CleanupFuncRegistrar.register(self._shutdown_on_exit) - def _init_workers(self): # multiprocess worker and indice queue list initial as empty self._workers = [] diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py b/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py index 08589f0191d..d2f4eadc9c5 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py @@ -66,7 +66,12 @@ class TestDatasetWithDiffOutputPlace(unittest.TestCase): for image, label in loader: self.assertTrue(image.place.is_gpu_place()) self.assertTrue(label.place.is_cuda_pinned_place()) - break + # FIXME(dkp): when input tensor is in GPU place and + # iteration break in the median, it seems the GPU + # tensor put into blocking_queue cannot be safely + # released and may cause ABRT/SEGV, this should + # be fixed + # break def test_multi_process(self): # DataLoader with multi-process mode is not supported on MacOs and Windows currently -- GitLab