diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py
index 70c7b01b05ba3bd71c69aaf8c37ae9d5830c8fd7..d8cb3e0918dc8d0e08c3895c5598b46479958a95 100644
--- a/python/paddle/fluid/dataloader/dataloader_iter.py
+++ b/python/paddle/fluid/dataloader/dataloader_iter.py
@@ -24,7 +24,7 @@ import threading
 import numpy as np
 import multiprocessing
 from collections import namedtuple
-from paddle.fluid.framework import _set_expected_place, _current_expected_place
+from paddle.fluid.framework import _set_expected_place, _current_expected_place, set_flags
 
 # NOTE: queue has a different name in python2 and python3
 import queue
@@ -202,6 +202,22 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
             # APIs in this thread.
             _set_expected_place(legacy_expected_place)
 
+            # NOTE(chenweihang): [ Why disable pten kernel execution here? ]
+            # To keep dynamic graph performance from regressing noticeably in
+            # the pten-compatible state, we cache one KernelContext globally
+            # for the dynamic graph tracer, which reduces the construction
+            # and destruction overhead of compatibility-state data structures
+            # such as DenseTensor. In static graph mode each op caches its
+            # own KernelContext, but dynamic graph ops are constructed and
+            # destroyed on every execution, so a per-op KernelContext cache
+            # is impossible. A single global kernel context, however, is not
+            # thread-safe in dynamic graph mode: if a pten op runs in the
+            # DataLoader thread, it may cause access errors. We therefore
+            # temporarily skip pten kernels in this scenario; this setting
+            # will be removed once a better solution is found.
+            set_flags({'FLAGS_run_pten_kernel': False})
+
             while not self._thread_done_event.is_set():
                 try:
                     indices = next(self._sampler_iter)
@@ -501,6 +517,9 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
             # APIs in this thread.
             _set_expected_place(legacy_expected_place)
 
+            # NOTE(chenweihang): See Note [ Why disable pten kernel execution here? ]
+            set_flags({'FLAGS_run_pten_kernel': False})
+
             while not self._thread_done_event.is_set():
                 batch = self._get_data()
                 if not self._thread_done_event.is_set():
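
For reviewers who want to try the pattern outside the DataLoader, below is a minimal sketch of what the patch does: flip the flag at the top of a worker thread, before that thread touches any Paddle APIs. The set_flags import and the FLAGS_run_pten_kernel flag come from the patch itself; the standalone worker function, the event-based shutdown, and the assumption that this Paddle build defines the flag are illustrative and not part of the patch.

import threading
import time

from paddle.fluid.framework import set_flags


def _worker_loop(done_event):
    # Disable the pten kernel path before this thread runs any ops,
    # mirroring the patch: the tracer's shared KernelContext is not
    # thread-safe, so pten kernels must not execute in this thread.
    # (Hypothetical worker; the real one lives in dataloader_iter.py.)
    set_flags({'FLAGS_run_pten_kernel': False})
    while not done_event.is_set():
        # ... fetch indices, read samples, enqueue batches ...
        time.sleep(0.01)


done = threading.Event()
worker = threading.Thread(target=_worker_loop, args=(done,), daemon=True)
worker.start()
time.sleep(0.1)
done.set()
worker.join()

Note that whether the set_flags call is scoped to the calling thread or to the whole process depends on how FLAGS_run_pten_kernel is declared on the C++ side; the patch only relies on the call pattern shown here.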