From 338fcc10fcde33277477530277ddb1c80d47e61d Mon Sep 17 00:00:00 2001 From: chenjian Date: Sun, 24 Apr 2022 10:40:41 +0800 Subject: [PATCH] [cherry-pick]Reduce performance influence by record event in python (#42142) * fix kernel name appearance (#42071) * Reduce performance influence by record event in python (#42040) * optimize performance * fix * improve coverage * fix * fix --- .../fluid/dataloader/dataloader_iter.py | 25 +++++++------ python/paddle/fluid/dygraph/layers.py | 10 ++++-- .../fluid/dygraph/varbase_patch_methods.py | 11 +++--- .../fluid/tests/unittests/test_newprofiler.py | 36 +++++++++++++++++++ python/paddle/profiler/profiler_statistic.py | 8 +++-- python/paddle/profiler/utils.py | 18 ++++++++-- 6 files changed, 85 insertions(+), 23 deletions(-) diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index bbf2a4377c7..430578db510 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -31,6 +31,7 @@ import queue import paddle import paddle.profiler as profiler +from paddle.profiler.utils import in_profiler_mode from .. 
import core, layers from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph from ..multiprocess_utils import _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, CleanupFuncRegistrar @@ -252,10 +253,11 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): self._exit_thread_expectedly() def __next__(self): - trace_event = profiler.RecordEvent( - name="_DataLoaderIterSingleProcess", - event_type=profiler.TracerEventType.Dataloader) - trace_event.begin() + if in_profiler_mode(): + trace_event = profiler.RecordEvent( + name="_DataLoaderIterSingleProcess", + event_type=profiler.TracerEventType.Dataloader) + trace_event.begin() try: benchmark().check_if_need_record(self) benchmark().before_reader() @@ -294,7 +296,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): self._try_shutdown_all() six.reraise(*sys.exc_info()) finally: - trace_event.end() + if in_profiler_mode(): + trace_event.end() def _shutdown_thread(self): if self._thread: @@ -708,10 +711,11 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._try_shutdown_all(1) def __next__(self): - trace_event = profiler.RecordEvent( - name="_DataLoaderIterMultiProcess", - event_type=profiler.TracerEventType.Dataloader) - trace_event.begin() + if in_profiler_mode(): + trace_event = profiler.RecordEvent( + name="_DataLoaderIterMultiProcess", + event_type=profiler.TracerEventType.Dataloader) + trace_event.begin() try: benchmark().check_if_need_record(self) benchmark().before_reader() @@ -765,7 +769,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._try_shutdown_all() six.reraise(*sys.exc_info()) finally: - trace_event.end() + if in_profiler_mode(): + trace_event.end() # python2 compatibility def next(self): diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 41c1a0aa580..088fed03c35 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -26,6 +26,7 @@ import inspect import paddle 
import paddle.profiler as profiler +from paddle.profiler.utils import in_profiler_mode from . import parallel_helper from .. import unique_name @@ -906,8 +907,11 @@ class Layer(object): self._built = True - with profiler.RecordEvent(self.full_name(), - profiler.TracerEventType.Forward): + if in_profiler_mode(): + with profiler.RecordEvent(self.full_name(), + profiler.TracerEventType.Forward): + outputs = self.forward(*inputs, **kwargs) + else: outputs = self.forward(*inputs, **kwargs) for forward_post_hook in self._forward_post_hooks.values(): @@ -919,7 +923,7 @@ class Layer(object): def __call__(self, *inputs, **kwargs): if (not in_declarative_mode()) and (not self._forward_pre_hooks) \ - and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode(): + and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()): self._build_once(*inputs, **kwargs) return self.forward(*inputs, **kwargs) else: diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index b2441e90fc9..e9b04a183fd 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -30,6 +30,7 @@ from .parallel import scale_loss from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE import paddle.utils.deprecated as deprecated import paddle.profiler as profiler +from paddle.profiler.utils import in_profiler_mode from paddle import _C_ops _grad_scalar = None @@ -247,9 +248,10 @@ def monkey_patch_varbase(): """ if framework._non_static_mode(): - record_event = profiler.RecordEvent( - "Gradient Backward", profiler.TracerEventType.Backward) - record_event.begin() + if in_profiler_mode(): + record_event = profiler.RecordEvent( + "Gradient Backward", profiler.TracerEventType.Backward) + record_event.begin() if grad_tensor is not None: if framework._in_eager_mode_: assert isinstance( @@ -288,7 
+290,8 @@ def monkey_patch_varbase(): core.dygraph_run_backward([self], [grad_tensor], retain_graph, framework._dygraph_tracer()) - record_event.end() + if in_profiler_mode(): + record_event.end() else: raise ValueError( "Variable.backward() is only available in DyGraph mode") diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py index ae804f82b90..53ade0dfb79 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler.py +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -134,6 +134,42 @@ class TestProfiler(unittest.TestCase): prof.export(path='./test_profiler_pb.pb', format='pb') prof.summary() result = profiler.utils.load_profiler_result('./test_profiler_pb.pb') + prof = None + dataset = RandomDataset(10 * 4) + simple_net = SimpleNet() + opt = paddle.optimizer.SGD(learning_rate=1e-3, + parameters=simple_net.parameters()) + loader = DataLoader( + dataset, batch_size=4, shuffle=True, drop_last=True, num_workers=2) + prof = profiler.Profiler(on_trace_ready=lambda prof: None) + prof.start() + for i, (image, label) in enumerate(loader()): + out = simple_net(image) + loss = F.cross_entropy(out, label) + avg_loss = paddle.mean(loss) + avg_loss.backward() + opt.minimize(avg_loss) + simple_net.clear_gradients() + prof.step() + prof.stop() + prof.summary() + prof = None + dataset = RandomDataset(10 * 4) + simple_net = SimpleNet() + loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True) + opt = paddle.optimizer.Adam( + learning_rate=1e-3, parameters=simple_net.parameters()) + prof = profiler.Profiler(on_trace_ready=lambda prof: None) + prof.start() + for i, (image, label) in enumerate(loader()): + out = simple_net(image) + loss = F.cross_entropy(out, label) + avg_loss = paddle.mean(loss) + avg_loss.backward() + opt.step() + simple_net.clear_gradients() + prof.step() + prof.stop() class TestNvprof(unittest.TestCase): diff --git 
a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py index 422dbe4ce35..50aa3a1f11f 100755 --- a/python/paddle/profiler/profiler_statistic.py +++ b/python/paddle/profiler/profiler_statistic.py @@ -13,6 +13,7 @@ # limitations under the License. import collections from enum import Enum +import re from paddle.fluid.core import TracerEventType @@ -1317,10 +1318,11 @@ def _build_table(statistic_data, append(header_sep) append(row_format.format(*headers)) append(header_sep) + kernel_name_pattern = re.compile('(.+?)(<.*>)(\(.*\))') for row_values in all_row_values: - indx = row_values[0].find('(') - if indx != -1: - name = row_values[0][:indx] + match = kernel_name_pattern.match(row_values[0]) + if match: + name = match.group(1) + match.group(2) else: name = row_values[0] if len(name) > name_column_width: diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py index 6ae3fe4e60b..fba1aeabf28 100644 --- a/python/paddle/profiler/utils.py +++ b/python/paddle/profiler/utils.py @@ -21,6 +21,7 @@ from paddle.fluid import core from paddle.fluid.core import (_RecordEvent, TracerEventType) _is_profiler_used = False +_has_optimizer_wrapped = False _AllowedEventTypeList = [ TracerEventType.Dataloader, TracerEventType.ProfileStep, @@ -154,20 +155,31 @@ def load_profiler_result(filename: str): return core.load_profiler_result(filename) +def in_profiler_mode(): + return _is_profiler_used == True + + def wrap_optimizers(): def optimizer_warpper(func): @functools.wraps(func) def warpper(*args, **kwargs): - with RecordEvent( - 'Optimization Step', - event_type=TracerEventType.Optimization): + if in_profiler_mode(): + with RecordEvent( + 'Optimization Step', + event_type=TracerEventType.Optimization): + return func(*args, **kwargs) + else: return func(*args, **kwargs) return warpper + global _has_optimizer_wrapped + if _has_optimizer_wrapped == True: + return import paddle.optimizer as optimizer for classname in 
optimizer.__all__: if classname != 'Optimizer': classobject = getattr(optimizer, classname) if getattr(classobject, 'step', None) != None: classobject.step = optimizer_warpper(classobject.step) + _has_optimizer_wrapped = True -- GitLab