未验证 提交 4fd190d5 编写于 作者: C chenjian 提交者: GitHub

Reduce the performance impact of RecordEvent in Python (#42040)

* optimize performance

* fix

* improve coverage

* fix

* fix
上级 19650d72
...@@ -31,6 +31,7 @@ import queue ...@@ -31,6 +31,7 @@ import queue
import paddle import paddle
import paddle.profiler as profiler import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from .. import core, layers from .. import core, layers
from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
from ..multiprocess_utils import _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, CleanupFuncRegistrar from ..multiprocess_utils import _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, CleanupFuncRegistrar
...@@ -252,10 +253,11 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): ...@@ -252,10 +253,11 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
self._exit_thread_expectedly() self._exit_thread_expectedly()
def __next__(self): def __next__(self):
trace_event = profiler.RecordEvent( if in_profiler_mode():
name="_DataLoaderIterSingleProcess", trace_event = profiler.RecordEvent(
event_type=profiler.TracerEventType.Dataloader) name="_DataLoaderIterSingleProcess",
trace_event.begin() event_type=profiler.TracerEventType.Dataloader)
trace_event.begin()
try: try:
benchmark().check_if_need_record(self) benchmark().check_if_need_record(self)
benchmark().before_reader() benchmark().before_reader()
...@@ -294,7 +296,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): ...@@ -294,7 +296,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
self._try_shutdown_all() self._try_shutdown_all()
six.reraise(*sys.exc_info()) six.reraise(*sys.exc_info())
finally: finally:
trace_event.end() if in_profiler_mode():
trace_event.end()
def _shutdown_thread(self): def _shutdown_thread(self):
if self._thread: if self._thread:
...@@ -708,10 +711,11 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): ...@@ -708,10 +711,11 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._try_shutdown_all(1) self._try_shutdown_all(1)
def __next__(self): def __next__(self):
trace_event = profiler.RecordEvent( if in_profiler_mode():
name="_DataLoaderIterMultiProcess", trace_event = profiler.RecordEvent(
event_type=profiler.TracerEventType.Dataloader) name="_DataLoaderIterMultiProcess",
trace_event.begin() event_type=profiler.TracerEventType.Dataloader)
trace_event.begin()
try: try:
benchmark().check_if_need_record(self) benchmark().check_if_need_record(self)
benchmark().before_reader() benchmark().before_reader()
...@@ -765,7 +769,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): ...@@ -765,7 +769,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._try_shutdown_all() self._try_shutdown_all()
six.reraise(*sys.exc_info()) six.reraise(*sys.exc_info())
finally: finally:
trace_event.end() if in_profiler_mode():
trace_event.end()
# python2 compatibility # python2 compatibility
def next(self): def next(self):
......
...@@ -26,6 +26,7 @@ import inspect ...@@ -26,6 +26,7 @@ import inspect
import paddle import paddle
import paddle.profiler as profiler import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from . import parallel_helper from . import parallel_helper
from .. import unique_name from .. import unique_name
...@@ -906,8 +907,11 @@ class Layer(object): ...@@ -906,8 +907,11 @@ class Layer(object):
self._built = True self._built = True
with profiler.RecordEvent(self.full_name(), if in_profiler_mode():
profiler.TracerEventType.Forward): with profiler.RecordEvent(self.full_name(),
profiler.TracerEventType.Forward):
outputs = self.forward(*inputs, **kwargs)
else:
outputs = self.forward(*inputs, **kwargs) outputs = self.forward(*inputs, **kwargs)
for forward_post_hook in self._forward_post_hooks.values(): for forward_post_hook in self._forward_post_hooks.values():
...@@ -919,7 +923,7 @@ class Layer(object): ...@@ -919,7 +923,7 @@ class Layer(object):
def __call__(self, *inputs, **kwargs): def __call__(self, *inputs, **kwargs):
if (not in_declarative_mode()) and (not self._forward_pre_hooks) \ if (not in_declarative_mode()) and (not self._forward_pre_hooks) \
and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode(): and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()):
self._build_once(*inputs, **kwargs) self._build_once(*inputs, **kwargs)
return self.forward(*inputs, **kwargs) return self.forward(*inputs, **kwargs)
else: else:
......
...@@ -30,6 +30,7 @@ from .parallel import scale_loss ...@@ -30,6 +30,7 @@ from .parallel import scale_loss
from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
import paddle.utils.deprecated as deprecated import paddle.utils.deprecated as deprecated
import paddle.profiler as profiler import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from paddle import _C_ops from paddle import _C_ops
_grad_scalar = None _grad_scalar = None
...@@ -247,9 +248,10 @@ def monkey_patch_varbase(): ...@@ -247,9 +248,10 @@ def monkey_patch_varbase():
""" """
if framework._non_static_mode(): if framework._non_static_mode():
record_event = profiler.RecordEvent( if in_profiler_mode():
"Gradient Backward", profiler.TracerEventType.Backward) record_event = profiler.RecordEvent(
record_event.begin() "Gradient Backward", profiler.TracerEventType.Backward)
record_event.begin()
if grad_tensor is not None: if grad_tensor is not None:
if framework._in_eager_mode_: if framework._in_eager_mode_:
assert isinstance( assert isinstance(
...@@ -289,7 +291,8 @@ def monkey_patch_varbase(): ...@@ -289,7 +291,8 @@ def monkey_patch_varbase():
core.dygraph_run_backward([self], [grad_tensor], core.dygraph_run_backward([self], [grad_tensor],
retain_graph, retain_graph,
framework._dygraph_tracer()) framework._dygraph_tracer())
record_event.end() if in_profiler_mode():
record_event.end()
else: else:
raise ValueError( raise ValueError(
"Variable.backward() is only available in DyGraph mode") "Variable.backward() is only available in DyGraph mode")
......
...@@ -134,6 +134,42 @@ class TestProfiler(unittest.TestCase): ...@@ -134,6 +134,42 @@ class TestProfiler(unittest.TestCase):
prof.export(path='./test_profiler_pb.pb', format='pb') prof.export(path='./test_profiler_pb.pb', format='pb')
prof.summary() prof.summary()
result = profiler.utils.load_profiler_result('./test_profiler_pb.pb') result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
prof = None
dataset = RandomDataset(10 * 4)
simple_net = SimpleNet()
opt = paddle.optimizer.SGD(learning_rate=1e-3,
parameters=simple_net.parameters())
loader = DataLoader(
dataset, batch_size=4, shuffle=True, drop_last=True, num_workers=2)
prof = profiler.Profiler(on_trace_ready=lambda prof: None)
prof.start()
for i, (image, label) in enumerate(loader()):
out = simple_net(image)
loss = F.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
opt.minimize(avg_loss)
simple_net.clear_gradients()
prof.step()
prof.stop()
prof.summary()
prof = None
dataset = RandomDataset(10 * 4)
simple_net = SimpleNet()
loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)
opt = paddle.optimizer.Adam(
learning_rate=1e-3, parameters=simple_net.parameters())
prof = profiler.Profiler(on_trace_ready=lambda prof: None)
prof.start()
for i, (image, label) in enumerate(loader()):
out = simple_net(image)
loss = F.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
opt.step()
simple_net.clear_gradients()
prof.step()
prof.stop()
class TestNvprof(unittest.TestCase): class TestNvprof(unittest.TestCase):
......
...@@ -21,6 +21,7 @@ from paddle.fluid import core ...@@ -21,6 +21,7 @@ from paddle.fluid import core
from paddle.fluid.core import (_RecordEvent, TracerEventType) from paddle.fluid.core import (_RecordEvent, TracerEventType)
_is_profiler_used = False _is_profiler_used = False
_has_optimizer_wrapped = False
_AllowedEventTypeList = [ _AllowedEventTypeList = [
TracerEventType.Dataloader, TracerEventType.ProfileStep, TracerEventType.Dataloader, TracerEventType.ProfileStep,
...@@ -154,20 +155,31 @@ def load_profiler_result(filename: str): ...@@ -154,20 +155,31 @@ def load_profiler_result(filename: str):
return core.load_profiler_result(filename) return core.load_profiler_result(filename)
def in_profiler_mode():
    """Report whether the profiler is currently flagged as in use.

    Returns:
        The result of comparing the module-level ``_is_profiler_used``
        flag with ``True``.

    NOTE(review): the code that assigns ``_is_profiler_used`` is not
    visible here; presumably the profiler toggles it on start/stop --
    confirm against the Profiler implementation.
    """
    # Equality (not identity) comparison is kept deliberately so the result
    # is unchanged for whatever value the flag is assigned elsewhere.
    profiler_active = _is_profiler_used == True  # noqa: E712
    return profiler_active
def wrap_optimizers(): def wrap_optimizers():
def optimizer_warpper(func): def optimizer_warpper(func):
@functools.wraps(func) @functools.wraps(func)
def warpper(*args, **kwargs): def warpper(*args, **kwargs):
with RecordEvent( if in_profiler_mode():
'Optimization Step', with RecordEvent(
event_type=TracerEventType.Optimization): 'Optimization Step',
event_type=TracerEventType.Optimization):
return func(*args, **kwargs)
else:
return func(*args, **kwargs) return func(*args, **kwargs)
return warpper return warpper
global _has_optimizer_wrapped
if _has_optimizer_wrapped == True:
return
import paddle.optimizer as optimizer import paddle.optimizer as optimizer
for classname in optimizer.__all__: for classname in optimizer.__all__:
if classname != 'Optimizer': if classname != 'Optimizer':
classobject = getattr(optimizer, classname) classobject = getattr(optimizer, classname)
if getattr(classobject, 'step', None) != None: if getattr(classobject, 'step', None) != None:
classobject.step = optimizer_warpper(classobject.step) classobject.step = optimizer_warpper(classobject.step)
_has_optimizer_wrapped = True
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册