未验证 提交 338fcc10 编写于 作者: C chenjian 提交者: GitHub

[cherry-pick]Reduce performance influence by record event in python (#42142)

* fix kernel name appearance (#42071)

* Reduce performance influence by record event in python (#42040)

* optimize performance

* fix

* improve coverage

* fix

* fix
上级 b543998f
......@@ -31,6 +31,7 @@ import queue
import paddle
import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from .. import core, layers
from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
from ..multiprocess_utils import _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, CleanupFuncRegistrar
......@@ -252,6 +253,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
self._exit_thread_expectedly()
def __next__(self):
if in_profiler_mode():
trace_event = profiler.RecordEvent(
name="_DataLoaderIterSingleProcess",
event_type=profiler.TracerEventType.Dataloader)
......@@ -294,6 +296,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
self._try_shutdown_all()
six.reraise(*sys.exc_info())
finally:
if in_profiler_mode():
trace_event.end()
def _shutdown_thread(self):
......@@ -708,6 +711,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._try_shutdown_all(1)
def __next__(self):
if in_profiler_mode():
trace_event = profiler.RecordEvent(
name="_DataLoaderIterMultiProcess",
event_type=profiler.TracerEventType.Dataloader)
......@@ -765,6 +769,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._try_shutdown_all()
six.reraise(*sys.exc_info())
finally:
if in_profiler_mode():
trace_event.end()
# python2 compatibility
......
......@@ -26,6 +26,7 @@ import inspect
import paddle
import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from . import parallel_helper
from .. import unique_name
......@@ -906,9 +907,12 @@ class Layer(object):
self._built = True
if in_profiler_mode():
with profiler.RecordEvent(self.full_name(),
profiler.TracerEventType.Forward):
outputs = self.forward(*inputs, **kwargs)
else:
outputs = self.forward(*inputs, **kwargs)
for forward_post_hook in self._forward_post_hooks.values():
hook_result = forward_post_hook(self, inputs, outputs)
......@@ -919,7 +923,7 @@ class Layer(object):
def __call__(self, *inputs, **kwargs):
if (not in_declarative_mode()) and (not self._forward_pre_hooks) \
and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode():
and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()):
self._build_once(*inputs, **kwargs)
return self.forward(*inputs, **kwargs)
else:
......
......@@ -30,6 +30,7 @@ from .parallel import scale_loss
from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
import paddle.utils.deprecated as deprecated
import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from paddle import _C_ops
_grad_scalar = None
......@@ -247,6 +248,7 @@ def monkey_patch_varbase():
"""
if framework._non_static_mode():
if in_profiler_mode():
record_event = profiler.RecordEvent(
"Gradient Backward", profiler.TracerEventType.Backward)
record_event.begin()
......@@ -288,6 +290,7 @@ def monkey_patch_varbase():
core.dygraph_run_backward([self], [grad_tensor],
retain_graph,
framework._dygraph_tracer())
if in_profiler_mode():
record_event.end()
else:
raise ValueError(
......
......@@ -134,6 +134,42 @@ class TestProfiler(unittest.TestCase):
prof.export(path='./test_profiler_pb.pb', format='pb')
prof.summary()
result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
prof = None
dataset = RandomDataset(10 * 4)
simple_net = SimpleNet()
opt = paddle.optimizer.SGD(learning_rate=1e-3,
parameters=simple_net.parameters())
loader = DataLoader(
dataset, batch_size=4, shuffle=True, drop_last=True, num_workers=2)
prof = profiler.Profiler(on_trace_ready=lambda prof: None)
prof.start()
for i, (image, label) in enumerate(loader()):
out = simple_net(image)
loss = F.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
opt.minimize(avg_loss)
simple_net.clear_gradients()
prof.step()
prof.stop()
prof.summary()
prof = None
dataset = RandomDataset(10 * 4)
simple_net = SimpleNet()
loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)
opt = paddle.optimizer.Adam(
learning_rate=1e-3, parameters=simple_net.parameters())
prof = profiler.Profiler(on_trace_ready=lambda prof: None)
prof.start()
for i, (image, label) in enumerate(loader()):
out = simple_net(image)
loss = F.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
opt.step()
simple_net.clear_gradients()
prof.step()
prof.stop()
class TestNvprof(unittest.TestCase):
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import collections
from enum import Enum
import re
from paddle.fluid.core import TracerEventType
......@@ -1317,10 +1318,11 @@ def _build_table(statistic_data,
append(header_sep)
append(row_format.format(*headers))
append(header_sep)
kernel_name_pattern = re.compile('(.+?)(<.*>)(\(.*\))')
for row_values in all_row_values:
indx = row_values[0].find('(')
if indx != -1:
name = row_values[0][:indx]
match = kernel_name_pattern.match(row_values[0])
if match:
name = match.group(1) + match.group(2)
else:
name = row_values[0]
if len(name) > name_column_width:
......
......@@ -21,6 +21,7 @@ from paddle.fluid import core
from paddle.fluid.core import (_RecordEvent, TracerEventType)
_is_profiler_used = False
_has_optimizer_wrapped = False
_AllowedEventTypeList = [
TracerEventType.Dataloader, TracerEventType.ProfileStep,
......@@ -154,20 +155,31 @@ def load_profiler_result(filename: str):
return core.load_profiler_result(filename)
def in_profiler_mode():
    """Return whether a profiler is currently collecting data.

    Reads the module-level ``_is_profiler_used`` flag, which is toggled by
    the profiler's start/stop lifecycle. Hot-path callers (dataloader,
    layer ``__call__``, backward) use this to skip RecordEvent overhead
    when no profiler is active.
    """
    # The flag is already a bool; comparing with ``== True`` is redundant.
    return _is_profiler_used
def wrap_optimizers():
    """Monkey-patch ``step`` on every class exported by ``paddle.optimizer``
    so that, while a profiler is active, each optimizer step is recorded as
    an ``Optimization``-typed event.

    Idempotent: guarded by the module-level ``_has_optimizer_wrapped`` flag
    so repeated calls do not stack wrappers. The wrapper itself checks
    ``in_profiler_mode()`` at call time, so the patched optimizers add no
    RecordEvent overhead when profiling is off.
    """

    def optimizer_wrapper(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Only pay the RecordEvent cost when a profiler is collecting.
            if in_profiler_mode():
                with RecordEvent(
                        'Optimization Step',
                        event_type=TracerEventType.Optimization):
                    return func(*args, **kwargs)
            else:
                return func(*args, **kwargs)

        return wrapper

    global _has_optimizer_wrapped
    if _has_optimizer_wrapped:
        return
    # Imported lazily to avoid a circular import at module load time.
    import paddle.optimizer as optimizer
    for classname in optimizer.__all__:
        # The abstract 'Optimizer' base is skipped; concrete subclasses
        # inherit or override ``step``.
        if classname != 'Optimizer':
            classobject = getattr(optimizer, classname)
            if getattr(classobject, 'step', None) is not None:
                classobject.step = optimizer_wrapper(classobject.step)
    _has_optimizer_wrapped = True
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册