未验证 提交 2ea5e02c 编写于 作者: C chenjian 提交者: GitHub

[cherry-pick] Refine user experience for profiler (#41989)

* fix divide-by-zero error when cpu only (#41794)

* reduce performance influence by RecordEvent in Python (#41822)

* reduce performance influence

* add unit test

* fix

* Rebase for profiler statistic ratio (#41939)

* fix according to suggestion

* add kernel summary

* improve coverage
上级 85a4ecb6
...@@ -20,6 +20,7 @@ import tempfile ...@@ -20,6 +20,7 @@ import tempfile
import paddle import paddle
import paddle.profiler as profiler import paddle.profiler as profiler
import paddle.profiler.utils as utils
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle.io import Dataset, DataLoader from paddle.io import Dataset, DataLoader
...@@ -40,11 +41,17 @@ class TestProfiler(unittest.TestCase): ...@@ -40,11 +41,17 @@ class TestProfiler(unittest.TestCase):
with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], ) as prof: with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], ) as prof:
y = x / 2.0 y = x / 2.0
prof = None prof = None
self.assertEqual(utils._is_profiler_used, False)
with profiler.RecordEvent(name='test'):
y = x / 2.0
with profiler.Profiler( with profiler.Profiler(
targets=[profiler.ProfilerTarget.CPU], targets=[profiler.ProfilerTarget.CPU],
scheduler=(1, 2)) as prof: scheduler=(1, 2)) as prof:
self.assertEqual(utils._is_profiler_used, True)
with profiler.RecordEvent(name='test'): with profiler.RecordEvent(name='test'):
y = x / 2.0 y = x / 2.0
prof = None prof = None
with profiler.Profiler( with profiler.Profiler(
targets=[profiler.ProfilerTarget.CPU], targets=[profiler.ProfilerTarget.CPU],
......
...@@ -51,8 +51,9 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -51,8 +51,9 @@ class TestProfilerStatistic(unittest.TestCase):
profilerstep_node = HostPythonNode('ProfileStep#1', profilerstep_node = HostPythonNode('ProfileStep#1',
profiler.TracerEventType.ProfileStep, profiler.TracerEventType.ProfileStep,
0, 400, 1000, 1001) 0, 400, 1000, 1001)
dataloader_node = HostPythonNode( dataloader_node = HostPythonNode('Dataloader',
'Dataloader', profiler.TracerEventType.Forward, 5, 15, 1000, 1001) profiler.TracerEventType.Dataloader, 5,
15, 1000, 1001)
mobilenet_node = HostPythonNode( mobilenet_node = HostPythonNode(
'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001) 'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001)
yolonet_node = HostPythonNode( yolonet_node = HostPythonNode(
...@@ -155,7 +156,7 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -155,7 +156,7 @@ class TestProfilerStatistic(unittest.TestCase):
profiler.TracerEventType.ProfileStep), 400) profiler.TracerEventType.ProfileStep), 400)
self.assertEqual( self.assertEqual(
time_range_summary.get_cpu_range_sum( time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Forward), 100) profiler.TracerEventType.Forward), 90)
self.assertEqual( self.assertEqual(
time_range_summary.get_cpu_range_sum( time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Backward), 80) profiler.TracerEventType.Backward), 80)
...@@ -185,12 +186,12 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -185,12 +186,12 @@ class TestProfilerStatistic(unittest.TestCase):
profiler.TracerEventType.Communication), 5) profiler.TracerEventType.Communication), 5)
self.assertEqual(len(event_summary.items), 2) self.assertEqual(len(event_summary.items), 2)
self.assertEqual(len(event_summary.userdefined_items), 1) self.assertEqual(len(event_summary.userdefined_items), 1)
self.assertEqual(len(event_summary.model_perspective_items), 4) self.assertEqual(len(event_summary.model_perspective_items), 5)
self.assertEqual(len(event_summary.memory_manipulation_items), 1) self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15) self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25) self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].cpu_time, 100) event_summary.model_perspective_items['Forward'].cpu_time, 90)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].general_gpu_time, event_summary.model_perspective_items['Forward'].general_gpu_time,
135) 135)
...@@ -217,8 +218,9 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -217,8 +218,9 @@ class TestProfilerStatistic(unittest.TestCase):
profiler.TracerEventType.ProfileStep, profiler.TracerEventType.ProfileStep,
0, 400, 1000, 1001) 0, 400, 1000, 1001)
dataloader_node = HostPythonNode( dataloader_node = HostPythonNode('Dataloader',
'Dataloader', profiler.TracerEventType.Forward, 5, 15, 1000, 1001) profiler.TracerEventType.Dataloader, 5,
15, 1000, 1001)
mobilenet_node = HostPythonNode( mobilenet_node = HostPythonNode(
'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001) 'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001)
...@@ -372,7 +374,7 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -372,7 +374,7 @@ class TestProfilerStatistic(unittest.TestCase):
profiler.TracerEventType.ProfileStep), 400) profiler.TracerEventType.ProfileStep), 400)
self.assertEqual( self.assertEqual(
time_range_summary.get_cpu_range_sum( time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Forward), 100) profiler.TracerEventType.Forward), 90)
self.assertEqual( self.assertEqual(
time_range_summary.get_cpu_range_sum( time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Backward), 80) profiler.TracerEventType.Backward), 80)
...@@ -417,12 +419,12 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -417,12 +419,12 @@ class TestProfilerStatistic(unittest.TestCase):
distributed_summary.overlap_range), 85) distributed_summary.overlap_range), 85)
self.assertEqual(len(event_summary.items), 4) self.assertEqual(len(event_summary.items), 4)
self.assertEqual(len(event_summary.userdefined_items), 1) self.assertEqual(len(event_summary.userdefined_items), 1)
self.assertEqual(len(event_summary.model_perspective_items), 4) self.assertEqual(len(event_summary.model_perspective_items), 5)
self.assertEqual(len(event_summary.memory_manipulation_items), 1) self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15) self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25) self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].cpu_time, 100) event_summary.model_perspective_items['Forward'].cpu_time, 90)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].general_gpu_time, event_summary.model_perspective_items['Forward'].general_gpu_time,
315) 315)
...@@ -441,6 +443,86 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -441,6 +443,86 @@ class TestProfilerStatistic(unittest.TestCase):
thread_sep=False, thread_sep=False,
time_unit='ms')) time_unit='ms'))
def test_statistic_case3(self):
    # Coverage case: the operator, its inner events, the CUDA runtime call
    # and both device kernels all have start == end (zero duration), so the
    # statistics and table-building code run with zero-valued times.
    EventType = profiler.TracerEventType

    def host_node(name, event_type, start, end):
        # Every host node in this case uses process id 1000, thread id 1001.
        return HostPythonNode(name, event_type, start, end, 1000, 1001)

    def kernel_node(name):
        # Device kernels start and end at 35, i.e. zero duration.
        return DevicePythonNode(name, EventType.Kernel, 35, 35, 0, 0, 0)

    # Host-side tree, outermost first.
    root = host_node('Root Node', EventType.UserDefined, 0, float('inf'))
    step = host_node('ProfileStep#1', EventType.ProfileStep, 0, 400)
    dataloader = host_node('Dataloader', EventType.Dataloader, 5, 15)
    mobilenet = host_node('MobileNet', EventType.Forward, 20, 50)
    backward = host_node('Gradient Backward', EventType.Backward, 120, 200)
    optimization = host_node('Optimization', EventType.Optimization, 220,
                             300)
    userdefined = host_node('Communication Time', EventType.UserDefined, 60,
                            70)

    # Zero-duration operator and its inner/runtime events.
    conv2d = host_node('conv2d', EventType.Operator, 25, 25)
    infer_shape = host_node('conv2d::infer_shape', EventType.OperatorInner,
                            25, 25)
    compute = host_node('conv2d::compute', EventType.OperatorInner, 25, 25)
    launch_kernel = host_node('cudalaunchkernel', EventType.CudaRuntime, 25,
                              25)

    # Zero-duration device kernels; the second has a long templated name.
    conv2d_kernel = kernel_node('conv2d_kernel')
    another_kernel = kernel_node(
        'void phi::funcs::VectorizedBroadcastKernel<float, float, phi::funcs::AddFunctor<float>, phi::funcs::AddFunctor<float>>()'
    )

    # Wire the tree together.
    root.children_node.append(step)
    step.children_node.extend(
        [dataloader, mobilenet, userdefined, backward, optimization])
    mobilenet.children_node.append(conv2d)
    conv2d.children_node.extend([infer_shape, compute])
    compute.runtime_node.append(launch_kernel)
    for device_kernel in (conv2d_kernel, another_kernel):
        launch_kernel.device_node.append(device_kernel)

    statistic_data = profiler.profiler_statistic.StatisticData(
        {'thread1001': root},
        {
            'Process Cpu Utilization': '1.02',
            'System Cpu Utilization': '0.68'
        })
    # Not asserted on in this case; the attribute is still read as before.
    time_range_summary = statistic_data.time_range_summary
    event_summary = statistic_data.event_summary

    conv2d_item = event_summary.items['conv2d']
    self.assertEqual(conv2d_item.cpu_time, 0)
    self.assertEqual(conv2d_item.general_gpu_time, 0)
    self.assertEqual(
        event_summary.userdefined_items['Communication Time']
        .general_gpu_time, 0)

    # Build (and print) the summary table once for every sort key.
    sort_keys = (
        profiler.SortedKeys.CPUTotal,
        profiler.SortedKeys.CPUMax,
        profiler.SortedKeys.CPUMin,
        profiler.SortedKeys.CPUAvg,
        profiler.SortedKeys.GPUTotal,
        profiler.SortedKeys.GPUMax,
        profiler.SortedKeys.GPUMin,
        profiler.SortedKeys.GPUAvg, )
    for sort_key in sort_keys:
        table = profiler.profiler_statistic._build_table(
            statistic_data,
            sorted_by=sort_key,
            op_detail=True,
            thread_sep=False,
            time_unit='ms')
        print(table)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -27,6 +27,7 @@ from paddle.fluid.core import (_Profiler, _ProfilerResult, ProfilerOptions, ...@@ -27,6 +27,7 @@ from paddle.fluid.core import (_Profiler, _ProfilerResult, ProfilerOptions,
from .utils import RecordEvent, wrap_optimizers from .utils import RecordEvent, wrap_optimizers
from .profiler_statistic import StatisticData, _build_table, SortedKeys from .profiler_statistic import StatisticData, _build_table, SortedKeys
from paddle.profiler import utils
from .timer import benchmark from .timer import benchmark
...@@ -475,6 +476,7 @@ class Profiler: ...@@ -475,6 +476,7 @@ class Profiler:
if self.timer_only: if self.timer_only:
return return
# CLOSED -> self.current_state # CLOSED -> self.current_state
utils._is_profiler_used = True
if self.current_state == ProfilerState.READY: if self.current_state == ProfilerState.READY:
self.profiler.prepare() self.profiler.prepare()
elif self.current_state == ProfilerState.RECORD: elif self.current_state == ProfilerState.RECORD:
...@@ -527,6 +529,7 @@ class Profiler: ...@@ -527,6 +529,7 @@ class Profiler:
self.profiler_result = self.profiler.stop() self.profiler_result = self.profiler.stop()
if self.on_trace_ready: if self.on_trace_ready:
self.on_trace_ready(self) self.on_trace_ready(self)
utils._is_profiler_used = False
def step(self, num_samples: Optional[int]=None): def step(self, num_samples: Optional[int]=None):
r""" r"""
......
...@@ -20,6 +20,8 @@ from contextlib import ContextDecorator ...@@ -20,6 +20,8 @@ from contextlib import ContextDecorator
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.core import (_RecordEvent, TracerEventType) from paddle.fluid.core import (_RecordEvent, TracerEventType)
_is_profiler_used = False
_AllowedEventTypeList = [ _AllowedEventTypeList = [
TracerEventType.Dataloader, TracerEventType.ProfileStep, TracerEventType.Dataloader, TracerEventType.ProfileStep,
TracerEventType.UserDefined, TracerEventType.Forward, TracerEventType.UserDefined, TracerEventType.Forward,
...@@ -91,6 +93,8 @@ class RecordEvent(ContextDecorator): ...@@ -91,6 +93,8 @@ class RecordEvent(ContextDecorator):
result = data1 - data2 result = data1 - data2
record_event.end() record_event.end()
""" """
if not _is_profiler_used:
return
if self.event_type not in _AllowedEventTypeList: if self.event_type not in _AllowedEventTypeList:
warn("Only TracerEvent Type in [{}, {}, {}, {}, {}, {},{}]\ warn("Only TracerEvent Type in [{}, {}, {}, {}, {}, {},{}]\
can be recorded.".format(*_AllowedEventTypeList)) can be recorded.".format(*_AllowedEventTypeList))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册