未验证 提交 194d16c1 编写于 作者: K kuizhiqing 提交者: GitHub

[Profiler] add views in summary API (#45225)

* add views in summary api

* add args in the last position
上级 1aa6adb1
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
from .profiler import ProfilerState, ProfilerTarget from .profiler import ProfilerState, ProfilerTarget
from .profiler import make_scheduler, export_chrome_tracing, export_protobuf from .profiler import make_scheduler, export_chrome_tracing, export_protobuf
from .profiler import Profiler from .profiler import Profiler
from .profiler import SummaryView
from .profiler import TracerEventType from .profiler import TracerEventType
from .utils import RecordEvent, load_profiler_result from .utils import RecordEvent, load_profiler_result
from .profiler_statistic import SortedKeys from .profiler_statistic import SortedKeys
...@@ -22,5 +23,5 @@ from .profiler_statistic import SortedKeys ...@@ -22,5 +23,5 @@ from .profiler_statistic import SortedKeys
__all__ = [ __all__ = [
'ProfilerState', 'ProfilerTarget', 'make_scheduler', 'ProfilerState', 'ProfilerTarget', 'make_scheduler',
'export_chrome_tracing', 'export_protobuf', 'Profiler', 'RecordEvent', 'export_chrome_tracing', 'export_protobuf', 'Profiler', 'RecordEvent',
'load_profiler_result', 'SortedKeys' 'load_profiler_result', 'SortedKeys', 'SummaryView'
] ]
...@@ -34,6 +34,22 @@ from paddle.profiler import utils ...@@ -34,6 +34,22 @@ from paddle.profiler import utils
from .timer import benchmark from .timer import benchmark
class SummaryView(Enum):
    r"""
    SummaryView enumerates the summary tables that can be printed.

    Members are passed in a collection (documented as a list) through the
    ``views`` argument of the summary API to choose which tables are built;
    leaving ``views`` as None prints every table.
    """
    # Each member gates exactly one report section in ``_build_table``.
    DeviceView = 0  # device summary (utilization per device)
    OverView = 1  # overview summary (per event type)
    ModelView = 2  # model summary report
    DistributedView = 3  # distribution summary report
    KernelView = 4  # kernel summary report
    OperatorView = 5  # operator summary report
    MemoryView = 6  # memory summary report
    MemoryManipulationView = 7  # memory manipulation summary report
    UDFView = 8  # user-defined summary report
class ProfilerState(Enum): class ProfilerState(Enum):
r""" r"""
ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` . ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` .
...@@ -734,7 +750,8 @@ class Profiler: ...@@ -734,7 +750,8 @@ class Profiler:
sorted_by=SortedKeys.CPUTotal, sorted_by=SortedKeys.CPUTotal,
op_detail=True, op_detail=True,
thread_sep=False, thread_sep=False,
time_unit='ms'): time_unit='ms',
views=None):
r""" r"""
Print the Summary table. Currently support overview, model, distributed, operator, memory manipulation and userdefined summary. Print the Summary table. Currently support overview, model, distributed, operator, memory manipulation and userdefined summary.
...@@ -743,6 +760,7 @@ class Profiler: ...@@ -743,6 +760,7 @@ class Profiler:
op_detail(bool, optional): expand each operator detail information, default value is True. op_detail(bool, optional): expand each operator detail information, default value is True.
thread_sep(bool, optional): print op table each thread, default value is False. thread_sep(bool, optional): print op table each thread, default value is False.
time_unit(str, optional): time unit for display, can be chosen from ['s', 'ms', 'us', 'ns'], default value is 'ms'. time_unit(str, optional): time unit for display, can be chosen from ['s', 'ms', 'us', 'ns'], default value is 'ms'.
views(list[SummaryView], optional): summary tables to print, default value is None, which means all views are printed.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -770,7 +788,8 @@ class Profiler: ...@@ -770,7 +788,8 @@ class Profiler:
sorted_by=sorted_by, sorted_by=sorted_by,
op_detail=op_detail, op_detail=op_detail,
thread_sep=thread_sep, thread_sep=thread_sep,
time_unit=time_unit)) time_unit=time_unit,
views=views))
def get_profiler(config_path): def get_profiler(config_path):
......
...@@ -700,7 +700,10 @@ def _build_table(statistic_data, ...@@ -700,7 +700,10 @@ def _build_table(statistic_data,
thread_sep=False, thread_sep=False,
time_unit='ms', time_unit='ms',
row_limit=100, row_limit=100,
max_src_column_width=75): max_src_column_width=75,
views=None):
from .profiler import SummaryView
"""Prints a summary of events.""" """Prints a summary of events."""
# format table row # format table row
SPACING_SIZE = 2 SPACING_SIZE = 2
...@@ -749,6 +752,9 @@ def _build_table(statistic_data, ...@@ -749,6 +752,9 @@ def _build_table(statistic_data,
total_time = statistic_data.time_range_summary.get_cpu_range_sum( total_time = statistic_data.time_range_summary.get_cpu_range_sum(
TracerEventType.ProfileStep) TracerEventType.ProfileStep)
if views is None or SummaryView.DeviceView in views:
###### Print Device Summary ###### ###### Print Device Summary ######
headers = ['Device', 'Utilization (%)'] headers = ['Device', 'Utilization (%)']
name_column_width = 30 name_column_width = 30
...@@ -769,13 +775,14 @@ def _build_table(statistic_data, ...@@ -769,13 +775,14 @@ def _build_table(statistic_data,
append(header_sep) append(header_sep)
row_values = [ row_values = [
'CPU(Process)', 'CPU(Process)',
format_ratio(float( format_ratio(
statistic_data.extra_info['Process Cpu Utilization'])) float(statistic_data.extra_info['Process Cpu Utilization']))
] ]
append(row_format.format(*row_values)) append(row_format.format(*row_values))
row_values = [ row_values = [
'CPU(System)', 'CPU(System)',
format_ratio(float(statistic_data.extra_info['System Cpu Utilization'])) format_ratio(
float(statistic_data.extra_info['System Cpu Utilization']))
] ]
append(row_format.format(*row_values)) append(row_format.format(*row_values))
for gpu_name in statistic_data.time_range_summary.get_gpu_devices(): for gpu_name in statistic_data.time_range_summary.get_gpu_devices():
...@@ -798,6 +805,7 @@ def _build_table(statistic_data, ...@@ -798,6 +805,7 @@ def _build_table(statistic_data,
if total_time == 0: if total_time == 0:
return ''.join(result) return ''.join(result)
if views is None or SummaryView.OverView in views:
###### Print Overview Summary ###### ###### Print Overview Summary ######
headers = ['Event Type', 'Calls', 'CPU Time', 'Ratio (%)'] headers = ['Event Type', 'Calls', 'CPU Time', 'Ratio (%)']
row_format_list = [""] row_format_list = [""]
...@@ -869,7 +877,8 @@ def _build_table(statistic_data, ...@@ -869,7 +877,8 @@ def _build_table(statistic_data,
reverse=True) reverse=True)
event_type, time = sorted_items[0] event_type, time = sorted_items[0]
row_values = [ row_values = [
'{}'.format(str(event_type).split('.')[1]), cpu_call_times[event_type], '{}'.format(str(event_type).split('.')[1]),
cpu_call_times[event_type],
format_time(time, unit=time_unit), format_time(time, unit=time_unit),
format_ratio(float(time) / total_time) format_ratio(float(time) / total_time)
] ]
...@@ -912,6 +921,8 @@ def _build_table(statistic_data, ...@@ -912,6 +921,8 @@ def _build_table(statistic_data,
append('') append('')
append('') append('')
if views is None or SummaryView.ModelView in views:
###### Print Model Summary Report ###### ###### Print Model Summary Report ######
model_perspective_items = statistic_data.event_summary.model_perspective_items model_perspective_items = statistic_data.event_summary.model_perspective_items
if len(model_perspective_items) > 1: if len(model_perspective_items) > 1:
...@@ -1013,6 +1024,8 @@ def _build_table(statistic_data, ...@@ -1013,6 +1024,8 @@ def _build_table(statistic_data,
append('') append('')
append('') append('')
if views is None or SummaryView.DistributedView in views:
###### Print Distribution Summary Report ###### ###### Print Distribution Summary Report ######
if statistic_data.distributed_summary.communication_range: if statistic_data.distributed_summary.communication_range:
headers = [ headers = [
...@@ -1087,6 +1100,8 @@ def _build_table(statistic_data, ...@@ -1087,6 +1100,8 @@ def _build_table(statistic_data,
append('') append('')
append('') append('')
if views is None or SummaryView.OperatorView in views:
###### Print Operator Summary Report ###### ###### Print Operator Summary Report ######
if statistic_data.event_summary.items: if statistic_data.event_summary.items:
all_row_values = [] all_row_values = []
...@@ -1119,16 +1134,18 @@ def _build_table(statistic_data, ...@@ -1119,16 +1134,18 @@ def _build_table(statistic_data,
key=lambda x: x[1].general_gpu_time, key=lambda x: x[1].general_gpu_time,
reverse=True) reverse=True)
elif sorted_by == SortedKeys.GPUAvg: elif sorted_by == SortedKeys.GPUAvg:
sorted_items = sorted(items.items(), sorted_items = sorted(
items.items(),
key=lambda x: x[1].avg_general_gpu_time, key=lambda x: x[1].avg_general_gpu_time,
reverse=True) reverse=True)
elif sorted_by == SortedKeys.GPUMax: elif sorted_by == SortedKeys.GPUMax:
sorted_items = sorted(items.items(), sorted_items = sorted(
items.items(),
key=lambda x: x[1].max_general_gpu_time, key=lambda x: x[1].max_general_gpu_time,
reverse=True) reverse=True)
elif sorted_by == SortedKeys.GPUMin: elif sorted_by == SortedKeys.GPUMin:
sorted_items = sorted(items.items(), sorted_items = sorted(
key=lambda x: x[1].min_general_gpu_time) items.items(), key=lambda x: x[1].min_general_gpu_time)
total_op_cpu_time = 0 total_op_cpu_time = 0
total_op_gpu_time = 0 total_op_gpu_time = 0
...@@ -1144,7 +1161,8 @@ def _build_table(statistic_data, ...@@ -1144,7 +1161,8 @@ def _build_table(statistic_data,
if total_op_gpu_time == 0: if total_op_gpu_time == 0:
gpu_ratio = 0 gpu_ratio = 0
else: else:
gpu_ratio = float(item.general_gpu_time) / total_op_gpu_time gpu_ratio = float(
item.general_gpu_time) / total_op_gpu_time
row_values = [ row_values = [
name, item.call, '{} / {} / {} / {} / {}'.format( name, item.call, '{} / {} / {} / {} / {}'.format(
format_time(item.cpu_time, unit=time_unit), format_time(item.cpu_time, unit=time_unit),
...@@ -1154,9 +1172,12 @@ def _build_table(statistic_data, ...@@ -1154,9 +1172,12 @@ def _build_table(statistic_data,
format_ratio(cpu_ratio)), format_ratio(cpu_ratio)),
'{} / {} / {} / {} / {}'.format( '{} / {} / {} / {} / {}'.format(
format_time(item.general_gpu_time, unit=time_unit), format_time(item.general_gpu_time, unit=time_unit),
format_time(item.avg_general_gpu_time, unit=time_unit), format_time(item.avg_general_gpu_time,
format_time(item.max_general_gpu_time, unit=time_unit), unit=time_unit),
format_time(item.min_general_gpu_time, unit=time_unit), format_time(item.max_general_gpu_time,
unit=time_unit),
format_time(item.min_general_gpu_time,
unit=time_unit),
format_ratio(gpu_ratio)) format_ratio(gpu_ratio))
] ]
all_row_values.append(row_values) all_row_values.append(row_values)
...@@ -1174,7 +1195,8 @@ def _build_table(statistic_data, ...@@ -1174,7 +1195,8 @@ def _build_table(statistic_data,
gpu_ratio = float(innerop_node.general_gpu_time gpu_ratio = float(innerop_node.general_gpu_time
) / item.general_gpu_time ) / item.general_gpu_time
if len(innerop_name) + 2 > name_column_width: if len(innerop_name) + 2 > name_column_width:
innerop_name = innerop_name[:name_column_width - 5] innerop_name = innerop_name[:name_column_width -
5]
innerop_name += "..." innerop_name += "..."
row_values = [ row_values = [
' {}'.format(innerop_name), innerop_node.call, ' {}'.format(innerop_name), innerop_node.call,
...@@ -1191,11 +1213,14 @@ def _build_table(statistic_data, ...@@ -1191,11 +1213,14 @@ def _build_table(statistic_data,
'{} / {} / {} / {} / {}'.format( '{} / {} / {} / {} / {}'.format(
format_time(innerop_node.general_gpu_time, format_time(innerop_node.general_gpu_time,
unit=time_unit), unit=time_unit),
format_time(innerop_node.avg_general_gpu_time, format_time(
innerop_node.avg_general_gpu_time,
unit=time_unit), unit=time_unit),
format_time(innerop_node.max_general_gpu_time, format_time(
innerop_node.max_general_gpu_time,
unit=time_unit), unit=time_unit),
format_time(innerop_node.min_general_gpu_time, format_time(
innerop_node.min_general_gpu_time,
unit=time_unit), unit=time_unit),
format_ratio(gpu_ratio)) format_ratio(gpu_ratio))
] ]
...@@ -1208,7 +1233,8 @@ def _build_table(statistic_data, ...@@ -1208,7 +1233,8 @@ def _build_table(statistic_data,
gpu_ratio = float( gpu_ratio = float(
device_node.gpu_time device_node.gpu_time
) / innerop_node.general_gpu_time ) / innerop_node.general_gpu_time
if len(device_node_name) + 4 > name_column_width: if len(device_node_name
) + 4 > name_column_width:
device_node_name = device_node_name[: device_node_name = device_node_name[:
name_column_width name_column_width
- 7] - 7]
...@@ -1228,20 +1254,21 @@ def _build_table(statistic_data, ...@@ -1228,20 +1254,21 @@ def _build_table(statistic_data,
format_ratio(gpu_ratio)) format_ratio(gpu_ratio))
] ]
all_row_values.append(row_values) all_row_values.append(row_values)
for device_node_name, device_node in item.devices.items(): for device_node_name, device_node in item.devices.items(
):
if item.general_gpu_time == 0: if item.general_gpu_time == 0:
gpu_ratio = 0 gpu_ratio = 0
else: else:
gpu_ratio = float( gpu_ratio = float(device_node.gpu_time
device_node.gpu_time) / item.general_gpu_time ) / item.general_gpu_time
if len(device_node_name) + 2 > name_column_width: if len(device_node_name) + 2 > name_column_width:
device_node_name = device_node_name[: device_node_name = device_node_name[:
name_column_width name_column_width
- 5] - 5]
device_node_name += "..." device_node_name += "..."
row_values = [ row_values = [
' {}'.format(device_node_name), device_node.call, ' {}'.format(device_node_name),
'- / - / - / - / -', device_node.call, '- / - / - / - / -',
'{} / {} / {} / {} / {}'.format( '{} / {} / {} / {} / {}'.format(
format_time(device_node.gpu_time, format_time(device_node.gpu_time,
unit=time_unit), unit=time_unit),
...@@ -1299,6 +1326,8 @@ def _build_table(statistic_data, ...@@ -1299,6 +1326,8 @@ def _build_table(statistic_data,
append('') append('')
append('') append('')
if views is None or SummaryView.KernelView in views:
###### Print Kernel Summary Report ###### ###### Print Kernel Summary Report ######
if statistic_data.event_summary.kernel_items: if statistic_data.event_summary.kernel_items:
all_row_values = [] all_row_values = []
...@@ -1339,7 +1368,9 @@ def _build_table(statistic_data, ...@@ -1339,7 +1368,9 @@ def _build_table(statistic_data,
] ]
all_row_values.append(row_values) all_row_values.append(row_values)
headers = ['Name', 'Calls', 'GPU Total / Avg / Max / Min / Ratio(%)'] headers = [
'Name', 'Calls', 'GPU Total / Avg / Max / Min / Ratio(%)'
]
# Calculate the column width # Calculate the column width
name_column_width = 90 name_column_width = 90
calltime_width = 6 calltime_width = 6
...@@ -1384,6 +1415,8 @@ def _build_table(statistic_data, ...@@ -1384,6 +1415,8 @@ def _build_table(statistic_data,
append('') append('')
append('') append('')
if views is None or SummaryView.MemoryManipulationView in views:
###### Print Memory Manipulation Summary Report ###### ###### Print Memory Manipulation Summary Report ######
if statistic_data.event_summary.memory_manipulation_items: if statistic_data.event_summary.memory_manipulation_items:
all_row_values = [] all_row_values = []
...@@ -1456,6 +1489,9 @@ def _build_table(statistic_data, ...@@ -1456,6 +1489,9 @@ def _build_table(statistic_data,
append(header_sep) append(header_sep)
append('') append('')
append('') append('')
if views is None or SummaryView.UDFView in views:
###### Print UserDefined Summary Report ###### ###### Print UserDefined Summary Report ######
if statistic_data.event_summary.userdefined_items: if statistic_data.event_summary.userdefined_items:
all_row_values = [] all_row_values = []
...@@ -1490,22 +1526,25 @@ def _build_table(statistic_data, ...@@ -1490,22 +1526,25 @@ def _build_table(statistic_data,
key=lambda x: x[1].general_gpu_time, key=lambda x: x[1].general_gpu_time,
reverse=True) reverse=True)
elif sorted_by == SortedKeys.GPUAvg: elif sorted_by == SortedKeys.GPUAvg:
sorted_items = sorted(items.items(), sorted_items = sorted(
items.items(),
key=lambda x: x[1].avg_general_gpu_time, key=lambda x: x[1].avg_general_gpu_time,
reverse=True) reverse=True)
elif sorted_by == SortedKeys.GPUMax: elif sorted_by == SortedKeys.GPUMax:
sorted_items = sorted(items.items(), sorted_items = sorted(
items.items(),
key=lambda x: x[1].max_general_gpu_time, key=lambda x: x[1].max_general_gpu_time,
reverse=True) reverse=True)
elif sorted_by == SortedKeys.GPUMin: elif sorted_by == SortedKeys.GPUMin:
sorted_items = sorted(items.items(), sorted_items = sorted(
key=lambda x: x[1].min_general_gpu_time) items.items(), key=lambda x: x[1].min_general_gpu_time)
for name, item in sorted_items: for name, item in sorted_items:
if gpu_total_time == 0: if gpu_total_time == 0:
gpu_ratio = 0 gpu_ratio = 0
else: else:
gpu_ratio = float(item.general_gpu_time) / gpu_total_time gpu_ratio = float(
item.general_gpu_time) / gpu_total_time
row_values = [ row_values = [
name, name,
item.call, item.call,
...@@ -1517,9 +1556,12 @@ def _build_table(statistic_data, ...@@ -1517,9 +1556,12 @@ def _build_table(statistic_data,
format_ratio(float(item.cpu_time) / total_time)), format_ratio(float(item.cpu_time) / total_time)),
'{} / {} / {} / {} / {}'.format( '{} / {} / {} / {} / {}'.format(
format_time(item.general_gpu_time, unit=time_unit), format_time(item.general_gpu_time, unit=time_unit),
format_time(item.avg_general_gpu_time, unit=time_unit), format_time(item.avg_general_gpu_time,
format_time(item.max_general_gpu_time, unit=time_unit), unit=time_unit),
format_time(item.min_general_gpu_time, unit=time_unit), format_time(item.max_general_gpu_time,
unit=time_unit),
format_time(item.min_general_gpu_time,
unit=time_unit),
format_ratio(gpu_ratio)), format_ratio(gpu_ratio)),
] ]
all_row_values.append(row_values) all_row_values.append(row_values)
...@@ -1573,6 +1615,8 @@ def _build_table(statistic_data, ...@@ -1573,6 +1615,8 @@ def _build_table(statistic_data,
append('') append('')
append('') append('')
if views is None or SummaryView.MemoryView in views:
###### Print Memory Summary Report ###### ###### Print Memory Summary Report ######
if statistic_data.memory_summary.allocated_items or statistic_data.memory_summary.reserved_items: if statistic_data.memory_summary.allocated_items or statistic_data.memory_summary.reserved_items:
for device_type, memory_events in statistic_data.memory_summary.allocated_items.items( for device_type, memory_events in statistic_data.memory_summary.allocated_items.items(
...@@ -1590,8 +1634,9 @@ def _build_table(statistic_data, ...@@ -1590,8 +1634,9 @@ def _build_table(statistic_data,
] ]
all_row_values.append(row_values) all_row_values.append(row_values)
sorted_reserved_items = sorted(statistic_data.memory_summary. sorted_reserved_items = sorted(
reserved_items[device_type].items(), statistic_data.memory_summary.reserved_items[device_type].
items(),
key=lambda x: x[1].increase_size, key=lambda x: x[1].increase_size,
reverse=True) reverse=True)
for event_name, item in sorted_reserved_items: for event_name, item in sorted_reserved_items:
...@@ -1632,8 +1677,8 @@ def _build_table(statistic_data, ...@@ -1632,8 +1677,8 @@ def _build_table(statistic_data,
statistic_data.memory_summary. statistic_data.memory_summary.
peak_allocation_values[device_type])) peak_allocation_values[device_type]))
append('Peak Reserved Memory: {}'.format( append('Peak Reserved Memory: {}'.format(
statistic_data.memory_summary.peak_reserved_values[device_type]) statistic_data.memory_summary.
) peak_reserved_values[device_type]))
append(header_sep) append(header_sep)
append(row_format.format(*headers)) append(row_format.format(*headers))
append(header_sep) append(header_sep)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册