未验证 提交 194d16c1 编写于 作者: K kuizhiqing 提交者: GitHub

[Profiler] add views in summary API (#45225)

* add views in summary api

* add args in the last position
上级 1aa6adb1
......@@ -15,6 +15,7 @@
from .profiler import ProfilerState, ProfilerTarget
from .profiler import make_scheduler, export_chrome_tracing, export_protobuf
from .profiler import Profiler
from .profiler import SummaryView
from .profiler import TracerEventType
from .utils import RecordEvent, load_profiler_result
from .profiler_statistic import SortedKeys
......@@ -22,5 +23,5 @@ from .profiler_statistic import SortedKeys
__all__ = [
'ProfilerState', 'ProfilerTarget', 'make_scheduler',
'export_chrome_tracing', 'export_protobuf', 'Profiler', 'RecordEvent',
'load_profiler_result', 'SortedKeys'
'load_profiler_result', 'SortedKeys', 'SummaryView'
]
......@@ -34,6 +34,22 @@ from paddle.profiler import utils
from .timer import benchmark
class SummaryView(Enum):
    r"""
    SummaryView defines the individual tables that a profiler summary can print.

    Pass a list of these members as the ``views`` argument of the summary API
    to print only the selected tables; leaving ``views`` as None prints all of
    them.
    """
    # Device utilization table.
    DeviceView = 0
    # Overview table.
    OverView = 1
    # Model summary table.
    ModelView = 2
    # Distributed summary table.
    DistributedView = 3
    # Kernel summary table.
    KernelView = 4
    # Operator summary table.
    OperatorView = 5
    # Memory summary table.
    MemoryView = 6
    # Memory manipulation summary table.
    MemoryManipulationView = 7
    # User-defined summary table.
    UDFView = 8
class ProfilerState(Enum):
r"""
ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` .
......@@ -734,7 +750,8 @@ class Profiler:
sorted_by=SortedKeys.CPUTotal,
op_detail=True,
thread_sep=False,
time_unit='ms'):
time_unit='ms',
views=None):
r"""
Print the summary tables. Currently supports overview, model, distributed, operator, memory manipulation and user-defined summaries.
......@@ -743,6 +760,7 @@ class Profiler:
op_detail(bool, optional): expand each operator detail information, default value is True.
thread_sep(bool, optional): print op table each thread, default value is False.
time_unit(str, optional): time unit for display, can be chosen from ['s', 'ms', 'us', 'ns'], default value is 'ms'.
views(list[SummaryView], optional): summary tables to print, default value is None, which means all views are printed.
Examples:
.. code-block:: python
......@@ -770,7 +788,8 @@ class Profiler:
sorted_by=sorted_by,
op_detail=op_detail,
thread_sep=thread_sep,
time_unit=time_unit))
time_unit=time_unit,
views=views))
def get_profiler(config_path):
......
......@@ -700,7 +700,10 @@ def _build_table(statistic_data,
thread_sep=False,
time_unit='ms',
row_limit=100,
max_src_column_width=75):
max_src_column_width=75,
views=None):
from .profiler import SummaryView
"""Prints a summary of events."""
# format table row
SPACING_SIZE = 2
......@@ -749,6 +752,9 @@ def _build_table(statistic_data,
total_time = statistic_data.time_range_summary.get_cpu_range_sum(
TracerEventType.ProfileStep)
if views is None or SummaryView.DeviceView in views:
###### Print Device Summary ######
headers = ['Device', 'Utilization (%)']
name_column_width = 30
......@@ -769,13 +775,14 @@ def _build_table(statistic_data,
append(header_sep)
row_values = [
'CPU(Process)',
format_ratio(float(
statistic_data.extra_info['Process Cpu Utilization']))
format_ratio(
float(statistic_data.extra_info['Process Cpu Utilization']))
]
append(row_format.format(*row_values))
row_values = [
'CPU(System)',
format_ratio(float(statistic_data.extra_info['System Cpu Utilization']))
format_ratio(
float(statistic_data.extra_info['System Cpu Utilization']))
]
append(row_format.format(*row_values))
for gpu_name in statistic_data.time_range_summary.get_gpu_devices():
......@@ -798,6 +805,7 @@ def _build_table(statistic_data,
if total_time == 0:
return ''.join(result)
if views is None or SummaryView.OverView in views:
###### Print Overview Summary ######
headers = ['Event Type', 'Calls', 'CPU Time', 'Ratio (%)']
row_format_list = [""]
......@@ -869,7 +877,8 @@ def _build_table(statistic_data,
reverse=True)
event_type, time = sorted_items[0]
row_values = [
'{}'.format(str(event_type).split('.')[1]), cpu_call_times[event_type],
'{}'.format(str(event_type).split('.')[1]),
cpu_call_times[event_type],
format_time(time, unit=time_unit),
format_ratio(float(time) / total_time)
]
......@@ -912,6 +921,8 @@ def _build_table(statistic_data,
append('')
append('')
if views is None or SummaryView.ModelView in views:
###### Print Model Summary Report ######
model_perspective_items = statistic_data.event_summary.model_perspective_items
if len(model_perspective_items) > 1:
......@@ -1013,6 +1024,8 @@ def _build_table(statistic_data,
append('')
append('')
if views is None or SummaryView.DistributedView in views:
###### Print Distribution Summary Report ######
if statistic_data.distributed_summary.communication_range:
headers = [
......@@ -1087,6 +1100,8 @@ def _build_table(statistic_data,
append('')
append('')
if views is None or SummaryView.OperatorView in views:
###### Print Operator Summary Report ######
if statistic_data.event_summary.items:
all_row_values = []
......@@ -1119,16 +1134,18 @@ def _build_table(statistic_data,
key=lambda x: x[1].general_gpu_time,
reverse=True)
elif sorted_by == SortedKeys.GPUAvg:
sorted_items = sorted(items.items(),
sorted_items = sorted(
items.items(),
key=lambda x: x[1].avg_general_gpu_time,
reverse=True)
elif sorted_by == SortedKeys.GPUMax:
sorted_items = sorted(items.items(),
sorted_items = sorted(
items.items(),
key=lambda x: x[1].max_general_gpu_time,
reverse=True)
elif sorted_by == SortedKeys.GPUMin:
sorted_items = sorted(items.items(),
key=lambda x: x[1].min_general_gpu_time)
sorted_items = sorted(
items.items(), key=lambda x: x[1].min_general_gpu_time)
total_op_cpu_time = 0
total_op_gpu_time = 0
......@@ -1144,7 +1161,8 @@ def _build_table(statistic_data,
if total_op_gpu_time == 0:
gpu_ratio = 0
else:
gpu_ratio = float(item.general_gpu_time) / total_op_gpu_time
gpu_ratio = float(
item.general_gpu_time) / total_op_gpu_time
row_values = [
name, item.call, '{} / {} / {} / {} / {}'.format(
format_time(item.cpu_time, unit=time_unit),
......@@ -1154,9 +1172,12 @@ def _build_table(statistic_data,
format_ratio(cpu_ratio)),
'{} / {} / {} / {} / {}'.format(
format_time(item.general_gpu_time, unit=time_unit),
format_time(item.avg_general_gpu_time, unit=time_unit),
format_time(item.max_general_gpu_time, unit=time_unit),
format_time(item.min_general_gpu_time, unit=time_unit),
format_time(item.avg_general_gpu_time,
unit=time_unit),
format_time(item.max_general_gpu_time,
unit=time_unit),
format_time(item.min_general_gpu_time,
unit=time_unit),
format_ratio(gpu_ratio))
]
all_row_values.append(row_values)
......@@ -1174,7 +1195,8 @@ def _build_table(statistic_data,
gpu_ratio = float(innerop_node.general_gpu_time
) / item.general_gpu_time
if len(innerop_name) + 2 > name_column_width:
innerop_name = innerop_name[:name_column_width - 5]
innerop_name = innerop_name[:name_column_width -
5]
innerop_name += "..."
row_values = [
' {}'.format(innerop_name), innerop_node.call,
......@@ -1191,11 +1213,14 @@ def _build_table(statistic_data,
'{} / {} / {} / {} / {}'.format(
format_time(innerop_node.general_gpu_time,
unit=time_unit),
format_time(innerop_node.avg_general_gpu_time,
format_time(
innerop_node.avg_general_gpu_time,
unit=time_unit),
format_time(innerop_node.max_general_gpu_time,
format_time(
innerop_node.max_general_gpu_time,
unit=time_unit),
format_time(innerop_node.min_general_gpu_time,
format_time(
innerop_node.min_general_gpu_time,
unit=time_unit),
format_ratio(gpu_ratio))
]
......@@ -1208,7 +1233,8 @@ def _build_table(statistic_data,
gpu_ratio = float(
device_node.gpu_time
) / innerop_node.general_gpu_time
if len(device_node_name) + 4 > name_column_width:
if len(device_node_name
) + 4 > name_column_width:
device_node_name = device_node_name[:
name_column_width
- 7]
......@@ -1228,20 +1254,21 @@ def _build_table(statistic_data,
format_ratio(gpu_ratio))
]
all_row_values.append(row_values)
for device_node_name, device_node in item.devices.items():
for device_node_name, device_node in item.devices.items(
):
if item.general_gpu_time == 0:
gpu_ratio = 0
else:
gpu_ratio = float(
device_node.gpu_time) / item.general_gpu_time
gpu_ratio = float(device_node.gpu_time
) / item.general_gpu_time
if len(device_node_name) + 2 > name_column_width:
device_node_name = device_node_name[:
name_column_width
- 5]
device_node_name += "..."
row_values = [
' {}'.format(device_node_name), device_node.call,
'- / - / - / - / -',
' {}'.format(device_node_name),
device_node.call, '- / - / - / - / -',
'{} / {} / {} / {} / {}'.format(
format_time(device_node.gpu_time,
unit=time_unit),
......@@ -1299,6 +1326,8 @@ def _build_table(statistic_data,
append('')
append('')
if views is None or SummaryView.KernelView in views:
###### Print Kernel Summary Report ######
if statistic_data.event_summary.kernel_items:
all_row_values = []
......@@ -1339,7 +1368,9 @@ def _build_table(statistic_data,
]
all_row_values.append(row_values)
headers = ['Name', 'Calls', 'GPU Total / Avg / Max / Min / Ratio(%)']
headers = [
'Name', 'Calls', 'GPU Total / Avg / Max / Min / Ratio(%)'
]
# Calculate the column width
name_column_width = 90
calltime_width = 6
......@@ -1384,6 +1415,8 @@ def _build_table(statistic_data,
append('')
append('')
if views is None or SummaryView.MemoryManipulationView in views:
###### Print Memory Manipulation Summary Report ######
if statistic_data.event_summary.memory_manipulation_items:
all_row_values = []
......@@ -1456,6 +1489,9 @@ def _build_table(statistic_data,
append(header_sep)
append('')
append('')
if views is None or SummaryView.UDFView in views:
###### Print UserDefined Summary Report ######
if statistic_data.event_summary.userdefined_items:
all_row_values = []
......@@ -1490,22 +1526,25 @@ def _build_table(statistic_data,
key=lambda x: x[1].general_gpu_time,
reverse=True)
elif sorted_by == SortedKeys.GPUAvg:
sorted_items = sorted(items.items(),
sorted_items = sorted(
items.items(),
key=lambda x: x[1].avg_general_gpu_time,
reverse=True)
elif sorted_by == SortedKeys.GPUMax:
sorted_items = sorted(items.items(),
sorted_items = sorted(
items.items(),
key=lambda x: x[1].max_general_gpu_time,
reverse=True)
elif sorted_by == SortedKeys.GPUMin:
sorted_items = sorted(items.items(),
key=lambda x: x[1].min_general_gpu_time)
sorted_items = sorted(
items.items(), key=lambda x: x[1].min_general_gpu_time)
for name, item in sorted_items:
if gpu_total_time == 0:
gpu_ratio = 0
else:
gpu_ratio = float(item.general_gpu_time) / gpu_total_time
gpu_ratio = float(
item.general_gpu_time) / gpu_total_time
row_values = [
name,
item.call,
......@@ -1517,9 +1556,12 @@ def _build_table(statistic_data,
format_ratio(float(item.cpu_time) / total_time)),
'{} / {} / {} / {} / {}'.format(
format_time(item.general_gpu_time, unit=time_unit),
format_time(item.avg_general_gpu_time, unit=time_unit),
format_time(item.max_general_gpu_time, unit=time_unit),
format_time(item.min_general_gpu_time, unit=time_unit),
format_time(item.avg_general_gpu_time,
unit=time_unit),
format_time(item.max_general_gpu_time,
unit=time_unit),
format_time(item.min_general_gpu_time,
unit=time_unit),
format_ratio(gpu_ratio)),
]
all_row_values.append(row_values)
......@@ -1573,6 +1615,8 @@ def _build_table(statistic_data,
append('')
append('')
if views is None or SummaryView.MemoryView in views:
###### Print Memory Summary Report ######
if statistic_data.memory_summary.allocated_items or statistic_data.memory_summary.reserved_items:
for device_type, memory_events in statistic_data.memory_summary.allocated_items.items(
......@@ -1590,8 +1634,9 @@ def _build_table(statistic_data,
]
all_row_values.append(row_values)
sorted_reserved_items = sorted(statistic_data.memory_summary.
reserved_items[device_type].items(),
sorted_reserved_items = sorted(
statistic_data.memory_summary.reserved_items[device_type].
items(),
key=lambda x: x[1].increase_size,
reverse=True)
for event_name, item in sorted_reserved_items:
......@@ -1632,8 +1677,8 @@ def _build_table(statistic_data,
statistic_data.memory_summary.
peak_allocation_values[device_type]))
append('Peak Reserved Memory: {}'.format(
statistic_data.memory_summary.peak_reserved_values[device_type])
)
statistic_data.memory_summary.
peak_reserved_values[device_type]))
append(header_sep)
append(row_format.format(*headers))
append(header_sep)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册