Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
194d16c1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
194d16c1
编写于
8月 19, 2022
作者:
K
kuizhiqing
提交者:
GitHub
8月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Profiler] add views in summary API (#45225)
* add views in summary api * add args in the last position
上级
1aa6adb1
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
829 addition
and
764 deletion
+829
-764
python/paddle/profiler/__init__.py
python/paddle/profiler/__init__.py
+2
-1
python/paddle/profiler/profiler.py
python/paddle/profiler/profiler.py
+21
-2
python/paddle/profiler/profiler_statistic.py
python/paddle/profiler/profiler_statistic.py
+806
-761
未找到文件。
python/paddle/profiler/__init__.py
浏览文件 @
194d16c1
...
...
@@ -15,6 +15,7 @@
from
.profiler
import
ProfilerState
,
ProfilerTarget
from
.profiler
import
make_scheduler
,
export_chrome_tracing
,
export_protobuf
from
.profiler
import
Profiler
from
.profiler
import
SummaryView
from
.profiler
import
TracerEventType
from
.utils
import
RecordEvent
,
load_profiler_result
from
.profiler_statistic
import
SortedKeys
...
...
@@ -22,5 +23,5 @@ from .profiler_statistic import SortedKeys
__all__
=
[
'ProfilerState'
,
'ProfilerTarget'
,
'make_scheduler'
,
'export_chrome_tracing'
,
'export_protobuf'
,
'Profiler'
,
'RecordEvent'
,
'load_profiler_result'
,
'SortedKeys'
'load_profiler_result'
,
'SortedKeys'
,
'SummaryView'
]
python/paddle/profiler/profiler.py
浏览文件 @
194d16c1
...
...
@@ -34,6 +34,22 @@ from paddle.profiler import utils
from
.timer
import
benchmark
class
SummaryView
(
Enum
):
r
"""
SummaryView define the summary view of different contents.
"""
DeviceView
=
0
OverView
=
1
ModelView
=
2
DistributedView
=
3
KernelView
=
4
OperatorView
=
5
MemoryView
=
6
MemoryManipulationView
=
7
UDFView
=
8
class
ProfilerState
(
Enum
):
r
"""
ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` .
...
...
@@ -734,7 +750,8 @@ class Profiler:
sorted_by
=
SortedKeys
.
CPUTotal
,
op_detail
=
True
,
thread_sep
=
False
,
time_unit
=
'ms'
):
time_unit
=
'ms'
,
views
=
None
):
r
"""
Print the Summary table. Currently support overview, model, distributed, operator, memory manipulation and userdefined summary.
...
...
@@ -743,6 +760,7 @@ class Profiler:
op_detail(bool, optional): expand each operator detail information, default value is True.
thread_sep(bool, optional): print op table each thread, default value is False.
time_unit(str, optional): time unit for display, can be chosen form ['s', 'ms', 'us', 'ns'], default value is 'ms'.
views(list[SummaryView], optional): summary tables to print, default to None means all views to be printed.
Examples:
.. code-block:: python
...
...
@@ -770,7 +788,8 @@ class Profiler:
sorted_by
=
sorted_by
,
op_detail
=
op_detail
,
thread_sep
=
thread_sep
,
time_unit
=
time_unit
))
time_unit
=
time_unit
,
views
=
views
))
def
get_profiler
(
config_path
):
...
...
python/paddle/profiler/profiler_statistic.py
浏览文件 @
194d16c1
...
...
@@ -700,7 +700,10 @@ def _build_table(statistic_data,
thread_sep
=
False
,
time_unit
=
'ms'
,
row_limit
=
100
,
max_src_column_width
=
75
):
max_src_column_width
=
75
,
views
=
None
):
from
.profiler
import
SummaryView
"""Prints a summary of events."""
# format table row
SPACING_SIZE
=
2
...
...
@@ -749,6 +752,9 @@ def _build_table(statistic_data,
total_time
=
statistic_data
.
time_range_summary
.
get_cpu_range_sum
(
TracerEventType
.
ProfileStep
)
if
views
is
None
or
SummaryView
.
DeviceView
in
views
:
###### Print Device Summary ######
headers
=
[
'Device'
,
'Utilization (%)'
]
name_column_width
=
30
...
...
@@ -769,13 +775,14 @@ def _build_table(statistic_data,
append
(
header_sep
)
row_values
=
[
'CPU(Process)'
,
format_ratio
(
float
(
statistic_data
.
extra_info
[
'Process Cpu Utilization'
]))
format_ratio
(
float
(
statistic_data
.
extra_info
[
'Process Cpu Utilization'
]))
]
append
(
row_format
.
format
(
*
row_values
))
row_values
=
[
'CPU(System)'
,
format_ratio
(
float
(
statistic_data
.
extra_info
[
'System Cpu Utilization'
]))
format_ratio
(
float
(
statistic_data
.
extra_info
[
'System Cpu Utilization'
]))
]
append
(
row_format
.
format
(
*
row_values
))
for
gpu_name
in
statistic_data
.
time_range_summary
.
get_gpu_devices
():
...
...
@@ -798,6 +805,7 @@ def _build_table(statistic_data,
if
total_time
==
0
:
return
''
.
join
(
result
)
if
views
is
None
or
SummaryView
.
OverView
in
views
:
###### Print Overview Summary ######
headers
=
[
'Event Type'
,
'Calls'
,
'CPU Time'
,
'Ratio (%)'
]
row_format_list
=
[
""
]
...
...
@@ -869,7 +877,8 @@ def _build_table(statistic_data,
reverse
=
True
)
event_type
,
time
=
sorted_items
[
0
]
row_values
=
[
'{}'
.
format
(
str
(
event_type
).
split
(
'.'
)[
1
]),
cpu_call_times
[
event_type
],
'{}'
.
format
(
str
(
event_type
).
split
(
'.'
)[
1
]),
cpu_call_times
[
event_type
],
format_time
(
time
,
unit
=
time_unit
),
format_ratio
(
float
(
time
)
/
total_time
)
]
...
...
@@ -912,6 +921,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
ModelView
in
views
:
###### Print Model Summary Report ######
model_perspective_items
=
statistic_data
.
event_summary
.
model_perspective_items
if
len
(
model_perspective_items
)
>
1
:
...
...
@@ -1013,6 +1024,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
DistributedView
in
views
:
###### Print Distribution Summary Report ######
if
statistic_data
.
distributed_summary
.
communication_range
:
headers
=
[
...
...
@@ -1087,6 +1100,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
OperatorView
in
views
:
###### Print Operator Summary Report ######
if
statistic_data
.
event_summary
.
items
:
all_row_values
=
[]
...
...
@@ -1119,16 +1134,18 @@ def _build_table(statistic_data,
key
=
lambda
x
:
x
[
1
].
general_gpu_time
,
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUAvg
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
avg_general_gpu_time
,
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMax
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
max_general_gpu_time
,
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMin
:
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
total_op_cpu_time
=
0
total_op_gpu_time
=
0
...
...
@@ -1144,7 +1161,8 @@ def _build_table(statistic_data,
if
total_op_gpu_time
==
0
:
gpu_ratio
=
0
else
:
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
total_op_gpu_time
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
total_op_gpu_time
row_values
=
[
name
,
item
.
call
,
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
item
.
cpu_time
,
unit
=
time_unit
),
...
...
@@ -1154,9 +1172,12 @@ def _build_table(statistic_data,
format_ratio
(
cpu_ratio
)),
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
item
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_ratio
(
gpu_ratio
))
]
all_row_values
.
append
(
row_values
)
...
...
@@ -1174,7 +1195,8 @@ def _build_table(statistic_data,
gpu_ratio
=
float
(
innerop_node
.
general_gpu_time
)
/
item
.
general_gpu_time
if
len
(
innerop_name
)
+
2
>
name_column_width
:
innerop_name
=
innerop_name
[:
name_column_width
-
5
]
innerop_name
=
innerop_name
[:
name_column_width
-
5
]
innerop_name
+=
"..."
row_values
=
[
' {}'
.
format
(
innerop_name
),
innerop_node
.
call
,
...
...
@@ -1191,11 +1213,14 @@ def _build_table(statistic_data,
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
innerop_node
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
innerop_node
.
avg_general_gpu_time
,
format_time
(
innerop_node
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
innerop_node
.
max_general_gpu_time
,
format_time
(
innerop_node
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
innerop_node
.
min_general_gpu_time
,
format_time
(
innerop_node
.
min_general_gpu_time
,
unit
=
time_unit
),
format_ratio
(
gpu_ratio
))
]
...
...
@@ -1208,7 +1233,8 @@ def _build_table(statistic_data,
gpu_ratio
=
float
(
device_node
.
gpu_time
)
/
innerop_node
.
general_gpu_time
if
len
(
device_node_name
)
+
4
>
name_column_width
:
if
len
(
device_node_name
)
+
4
>
name_column_width
:
device_node_name
=
device_node_name
[:
name_column_width
-
7
]
...
...
@@ -1228,20 +1254,21 @@ def _build_table(statistic_data,
format_ratio
(
gpu_ratio
))
]
all_row_values
.
append
(
row_values
)
for
device_node_name
,
device_node
in
item
.
devices
.
items
():
for
device_node_name
,
device_node
in
item
.
devices
.
items
(
):
if
item
.
general_gpu_time
==
0
:
gpu_ratio
=
0
else
:
gpu_ratio
=
float
(
device_node
.
gpu_time
)
/
item
.
general_gpu_time
gpu_ratio
=
float
(
device_node
.
gpu_time
)
/
item
.
general_gpu_time
if
len
(
device_node_name
)
+
2
>
name_column_width
:
device_node_name
=
device_node_name
[:
name_column_width
-
5
]
device_node_name
+=
"..."
row_values
=
[
' {}'
.
format
(
device_node_name
),
device_node
.
call
,
'- / - / - / - / -'
,
' {}'
.
format
(
device_node_name
)
,
device_node
.
call
,
'- / - / - / - / -'
,
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
device_node
.
gpu_time
,
unit
=
time_unit
),
...
...
@@ -1299,6 +1326,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
KernelView
in
views
:
###### Print Kernel Summary Report ######
if
statistic_data
.
event_summary
.
kernel_items
:
all_row_values
=
[]
...
...
@@ -1339,7 +1368,9 @@ def _build_table(statistic_data,
]
all_row_values
.
append
(
row_values
)
headers
=
[
'Name'
,
'Calls'
,
'GPU Total / Avg / Max / Min / Ratio(%)'
]
headers
=
[
'Name'
,
'Calls'
,
'GPU Total / Avg / Max / Min / Ratio(%)'
]
# Calculate the column width
name_column_width
=
90
calltime_width
=
6
...
...
@@ -1384,6 +1415,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
MemoryManipulationView
in
views
:
###### Print Memory Manipulation Summary Report ######
if
statistic_data
.
event_summary
.
memory_manipulation_items
:
all_row_values
=
[]
...
...
@@ -1456,6 +1489,9 @@ def _build_table(statistic_data,
append
(
header_sep
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
UDFView
in
views
:
###### Print UserDefined Summary Report ######
if
statistic_data
.
event_summary
.
userdefined_items
:
all_row_values
=
[]
...
...
@@ -1490,22 +1526,25 @@ def _build_table(statistic_data,
key
=
lambda
x
:
x
[
1
].
general_gpu_time
,
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUAvg
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
avg_general_gpu_time
,
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMax
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
max_general_gpu_time
,
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMin
:
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
for
name
,
item
in
sorted_items
:
if
gpu_total_time
==
0
:
gpu_ratio
=
0
else
:
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
gpu_total_time
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
gpu_total_time
row_values
=
[
name
,
item
.
call
,
...
...
@@ -1517,9 +1556,12 @@ def _build_table(statistic_data,
format_ratio
(
float
(
item
.
cpu_time
)
/
total_time
)),
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
item
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_ratio
(
gpu_ratio
)),
]
all_row_values
.
append
(
row_values
)
...
...
@@ -1573,6 +1615,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
MemoryView
in
views
:
###### Print Memory Summary Report ######
if
statistic_data
.
memory_summary
.
allocated_items
or
statistic_data
.
memory_summary
.
reserved_items
:
for
device_type
,
memory_events
in
statistic_data
.
memory_summary
.
allocated_items
.
items
(
...
...
@@ -1590,8 +1634,9 @@ def _build_table(statistic_data,
]
all_row_values
.
append
(
row_values
)
sorted_reserved_items
=
sorted
(
statistic_data
.
memory_summary
.
reserved_items
[
device_type
].
items
(),
sorted_reserved_items
=
sorted
(
statistic_data
.
memory_summary
.
reserved_items
[
device_type
].
items
(),
key
=
lambda
x
:
x
[
1
].
increase_size
,
reverse
=
True
)
for
event_name
,
item
in
sorted_reserved_items
:
...
...
@@ -1632,8 +1677,8 @@ def _build_table(statistic_data,
statistic_data
.
memory_summary
.
peak_allocation_values
[
device_type
]))
append
(
'Peak Reserved Memory: {}'
.
format
(
statistic_data
.
memory_summary
.
peak_reserved_values
[
device_type
])
)
statistic_data
.
memory_summary
.
peak_reserved_values
[
device_type
])
)
append
(
header_sep
)
append
(
row_format
.
format
(
*
headers
))
append
(
header_sep
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录