Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
194d16c1
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
194d16c1
编写于
8月 19, 2022
作者:
K
kuizhiqing
提交者:
GitHub
8月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Profiler] add views in summary API (#45225)
* add views in summary api * add args in the last position
上级
1aa6adb1
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
829 addition
and
764 deletion
+829
-764
python/paddle/profiler/__init__.py
python/paddle/profiler/__init__.py
+2
-1
python/paddle/profiler/profiler.py
python/paddle/profiler/profiler.py
+21
-2
python/paddle/profiler/profiler_statistic.py
python/paddle/profiler/profiler_statistic.py
+806
-761
未找到文件。
python/paddle/profiler/__init__.py
浏览文件 @
194d16c1
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
from
.profiler
import
ProfilerState
,
ProfilerTarget
from
.profiler
import
ProfilerState
,
ProfilerTarget
from
.profiler
import
make_scheduler
,
export_chrome_tracing
,
export_protobuf
from
.profiler
import
make_scheduler
,
export_chrome_tracing
,
export_protobuf
from
.profiler
import
Profiler
from
.profiler
import
Profiler
from
.profiler
import
SummaryView
from
.profiler
import
TracerEventType
from
.profiler
import
TracerEventType
from
.utils
import
RecordEvent
,
load_profiler_result
from
.utils
import
RecordEvent
,
load_profiler_result
from
.profiler_statistic
import
SortedKeys
from
.profiler_statistic
import
SortedKeys
...
@@ -22,5 +23,5 @@ from .profiler_statistic import SortedKeys
...
@@ -22,5 +23,5 @@ from .profiler_statistic import SortedKeys
__all__
=
[
__all__
=
[
'ProfilerState'
,
'ProfilerTarget'
,
'make_scheduler'
,
'ProfilerState'
,
'ProfilerTarget'
,
'make_scheduler'
,
'export_chrome_tracing'
,
'export_protobuf'
,
'Profiler'
,
'RecordEvent'
,
'export_chrome_tracing'
,
'export_protobuf'
,
'Profiler'
,
'RecordEvent'
,
'load_profiler_result'
,
'SortedKeys'
'load_profiler_result'
,
'SortedKeys'
,
'SummaryView'
]
]
python/paddle/profiler/profiler.py
浏览文件 @
194d16c1
...
@@ -34,6 +34,22 @@ from paddle.profiler import utils
...
@@ -34,6 +34,22 @@ from paddle.profiler import utils
from
.timer
import
benchmark
from
.timer
import
benchmark
class
SummaryView
(
Enum
):
r
"""
SummaryView define the summary view of different contents.
"""
DeviceView
=
0
OverView
=
1
ModelView
=
2
DistributedView
=
3
KernelView
=
4
OperatorView
=
5
MemoryView
=
6
MemoryManipulationView
=
7
UDFView
=
8
class
ProfilerState
(
Enum
):
class
ProfilerState
(
Enum
):
r
"""
r
"""
ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` .
ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` .
...
@@ -734,7 +750,8 @@ class Profiler:
...
@@ -734,7 +750,8 @@ class Profiler:
sorted_by
=
SortedKeys
.
CPUTotal
,
sorted_by
=
SortedKeys
.
CPUTotal
,
op_detail
=
True
,
op_detail
=
True
,
thread_sep
=
False
,
thread_sep
=
False
,
time_unit
=
'ms'
):
time_unit
=
'ms'
,
views
=
None
):
r
"""
r
"""
Print the Summary table. Currently support overview, model, distributed, operator, memory manipulation and userdefined summary.
Print the Summary table. Currently support overview, model, distributed, operator, memory manipulation and userdefined summary.
...
@@ -743,6 +760,7 @@ class Profiler:
...
@@ -743,6 +760,7 @@ class Profiler:
op_detail(bool, optional): expand each operator detail information, default value is True.
op_detail(bool, optional): expand each operator detail information, default value is True.
thread_sep(bool, optional): print op table each thread, default value is False.
thread_sep(bool, optional): print op table each thread, default value is False.
time_unit(str, optional): time unit for display, can be chosen form ['s', 'ms', 'us', 'ns'], default value is 'ms'.
time_unit(str, optional): time unit for display, can be chosen form ['s', 'ms', 'us', 'ns'], default value is 'ms'.
views(list[SummaryView], optional): summary tables to print, default to None means all views to be printed.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -770,7 +788,8 @@ class Profiler:
...
@@ -770,7 +788,8 @@ class Profiler:
sorted_by
=
sorted_by
,
sorted_by
=
sorted_by
,
op_detail
=
op_detail
,
op_detail
=
op_detail
,
thread_sep
=
thread_sep
,
thread_sep
=
thread_sep
,
time_unit
=
time_unit
))
time_unit
=
time_unit
,
views
=
views
))
def
get_profiler
(
config_path
):
def
get_profiler
(
config_path
):
...
...
python/paddle/profiler/profiler_statistic.py
浏览文件 @
194d16c1
...
@@ -700,7 +700,10 @@ def _build_table(statistic_data,
...
@@ -700,7 +700,10 @@ def _build_table(statistic_data,
thread_sep
=
False
,
thread_sep
=
False
,
time_unit
=
'ms'
,
time_unit
=
'ms'
,
row_limit
=
100
,
row_limit
=
100
,
max_src_column_width
=
75
):
max_src_column_width
=
75
,
views
=
None
):
from
.profiler
import
SummaryView
"""Prints a summary of events."""
"""Prints a summary of events."""
# format table row
# format table row
SPACING_SIZE
=
2
SPACING_SIZE
=
2
...
@@ -749,6 +752,9 @@ def _build_table(statistic_data,
...
@@ -749,6 +752,9 @@ def _build_table(statistic_data,
total_time
=
statistic_data
.
time_range_summary
.
get_cpu_range_sum
(
total_time
=
statistic_data
.
time_range_summary
.
get_cpu_range_sum
(
TracerEventType
.
ProfileStep
)
TracerEventType
.
ProfileStep
)
if
views
is
None
or
SummaryView
.
DeviceView
in
views
:
###### Print Device Summary ######
###### Print Device Summary ######
headers
=
[
'Device'
,
'Utilization (%)'
]
headers
=
[
'Device'
,
'Utilization (%)'
]
name_column_width
=
30
name_column_width
=
30
...
@@ -769,13 +775,14 @@ def _build_table(statistic_data,
...
@@ -769,13 +775,14 @@ def _build_table(statistic_data,
append
(
header_sep
)
append
(
header_sep
)
row_values
=
[
row_values
=
[
'CPU(Process)'
,
'CPU(Process)'
,
format_ratio
(
float
(
format_ratio
(
statistic_data
.
extra_info
[
'Process Cpu Utilization'
]))
float
(
statistic_data
.
extra_info
[
'Process Cpu Utilization'
]))
]
]
append
(
row_format
.
format
(
*
row_values
))
append
(
row_format
.
format
(
*
row_values
))
row_values
=
[
row_values
=
[
'CPU(System)'
,
'CPU(System)'
,
format_ratio
(
float
(
statistic_data
.
extra_info
[
'System Cpu Utilization'
]))
format_ratio
(
float
(
statistic_data
.
extra_info
[
'System Cpu Utilization'
]))
]
]
append
(
row_format
.
format
(
*
row_values
))
append
(
row_format
.
format
(
*
row_values
))
for
gpu_name
in
statistic_data
.
time_range_summary
.
get_gpu_devices
():
for
gpu_name
in
statistic_data
.
time_range_summary
.
get_gpu_devices
():
...
@@ -798,6 +805,7 @@ def _build_table(statistic_data,
...
@@ -798,6 +805,7 @@ def _build_table(statistic_data,
if
total_time
==
0
:
if
total_time
==
0
:
return
''
.
join
(
result
)
return
''
.
join
(
result
)
if
views
is
None
or
SummaryView
.
OverView
in
views
:
###### Print Overview Summary ######
###### Print Overview Summary ######
headers
=
[
'Event Type'
,
'Calls'
,
'CPU Time'
,
'Ratio (%)'
]
headers
=
[
'Event Type'
,
'Calls'
,
'CPU Time'
,
'Ratio (%)'
]
row_format_list
=
[
""
]
row_format_list
=
[
""
]
...
@@ -869,7 +877,8 @@ def _build_table(statistic_data,
...
@@ -869,7 +877,8 @@ def _build_table(statistic_data,
reverse
=
True
)
reverse
=
True
)
event_type
,
time
=
sorted_items
[
0
]
event_type
,
time
=
sorted_items
[
0
]
row_values
=
[
row_values
=
[
'{}'
.
format
(
str
(
event_type
).
split
(
'.'
)[
1
]),
cpu_call_times
[
event_type
],
'{}'
.
format
(
str
(
event_type
).
split
(
'.'
)[
1
]),
cpu_call_times
[
event_type
],
format_time
(
time
,
unit
=
time_unit
),
format_time
(
time
,
unit
=
time_unit
),
format_ratio
(
float
(
time
)
/
total_time
)
format_ratio
(
float
(
time
)
/
total_time
)
]
]
...
@@ -912,6 +921,8 @@ def _build_table(statistic_data,
...
@@ -912,6 +921,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
ModelView
in
views
:
###### Print Model Summary Report ######
###### Print Model Summary Report ######
model_perspective_items
=
statistic_data
.
event_summary
.
model_perspective_items
model_perspective_items
=
statistic_data
.
event_summary
.
model_perspective_items
if
len
(
model_perspective_items
)
>
1
:
if
len
(
model_perspective_items
)
>
1
:
...
@@ -1013,6 +1024,8 @@ def _build_table(statistic_data,
...
@@ -1013,6 +1024,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
DistributedView
in
views
:
###### Print Distribution Summary Report ######
###### Print Distribution Summary Report ######
if
statistic_data
.
distributed_summary
.
communication_range
:
if
statistic_data
.
distributed_summary
.
communication_range
:
headers
=
[
headers
=
[
...
@@ -1087,6 +1100,8 @@ def _build_table(statistic_data,
...
@@ -1087,6 +1100,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
OperatorView
in
views
:
###### Print Operator Summary Report ######
###### Print Operator Summary Report ######
if
statistic_data
.
event_summary
.
items
:
if
statistic_data
.
event_summary
.
items
:
all_row_values
=
[]
all_row_values
=
[]
...
@@ -1119,16 +1134,18 @@ def _build_table(statistic_data,
...
@@ -1119,16 +1134,18 @@ def _build_table(statistic_data,
key
=
lambda
x
:
x
[
1
].
general_gpu_time
,
key
=
lambda
x
:
x
[
1
].
general_gpu_time
,
reverse
=
True
)
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUAvg
:
elif
sorted_by
==
SortedKeys
.
GPUAvg
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
avg_general_gpu_time
,
key
=
lambda
x
:
x
[
1
].
avg_general_gpu_time
,
reverse
=
True
)
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMax
:
elif
sorted_by
==
SortedKeys
.
GPUMax
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
max_general_gpu_time
,
key
=
lambda
x
:
x
[
1
].
max_general_gpu_time
,
reverse
=
True
)
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMin
:
elif
sorted_by
==
SortedKeys
.
GPUMin
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
total_op_cpu_time
=
0
total_op_cpu_time
=
0
total_op_gpu_time
=
0
total_op_gpu_time
=
0
...
@@ -1144,7 +1161,8 @@ def _build_table(statistic_data,
...
@@ -1144,7 +1161,8 @@ def _build_table(statistic_data,
if
total_op_gpu_time
==
0
:
if
total_op_gpu_time
==
0
:
gpu_ratio
=
0
gpu_ratio
=
0
else
:
else
:
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
total_op_gpu_time
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
total_op_gpu_time
row_values
=
[
row_values
=
[
name
,
item
.
call
,
'{} / {} / {} / {} / {}'
.
format
(
name
,
item
.
call
,
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
item
.
cpu_time
,
unit
=
time_unit
),
format_time
(
item
.
cpu_time
,
unit
=
time_unit
),
...
@@ -1154,9 +1172,12 @@ def _build_table(statistic_data,
...
@@ -1154,9 +1172,12 @@ def _build_table(statistic_data,
format_ratio
(
cpu_ratio
)),
format_ratio
(
cpu_ratio
)),
'{} / {} / {} / {} / {}'
.
format
(
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
item
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_ratio
(
gpu_ratio
))
format_ratio
(
gpu_ratio
))
]
]
all_row_values
.
append
(
row_values
)
all_row_values
.
append
(
row_values
)
...
@@ -1174,7 +1195,8 @@ def _build_table(statistic_data,
...
@@ -1174,7 +1195,8 @@ def _build_table(statistic_data,
gpu_ratio
=
float
(
innerop_node
.
general_gpu_time
gpu_ratio
=
float
(
innerop_node
.
general_gpu_time
)
/
item
.
general_gpu_time
)
/
item
.
general_gpu_time
if
len
(
innerop_name
)
+
2
>
name_column_width
:
if
len
(
innerop_name
)
+
2
>
name_column_width
:
innerop_name
=
innerop_name
[:
name_column_width
-
5
]
innerop_name
=
innerop_name
[:
name_column_width
-
5
]
innerop_name
+=
"..."
innerop_name
+=
"..."
row_values
=
[
row_values
=
[
' {}'
.
format
(
innerop_name
),
innerop_node
.
call
,
' {}'
.
format
(
innerop_name
),
innerop_node
.
call
,
...
@@ -1191,11 +1213,14 @@ def _build_table(statistic_data,
...
@@ -1191,11 +1213,14 @@ def _build_table(statistic_data,
'{} / {} / {} / {} / {}'
.
format
(
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
innerop_node
.
general_gpu_time
,
format_time
(
innerop_node
.
general_gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
format_time
(
innerop_node
.
avg_general_gpu_time
,
format_time
(
innerop_node
.
avg_general_gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
format_time
(
innerop_node
.
max_general_gpu_time
,
format_time
(
innerop_node
.
max_general_gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
format_time
(
innerop_node
.
min_general_gpu_time
,
format_time
(
innerop_node
.
min_general_gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
format_ratio
(
gpu_ratio
))
format_ratio
(
gpu_ratio
))
]
]
...
@@ -1208,7 +1233,8 @@ def _build_table(statistic_data,
...
@@ -1208,7 +1233,8 @@ def _build_table(statistic_data,
gpu_ratio
=
float
(
gpu_ratio
=
float
(
device_node
.
gpu_time
device_node
.
gpu_time
)
/
innerop_node
.
general_gpu_time
)
/
innerop_node
.
general_gpu_time
if
len
(
device_node_name
)
+
4
>
name_column_width
:
if
len
(
device_node_name
)
+
4
>
name_column_width
:
device_node_name
=
device_node_name
[:
device_node_name
=
device_node_name
[:
name_column_width
name_column_width
-
7
]
-
7
]
...
@@ -1228,20 +1254,21 @@ def _build_table(statistic_data,
...
@@ -1228,20 +1254,21 @@ def _build_table(statistic_data,
format_ratio
(
gpu_ratio
))
format_ratio
(
gpu_ratio
))
]
]
all_row_values
.
append
(
row_values
)
all_row_values
.
append
(
row_values
)
for
device_node_name
,
device_node
in
item
.
devices
.
items
():
for
device_node_name
,
device_node
in
item
.
devices
.
items
(
):
if
item
.
general_gpu_time
==
0
:
if
item
.
general_gpu_time
==
0
:
gpu_ratio
=
0
gpu_ratio
=
0
else
:
else
:
gpu_ratio
=
float
(
gpu_ratio
=
float
(
device_node
.
gpu_time
device_node
.
gpu_time
)
/
item
.
general_gpu_time
)
/
item
.
general_gpu_time
if
len
(
device_node_name
)
+
2
>
name_column_width
:
if
len
(
device_node_name
)
+
2
>
name_column_width
:
device_node_name
=
device_node_name
[:
device_node_name
=
device_node_name
[:
name_column_width
name_column_width
-
5
]
-
5
]
device_node_name
+=
"..."
device_node_name
+=
"..."
row_values
=
[
row_values
=
[
' {}'
.
format
(
device_node_name
),
device_node
.
call
,
' {}'
.
format
(
device_node_name
)
,
'- / - / - / - / -'
,
device_node
.
call
,
'- / - / - / - / -'
,
'{} / {} / {} / {} / {}'
.
format
(
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
device_node
.
gpu_time
,
format_time
(
device_node
.
gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
...
@@ -1299,6 +1326,8 @@ def _build_table(statistic_data,
...
@@ -1299,6 +1326,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
KernelView
in
views
:
###### Print Kernel Summary Report ######
###### Print Kernel Summary Report ######
if
statistic_data
.
event_summary
.
kernel_items
:
if
statistic_data
.
event_summary
.
kernel_items
:
all_row_values
=
[]
all_row_values
=
[]
...
@@ -1339,7 +1368,9 @@ def _build_table(statistic_data,
...
@@ -1339,7 +1368,9 @@ def _build_table(statistic_data,
]
]
all_row_values
.
append
(
row_values
)
all_row_values
.
append
(
row_values
)
headers
=
[
'Name'
,
'Calls'
,
'GPU Total / Avg / Max / Min / Ratio(%)'
]
headers
=
[
'Name'
,
'Calls'
,
'GPU Total / Avg / Max / Min / Ratio(%)'
]
# Calculate the column width
# Calculate the column width
name_column_width
=
90
name_column_width
=
90
calltime_width
=
6
calltime_width
=
6
...
@@ -1384,6 +1415,8 @@ def _build_table(statistic_data,
...
@@ -1384,6 +1415,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
MemoryManipulationView
in
views
:
###### Print Memory Manipulation Summary Report ######
###### Print Memory Manipulation Summary Report ######
if
statistic_data
.
event_summary
.
memory_manipulation_items
:
if
statistic_data
.
event_summary
.
memory_manipulation_items
:
all_row_values
=
[]
all_row_values
=
[]
...
@@ -1456,6 +1489,9 @@ def _build_table(statistic_data,
...
@@ -1456,6 +1489,9 @@ def _build_table(statistic_data,
append
(
header_sep
)
append
(
header_sep
)
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
UDFView
in
views
:
###### Print UserDefined Summary Report ######
###### Print UserDefined Summary Report ######
if
statistic_data
.
event_summary
.
userdefined_items
:
if
statistic_data
.
event_summary
.
userdefined_items
:
all_row_values
=
[]
all_row_values
=
[]
...
@@ -1490,22 +1526,25 @@ def _build_table(statistic_data,
...
@@ -1490,22 +1526,25 @@ def _build_table(statistic_data,
key
=
lambda
x
:
x
[
1
].
general_gpu_time
,
key
=
lambda
x
:
x
[
1
].
general_gpu_time
,
reverse
=
True
)
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUAvg
:
elif
sorted_by
==
SortedKeys
.
GPUAvg
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
avg_general_gpu_time
,
key
=
lambda
x
:
x
[
1
].
avg_general_gpu_time
,
reverse
=
True
)
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMax
:
elif
sorted_by
==
SortedKeys
.
GPUMax
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
max_general_gpu_time
,
key
=
lambda
x
:
x
[
1
].
max_general_gpu_time
,
reverse
=
True
)
reverse
=
True
)
elif
sorted_by
==
SortedKeys
.
GPUMin
:
elif
sorted_by
==
SortedKeys
.
GPUMin
:
sorted_items
=
sorted
(
items
.
items
(),
sorted_items
=
sorted
(
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
items
.
items
(),
key
=
lambda
x
:
x
[
1
].
min_general_gpu_time
)
for
name
,
item
in
sorted_items
:
for
name
,
item
in
sorted_items
:
if
gpu_total_time
==
0
:
if
gpu_total_time
==
0
:
gpu_ratio
=
0
gpu_ratio
=
0
else
:
else
:
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
gpu_total_time
gpu_ratio
=
float
(
item
.
general_gpu_time
)
/
gpu_total_time
row_values
=
[
row_values
=
[
name
,
name
,
item
.
call
,
item
.
call
,
...
@@ -1517,9 +1556,12 @@ def _build_table(statistic_data,
...
@@ -1517,9 +1556,12 @@ def _build_table(statistic_data,
format_ratio
(
float
(
item
.
cpu_time
)
/
total_time
)),
format_ratio
(
float
(
item
.
cpu_time
)
/
total_time
)),
'{} / {} / {} / {} / {}'
.
format
(
'{} / {} / {} / {} / {}'
.
format
(
format_time
(
item
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
avg_general_gpu_time
,
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
max_general_gpu_time
,
unit
=
time_unit
),
format_time
(
item
.
min_general_gpu_time
,
unit
=
time_unit
),
format_ratio
(
gpu_ratio
)),
format_ratio
(
gpu_ratio
)),
]
]
all_row_values
.
append
(
row_values
)
all_row_values
.
append
(
row_values
)
...
@@ -1573,6 +1615,8 @@ def _build_table(statistic_data,
...
@@ -1573,6 +1615,8 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
append
(
''
)
append
(
''
)
if
views
is
None
or
SummaryView
.
MemoryView
in
views
:
###### Print Memory Summary Report ######
###### Print Memory Summary Report ######
if
statistic_data
.
memory_summary
.
allocated_items
or
statistic_data
.
memory_summary
.
reserved_items
:
if
statistic_data
.
memory_summary
.
allocated_items
or
statistic_data
.
memory_summary
.
reserved_items
:
for
device_type
,
memory_events
in
statistic_data
.
memory_summary
.
allocated_items
.
items
(
for
device_type
,
memory_events
in
statistic_data
.
memory_summary
.
allocated_items
.
items
(
...
@@ -1590,8 +1634,9 @@ def _build_table(statistic_data,
...
@@ -1590,8 +1634,9 @@ def _build_table(statistic_data,
]
]
all_row_values
.
append
(
row_values
)
all_row_values
.
append
(
row_values
)
sorted_reserved_items
=
sorted
(
statistic_data
.
memory_summary
.
sorted_reserved_items
=
sorted
(
reserved_items
[
device_type
].
items
(),
statistic_data
.
memory_summary
.
reserved_items
[
device_type
].
items
(),
key
=
lambda
x
:
x
[
1
].
increase_size
,
key
=
lambda
x
:
x
[
1
].
increase_size
,
reverse
=
True
)
reverse
=
True
)
for
event_name
,
item
in
sorted_reserved_items
:
for
event_name
,
item
in
sorted_reserved_items
:
...
@@ -1632,8 +1677,8 @@ def _build_table(statistic_data,
...
@@ -1632,8 +1677,8 @@ def _build_table(statistic_data,
statistic_data
.
memory_summary
.
statistic_data
.
memory_summary
.
peak_allocation_values
[
device_type
]))
peak_allocation_values
[
device_type
]))
append
(
'Peak Reserved Memory: {}'
.
format
(
append
(
'Peak Reserved Memory: {}'
.
format
(
statistic_data
.
memory_summary
.
peak_reserved_values
[
device_type
])
statistic_data
.
memory_summary
.
)
peak_reserved_values
[
device_type
])
)
append
(
header_sep
)
append
(
header_sep
)
append
(
row_format
.
format
(
*
headers
))
append
(
row_format
.
format
(
*
headers
))
append
(
header_sep
)
append
(
header_sep
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录