Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
VisualDL
提交
3bc3e2e6
V
VisualDL
项目概览
PaddlePaddle
/
VisualDL
1 年多 前同步成功
通知
88
Star
4655
Fork
642
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
2
Wiki
5
Wiki
分析
仓库
DevOps
项目成员
Pages
V
VisualDL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
2
合并请求
2
Pages
分析
分析
仓库分析
DevOps
Wiki
5
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3bc3e2e6
编写于
9月 13, 2022
作者:
C
chenjian
提交者:
GitHub
9月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add protobuf file support in profiling
上级
8d3e2818
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
286 addition
and
52 deletion
+286
-52
requirements.txt
requirements.txt
+2
-1
visualdl/component/profiler/parser/event_node.py
visualdl/component/profiler/parser/event_node.py
+152
-5
visualdl/component/profiler/parser/utils.py
visualdl/component/profiler/parser/utils.py
+25
-3
visualdl/component/profiler/profiler_data.py
visualdl/component/profiler/profiler_data.py
+66
-32
visualdl/component/profiler/profiler_reader.py
visualdl/component/profiler/profiler_reader.py
+41
-11
未找到文件。
requirements.txt
浏览文件 @
3bc3e2e6
...
...
@@ -7,4 +7,5 @@ protobuf >= 3.11.0
requests
six
>= 1.14.0
matplotlib
pandas
\ No newline at end of file
pandas
packaging
\ No newline at end of file
visualdl/component/profiler/parser/event_node.py
浏览文件 @
3bc3e2e6
...
...
@@ -17,6 +17,9 @@ import functools
import
json
import
re
import
sys
import
tempfile
from
.utils
import
traverse_tree
_show_name_pattern
=
re
.
compile
(
r
'(.+)(\[.+\])'
)
_show_tid_pattern
=
re
.
compile
(
r
'\w+(\(.+\))'
)
...
...
@@ -78,6 +81,25 @@ class HostNode:
self
.
mem_node
=
[]
return
self
@
classmethod
def
from_protobuf
(
cls
,
obj
):
self
=
cls
()
self
.
name
=
obj
.
name
self
.
type
=
str
(
obj
.
type
).
split
(
'.'
)[
1
]
self
.
start_ns
=
obj
.
start_ns
self
.
end_ns
=
obj
.
end_ns
self
.
process_id
=
obj
.
process_id
self
.
thread_id
=
obj
.
thread_id
self
.
correlation_id
=
obj
.
correlation_id
self
.
input_shapes
=
obj
.
input_shapes
self
.
dtypes
=
obj
.
dtypes
self
.
callstack
=
obj
.
callstack
self
.
children_node
=
[]
self
.
runtime_node
=
[]
self
.
device_node
=
[]
self
.
mem_node
=
[]
return
self
class
MemNode
:
def
__init__
(
self
):
...
...
@@ -118,6 +140,22 @@ class MemNode:
'peak_reserved'
]
if
'peak_reserved'
in
json_obj
[
'args'
]
else
0
return
self
@
classmethod
def
from_protobuf
(
cls
,
obj
):
self
=
cls
()
self
.
type
=
str
(
obj
.
type
).
split
(
'.'
)[
1
]
self
.
timestamp_ns
=
obj
.
timestamp_ns
self
.
addr
=
hex
(
int
(
obj
.
addr
))
self
.
process_id
=
obj
.
process_id
self
.
thread_id
=
obj
.
thread_id
self
.
increase_bytes
=
obj
.
increase_bytes
self
.
place
=
obj
.
place
self
.
current_allocated
=
obj
.
current_allocated
self
.
current_reserved
=
obj
.
current_reserved
self
.
peak_allocated
=
obj
.
peak_allocated
self
.
peak_reserved
=
obj
.
peak_reserved
return
self
class
DeviceNode
:
def
__init__
(
self
):
...
...
@@ -176,9 +214,35 @@ class DeviceNode:
"warps per SM"
]
if
"warps per SM"
in
json_obj
[
'args'
]
else
0
return
self
@
classmethod
def
from_protobuf
(
cls
,
obj
):
self
=
cls
()
self
.
name
=
obj
.
name
self
.
type
=
str
(
obj
.
type
).
split
(
'.'
)[
1
]
self
.
start_ns
=
obj
.
start_ns
self
.
end_ns
=
obj
.
end_ns
self
.
device_id
=
obj
.
device_id
self
.
stream_id
=
obj
.
stream_id
self
.
context_id
=
obj
.
context_id
self
.
correlation_id
=
obj
.
correlation_id
self
.
block_x
,
self
.
block_y
,
self
.
block_z
=
[
obj
.
block_x
,
obj
.
block_y
,
obj
.
block_z
]
self
.
grid_x
,
self
.
grid_y
,
self
.
grid_z
=
[
obj
.
grid_x
,
obj
.
grid_y
,
obj
.
grid_z
]
self
.
shared_memory
=
obj
.
shared_memory
self
.
registers_per_thread
=
obj
.
registers_per_thread
self
.
num_bytes
=
obj
.
num_bytes
self
.
value
=
obj
.
value
self
.
occupancy
=
obj
.
occupancy
*
100
self
.
blocks_per_sm
=
obj
.
blocks_per_sm
self
.
warps_per_sm
=
obj
.
warps_per_sm
return
self
class
ProfilerResult
:
def
__init__
(
self
,
json_
data
):
def
__init__
(
self
,
data
):
self
.
device_infos
=
None
self
.
span_idx
=
None
self
.
data
=
None
...
...
@@ -187,11 +251,18 @@ class ProfilerResult:
self
.
has_hostnodes
=
True
self
.
has_devicenodes
=
True
self
.
has_memnodes
=
True
self
.
parse
(
json_data
)
self
.
content
=
json_data
self
.
start_in_timeline_ns
=
None
def
parse
(
self
,
json_data
):
if
isinstance
(
data
,
dict
):
self
.
parse_json
(
data
)
self
.
content
=
data
else
:
self
.
parse_protobuf
(
data
)
with
tempfile
.
NamedTemporaryFile
(
"r"
)
as
fp
:
data
.
save
(
fp
.
name
,
"json"
)
fp
.
seek
(
0
)
self
.
content
=
json
.
loads
(
fp
.
read
())
def
parse_json
(
self
,
json_data
):
self
.
schema_version
=
json_data
[
'schemaVersion'
]
self
.
span_idx
=
json_data
[
'span_indx'
]
self
.
device_infos
=
{
...
...
@@ -232,6 +303,82 @@ class ProfilerResult:
memnodes
)
self
.
extra_info
=
json_data
[
'ExtraInfo'
]
def
parse_protobuf
(
self
,
protobuf_data
):
# noqa: C901
self
.
schema_version
=
protobuf_data
.
get_version
()
self
.
span_idx
=
str
(
protobuf_data
.
get_span_indx
())
try
:
self
.
device_infos
=
{
device_id
:
{
'name'
:
device_property
.
name
,
'totalGlobalMem'
:
device_property
.
total_memory
,
'computeMajor'
:
device_property
.
major
,
'computeMinor'
:
device_property
.
minor
}
for
device_id
,
device_property
in
protobuf_data
.
get_device_property
().
items
()
}
except
Exception
:
print
(
"paddlepaddle-gpu version is needed to get GPU device informations."
)
self
.
device_infos
=
{}
self
.
extra_info
=
protobuf_data
.
get_extra_info
()
self
.
start_in_timeline_ns
=
float
(
'inf'
)
self
.
has_hostnodes
=
False
self
.
has_devicenodes
=
False
self
.
has_memnodes
=
False
node_trees
=
protobuf_data
.
get_data
()
new_node_trees
=
{}
for
threadid
,
root
in
node_trees
.
items
():
stack
=
[]
new_stack
=
[]
new_root
=
HostNode
.
from_protobuf
(
root
)
new_node_trees
[
threadid
]
=
new_root
stack
.
append
(
root
)
new_stack
.
append
(
new_root
)
while
stack
:
current_node
=
stack
.
pop
()
new_current_node
=
new_stack
.
pop
()
for
child_node
in
current_node
.
children_node
:
if
self
.
has_hostnodes
is
False
:
self
.
has_hostnodes
=
True
new_child_node
=
HostNode
.
from_protobuf
(
child_node
)
new_current_node
.
children_node
.
append
(
new_child_node
)
stack
.
append
(
child_node
)
new_stack
.
append
(
new_child_node
)
for
runtime_node
in
current_node
.
runtime_node
:
new_runtime_node
=
HostNode
.
from_protobuf
(
runtime_node
)
new_current_node
.
runtime_node
.
append
(
new_runtime_node
)
for
device_node
in
runtime_node
.
device_node
:
new_device_node
=
DeviceNode
.
from_protobuf
(
device_node
)
new_runtime_node
.
device_node
.
append
(
new_device_node
)
for
mem_node
in
current_node
.
mem_node
:
new_mem_node
=
MemNode
.
from_protobuf
(
mem_node
)
new_current_node
.
mem_node
.
append
(
new_mem_node
)
new_node_tree_list
=
traverse_tree
(
new_node_trees
)
for
threadid
,
node_tree_list
in
new_node_tree_list
.
items
():
for
node
in
node_tree_list
[
1
:]:
# skip root
if
node
.
start_ns
<
self
.
start_in_timeline_ns
:
self
.
start_in_timeline_ns
=
node
.
start_ns
for
threadid
,
node_tree_list
in
new_node_tree_list
.
items
():
for
node
in
node_tree_list
:
if
node
!=
node_tree_list
[
0
]:
# skip root
node
.
start_ns
-=
self
.
start_in_timeline_ns
node
.
end_ns
-=
self
.
start_in_timeline_ns
for
runtimenode
in
node
.
runtime_node
:
runtimenode
.
end_ns
-=
self
.
start_in_timeline_ns
runtimenode
.
start_ns
-=
self
.
start_in_timeline_ns
for
device_node
in
runtimenode
.
device_node
:
if
self
.
has_devicenodes
is
False
:
self
.
has_devicenodes
=
True
device_node
.
start_ns
-=
self
.
start_in_timeline_ns
device_node
.
end_ns
-=
self
.
start_in_timeline_ns
for
mem_node
in
node
.
mem_node
:
if
self
.
has_memnodes
is
False
:
self
.
has_memnodes
=
True
mem_node
.
timestamp_ns
-=
self
.
start_in_timeline_ns
self
.
data
=
new_node_trees
def
build_tree
(
# noqa: C901
self
,
hostnodes
,
runtimenodes
,
devicenodes
,
memnodes
):
thread2host_event_nodes
=
collections
.
defaultdict
(
list
)
...
...
visualdl/component/profiler/parser/utils.py
浏览文件 @
3bc3e2e6
...
...
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
collections
import
os
import
sys
StageType
=
[
'Dataloader'
,
'Forward'
,
'Backward'
,
'Optimization'
]
...
...
@@ -466,7 +468,6 @@ def format_time(time, unit='ms', inf_subs='-'):
result
/=
1e6
elif
unit
==
'us'
:
result
/=
1e3
# return '{:.2f}'.format(result)
return
round
(
result
,
2
)
...
...
@@ -474,7 +475,6 @@ def format_ratio(ratio):
r
"""
Transform ratio within [0, 1] to percentage presentation.
"""
# return '{:.2f}'.format(ratio * 100)
return
round
(
ratio
*
100
,
2
)
...
...
@@ -490,5 +490,27 @@ def format_memory(memory, memory_unit='KB'):
result
/=
(
1024
*
1024
)
elif
memory_unit
==
'KB'
:
result
/=
1024
# return '{:.2f}'.format(result)
return
round
(
result
,
2
)
class
RedirectStdStreams
(
object
):
def
__init__
(
self
,
stdout
=
None
,
stderr
=
None
):
self
.
_stdout
=
stdout
or
sys
.
stdout
self
.
_stderr
=
stderr
or
sys
.
stderr
def
__enter__
(
self
):
'''
Replace stdout and stderr to specified stream.
'''
sys
.
stdout
.
flush
()
sys
.
stderr
.
flush
()
self
.
old_stdout_fileno
,
self
.
old_stderr_fileno
=
os
.
dup
(
sys
.
stdout
.
fileno
()),
os
.
dup
(
sys
.
stderr
.
fileno
())
os
.
dup2
(
self
.
_stdout
.
fileno
(),
sys
.
stdout
.
fileno
())
os
.
dup2
(
self
.
_stderr
.
fileno
(),
sys
.
stderr
.
fileno
())
def
__exit__
(
self
,
exc_type
,
exc_value
,
traceback
):
os
.
dup2
(
self
.
old_stdout_fileno
,
sys
.
stdout
.
fileno
())
os
.
dup2
(
self
.
old_stderr_fileno
,
sys
.
stderr
.
fileno
())
os
.
close
(
self
.
old_stdout_fileno
)
os
.
close
(
self
.
old_stderr_fileno
)
visualdl/component/profiler/profiler_data.py
浏览文件 @
3bc3e2e6
...
...
@@ -147,39 +147,73 @@ class ProfilerData:
else
:
device_type
=
'GPU'
gpu_id
=
int
(
next
(
iter
(
self
.
gpu_ids
)))
return
{
"device_type"
:
device_type
,
"CPU"
:
{
"process_utilization"
:
format_ratio
(
float
(
self
.
extra_infos
[
"Process Cpu Utilization"
])),
"system_utilization"
:
format_ratio
(
float
(
self
.
extra_infos
[
"System Cpu Utilization"
]))
},
"GPU"
:
{
"name"
:
self
.
device_infos
[
gpu_id
][
'name'
],
"memory"
:
"{} GB"
.
format
(
format_memory
(
self
.
device_infos
[
gpu_id
][
'totalGlobalMem'
],
'GB'
)),
"compute_capability"
:
'{}.{}'
.
format
(
self
.
device_infos
[
gpu_id
][
'computeMajor'
],
self
.
device_infos
[
gpu_id
][
'computeMinor'
]),
"utilization"
:
format_ratio
(
self
.
gpu_ulitization
),
"sm_efficiency"
:
format_ratio
(
self
.
sm_efficiency
/
self
.
model_perspective_items
[
'ProfileStep'
].
cpu_time
),
"achieved_occupancy"
:
format_ratio
(
self
.
occupancy
),
"tensor_core_percentage"
:
format_ratio
(
self
.
tensorcore_ratio
)
if
gpu_id
in
self
.
device_infos
:
return
{
"device_type"
:
device_type
,
"CPU"
:
{
"process_utilization"
:
format_ratio
(
float
(
self
.
extra_infos
[
"Process Cpu Utilization"
])),
"system_utilization"
:
format_ratio
(
float
(
self
.
extra_infos
[
"System Cpu Utilization"
]))
},
"GPU"
:
{
"name"
:
self
.
device_infos
[
gpu_id
][
'name'
],
"memory"
:
"{} GB"
.
format
(
format_memory
(
self
.
device_infos
[
gpu_id
][
'totalGlobalMem'
],
'GB'
)),
"compute_capability"
:
'{}.{}'
.
format
(
self
.
device_infos
[
gpu_id
][
'computeMajor'
],
self
.
device_infos
[
gpu_id
][
'computeMinor'
]),
"utilization"
:
format_ratio
(
self
.
gpu_ulitization
),
"sm_efficiency"
:
format_ratio
(
self
.
sm_efficiency
/
self
.
model_perspective_items
[
'ProfileStep'
].
cpu_time
),
"achieved_occupancy"
:
format_ratio
(
self
.
occupancy
),
"tensor_core_percentage"
:
format_ratio
(
self
.
tensorcore_ratio
)
}
}
else
:
return
{
"device_type"
:
device_type
,
"CPU"
:
{
"process_utilization"
:
format_ratio
(
float
(
self
.
extra_infos
[
"Process Cpu Utilization"
])),
"system_utilization"
:
format_ratio
(
float
(
self
.
extra_infos
[
"System Cpu Utilization"
]))
},
"GPU"
:
{
"name"
:
"-"
,
"memory"
:
"-"
,
"compute_capability"
:
'-'
,
"utilization"
:
format_ratio
(
self
.
gpu_ulitization
),
"sm_efficiency"
:
format_ratio
(
self
.
sm_efficiency
/
self
.
model_perspective_items
[
'ProfileStep'
].
cpu_time
),
"achieved_occupancy"
:
format_ratio
(
self
.
occupancy
),
"tensor_core_percentage"
:
format_ratio
(
self
.
tensorcore_ratio
)
}
}
}
def
get_model_perspective
(
self
,
time_unit
):
'''
...
...
visualdl/component/profiler/profiler_reader.py
浏览文件 @
3bc3e2e6
...
...
@@ -16,11 +16,14 @@ import os
import
re
from
threading
import
Thread
import
packaging.version
from
multiprocess
import
Process
from
multiprocess
import
Queue
from
.parser.const_description
import
*
# noqa: F403
from
.parser.event_node
import
load_profiler_json
from
.parser.event_node
import
ProfilerResult
from
.parser.utils
import
RedirectStdStreams
from
.run_manager
import
RunManager
from
visualdl.io
import
bfile
...
...
@@ -175,17 +178,44 @@ class ProfilerReader(object):
if
match
:
worker_name
=
match
.
group
(
1
)
if
'.pb'
in
filename
:
try
:
from
paddle.profiler
import
load_profiler_result
except
Exception
:
print
(
'Load paddle.profiler error. Please check paddle >= 2.3.0'
)
exit
(
0
)
profile_result
=
load_profiler_result
(
os
.
path
.
join
(
run
,
filename
))
self
.
run_managers
[
run
].
add_profile_result
(
filename
,
worker_name
,
profile_result
)
def
_load_profiler_protobuf
(
run
,
filename
,
worker_name
):
devnull
=
open
(
os
.
devnull
,
'w+'
)
with
RedirectStdStreams
(
stdout
=
devnull
,
stderr
=
devnull
):
try
:
import
paddle
except
Exception
as
e
:
print
(
'Paddle is required to read protobuf file.
\
Please install [paddlepaddle](https://www.paddlepaddle.org.cn/install/quick?
\
docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html) first.'
)
raise
RuntimeError
(
str
(
e
))
if
packaging
.
version
.
parse
(
'2.4.0'
)
>
packaging
.
version
.
parse
(
paddle
.
__version__
):
raise
RuntimeError
(
"Please make sure paddlepaddle version >= 2.4.0"
)
from
paddle.profiler
import
load_profiler_result
try
:
content
=
load_profiler_result
(
os
.
path
.
join
(
run
,
filename
))
if
content
is
None
:
raise
RuntimeError
(
"Missing required fields."
)
profile_result
=
ProfilerResult
(
content
)
except
Exception
as
e
:
raise
RuntimeError
(
"An error occurred while loading the protobuf file, which may be caused
\
by the outdated version of paddle that generated the profile file.
\
Please make sure protobuf file is exported by paddlepaddle version >= 2.4.0.
\
Error message: {}"
.
format
(
e
))
self
.
profile_result_queue
.
put
(
(
run
,
filename
,
worker_name
,
profile_result
))
Process
(
target
=
_load_profiler_protobuf
,
args
=
(
run
,
filename
,
worker_name
)).
start
()
else
:
def
_load_profiler_json
(
run
,
filename
,
worker_name
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录