MindSpore/mindinsight, commit 50c4c1a5

!275 profiler: record and display timeline info

Merge pull request !275 from zhangyunshu/zys_timeline_0616

Authored by mindspore-ci-bot on Jun 18, 2020; committed via Gitee on Jun 18, 2020.
Parents: 615c8e61, 50b4b0b9

Showing 7 changed files with 698 additions and 50 deletions (+698, -50):
    mindinsight/backend/profiler/profile_api.py           +46    -0
    mindinsight/profiler/analyser/__init__.py               +1    -1
    mindinsight/profiler/analyser/timeline_analyser.py    +340    -0
    mindinsight/profiler/parser/container.py               +91    -0
    mindinsight/profiler/parser/optime_parser.py          +180   -45
    mindinsight/profiler/profiling.py                      +40    -3
    tests/st/func/profiler/test_analyse.py                  +0    -1
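Taken together, the change makes the profiler record per-operator timeline information while parsing HWTS output, persist it as Chrome-trace-style JSON plus a small per-device summary file, and serve both through two new REST endpoints. The summary payload mirrors the fields maintained in TimelineAnalyser._timeline_summary (values below are illustrative, not from a real run):

    # Shape of timeline_summary_{device_id}.json (illustrative values).
    summary = {
        'total_time': 1234.5,    # summed event durations
        'num_of_streams': 2,     # distinct stream ids observed
        'num_of_ops': 180,       # ops reported by framework info
        'op_exe_times': 3600,    # timeline events parsed
    }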
mindinsight/backend/profiler/profile_api.py (+46, -0)

@@ -401,6 +401,52 @@ def get_minddata_pipeline_queue_info():
     return jsonify(op_queue_info)
 
 
+@BLUEPRINT.route("/profile/timeline-summary", methods=["GET"])
+def get_timeline_summary():
+    """
+    Get timeline summary info.
+
+    Returns:
+        Response, the timeline summary info.
+
+    Examples:
+        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
+    """
+    summary_dir = request.args.get("dir")
+    profiler_dir = validate_and_normalize_profiler_path(
+        summary_dir, settings.SUMMARY_BASE_DIR)
+    device_id = request.args.get("device_id", default='0')
+    _ = to_int(device_id, 'device_id')
+
+    analyser = AnalyserFactory.instance().get_analyser(
+        'timeline', profiler_dir, device_id)
+    summary = analyser.get_timeline_summary()
+    return summary
+
+
+@BLUEPRINT.route("/profile/timeline", methods=["GET"])
+def get_timeline_detail():
+    """
+    Get timeline detail.
+
+    Returns:
+        Response, the detail information of timeline.
+
+    Examples:
+        >>> GET http://xxxx/v1/mindinsight/profile/timeline
+    """
+    summary_dir = request.args.get("dir")
+    profiler_dir = validate_and_normalize_profiler_path(
+        summary_dir, settings.SUMMARY_BASE_DIR)
+    device_id = request.args.get("device_id", default='0')
+    _ = to_int(device_id, 'device_id')
+
+    analyser = AnalyserFactory.instance().get_analyser(
+        'timeline', profiler_dir, device_id)
+    timeline = analyser.get_display_timeline()
+    return jsonify(timeline)
+
+
 def init_module(app):
     """
     Init module entry.
 ...
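Once a MindInsight server is up, the two endpoints can be exercised with any HTTP client. A minimal sketch, assuming a server on localhost:8080 and a profiler run stored under a summary directory named run1 (host, port, and directory name are placeholders, not from this commit):

    import requests

    BASE = 'http://localhost:8080/v1/mindinsight'
    params = {'dir': 'run1', 'device_id': '0'}

    # Aggregate statistics: total_time, num_of_streams, num_of_ops, op_exe_times.
    summary = requests.get(BASE + '/profile/timeline-summary', params=params).json()

    # Full trace data, suitable for the timeline view in the UI.
    timeline = requests.get(BASE + '/profile/timeline', params=params).json()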
mindinsight/profiler/analyser/__init__.py (+1, -1)

@@ -14,4 +14,4 @@
 # ============================================================================
 """The analyser module."""
 from . import analyser, minddata_pipeline_analyser, step_trace_analyser, \
-    minddata_analyser
+    minddata_analyser, timeline_analyser
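This one-line change matters more than it looks: the analyser package imports its submodules so the factory can find them, and after it the 'timeline' analyser used elsewhere in this commit resolves to TimelineAnalyser. A usage sketch mirroring the call sites added in profile_api.py and profiling.py (the directory and device id are placeholders):

    from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory

    # 'timeline' resolves to TimelineAnalyser once the module is imported.
    analyser = AnalyserFactory.instance().get_analyser(
        'timeline', '/path/to/profiler_dir', '0')
    print(analyser.get_timeline_summary())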
mindinsight/profiler/analyser/timeline_analyser.py (new file, mode 100644, +340, -0)

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The Timeline Analyser."""
import json
import os

from mindinsight.profiler.analyser.base_analyser import BaseAnalyser
from mindinsight.profiler.parser.container import TimelineContainer
from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
    ProfilerIOException
from mindinsight.profiler.common.log import logger
from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path

SIZE_LIMIT = 20 * 1024 * 1024  # 20MB


class TimelineAnalyser(BaseAnalyser):
    """
    Analyse timeline data from file.
    """
    __col_names__ = ['op_name', 'stream_id', 'start_time', 'duration']
    _output_timeline_data_file_path = 'output_timeline_data_{}.txt'
    _min_cycle_counter_file_path = 'min_cycle_counter_{}.txt'
    _timeline_filename = 'timeline_detail_{}.json'
    _display_filename = 'timeline_display_{}.json'
    _timeline_summary_filename = 'timeline_summary_{}.json'
    _timeline_meta = []
    _timeline_summary = {
        'total_time': 0,
        'num_of_streams': 0,
        'num_of_ops': 0,
        'op_exe_times': 0
    }

    def _load(self):
        """Load data according to the parsed profiling files."""
        self.load_timeline_data()
        self._timeline_summary['op_exe_times'] = len(self._timeline_meta)

    def _filter(self, filter_condition):
        """
        Filter the profiling data according to the filter condition.

        Args:
            filter_condition (dict): The filter condition.
        """

    def get_display_timeline(self):
        """
        Get timeline data for UI display.

        Returns:
            json, the content of timeline data.
        """
        # Search timeline json file under profiling dir.
        file_path = None
        for filename in os.listdir(self._profiling_dir):
            if filename.startswith('timeline_display') and filename.endswith('.json'):
                file_path = os.path.join(self._profiling_dir, filename)
                logger.debug('Display file found.')
                break
            elif filename.startswith('timeline_detail') and filename.endswith('.json'):
                file_path = os.path.join(self._profiling_dir, filename)
                logger.debug('Original file found.')
                break

        file_path = validate_and_normalize_path(
            file_path, raise_key='Invalid timeline json path.'
        )
        timeline = []
        if os.path.exists(file_path):
            try:
                with open(file_path, 'r') as f_obj:
                    timeline = json.load(f_obj)
            except (IOError, OSError) as err:
                logger.error('Error occurred when read timeline display file: %s', err)
                raise ProfilerIOException
        else:
            logger.info('No timeline file. Please check the output path.')

        return timeline

    def get_timeline_summary(self):
        """
        Get timeline summary information for UI display.

        Returns:
            json, the content of timeline summary information.
        """
        file_path = None
        summary_file_name = 'timeline_summary_{}.json'.format(self._device_id)
        if summary_file_name in os.listdir(self._profiling_dir):
            file_path = os.path.join(self._profiling_dir, summary_file_name)
        file_path = validate_and_normalize_path(
            file_path, raise_key='Invalid timeline summary path.'
        )
        timeline_summary = {}
        if os.path.exists(file_path):
            try:
                with open(file_path, 'r') as f_obj:
                    timeline_summary = json.load(f_obj)
            except (IOError, OSError) as err:
                logger.error('Error occurred when read timeline summary file: %s', err)
                raise ProfilerIOException

        return timeline_summary

    def write_timeline(self):
        """Write timeline metadata to file, resizing the display file if needed."""
        # Write timeline to file.
        file_size = self.write_timeline_to_json()

        # If the file size is larger than 20MB, open a new file and
        # write the first 20MB content into it.
        if file_size > SIZE_LIMIT:
            logger.debug('File size is larger than 20MB, will be resized...')
            # write to json file for display
            self.write_timeline_to_json_by_limitation()

    def write_timeline_to_json(self):
        """Write timeline to json."""
        timeline_filename = self._timeline_filename.format(self._device_id)
        timeline_file_path = os.path.join(
            self._profiling_dir,
            timeline_filename
        )
        timeline_file_path = validate_and_normalize_path(
            timeline_file_path, raise_key='Invalid timeline json path.'
        )
        try:
            with open(timeline_file_path, 'w') as json_file:
                json.dump(self._timeline_meta, json_file)
            file_size = os.path.getsize(timeline_file_path)
        except (IOError, OSError) as err:
            logger.error('Error occurred when write timeline full details: %s', err)
            raise ProfilerIOException

        return file_size

    def write_timeline_to_json_by_limitation(self):
        """Write timeline to json by limitation."""
        display_filename = self._display_filename.format(self._device_id)
        display_file_path = os.path.join(
            self._profiling_dir,
            display_filename
        )
        display_file_path = validate_and_normalize_path(
            display_file_path, raise_key='Invalid timeline display json path.'
        )
        try:
            with open(display_file_path, 'w') as json_file:
                json_file.write('[')
                for item in self._timeline_meta:
                    json.dump(item, json_file)
                    file_size = os.path.getsize(display_file_path)
                    if file_size > SIZE_LIMIT:
                        break
                    json_file.write(',')
                json_file.write(']')
        except (IOError, OSError) as err:
            logger.error('Error occurred when write timeline display file: %s', err)
            raise ProfilerIOException

    def write_timeline_summary(self):
        """Write timeline summary to json."""
        timeline_summary_file_path = os.path.join(
            self._profiling_dir,
            self._timeline_summary_filename.format(self._device_id)
        )
        timeline_summary_file_path = validate_and_normalize_path(
            timeline_summary_file_path, raise_key='Invalid timeline summary path.'
        )
        try:
            with open(timeline_summary_file_path, 'w') as json_file:
                json.dump(self._timeline_summary, json_file)
        except (IOError, OSError) as err:
            logger.error('Error occurred when write timeline summary file: %s', err)
            raise ProfilerIOException

    def load_timeline_data(self):
        """Load timeline data from file."""
        file_path = os.path.join(
            self._profiling_dir,
            self._output_timeline_data_file_path.format(self._device_id)
        )
        file_path = validate_and_normalize_path(
            file_path, raise_key='Invalid timeline txt file path.'
        )
        if not os.path.exists(file_path):
            logger.error("Failed to find parsed timeline file.")
            raise ProfilerFileNotFoundException('parsed timeline file')

        stream_count_dict = {}
        try:
            with open(file_path, 'r') as f_obj:
                for line in f_obj:
                    if not line.startswith('=') and not line.startswith('op_name') and \
                            not line.startswith('-'):
                        line_list = line.split()
                        self._parse_timeline_data(line_list)
                        self._update_num_of_streams(line_list, stream_count_dict)
        except (IOError, OSError) as err:
            logger.error('Error occurred when read timeline intermediate file: %s', err)
            raise ProfilerIOException

        # Update timeline summary info
        self._timeline_summary['num_of_streams'] = len(stream_count_dict.keys())

    def _parse_timeline_data(self, line_list):
        """Parse timeline data."""
        factor = 1000
        op_meta = TimelineContainer(line_list)
        timeline_dict = {}
        timeline_dict['name'] = op_meta.op_name
        timeline_dict['ph'] = 'X'
        timeline_dict['pid'] = int(self._device_id)
        timeline_dict['tid'] = op_meta.stream_id
        timeline_dict['ts'] = op_meta.start_time * factor
        dur = op_meta.duration * factor
        timeline_dict['dur'] = dur
        self._timeline_summary['total_time'] += dur
        self._timeline_meta.append(timeline_dict)

    @staticmethod
    def _update_num_of_streams(line_list, stream_count_dict):
        """Update number of streams."""
        stream_id = line_list[1]
        if stream_id not in stream_count_dict.keys():
            stream_count_dict[stream_id] = 1
        else:
            stream_count_dict[stream_id] += 1

    def get_min_cycle_counter_from_file(self):
        """
        Get minimum cycle counter.

        Returns:
            float, the minimum value of the cycle counter.
        """
        file_path = os.path.join(
            self._profiling_dir,
            self._min_cycle_counter_file_path.format(self._device_id)
        )
        file_path = validate_and_normalize_path(
            file_path, raise_key='Invalid min cycle counter file path.'
        )
        if os.path.exists(file_path):
            try:
                with open(file_path, 'r') as f_obj:
                    min_cycle_counter = f_obj.read()
                    min_cycle_counter = float(min_cycle_counter) \
                        if not min_cycle_counter == 'inf' else 0
            except (IOError, OSError) as err:
                logger.error('Error occurred when read minimum cycle counter: %s', err)
                raise ProfilerIOException
        else:
            min_cycle_counter = 0
            logger.info("No min cycle counter recorded.")

        return min_cycle_counter

    def add_all_reduce_info(self, all_reduce_info):
        """
        Add all reduce info into timeline metadata.

        Args:
            all_reduce_info (list<dict>): The metadata of AllReduce operator.
                [
                    {
                        'stream_id_1': [(start_time, end_time, duration, field_name)],
                        ...
                    },
                    {...}
                ]
        """
        logger.info('Adding AllReduce info...')
        factor = 100
        min_cycle_counter = self.get_min_cycle_counter_from_file()
        for step_meta in all_reduce_info:
            for stream_id, time_info_list in step_meta.items():
                for time_info in time_info_list:
                    start, _, dur, name = time_info
                    all_reduce_dict = {}
                    all_reduce_dict['name'] = name
                    all_reduce_dict['ph'] = 'X'
                    # Using 10000 to represent AllReduce
                    all_reduce_dict['pid'] = 10000
                    all_reduce_dict['tid'] = int(stream_id)
                    all_reduce_dict['ts'] = (start - min_cycle_counter) / factor
                    all_reduce_dict['dur'] = dur / factor
                    self._timeline_meta.append(all_reduce_dict)
                    self._timeline_summary['total_time'] += all_reduce_dict['dur']

    def add_framework_info(self, framework_info):
        """
        Add framework info into timeline metadata.

        Args:
            framework_info (dict): The framework metadata.
        """
        logger.info('Adding framework info...')
        framework_obj_list = framework_info.get('object')
        self._timeline_summary['num_of_ops'] = len(framework_obj_list)
        for framework_obj in framework_obj_list:
            op_name = framework_obj[0]
            op_type = framework_obj[1]
            op_full_name = framework_obj[4]
            op_info = framework_obj[5]
            for timeline_obj in self._timeline_meta:
                if op_full_name == timeline_obj.get('name'):
                    timeline_obj['name'] = op_name
                    timeline_obj['args'] = {
                        'type': op_type,
                        'fullname': op_full_name
                    }
                    timeline_obj['args'].update(op_info)
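The entries built by _parse_timeline_data use the Chrome trace event format: 'ph': 'X' marks a complete event carrying both a timestamp 'ts' and a duration 'dur', with 'pid' and 'tid' repurposed for device id and stream id. The display JSON should therefore load in a trace viewer such as chrome://tracing. One entry, with made-up values for illustration:

    # A single timeline event as emitted by _parse_timeline_data
    # (values are illustrative, not from a real run).
    event = {
        'name': 'Conv2D-op1',  # operator name, later rewritten by add_framework_info
        'ph': 'X',             # complete event: has both ts and dur
        'pid': 0,              # device id
        'tid': 7,              # stream id
        'ts': 123450.0,        # start_time * 1000
        'dur': 560.0,          # duration * 1000
    }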
mindinsight/profiler/parser/container.py (new file, mode 100644, +91, -0)

"""The container of metadata used in profiler parser."""


class HWTSContainer:
    """
    HWTS output container.

    Args:
        split_list (list): The split list of metadata in HWTS output file.
    """
    def __init__(self, split_list):
        self._op_name = ''
        self._duration = None
        self._status = split_list[0]
        self._task_id = split_list[6]
        self._cycle_counter = float(split_list[7])
        self._stream_id = split_list[8]

    @property
    def status(self):
        """Get the status of the operator, i.e. Start or End."""
        return self._status

    @property
    def task_id(self):
        """Get the task id of the operator."""
        return self._task_id

    @property
    def cycle_counter(self):
        """Get the cycle counter."""
        return self._cycle_counter

    @property
    def stream_id(self):
        """Get the stream id of the operator."""
        return self._stream_id

    @property
    def op_name(self):
        """Get the name of the operator."""
        return self._op_name

    @op_name.setter
    def op_name(self, name):
        """Set the name of the operator."""
        self._op_name = name

    @property
    def duration(self):
        """Get the duration of the operator execution."""
        return self._duration

    @duration.setter
    def duration(self, value):
        """Set the duration of the operator execution."""
        self._duration = value


class TimelineContainer:
    """
    A container of operator computation metadata.

    Args:
        split_list (list): The split list of metadata in op_compute output file.
    """
    def __init__(self, split_list):
        self._op_name = split_list[0]
        self._stream_id = int(split_list[1])
        self._start_time = float(split_list[2])
        self._duration = float(split_list[3])

    @property
    def op_name(self):
        """Get the name of the operator."""
        return self._op_name

    @property
    def stream_id(self):
        """Get the stream id of the operator."""
        return self._stream_id

    @property
    def start_time(self):
        """Get the execution start time of the operator."""
        return self._start_time

    @property
    def duration(self):
        """Get the duration of the operator execution."""
        return self._duration
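A quick sketch of how these containers are fed by the parsers in this commit. The sample lines below are fabricated; only the field positions, which match the index accesses in each __init__, matter:

    # HWTS trace lines are whitespace-split; fields 0, 6, 7 and 8 are consumed.
    hwts = HWTSContainer("Start x x x x x 42 1234567.0 5".split())
    print(hwts.status, hwts.task_id, hwts.cycle_counter, hwts.stream_id)
    # -> Start 42 1234567.0 5

    # Timeline lines carry op_name, stream_id, start_time, duration.
    op = TimelineContainer("Conv2D-op1 7 12.34 0.56".split())
    print(op.op_name, op.stream_id, op.start_time, op.duration)
    # -> Conv2D-op1 7 12.34 0.56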
mindinsight/profiler/parser/optime_parser.py (+180, -45)

@@ -13,8 +13,15 @@
 # limitations under the License.
 # ============================================================================
 """Op compute time files parser."""
+import os
+
 from tabulate import tabulate
+
 from mindinsight.profiler.common._utils import fwrite_format
+from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
+from mindinsight.profiler.common.log import logger
+from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path
+from mindinsight.profiler.parser.container import HWTSContainer
 
 
 class OPComputeTimeParser:
     """
 ...

@@ -28,11 +35,20 @@ class OPComputeTimeParser:
     _dst_file_title = 'title:op compute time'
     _dst_file_column_title = ['op_name', 'compute_time(ms)', 'stream_id']
+    _timeline_file_title = 'title:timeline info'
+    _timeline_file_column_title = ['op_name', 'stream_id', 'start_time', 'duration']
 
-    def __init__(self, hwts_output_file, output_filename, op_task_info):
+    def __init__(self, hwts_output_file, output_filename, op_task_info,
+                 output_path, device_id):
+        hwts_output_file = validate_and_normalize_path(
+            hwts_output_file, raise_key='Invalid hwts output file path.'
+        )
         self._hwts_output_file = hwts_output_file
         self._output_filename = output_filename
         self._op_task_info = op_task_info
+        self._output_path = output_path
+        self._device_id = device_id
+        self._min_cycle_counter = float("inf")
 
     def _get_op_task_id_map(self):
         """
 ...

@@ -44,78 +60,197 @@ class OPComputeTimeParser:
         op_map_result = []
         hwts_list = []
-        with (open(self._hwts_output_file, 'r')) as data_file:
+
+        if not os.path.exists(self._hwts_output_file):
+            logger.error('The hwts output file does not exist.')
+            raise ProfilerFileNotFoundException('hwts output file')
+
+        with open(self._hwts_output_file, 'r') as data_file:
             lines = data_file.readlines()
             for line in lines:
-                if line.startswith("Start of task"):
-                    line_split = line.split()
-                    hwts_list.append([line_split[0], line_split[6], line_split[7], line_split[8]])
-                if line.startswith('End of task'):
-                    line_split = line.split()
-                    hwts_list.append([line_split[0], line_split[6], line_split[7], line_split[8]])
+                if line.startswith("Start of task") or line.startswith("End of task"):
+                    line_split = line.split()
+                    container = HWTSContainer(line_split)
+                    hwts_list.append(container)
 
         # hwts op map by taskId
         for hwts in hwts_list:
-            if hwts[1] in self._op_task_info.keys():
-                op_map_result.append([self._op_task_info[hwts[1]], hwts[0], hwts[1], hwts[2], hwts[3]])
+            if hwts.task_id in self._op_task_info.keys():
+                hwts.op_name = self._op_task_info[hwts.task_id]
+                op_map_result.append(hwts)
 
         return op_map_result
 
     def execute(self):
         """Execute the parser, compute all op, get op time, and write it to the output file."""
-        tmp_result_data = []
-        op_map_list = self._get_op_task_id_map()
-
-        cur_index = 0
-        length = len(op_map_list)
-
-        while cur_index < length:
-            if cur_index + 1 == length:
-                break
-            op_start = op_map_list[cur_index]
-            op_end = op_map_list[cur_index + 1]
-            if op_start[1] == "Start" and op_end[1] == "End" \
-                    and op_start[0] == op_end[0]:
-                # op_name, task_id, cycle counter, stream_id
-                tmp_result_data.append([op_start[0], op_start[2], int(op_end[3]) - int(op_start[3]), op_start[4]])
-                cur_index += 2
-            else:
-                cur_index += 1
-
-        op_name_time_dict = {}
-        op_name_steamid_dict = {}
-        op_name_count_dict = {}
-        op_name_task_dict = {}
-
-        # compute all op
-        for item in tmp_result_data:
-            if item[0] in op_name_time_dict.keys():
-                op_name_time_dict[item[0]] += float(item[2]) / 1e5  # cycle counter/1*10^5 ms
-                if item[1] == op_name_task_dict[item[0]]:
-                    op_name_count_dict[item[0]] += 1
-            else:
-                op_name_time_dict[item[0]] = float(item[2]) / 1e5
-                op_name_steamid_dict[item[0]] = item[-1]
-                op_name_task_dict[item[0]] = item[1]
-                op_name_count_dict[item[0]] = 1
-
-        result_data = []
-        for op_name, time in op_name_time_dict.items():
-            if op_name in op_name_steamid_dict.keys():
-                stream_id = op_name_steamid_dict[op_name]
-                avg_time = time / op_name_count_dict[op_name]
-                result_data.append([op_name, avg_time, stream_id])
-
-        result_data.sort(key=lambda x: x[0])
-        total_time = 0
-        for item in result_data:
-            total_time += item[1]
-        result_data.append(["total op", total_time, 0])
-
-        fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
-        fwrite_format(self._output_filename,
-                      data_source=tabulate(result_data, self._dst_file_column_title, tablefmt='simple'))
+        # Calculate the execution time of operators,
+        # and update the minimum cycle counter.
+        tmp_result_data = self._calculate_op_execution_time()
+
+        # Convert time units from nanoseconds to milliseconds.
+        # The unit of the cycle counter is 10 nanoseconds.
+        op_name_time_dict = {}
+        op_name_stream_dict = {}
+        op_name_count_dict = {}
+        op_name_task_dict = {}
+        op_name_start_time = {}
+        self._convert_op_time_unit(
+            tmp_result_data, op_name_time_dict, op_name_stream_dict,
+            op_name_count_dict, op_name_task_dict, op_name_start_time
+        )
+
+        result_data = []
+        for op_name, time in op_name_time_dict.items():
+            if op_name in op_name_stream_dict.keys():
+                stream_id = op_name_stream_dict[op_name]
+                avg_time = time / op_name_count_dict[op_name]
+                result_data.append([op_name, avg_time, stream_id])
+
+        timeline_data = []
+        for op_name, time in op_name_time_dict.items():
+            if op_name in op_name_stream_dict.keys():
+                stream_id = op_name_stream_dict[op_name]
+                start_time_list = op_name_start_time.get(op_name)
+                for (start_time, duration) in start_time_list:
+                    timeline_data.append([op_name, stream_id, start_time, duration])
+
+        # Write the metadata of operators into the file,
+        # including operator name, average time, and stream id.
+        self._write_op_time_into_file(result_data)
+        # Write the timeline data into file,
+        # including operator name, stream id, start time, and duration.
+        self._write_timeline_data_into_file(timeline_data)
+
+        # Write the minimum cycle counter into the file.
+        self.write_min_cycle_counter_to_file()
+
+    def _write_op_time_into_file(self, result_data):
+        """
+        Write the metadata of operators into the file, including
+        op name, average time, and stream id.
+
+        Args:
+            result_data (list): The metadata to be written into the file.
+                [
+                    ['op_name_1', 'avg_time_1', 'stream_id_1'],
+                    ['op_name_2', 'avg_time_2', 'stream_id_2'],
+                    [...]
+                ]
+        """
+        result_data.sort(key=lambda x: x[0])
+        total_time = 0
+        for item in result_data:
+            total_time += item[1]
+        result_data.append(["total op", total_time, 0])
+
+        fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
+        fwrite_format(
+            self._output_filename,
+            data_source=tabulate(result_data, self._dst_file_column_title, tablefmt='simple')
+        )
+
+    def _write_timeline_data_into_file(self, timeline_data):
+        """
+        Write the timeline information into the file, including
+        operator name, stream id, start time and duration.
+
+        Args:
+            timeline_data (list): The metadata to be written into the file.
+                [
+                    ['op_name_1', 'stream_id_1', 'start_time_1', 'duration_1'],
+                    ['op_name_2', 'stream_id_2', 'start_time_2', 'duration_2'],
+                    [...]
+                ]
+        """
+        # sorted by start times
+        timeline_data.sort(key=lambda x: float(x[2]))
+        filename = 'output_timeline_data_{}.txt'.format(self._device_id)
+        file_path = os.path.join(self._output_path, filename)
+        file_path = validate_and_normalize_path(
+            file_path, raise_key='Invalid file path of timeline data.'
+        )
+
+        # write to file
+        fwrite_format(file_path, data_source=self._timeline_file_title, is_start=True)
+        fwrite_format(
+            file_path,
+            data_source=tabulate(timeline_data, self._timeline_file_column_title, tablefmt='simple')
+        )
+
+    def _calculate_op_execution_time(self):
+        """
+        Calculate the execution time of each operator.
+
+        Returns:
+            list, including the intermediate data of op execution time.
+        """
+        tmp_result_data = []
+        op_map_list = self._get_op_task_id_map()
+
+        cur_index = 0
+        length = len(op_map_list)
+        min_cycle_counter = float("inf")
+        while cur_index < length:
+            if cur_index + 1 == length:
+                break
+
+            op_start = op_map_list[cur_index]
+            op_end = op_map_list[cur_index + 1]
+            if op_start.status == "Start" and op_end.status == "End" \
+                    and op_start.op_name == op_end.op_name:
+                op_start.duration = op_end.cycle_counter - op_start.cycle_counter
+                tmp_result_data.append(op_start)
+                cur_index += 2
+                if not op_start.op_name.startswith("assign"):
+                    min_cycle_counter = min(min_cycle_counter, op_start.cycle_counter)
+            else:
+                cur_index += 1
+
+        # Update the value of minimum cycle counter.
+        self._min_cycle_counter = min_cycle_counter
+
+        return tmp_result_data
+
+    def _convert_op_time_unit(self, op_data_list, op_name_time_dict, op_name_stream_dict,
+                              op_name_count_dict, op_name_task_dict, op_name_start_time):
+        """
+        Calculate the execution time of operator and convert it into millisecond.
+
+        Args:
+            op_data_list (list): The list of operator metadata.
+            op_name_time_dict (dict): The mapping relation of operator name and its execution time.
+            op_name_stream_dict (dict): The mapping relation of operator name and its stream id.
+            op_name_count_dict (dict): The mapping relation of operator name and its count.
+            op_name_task_dict (dict): The mapping relation of operator name and its task id.
+            op_name_start_time (dict): The mapping relation of operator name and its start time.
+        """
+        factor = 1e5
+        for item in op_data_list:
+            op_name = item.op_name
+            # Unit conversion: converting the cycle counter into ms.
+            op_start_time_str = str((item.cycle_counter - self._min_cycle_counter) / factor)
+            op_duration = item.duration / factor
+            op_duration_str = str(item.duration / factor)
+            if op_name in op_name_time_dict.keys():
+                op_name_time_dict[op_name] += op_duration
+                if item.task_id == op_name_task_dict[op_name]:
+                    op_name_count_dict[op_name] += 1
+                op_name_start_time[op_name].append(
+                    (op_start_time_str, op_duration_str)
+                )
+            else:
+                op_name_time_dict[op_name] = op_duration
+                op_name_stream_dict[op_name] = item.stream_id
+                op_name_task_dict[op_name] = item.task_id
+                op_name_count_dict[op_name] = 1
+                op_name_start_time[op_name] = []
+                op_name_start_time[op_name].append(
+                    (op_start_time_str, op_duration_str)
+                )
+
+    def write_min_cycle_counter_to_file(self):
+        """Write minimum cycle counter into a txt file."""
+        min_cycle_counter = self._min_cycle_counter
+        file_name = 'min_cycle_counter_' + self._device_id + '.txt'
+        file_path = os.path.join(self._output_path, file_name)
+        file_path = validate_and_normalize_path(
+            file_path, raise_key='Invalid min cycle counter file path.'
+        )
+        with open(file_path, 'w') as file:
+            file.write(str(min_cycle_counter))
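The 1e5 factor in _convert_op_time_unit follows from the comment in execute(): one cycle-counter tick is 10 ns, and 1 ms = 1e6 ns, so ticks / 1e5 yields milliseconds. A quick sanity check with a made-up tick count:

    ticks = 250_000            # 250,000 ticks * 10 ns/tick = 2,500,000 ns
    assert ticks / 1e5 == 2.5  # = 2.5 ms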
mindinsight/profiler/profiling.py (+40, -3)

@@ -16,11 +16,14 @@
 import os
 import time
 
+from marshmallow import ValidationError
 from tabulate import tabulate
 
 from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
 from mindinsight.profiler.analyser.integrator import Integrator
 from mindinsight.profiler.common._utils import get_file_names, fwrite_format
+from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
+    ProfilerIOException
 from mindinsight.profiler.common.log import logger
 from mindinsight.profiler.common.validator.checkparam import \
     check_bool, check_subgraph
 ...

@@ -76,6 +79,7 @@ class Profiler:
                  optypes_to_deal='', optypes_not_deal='Variable', job_id=""):
         # get device_id and device_target
         device_target = ""
+        dev_id = ""
         try:
             import mindspore.context as context
             dev_id = str(context.get_context("device_id"))
 ...

@@ -83,7 +87,7 @@ class Profiler:
         except ImportError:
             logger.error("Profiling: fail to import context from mindspore.")
         except ValueError as err:
-            logger.error("Profiling: fail to get context, %s", err.message)
+            logger.error("Profiling: fail to get context, %s", err)
 
         if not dev_id:
             dev_id = os.getenv('DEVICE_ID')
 ...

@@ -185,7 +189,10 @@ class Profiler:
         # get op compute time from hwts data and framework data, write output_op_compute_time.txt
         opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
         opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
-        optime_parser = OPComputeTimeParser(hwts_output_filename, opcompute_output_filename, op_task_dict)
+        optime_parser = OPComputeTimeParser(
+            hwts_output_filename, opcompute_output_filename,
+            op_task_dict, self._output_path, self._dev_id
+        )
         optime_parser.execute()
 
         # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
 ...

@@ -216,6 +223,9 @@ class Profiler:
         # analyse step trace info
         self._analyse_step_trace(source_path, framework_parser)
 
+        # analyse timeline info
+        self._analyse_timeline()
+
     def _analyse_step_trace(self, source_path, framework_parser):
         """
         Analyse step trace data and save the result.
 ...

@@ -240,7 +250,34 @@ class Profiler:
         parser.parse_and_save()
         # print parser result
         parser.show()
+        logger.info("Finish save the intermediate result %s", step_trace_intermediate_file_path)
+
+    def _analyse_timeline(self):
+        """
+        Analyse and parse timeline info.
+        """
+        # Get framework info
+        aicoredetail_analyser = AnalyserFactory.instance().get_analyser(
+            'aicore_detail', self._output_path, self._dev_id
+        )
+        framework_info = aicoredetail_analyser.query()
+
+        # Get all reduce info
+        step_trace_analyser = AnalyserFactory.instance().get_analyser(
+            'step_trace', self._output_path, self._dev_id
+        )
+        all_reduce_info = step_trace_analyser.query_for_all_reduce()
+
+        # Get timeline info
+        timeline_analyser = AnalyserFactory.instance().get_analyser(
+            'timeline', self._output_path, self._dev_id
+        )
+        timeline_analyser.add_framework_info(framework_info)
+        timeline_analyser.add_all_reduce_info(all_reduce_info)
+        try:
+            timeline_analyser.write_timeline()
+            timeline_analyser.write_timeline_summary()
+        except (ProfilerIOException, ProfilerFileNotFoundException, ValidationError) as err:
+            logger.warning('Fail to write timeline data: %s', err)
 
     def __del__(self):
         """Disable the profiling collection service, called after training."""
 ...
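End to end, the timeline stage now runs automatically as part of Profiler.analyse(). A minimal sketch of the intended workflow (constructor arguments abbreviated, and the output path is a placeholder; the full signature lives in this file but is not shown by this diff):

    from mindinsight.profiler import Profiler

    profiler = Profiler(output_path='./profiler_data')  # start collection
    # ... run the training job ...
    profiler.analyse()  # op time -> step trace -> (new) timeline files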
tests/st/func/profiler/test_analyse.py (+0, -1)

@@ -72,7 +72,6 @@ class TestProfilerAnalyse(TestCase):
     def test_step_trace_file_exist(self):
         """Test the step trace file has been generated"""
         output_files = os.listdir(self.profiler)
-        assert len(output_files) == 9
         assert self.step_trace_file in output_files
 
     @pytest.mark.level0
 ...
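Dropping the exact-count assertion is consistent with the rest of the commit: the profiler directory now gains several new per-device artifacts (the timeline data txt, min cycle counter txt, and the timeline detail/display/summary JSON files), so pinning len(output_files) == 9 would break on every new artifact, while the remaining membership check still covers what the test is about. To run just this suite, a standard invocation (assuming the test environment is set up):

    pytest tests/st/func/profiler/test_analyse.py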