Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindinsight
提交
1460ab4a
M
mindinsight
项目概览
MindSpore
/
mindinsight
通知
7
Star
3
Fork
2
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindinsight
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1460ab4a
编写于
8月 13, 2020
作者:
Y
yuximiao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
gpu profiler
上级
60aa5e29
变更
8
显示空白变更内容
内联
并排
Showing 8 changed files with 184 additions and 13 deletions
+184
-13
mindinsight/backend/profiler/profile_api.py
mindinsight/backend/profiler/profile_api.py
+1
-1
mindinsight/datavisual/data_transform/data_manager.py
mindinsight/datavisual/data_transform/data_manager.py
+10
-1
mindinsight/datavisual/data_transform/summary_watcher.py
mindinsight/datavisual/data_transform/summary_watcher.py
+8
-4
mindinsight/datavisual/processors/train_task_manager.py
mindinsight/datavisual/processors/train_task_manager.py
+1
-0
mindinsight/profiler/analyser/__init__.py
mindinsight/profiler/analyser/__init__.py
+1
-1
mindinsight/profiler/analyser/gpu_analyser.py
mindinsight/profiler/analyser/gpu_analyser.py
+129
-0
mindinsight/profiler/common/util.py
mindinsight/profiler/common/util.py
+15
-2
mindinsight/profiler/common/validator/validate.py
mindinsight/profiler/common/validator/validate.py
+19
-4
未找到文件。
mindinsight/backend/profiler/profile_api.py
浏览文件 @
1460ab4a
...
...
@@ -114,7 +114,7 @@ def get_profile_device_list():
except
ValidationError
:
raise
ParamValueError
(
"Invalid profiler dir"
)
device_list
=
analyse_device_list_from_profiler_dir
(
profiler_dir_abs
)
device_list
,
_
=
analyse_device_list_from_profiler_dir
(
profiler_dir_abs
)
return
jsonify
(
device_list
)
...
...
mindinsight/datavisual/data_transform/data_manager.py
浏览文件 @
1460ab4a
...
...
@@ -59,14 +59,17 @@ class _BasicTrainJob:
create_time (DateTime): The create time of summary directory.
update_time (DateTime): The latest modify time of summary files directly in the summary directory.
profiler_dir (str): The relative path of profiler directory.
profiler_type (str): The profiler device type.
"""
def
__init__
(
self
,
train_id
,
abs_summary_base_dir
,
abs_summary_dir
,
create_time
,
update_time
,
profiler_dir
):
def
__init__
(
self
,
train_id
,
abs_summary_base_dir
,
abs_summary_dir
,
create_time
,
update_time
,
profiler_dir
,
profiler_type
=
""
):
self
.
_train_id
=
train_id
self
.
_abs_summary_base_dir
=
abs_summary_base_dir
self
.
_abs_summary_dir
=
abs_summary_dir
self
.
_create_time
=
create_time
self
.
_update_time
=
update_time
self
.
_profiler_dir
=
profiler_dir
self
.
_profiler_type
=
profiler_type
@
property
def
abs_summary_dir
(
self
):
...
...
@@ -98,6 +101,11 @@ class _BasicTrainJob:
"""Get update time."""
return
self
.
_update_time
@
property
def
profiler_type
(
self
):
"""Get profiler type"""
return
self
.
_profiler_type
class
CachedTrainJob
:
"""
...
...
@@ -952,6 +960,7 @@ class DataManager:
create_time
=
info
[
'create_time'
],
update_time
=
info
[
'update_time'
],
profiler_dir
=
None
if
profiler
is
None
else
profiler
[
'directory'
],
profiler_type
=
""
if
profiler
is
None
else
profiler
[
'profiler_type'
],
))
self
.
_brief_cache
.
update_cache
(
basic_train_jobs
)
...
...
mindinsight/datavisual/data_transform/summary_watcher.py
浏览文件 @
1460ab4a
...
...
@@ -109,6 +109,7 @@ class SummaryWatcher:
'directory'
:
profiler
[
'directory'
],
'create_time'
:
profiler
[
'ctime'
],
'update_time'
:
profiler
[
'mtime'
],
'profiler_type'
:
profiler
[
'profiler_type'
]
}
directories
.
append
(
directory
)
...
...
@@ -226,13 +227,15 @@ class SummaryWatcher:
elif
entry
.
is_dir
():
profiler_pattern
=
re
.
search
(
self
.
PROFILER_DIRECTORY_REGEX
,
entry
.
name
)
full_dir_path
=
os
.
path
.
join
(
summary_base_dir
,
relative_path
,
entry
.
name
)
if
profiler_pattern
is
None
or
not
self
.
_is_valid_profiler_directory
(
full_dir_path
):
is_valid_profiler_dir
,
profiler_type
=
self
.
_is_valid_profiler_directory
(
full_dir_path
)
if
profiler_pattern
is
None
or
not
is_valid_profiler_dir
:
return
profiler
=
{
'directory'
:
os
.
path
.
join
(
'.'
,
entry
.
name
),
'ctime'
:
ctime
,
'mtime'
:
mtime
,
"profiler_type"
:
profiler_type
}
summary_dict
[
relative_path
]
=
{
...
...
@@ -286,19 +289,20 @@ class SummaryWatcher:
profiler_pattern
=
re
.
search
(
self
.
PROFILER_DIRECTORY_REGEX
,
entry
.
name
)
if
profiler_pattern
is
not
None
and
entry
.
is_dir
():
full_path
=
os
.
path
.
realpath
(
os
.
path
.
join
(
summary_directory
,
entry
.
name
))
if
self
.
_is_valid_profiler_directory
(
full_path
):
if
self
.
_is_valid_profiler_directory
(
full_path
)
[
0
]
:
return
True
return
False
def
_is_valid_profiler_directory
(
self
,
directory
):
profiler_type
=
""
try
:
from
mindinsight.profiler.common.util
import
analyse_device_list_from_profiler_dir
device_list
=
analyse_device_list_from_profiler_dir
(
directory
)
device_list
,
profiler_type
=
analyse_device_list_from_profiler_dir
(
directory
)
except
ImportError
:
device_list
=
[]
return
bool
(
device_list
)
return
bool
(
device_list
)
,
profiler_type
def
list_summary_directories_by_pagination
(
self
,
summary_base_dir
,
offset
=
0
,
limit
=
10
):
"""
...
...
mindinsight/datavisual/processors/train_task_manager.py
浏览文件 @
1460ab4a
...
...
@@ -144,6 +144,7 @@ class TrainTaskManager(BaseProcessor):
update_time
=
basic_info
.
update_time
.
strftime
(
'%Y-%m-%d %H:%M:%S'
),
profiler_dir
=
basic_info
.
profiler_dir
,
cache_status
=
train_job
.
cache_status
.
value
,
profiler_type
=
basic_info
.
profiler_type
,
)
if
train_job
.
cache_status
==
CacheStatus
.
CACHED
:
...
...
mindinsight/profiler/analyser/__init__.py
浏览文件 @
1460ab4a
...
...
@@ -14,4 +14,4 @@
# ============================================================================
"""The analyser module."""
from
.
import
analyser
,
minddata_pipeline_analyser
,
step_trace_analyser
,
\
minddata_analyser
,
timeline_analyser
minddata_analyser
,
timeline_analyser
,
gpu_analyser
mindinsight/profiler/analyser/gpu_analyser.py
0 → 100644
浏览文件 @
1460ab4a
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The gpu base analyser."""
import
csv
import
os
from
mindinsight.profiler.analyser.base_analyser
import
BaseAnalyser
from
mindinsight.profiler.common.log
import
logger
class GpuAnalyser(BaseAnalyser):
    """
    Gpu base analyser.

    Loads one GPU profiling CSV file and filters its rows. Subclasses set
    `_csv_file_to_analyse` to a file name template with a single `{}`
    placeholder (filled with the device id) and may override
    `_convert_field_type` to cast the string fields read from the file.

    NOTE(review): `_profiling_dir`, `_device_id`, `_data`, `_result` and
    `_default_filter` are not defined here; presumably they come from
    `BaseAnalyser` - confirm against that class.
    """
    # File name template of the CSV file to load; overridden by subclasses.
    _csv_file_to_analyse = ""

    def _load(self):
        """
        Load the rows of the GPU profiling CSV file into `self._data`.

        The file path is the profiling directory joined with
        `_csv_file_to_analyse` formatted with the device id. If the file does
        not exist a warning is logged and nothing is loaded. The first row is
        treated as a header and skipped; every following row is passed through
        `_convert_field_type` before being appended.
        """
        csv_file_path = os.path.join(
            self._profiling_dir,
            self._csv_file_to_analyse.format(self._device_id)
        )
        if not os.path.isfile(csv_file_path):
            logger.warning('The file <%s> does not exist.', csv_file_path)
            return

        # The csv module expects files opened with newline='' so that it can
        # handle line endings (including ones embedded in quoted fields).
        with open(csv_file_path, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            # Skip the header row. The default value keeps an empty file from
            # raising StopIteration; the loop below then simply loads nothing.
            next(csv_reader, None)
            for info in csv_reader:
                self._data.append(self._convert_field_type(info))

    @staticmethod
    def _convert_field_type(row):
        """
        Convert the field type to the specific type.

        The base implementation performs no conversion; subclasses override
        it to cast individual columns.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        return row

    def _filter(self, filter_condition):
        """
        Filter the profiling data according to the filter condition.

        Keeps the items of `self._data` for which `self._default_filter`
        returns a truthy value and stores them in `self._result`.

        Args:
            filter_condition (dict): The filter condition.
        """
        self._result = [
            item for item in self._data
            if self._default_filter(item, filter_condition)
        ]
class GpuOpTypeAnalyser(GpuAnalyser):
    """Analyser for the per-operator-type GPU profiling data."""
    # Column names of the analysed data, in file order.
    _col_names = [
        "op_type",
        "type_occurrences",
        "total_time",
        "proportion",
        "avg_time",
    ]
    # File name template; the placeholder is formatted with the device id
    # when the file path is built.
    _csv_file_to_analyse = 'gpu_op_type_info_{}.csv'

    @staticmethod
    def _convert_field_type(row):
        """
        Cast the fields of one parsed row to their column types.

        The first field is kept as is, the second becomes an int and the
        remaining three become floats. Fields beyond the fifth are dropped.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        # One entry per output column; None means "keep the value unchanged".
        casts = (None, int, float, float, float)
        return [
            row[idx] if cast is None else cast(row[idx])
            for idx, cast in enumerate(casts)
        ]
class GpuOpInfoAnalyser(GpuAnalyser):
    """Analyser for the per-operator detail GPU profiling data."""
    # Column names of the analysed data, in file order.
    _col_names = [
        "op_side",
        "op_type",
        "op_name",
        "op_full_name",
        "op_occurrences",
        "op_total_time",
        "op_avg_time",
        "proportion",
        "cuda_activity_cost_time",
        "cuda_activity_call_count",
    ]
    # File name template; the placeholder is formatted with the device id
    # when the file path is built.
    _csv_file_to_analyse = 'gpu_op_detail_info_{}.csv'

    @staticmethod
    def _convert_field_type(row):
        """
        Cast the fields of one parsed row to their column types.

        The first four fields are kept as is, the fifth becomes an int, the
        next four become floats and the tenth becomes an int. Fields beyond
        the tenth are dropped.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        # One entry per output column; None means "keep the value unchanged".
        casts = (None, None, None, None, int, float, float, float, float, int)
        return [
            row[idx] if cast is None else cast(row[idx])
            for idx, cast in enumerate(casts)
        ]
class GpuCudaActivityAnalyser(GpuAnalyser):
    """Analyser for the GPU activity profiling data."""
    # Column names of the analysed data, in file order.
    _col_names = [
        "name",
        "type",
        "op_full_name",
        "stream_id",
        "block_dim",
        "grid_dim",
        "occurrences",
        "total_duration",
        "avg_duration",
        "max_duration",
        "min_duration",
    ]
    # File name template; the placeholder is formatted with the device id
    # when the file path is built.
    _csv_file_to_analyse = 'gpu_activity_data_{}.csv'

    @staticmethod
    def _convert_field_type(row):
        """
        Cast the fields of one parsed row to their column types.

        The first six fields are kept as is, the seventh becomes an int and
        the next four become floats. Fields beyond the eleventh are dropped.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        # One entry per output column; None means "keep the value unchanged".
        casts = (None, None, None, None, None, None, int, float, float, float, float)
        return [
            row[idx] if cast is None else cast(row[idx])
            for idx, cast in enumerate(casts)
        ]
mindinsight/profiler/common/util.py
浏览文件 @
1460ab4a
...
...
@@ -36,8 +36,10 @@ def analyse_device_list_from_profiler_dir(profiler_dir):
list, the device_id list.
"""
profiler_file_prefix
=
[
"timeline_display"
,
"output_op_compute_time"
]
gpu_profiler_file_prefix
=
[
"gpu_op_detail_info"
,
"gpu_activity_data"
,
"gpu_op_type_info"
]
device_id_list
=
set
()
gpu_device_id_list
=
set
()
for
_
,
_
,
filenames
in
os
.
walk
(
profiler_dir
):
for
filename
in
filenames
:
if
filename
.
startswith
(
"step_trace_raw"
):
...
...
@@ -51,8 +53,19 @@ def analyse_device_list_from_profiler_dir(profiler_dir):
if
device_num
.
isdigit
()
and
'_'
.
join
(
items
[:
-
1
])
in
profiler_file_prefix
:
device_id_list
.
add
(
device_num
)
return
sorted
(
list
(
device_id_list
))
elif
device_num
.
isdigit
()
and
'_'
.
join
(
items
[:
-
1
])
in
gpu_profiler_file_prefix
:
gpu_device_id_list
.
add
(
device_num
)
if
device_id_list
:
result_list
=
sorted
(
list
(
device_id_list
))
profiler_type
=
"ascend"
elif
gpu_device_id_list
:
result_list
=
sorted
(
list
(
gpu_device_id_list
))
profiler_type
=
"gpu"
else
:
result_list
=
[]
profiler_type
=
""
return
result_list
,
profiler_type
def
query_latest_trace_time_file
(
profiler_dir
,
device_id
=
0
):
...
...
mindinsight/profiler/common/validator/validate.py
浏览文件 @
1460ab4a
...
...
@@ -27,6 +27,13 @@ AICORE_TYPE_COL = ["op_type", "execution_time", "execution_frequency", "precent"
AICORE_DETAIL_COL
=
[
"op_name"
,
"op_type"
,
"avg_execution_time"
,
"subgraph"
,
"full_op_name"
]
AICPU_COL
=
[
"serial_number"
,
"op_type"
,
"total_time"
,
"dispatch_time"
,
"run_start"
,
"run_end"
]
GPU_TYPE_COL
=
[
"op_type"
,
"type_occurrences"
,
"total_time"
,
"proportion"
,
"avg_time"
]
GPU_ACTIVITY_COL
=
[
"name"
,
"type"
,
"op_full_name"
,
"stream_id"
,
"block_dim"
,
"grid_dim"
,
"occurrences"
,
"total_duration"
,
"avg_duration"
,
"max_duration"
,
"min_duration"
]
GPU_DETAIL_COL
=
[
"op_side"
,
"op_type"
,
"op_name"
,
"op_full_name"
,
"op_occurrences"
,
"op_total_time"
,
"op_avg_time"
,
"proportion"
,
"cuda_activity_cost_time"
,
"cuda_activity_call_count"
]
MINDDATA_PIPELINE_COL
=
[
'op_id'
,
'op_type'
,
'num_workers'
,
'output_queue_average_size'
,
'output_queue_length'
,
'output_queue_usage_rate'
,
'sample_interval'
,
...
...
@@ -67,10 +74,20 @@ def validate_condition(search_condition):
search_scope
=
AICORE_TYPE_COL
elif
op_type
==
"aicore_detail"
:
search_scope
=
AICORE_DETAIL_COL
elif
op_type
==
"gpu_op_type"
:
search_scope
=
GPU_TYPE_COL
elif
op_type
==
"gpu_op_info"
:
search_scope
=
GPU_DETAIL_COL
elif
op_type
==
"gpu_cuda_activity"
:
search_scope
=
GPU_ACTIVITY_COL
else
:
raise
ProfilerOpTypeException
(
"The op_type must in ['aicpu', 'aicore_type', 'aicore_detail']"
)
raise
ProfilerOpTypeException
(
"The op_type must in ['aicpu', 'aicore_type', 'aicore_detail', "
"'gpu_op_type', 'gpu_op_info', 'gpu_cuda_activity']"
)
else
:
raise
ProfilerOpTypeException
(
"The op_type must in ['aicpu', 'aicore_type', 'aicore_detail']"
)
raise
ProfilerOpTypeException
(
"The op_type must in ['aicpu', 'aicore_type', 'aicore_detail', "
"'gpu_op_type', 'gpu_op_info', 'gpu_cuda_activity']"
)
if
"group_condition"
in
search_condition
:
validate_group_condition
(
search_condition
)
...
...
@@ -199,8 +216,6 @@ def validate_filter_condition(search_condition):
if
"op_name"
in
filter_condition
:
op_name_condition
=
filter_condition
.
get
(
"op_name"
)
validate_op_filter_condition
(
op_name_condition
)
if
"op_type"
not
in
filter_condition
and
"op_name"
not
in
filter_condition
:
raise
ProfilerFilterConditionException
(
"The key of filter_condition is not support"
)
def
validate_and_set_job_id_env
(
job_id_env
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录