提交 3956a90a 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!287 Change the method to find the step trace files.

Merge pull request !287 from yelihua/dev_profiler
......@@ -31,6 +31,7 @@ from mindinsight.datavisual.utils.tools import get_train_id, get_profiler_dir, \
unquote_args, to_int, get_device_id
from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser
from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
from mindinsight.profiler.proposer.compose_proposer import ComposeProposal
from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir
from mindinsight.profiler.common.validator.validate import validate_condition, \
......@@ -131,9 +132,13 @@ def get_training_trace_graph():
graph_type = to_int(graph_type, 'graph_type')
device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')
graph_info = {}
try:
analyser = AnalyserFactory.instance().get_analyser(
'step_trace', profiler_dir, device_id)
except ProfilerFileNotFoundException:
return jsonify(graph_info)
analyser = AnalyserFactory.instance().get_analyser(
'step_trace', profiler_dir, device_id)
graph_info = analyser.query({
'filter_condition': {
'mode': 'step',
......
......@@ -106,9 +106,12 @@ def get_summary_for_step_trace(average_info, header):
tail = get_field_value(average_info, 'tail', header)
summary = {
'total_time': total_time,
'iteration_interval': calculate_percent(iteration_interval, total_time),
'fp_and_bp': calculate_percent(fp_and_bp, total_time),
'tail': calculate_percent(tail, total_time)
'iteration_interval': iteration_interval,
'iteration_interval_percent': calculate_percent(iteration_interval, total_time),
'fp_and_bp': fp_and_bp,
'fp_and_bp_percent': calculate_percent(fp_and_bp, total_time),
'tail': tail,
'tail_percent': calculate_percent(tail, total_time)
}
return summary
......
......@@ -21,10 +21,9 @@ from collections import namedtuple
from decimal import Decimal
from mindinsight.profiler.common.exceptions.exceptions import ProfilerPathErrorException, \
JobIdMismatchException
JobIdMismatchException, ProfilerIOException
from mindinsight.profiler.common.log import logger as log
from mindinsight.profiler.common.util import get_summary_for_step_trace
from mindinsight.utils.exceptions import MindInsightException
StepTraceStruct = namedtuple(
'TrainingTraceStruct', ['tag_id', 'task_id', 'stream_id', 'sys_count']
......@@ -72,25 +71,39 @@ class StepTraceParser:
def parse_and_save(self):
"""Parse step trace files and save the result."""
try:
source_file = self._get_step_trace_file()
self._parse(source_file)
source_files = self._get_step_trace_files()
self._parse(source_files)
self._save()
except MindInsightException as err:
log.error("Failed to parse and save step trace files.")
except IOError as err:
log.exception(err)
raise ProfilerIOException()
else:
log.info("Finish to save intermediate result for step trace file.")
def _get_step_trace_file(self):
"""Get step trace file."""
profiling_path = self._input_dir
def _get_step_trace_files(self):
"""Get step trace files."""
# step trace files may be under $profiler_dir or $profiler_dir/data
profiler_dir = self._input_dir
step_trace_files = self._search_file(profiler_dir)
if not step_trace_files:
# try to find step trace files under $profiler_dir/data
profiler_dir = os.path.join(profiler_dir, 'data')
step_trace_files = self._search_file(profiler_dir)
if not step_trace_files:
raise ProfilerPathErrorException('Training trace file does not exist.')
return step_trace_files
@staticmethod
def _search_file(input_dir):
"""Search step trace file under specific input directory."""
# validate input_dir
if not os.path.isdir(profiling_path):
if not os.path.isdir(input_dir):
raise ProfilerPathErrorException(
'{} does not exist or is not a dir'.format(profiling_path)
'{} does not exist or is not a dir'.format(input_dir)
)
# get step trace files
files = os.listdir(profiling_path)
files = os.listdir(input_dir)
step_trace_files = list(
filter(
lambda file: file.startswith('training_trace') and not file.endswith('.done'),
......@@ -98,36 +111,46 @@ class StepTraceParser:
)
)
# validate result
if not step_trace_files:
raise ProfilerPathErrorException('training trace file does not exist')
if len(step_trace_files) > 1:
log.warning("Not enable to parse multiple step trace files yet.")
step_trace_file = os.path.join(profiling_path, step_trace_files[0])
return step_trace_file
# the format of file name is like
# `training_trace.46.dev.profiler_default_tag.$id.slice_$number`
# use $number (the integer after the last underscore) as the sort key
try:
step_trace_files.sort(key=lambda path: int(path.rsplit('_', 1)[-1]))
except ValueError as err:
log.warning("Unable to parse file names: %s. %s", step_trace_files, err)
step_trace_files = []
file_paths = [os.path.join(input_dir, file) for file in step_trace_files]
log.info("Find %d step trace files.", len(file_paths))
return file_paths
def _parse(self, source_file):
"""Parse source step trace file."""
log.info("Start to parse step trace file.")
with open(source_file, 'rb') as handler:
content = handler.read()
for step_trace in self._get_next_step_trace(content):
if self._skip_first_step:
self._skip_first_step = False
else:
def _parse(self, source_files):
"""Parse source step trace files."""
log.info("Start to parse step trace file.")
event_info = {}
for source_file in source_files:
with open(source_file, 'rb') as handler:
content = handler.read()
for step_trace in self._get_next_step_trace(content, event_info):
if self._skip_first_step:
self._skip_first_step = False
continue
self._record_trace_event(step_trace)
self._record_average_info()
log.info("Finish to parse step trace file.")
def _get_next_step_trace(self, content):
def _get_next_step_trace(self, content, event_info):
"""
Get next step trace info.
Args:
content (bytes): The input step trace info
content (bytes): The input step trace info.
event_info (dict): The event info.
Returns:
Generator, return the step trace one by one.
"""
event_info = {}
for pos in range(0, len(content), 20):
next_event = self._get_trace_struct(content[pos:pos + self._event_size])
self._construct_event_info(next_event, event_info)
......@@ -251,7 +274,7 @@ class StepTraceParser:
log.info("Finish add average info for step trace.")
def _save(self):
log.info("Start to save step trace file.")
log.info("Start to save step trace file.")
if not self._header:
return
with open(self._output_path, 'w') as file_handle:
......
......@@ -221,7 +221,10 @@ class Profiler:
logger.warning(err.message)
# analyse step trace info
self._analyse_step_trace(source_path, framework_parser)
try:
self._analyse_step_trace(source_path, framework_parser)
except MindInsightException as err:
logger.warning(err.message)
# analyse timeline info
self._analyse_timeline()
......
......@@ -149,9 +149,12 @@ class TestProfilerAnalyse(TestCase):
summary = analyser.summary
assert summary == {
'total_time': 205.3809,
'iteration_interval': '0.1%',
'fp_and_bp': '57.48%',
'tail': '42.42%',
'iteration_interval': 0.2038,
'iteration_interval_percent': '0.1%',
'fp_and_bp': 118.054,
'fp_and_bp_percent': '57.48%',
'tail': 87.1231,
'tail_percent': '42.42%',
'total_steps': 322}
@pytest.mark.level0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册