change the method to find step trace files

82ed137d · yelihua · 615c8e61 · 82ed137d · 82ed137d · 82ed137d
5 changed files
--- a/mindinsight/backend/profiler/profile_api.py
+++ b/mindinsight/backend/profiler/profile_api.py
@@ -31,6 +31,7 @@ from mindinsight.datavisual.utils.tools import get_train_id, get_profiler_dir, \
    unquote_args, to_int, get_device_id
 from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
 from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser
+from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
 from mindinsight.profiler.proposer.compose_proposer import ComposeProposal
 from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir
 from mindinsight.profiler.common.validator.validate import validate_condition, \
@@ -131,9 +132,13 @@ def get_training_trace_graph():
    graph_type = to_int(graph_type, 'graph_type')
    device_id = request.args.get("device_id", default='0')
    _ = to_int(device_id, 'device_id')
+    graph_info = {}
+    try:
        analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_dir, device_id)
+    except ProfilerFileNotFoundException:
+        return jsonify(graph_info)
    graph_info = analyser.query({
        'filter_condition': {
            'mode': 'step',

--- a/mindinsight/profiler/common/util.py
+++ b/mindinsight/profiler/common/util.py
@@ -106,9 +106,12 @@ def get_summary_for_step_trace(average_info, header):
    tail = get_field_value(average_info, 'tail', header)
    summary = {
        'total_time': total_time,
-        'iteration_interval': calculate_percent(iteration_interval, total_time),
+        'iteration_interval': iteration_interval,
-        'fp_and_bp': calculate_percent(fp_and_bp, total_time),
+        'iteration_interval_percent': calculate_percent(iteration_interval, total_time),
-        'tail': calculate_percent(tail, total_time)
+        'fp_and_bp': fp_and_bp,
+        'fp_and_bp_percent': calculate_percent(fp_and_bp, total_time),
+        'tail': tail,
+        'tail_percent': calculate_percent(tail, total_time)
    }
    return summary

--- a/mindinsight/profiler/parser/step_trace_parser.py
+++ b/mindinsight/profiler/parser/step_trace_parser.py
@@ -21,10 +21,9 @@ from collections import namedtuple
 from decimal import Decimal
 from mindinsight.profiler.common.exceptions.exceptions import ProfilerPathErrorException, \
-    JobIdMismatchException
+    JobIdMismatchException, ProfilerIOException
 from mindinsight.profiler.common.log import logger as log
 from mindinsight.profiler.common.util import get_summary_for_step_trace
-from mindinsight.utils.exceptions import MindInsightException
 StepTraceStruct = namedtuple(
    'TrainingTraceStruct', ['tag_id', 'task_id', 'stream_id', 'sys_count']
@@ -72,25 +71,39 @@ class StepTraceParser:
    def parse_and_save(self):
        """Parse step trace files and save the result."""
        try:
-            source_file = self._get_step_trace_file()
+            source_files = self._get_step_trace_files()
-            self._parse(source_file)
+            self._parse(source_files)
            self._save()
-        except MindInsightException as err:
+        except IOError as err:
-            log.error("Failed to parse and save step trace files.")
            log.exception(err)
+            raise ProfilerIOException()
        else:
            log.info("Finish to save intermediate result for step trace file.")
-    def _get_step_trace_file(self):
+    def _get_step_trace_files(self):
-        """Get step trace file."""
+        """Get step trace files."""
-        profiling_path = self._input_dir
+        # step trace files may be under $profiler_dir or $profiler_dir/data
+        profiler_dir = self._input_dir
+        step_trace_files = self._search_file(profiler_dir)
+        if not step_trace_files:
+            # try to find step trace files under $profiler_dir/data
+            profiler_dir = os.path.join(profiler_dir, 'data')
+            step_trace_files = self._search_file(profiler_dir)
+        if not step_trace_files:
+            raise ProfilerPathErrorException('Training trace file does not exist.')
+        return step_trace_files
+    @staticmethod
+    def _search_file(input_dir):
+        """Search step trace file under specific input directory."""
        # validate input_dir
-        if not os.path.isdir(profiling_path):
+        if not os.path.isdir(input_dir):
            raise ProfilerPathErrorException(
-                '{} does not exist or is not a dir'.format(profiling_path)
+                '{} does not exist or is not a dir'.format(input_dir)
            )
        # get step trace files
-        files = os.listdir(profiling_path)
+        files = os.listdir(input_dir)
        step_trace_files = list(
            filter(
                lambda file: file.startswith('training_trace') and not file.endswith('.done'),
@@ -98,36 +111,46 @@ class StepTraceParser:
            )
        )
        # validate result
-        if not step_trace_files:
-            raise ProfilerPathErrorException('training trace file does not exist')
        if len(step_trace_files) > 1:
-            log.warning("Not enable to parse multiple step trace files yet.")
+            # the format of file name is like
-        step_trace_file = os.path.join(profiling_path, step_trace_files[0])
+            # `training_trace.46.dev.profiler_default_tag.$id.slice_$number`
-        return step_trace_file
+            # use the $number as the sorted key
+            try:
+                step_trace_files.sort(key=lambda path: int(path.rsplit('_', 1)[-1]))
+            except ValueError as err:
+                log.warning("Unable to parse file names: %s. %s", step_trace_files, err)
+                step_trace_files = []
-    def _parse(self, source_file):
+        file_paths = [os.path.join(input_dir, file) for file in step_trace_files]
-        """Parse source step trace file."""
+        log.info("Find %d step trace files.", len(file_paths))
+        return file_paths
+    def _parse(self, source_files):
+        """Parse source step trace files."""
        log.info("Start to parse step trace file.")
+        event_info = {}
+        for source_file in source_files:
            with open(source_file, 'rb') as handler:
                content = handler.read()
-            for step_trace in self._get_next_step_trace(content):
+                for step_trace in self._get_next_step_trace(content, event_info):
                    if self._skip_first_step:
                        self._skip_first_step = False
-                else:
+                        continue
                    self._record_trace_event(step_trace)
        self._record_average_info()
        log.info("Finish to parse step trace file.")
-    def _get_next_step_trace(self, content):
+    def _get_next_step_trace(self, content, event_info):
        """
        Get next step trace info.
        Args:
-            content (bytes): The input step trace info
+            content (bytes): The input step trace info.
+            event_info (dict): The event info.
        Returns:
            Generator, return the step trace one by one.
        """
-        event_info = {}
        for pos in range(0, len(content), 20):
            next_event = self._get_trace_struct(content[pos:pos + self._event_size])
            self._construct_event_info(next_event, event_info)

--- a/mindinsight/profiler/profiling.py
+++ b/mindinsight/profiler/profiling.py
@@ -214,7 +214,10 @@ class Profiler:
            logger.warning(err.message)
        # analyse step trace info
+        try:
            self._analyse_step_trace(source_path, framework_parser)
+        except MindInsightException as err:
+            logger.warning(err.message)
    def _analyse_step_trace(self, source_path, framework_parser):
        """

--- a/tests/st/func/profiler/test_analyse.py
+++ b/tests/st/func/profiler/test_analyse.py
@@ -150,9 +150,12 @@ class TestProfilerAnalyse(TestCase):
        summary = analyser.summary
        assert summary == {
            'total_time': 205.3809,
-            'iteration_interval': '0.1%',
+            'iteration_interval': 0.2038,
-            'fp_and_bp': '57.48%',
+            'iteration_interval_percent': '0.1%',
-            'tail': '42.42%',
+            'fp_and_bp': 118.054,
+            'fp_and_bp_percent': '57.48%',
+            'tail': 87.1231,
+            'tail_percent': '42.42%',
            'total_steps': 322}
    @pytest.mark.level0