From ef2286515f5555bf067fa037f58a041257b9b16c Mon Sep 17 00:00:00 2001
From: zhangyunshu
Date: Tue, 23 Jun 2020 15:55:24 +0800
Subject: [PATCH] profiler: add AI CPU into timeline

---
 .../profiler/analyser/step_trace_analyser.py |  9 ++--
 .../profiler/analyser/timeline_analyser.py   | 33 ++++++++----
 .../profiler/parser/aicpu_data_parser.py     | 51 +++++++++++++++++++
 mindinsight/profiler/parser/optime_parser.py | 20 +++-----
 mindinsight/profiler/profiling.py            | 13 ++---
 5 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/mindinsight/profiler/analyser/step_trace_analyser.py b/mindinsight/profiler/analyser/step_trace_analyser.py
index 2dade02..58ea05e 100644
--- a/mindinsight/profiler/analyser/step_trace_analyser.py
+++ b/mindinsight/profiler/analyser/step_trace_analyser.py
@@ -69,7 +69,7 @@ class StepTraceAnalyser(BaseAnalyser):
 
         return self._result
 
-    def query_for_all_reduce(self, min_cycle_counter):
+    def query_for_all_reduce(self):
         """
         Query for all reduce info.
 
@@ -81,7 +81,7 @@ class StepTraceAnalyser(BaseAnalyser):
         reduce_infos = []
         for row_info in self._data[:-1]:
             row_info_dict = self._get_info_dict_from_row_data(row_info, 'systime')
-            reduce_info = self._sort_reduce_by_time(row_info_dict, min_cycle_counter)
+            reduce_info = self._sort_reduce_by_time(row_info_dict)
             if reduce_info:
                 reduce_infos.append(reduce_info)
 
@@ -252,13 +252,12 @@ class StepTraceAnalyser(BaseAnalyser):
         reduce_events.sort(key=lambda elem: elem[1])
         return reduce_info
 
-    def _sort_reduce_by_time(self, row_info_dict, min_cycle_counter):
+    def _sort_reduce_by_time(self, row_info_dict):
         """
         Sort reduce info by time.
 
         Args:
             row_info_dict (dict): Step trace information.
-            min_cycle_counter (int): The minimum cycle counter.
 
         Returns:
             list, including the all reduce info sorted by start time only.
@@ -275,7 +274,7 @@ class StepTraceAnalyser(BaseAnalyser):
                          if field_name.startswith('stream_') and not field_name.endswith('point')]
         for reduce_field in reduce_fields:
             reduce_start = row_info_dict.get(reduce_field + '_start_point')
-            reduce_start = (reduce_start - min_cycle_counter) / factor \
+            reduce_start = reduce_start / factor \
                 if reduce_start else 0
             reduce_duration = row_info_dict.get(reduce_field)
             reduce_duration = reduce_duration / factor if reduce_duration else 0
diff --git a/mindinsight/profiler/analyser/timeline_analyser.py b/mindinsight/profiler/analyser/timeline_analyser.py
index c266636..731a479 100644
--- a/mindinsight/profiler/analyser/timeline_analyser.py
+++ b/mindinsight/profiler/analyser/timeline_analyser.py
@@ -189,7 +189,7 @@ class TimelineAnalyser(BaseAnalyser):
 
         return timeline_list
 
-    def _parse_timeline_data(self, timeline):
+    def _parse_timeline_data(self, timeline, min_cycle_counter):
         """Parse timeline data."""
         # factor to convert the time unit from 1ms to 1us for timeline display
         factor = 1000
@@ -198,15 +198,15 @@ class TimelineAnalyser(BaseAnalyser):
         timeline_dict['name'] = op_meta.op_name
         timeline_dict['ph'] = 'X'
         timeline_dict['tid'] = op_meta.stream_id
-        timeline_dict['ts'] = op_meta.start_time * factor
+        timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) * factor
         dur = op_meta.duration * factor
         timeline_dict['dur'] = dur
-        if op_meta.pid == 10000:  # AllReduce PID
-            timeline_dict['pid'] = 10000
-        else:
+        if op_meta.pid is None:
             timeline_dict['pid'] = int(self._device_id)
             # Update total time of operator execution.
             self._timeline_summary['total_time'] += dur
+        else:  # AllReduce and AI CPU pid
+            timeline_dict['pid'] = op_meta.pid
         self._timeline_meta.append(timeline_dict)
 
     @staticmethod
@@ -249,14 +249,19 @@ class TimelineAnalyser(BaseAnalyser):
 
         return min_cycle_counter
 
-    def init_timeline(self, all_reduce_info, framework_info):
+    def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter):
         """
         Init timeline metadata, adding all collected info.
 
         Args:
             all_reduce_info (list[list]): The metadata of AllReduce operator.
             framework_info (dict): The framework metadata.
+            aicpu_info (dict): The metadata of AI CPU operator.
+            min_cycle_counter (float): The minimum cycle counter of the timeline.
         """
+        if min_cycle_counter == float('inf'):
+            min_cycle_counter = 0
+
         logger.info('Initiating timeline...')
         timeline_list = self._load_timeline_data()
         self._timeline_summary['op_exe_times'] = len(timeline_list)
@@ -267,10 +272,20 @@ class TimelineAnalyser(BaseAnalyser):
             timeline_list.extend(all_reduce_info)
             timeline_list.sort(key=lambda x: float(x[2]))
 
+        # Add AI CPU data into timeline temp list and sort by start time.
+        aicpu_data = aicpu_info.get('info')
+        if aicpu_data:
+            timeline_list.extend(aicpu_data)
+            timeline_list.sort(key=lambda x: float(x[2]))
+            self._timeline_summary['op_exe_times'] += aicpu_info.get('op_exe_times', 0)
+            self._timeline_summary['num_of_streams'] += aicpu_info.get('num_of_streams', 0)
+            self._timeline_summary['num_of_ops'] += aicpu_info.get('num_of_ops', 0)
+            self._timeline_summary['total_time'] += aicpu_info.get('total_time', 0)
+
         # Init a dict for counting the num of streams.
         stream_count_dict = {}
         for timeline in timeline_list:
-            self._parse_timeline_data(timeline)
+            self._parse_timeline_data(timeline, min_cycle_counter)
             # Updating the collection of streams.
             if len(timeline) == 4:
                 self._update_num_of_streams(timeline, stream_count_dict)
@@ -278,12 +293,12 @@ class TimelineAnalyser(BaseAnalyser):
         # Get framework metadata.
         framework_obj_list = framework_info.get('object')
         # The length of list is the number of operators.
-        self._timeline_summary['num_of_ops'] = len(framework_obj_list)
+        self._timeline_summary['num_of_ops'] += len(framework_obj_list)
         self._add_framework_info(framework_obj_list)
         logger.info('Finished adding info into timeline...')
 
         # Update timeline summary info
-        self._timeline_summary['num_of_streams'] = len(stream_count_dict.keys())
+        self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
 
     def _add_framework_info(self, framework_obj_list):
         """
diff --git a/mindinsight/profiler/parser/aicpu_data_parser.py b/mindinsight/profiler/parser/aicpu_data_parser.py
index 0e32d62..1022315 100644
--- a/mindinsight/profiler/parser/aicpu_data_parser.py
+++ b/mindinsight/profiler/parser/aicpu_data_parser.py
@@ -48,6 +48,8 @@ class DataPreProcessParser:
         self._thread_flag = 7
         self._ms_kernel_run_end_index = 2
         self._other_kernel_run_end_index = 5
+        self._result_list = []
+        self._min_cycle_counter = float('inf')
 
     def _get_source_file(self):
         """Get log file name, which was created by ada service."""
@@ -129,3 +131,52 @@ class DataPreProcessParser:
                 data_source=tabulate(result_list, self._dst_file_column_title,
                                      tablefmt='simple'),
                 is_start=True, is_print=True)
+
+        # For timeline display.
+        self._result_list = result_list
+
+    def query_aicpu_data(self):
+        """
+        Get execution time of AI CPU operator.
+
+        Returns:
+            a dict, the metadata of AI CPU operator execution time.
+        """
+        stream_id = 0  # Default stream id for AI CPU.
+        pid = 9000  # Default pid for AI CPU.
+        factor = 1000  # Convert time unit from 1us to 1ms
+        total_time = 0
+        min_cycle_counter = float('inf')
+        aicpu_info = []
+        op_count_list = []
+        for aicpu_item in self._result_list:
+            if "AI CPU Total Time(ms):" in aicpu_item:
+                total_time = aicpu_item[-1]
+                continue
+
+            op_name = aicpu_item[1]
+            start_time = float(aicpu_item[4]) / factor
+            min_cycle_counter = min(min_cycle_counter, start_time)
+            end_time = float(aicpu_item[5]) / factor
+            duration = end_time - start_time
+            aicpu_info.append([op_name, stream_id, start_time, duration, pid])
+
+            # Record the number of operator types.
+            if op_name not in op_count_list:
+                op_count_list.append(op_name)
+
+        self._min_cycle_counter = min_cycle_counter
+        aicpu_dict = {
+            'info': aicpu_info,
+            'total_time': float(total_time),
+            'op_exe_times': len(aicpu_info),
+            'num_of_ops': len(op_count_list),
+            'num_of_streams': 1
+        }
+
+        return aicpu_dict
+
+    @property
+    def min_cycle_counter(self):
+        """Get minimum cycle counter in AI CPU."""
+        return self._min_cycle_counter
diff --git a/mindinsight/profiler/parser/optime_parser.py b/mindinsight/profiler/parser/optime_parser.py
index d9d37d7..b5e29b1 100755
--- a/mindinsight/profiler/parser/optime_parser.py
+++ b/mindinsight/profiler/parser/optime_parser.py
@@ -122,8 +122,6 @@ class OPComputeTimeParser:
         # Write the timeline data into file,
         # including operator name, stream id, start time, and duration.
         self._write_timeline_data_into_file(timeline_data)
-        # Write the minimum cycle counter into the file.
-        self.write_min_cycle_counter_to_file()
 
     def _write_op_time_into_file(self, result_data):
         """
@@ -201,7 +199,7 @@ class OPComputeTimeParser:
             cur_index += 1
 
         # Update the value of minimum cycle counter.
-        self._min_cycle_counter = min_cycle_counter
+        self._min_cycle_counter = min_cycle_counter / 1e5  # Convert the time unit from 10ns to 1ms
 
         return tmp_result_data
 
@@ -222,7 +220,7 @@ class OPComputeTimeParser:
         for item in op_data_list:
             op_name = item.op_name
             # Unit conversion: converting the cycle counter into ms.
-            op_start_time_str = str((item.cycle_counter - self._min_cycle_counter) / factor)
+            op_start_time_str = str(item.cycle_counter / factor)
             op_duration = item.duration / factor
             op_duration_str = str(item.duration / factor)
             if op_name in op_name_time_dict.keys():
@@ -243,13 +241,7 @@ class OPComputeTimeParser:
                     (op_start_time_str, op_duration_str)
                 )
 
-    def write_min_cycle_counter_to_file(self):
-        """Write minimum cycle counter into a txt file."""
-        min_cycle_counter = self._min_cycle_counter
-        file_name = 'min_cycle_counter_' + self._device_id + '.txt'
-        file_path = os.path.join(self._output_path, file_name)
-        file_path = validate_and_normalize_path(
-            file_path, raise_key='Invalid min cycle counter file path.'
-        )
-        with open(file_path, 'w') as file:
-            file.write(str(min_cycle_counter))
+    @property
+    def min_cycle_counter(self):
+        """Get minimum cycle counter."""
+        return self._min_cycle_counter
diff --git a/mindinsight/profiler/profiling.py b/mindinsight/profiler/profiling.py
index e34c01e..e52a300 100644
--- a/mindinsight/profiler/profiling.py
+++ b/mindinsight/profiler/profiling.py
@@ -196,7 +196,7 @@ class Profiler:
 
         # analyse timeline info
         try:
-            self._analyse_timeline()
+            self._analyse_timeline(aicpu_data_parser, optime_parser)
         except (ProfilerIOException, ProfilerFileNotFoundException, ValidationError) as err:
             logger.warning('Fail to write timeline data: %s', err)
 
@@ -233,14 +233,13 @@ class Profiler:
         logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
         logger.info("The point info is: %s", point_info)
 
-    def _analyse_timeline(self):
+    def _analyse_timeline(self, aicpu_parser, optime_parser):
         """
         Analyse and parse timeline info.
         """
         timeline_analyser = AnalyserFactory.instance().get_analyser(
             'timeline', self._output_path, self._dev_id
         )
-        min_cycle_counter = timeline_analyser.get_min_cycle_counter()
 
         # Get framework info
         aicoredetail_analyser = AnalyserFactory.instance().get_analyser(
@@ -252,14 +251,16 @@ class Profiler:
         step_trace_analyser = AnalyserFactory.instance().get_analyser(
             'step_trace', self._output_path, self._dev_id
         )
-        all_reduce_info = step_trace_analyser.query_for_all_reduce(min_cycle_counter)
+        all_reduce_info = step_trace_analyser.query_for_all_reduce()
 
         # Get timeline info
         logger.info('Start writing timeline info...')
         logger.info('Warm Prompt: It could take a few minutes if you are training '
                     'with a complex network or more than 10 steps.')
-        # Add AllReduce and framework info into timeline
-        timeline_analyser.init_timeline(all_reduce_info, framework_info)
+        # Add info into timeline, such as AI CPU, AllReduce, framework info.
+        aicpu_info = aicpu_parser.query_aicpu_data()
+        min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter)
+        timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info, min_cycle_counter)
         timeline_analyser.write_timeline()
         timeline_analyser.write_timeline_summary()
 
-- 
GitLab
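
For reference, the merge-and-normalize flow the patch wires together can be sketched in a few
lines of standalone Python: AI CPU rows carry their own pid (9000 by default), device operators
fall back to the device id, and every start time is shifted by the shared minimum before the
ms-to-us conversion for timeline display. The sample rows and the build_timeline helper below are
hypothetical illustrations rather than MindInsight code; only the event key names, the pid
convention, and the unit conversion follow the hunks above.

import json

AI_CPU_PID = 9000  # default pid used for AI CPU rows in the patch

# Each row: [op_name, stream_id, start_time_ms, duration_ms] (+ pid for AI CPU/AllReduce rows).
device_ops = [
    ['Conv2D-op1', 3, 12.500, 0.030],
    ['MatMul-op7', 3, 12.540, 0.015],
]
aicpu_ops = [
    ['GetNext-op0', 0, 12.480, 0.200, AI_CPU_PID],
]

def build_timeline(device_id, device_rows, aicpu_rows):
    """Merge device and AI CPU rows into trace events sharing one time origin."""
    rows = device_rows + aicpu_rows
    rows.sort(key=lambda row: float(row[2]))        # sort by start time, as init_timeline does
    min_start = min(float(row[2]) for row in rows)  # plays the role of min_cycle_counter
    factor = 1000                                   # convert the time unit from 1ms to 1us
    events = []
    for row in rows:
        events.append({
            'name': row[0],
            'ph': 'X',                              # complete event: start time plus duration
            'tid': row[1],
            'ts': (float(row[2]) - min_start) * factor,
            'dur': float(row[3]) * factor,
            'pid': row[4] if len(row) > 4 else device_id,
        })
    return events

if __name__ == '__main__':
    print(json.dumps(build_timeline(0, device_ops, aicpu_ops), indent=2))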