diff --git a/mindinsight/profiler/parser/framework_parser.py b/mindinsight/profiler/parser/framework_parser.py
index a8e80d72e6cbc5286ec72df0a420ede531282f6e..2861895900b9ad6ee772f8eac4b0d46c9d99129a 100644
--- a/mindinsight/profiler/parser/framework_parser.py
+++ b/mindinsight/profiler/parser/framework_parser.py
@@ -188,16 +188,23 @@ class FrameworkParser:
         'output_data_type', 'output_shape'
     ]
 
+    # if the task id is less than the task id threshold, the combination of
+    # task id and stream id represents one operator; otherwise the task id
+    # alone represents one operator
+    _task_id_threshold = 25000
+
     def __init__(self, profiling_id, device_id, output_path='./'):
         self._profiling_path = self._get_raw_profiling_path(profiling_id)
         self._backend_type = None
-        self._framework_path = {'graph': [], 'task': []}
+        self._framework_path = {'graph': [], 'task': [], 'point': []}
         self._search_file(profiling_id, device_id)
         self._device_id = device_id
         self._save_path = self._get_save_path(device_id, output_path)
         self._task_id_full_op_name_dict = {}
         self._task_cache = {}
+        self._point_info = {}
         self._parse_task_files()
+        self._parse_point_files()
 
     @property
     def save_path(self):
@@ -209,6 +216,16 @@ class FrameworkParser:
         """
         return self._save_path
 
+    @property
+    def point_info(self):
+        """
+        The property of the framework point information.
+
+        Returns:
+            dict, the framework point information.
+        """
+        return self._point_info
+
     def to_task_id_full_op_name_dict(self):
         """
         Get the task id and full operator name dict.
@@ -282,7 +299,11 @@ class FrameworkParser:
         Raises:
             ProfilerFileNotFoundException: If the framework files are not found.
         """
-        self._search_file_from_job_path(device_id)
+        # first search in the JOB dir; if nothing is found there, search in
+        # the sub directory of the JOB dir
+        self._search_file_from_job_path(device_id, search_in_sub_path=False)
+        if self._backend_type is None:
+            self._search_file_from_job_path(device_id, search_in_sub_path=True)
         self._search_file_from_data_path(profiling_id, device_id)
 
         if self._backend_type is None:
@@ -290,19 +311,26 @@ class FrameworkParser:
         self._framework_path['graph'].sort()
         self._framework_path['task'].sort()
 
-    def _search_file_from_job_path(self, device_id):
+    def _search_file_from_job_path(self, device_id, search_in_sub_path=False):
         """
         Search framework files from job path.
 
         Args:
             device_id (str): The device ID.
+            search_in_sub_path (bool): `True` if searching for files in the
+                profiling sub dir (`data`), else in the profiling dir. Default: False.
         Raises:
             ProfilerRawFileException: If the framework file type is inconsistent.
             ProfilerDeviceIdMismatchException: If the device id is mismatch
                 with framework in the raw dir.
""" - files = os.listdir(self._profiling_path) + profiling_dir = os.path.join(self._profiling_path, 'data') \ + if search_in_sub_path else self._profiling_path + if not os.path.isdir(profiling_dir): + return + + files = os.listdir(profiling_dir) for file in files: pattern = re.search(self._regex_framework, file) if not pattern or file.endswith('.done'): @@ -325,11 +353,15 @@ class FrameworkParser: self._backend_type = 'ge' if data_type.startswith('graph_desc_info'): self._framework_path['graph'].append( - os.path.join(self._profiling_path, file) + os.path.join(profiling_dir, file) ) elif data_type.startswith('task_desc_info'): self._framework_path['task'].append( - os.path.join(self._profiling_path, file) + os.path.join(profiling_dir, file) + ) + elif data_type.startswith('point'): + self._framework_path['point'].append( + os.path.join(profiling_dir, file) ) def _search_file_from_data_path(self, profiling_id, device_id): @@ -384,6 +416,10 @@ class FrameworkParser: self._framework_path['task'].append( os.path.join(profiling_data_path, file) ) + elif data_type.startswith('point'): + self._framework_path['point'].append( + os.path.join(profiling_data_path, file) + ) def _get_save_path(self, device_id, output_path): """ @@ -418,7 +454,13 @@ class FrameworkParser: infos = task_info.strip('\n').split(' ') # key is op name, values is task id, stream id, block_dim self._task_cache[infos[0]] = [infos[2], infos[3], infos[1]] - self._task_id_full_op_name_dict[infos[2]] = infos[0] + + # if the task id is less than the task id threshold, the + # stream id and task id correspond to an operator + task_id = infos[2] + if int(task_id) < self._task_id_threshold: + task_id = '_'.join([infos[3], task_id]) + self._task_id_full_op_name_dict[task_id] = infos[0] def _parse_graph_files_and_save(self, task_cache): """ @@ -546,3 +588,11 @@ class FrameworkParser: else: op_name = '+'.join([op_name, name_str.split('/')[-1]]) return op_name + + def _parse_point_files(self): + """Parse the framework point files.""" + for path in self._framework_path['point']: + with open(path, 'r') as file: + for point_info in file: + infos = point_info.strip('\n').split(' ') + self._point_info[int(infos[0])] = infos[1] diff --git a/mindinsight/profiler/profiling.py b/mindinsight/profiler/profiling.py index 24af157379933430d6bf07de1e76bf7679103a4f..31e5d0af51ca2a2c61cf037afd1a465751510b42 100644 --- a/mindinsight/profiler/profiling.py +++ b/mindinsight/profiler/profiling.py @@ -209,7 +209,9 @@ class Profiler: # parse minddata pipeline operator and queue try: - pipeline_parser = MinddataPipelineParser(job_id, self._dev_id) + pipeline_parser = MinddataPipelineParser( + self._output_path, self._dev_id, self._output_path + ) pipeline_parser.parse() except MindInsightException as err: logger.warning(err.message) diff --git a/tests/ut/profiler/parser/test_framework_parser.py b/tests/ut/profiler/parser/test_framework_parser.py index 32d8ea6860e22f6aa3ad66f78a5e9308d5144adb..88b16f03c2767edf990b7a581e8aa63b70b906ad 100644 --- a/tests/ut/profiler/parser/test_framework_parser.py +++ b/tests/ut/profiler/parser/test_framework_parser.py @@ -59,10 +59,14 @@ class TestFrameworkParser: self._output_path_2 = tempfile.mkdtemp(prefix='test_framework_parser_') self._parser_2 = FrameworkParser('JOB2', '0', self._output_path_2) + self._output_path_4 = tempfile.mkdtemp(prefix='test_framework_parser_') + self._parser_4 = FrameworkParser('JOB4', '0', self._output_path_4) + def teardown_method(self) -> None: """Clear up after test case execution.""" 
         shutil.rmtree(self._output_path_1)
         shutil.rmtree(self._output_path_2)
+        shutil.rmtree(self._output_path_4)
         FrameworkParser._raw_data_dir = '/var/log/npu/profiling'
 
     def test_save_path(self):
@@ -73,6 +77,14 @@ class TestFrameworkParser:
         expect_result = os.path.join(self._output_path_2, 'framework_raw_0.csv')
         assert expect_result == self._parser_2.save_path
 
+    def test_point_info(self):
+        """Test the querying point info function."""
+        expect_result = {
+            1: 'Default/Cast-op6',
+            2: 'Default/TransData-op7'
+        }
+        assert expect_result == self._parser_4.point_info
+
     def test_to_task_id_full_op_name_dict(self):
         """Test the querying task id and full operator name dict function."""
         expect_result = {
@@ -85,6 +97,15 @@ class TestFrameworkParser:
         assert expect_result == self._parser_1.to_task_id_full_op_name_dict()
         assert expect_result == self._parser_2.to_task_id_full_op_name_dict()
 
+        expect_result = {
+            '0_1': 'Default/Cast-op6',
+            '0_2': 'Default/TransData-op7',
+            '0_3': 'Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5',
+            '0_4': 'Default/network-WithLossCell/_backbone-ResNet/layer1-SequentialCell/'
+                   '0-ResidualBlock/conv1-Conv2d/Cast-op28'
+        }
+        assert expect_result == self._parser_4.to_task_id_full_op_name_dict()
+
     def test_parse(self):
         """Test the parse function."""
         expect_framework_file = os.path.join(PROFILER_DIR, 'framework_raw_0.csv')
diff --git a/tests/utils/resource/JOB4/data/Framework.host.vm.graph_desc_info.0.slice_0 b/tests/utils/resource/JOB4/data/Framework.host.vm.graph_desc_info.0.slice_0
new file mode 100644
index 0000000000000000000000000000000000000000..9b3e7b322c5cb97b1a85e28e327c52ee9ce29824
--- /dev/null
+++ b/tests/utils/resource/JOB4/data/Framework.host.vm.graph_desc_info.0.slice_0
@@ -0,0 +1,4 @@
+op_name:Default/Cast-op6 op_type:Cast input_id:0 input_format:DefaultFormat input_data_type:40 input_shape:"32,3,224,224" output_id:0 output_format:DefaultFormat output_data_type:39 output_shape:"32,3,224,224"
+op_name:Default/TransData-op7 op_type:TransData input_id:0 input_format:DefaultFormat input_data_type:39 input_shape:"32,3,224,224" output_id:0 output_format:NC1HWC0 output_data_type:39 output_shape:"32,1,224,224,16"
+op_name:Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5 op_type:Cast input_id:0 input_format:FracZ input_data_type:40 input_shape:"49,4,16,16" output_id:0 output_format:FracZ output_data_type:39 output_shape:"49,4,16,16"
+op_name:Default/network-WithLossCell/_backbone-ResNet/layer1-SequentialCell/0-ResidualBlock/conv1-Conv2d/Cast-op28 op_type:Cast input_id:0 input_format:FracZ input_data_type:40 input_shape:"4,4,16,16" output_id:0 output_format:FracZ output_data_type:39 output_shape:"4,4,16,16"
diff --git a/tests/utils/resource/JOB4/data/Framework.host.vm.point.0.slice_0 b/tests/utils/resource/JOB4/data/Framework.host.vm.point.0.slice_0
new file mode 100644
index 0000000000000000000000000000000000000000..01bcf6f3ca63cb1e128d9301497923b584495b71
--- /dev/null
+++ b/tests/utils/resource/JOB4/data/Framework.host.vm.point.0.slice_0
@@ -0,0 +1,2 @@
+1 Default/Cast-op6
+2 Default/TransData-op7
diff --git a/tests/utils/resource/JOB4/data/Framework.host.vm.task_desc_info.0.slice_0 b/tests/utils/resource/JOB4/data/Framework.host.vm.task_desc_info.0.slice_0
new file mode 100644
index 0000000000000000000000000000000000000000..e49673789f9e543c3af261db58dd6aa210a523c7
--- /dev/null
+++ b/tests/utils/resource/JOB4/data/Framework.host.vm.task_desc_info.0.slice_0
@@ -0,0 +1,4 @@
+Default/Cast-op6 32 1 0
+Default/TransData-op7 32 2 0
+Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5 32 3 0
+Default/network-WithLossCell/_backbone-ResNet/layer1-SequentialCell/0-ResidualBlock/conv1-Conv2d/Cast-op28 4 4 0
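
For reference, the task-id keying rule introduced in framework_parser.py can be exercised on the new JOB4 fixture data with a few lines of standalone Python. This is a minimal sketch, not MindInsight code: task_id_to_op_name and TASK_ID_THRESHOLD are illustrative names; only the 25000 threshold, the '<stream_id>_<task_id>' key format, and the task_desc_info field order come from the patch and its fixtures.

    # Standalone sketch of the keying rule used in _parse_task_files.
    # task_desc_info lines have the form:
    #   <full_op_name> <block_dim> <task_id> <stream_id>
    TASK_ID_THRESHOLD = 25000  # mirrors FrameworkParser._task_id_threshold

    def task_id_to_op_name(task_desc_lines):
        """Map each task to its full operator name.

        Below the threshold a task id is only unique within a stream, so the
        key is '<stream_id>_<task_id>'; at or above it, the task id alone is
        the key.
        """
        mapping = {}
        for line in task_desc_lines:
            infos = line.strip('\n').split(' ')
            full_op_name, task_id, stream_id = infos[0], infos[2], infos[3]
            if int(task_id) < TASK_ID_THRESHOLD:
                task_id = '_'.join([stream_id, task_id])
            mapping[task_id] = full_op_name
        return mapping

    # With the JOB4 task_desc_info fixture above, the keys become '0_1'..'0_4',
    # matching the expected result in test_to_task_id_full_op_name_dict.
    print(task_id_to_op_name([
        'Default/Cast-op6 32 1 0',
        'Default/TransData-op7 32 2 0',
    ]))
    # {'0_1': 'Default/Cast-op6', '0_2': 'Default/TransData-op7'}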