Unverified commit 365975fd authored by: C chenjian, committed by: GitHub

[cherry-pick] Refine statistic table and bug fix (#41581)

* Refine statistic table (#41524)

* Add get profiler from config (#41532)

* no

* maintain old profiler

* add get profiler from serialization config

* add unit test

* improve coverage

* fix

* Revert "improve coverage"

This reverts commit 4a980bfda48adadee551d0e1c5740bc5b7389200.

* fix unit

* fix

* fix
Parent f3296eae
...@@ -16,6 +16,7 @@ from __future__ import print_function ...@@ -16,6 +16,7 @@ from __future__ import print_function
import unittest import unittest
import numpy as np import numpy as np
import tempfile
import paddle import paddle
import paddle.profiler as profiler import paddle.profiler as profiler
...@@ -138,6 +139,146 @@ class TestNvprof(unittest.TestCase): ...@@ -138,6 +139,146 @@ class TestNvprof(unittest.TestCase):
y = x / 2.0 y = x / 2.0
class TestGetProfiler(unittest.TestCase):
    """Tests for ``paddle.profiler.profiler.get_profiler``.

    ``get_profiler`` builds a Profiler from a JSON config file.  One
    well-formed config is exercised end to end; the remaining configs are
    deliberately malformed (wrong target names, bad module path, wrong
    key names/types, missing file) and only verify that ``get_profiler``
    falls back to defaults instead of raising.
    """

    def _make_config_file(self, config_content):
        """Write ``config_content`` to a temp file and return the open handle.

        The open handle (not just the name) is returned so the file is not
        deleted before ``get_profiler`` reads it.
        """
        filehandle = tempfile.NamedTemporaryFile(mode='w')
        filehandle.write(config_content)
        filehandle.flush()
        return filehandle

    def test_getprofiler(self):
        import paddle.profiler.profiler as profiler_module

        # A valid config: CPU target, list-form scheduler, and a
        # chrome-tracing callback built from module/args/kwargs.
        config_content = '''
        {
            "targets": ["CPU"],
            "scheduler": [3,4],
            "on_trace_ready": {
                "export_chrome_tracing":{
                    "module": "paddle.profiler",
                    "use_direct": false,
                    "args": [],
                    "kwargs": {
                        "dir_name": "testdebug/"
                    }
                }
            },
            "timer_only": false
        }
        '''
        filehandle = self._make_config_file(config_content)
        prof = profiler_module.get_profiler(filehandle.name)
        x_value = np.random.randn(2, 3, 3)
        x = paddle.to_tensor(
            x_value, stop_gradient=False, place=paddle.CPUPlace())
        with prof:
            for i in range(5):
                y = x / 2.0
                ones_like_y = paddle.ones_like(y)
                prof.step()

        # Below tests are just for coverage: intentionally wrong configs.
        # use_direct=true with a bogus module path may raise; swallow it.
        config_content = '''
        {
            "targets": ["Cpu", "Gpu"],
            "scheduler": {
                "make_scheduler":{
                    "module": "paddle.profiler",
                    "use_direct": true,
                    "args": [],
                    "kwargs": {}
                }
            },
            "on_trace_ready": {
                "export_chrome_tracing":{
                    "module": "paddle.profiler1",
                    "use_direct": true,
                    "args": [],
                    "kwargs": {
                    }
                }
            },
            "timer_only": false
        }
        '''
        filehandle = self._make_config_file(config_content)
        try:
            profiler_module.get_profiler(filehandle.name)
        except Exception:
            # Expected for this malformed config; only coverage matters.
            pass

        # Scheduler constructed through make_scheduler kwargs.
        config_content = '''
        {
            "targets": ["Cpu", "Gpu"],
            "scheduler": {
                "make_scheduler":{
                    "module": "paddle.profiler",
                    "use_direct": false,
                    "args": [],
                    "kwargs": {
                        "closed": 1,
                        "ready": 1,
                        "record": 2
                    }
                }
            },
            "on_trace_ready": {
                "export_chrome_tracing":{
                    "module": "paddle.profiler",
                    "use_direct": true,
                    "args": [],
                    "kwargs": {
                    }
                }
            },
            "timer_only": false
        }
        '''
        filehandle = self._make_config_file(config_content)
        profiler_module.get_profiler(filehandle.name)

        # Wrong entry names and wrong value types: every field should
        # fall back to its default instead of raising.
        config_content = '''
        {
            "targets": [1],
            "scheduler": {
                "make_scheduler1":{
                    "module": "paddle.profiler",
                    "use_direct": false,
                    "args": [],
                    "kwargs": {
                        "closed": 1,
                        "ready": 1,
                        "record": 2
                    }
                }
            },
            "on_trace_ready": {
                "export_chrome_tracing1":{
                    "module": "paddle.profiler",
                    "use_direct": false,
                    "args": [],
                    "kwargs": {
                        "dir_name": "testdebug/"
                    }
                }
            },
            "timer_only": 1
        }
        '''
        filehandle = self._make_config_file(config_content)
        profiler_module.get_profiler(filehandle.name)

        # A nonexistent path must also fall back to a default Profiler.
        profiler_module.get_profiler('nopath.json')
class RandomDataset(Dataset): class RandomDataset(Dataset):
def __init__(self, num_samples): def __init__(self, num_samples):
self.num_samples = num_samples self.num_samples = num_samples
......
...@@ -185,20 +185,22 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -185,20 +185,22 @@ class TestProfilerStatistic(unittest.TestCase):
profiler.TracerEventType.Communication), 5) profiler.TracerEventType.Communication), 5)
self.assertEqual(len(event_summary.items), 2) self.assertEqual(len(event_summary.items), 2)
self.assertEqual(len(event_summary.userdefined_items), 1) self.assertEqual(len(event_summary.userdefined_items), 1)
self.assertEqual(len(event_summary.model_perspective_items), 3) self.assertEqual(len(event_summary.model_perspective_items), 4)
self.assertEqual(len(event_summary.memory_manipulation_items), 1) self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15) self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].gpu_time, 25) self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].cpu_time, 100) event_summary.model_perspective_items['Forward'].cpu_time, 100)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].gpu_time, 135) event_summary.model_perspective_items['Forward'].general_gpu_time,
135)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Backward'].gpu_time, 0) event_summary.model_perspective_items['Backward'].general_gpu_time,
0)
self.assertEqual( self.assertEqual(
event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15) event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15)
self.assertEqual( self.assertEqual(event_summary.memory_manipulation_items['AsyncMemcpy']
event_summary.memory_manipulation_items['AsyncMemcpy'].gpu_time, 60) .general_gpu_time, 60)
print( print(
profiler.profiler_statistic._build_table( profiler.profiler_statistic._build_table(
statistic_data, statistic_data,
...@@ -226,31 +228,31 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -226,31 +228,31 @@ class TestProfilerStatistic(unittest.TestCase):
userdefined_node = HostPythonNode('Communication Time', userdefined_node = HostPythonNode('Communication Time',
profiler.TracerEventType.UserDefined, profiler.TracerEventType.UserDefined,
100, 110, 1000, 1001) 100, 110, 1000, 1001)
reduce_all_launchkernel0 = HostPythonNode( allreduce_launchkernel0 = HostPythonNode(
'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 102, 104, 'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 102, 104,
1000, 1001) 1000, 1001)
nccl_reduce_all_kernel0 = DevicePythonNode( nccl_allreduce_kernel0 = DevicePythonNode(
'nccl_reduce_all_kernel', profiler.TracerEventType.Kernel, 105, 120, 'nccl_allreduce_kernel', profiler.TracerEventType.Kernel, 105, 120,
0, 0, 2) 0, 0, 2)
communication_node = HostPythonNode( communication_node = HostPythonNode(
'Communication', profiler.TracerEventType.Communication, 105, 110, 'Communication', profiler.TracerEventType.Communication, 105, 110,
1000, 1001) 1000, 1001)
reduce_all_op1 = HostPythonNode('reduce_all_op1', allreduce_op1 = HostPythonNode('allreduce_op1',
profiler.TracerEventType.Operator, 105, profiler.TracerEventType.Operator, 105,
108, 1000, 1001) 108, 1000, 1001)
reduce_all_op1_infershape = HostPythonNode( allreduce_op1_infershape = HostPythonNode(
'reduce_all_op1::infershape', 'allreduce_op1::infershape', profiler.TracerEventType.OperatorInner,
profiler.TracerEventType.OperatorInner, 105, 106, 1000, 1001) 105, 106, 1000, 1001)
reduce_all_launchkernel1 = HostPythonNode( allreduce_launchkernel1 = HostPythonNode(
'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 106, 107, 'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 106, 107,
1000, 1001) 1000, 1001)
nccl_reduce_all_kernel1 = DevicePythonNode( nccl_allreduce_kernel1 = DevicePythonNode(
'nccl_reduce_all_kernel', profiler.TracerEventType.Kernel, 130, 150, 'nccl_allreduce_kernel', profiler.TracerEventType.Kernel, 130, 150,
0, 0, 2) 0, 0, 2)
backward_node = HostPythonNode('Gradient Backward', backward_node = HostPythonNode('Gradient Backward',
...@@ -305,19 +307,19 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -305,19 +307,19 @@ class TestProfilerStatistic(unittest.TestCase):
'sync_batch_norm_memcpy', profiler.TracerEventType.Memcpy, 150, 200, 'sync_batch_norm_memcpy', profiler.TracerEventType.Memcpy, 150, 200,
0, 0, 1) 0, 0, 1)
reduce_all_node2 = HostPythonNode('reduce_all', allreduce_node2 = HostPythonNode('allreduce',
profiler.TracerEventType.Operator, profiler.TracerEventType.Operator, 230,
230, 250, 1000, 1001) 250, 1000, 1001)
reduce_all_node2_infershape = HostPythonNode( allreduce_node2_infershape = HostPythonNode(
'reduce_all_node2::infershape', 'allreduce_node2::infershape',
profiler.TracerEventType.OperatorInner, 231, 232, 1000, 1001) profiler.TracerEventType.OperatorInner, 231, 232, 1000, 1001)
reduce_all_launchkernel2 = HostPythonNode( allreduce_launchkernel2 = HostPythonNode(
'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 235, 240, 'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 235, 240,
1000, 1001) 1000, 1001)
nccl_reduce_all_kernel2 = DevicePythonNode( nccl_allreduce_kernel2 = DevicePythonNode(
'nccl_reduce_all_kernel', profiler.TracerEventType.Kernel, 250, 280, 'nccl_allreduce_kernel', profiler.TracerEventType.Kernel, 250, 280,
0, 0, 2) 0, 0, 2)
root_node.children_node.append(profilerstep_node) root_node.children_node.append(profilerstep_node)
...@@ -329,12 +331,12 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -329,12 +331,12 @@ class TestProfilerStatistic(unittest.TestCase):
yolonet_node.children_node.extend( yolonet_node.children_node.extend(
[sync_batch_norm_node, userdefined_node]) [sync_batch_norm_node, userdefined_node])
userdefined_node.children_node.append(communication_node) userdefined_node.children_node.append(communication_node)
userdefined_node.runtime_node.append(reduce_all_launchkernel0) userdefined_node.runtime_node.append(allreduce_launchkernel0)
reduce_all_launchkernel0.device_node.append(nccl_reduce_all_kernel0) allreduce_launchkernel0.device_node.append(nccl_allreduce_kernel0)
communication_node.children_node.append(reduce_all_op1) communication_node.children_node.append(allreduce_op1)
reduce_all_op1.children_node.append(reduce_all_op1_infershape) allreduce_op1.children_node.append(allreduce_op1_infershape)
reduce_all_op1.runtime_node.append(reduce_all_launchkernel1) allreduce_op1.runtime_node.append(allreduce_launchkernel1)
reduce_all_launchkernel1.device_node.append(nccl_reduce_all_kernel1) allreduce_launchkernel1.device_node.append(nccl_allreduce_kernel1)
conv2d_node.children_node.extend( conv2d_node.children_node.extend(
[conv2d_infer_shape, conv2d_compute, conv2d_MemCpy]) [conv2d_infer_shape, conv2d_compute, conv2d_MemCpy])
conv2d_compute.runtime_node.append(conv2d_launchkernel) conv2d_compute.runtime_node.append(conv2d_launchkernel)
...@@ -350,10 +352,10 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -350,10 +352,10 @@ class TestProfilerStatistic(unittest.TestCase):
sync_batch_norm_MemCpy.runtime_node.append(sync_batch_norm_cudaMemCpy) sync_batch_norm_MemCpy.runtime_node.append(sync_batch_norm_cudaMemCpy)
sync_batch_norm_launchkernel.device_node.append(sync_batch_norm_kernel) sync_batch_norm_launchkernel.device_node.append(sync_batch_norm_kernel)
sync_batch_norm_cudaMemCpy.device_node.append(sync_batch_norm_memcpy) sync_batch_norm_cudaMemCpy.device_node.append(sync_batch_norm_memcpy)
optimization_node.children_node.append(reduce_all_node2) optimization_node.children_node.append(allreduce_node2)
reduce_all_node2.children_node.append(reduce_all_node2_infershape) allreduce_node2.children_node.append(allreduce_node2_infershape)
reduce_all_node2.runtime_node.append(reduce_all_launchkernel2) allreduce_node2.runtime_node.append(allreduce_launchkernel2)
reduce_all_launchkernel2.device_node.append(nccl_reduce_all_kernel2) allreduce_launchkernel2.device_node.append(nccl_allreduce_kernel2)
thread_tree = {'thread1001': root_node} thread_tree = {'thread1001': root_node}
extra_info = { extra_info = {
'Process Cpu Utilization': '1.02', 'Process Cpu Utilization': '1.02',
...@@ -415,20 +417,22 @@ class TestProfilerStatistic(unittest.TestCase): ...@@ -415,20 +417,22 @@ class TestProfilerStatistic(unittest.TestCase):
distributed_summary.overlap_range), 85) distributed_summary.overlap_range), 85)
self.assertEqual(len(event_summary.items), 4) self.assertEqual(len(event_summary.items), 4)
self.assertEqual(len(event_summary.userdefined_items), 1) self.assertEqual(len(event_summary.userdefined_items), 1)
self.assertEqual(len(event_summary.model_perspective_items), 3) self.assertEqual(len(event_summary.model_perspective_items), 4)
self.assertEqual(len(event_summary.memory_manipulation_items), 1) self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15) self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].gpu_time, 25) self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].cpu_time, 100) event_summary.model_perspective_items['Forward'].cpu_time, 100)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Forward'].gpu_time, 315) event_summary.model_perspective_items['Forward'].general_gpu_time,
315)
self.assertEqual( self.assertEqual(
event_summary.model_perspective_items['Backward'].gpu_time, 0) event_summary.model_perspective_items['Backward'].general_gpu_time,
0)
self.assertEqual( self.assertEqual(
event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15) event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15)
self.assertEqual( self.assertEqual(event_summary.memory_manipulation_items['AsyncMemcpy']
event_summary.memory_manipulation_items['AsyncMemcpy'].gpu_time, 60) .general_gpu_time, 60)
print( print(
profiler.profiler_statistic._build_table( profiler.profiler_statistic._build_table(
statistic_data, statistic_data,
......
...@@ -18,6 +18,8 @@ import datetime ...@@ -18,6 +18,8 @@ import datetime
from enum import Enum from enum import Enum
from typing import Any, Callable, Iterable, Optional, Union from typing import Any, Callable, Iterable, Optional, Union
from warnings import warn from warnings import warn
import importlib
import json
import paddle import paddle
from paddle.fluid.core import (_Profiler, _ProfilerResult, ProfilerOptions, from paddle.fluid.core import (_Profiler, _ProfilerResult, ProfilerOptions,
...@@ -741,3 +743,73 @@ class Profiler: ...@@ -741,3 +743,73 @@ class Profiler:
op_detail=op_detail, op_detail=op_detail,
thread_sep=thread_sep, thread_sep=thread_sep,
time_unit=time_unit)) time_unit=time_unit))
def get_profiler(config_path):
    """Build a :class:`Profiler` from a JSON configuration file.

    Parameters:
        config_path(str): Path to a JSON file with optional keys
            ``targets``, ``scheduler``, ``on_trace_ready`` and
            ``timer_only``.  A ``scheduler``/``on_trace_ready`` dict entry
            names an attribute of ``module``; when ``use_direct`` is false
            the attribute is called with ``args``/``kwargs`` and the result
            is used, otherwise the attribute itself is used.

    Returns:
        Profiler: a configured instance.  An unreadable file or any
        invalid section falls back to the corresponding default parameter
        (a message is printed, no exception propagates).
    """
    try:
        with open(config_path, 'r') as filehandle:
            config_dict = json.load(filehandle)
    except Exception as e:
        # Missing or non-JSON file: report it and use a default profiler.
        print('Load config file for profiler error: {}'.format(e))
        print('Use default parameters instead.')
        return Profiler()
    translated_config_dict = {}
    if "targets" in config_dict:
        try:
            # Map case-insensitive target names onto ProfilerTarget members;
            # unrecognized names are skipped silently.
            translated_config_dict['targets'] = []
            for target in config_dict['targets']:
                if target.lower() == "cpu":
                    translated_config_dict['targets'].append(ProfilerTarget.CPU)
                elif target.lower() == 'gpu':
                    translated_config_dict['targets'].append(ProfilerTarget.GPU)
        except Exception:
            print('Set targets parameter error, use default parameter instead.')
            translated_config_dict['targets'] = None
    if "scheduler" in config_dict:
        try:
            if isinstance(config_dict['scheduler'], dict):
                # Factory form; if several entries are present, the last
                # one iterated wins.
                for key, value in config_dict['scheduler'].items():
                    module_path = value['module']
                    use_direct = value['use_direct']
                    module = importlib.import_module(module_path)
                    method = getattr(module, key)
                    if not use_direct:
                        translated_config_dict['scheduler'] = method(
                            *value['args'], **value['kwargs'])
                    else:
                        translated_config_dict['scheduler'] = method
            else:
                # List form: pass the first two elements straight through.
                translated_config_dict['scheduler'] = [
                    config_dict['scheduler'][0], config_dict['scheduler'][1]
                ]
        except Exception:
            print(
                'Set scheduler parameter error, use default parameter instead.')
            translated_config_dict['scheduler'] = None
    if "on_trace_ready" in config_dict:
        try:
            if isinstance(config_dict['on_trace_ready'], dict):
                # Same factory/direct resolution as for "scheduler".
                for key, value in config_dict['on_trace_ready'].items():
                    module_path = value['module']
                    use_direct = value['use_direct']
                    module = importlib.import_module(module_path)
                    method = getattr(module, key)
                    if not use_direct:
                        translated_config_dict['on_trace_ready'] = method(
                            *value['args'], **value['kwargs'])
                    else:
                        translated_config_dict['on_trace_ready'] = method
        except Exception:
            print(
                'Set on_trace_ready parameter error, use default parameter instead.'
            )
            translated_config_dict['on_trace_ready'] = None
    if "timer_only" in config_dict:
        if isinstance(config_dict['timer_only'], bool):
            translated_config_dict['timer_only'] = config_dict['timer_only']
        else:
            print(
                'Set timer_only parameter error, use default parameter instead.')
    return Profiler(**translated_config_dict)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册