Unverified commit f43af275, authored by chenjian, committed by GitHub

Refine statistic table (#41524)

Parent 14dba636
@@ -185,20 +185,22 @@ class TestProfilerStatistic(unittest.TestCase):
                 profiler.TracerEventType.Communication), 5)
         self.assertEqual(len(event_summary.items), 2)
         self.assertEqual(len(event_summary.userdefined_items), 1)
-        self.assertEqual(len(event_summary.model_perspective_items), 3)
+        self.assertEqual(len(event_summary.model_perspective_items), 4)
         self.assertEqual(len(event_summary.memory_manipulation_items), 1)
         self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
-        self.assertEqual(event_summary.items['conv2d'].gpu_time, 25)
+        self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
         self.assertEqual(
             event_summary.model_perspective_items['Forward'].cpu_time, 100)
         self.assertEqual(
-            event_summary.model_perspective_items['Forward'].gpu_time, 135)
+            event_summary.model_perspective_items['Forward'].general_gpu_time,
+            135)
         self.assertEqual(
-            event_summary.model_perspective_items['Backward'].gpu_time, 0)
+            event_summary.model_perspective_items['Backward'].general_gpu_time,
+            0)
         self.assertEqual(
             event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15)
-        self.assertEqual(
-            event_summary.memory_manipulation_items['AsyncMemcpy'].gpu_time, 60)
+        self.assertEqual(event_summary.memory_manipulation_items['AsyncMemcpy']
+                         .general_gpu_time, 60)
         print(
             profiler.profiler_statistic._build_table(
                 statistic_data,
@@ -226,31 +228,31 @@ class TestProfilerStatistic(unittest.TestCase):
         userdefined_node = HostPythonNode('Communication Time',
                                           profiler.TracerEventType.UserDefined,
                                           100, 110, 1000, 1001)
-        reduce_all_launchkernel0 = HostPythonNode(
+        allreduce_launchkernel0 = HostPythonNode(
             'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 102, 104,
             1000, 1001)
-        nccl_reduce_all_kernel0 = DevicePythonNode(
-            'nccl_reduce_all_kernel', profiler.TracerEventType.Kernel, 105, 120,
+        nccl_allreduce_kernel0 = DevicePythonNode(
+            'nccl_allreduce_kernel', profiler.TracerEventType.Kernel, 105, 120,
             0, 0, 2)
         communication_node = HostPythonNode(
             'Communication', profiler.TracerEventType.Communication, 105, 110,
             1000, 1001)
-        reduce_all_op1 = HostPythonNode('reduce_all_op1',
+        allreduce_op1 = HostPythonNode('allreduce_op1',
                                        profiler.TracerEventType.Operator, 105,
                                        108, 1000, 1001)
-        reduce_all_op1_infershape = HostPythonNode(
-            'reduce_all_op1::infershape',
-            profiler.TracerEventType.OperatorInner, 105, 106, 1000, 1001)
-        reduce_all_launchkernel1 = HostPythonNode(
+        allreduce_op1_infershape = HostPythonNode(
+            'allreduce_op1::infershape', profiler.TracerEventType.OperatorInner,
+            105, 106, 1000, 1001)
+        allreduce_launchkernel1 = HostPythonNode(
             'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 106, 107,
             1000, 1001)
-        nccl_reduce_all_kernel1 = DevicePythonNode(
-            'nccl_reduce_all_kernel', profiler.TracerEventType.Kernel, 130, 150,
+        nccl_allreduce_kernel1 = DevicePythonNode(
+            'nccl_allreduce_kernel', profiler.TracerEventType.Kernel, 130, 150,
             0, 0, 2)
         backward_node = HostPythonNode('Gradient Backward',
@@ -305,19 +307,19 @@ class TestProfilerStatistic(unittest.TestCase):
             'sync_batch_norm_memcpy', profiler.TracerEventType.Memcpy, 150, 200,
             0, 0, 1)
-        reduce_all_node2 = HostPythonNode('reduce_all',
-                                          profiler.TracerEventType.Operator,
-                                          230, 250, 1000, 1001)
-        reduce_all_node2_infershape = HostPythonNode(
-            'reduce_all_node2::infershape',
+        allreduce_node2 = HostPythonNode('allreduce',
+                                         profiler.TracerEventType.Operator, 230,
+                                         250, 1000, 1001)
+        allreduce_node2_infershape = HostPythonNode(
+            'allreduce_node2::infershape',
             profiler.TracerEventType.OperatorInner, 231, 232, 1000, 1001)
-        reduce_all_launchkernel2 = HostPythonNode(
+        allreduce_launchkernel2 = HostPythonNode(
             'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 235, 240,
             1000, 1001)
-        nccl_reduce_all_kernel2 = DevicePythonNode(
-            'nccl_reduce_all_kernel', profiler.TracerEventType.Kernel, 250, 280,
+        nccl_allreduce_kernel2 = DevicePythonNode(
+            'nccl_allreduce_kernel', profiler.TracerEventType.Kernel, 250, 280,
             0, 0, 2)
         root_node.children_node.append(profilerstep_node)
@@ -329,12 +331,12 @@ class TestProfilerStatistic(unittest.TestCase):
         yolonet_node.children_node.extend(
             [sync_batch_norm_node, userdefined_node])
         userdefined_node.children_node.append(communication_node)
-        userdefined_node.runtime_node.append(reduce_all_launchkernel0)
-        reduce_all_launchkernel0.device_node.append(nccl_reduce_all_kernel0)
-        communication_node.children_node.append(reduce_all_op1)
-        reduce_all_op1.children_node.append(reduce_all_op1_infershape)
-        reduce_all_op1.runtime_node.append(reduce_all_launchkernel1)
-        reduce_all_launchkernel1.device_node.append(nccl_reduce_all_kernel1)
+        userdefined_node.runtime_node.append(allreduce_launchkernel0)
+        allreduce_launchkernel0.device_node.append(nccl_allreduce_kernel0)
+        communication_node.children_node.append(allreduce_op1)
+        allreduce_op1.children_node.append(allreduce_op1_infershape)
+        allreduce_op1.runtime_node.append(allreduce_launchkernel1)
+        allreduce_launchkernel1.device_node.append(nccl_allreduce_kernel1)
         conv2d_node.children_node.extend(
             [conv2d_infer_shape, conv2d_compute, conv2d_MemCpy])
         conv2d_compute.runtime_node.append(conv2d_launchkernel)
@@ -350,10 +352,10 @@ class TestProfilerStatistic(unittest.TestCase):
         sync_batch_norm_MemCpy.runtime_node.append(sync_batch_norm_cudaMemCpy)
         sync_batch_norm_launchkernel.device_node.append(sync_batch_norm_kernel)
         sync_batch_norm_cudaMemCpy.device_node.append(sync_batch_norm_memcpy)
-        optimization_node.children_node.append(reduce_all_node2)
-        reduce_all_node2.children_node.append(reduce_all_node2_infershape)
-        reduce_all_node2.runtime_node.append(reduce_all_launchkernel2)
-        reduce_all_launchkernel2.device_node.append(nccl_reduce_all_kernel2)
+        optimization_node.children_node.append(allreduce_node2)
+        allreduce_node2.children_node.append(allreduce_node2_infershape)
+        allreduce_node2.runtime_node.append(allreduce_launchkernel2)
+        allreduce_launchkernel2.device_node.append(nccl_allreduce_kernel2)
         thread_tree = {'thread1001': root_node}
         extra_info = {
             'Process Cpu Utilization': '1.02',
@@ -415,20 +417,22 @@ class TestProfilerStatistic(unittest.TestCase):
                 distributed_summary.overlap_range), 85)
         self.assertEqual(len(event_summary.items), 4)
         self.assertEqual(len(event_summary.userdefined_items), 1)
-        self.assertEqual(len(event_summary.model_perspective_items), 3)
+        self.assertEqual(len(event_summary.model_perspective_items), 4)
         self.assertEqual(len(event_summary.memory_manipulation_items), 1)
         self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
-        self.assertEqual(event_summary.items['conv2d'].gpu_time, 25)
+        self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
         self.assertEqual(
             event_summary.model_perspective_items['Forward'].cpu_time, 100)
         self.assertEqual(
-            event_summary.model_perspective_items['Forward'].gpu_time, 315)
+            event_summary.model_perspective_items['Forward'].general_gpu_time,
+            315)
         self.assertEqual(
-            event_summary.model_perspective_items['Backward'].gpu_time, 0)
+            event_summary.model_perspective_items['Backward'].general_gpu_time,
+            0)
         self.assertEqual(
             event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15)
-        self.assertEqual(
-            event_summary.memory_manipulation_items['AsyncMemcpy'].gpu_time, 60)
+        self.assertEqual(event_summary.memory_manipulation_items['AsyncMemcpy']
+                         .general_gpu_time, 60)
         print(
             profiler.profiler_statistic._build_table(
                 statistic_data,
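For readers outside the PaddlePaddle test suite: the hunks above exercise a small tree of host and device event nodes (operator -> CUDA runtime call -> device kernel) whose aggregated statistics are then asserted on. The sketch below re-creates that wiring with simplified stand-in classes. HostPythonNode, DevicePythonNode, and TracerEventType here are assumptions modelled only on how the test uses them (the real definitions are outside the shown hunks), and the argument meanings in the comments are inferred from the test values.

from enum import Enum


class TracerEventType(Enum):
    # Stand-in for profiler.TracerEventType; only the members used below.
    Operator = 0
    CudaRuntime = 1
    Kernel = 2


class HostPythonNode:
    # Host-side event; arguments assumed to be:
    # name, event type, start_ns, end_ns, process_id, thread_id.
    def __init__(self, name, type, start_ns, end_ns, process_id, thread_id):
        self.name = name
        self.type = type
        self.start_ns = start_ns
        self.end_ns = end_ns
        self.process_id = process_id
        self.thread_id = thread_id
        self.children_node = []  # nested host events
        self.runtime_node = []   # CUDA runtime calls issued by this event
        self.device_node = []    # device events launched by a runtime call


class DevicePythonNode:
    # Device-side event; arguments assumed to be:
    # name, event type, start_ns, end_ns, device_id, context_id, stream_id.
    def __init__(self, name, type, start_ns, end_ns, device_id, context_id,
                 stream_id):
        self.name = name
        self.type = type
        self.start_ns = start_ns
        self.end_ns = end_ns
        self.device_id = device_id
        self.context_id = context_id
        self.stream_id = stream_id


# Wire one allreduce operator -> launch call -> NCCL kernel, as the test does.
allreduce_op1 = HostPythonNode('allreduce_op1', TracerEventType.Operator,
                               105, 108, 1000, 1001)
allreduce_launchkernel1 = HostPythonNode('cudalaunchkernel',
                                         TracerEventType.CudaRuntime,
                                         106, 107, 1000, 1001)
nccl_allreduce_kernel1 = DevicePythonNode('nccl_allreduce_kernel',
                                          TracerEventType.Kernel,
                                          130, 150, 0, 0, 2)
allreduce_op1.runtime_node.append(allreduce_launchkernel1)
allreduce_launchkernel1.device_node.append(nccl_allreduce_kernel1)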