diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 64e46e789b40629aef3a895525e6640b7e993c91..b4461740f7843383a3740c1182402b6eaf02c50b 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1386,7 +1386,7 @@ static std::string GenerateGradNodeCreationContent(
       "%s"
       "  {\n"
       "    paddle::platform::RecordEvent node_creation_record_event(\"%s\", "
-      "paddle::platform::TracerEventType::Operator, 1);\n"
+      "paddle::platform::TracerEventType::OperatorInner, 1);\n"
       "%s"
       "    if(require_any_grad) {\n"
       "      VLOG(6) << \" Construct Grad for %s \"; \n"
diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
index 2aa44de8497731285aa85adecaee4c125e1fa066..f358e4d332d6455c86f1c07d96b4bdd9d2bc80e9 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
@@ -111,7 +111,7 @@ PARSE_PYTHON_C_ARGS_TEMPLATE = \
 
 
 RECORD_EVENT_TEMPLATE = \
-"paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::Operator, 1);"
+"paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::UserDefined, 1);"
 
 
 RETURN_INPLACE_PYOBJECT_TEMPLATE = \
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index c4797029abf3c292fe737a870be6eb98a6686c88..3206b9e7cfac0ed9f9a92095b0b9ba8e80a15dc8 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -851,7 +851,7 @@ void Backward(
     bool retain_graph) {
   VLOG(3) << "Run in Backward";
   paddle::platform::RecordEvent backward_record_event(
-      "backward", paddle::platform::TracerEventType::Operator, 1);
+      "backward", paddle::platform::TracerEventType::UserDefined, 1);
   RunBackward(tensors, grad_tensors, retain_graph);
   phi::autotune::AutoTuneStatus::Instance().Update();
 }
diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.cc b/paddle/fluid/platform/profiler/chrometracing_logger.cc
index 8d6165e53766a044bcec9c0551c5af0527b97f88..15aa320123921252a878247b0510759d13642ddf 100644
--- a/paddle/fluid/platform/profiler/chrometracing_logger.cc
+++ b/paddle/fluid/platform/profiler/chrometracing_logger.cc
@@ -603,7 +603,7 @@ void ChromeTracingLogger::StartLog() {
         std::string(
             R"JSON(
   {
-    "id": %d, "name": "%s", "totalGlobalMem": %u,
+    "id": %d, "name": "%s", "totalGlobalMem": %llu,
     "computeMajor": %d, "computeMinor": %d,
     "maxThreadsPerBlock": %d, "maxThreadsPerMultiprocessor": %d,
     "regsPerBlock": %d, "regsPerMultiprocessor": %d, "warpSize": %d,
@@ -633,7 +633,7 @@ void ChromeTracingLogger::StartLog() {
         std::string(
             R"JSON(
   {
-    "id": %d, "name": "%s", "totalGlobalMem": %u,
+    "id": %d, "name": "%s", "totalGlobalMem": %llu,
     "computeMajor": %d, "computeMinor": %d,
     "maxThreadsPerBlock": %d, "maxThreadsPerMultiprocessor": %d,
     "regsPerBlock": %d, "regsPerMultiprocessor": %d, "warpSize": %d,
diff --git a/python/paddle/fluid/tests/unittests/test_profiler_statistic.py b/python/paddle/fluid/tests/unittests/test_profiler_statistic.py
index 6481e0f825df1c291bef2b6f651a94ca5e7563c0..f6269f548216fd73a5ff24772860915cf7e0e0a7 100644
--- a/python/paddle/fluid/tests/unittests/test_profiler_statistic.py
+++ b/python/paddle/fluid/tests/unittests/test_profiler_statistic.py
@@ -82,9 +82,9 @@ class TestProfilerStatistic(unittest.TestCase):
                                       profiler.TracerEventType.Forward, 50,
                                       110, 1000, 1001)
-        userdefined_node = HostPythonNode('Communication Time',
-                                          profiler.TracerEventType.UserDefined,
-                                          100, 110, 1000, 1001)
+        userdefined_node = HostPythonNode(
+            'Communication Time', profiler.TracerEventType.PythonUserDefined,
+            100, 110, 1000, 1001)
         communication_node = HostPythonNode(
             'Communication', profiler.TracerEventType.Communication, 105, 110,
@@ -209,7 +209,7 @@ class TestProfilerStatistic(unittest.TestCase):
                 0, profiler.TracerEventType.Memcpy), 60)
         self.assertEqual(
             time_range_summary.get_cpu_range_sum(
-                profiler.TracerEventType.UserDefined), 25)
+                profiler.TracerEventType.UserDefined), 15)
         self.assertEqual(
             time_range_summary.get_cpu_range_sum(
                 profiler.TracerEventType.Communication), 5)
@@ -277,9 +277,9 @@ class TestProfilerStatistic(unittest.TestCase):
                                       profiler.TracerEventType.Forward, 50,
                                       110, 1000, 1001)
-        userdefined_node = HostPythonNode('Communication Time',
-                                          profiler.TracerEventType.UserDefined,
-                                          100, 110, 1000, 1001)
+        userdefined_node = HostPythonNode(
+            'Communication Time', profiler.TracerEventType.PythonUserDefined,
+            100, 110, 1000, 1001)
         allreduce_launchkernel0 = HostPythonNode(
             'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 102, 104,
             1000, 1001)
@@ -451,7 +451,7 @@ class TestProfilerStatistic(unittest.TestCase):
                 0, profiler.TracerEventType.Memcpy), 60)
         self.assertEqual(
             time_range_summary.get_cpu_range_sum(
-                profiler.TracerEventType.UserDefined), 25)
+                profiler.TracerEventType.UserDefined), 15)
         self.assertEqual(
             time_range_summary.get_cpu_range_sum(
                 profiler.TracerEventType.Communication), 5)
@@ -518,9 +518,9 @@ class TestProfilerStatistic(unittest.TestCase):
         optimization_node = HostPythonNode(
             'Optimization', profiler.TracerEventType.Optimization, 220, 300,
             1000, 1001)
-        userdefined_node = HostPythonNode('Communication Time',
-                                          profiler.TracerEventType.UserDefined,
-                                          60, 70, 1000, 1001)
+        userdefined_node = HostPythonNode(
+            'Communication Time', profiler.TracerEventType.PythonUserDefined,
+            60, 70, 1000, 1001)
         conv2d_node = HostPythonNode('conv2d',
                                      profiler.TracerEventType.Operator, 25, 25,
diff --git a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py
index c1dd9c48fb5a16a847c1a3c5694bd16e21891633..63cdafbbf8d5925225c496ed6f2bdfc2977bf6c4 100755
--- a/python/paddle/profiler/profiler_statistic.py
+++ b/python/paddle/profiler/profiler_statistic.py
@@ -514,7 +514,8 @@ class EventSummary:
                             or 'memset' in host_statistic_node.name.lower():
                         self.add_memory_manipulation_item(host_statistic_node)
                     else:
-                        self.add_userdefined_item(host_statistic_node)
+                        if host_statistic_node.type == TracerEventType.PythonUserDefined:
+                            self.add_userdefined_item(host_statistic_node)
             self.add_kernel_item(host_statistic_nodes[0])
 
         for threadid, root_statistic_node in node_statistic_trees.items():
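
Note on the "%u" to "%llu" change in chrometracing_logger.cc: totalGlobalMem is a 64-bit byte count (cudaDeviceProp::totalGlobalMem is a size_t), so formatting it with a 32-bit "%u" conversion silently truncates it on devices with 4 GiB of memory or more. The following is a minimal standalone C++ sketch of that truncation; it is not part of the patch, and the 16 GiB figure is only an example value.

// Standalone demo (not part of the patch): why a 64-bit format specifier is
// needed for totalGlobalMem. Any value >= 4 GiB overflows a 32-bit "%u".
#include <cstdint>
#include <cstdio>

int main() {
  const std::uint64_t total_global_mem = 16ULL * 1024 * 1024 * 1024;  // bytes
  // 32-bit conversion drops the high bits and prints 0 for this value.
  std::printf("with %%u  : %u\n",
              static_cast<unsigned int>(total_global_mem));
  // 64-bit conversion keeps the full byte count, matching the fixed template.
  std::printf("with %%llu: %llu\n",
              static_cast<unsigned long long>(total_global_mem));
  return 0;
}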