diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index c1478915b7cdcf75340980a71d18c0fe4903664d..1f87bf7bfa1bfb301e1a5b18f05e61c2dd2b6af4 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -33,7 +33,7 @@ constexpr char kIterEndNode[] = "PROFILING_ITER_END"; std::unordered_map> ProfilingUtils::graph_kernel_name_; uint32_t ProfilingUtils::custom_node_index_ = 1; -ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(const NotNull graph_ptr) { +ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { MS_LOG(INFO) << "get env start"; custom_node_index_ = 1; auto &cnode_exec_order = graph_ptr->execution_order(); @@ -148,18 +148,29 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector &cnode_exe } if (bp_end_str.empty()) { - auto last_cnode = cnode_exec_order.back(); - MS_EXCEPTION_IF_NULL(last_cnode); - bp_end_str = last_cnode->fullname_with_scope(); + bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order); } return bp_end_str; } +std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector &cnode_exec_order) { + std::string last_tbe_kernel_name = ""; + // find last tbe_kernel + for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) { + if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) { + last_tbe_kernel_name = (*iter)->fullname_with_scope(); + break; + } + } + if (last_tbe_kernel_name.empty()) { + MS_LOG(WARNING) << "tbe kernel not found in graph"; + } + return last_tbe_kernel_name; +} + std::string ProfilingUtils::GetTraceNetoutput(const std::vector &cnode_exec_order) { const char *trace_netoutput = std::getenv(kIterEndNode); - auto &last_cnode = cnode_exec_order.back(); - MS_EXCEPTION_IF_NULL(last_cnode); - return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); + return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput); } NotNull ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index 99245b2c57f6793c1ee30fff27a0770cc24cedd3..59909c1f2f889f4cae057d7bc4e7ef30dfe7ec0c 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -94,7 +94,7 @@ class ProfilingUtils { // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode' // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode' // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption. - static ProfilingTraceInfo GetProfilingTraceFromEnv(const NotNull graph_ptr); + static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull graph_ptr); // Insert two profiling trace points, one in front and one behind static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, @@ -114,6 +114,7 @@ class ProfilingUtils { static std::string GetTraceBegin(const std::vector &cnode_exec_order); static std::string GetTraceBpEnd(const std::vector &cnode_exec_order); static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); + static std::string GetGraphLastTbeKernelName(const std::vector &cnode_exec_order); static void GetTraceHccl(const std::vector &cnode_exec_order, NotNull profiling_trace); static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector &cnode_exec_order,