diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 7960a08938c8c05a13481fa5518ce416ba8667ba..fdfff96fde2d7e03c95cc9532a1954a4ab27a355 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -73,9 +73,45 @@ void ProfilingUtils::GetTraceHccl(const std::vector &cnode_exec_order, std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exec_order) { const char *trace_begin = std::getenv(kFpStartNode); - auto &first_cnode = cnode_exec_order.front(); - MS_EXCEPTION_IF_NULL(first_cnode); - return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); + if (trace_begin != nullptr) { + return std::string(trace_begin); + } + + std::string fp_start_str = ""; + std::set getnext_outputs; + GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs)); + if (getnext_outputs.empty()) { + auto first_node = cnode_exec_order.front(); + MS_EXCEPTION_IF_NULL(first_node); + fp_start_str = first_node->fullname_with_scope(); + } else { + for (auto &cnode : cnode_exec_order) { + if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) { + fp_start_str = cnode->fullname_with_scope(); + break; + } + } + } + return fp_start_str; +} + +void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector &cnode_exec_order, + NotNull *> getnext_outputs) { + for (auto cnode : cnode_exec_order) { + for (auto input : cnode->inputs()) { + auto prev_cnode = AnfAlgo::VisitKernel(input, 0); + if (!prev_cnode.first->isa()) { + continue; + } + if (AnfAlgo::GetCNodeName(prev_cnode.first) == node_name) { + getnext_outputs->insert(cnode->fullname_with_scope()); + MS_LOG(INFO) << "Find GetNext Output CNode:" << cnode->fullname_with_scope(); + } + } + } + if (getnext_outputs->empty()) { + MS_LOG(WARNING) << "GetNext not found"; + } } std::string ProfilingUtils::GetTraceBpEnd(const std::vector &cnode_exec_order) { @@ -158,17 +194,22 @@ void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node NotNull *> kernel_list) { if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin; - auto job_id = ProfilingManager::GetInstance().GetJobId(); - ProfilingContent job_profiling_context = {false, job_id, 0}; - auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); - kernel_list->emplace_back(job_profiling_node); - + ProfilingTraceJobId(anf_node, graph_ptr, kernel_list); ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); kernel_list->emplace_back(fp_profiling_node); } } +void ProfilingUtils::ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull graph_ptr, + NotNull *> kernel_list) { + MS_LOG(INFO) << "Profiling Match start"; + auto job_id = ProfilingManager::GetInstance().GetJobId(); + ProfilingContent job_profiling_context = {false, job_id, 0}; + auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); + kernel_list->emplace_back(job_profiling_node); +} + CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node, const ProfilingContent &profiling_content, NotNull graph_ptr) { diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index f9f08c9d3f5aa14d7b9a6e9f5c39bf72f7669b73..1f7815b320adb504c2ad7b347ecf9828254293b1 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -65,6 +65,9 @@ class ProfilingUtils { NotNull graph_ptr, NotNull *> kernel_list); + static void ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull graph_ptr, + NotNull *> kernel_list); + // Insert net output profiling node, which tells the device to stop profiling. // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host. static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, @@ -113,6 +116,8 @@ class ProfilingUtils { static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); static void GetTraceHccl(const std::vector &cnode_exec_order, NotNull profiling_trace); + static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector &cnode_exec_order, + NotNull *> getnext_outputs); // graph id --> (kernel name list) static std::unordered_map> graph_kernel_name_; diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index b557436db94b6c8a65ca0724f6502d3ac4a16aa0..e8f38aa33979e1930273b4737c83e353360b656f 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -464,10 +464,13 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra } std::vector new_cnode_list; std::vector cnode_ptr_list = kernel_graph_ptr->execution_order(); + if (cnode_ptr_list.empty()) { + MS_LOG(ERROR) << "No CNode in graph"; + return; + } for (const auto &cnode_ptr : cnode_ptr_list) { ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); new_cnode_list.emplace_back(cnode_ptr); - ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));