提交 0cef4d8a 编写于 作者: C caifubi

Set the fp_start profiling point after the GetNext node

上级 235c6997
......@@ -73,9 +73,45 @@ void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) {
const char *trace_begin = std::getenv(kFpStartNode);
auto &first_cnode = cnode_exec_order.front();
MS_EXCEPTION_IF_NULL(first_cnode);
return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin);
if (trace_begin != nullptr) {
return std::string(trace_begin);
}
std::string fp_start_str = "";
std::set<std::string> getnext_outputs;
GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs));
if (getnext_outputs.empty()) {
auto first_node = cnode_exec_order.front();
MS_EXCEPTION_IF_NULL(first_node);
fp_start_str = first_node->fullname_with_scope();
} else {
for (auto &cnode : cnode_exec_order) {
if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) {
fp_start_str = cnode->fullname_with_scope();
break;
}
}
}
return fp_start_str;
}
void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
NotNull<std::set<std::string> *> getnext_outputs) {
for (auto cnode : cnode_exec_order) {
for (auto input : cnode->inputs()) {
auto prev_cnode = AnfAlgo::VisitKernel(input, 0);
if (!prev_cnode.first->isa<CNode>()) {
continue;
}
if (AnfAlgo::GetCNodeName(prev_cnode.first) == node_name) {
getnext_outputs->insert(cnode->fullname_with_scope());
MS_LOG(INFO) << "Find GetNext Output CNode:" << cnode->fullname_with_scope();
}
}
}
if (getnext_outputs->empty()) {
MS_LOG(WARNING) << "GetNext not found";
}
}
std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order) {
......@@ -158,17 +194,22 @@ void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin;
auto job_id = ProfilingManager::GetInstance().GetJobId();
ProfilingContent job_profiling_context = {false, job_id, 0};
auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
kernel_list->emplace_back(job_profiling_node);
ProfilingTraceJobId(anf_node, graph_ptr, kernel_list);
ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0};
auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr);
kernel_list->emplace_back(fp_profiling_node);
}
}
void ProfilingUtils::ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list) {
MS_LOG(INFO) << "Profiling Match start";
auto job_id = ProfilingManager::GetInstance().GetJobId();
ProfilingContent job_profiling_context = {false, job_id, 0};
auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
kernel_list->emplace_back(job_profiling_node);
}
CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
const ProfilingContent &profiling_content,
NotNull<session::KernelGraph *> graph_ptr) {
......
......@@ -65,6 +65,9 @@ class ProfilingUtils {
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list);
// Insert a profiling node that carries the job id obtained from ProfilingManager.
// The node is created through CreateProfilingCNodeWithStream for 'anf_node' in 'graph_ptr'
// and is appended to the end of 'kernel_list'.
static void ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list);
// Insert net output profiling node, which tells the device to stop profiling.
// The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
......@@ -113,6 +116,8 @@ class ProfilingUtils {
static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
NotNull<ProfilingTraceInfo *> profiling_trace);
// Collect into 'getnext_outputs' the fullname_with_scope of every node in 'cnode_exec_order'
// that has an input which, once resolved with AnfAlgo::VisitKernel, is a CNode named 'node_name'.
// Logs a warning when no such node is found.
static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
NotNull<std::set<std::string> *> getnext_outputs);
// graph id --> (kernel name list)
static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
......
......@@ -464,10 +464,13 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra
}
std::vector<CNodePtr> new_cnode_list;
std::vector<CNodePtr> cnode_ptr_list = kernel_graph_ptr->execution_order();
if (cnode_ptr_list.empty()) {
MS_LOG(ERROR) << "No CNode in graph";
return;
}
for (const auto &cnode_ptr : cnode_ptr_list) {
ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
new_cnode_list.emplace_back(cnode_ptr);
ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册