From effdb483d6166f1440fe6c1d00302b3847ad994c Mon Sep 17 00:00:00 2001 From: jojobugfree Date: Tue, 7 Apr 2020 19:59:36 +0800 Subject: [PATCH] profiling feature enhancement --- .../device/ascend/ascend_stream_assign.cc | 2 +- .../ascend/profiling/profiling_manager.h | 4 - .../ascend/profiling/profiling_utils.cc | 229 ++++++++---------- .../device/ascend/profiling/profiling_utils.h | 93 ++++--- mindspore/ccsrc/device/kernel_adjust.cc | 31 +-- mindspore/ccsrc/device/kernel_adjust.h | 6 +- .../ascend/ascend_backend_optimization.cc | 2 +- mindspore/ccsrc/session/ascend_session.cc | 2 +- .../tasksink/ascend_stream_assign_stub.cc | 2 +- 9 files changed, 190 insertions(+), 181 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc index 4f16c596c..8c4d1f4a8 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc @@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr(primitive->GetAttr(kAttrEventId)) << "]"; } else { - MS_LOG(INFO) << "node name[" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id[" + MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id[" << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h index de8f6a7d0..b826c4cf3 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h @@ -29,10 +29,6 @@ namespace ascend { // PROFILING_CUSTOM_LOGID_START 3 const uint64_t kProfilingFpStartLogId = 1; const uint64_t kProfilingBpEndLogId = 2; -const uint64_t kProfilingAllReduce1Start = 3; -const uint64_t kProfilingAllReduce1End = 4; -const uint64_t kProfilingAllReduce2Start = 
5; -const uint64_t kProfilingAllReduce2End = 6; const uint64_t kProfilingIterEndLogId = 255; class ProfilingEngineImpl; diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 0d7088300..aa71aa056 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -14,10 +14,8 @@ * limitations under the License. */ -#include "device/ascend/profiling/profiling_utils.h" - #include - +#include "device/ascend/profiling/profiling_utils.h" #include "kernel/kernel.h" #include "device/ascend/profiling/profiling_manager.h" #include "session/anf_runtime_algorithm.h" @@ -27,82 +25,61 @@ namespace mindspore { namespace device { namespace ascend { -const char ProfilingUtils::kProfiling[] = "Profiling"; -const char ProfilingUtils::kNotify[] = "notify"; -const char ProfilingUtils::kProfilerTraceId[] = "profiler_trace_id"; -const char ProfilingUtils::kFlags[] = "flags"; +constexpr uint32_t kMaxProfilingNodeNum = 100; +constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; +constexpr char kFpStartNode[] = "PROFILING_FP_START"; +constexpr char kBpEndNode[] = "PROFILING_BP_END"; +constexpr char kIterEndNode[] = "PROFILING_ITER_END"; std::unordered_map> ProfilingUtils::graph_kernel_name_; -bool ProfilingUtils::GetProfilingTraceInfo(const std::shared_ptr &graph_ptr, - ProfilingTraceInfo *profiling_trace_info) { - MS_EXCEPTION_IF_NULL(profiling_trace_info); - MS_EXCEPTION_IF_NULL(graph_ptr); - bool find_begin = false; - bool first_allreduce = true; - for (const auto &anf_node : graph_ptr->execution_order()) { - if (anf_node->isa()) { - const std::string kernel_name = AnfAlgo::GetCNodeName(anf_node); - if ((kernel_name == "Cast" || kernel_name == "Four2Five") && !find_begin) { - profiling_trace_info->profiling_trace_begin = anf_node->fullname_with_scope(); - find_begin = true; - } - if (kernel_name == "Conv2DBackpropFilter") { - 
profiling_trace_info->profiling_trace_bp_end = anf_node->fullname_with_scope(); - } - if (kernel_name == kFusedMulApplyMomentumOpName || kernel_name == kApplyMomentumOpName) { - profiling_trace_info->profiling_trace_netoutput = anf_node->fullname_with_scope(); - } - if (kernel_name == kAllReduceOpName) { - if (first_allreduce) { - profiling_trace_info->profiling_allreduce1_start = anf_node->fullname_with_scope(); - profiling_trace_info->profiling_allreduce1_end = anf_node->fullname_with_scope(); - first_allreduce = false; - } else { - profiling_trace_info->profiling_allreduce2_start = anf_node->fullname_with_scope(); - profiling_trace_info->profiling_allreduce2_end = anf_node->fullname_with_scope(); - } - } +uint32_t ProfilingUtils::custom_node_index_ = 1; + +ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { + MS_LOG(INFO) << "get env start"; + custom_node_index_ = 1; + auto &cnode_exec_order = graph_ptr->execution_order(); + ProfilingTraceInfo profiling_trace; + profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order); + profiling_trace.trace_bp_end = GetTraceBpEnd(); + profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order); + + MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin + << " trace_bp_end:" << profiling_trace.trace_bp_end + << " trace_netoutput:" << profiling_trace.trace_netoutput; + + for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) { + std::string env_str = std::string(kCustomNode) + std::to_string(i); + const char *node_full_name = std::getenv(env_str.c_str()); + if (node_full_name == nullptr) { + break; } + MS_LOG(INFO) << "Get profiling node:" << node_full_name; + profiling_trace.trace_custom_node.insert(node_full_name); } - MS_LOG(INFO) << "[profiling]begin:" << profiling_trace_info->profiling_trace_begin - << ", net_output:" << profiling_trace_info->profiling_trace_netoutput - << ", end:" << profiling_trace_info->profiling_trace_bp_end - << ", allreduce1:" << 
profiling_trace_info->profiling_allreduce1_start - << ", allreduce2:" << profiling_trace_info->profiling_allreduce2_start; - return profiling_trace_info->IsValid(); + MS_LOG(INFO) << "get env end"; + return profiling_trace; } -bool ProfilingUtils::GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(profiling_trace_net_output); - MS_LOG(INFO) << "[profiling]Anf node's full name with scope:" << anf_node->fullname_with_scope(); - if (!profiling_trace_net_output->empty()) { - MS_LOG(INFO) << "[profiling]Has got the net_output:" << profiling_trace_net_output->c_str(); - return true; - } - - if (AnfAlgo::IsRealKernel(anf_node)) { - *profiling_trace_net_output = anf_node->fullname_with_scope(); - return true; - } +std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exec_order) { + const char *trace_begin = std::getenv(kFpStartNode); + auto &first_cnode = cnode_exec_order.front(); + MS_EXCEPTION_IF_NULL(first_cnode); + return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); +} - auto cnode = anf_node->cast(); - if (cnode == nullptr) { - MS_LOG(ERROR) << "[profiling]Anf node should be a CNode"; - return false; - } +std::string ProfilingUtils::GetTraceBpEnd() { + const char *trace_bp_end = std::getenv(kBpEndNode); + return trace_bp_end == nullptr ? "" : std::string(trace_bp_end); +} - auto inputs = cnode->inputs(); - auto input_size = inputs.size(); - if (input_size < 2) { - MS_LOG(ERROR) << "[profiling]Anf node' input size(" << input_size << ") < 2, don't support get apply kernel node."; - return false; - } - return GetNetOutput(inputs[1], profiling_trace_net_output); +std::string ProfilingUtils::GetTraceNetoutput(const std::vector &cnode_exec_order) { + const char *trace_netoutput = std::getenv(kIterEndNode); + auto &last_cnode = cnode_exec_order.back(); + MS_EXCEPTION_IF_NULL(last_cnode); + return trace_netoutput == nullptr ? 
last_cnode->fullname_with_scope() : std::string(trace_netoutput); } -CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr &graph_ptr, bool notify, - uint64_t profiler_trace_id, uint32_t flags) { - MS_EXCEPTION_IF_NULL(graph_ptr); +NotNull ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, + NotNull graph_ptr) { kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder; selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32}); @@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptrset_abstract(type_none_abstract); // set attr - ValuePtr notify_value = MakeValue(notify); - ValuePtr trace_id_value = MakeValue(profiler_trace_id); - ValuePtr flags_value = MakeValue(flags); + ValuePtr notify_value = MakeValue(profiling_content.notify); + ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id); + ValuePtr flags_value = MakeValue(profiling_content.flags); AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr); AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr); AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr); - return cnode_ptr; + return NOT_NULL(cnode_ptr); } -void ProfilingUtils::ProfilingTraceFpStart(const std::shared_ptr &graph_ptr, - const mindspore::AnfNodePtr &anf_node, - const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, - std::vector *kernel_list) { - if (profiling_trace_info.IsValid() && profiling_trace_info.profiling_trace_begin == anf_node->fullname_with_scope()) { - if (graph_ptr == nullptr || kernel_list == nullptr || anf_node == nullptr) { - MS_LOG(ERROR) << "[profiling]input param invalid"; - return; - } +void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node, + const ProfilingTraceInfo &profiling_trace_info, + 
NotNull graph_ptr, + NotNull *> kernel_list) { + if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { auto job_id = ProfilingManager::GetInstance().GetJobId(); - // job task info - CNodePtr job_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), job_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), job_kernel_ptr.get()); - // fp task info - CNodePtr start_kernel_ptr = CreateProfilingCNode(graph_ptr, false, kProfilingFpStartLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), start_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), start_kernel_ptr.get()); - kernel_list->emplace_back(job_kernel_ptr); - kernel_list->emplace_back(start_kernel_ptr); + ProfilingContent job_profiling_context = {false, job_id, 0}; + auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); + kernel_list->emplace_back(job_profiling_node); + + ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; + auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); + kernel_list->emplace_back(fp_profiling_node); } } -void ProfilingUtils::ProfilingAllReduce(const std::shared_ptr &graph_ptr, - const AnfNodePtr &anf_node, int job_id, const std::string &profiling_node_name, - std::vector *kernel_list) { - MS_EXCEPTION_IF_NULL(graph_ptr); +CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node, + const ProfilingContent &profiling_content, + NotNull graph_ptr) { + CNodePtr profiling_node = CreateProfilingCNode(profiling_content, graph_ptr); + AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), profiling_node.get()); + AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), profiling_node.get()); + return 
profiling_node; +} + +void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(kernel_list); - auto full_scope_name = anf_node->fullname_with_scope(); - if (profiling_node_name == full_scope_name) { - CNodePtr allreduce_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), allreduce_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), allreduce_kernel_ptr.get()); - kernel_list->emplace_back(allreduce_kernel_ptr); + auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope()); + if (iter == profiling_trace_info.trace_custom_node.end()) { + return; } + // custom op profiling job start from 3. + ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0}; + CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); + kernel_list->insert(kernel_list->end() - 1, front_node); + + ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0}; + CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr); + kernel_list->insert(kernel_list->end(), back_node); + ++custom_node_index_; } -void ProfilingUtils::ProfilingTraceEnd(const std::shared_ptr &graph_ptr, - const mindspore::AnfNodePtr &anf_node, - const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, - std::vector *kernel_list) { - MS_EXCEPTION_IF_NULL(graph_ptr); +void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(kernel_list); - if (profiling_trace_info.IsValid()) { - auto full_scope_name = 
anf_node->fullname_with_scope(); - if (profiling_trace_info.profiling_trace_netoutput == full_scope_name) { - CNodePtr bp_kernel_ptr = CreateProfilingCNode(graph_ptr, true, kProfilingIterEndLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), bp_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), bp_kernel_ptr.get()); - kernel_list->emplace_back(bp_kernel_ptr); - } + if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) { + ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0}; + CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); + kernel_list->emplace_back(bp_end_node); + } +} - if (profiling_trace_info.profiling_trace_bp_end == full_scope_name) { - CNodePtr end_task_info = CreateProfilingCNode(graph_ptr, false, kProfilingBpEndLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), end_task_info.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), end_task_info.get()); - kernel_list->emplace_back(end_task_info); - } +void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto full_scope_name = anf_node->fullname_with_scope(); + if (profiling_trace_info.trace_netoutput == full_scope_name) { + ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0}; + CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); + kernel_list->emplace_back(bp_kernel_ptr); } } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index ca0ef6f1f..c59e85624 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h 
@@ -19,63 +19,102 @@ #include #include #include +#include #include #include "session/kernel_graph.h" +#include "utils/contract.h" namespace mindspore { namespace device { namespace ascend { struct ProfilingTraceInfo { // execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...) - std::string profiling_trace_begin; + std::string trace_begin; // get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp - std::string profiling_trace_bp_end; + std::string trace_bp_end; // execute order's end execute (like: Conv2DBackpropFilter) - std::string profiling_trace_netoutput; + std::string trace_netoutput; - std::string profiling_allreduce1_start; - - std::string profiling_allreduce1_end; - - std::string profiling_allreduce2_start; - - std::string profiling_allreduce2_end; + // profiling specific op, such as AllReduce; + std::set trace_custom_node; // 1. insert profiling_trace_begin if profiling_trace_bp_end is not empty. // 2. op lanuch get task info with callback func. // 3. insert profiling_trace_bp_end. // 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty. 
- bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); } + bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); } +}; + +struct ProfilingContent { + // true -send data from device to host and finish profiling + bool notify; + uint64_t profiler_trace_id; + uint32_t flags; }; class ProfilingUtils { public: ProfilingUtils() = default; ~ProfilingUtils() = default; - static bool GetProfilingTraceInfo(const std::shared_ptr &graph_ptr, - ProfilingTraceInfo *profiling_trace_info); - static void ProfilingTraceFpStart(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - const ProfilingTraceInfo &profiling_trace_info, std::vector *kernel_list); - static void ProfilingAllReduce(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - int job_id, const std::string &profiling_node_name, - std::vector *kernel_list); - static void ProfilingTraceEnd(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - const ProfilingTraceInfo &profiling_trace_info, std::vector *kernel_list); + + // Insert job_id profiling node and fp_start profiling node. + // Job_id is got from envs, which should be a number greater than 255 + // Fp_start node should be inserted at the start of a network, and the log_id is hard-coded to 1. + static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Insert net output profiling node, which tells the device to stop profiling. + // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host. + static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Insert bp_end profiling node, which should be inserted after the last backpropagation CNode in the network. 
+ static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Mapping graph id and the kernels' name in the graph static void SetGraphKernelName(uint32_t graph_id, const std::vector &kernel_names); + + // Mapping task_id and kernel name for device to generate the time cost of specific kernel. + // The device calculates the time cost of the task which is marked by task id. + // But we need data of (kernel name , time cost) static void ReportProfilingData(uint32_t graph_id, const std::vector &task_ids); - static const char kProfiling[]; - static const char kNotify[]; - static const char kProfilerTraceId[]; - static const char kFlags[]; + // Get profiling trace point from envs. + // export PROFILING_FP_START='full name of the first cnode to execute' + // export PROFILING_BP_END='full name of the last backpropagation cnode to execute' + // export PROFILING_ITER_END='full name of last cnode in graph to execute' + // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode' + // GetNext, export PROFILING_CUSTOM_2='full name of GetNext cnode' + // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption. 
+ static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull graph_ptr); + + // Insert two profiling trace points, one in front and one behind + static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + inline static constexpr char kProfiling[] = "Profiling"; + inline static constexpr char kNotify[] = "notify"; + inline static constexpr char kProfilerTraceId[] = "profiler_trace_id"; + inline static constexpr char kFlags[] = "flags"; private: - static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output); - static CNodePtr CreateProfilingCNode(const std::shared_ptr &graph_ptr, bool notify, - uint64_t profiler_trace_id, uint32_t flags); + static NotNull CreateProfilingCNode(const ProfilingContent &profiling_content, + NotNull graph_ptr); + static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content, + NotNull graph_ptr); + static std::string GetTraceBegin(const std::vector &cnode_exec_order); + static std::string GetTraceBpEnd(); + static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); + // graph id --> (kernel name list) static std::unordered_map> graph_kernel_name_; + static uint32_t custom_node_index_; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index c422d50b5..9a6f48025 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; } -void KernelAdjust::Profiling(const std::shared_ptr &kernel_graph_ptr) { +void KernelAdjust::Profiling(NotNull kernel_graph_ptr) { if (!ascend::ProfilingManager::GetInstance().IsProfiling()) { MS_LOG(INFO) << "No need to profiling"; return; } - 
ProfilingTraceInfo profiling_trace_info; - if (ProfilingUtils::GetProfilingTraceInfo(kernel_graph_ptr, &profiling_trace_info)) { - InsertProfilingKernel(kernel_graph_ptr, profiling_trace_info); - } else { - MS_LOG(WARNING) << "[profiling] GetProfilingTraceInfo failed"; + ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GetProfilingTraceFromEnv(kernel_graph_ptr); + if (!profiling_trace_info.IsValid()) { + MS_LOG(WARNING) << "[profiling] no profiling node found!"; + return; } + InsertProfilingKernel(profiling_trace_info, kernel_graph_ptr); } -void KernelAdjust::InsertProfilingKernel(const std::shared_ptr &kernel_graph_ptr, - const ProfilingTraceInfo &profiling_trace_info) { +void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, + NotNull kernel_graph_ptr) { MS_LOG(INFO) << "[profiling] Insert profiling kernel start"; - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); if (!profiling_trace_info.IsValid()) { MS_LOG(WARNING) << "Profiling trace point not found"; return; @@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr new_cnode_list; std::vector cnode_ptr_list = kernel_graph_ptr->execution_order(); for (const auto &cnode_ptr : cnode_ptr_list) { - ProfilingUtils::ProfilingTraceFpStart(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1Start, - profiling_trace_info.profiling_allreduce1_start, &new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2Start, - profiling_trace_info.profiling_allreduce2_start, &new_cnode_list); + ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); new_cnode_list.emplace_back(cnode_ptr); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1End, - profiling_trace_info.profiling_allreduce1_end, 
&new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2End, - profiling_trace_info.profiling_allreduce2_end, &new_cnode_list); - ProfilingUtils::ProfilingTraceEnd(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); + ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); + ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); + ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); } kernel_graph_ptr->set_execution_order(new_cnode_list); } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h index 62c64d98b..ca01d51e5 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/device/kernel_adjust.h @@ -48,7 +48,7 @@ class KernelAdjust { void SetStreamSwitchOps(const std::shared_ptr &kernel_graph_ptr); bool StepLoadCtrlInputs(const std::shared_ptr &context, const std::shared_ptr &kernel_graph_ptr); - void Profiling(const std::shared_ptr &kernel_graph_ptr); + void Profiling(NotNull kernel_graph_ptr); static bool NeedInsertSwitch(); CNodePtr CreateSteamActiveOp(const std::shared_ptr &kernel_graph_ptr); @@ -66,8 +66,8 @@ class KernelAdjust { kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector &formats, const std::vector &type_ids); void LoadSwitchInputs(std::vector *inputs); - void InsertProfilingKernel(const std::shared_ptr &kernel_graph_ptr, - const ProfilingTraceInfo &profiling_trace_info); + void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, + NotNull kernel_graph_ptr); }; } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index 8212d64c2..432d88e7a 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -246,7 +246,7 @@ void AscendBackendOptimization(const std::shared_ptr &kern kernel_graph->SetExecOrderByDefault(); if (save_graphs) { std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir"; - DumpIR(file_path, kernel_graph); + DumpIR(file_path, kernel_graph, true); DumpIRProto(kernel_graph, "after_hwopt"); } } diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index 9d351f319..34c05aed0 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -136,7 +136,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { // Assign streams for control sink and hccl and so on AssignStream(graph); - device::KernelAdjust::GetInstance().Profiling(graph); + device::KernelAdjust::GetInstance().Profiling(NOT_NULL(graph.get())); // build kernel if node is cnode BuildKernel(graph); auto ms_context = MsContext::GetInstance(); diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc index ebd2ac8b4..e0b5ab0d6 100755 --- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc +++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc @@ -42,6 +42,6 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr &c return true; } bool KernelAdjust::NeedInsertSwitch() { return true; } -void KernelAdjust::Profiling(const std::shared_ptr &kernel_graph_ptr) { return; } +void KernelAdjust::Profiling(NotNull kernel_graph_ptr) { return; } } // namespace device } // namespace mindspore -- GitLab