提交 effdb483 编写于 作者: J jojobugfree 提交者: jojo

profiling feature enhancement

上级 315036b1
......@@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr<mindspore::session
<< AnfAlgo::GetStreamId(cur_cnode_ptr) << "], event_id["
<< GetValue<uint32_t>(primitive->GetAttr(kAttrEventId)) << "]";
} else {
MS_LOG(INFO) << "node name[" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id["
MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id["
<< AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id["
<< AnfAlgo::GetStreamId(cur_cnode_ptr) << "]";
}
......
......@@ -29,10 +29,6 @@ namespace ascend {
// PROFILING_CUSTOM_LOGID_START 3
const uint64_t kProfilingFpStartLogId = 1;
const uint64_t kProfilingBpEndLogId = 2;
const uint64_t kProfilingAllReduce1Start = 3;
const uint64_t kProfilingAllReduce1End = 4;
const uint64_t kProfilingAllReduce2Start = 5;
const uint64_t kProfilingAllReduce2End = 6;
const uint64_t kProfilingIterEndLogId = 255;
class ProfilingEngineImpl;
......
......@@ -14,10 +14,8 @@
* limitations under the License.
*/
#include "device/ascend/profiling/profiling_utils.h"
#include <map>
#include "device/ascend/profiling/profiling_utils.h"
#include "kernel/kernel.h"
#include "device/ascend/profiling/profiling_manager.h"
#include "session/anf_runtime_algorithm.h"
......@@ -27,82 +25,61 @@
namespace mindspore {
namespace device {
namespace ascend {
const char ProfilingUtils::kProfiling[] = "Profiling";
const char ProfilingUtils::kNotify[] = "notify";
const char ProfilingUtils::kProfilerTraceId[] = "profiler_trace_id";
const char ProfilingUtils::kFlags[] = "flags";
constexpr uint32_t kMaxProfilingNodeNum = 100;
constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
constexpr char kFpStartNode[] = "PROFILING_FP_START";
constexpr char kBpEndNode[] = "PROFILING_BP_END";
constexpr char kIterEndNode[] = "PROFILING_ITER_END";
std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
bool ProfilingUtils::GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr,
ProfilingTraceInfo *profiling_trace_info) {
MS_EXCEPTION_IF_NULL(profiling_trace_info);
MS_EXCEPTION_IF_NULL(graph_ptr);
bool find_begin = false;
bool first_allreduce = true;
for (const auto &anf_node : graph_ptr->execution_order()) {
if (anf_node->isa<CNode>()) {
const std::string kernel_name = AnfAlgo::GetCNodeName(anf_node);
if ((kernel_name == "Cast" || kernel_name == "Four2Five") && !find_begin) {
profiling_trace_info->profiling_trace_begin = anf_node->fullname_with_scope();
find_begin = true;
}
if (kernel_name == "Conv2DBackpropFilter") {
profiling_trace_info->profiling_trace_bp_end = anf_node->fullname_with_scope();
}
if (kernel_name == kFusedMulApplyMomentumOpName || kernel_name == kApplyMomentumOpName) {
profiling_trace_info->profiling_trace_netoutput = anf_node->fullname_with_scope();
}
if (kernel_name == kAllReduceOpName) {
if (first_allreduce) {
profiling_trace_info->profiling_allreduce1_start = anf_node->fullname_with_scope();
profiling_trace_info->profiling_allreduce1_end = anf_node->fullname_with_scope();
first_allreduce = false;
} else {
profiling_trace_info->profiling_allreduce2_start = anf_node->fullname_with_scope();
profiling_trace_info->profiling_allreduce2_end = anf_node->fullname_with_scope();
}
}
uint32_t ProfilingUtils::custom_node_index_ = 1;
ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr) {
MS_LOG(INFO) << "get env start";
custom_node_index_ = 1;
auto &cnode_exec_order = graph_ptr->execution_order();
ProfilingTraceInfo profiling_trace;
profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order);
profiling_trace.trace_bp_end = GetTraceBpEnd();
profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order);
MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin
<< " trace_bp_end:" << profiling_trace.trace_bp_end
<< " trace_netoutput:" << profiling_trace.trace_netoutput;
for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) {
std::string env_str = std::string(kCustomNode) + std::to_string(i);
const char *node_full_name = std::getenv(env_str.c_str());
if (node_full_name == nullptr) {
break;
}
MS_LOG(INFO) << "Get profiling node:" << node_full_name;
profiling_trace.trace_custom_node.insert(node_full_name);
}
MS_LOG(INFO) << "[profiling]begin:" << profiling_trace_info->profiling_trace_begin
<< ", net_output:" << profiling_trace_info->profiling_trace_netoutput
<< ", end:" << profiling_trace_info->profiling_trace_bp_end
<< ", allreduce1:" << profiling_trace_info->profiling_allreduce1_start
<< ", allreduce2:" << profiling_trace_info->profiling_allreduce2_start;
return profiling_trace_info->IsValid();
MS_LOG(INFO) << "get env end";
return profiling_trace;
}
bool ProfilingUtils::GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(profiling_trace_net_output);
MS_LOG(INFO) << "[profiling]Anf node's full name with scope:" << anf_node->fullname_with_scope();
if (!profiling_trace_net_output->empty()) {
MS_LOG(INFO) << "[profiling]Has got the net_output:" << profiling_trace_net_output->c_str();
return true;
}
if (AnfAlgo::IsRealKernel(anf_node)) {
*profiling_trace_net_output = anf_node->fullname_with_scope();
return true;
}
std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) {
const char *trace_begin = std::getenv(kFpStartNode);
auto &first_cnode = cnode_exec_order.front();
MS_EXCEPTION_IF_NULL(first_cnode);
return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin);
}
auto cnode = anf_node->cast<CNodePtr>();
if (cnode == nullptr) {
MS_LOG(ERROR) << "[profiling]Anf node should be a CNode";
return false;
}
// Reads the bp-end trace point from the PROFILING_BP_END environment variable.
// Returns an empty string when the variable is not set; there is no graph-derived default.
std::string ProfilingUtils::GetTraceBpEnd() {
  const char *env_value = std::getenv(kBpEndNode);
  if (env_value == nullptr) {
    return "";
  }
  return std::string(env_value);
}
auto inputs = cnode->inputs();
auto input_size = inputs.size();
if (input_size < 2) {
MS_LOG(ERROR) << "[profiling]Anf node' input size(" << input_size << ") < 2, don't support get apply kernel node.";
return false;
}
return GetNetOutput(inputs[1], profiling_trace_net_output);
std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) {
const char *trace_netoutput = std::getenv(kIterEndNode);
auto &last_cnode = cnode_exec_order.back();
MS_EXCEPTION_IF_NULL(last_cnode);
return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput);
}
CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify,
uint64_t profiler_trace_id, uint32_t flags) {
MS_EXCEPTION_IF_NULL(graph_ptr);
NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content,
NotNull<session::KernelGraph *> graph_ptr) {
kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder;
selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32});
......@@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::Ker
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_builder.Build(), cnode_ptr.get());
cnode_ptr->set_abstract(type_none_abstract);
// set attr
ValuePtr notify_value = MakeValue(notify);
ValuePtr trace_id_value = MakeValue(profiler_trace_id);
ValuePtr flags_value = MakeValue(flags);
ValuePtr notify_value = MakeValue(profiling_content.notify);
ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id);
ValuePtr flags_value = MakeValue(profiling_content.flags);
AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr);
AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr);
AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr);
return cnode_ptr;
return NOT_NULL(cnode_ptr);
}
void ProfilingUtils::ProfilingTraceFpStart(const std::shared_ptr<mindspore::session::KernelGraph> &graph_ptr,
const mindspore::AnfNodePtr &anf_node,
const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info,
std::vector<mindspore::CNodePtr> *kernel_list) {
if (profiling_trace_info.IsValid() && profiling_trace_info.profiling_trace_begin == anf_node->fullname_with_scope()) {
if (graph_ptr == nullptr || kernel_list == nullptr || anf_node == nullptr) {
MS_LOG(ERROR) << "[profiling]input param invalid";
return;
}
void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node,
const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
auto job_id = ProfilingManager::GetInstance().GetJobId();
// job task info
CNodePtr job_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0);
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), job_kernel_ptr.get());
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), job_kernel_ptr.get());
// fp task info
CNodePtr start_kernel_ptr = CreateProfilingCNode(graph_ptr, false, kProfilingFpStartLogId, 0);
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), start_kernel_ptr.get());
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), start_kernel_ptr.get());
kernel_list->emplace_back(job_kernel_ptr);
kernel_list->emplace_back(start_kernel_ptr);
ProfilingContent job_profiling_context = {false, job_id, 0};
auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
kernel_list->emplace_back(job_profiling_node);
ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0};
auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr);
kernel_list->emplace_back(fp_profiling_node);
}
}
void ProfilingUtils::ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr,
const AnfNodePtr &anf_node, int job_id, const std::string &profiling_node_name,
std::vector<CNodePtr> *kernel_list) {
MS_EXCEPTION_IF_NULL(graph_ptr);
// Builds a profiling CNode from `profiling_content` and copies the stream distinction label and
// stream id of `anf_node` onto it, then returns the new node.
CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
                                                        const ProfilingContent &profiling_content,
                                                        NotNull<session::KernelGraph *> graph_ptr) {
  CNodePtr new_node = CreateProfilingCNode(profiling_content, graph_ptr);
  // Mirror the stream placement of the reference node.
  const auto stream_label = AnfAlgo::GetStreamDistinctionLabel(anf_node.get());
  AnfAlgo::SetStreamDistinctionLabel(stream_label, new_node.get());
  const auto stream_id = AnfAlgo::GetStreamId(anf_node);
  AnfAlgo::SetStreamId(stream_id, new_node.get());
  return new_node;
}
void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(kernel_list);
auto full_scope_name = anf_node->fullname_with_scope();
if (profiling_node_name == full_scope_name) {
CNodePtr allreduce_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0);
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), allreduce_kernel_ptr.get());
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), allreduce_kernel_ptr.get());
kernel_list->emplace_back(allreduce_kernel_ptr);
auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope());
if (iter == profiling_trace_info.trace_custom_node.end()) {
return;
}
// custom op profiling job start from 3.
ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0};
CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr);
kernel_list->insert(kernel_list->end() - 1, front_node);
ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0};
CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr);
kernel_list->insert(kernel_list->end(), back_node);
++custom_node_index_;
}
void ProfilingUtils::ProfilingTraceEnd(const std::shared_ptr<mindspore::session::KernelGraph> &graph_ptr,
const mindspore::AnfNodePtr &anf_node,
const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info,
std::vector<mindspore::CNodePtr> *kernel_list) {
MS_EXCEPTION_IF_NULL(graph_ptr);
void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(kernel_list);
if (profiling_trace_info.IsValid()) {
auto full_scope_name = anf_node->fullname_with_scope();
if (profiling_trace_info.profiling_trace_netoutput == full_scope_name) {
CNodePtr bp_kernel_ptr = CreateProfilingCNode(graph_ptr, true, kProfilingIterEndLogId, 0);
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), bp_kernel_ptr.get());
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), bp_kernel_ptr.get());
kernel_list->emplace_back(bp_kernel_ptr);
}
if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) {
ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0};
CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
kernel_list->emplace_back(bp_end_node);
}
}
if (profiling_trace_info.profiling_trace_bp_end == full_scope_name) {
CNodePtr end_task_info = CreateProfilingCNode(graph_ptr, false, kProfilingBpEndLogId, 0);
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), end_task_info.get());
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), end_task_info.get());
kernel_list->emplace_back(end_task_info);
}
void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
MS_EXCEPTION_IF_NULL(anf_node);
auto full_scope_name = anf_node->fullname_with_scope();
if (profiling_trace_info.trace_netoutput == full_scope_name) {
ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0};
CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
kernel_list->emplace_back(bp_kernel_ptr);
}
}
......
......@@ -19,63 +19,102 @@
#include <memory>
#include <string>
#include <vector>
#include <set>
#include <unordered_map>
#include "session/kernel_graph.h"
#include "utils/contract.h"
namespace mindspore {
namespace device {
namespace ascend {
struct ProfilingTraceInfo {
// execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...)
std::string profiling_trace_begin;
std::string trace_begin;
// get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp
std::string profiling_trace_bp_end;
std::string trace_bp_end;
// execute order's end execute (like: Conv2DBackpropFilter)
std::string profiling_trace_netoutput;
std::string trace_netoutput;
std::string profiling_allreduce1_start;
std::string profiling_allreduce1_end;
std::string profiling_allreduce2_start;
std::string profiling_allreduce2_end;
// profiling specific op, such as AllReduce;
std::set<std::string> trace_custom_node;
// 1. insert profiling_trace_begin if profiling_trace_bp_end is not empty.
// 2. op launch gets task info with callback func.
// 3. insert profiling_trace_bp_end.
// 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty.
bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); }
bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); }
};
// Attribute values carried by an inserted profiling CNode; CreateProfilingCNode copies each
// field onto the node as the "notify", "profiler_trace_id" and "flags" attributes.
struct ProfilingContent {
// true - send data from device to host and finish profiling
bool notify;
// Trace/log id written to the node, e.g. the job id, kProfilingFpStartLogId,
// kProfilingBpEndLogId, kProfilingIterEndLogId, or a custom-op id.
uint64_t profiler_trace_id;
// Written to the "flags" attribute; every caller visible here passes 0.
// NOTE(review): the meaning of non-zero values is not shown in this file - confirm.
uint32_t flags;
};
class ProfilingUtils {
public:
ProfilingUtils() = default;
~ProfilingUtils() = default;
static bool GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr,
ProfilingTraceInfo *profiling_trace_info);
static void ProfilingTraceFpStart(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list);
static void ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
int job_id, const std::string &profiling_node_name,
std::vector<CNodePtr> *kernel_list);
static void ProfilingTraceEnd(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list);
// Insert job_id profiling node and fp_start profiling node.
// Job_id is obtained from the environment and should be a number greater than 255.
// The fp_start node should be inserted at the start of the network; its log_id is hard-coded to 1.
static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list);
// Insert net output profiling node, which tells the device to stop profiling.
// The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list);
// Insert bp_end profiling node, which should be inserted after the last backpropagation CNode in the network.
static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
// Mapping graph id and the kernels' name in the graph
static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names);
// Mapping task_id and kernel name for device to generate the time cost of specific kernel.
// Device calculate the time cost of the task which is marked by task id.
// But we need data of (kernel name , time cost)
static void ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids);
static const char kProfiling[];
static const char kNotify[];
static const char kProfilerTraceId[];
static const char kFlags[];
// Get profiling trace point from envs.
// export PROFILING_FP_START='full name of the first cnode to execute'
// export PROFILING_BP_END='full name of the last backpropagation cnode to execute'
// export PROFILING_ITER_END='full name of last cnode in graph to execute'
// For other cnodes, such as AllReduce: export PROFILING_CUSTOM_1='full name of AllReduce cnode'
// For GetNext: export PROFILING_CUSTOM_2='full name of GetNext cnode'
// The index i in PROFILING_CUSTOM_i must start from 1 and increase without gaps.
static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
// Insert two profiling trace points, one in front and one behind
static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
inline static constexpr char kProfiling[] = "Profiling";
inline static constexpr char kNotify[] = "notify";
inline static constexpr char kProfilerTraceId[] = "profiler_trace_id";
inline static constexpr char kFlags[] = "flags";
private:
static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output);
static CNodePtr CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify,
uint64_t profiler_trace_id, uint32_t flags);
static NotNull<CNodePtr> CreateProfilingCNode(const ProfilingContent &profiling_content,
NotNull<session::KernelGraph *> graph_ptr);
static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content,
NotNull<session::KernelGraph *> graph_ptr);
static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order);
static std::string GetTraceBpEnd();
static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
// graph id --> (kernel name list)
static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
static uint32_t custom_node_index_;
};
} // namespace ascend
} // namespace device
......
......@@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
MS_LOG(INFO) << "---------------- LoadSwitchInputs End--";
}
void KernelAdjust::Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) {
if (!ascend::ProfilingManager::GetInstance().IsProfiling()) {
MS_LOG(INFO) << "No need to profiling";
return;
}
ProfilingTraceInfo profiling_trace_info;
if (ProfilingUtils::GetProfilingTraceInfo(kernel_graph_ptr, &profiling_trace_info)) {
InsertProfilingKernel(kernel_graph_ptr, profiling_trace_info);
} else {
MS_LOG(WARNING) << "[profiling] GetProfilingTraceInfo failed";
ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GetProfilingTraceFromEnv(kernel_graph_ptr);
if (!profiling_trace_info.IsValid()) {
MS_LOG(WARNING) << "[profiling] no profiling node found!";
return;
}
InsertProfilingKernel(profiling_trace_info, kernel_graph_ptr);
}
void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
const ProfilingTraceInfo &profiling_trace_info) {
void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> kernel_graph_ptr) {
MS_LOG(INFO) << "[profiling] Insert profiling kernel start";
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
if (!profiling_trace_info.IsValid()) {
MS_LOG(WARNING) << "Profiling trace point not found";
return;
......@@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGr
std::vector<CNodePtr> new_cnode_list;
std::vector<CNodePtr> cnode_ptr_list = kernel_graph_ptr->execution_order();
for (const auto &cnode_ptr : cnode_ptr_list) {
ProfilingUtils::ProfilingTraceFpStart(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list);
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1Start,
profiling_trace_info.profiling_allreduce1_start, &new_cnode_list);
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2Start,
profiling_trace_info.profiling_allreduce2_start, &new_cnode_list);
ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
new_cnode_list.emplace_back(cnode_ptr);
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1End,
profiling_trace_info.profiling_allreduce1_end, &new_cnode_list);
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2End,
profiling_trace_info.profiling_allreduce2_end, &new_cnode_list);
ProfilingUtils::ProfilingTraceEnd(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list);
ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
}
kernel_graph_ptr->set_execution_order(new_cnode_list);
}
......
......@@ -48,7 +48,7 @@ class KernelAdjust {
void SetStreamSwitchOps(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
bool StepLoadCtrlInputs(const std::shared_ptr<session::Context> &context,
const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
void Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr);
static bool NeedInsertSwitch();
CNodePtr CreateSteamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
......@@ -66,8 +66,8 @@ class KernelAdjust {
kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector<std::string> &formats,
const std::vector<TypeId> &type_ids);
void LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs);
void InsertProfilingKernel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
const ProfilingTraceInfo &profiling_trace_info);
void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> kernel_graph_ptr);
};
} // namespace device
} // namespace mindspore
......
......@@ -246,7 +246,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
kernel_graph->SetExecOrderByDefault();
if (save_graphs) {
std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir";
DumpIR(file_path, kernel_graph);
DumpIR(file_path, kernel_graph, true);
DumpIRProto(kernel_graph, "after_hwopt");
}
}
......
......@@ -136,7 +136,7 @@ void AscendSession::BuildGraph(GraphId graph_id) {
// Assign streams for control sink and hccl and so on
AssignStream(graph);
device::KernelAdjust::GetInstance().Profiling(graph);
device::KernelAdjust::GetInstance().Profiling(NOT_NULL(graph.get()));
// build kernel if node is cnode
BuildKernel(graph);
auto ms_context = MsContext::GetInstance();
......
......@@ -42,6 +42,6 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::Context> &c
return true;
}
bool KernelAdjust::NeedInsertSwitch() { return true; }
void KernelAdjust::Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) { return; }
} // namespace device
} // namespace mindspore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册