diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc index cbecb3030d5d20481516f8e1e63b36aa26fa7707..a3934093348da3f5760b6dbcfac18862e6aa3470 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc @@ -28,6 +28,7 @@ PluginIntf *ProfilingEngineImpl::CreatePlugin() { int ProfilingEngineImpl::ReleasePlugin(PluginIntf *plugin) { if (plugin != nullptr) { delete plugin; + plugin = nullptr; } return 0; } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc index fec1aac685f87a583589caf18e456c989fc6d262..a2fe5b852df193452bb6ac699d9586d4c0657572 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc @@ -15,11 +15,8 @@ */ #include "device/ascend/profiling/profiling_manager.h" - #include #include - -#include #include "securec/include/securec.h" #include "./prof_mgr_core.h" #include "device/ascend/profiling/plugin_impl.h" @@ -30,9 +27,6 @@ #include "utils/convert_utils.h" #include "runtime/base.h" -using std::vector; -using Json = nlohmann::json; - namespace mindspore { namespace device { namespace ascend { @@ -124,35 +118,43 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); const string prof_options_str = context->profiling_options(); - vector opts = Split(prof_options_str, ':'); + std::vector opts = Split(prof_options_str, ':'); if (opts.empty()) { MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!"; return true; } // current one docker only use one device` - Json p_device; + nlohmann::json p_device; // JOBID auto job_id = GetJobId(); p_device["jobID"] = std::to_string(job_id); // device_id p_device["deviceID"] = std::to_string(device_id); // features:'training_trace', 'task_trace' etc - Json features; - for (vector::size_type i = 0; i < opts.size(); i++) { - Json f; + nlohmann::json features; + for (std::vector::size_type i = 0; i < opts.size(); i++) { + nlohmann::json f; f["name"] = opts[i]; features[i] = f; } p_device["features"] = features; // only one device, but sProfMgrStartUp API require for device list - Json devices; + nlohmann::json devices; devices[0] = p_device; - Json startCfg; + nlohmann::json startCfg; startCfg["startCfg"] = devices; + if (!ProfStartUp(NOT_NULL(&startCfg))) { + MS_LOG(ERROR) << "ProfMgrStartUp failed."; + return false; + } + return true; +} + +bool ProfilingManager::ProfStartUp(NotNull startCfg) { // convert json to string std::stringstream ss; - ss << startCfg; + ss << *startCfg; std::string cfg = ss.str(); MS_LOG(INFO) << "profiling config " << cfg; auto ret = rtProfilerStart(); diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h index c30c6898ea7ce0b670ba4168d85974862ed61d75..05b524899669ce74ff37ec20b555c59fba6bce69 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h @@ -20,18 +20,15 @@ #include #include #include +#include +#include "utils/contract.h" #include "utils/context/ms_context.h" + using std::map; using std::string; - namespace mindspore { namespace device { namespace ascend { -// PROFILING_CUSTOM_LOGID_START 3 -const uint64_t kProfilingFpStartLogId = 1; -const uint64_t kProfilingBpEndLogId = 2; -const uint64_t kProfilingIterEndLogId = 255; - class ProfilingEngineImpl; class ProfilingManager { public: @@ -52,6 +49,7 @@ class ProfilingManager { ~ProfilingManager() { prof_handle_ = nullptr; } private: + bool ProfStartUp(NotNull json); std::shared_ptr engine_0_; uint32_t device_id_; void *prof_handle_; diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 131a22805d0ec8774342a64d14259e2dc2c72a45..17ac4c4530267efc20274692437615a62da69033 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -33,6 +33,10 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; constexpr char kFpStartNode[] = "PROFILING_FP_START"; constexpr char kBpEndNode[] = "PROFILING_BP_END"; constexpr char kIterEndNode[] = "PROFILING_ITER_END"; +// PROFILING_CUSTOM_LOGID_START 3 +constexpr uint64_t kProfilingFpStartLogId = 1; +constexpr uint64_t kProfilingBpEndLogId = 2; +constexpr uint64_t kProfilingIterEndLogId = 255; std::map> ProfilingUtils::graph_profiling_cnode_; std::map> ProfilingUtils::graph_kernel_name_; std::map>> ProfilingUtils::graph_point_; diff --git a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc b/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc index 603dd989e5ea25e347c9eee7c3fd3201606b5f97..3faeefb820f7446194a972a615d4210a42a20056 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc @@ -58,9 +58,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info if (task_info->hccl_type() == kBroadcastOpName) { // call hcom broadcast interface to run op const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0); - ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast(task_info->input_data_addr()), - static_cast(task_info->count()), static_cast(task_info->data_type()), - static_cast(task_info->root_id()), hccl_group.c_str(), stream); + ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast(task_info->count()), + static_cast(task_info->data_type()), static_cast(task_info->root_id()), + hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast(ret); return false; @@ -68,9 +68,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info } else if (task_info->hccl_type() == kAllGatherOpName) { // call hcom allgather interface to run op const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0); - ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast(task_info->input_data_addr()), - reinterpret_cast(task_info->output_data_addr()), static_cast(task_info->count()), - static_cast(task_info->data_type()), hccl_group.c_str(), stream); + ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(), + static_cast(task_info->count()), static_cast(task_info->data_type()), + hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret; return false; @@ -78,9 +78,8 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info } else if (task_info->hccl_type() == kAllReduceOpName) { // call hcom allreduce interface to run op const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0); - ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast(task_info->input_data_addr()), - reinterpret_cast(task_info->output_data_addr()), static_cast(task_info->count()), - static_cast(task_info->data_type()), + ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(), + static_cast(task_info->count()), static_cast(task_info->data_type()), static_cast(task_info->op_type()), hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret; @@ -90,8 +89,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info // call hcom reducescatter interface to run op const string tag_reduce_scatter = kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0); - ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast(task_info->input_data_addr()), - reinterpret_cast(task_info->output_data_addr()), + ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(), static_cast(task_info->count()), static_cast(task_info->data_type()), static_cast(task_info->op_type()), hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc index 0cdf7518016f7149e1a423ffe6df57343e6c2131..e026459ae9770ee6ebd4be5913a26a41e061ab31 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc @@ -40,39 +40,46 @@ bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::ve return true; } +void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) { + MS_EXCEPTION_IF_NULL(anf_node_ptr); + MS_EXCEPTION_IF_NULL(kernel_inputs); + // akg process + // set atomic clean addr + if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) { + auto clean_output_indexs = AnfAlgo::GetNodeAttr>(anf_node_ptr, kAttrAtomicOutputIndexs); + auto graph = anf_node_ptr->func_graph(); + MS_EXCEPTION_IF_NULL(graph); + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto node_users = manager->node_users(); + if (node_users[anf_node_ptr].empty()) { + MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty."; + } + auto depend_node = node_users[anf_node_ptr].pop().first; + if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { + MS_LOG(EXCEPTION) << "Checking Depend node failed"; + } + if (node_users[depend_node].empty()) { + MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty."; + } + auto post_node = node_users[depend_node].pop().first; + for (auto index : clean_output_indexs) { + auto device_address = AnfAlgo::GetOutputAddr(post_node, index); + kernel::AddressPtr input = std::make_shared(); + MS_EXCEPTION_IF_NULL(input); + input->addr = device_address->ptr_; + input->size = device_address->size_; + kernel_inputs->push_back(input); + } + MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size(); + } +} + void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) { MS_EXCEPTION_IF_NULL(anf_node_ptr); + MS_EXCEPTION_IF_NULL(kernel_inputs); if (anf_node_ptr->inputs().size() != 2) { - // akg process - // set atomic clean addr - if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) { - auto clean_output_indexs = AnfAlgo::GetNodeAttr>(anf_node_ptr, kAttrAtomicOutputIndexs); - auto graph = anf_node_ptr->func_graph(); - MS_EXCEPTION_IF_NULL(graph); - auto manager = graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - auto node_users = manager->node_users(); - if (node_users[anf_node_ptr].empty()) { - MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty."; - } - auto depend_node = node_users[anf_node_ptr].pop().first; - if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { - MS_LOG(EXCEPTION) << "Checking Depend node failed"; - } - if (node_users[depend_node].empty()) { - MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty."; - } - auto post_node = node_users[depend_node].pop().first; - for (auto index : clean_output_indexs) { - auto device_address = AnfAlgo::GetOutputAddr(post_node, index); - kernel::AddressPtr input = std::make_shared(); - input->addr = device_address->ptr_; - MS_EXCEPTION_IF_NULL(input->addr); - input->size = device_address->size_; - kernel_inputs->push_back(input); - } - MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size(); - } + LaunchAddrCleanAkgKernel(anf_node_ptr, kernel_inputs); return; } MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]); diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h b/mindspore/ccsrc/device/ascend/tasksink/task_generator.h index ffedcd7930c81c783f3159d20118ab986f929820..ecd5889b04def775ce601022b088b9cbc2dbde54 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.h @@ -48,6 +48,7 @@ class TaskGenerator { private: static void LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs); + static void LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs); static bool LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, std::vector *task_info_list); static bool LaunchAllKernel(const std::vector &anf_node_list, std::vector *task_info_list, uint32_t graph_id); diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc index cfcc1b7c796587300ab6941df3761bf425222e51..cdb848add263fe79ac6d595dfaca755525f618c4 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc @@ -79,6 +79,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph std::vector data_shape = tensor->shape(); size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies()); DeviceAddressPtr address = CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeFloat32); + MS_EXCEPTION_IF_NULL(address); if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) { address->ptr_ = tensor->data_c(false); } else {