提交 246fc290 编写于 作者: C caifubi 提交者: jojobugfree

clean runtime codex

上级 496ffff3
......@@ -453,25 +453,26 @@ bool AscendKernelRuntime::HcclInit() {
}
MS_LOG(INFO) << "do hcom init";
std::string path;
const char *config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH");
if (config_path_str == nullptr) {
MS_LOG(ERROR) << "get hccl json config failed, please set env MINDSPORE_HCCL_CONFIG_PATH";
return false;
}
path = config_path_str;
char fullPath[PATH_MAX] = {0};
if (path.size() > PATH_MAX || realpath(path.c_str(), fullPath) == nullptr) {
MS_LOG(ERROR) << "file " << path << " is not exist";
auto full_path = realpath(config_path_str, nullptr);
if (full_path == nullptr) {
MS_LOG(ERROR) << "file path " << config_path_str << " does not exist";
return false;
}
const char *identify = std::getenv("RANK_ID");
if (identify == nullptr) {
MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID";
free(full_path);
return false;
}
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << fullPath << ", RANK_ID: " << identify;
hcclResult_t res = hcom_init(fullPath, identify);
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify;
hcclResult_t res = hcom_init(full_path, identify);
free(full_path);
if (res != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom init failed, res is " << static_cast<int>(res);
return false;
......
......@@ -33,7 +33,7 @@ constexpr char kIterEndNode[] = "PROFILING_ITER_END";
std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
uint32_t ProfilingUtils::custom_node_index_ = 1;
ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr) {
ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr) {
MS_LOG(INFO) << "get env start";
custom_node_index_ = 1;
auto &cnode_exec_order = graph_ptr->execution_order();
......
......@@ -94,7 +94,7 @@ class ProfilingUtils {
// And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
// GetNext, export PROFILING_CUSTOM_2='full name of GetNext cnode'
// The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
static ProfilingTraceInfo GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr);
// Insert two profiling trace points, one in front and one behind
static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
......
......@@ -121,8 +121,10 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
LaunchAddrCleanKernel(anf_node_ptr, &kernel_inputs);
}
std::vector<TaskInfoPtr> task_info_ptrs = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod)
->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
auto ascend_kernel_mod = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod);
MS_EXCEPTION_IF_NULL(ascend_kernel_mod);
std::vector<TaskInfoPtr> task_info_ptrs =
ascend_kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end());
return true;
}
......
......@@ -136,7 +136,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<hcclDataTyp
}
}
if (total_size % type_size != 0) {
if (type_size == 0 || total_size % type_size != 0) {
MS_LOG(ERROR) << "Total_size[" << total_size << "],Type_size[" << type_size << "] != 0, fail!";
return false;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册