From 2a8f0a75beba4bedb24df7394a5f2dd8da8595d4 Mon Sep 17 00:00:00 2001 From: kswang Date: Fri, 19 Jun 2020 17:05:41 +0800 Subject: [PATCH] fix review code --- .../device/ascend/ascend_kernel_runtime.cc | 69 ++++++++++--------- .../device/ascend/ascend_stream_assign.cc | 17 +---- .../ascend/profiling/profiling_manager.cc | 5 -- .../ccsrc/device/cpu/cpu_device_address.cc | 2 +- .../ccsrc/device/cpu/cpu_kernel_runtime.cc | 28 +++++--- .../ccsrc/device/kernel_runtime_manager.cc | 5 +- .../cpu/embedding_look_up_cpu_kernel.cc | 47 +------------ mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc | 4 +- 8 files changed, 62 insertions(+), 115 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 0333c796f..bcf8dbbf4 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -52,6 +52,38 @@ namespace mindspore { namespace device { namespace ascend { static const size_t PRAMATER_OUTPUT_INDEX = 0; +namespace { +std::string GetRankId() { + std::string rank_id_str; +#ifdef ENABLE_MPI + auto mpi_config_ptr = MpiConfig::GetInstance(); + MS_EXCEPTION_IF_NULL(mpi_config_ptr); + if (mpi_config_ptr->enable_mpi()) { + int rank_id = device::cpu::MPIAdapter::Instance().GetRankId(); + const char *offset = std::getenv("RANK_OFFSET"); + if (offset != nullptr) { + try { + int rank_offset = std::stoi(offset); + rank_id += rank_offset; + } catch (std::invalid_argument) { + MS_LOG(EXCEPTION) << "stoi invalid argument:" << offset; + } catch (std::out_of_range) { + MS_LOG(EXCEPTION) << "stoi out_of_range:" << offset; + } + } + rank_id_str = std::to_string(rank_id); + } else { + rank_id_str = std::getenv("RANK_ID"); + } +#else + rank_id_str = std::getenv("RANK_ID"); +#endif + if (rank_id_str.empty()) { + MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID"; + } + return rank_id_str; +} +} // namespace AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } @@ -497,7 +529,6 @@ bool AscendKernelRuntime::HcclInit() { if (!context_ptr->IsTsdOpened()) { MS_LOG(EXCEPTION) << "Hccl dependent tsd is not open"; } - MS_LOG(INFO) << "do hcom init"; auto config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH"); if (config_path_str == nullptr) { @@ -507,44 +538,14 @@ bool AscendKernelRuntime::HcclInit() { } return false; } + std::string rank_id_str = GetRankId(); auto full_path = realpath(config_path_str, nullptr); if (full_path == nullptr) { MS_LOG(ERROR) << "file path " << config_path_str << " does not exist"; return false; } - const char *identify = nullptr; -#ifdef ENABLE_MPI - std::string rank_id_tmp; - auto mpi_config_ptr = MpiConfig::GetInstance(); - MS_EXCEPTION_IF_NULL(mpi_config_ptr); - if (mpi_config_ptr->enable_mpi()) { - int rank_id = device::cpu::MPIAdapter::Instance().GetRankId(); - const char *offset = std::getenv("RANK_OFFSET"); - if (offset != nullptr) { - try { - int rank_offset = std::stoi(offset); - rank_id += rank_offset; - } catch (std::invalid_argument) { - MS_LOG(EXCEPTION) << "stoi invalid argument:" << offset; - } catch (std::out_of_range) { - MS_LOG(EXCEPTION) << "stoi out_of_range:" << offset; - } - } - rank_id_tmp = std::to_string(rank_id); - identify = rank_id_tmp.c_str(); - } else { - identify = std::getenv("RANK_ID"); - } -#else - identify = std::getenv("RANK_ID"); -#endif - if (identify == nullptr) { - MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID"; - free(full_path); - return false; - } - MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify; - hcclResult_t res = hcom_init(full_path, identify); + MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << rank_id_str; + hcclResult_t res = hcom_init(full_path, rank_id_str.c_str()); free(full_path); if (res != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom init failed, res is " << static_cast(res); diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc index 10d98856e..d637cfb90 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc @@ -303,15 +303,12 @@ void AscendStreamAssign::InsertSendRecvForDiffHcom(const shared_ptrset_execution_order(orders); total_event_num_ = cur_event_id; - MS_LOG(INFO) << "after indsert between allreduce, total event nums[" << total_event_num_ << "]"; - MS_LOG(INFO) << "end"; + MS_LOG(INFO) << "after indsert between allreduce, total event nums[" << total_event_num_ << "]\n end"; } void AscendStreamAssign::InsertSendRecvForHcomParallel(const shared_ptr &graph_ptr) { @@ -826,7 +818,6 @@ void AscendStreamAssign::ReorderIndependentOrders(const shared_ptr exe_orders; std::vector independents; std::vector others; - auto cnode_ptr_list = graph_ptr->execution_order(); MS_LOG(INFO) << "before reorder, graph orders size:" << cnode_ptr_list.size(); for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { @@ -838,19 +829,16 @@ void AscendStreamAssign::ReorderIndependentOrders(const shared_ptrset_execution_order(exe_orders); return; } - if (independents.empty()) { std::copy(others.begin(), others.end(), std::back_inserter(exe_orders)); graph_ptr->set_execution_order(exe_orders); return; } - std::vector processed; for (size_t i = 0; i < others.size(); i++) { auto begin = others.begin() + i; @@ -862,7 +850,6 @@ void AscendStreamAssign::ReorderIndependentOrders(const shared_ptrset_execution_order(exe_orders); } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc index 3951028e3..fec1aac68 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc @@ -121,7 +121,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { MS_LOG(ERROR) << "Register profiling Engine failed."; return false; } - auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); const string prof_options_str = context->profiling_options(); @@ -130,7 +129,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!"; return true; } - // current one docker only use one device` Json p_device; // JOBID @@ -149,7 +147,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { // only one device, but sProfMgrStartUp API require for device list Json devices; devices[0] = p_device; - Json startCfg; startCfg["startCfg"] = devices; @@ -157,9 +154,7 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { std::stringstream ss; ss << startCfg; std::string cfg = ss.str(); - MS_LOG(INFO) << "profiling config " << cfg; - auto ret = rtProfilerStart(); if (ret != RT_ERROR_NONE) { MS_LOG(INFO) << "Call rtProfilerStart failed, ret:" << ret; diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/device/cpu/cpu_device_address.cc index 7e808d6d3..09ab0da12 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.cc +++ b/mindspore/ccsrc/device/cpu/cpu_device_address.cc @@ -33,7 +33,7 @@ bool CPUDeviceAddress::SyncDeviceToHost(const std::vector & /*shape*/, size } if (type == type_id_) { - auto ret_code = memcpy_s(host_ptr, size, ptr_, size); + auto ret_code = memcpy_s(host_ptr, size, ptr_, size_); if (ret_code != EOK) { MS_LOG(ERROR) << "Failed to copy tensor!"; return false; diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc index 63b474904..640607b9e 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc @@ -34,6 +34,23 @@ namespace mindspore { namespace device { namespace cpu { const size_t INIT_NODE_REF = 1; +namespace { +TypeId GetCPUSupportOutputTypeId(const TypeId type_id) { + TypeId support_type_id = type_id; + if (type_id == kNumberTypeUInt32) { + support_type_id = kNumberTypeInt32; + } + if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 || + type_id == kNumberTypeFloat64) { + support_type_id = kNumberTypeFloat32; + } + if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) { + MS_LOG(EXCEPTION) << "Check output type failed."; + } + return support_type_id; +} +} // namespace + void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) { AssignValueNodeAddress(kernel_graph); AssignInputNodeAddress(kernel_graph); @@ -149,16 +166,7 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(const AnfNodePtr &input_node, siz std::vector temp_shape; (void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end()); TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index); - if (type_id == kNumberTypeUInt32) { - type_id = kNumberTypeInt32; - } - if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 || - type_id == kNumberTypeFloat64) { - type_id = kNumberTypeFloat32; - } - if (type_id != kNumberTypeInt32 && type_id != kNumberTypeFloat32) { - MS_LOG(EXCEPTION) << "Check output type failed."; - } + type_id = GetCPUSupportOutputTypeId(type_id); tensor::TensorPtr tensor = std::make_shared(type_id, temp_shape); MS_EXCEPTION_IF_NULL(tensor); if (address->ref_count_ > 0 && address->ptr_ != nullptr) { diff --git a/mindspore/ccsrc/device/kernel_runtime_manager.cc b/mindspore/ccsrc/device/kernel_runtime_manager.cc index ca6f386b5..0f95f3e79 100644 --- a/mindspore/ccsrc/device/kernel_runtime_manager.cc +++ b/mindspore/ccsrc/device/kernel_runtime_manager.cc @@ -54,8 +54,9 @@ KernelRuntime *KernelRuntimeManager::GetSingleKernelRuntime(const std::string &d return runtime_iter->second.get(); } else if (runtime_map_.size() > 0) { auto cur_runtime_key = runtime_map_.begin()->first; - if (cur_runtime_key.rfind('_') != std::string::npos) { - auto cur_device_id = cur_runtime_key.substr(cur_runtime_key.rfind('_') + 1); + auto find_pos = cur_runtime_key.rfind('_'); + if (find_pos != std::string::npos) { + auto cur_device_id = cur_runtime_key.substr(find_pos + 1); MS_LOG(EXCEPTION) << "Can't change device id in runtime, already set device id: " << cur_device_id << ", set device id: " << device_id << " failed"; } diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc index 0393b86d7..e91b5d810 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc @@ -24,50 +24,32 @@ namespace mindspore { namespace kernel { void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); - input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); input_lens_ = 1; for (auto shape : input_shape_) { - MS_LOG(INFO) << "input shape: " << shape; input_lens_ = input_lens_ * shape; } - MS_LOG(INFO) << "input lens: " << input_lens_; - indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); indices_lens_ = 1; for (auto shape : indices_shape_) { - MS_LOG(INFO) << "indice shape: " << shape; indices_lens_ = indices_lens_ * shape; } - MS_LOG(INFO) << "indice lens: " << indices_lens_; - output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); - for (auto shape : output_shape_) { - MS_LOG(INFO) << "output shape: " << shape; - } - auto output_type = AnfAlgo::GetOutputInferDataType(kernel_node, 0); - MS_LOG(INFO) << "output type: " << output_type; - axis_ = 4 - input_shape_.size(); - MS_LOG(INFO) << "axis_: " << axis_; reduce_scatter_flag_ = AnfAlgo::GetNodeAttr(kernel_node, "reduce_scatter_flag"); - MS_LOG(INFO) << "reduce_scatter_flag: " << reduce_scatter_flag_; #ifdef ENABLE_MPI if (reduce_scatter_flag_) { size_t gatherv2_out_lens = 1; for (int i = 0; i < SizeToInt(input_shape_.size()); i++) { if (i == 0) { for (int j = 0; j < SizeToInt(indices_shape_.size()); j++) { - MS_LOG(DEBUG) << "gatherv2 out shape: " << indices_shape_[j]; gatherv2_out_lens = gatherv2_out_lens * indices_shape_[j]; } } else { - MS_LOG(DEBUG) << "gatherv2 out shape: " << input_shape_[i]; gatherv2_out_lens = gatherv2_out_lens * input_shape_[i]; } } gatherv2_out_lens_ = gatherv2_out_lens * sizeof(float); - MS_LOG(INFO) << "gatherv2 out lens: " << gatherv2_out_lens_; gather_v2_out_ = malloc(gatherv2_out_lens_); if (gather_v2_out_ == nullptr) { MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel malloc failed, malloc lens: " << gatherv2_out_lens_; @@ -76,9 +58,7 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { if (ret != 0) { MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel memset gatherv2 out buff failed"; } - split_num_ = AnfAlgo::GetNodeAttr(kernel_node, "split_num"); - MS_LOG(INFO) << "split_num: " << split_num_; } #else if (reduce_scatter_flag_) { @@ -86,7 +66,6 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { } #endif offset_ = AnfAlgo::GetNodeAttr(kernel_node, "offset"); - MS_LOG(INFO) << "offset: " << offset_; CPUKernelUtils::ExpandDimsTo4(&input_shape_); CPUKernelUtils::ExpandDimsTo4(&output_shape_); } @@ -94,21 +73,11 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { bool EmbeddingLookUpCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { -#if defined(_WIN32) || defined(_WIN64) - auto start_time = std::chrono::steady_clock::now(); -#else - struct timeval start_time, end_time; - (void)gettimeofday(&start_time, nullptr); -#endif auto output_addr = reinterpret_cast(outputs[0]->addr); - MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << outputs[0]->size; float *gather_out_addr = reduce_scatter_flag_ ? reinterpret_cast(gather_v2_out_) : output_addr; - MS_LOG(DEBUG) << "gatherv2 out addr: " << gather_out_addr; - size_t dim0 = input_shape_[0]; size_t dim1 = input_shape_[1]; size_t dim2 = input_shape_[2]; - if (axis_ == 3) { for (size_t i = 0; i < dim0; ++i) { for (size_t j = 0; j < dim1; ++j) { @@ -130,7 +99,6 @@ bool EmbeddingLookUpCPUKernel::Launch(const std::vector &inp } else if (axis_ == 0) { LookUpTable(inputs, 0, 0, 0, &gather_out_addr); } - #ifdef ENABLE_MPI if (reduce_scatter_flag_) { size_t one_split_lens = gatherv2_out_lens_ / split_num_ / sizeof(float); @@ -143,21 +111,10 @@ bool EmbeddingLookUpCPUKernel::Launch(const std::vector &inp } } #endif - -#if defined(_WIN32) || defined(_WIN64) - auto end_time = std::chrono::steady_clock::now(); - std::chrono::duration> cost = end_time - start_time; - MS_LOG(INFO) << "EmbeddingLookUpCPUKernel, used time: " << cost.count() << " us"; -#else - (void)gettimeofday(&end_time, nullptr); - uint64_t time = 1000000 * static_cast(end_time.tv_sec - start_time.tv_sec); - time += static_cast(end_time.tv_usec - start_time.tv_usec); - MS_LOG(INFO) << "EmbeddingLookUpCPUKernel, used time: " << time << " us"; -#endif return true; } -void LookUpTable_task(float *input_addr, float *output_addr, int *indices_addr, size_t indices_lens, size_t num, +void LookUpTable_task(const float *input_addr, float *output_addr, int *indices_addr, size_t indices_lens, size_t num, size_t dim0, size_t dim1, size_t dim2, int offset, size_t axis, std::vector input_shape, size_t input_lens) { size_t lens = num * sizeof(float); @@ -182,7 +139,6 @@ void LookUpTable_task(float *input_addr, float *output_addr, int *indices_addr, if (ret != EOK) { MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed."; } - } else { auto ret = memset_s(output_addr, lens, 0, lens); if (ret != EOK) { @@ -204,6 +160,7 @@ void LookUpTable_task(float *input_addr, float *output_addr, int *indices_addr, output_addr += num; } } + void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector &inputs, size_t dim0, size_t dim1, size_t dim2, float **output_addr) { auto input_addr = reinterpret_cast(inputs[0]->addr); diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc index 9fe36fba8..543f0e5cd 100644 --- a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc @@ -30,7 +30,7 @@ void SubCPUKernel::InitKernel(const CNodePtr &kernel_node) { } } -void sub_task(int *in_addr, int *out_addr, size_t lens, int offset) { +void sub_task(const int *in_addr, int *out_addr, size_t lens, int offset) { for (size_t i = 0; i < lens; i++) { out_addr[i] = in_addr[i] - offset; } @@ -55,7 +55,7 @@ bool SubCPUKernel::Launch(const std::vector &inputs, output_addr[i] = input_addr[i] - offset_; } } else { - size_t thread_num = 4; + const size_t thread_num = 4; std::thread threads[4]; size_t process_lens = (lens + thread_num - 1) / thread_num; size_t process_offset = 0; -- GitLab