提交 0a27c46e 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!1701 fix several bugs

Merge pull request !1701 from fary86/fix_gpu_submodule_name_error
......@@ -49,6 +49,7 @@ if(ENABLE_GPU)
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
endif ()
......
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-gnn OBJECT
graph.cc
graph_loader.cc
......
add_subdirectory(kernels)
file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(text OBJECT
vocab.cc
)
......
......@@ -20,25 +20,28 @@ endif ()
if (ENABLE_GPU)
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc" "gpu/*.cu")
set(GPU_QUEUE_SRCS "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
set(GPU_COLLECTIVE_SRCS "gpu/distribution/collective_wrapper.cc"
"gpu/distribution/mpi_wrapper.cc"
"gpu/distribution/nccl_wrapper.cc")
# gpu_queue
list(REMOVE_ITEM CUDA_SRC_LIST "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
add_library(gpu_queue SHARED "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
list(REMOVE_ITEM CUDA_SRC_LIST ${GPU_QUEUE_SRCS})
set_property(SOURCE ${GPU_QUEUE_SRCS} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
add_library(gpu_queue SHARED ${GPU_QUEUE_SRCS})
target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so)
list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc"
"gpu/distribution/collective_wrapper.cc"
"gpu/distribution/mpi_wrapper.cc"
"gpu/distribution/nccl_wrapper.cc"
)
list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" ${GPU_COLLECTIVE_SRCS})
if (ENABLE_MPI)
include(ExternalProject)
# gpu_collective
add_library(gpu_collective SHARED "gpu/distribution/collective_wrapper.cc"
"gpu/distribution/mpi_wrapper.cc"
"gpu/distribution/nccl_wrapper.cc"
)
set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
# _ms_mpi
set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
......
......@@ -411,6 +411,8 @@ void OnnxExporter::InitModelInfo() {
void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) {
std::map<AnfNodePtr, size_t> node_map;
MS_LOG(INFO) << "Begin exporting onnx model for graph " << func_graph->ToString();
onnx_node_index_ = func_graph->parameters().size();
// set graph name
......@@ -423,6 +425,8 @@ void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphPr
// export computational nodes and output nodes
ExportNodes(func_graph, &node_map, graph_proto);
MS_LOG(INFO) << "End exporting onnx model for graph " << func_graph->ToString();
}
void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) {
......
......@@ -373,7 +373,7 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
p_actions = GePipeline();
}
std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, p_actions);
std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));
// get the parameters items and add the value to args_spec
abstract::AbstractBasePtrList args_spec;
......@@ -407,6 +407,22 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
return true;
}
std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
// phase does not contain 'export_onnx'
if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
return actions;
}
MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";
std::vector<ActionItem> filtered_actions;
for (const auto &item : actions) {
filtered_actions.emplace_back(item);
if (item.first == "validate") {
break;
}
}
return filtered_actions;
}
void ExecutorPy::ReleaseResource(const py::object &phase) {
ResourcePtr res = GetResource(py::cast<std::string>(phase));
if (res != nullptr) {
......
......@@ -102,6 +102,9 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
void GetGeBackendPolicy() const;
// filter some pipeline actions according to phase, e.g. when exporting onnx, there is no need to execute actions
// after the 'validate' stage
static std::vector<ActionItem> FilterActions(const std::vector<ActionItem> &actions, const std::string &phase);
std::map<std::string, ExecutorInfoPtr> info_;
static std::shared_ptr<ExecutorPy> executor_;
......
......@@ -62,12 +62,12 @@ void ValidateOperation(const AnfNodePtr &node) {
void ValidateAbstract(const AnfNodePtr &node) {
if (node == nullptr) {
MS_LOG(WARNING) << "Node to validate is invalid";
MS_LOG(DEBUG) << "Node to validate is invalid";
return;
}
AbstractBasePtr ptrBase = node->abstract();
if (ptrBase == nullptr) {
MS_LOG(WARNING) << "Abstract is null in node: " << node->DebugString();
MS_LOG(DEBUG) << "Abstract is null in node: " << node->DebugString();
return;
}
if (ptrBase->isa<AbstractClass>() || ptrBase->isa<AbstractJTagged>()) {
......@@ -88,7 +88,7 @@ void ValidateAbstract(const AnfNodePtr &node) {
}
if (ptrBase->isa<AbstractError>()) {
// NOTICE: validate dead code?
MS_LOG(WARNING) << "AbstractError in the graph: " << ptrBase->ToString();
MS_LOG(DEBUG) << "AbstractError in the graph: " << ptrBase->ToString();
return;
}
......
......@@ -640,7 +640,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
// if name not in params_, create a node in graph
if (node_itor == params_.end()) {
MS_LOG(WARNING) << name << " is not in params, and create a new node.";
ParameterPtr param = anf_graph_->add_parameter();
ParameterPtr param = std::make_shared<Parameter>(nullptr);
name = name + "_temp";
param->set_name(name);
(void)ConvertParameter(param);
......
......@@ -412,9 +412,9 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple
if (params.empty()) {
MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0";
}
if (args.size() != params.size()) {
MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size()
<< ", let graph to be executed.";
if ((args.size() + func_graph->hyper_param_count()) != params.size()) {
MS_LOG(EXCEPTION) << "Input size " << args.size() << " add Parameter count " << func_graph->hyper_param_count()
<< " not equal to graph input size " << params.size() << ", let graph to be executed.";
}
auto it = std::find(params.begin(), params.end(), output);
......
......@@ -420,6 +420,8 @@ def export(net, *inputs, file_name, file_format='GEIR'):
_executor.compile(net, *inputs, phase='export')
_executor.export(net, file_name, file_format)
elif file_format == 'ONNX': # file_format is 'ONNX'
# NOTICE: the phase name `export_onnx` is used for judging whether it is exporting onnx in the compile pipeline,
# do not change it to other values.
phase_name = 'export_onnx'
graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
onnx_stream = _executor._get_func_graph_proto(graph_id)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册