diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index a7513699facfdc991ceb00e6534673b85a1c5812..1e1c650239654ee415825a020be6a89a93ebfb4d 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -49,6 +49,7 @@ if(ENABLE_GPU) set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) endif () diff --git a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt index d7a295e32a8beb7cce348cdc2619e9b50fa2e37c..401fce6d11879e3c3d2022f8e38d78839098256b 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt @@ -1,3 +1,5 @@ +file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(engine-gnn OBJECT graph.cc graph_loader.cc diff --git a/mindspore/ccsrc/dataset/text/CMakeLists.txt b/mindspore/ccsrc/dataset/text/CMakeLists.txt index 08620458c7b40b7bfee40c8a27c571bda1d786e4..605b2644b76540d5ec425b5cb60c7be05564c159 100644 --- a/mindspore/ccsrc/dataset/text/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/text/CMakeLists.txt @@ -1,5 +1,7 @@ add_subdirectory(kernels) +file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(text OBJECT vocab.cc ) diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/device/CMakeLists.txt index fba0b20711fec2ccc6a27d2481dbbcde75199323..2ade0f0ef3c98761ad4bd331296766742de3b041 100644 --- 
a/mindspore/ccsrc/device/CMakeLists.txt +++ b/mindspore/ccsrc/device/CMakeLists.txt @@ -20,25 +20,28 @@ endif () if (ENABLE_GPU) file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc" "gpu/*.cu") + set(GPU_QUEUE_SRCS "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") + set(GPU_COLLECTIVE_SRCS "gpu/distribution/collective_wrapper.cc" + "gpu/distribution/mpi_wrapper.cc" + "gpu/distribution/nccl_wrapper.cc") + # gpu_queue - list(REMOVE_ITEM CUDA_SRC_LIST "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") - add_library(gpu_queue SHARED "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") + list(REMOVE_ITEM CUDA_SRC_LIST ${GPU_QUEUE_SRCS}) + set_property(SOURCE ${GPU_QUEUE_SRCS} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) + add_library(gpu_queue SHARED ${GPU_QUEUE_SRCS}) target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so) - list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" - "gpu/distribution/collective_wrapper.cc" - "gpu/distribution/mpi_wrapper.cc" - "gpu/distribution/nccl_wrapper.cc" - ) + list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" ${GPU_COLLECTIVE_SRCS}) if (ENABLE_MPI) include(ExternalProject) # gpu_collective - add_library(gpu_collective SHARED "gpu/distribution/collective_wrapper.cc" - "gpu/distribution/mpi_wrapper.cc" - "gpu/distribution/nccl_wrapper.cc" - ) + set_property(SOURCE ${GPU_COLLECTIVE_SRCS} + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) + add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS}) # _ms_mpi + set_property(SOURCE "gpu/mpi/mpi_initializer.cc" + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc 
b/mindspore/ccsrc/onnx/onnx_exporter.cc index a0c8de75afe6ee081cb1f537ce5b52425f3107dc..2a038bbf1a19c6b6f156616815c95d1777326dd5 100644 --- a/mindspore/ccsrc/onnx/onnx_exporter.cc +++ b/mindspore/ccsrc/onnx/onnx_exporter.cc @@ -411,6 +411,8 @@ void OnnxExporter::InitModelInfo() { void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { std::map node_map; + MS_LOG(INFO) << "Begin exporting onnx model for graph " << func_graph->ToString(); + onnx_node_index_ = func_graph->parameters().size(); // set graph name @@ -423,6 +425,8 @@ void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphPr // export computational nodes and output nodes ExportNodes(func_graph, &node_map, graph_proto); + + MS_LOG(INFO) << "End exporting onnx model for graph " << func_graph->ToString(); } void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc index a996e763e5ee4fac911f257d872b106116ddbf54..103477363f10b4e9d9ff910219a0a58b79d87c78 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/pipeline.cc @@ -373,7 +373,7 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons p_actions = GePipeline(); } - std::shared_ptr pip = std::make_shared(resource, p_actions); + std::shared_ptr pip = std::make_shared(resource, FilterActions(p_actions, phase_s)); // get the parameters items and add the value to args_spec abstract::AbstractBasePtrList args_spec; @@ -407,6 +407,22 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons return true; } +std::vector ExecutorPy::FilterActions(const std::vector &actions, const std::string &phase) { + // phase does not contain 'export_onnx' + if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) { + return actions; + } + MS_LOG(INFO) << "Phase is '" << phase << "', 
filter out actions after stage 'validate'"; + std::vector filtered_actions; + for (const auto &item : actions) { + filtered_actions.emplace_back(item); + if (item.first == "validate") { + break; + } + } + return filtered_actions; +} + void ExecutorPy::ReleaseResource(const py::object &phase) { ResourcePtr res = GetResource(py::cast(phase)); if (res != nullptr) { diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h index 22d6a3ee6070fd401ecaaac8f9d8fd42c024cfb3..81d0e1a9f4b5d98315dcf137dabc0b3e8b63d4cc 100644 --- a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/pipeline.h @@ -102,6 +102,9 @@ class ExecutorPy : public std::enable_shared_from_this { void ConvertObjectToTensors(const py::dict &dict, std::map *tensors); bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const; void GetGeBackendPolicy() const; + // filter some pipeline actions according to phase, e.g. when exporting onnx, there is no need to execute actions + // after the 'validate' stage + static std::vector FilterActions(const std::vector &actions, const std::string &phase); std::map info_; static std::shared_ptr executor_; diff --git a/mindspore/ccsrc/pipeline/validator.cc b/mindspore/ccsrc/pipeline/validator.cc index 73a54bb1807aa27e5efa1a541772ac3dddce86b3..4866d43b93035782f6dd8fb319fbe8a23d323f86 100644 --- a/mindspore/ccsrc/pipeline/validator.cc +++ b/mindspore/ccsrc/pipeline/validator.cc @@ -62,12 +62,12 @@ void ValidateOperation(const AnfNodePtr &node) { void ValidateAbstract(const AnfNodePtr &node) { if (node == nullptr) { - MS_LOG(WARNING) << "Node to validate is invalid"; + MS_LOG(DEBUG) << "Node to validate is invalid"; return; } AbstractBasePtr ptrBase = node->abstract(); if (ptrBase == nullptr) { - MS_LOG(WARNING) << "Abstract is null in node: " << node->DebugString(); + MS_LOG(DEBUG) << "Abstract is null in node: " << node->DebugString(); return; } if (ptrBase->isa() || ptrBase->isa()) { @@ -88,7 +88,7 @@ void 
ValidateAbstract(const AnfNodePtr &node) { } if (ptrBase->isa()) { // NOTICE: validate dead code? - MS_LOG(WARNING) << "AbstractError in the graph: " << ptrBase->ToString(); + MS_LOG(DEBUG) << "AbstractError in the graph: " << ptrBase->ToString(); return; } diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index aee0654c45894de65e581ab7f7fb2c92e44302fb..1450572e4bbb1094584d4f6a42c27c3858fcc529 100644 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -640,7 +640,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) { // if name not in params_, create a node in graph if (node_itor == params_.end()) { MS_LOG(WARNING) << name << " is not in params, and create a new node."; - ParameterPtr param = anf_graph_->add_parameter(); + ParameterPtr param = std::make_shared(nullptr); name = name + "_temp"; param->set_name(name); (void)ConvertParameter(param); diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index 9cad14da33e27d806988ebffdd7b7558e89fa1e8..411a15c971b8063a8fa256e39f8059fd63c7fdbc 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -412,9 +412,9 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple if (params.empty()) { MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0"; } - if (args.size() != params.size()) { - MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size() - << ", let graph to be executed."; + if ((args.size() + func_graph->hyper_param_count()) != params.size()) { + MS_LOG(EXCEPTION) << "Input size " << args.size() << " add Parameter count " << func_graph->hyper_param_count() + << " not equal to graph input size " << params.size() << ", let graph to be executed."; } auto it = std::find(params.begin(), params.end(), output); diff --git a/mindspore/train/serialization.py 
b/mindspore/train/serialization.py index 4e6e67e32bc2bdcf5e955c94161d4e166c06c8aa..502f00572f24ddeac16c280915e3af9727ff2496 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -420,6 +420,8 @@ def export(net, *inputs, file_name, file_format='GEIR'): _executor.compile(net, *inputs, phase='export') _executor.export(net, file_name, file_format) elif file_format == 'ONNX': # file_format is 'ONNX' + # NOTICE: the phase name `export_onnx` is used for judging whether we are exporting onnx in the compile pipeline, + # do not change it to other values. phase_name = 'export_onnx' graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) onnx_stream = _executor._get_func_graph_proto(graph_id)