!5685 update run for br: master

Merge pull request !5685 from guozhijian/udpate_run_from_c75b100_to_c75b150_master

!5685 update run for br: master
Merge pull request !5685 from guozhijian/udpate_run_from_c75b100_to_c75b150_master
a838c9bd · mindspore-ci-bot · Gitee · b4067700 · bd527a33 · 2dbfefcd
34 changed file
--- a/graphengine @ 2dbfefcd
+++ b/graphengine @ 2dbfefcd
-Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
+Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2
--- a/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py
+++ b/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py
@@ -17,8 +17,6 @@ import json
 import os
 import sys
 from te.platform.cce_conf import te_set_version
-from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
-    init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
 from te.platform.fusion_util import fusion_op
 from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version

@@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()

 # op function list
 op_build = "compile"
-op_pre_build = "pre_build"
 fusion_pattern_start_flag = "fusion_pattern_start"
 fusion_pattern_end_flag = "fusion_pattern_end"

@@ -83,19 +80,7 @@ def build_op(build_type, json_str):
        else:
            op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
        # get function
-        if build_type == op_pre_build:
-            # set op parameter
-            op_build_cfg_dis()
-            set_current_op_func_name(op_name)
-            set_current_op_name(kernel_name)
-            init_op_pattern()
-            set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
-            set_op_build_type('prebuild')
-            if custom_flag:
-                py_fn_name = kernel_info['op_info']['name']
-            else:
-                py_fn_name = op_name
-        elif build_type == op_build:
+        if build_type == op_build:
            if custom_flag:
                py_fn_name = kernel_info['op_info']['name']
            else:
@@ -106,13 +91,6 @@ def build_op(build_type, json_str):
        if op_func is None:
            raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))

-        # pre build
-        if build_type == op_pre_build:
-            op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
-            # disable only pattern configuration
-            op_build_cfg_en()
-            return get_op_pattern()
-
        # call function
        if kernel_name[0:19] == "bounding_box_encode":
            return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
@@ -120,8 +98,6 @@ def build_op(build_type, json_str):
        return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)

    except Exception as e:
-        if build_type == op_pre_build:
-            op_build_cfg_en()
        raise RuntimeError(e)


@@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
        Exception: If specific keyword is not found.
    """
    args = json.loads(json_str)
+    te_set_version(ddk_version)
    if 'fusion_op' not in args or not args['fusion_op']:
        raise ValueError("Json string Errors, key:fusion_op not found.")
-    if 'prebuild_ops' not in args or not args['prebuild_ops']:
-        raise ValueError("Json string Errors, key:prebuild_ops not found.")
-
-    pre_build_op_list = args['prebuild_ops']
-    for op in pre_build_op_list:
-        build_op(op_pre_build, json.dumps(op))
    fusion_op_arg = args['fusion_op']
    return fusion_op(json.dumps(fusion_op_arg))

@@ -159,8 +130,6 @@ def compile_with_json(json_str):
    json_info = json.loads(json_str)
    if "fusion_op" in json_info:
        ret = compile_fusion_op(json_str)
-    elif "compile_type" in json_info:
-        ret = build_op(op_pre_build, json_str)
    else:
        ret = build_op(op_build, json_str)
    return ret

--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc
@@ -20,6 +20,8 @@
 #include <vector>
 #include <memory>
 #include <algorithm>
+#include <map>
+#include <climits>
 #include "runtime/device/kernel_runtime.h"
 #include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
 #include "backend/kernel_compiler/akg/akg_kernel_build.h"
@@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      dim->set_size((::google::protobuf::int64)item);
    }
-    node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
+    node_inputs->set_tensor_type(input_data_type);
    node_inputs->set_mem_device("HBM");
  }
 }
@@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
    }
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
-    node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
+    node_outputs->set_tensor_type(output_data_type);
    node_outputs->set_mem_device("HBM");
  }
 }
@@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
  return true;
 }

+bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
+  if (!anf_node->isa<CNode>()) {
+    return true;
+  }
+
+  if (!AnfAlgo::IsDynamicShape(anf_node)) {
+    return true;
+  }
+
+  MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
+
+  int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
+  uint64_t ext_info_head_len = kExtInfoHeadSize;
+  std::string ext_info;
+  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
+
+  // 1.addr:unknown shape type
+  uint64_t ext_info_len = ext_info.size();
+  ext_info_len += ext_info_head_len + sizeof(int32_t);
+
+  // 2.addr:input ShapeAndType
+  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
+
+  // 3.addr:output ShapeAndType
+  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
+
+  uint64_t ext_info_offset = ext_info.size();
+  ext_info.resize(ext_info_len, 0);
+  char *ext_info_buf = ext_info.data();
+
+  // deal1: unknown shape type
+  ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
+  info->infoLen = sizeof(int32_t);
+  ext_info_offset += ext_info_head_len;
+  int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
+  *shape_type = unknown_shape_type;
+  ext_info_offset += info->infoLen;
+
+  // deal2:input ShapeAndType
+  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
+  info->infoLen = input_num * sizeof(ShapeAndType);
+  ext_info_offset += ext_info_head_len;
+
+  ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
+  for (size_t input_index = 0; input_index < input_num; input_index++) {
+    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
+    std::vector<size_t> input_shape;
+    int32_t input_data_type;
+    if (input_type == kObjectTypeString) {
+      auto cnode = anf_node->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(cnode);
+      auto input_node = cnode->inputs()[input_index + 1];
+      auto value_ptr = GetValueNode(input_node);
+      auto value = GetValue<std::string>(value_ptr);
+      input_shape.push_back(1);
+      input_shape.push_back(value.size());
+      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
+    } else {
+      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
+      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
+    }
+    inputs[input_index].type = input_data_type;
+
+    size_t input_shape_index = 0;
+    for (; input_shape_index < input_shape.size(); input_shape_index++) {
+      inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
+    }
+    if (input_shape.size() < kMaxShapeDims) {
+      inputs[input_index].dims[input_shape_index] = LLONG_MIN;
+    }
+  }
+  ext_info_offset += info->infoLen;
+
+  // deal3:output ShapeAndType
+  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
+  info->infoLen = output_num * sizeof(ShapeAndType);
+  ext_info_offset += ext_info_head_len;
+
+  ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
+  for (size_t output_index = 0; output_index < output_num; output_index++) {
+    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
+    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
+    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
+    outputs[output_index].type = output_data_type;
+
+    size_t output_shape_index = 0;
+    for (; output_shape_index < output_shape.size(); output_shape_index++) {
+      outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
+    }
+    if (output_shape_index < kMaxShapeDims) {
+      outputs[output_index].dims[output_shape_index] = LLONG_MIN;
+    }
+  }
+
+  // set ext info
+  kernel_mod_ptr->SetExtInfo(ext_info);
+  return true;
+}
+
 KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
@@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
  }
+
+  if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
+    MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
+  }
+
  if (!SetIOSize(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }

--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc
@@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
  input_size_list_.clear();
  output_size_list_.clear();
  workspace_size_list_.clear();
+  ext_info_.clear();
 }

 void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
@@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
 void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
 void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
 void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
+void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
 void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
 void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
@@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs

  auto node_def_len = node_def_str_.length();
  param_len += node_def_len;
+  param_len += sizeof(uint32_t);
+
+  AicpuParamHead aicpu_param_head;
+  aicpu_param_head.length = param_len;
+  aicpu_param_head.ioAddrNum = io_addrs_num;
+
+  if (ext_info_.empty()) {
+    MS_LOG(INFO) << "Static Shape Kernel";
+    aicpu_param_head.extInfoLength = 0;
+    aicpu_param_head.extInfoAddr = 0;
+  } else {
+    MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
+  }

-  // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
-  AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
  args_.clear();
-  (void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
+  (void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
  // TaskArgs append ioAddrs
  if (io_addrs_size != 0) {
    (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
  }

+  // size for node_def
+  args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
+
  // When it's aicpu customized ops, taskArgs should append customized attr
  if (node_def_len != 0) {
    (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
@@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
    node_name_ = kTopKV2;
  }

-  AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
-    kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
+  AicpuTaskInfoPtr task_info_ptr =
+    make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
+                                                 ext_info_, input_data_addrs, output_data_addrs, NeedDump());

  MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
  return {task_info_ptr};

--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h
@@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
  void SetOutputList(const std::vector<int64_t> &outputList);
  void SetAnfNode(const AnfNodePtr &anf_node);
  void SetNodeDef(const std::string &nodeDef);
+  void SetExtInfo(const std::string &ext_info);
  void SetNodeName(const std::string &node_name);

  /**
@@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
  std::string node_def_str_;
  std::string node_name_;
  std::string node_so_;
+  std::string ext_info_;
  std::vector<int64_t> inputList_;
  std::vector<int64_t> outputList_;
  AnfNodePtr anf_node_;

--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h
@@ -21,7 +21,6 @@
 #include <map>
 #include <string>
 #include "backend/kernel_compiler/kernel.h"
-
 namespace mindspore {
 namespace kernel {
 constexpr auto kInitDataSetQueue = "InitDataSetQueue";
@@ -50,6 +49,36 @@ struct AicpuParamHead {
  uint64_t extInfoAddr;    // extInfo address
 } __attribute__((packed));

+const uint32_t kExtInfoHeadSize = 8;
+struct ExtInfo {
+  int32_t infoType;  // extend type
+  uint32_t infoLen;  // length for infoMsg
+  char infoMsg[0];   // extend value
+} __attribute__((packed));
+
+// Extent info ShapeAndType
+const uint32_t kMaxShapeDims = 8;
+struct ShapeAndType {
+  int32_t type;
+  int64_t dims[kMaxShapeDims];
+} __attribute__((packed));
+
+// Extend Info type for task
+enum FWKTaskExtInfoType {
+  FWK_ADPT_EXT_SHAPE_TYPE = 0,
+  FWK_ADPT_EXT_INPUT_SHAPE,
+  FWK_ADPT_EXT_OUTPUT_SHAPE,
+  FWK_ADPT_EXT_INVALID
+};
+
+// for unknown shape op type
+enum UnknowShapeOpType {
+  DEPEND_IN_SHAPE = 1,     // op out shape get by input shape
+  DEPEND_CONST_VALUE = 2,  // op out shape get by const op value
+  DEPEND_SHAPE_RANGE = 3,  // op out shape get by range
+  DEPEND_COMPUTE = 4       // op out shape get by totally computing
+};
+
 class AicpuOpUtil {
 public:
  static int MsTypeToProtoType(TypeId ms_type);

--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto
@@ -26,7 +26,7 @@ message AttrValue {
    repeated int64 i = 3 [ packed = true ];       //"array(int)"
    repeated float f = 4 [ packed = true ];       //"array(float)"
    repeated bool b = 5 [ packed = true ];        //"array(bool)"
-    repeated DataType type = 6 [ packed = true ]; //"array(type)"
+    repeated int32 type = 6 [ packed = true ]; //"array(type)"
    repeated TensorShape shape = 7;               //"array(shape)"
    repeated Tensor tensor = 8;                   //"array(tensor)"
  }

--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto
@@ -18,9 +18,16 @@ package mindspore;
 import "attr.proto";
 import "tensor.proto";

+message DynamicIdx {
+  int32 idx = 1;
+  int32 num = 2;
+}
+
 message NodeDef {
  string op = 2;
  map<string, AttrValue> attrs = 3;
  repeated Tensor inputs = 4;
  repeated Tensor outputs = 5;
+  map<string, DynamicIdx> dym_inputs = 6;
+  map<string, DynamicIdx> dym_outputs = 7;
 }
--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto
@@ -26,9 +26,12 @@ message Tensor {
  TensorShape tensor_shape = 1;

  // tensor content data type
-  DataType tensor_type = 2;
+  int32 tensor_type = 2;

  // tensor memory device
  // data located memory device , "DDR" "HBM" OR "NONE"
  string mem_device = 3;
+  string name = 4;
+  uint64 data_ptr = 5;
+  uint64 data_size = 6;
 }
--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto
@@ -31,5 +31,5 @@ message TensorShape {
  bool unknown_rank = 3;

  // data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
-  string data_format = 4;
+  int32 data_format = 4;
 };
--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto
@@ -19,17 +19,30 @@ option cc_enable_arenas = true;
 package mindspore;

 enum DataType {
-  MS_UNKNOWN = 0;
-  MS_BOOL = 1;
+  MS_FLOAT32 = 0;
+  MS_FLOAT16 = 1;
  MS_INT8 = 2;
-  MS_UINT8 = 3;
-  MS_INT16 = 4;
-  MS_UINT16 = 5;
-  MS_INT32 = 6;
-  MS_UINT32 = 7;
-  MS_INT64 = 8;
-  MS_UINT64 = 9;
-  MS_FLOAT16 = 10;
-  MS_FLOAT32 = 11;
-  MS_FLOAT64 = 12;
+  MS_INT32 = 3;
+  MS_UINT8 = 4;
+  MS_INT16 = 6;
+  MS_UINT16 = 7;
+  MS_UINT32 = 8;
+  MS_INT64 = 9;
+  MS_UINT64 = 10;
+  MS_FLOAT64 = 11;
+  MS_BOOL = 12;
+  MS_STRING = 13;
+  MS_DUAL_SUB_INT8 = 14;
+  MS_DUAL_SUB_UINT8 = 15;
+  MS_COMPLEX64 = 16;
+  MS_COMPLEX128 = 17;
+  MS_QINT8 = 18;
+  MS_QINT16 = 19;
+  MS_QINT32 = 20;
+  MS_QUINT8 = 21;
+  MS_QUINT16 = 22;
+  MS_RESOURCE = 23;
+  MS_STRING_REF = 24;
+  MS_DUAL = 25;
+  MS_UNKNOWN = 26;
 }
--- a/mindspore/ccsrc/backend/kernel_compiler/kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/kernel.h
@@ -37,7 +37,6 @@ enum FusionType {
  COMMREDUCE,
  SEGMENT,
  OPAQUE,
-  DYNAMIC,
  UNKNOWN_FUSION_TYPE = -1,
 };
 enum OpPattern {
@@ -80,8 +79,8 @@ class KernelPack {
  bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
  bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
  const std::string Serialize() const;
-  const FlexArray *const GetJson() const { return json_; }
-  const FlexArray *const GetKernel() const { return kernel_; }
+  const FlexArray *GetJson() const { return json_; }
+  const FlexArray *GetKernel() const { return kernel_; }
  ~KernelPack() {
    if (json_) {
      delete[] json_;

--- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
@@ -19,53 +19,36 @@
 #include <map>
 #include <string>
 #include <memory>
-#include <utility>
 #include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
 #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
 #include "backend/kernel_compiler/tbe/tbe_utils.h"
 #include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
+#include "utils/ms_context.h"

 namespace mindspore {
 namespace kernel {
 using mindspore::kernel::tbe::TbeUtils;
-static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
-                                  std::vector<nlohmann::json> *prebuild_op_list) {
-  MS_EXCEPTION_IF_NULL(prebuild_op_list);
-  TbeKernelJsonCreator creator(PREBUILD);
-  for (const auto &anf_node : compute_nodes) {
-    nlohmann::json prebuild;
-    if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
-      MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
-      return false;
-    }
-    (*prebuild_op_list).push_back(prebuild);
-  }
-  return true;
-}
-
 std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
  MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
  std::map<int32_t, KernelModPtr> kernel_mod_ret;
  auto build_manger = std::make_shared<ParallelBuildManager>();
  MS_EXCEPTION_IF_NULL(build_manger);
  for (const auto &fusion_scope_iter : fusion_scopes) {
-    auto scope_id = fusion_scope_iter.scope_id;
+    string fusion_kernel_name;
    nlohmann::json fusion_op;
-    string fusion_kernel = "te_fusion";
    if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
-                                            &fusion_kernel)) {
+                                            &fusion_kernel_name)) {
      continue;
    }
    // gen kernel_name & check cache
    std::string json_str = fusion_op.dump();
    size_t hash_id = std::hash<std::string>()(json_str);
-    auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
+    auto context_ptr = MsContext::GetInstance();
+    MS_EXCEPTION_IF_NULL(context_ptr);
+    auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
+    auto json_name =
+      fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
    fusion_op["fusion_op_name"] = json_name;
-    // gen json for prebuild
-    std::vector<nlohmann::json> prebuild_op_list;
-    if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
-      continue;
-    }
    // get io size
    std::vector<size_t> input_size_list;
    std::vector<size_t> output_size_list;
@@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
      auto kernel_mod =
        build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
      if (kernel_mod != nullptr) {
-        kernel_mod_ret[scope_id] = kernel_mod;
+        kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
        continue;
      }
    }
    // fusion build
    nlohmann::json fusion_json;
    fusion_json["fusion_op"] = fusion_op;
-    fusion_json["prebuild_ops"] = prebuild_op_list;
    auto task_id = build_manger->StartCompileOp(fusion_json);
    TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
    if (task_id < 0) {
      MS_EXCEPTION(ArgumentError) << "start compile failed.";
    }
-    build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
+    build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
+                               fusion_scope_iter.scope_id);
  }

  int build_failed_num = 0;

--- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
@@ -16,6 +16,7 @@

 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
+#include <utility>
 #include <vector>
 #include <map>
 #include "backend/kernel_compiler/kernel.h"
@@ -25,11 +26,9 @@ namespace kernel {
 * @brief fuse op and return a callable mod
 */
 struct FusionScopeInfo {
-  FusionScopeInfo() {}
-  FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
-                  const std::vector<AnfNodePtr> &out)
-      : scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
-  int32_t scope_id;
+  FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
+      : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
+  int32_t scope_id{};
  std::vector<AnfNodePtr> input_nodes;
  std::vector<AnfNodePtr> compute_nodes;
  std::vector<AnfNodePtr> output_nodes;

--- a/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h
@@ -40,14 +40,13 @@ class OpLib {

 private:
  static bool RegOpFromLocalInfo();
-  static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
-  static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
-                         const std::shared_ptr<OpInfo> &op_info);
+  static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
+  static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
  static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
                                size_t index);
  static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
  static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
-  static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
+  static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
                                const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
  static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
  static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);

--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
@@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
  *func_name = name_tmp;
  auto iter = tbe_func_adapter_map.find(*func_name);
  if (iter != tbe_func_adapter_map.end()) {
-    MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
+    MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
    *func_name = iter->second;
  }
 }

--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h
@@ -27,7 +27,7 @@
 //       the TBE back-end operator implementation difference
 namespace mindspore {
 namespace kernel {
-enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
+enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
 namespace tbe {
 using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                            nlohmann::json *attrs_json);

--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc
@@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {

 const std::unordered_map<std::string, FusionType> fusion_type_maps = {
  {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
-  {"SEGMENT", FusionType::SEGMENT},       {"DYNAMIC", FusionType::DYNAMIC},   {"OPAQUE", FusionType::OPAQUE},
+  {"SEGMENT", FusionType::SEGMENT},       {"OPAQUE", FusionType::OPAQUE},
 };

 TypeId DtypeToTypeId(const std::string &dtypes) {

--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
@@ -41,8 +41,8 @@ class TbeKernelBuild {
                        std::vector<size_t> *output_size_list);
  // Ub Fuison
  static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
-                                 const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
-                                 std::string *fusion_kernel);
+                                 const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
+                                 std::string *fusion_kernel_name);
  static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
                        std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);

@@ -61,9 +61,14 @@ class TbeKernelBuild {
  static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
  static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
                                         std::vector<nlohmann::json> *output_desc_list);
+  static void GenPreDescJson(nlohmann::json *output_desc);
+  static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
+                                         std::string *fusion_kernel_name);
+  static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
  static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
                          size_t desc_output_idx, nlohmann::json *output_desc,
                          FusionDataType fusion_data_type = kFusionNormal);
+  static void GenSuffixDescJson(nlohmann::json *output_desc);
  static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
                                  size_t output_index, nlohmann::json *output_desc);
  static size_t GetIOSizeImpl(const nlohmann::json &desc);
@@ -76,6 +81,7 @@ class TbeKernelBuild {
  static bool IsDynamicInput(const CNodePtr &cnode);
  static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
  static std::string GetRealOpType(const std::string &origin_type);
+  static std::string GetNodeFusionType(const CNodePtr &cnode);
 };

 class TbeKernelJsonCreator {
@@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
  ~TbeKernelJsonCreator() = default;
  bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
  std::string json_name() { return json_name_; }
+  bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
+                      nlohmann::json *attrs_json);

 private:
  bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
                        nlohmann::json *inputs_json);
  bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
                         nlohmann::json *outputs_json);
-  bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
-                      nlohmann::json *attrs_json);
  static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
  bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
                        const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,

--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc
@@ -33,42 +33,6 @@
 namespace mindspore {
 namespace kernel {
 using mindspore::kernel::tbe::TbeUtils;
-
-bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
-  auto build_manger = std::make_shared<ParallelBuildManager>();
-  MS_EXCEPTION_IF_NULL(build_manger);
-  for (const auto &anf_node : anf_nodes) {
-    // gen kernel json
-    MS_EXCEPTION_IF_NULL(anf_node);
-    nlohmann::json kernel_json;
-    TbeKernelJsonCreator creator(OP_PRE_COMPILE);
-    if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
-      MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
-      return false;
-    }
-    kernel_json["compile_type"] = "pre_build";
-    // op build
-    auto task_id = build_manger->StartCompileOp(kernel_json);
-    build_manger->SavePreTaskInfo(task_id, anf_node);
-  }
-  while (!build_manger->IsAllPreTaskFinish()) {
-    int task_id = -1;
-    std::string task_result;
-    std::string pre_build_result;
-    auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
-    if (!ret) {
-      MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
-    }
-
-    if (task_result != "Success") {
-      MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
-    }
-
-    build_manger->PreTaskFinishProcess(task_id, pre_build_result);
-  }
-  return true;
-}
-
 bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
  auto build_manger = std::make_shared<ParallelBuildManager>();
  MS_EXCEPTION_IF_NULL(build_manger);
@@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
  return build_manger->GenSameOpKernelMod();
 }

-ParallelBuildManager::ParallelBuildManager() {}
-
 ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }

-void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
-  MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
-  pre_task_map_[task_id] = anf_node;
-}
-
 void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
                                        const std::string &json_name, const std::vector<size_t> &input_size_list,
                                        const std::vector<size_t> &output_size_list, int32_t scope_id) {
@@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
  task_map_[task_id] = task_info;
 }

-bool ParallelBuildManager::IsAllPreTaskFinish() const {
-  MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size();
-  return pre_task_map_.empty();
-}
-
 bool ParallelBuildManager::IsAllTaskFinish() const {
  MS_LOG(INFO) << "wait process task_num: " << task_map_.size();
  return task_map_.empty();
 }

-void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
-  auto task_iter = pre_task_map_.find(task_id);
-  if (task_iter == pre_task_map_.end()) {
-    MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
-  }
-  auto node = task_iter->second;
-  auto builder =
-    std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
-  std::string start_flag = "fusion_pattern_start";
-  std::string end_flag = "fusion_pattern_end";
-  int start = pre_build_result.find(start_flag);
-  int end = pre_build_result.find(end_flag);
-  if (start != -1 && end != -1 && end >= start) {
-    std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
-    if (result == "") {
-      (void)pre_task_map_.erase(task_iter);
-      return;
-    }
-    transform(result.begin(), result.end(), result.begin(), ::toupper);
-    FusionType fusion_type = tbe::GetFusionType(result);
-    builder->SetFusionType(fusion_type);
-    AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
-  }
-  (void)pre_task_map_.erase(task_iter);
-}
-
 std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
  auto task_iter = task_map_.find(task_id);
  if (task_iter == task_map_.end()) {

--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
@@ -28,7 +28,6 @@

 namespace mindspore {
 namespace kernel {
-bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
 bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);

 struct KernelBuildTaskInfo {
@@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {

 class ParallelBuildManager {
 public:
-  ParallelBuildManager();
+  ParallelBuildManager() = default;
  ~ParallelBuildManager();
-  void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
  void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
                    const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
                    int32_t scope_id = 0);
@@ -54,10 +52,7 @@ class ParallelBuildManager {
  bool SearchInCache(const std::string &json_name, const std::string &processor,
                     const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
                     AnfNode *node) const;
-
-  bool IsAllPreTaskFinish() const;
  bool IsAllTaskFinish() const;
-  void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
  std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
  KernelModPtr GenKernelMod(const string &json_name, const string &processor,
                            const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,

--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
@@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
  return GetCNodeOutputPrecision(kernel_with_index.first);
 }

+bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
+  if (!node->isa<CNode>()) {
+    return false;
+  }
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode);
+  if (!has_attr) {
+    return false;
+  }
+  return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape);
+}
+
 bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
  MS_EXCEPTION_IF_NULL(node);
  if (node->inputs().empty()) {

--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
@@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm {
  static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
  // get fix output precision from prev node, input_idx is the input index of current node related to prev node.
  static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
+  static bool IsDynamicShape(const AnfNodePtr &node);
  static bool IsCondControlKernel(const CNodePtr &node);
  static bool IsIndependentNode(const CNodePtr &node);
 };

--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() {
 }

 void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
-  device::ascend::KernelPreBuild(kernel_graph.get());
  MS_LOG(INFO) << "HardwareOptimize start!";
  opt::AscendBackendOptimization(kernel_graph);
  opt::AscendGraphKernelCommonProcess(kernel_graph);

--- a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc
@@ -19,7 +19,8 @@
 #include <vector>
 #include <string>
 #include <memory>
-
+#include <set>
+#include <map>
 #include "runtime/device/ascend/kernel_select_ascend.h"
 #include "runtime/device/kernel_info.h"
 #include "backend/kernel_compiler/kernel.h"
@@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
  return kernel_mod_ptr;
 }

-static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
-  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
-  std::vector<AnfNodePtr> tbe_nodes;
-  for (const auto &anf_node : kernel_graph_ptr->execution_order()) {
-    MS_EXCEPTION_IF_NULL(anf_node);
-    if (!AnfAlgo::IsRealKernel(anf_node)) {
-      continue;
-    }
-    KernelType kernel_type = AnfAlgo::GetKernelType(anf_node);
-    switch (kernel_type) {
-      case KernelType::TBE_KERNEL: {
-        if (AnfAlgo::GetKernelMod(anf_node) == nullptr &&
-            AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) {
-          tbe_nodes.push_back(anf_node);
-        }
-        break;
-      }
-      default: {
-        break;
-      }
-    }
-  }
-  bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes);
-  return ret;
-}
-
 static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  std::vector<AnfNodePtr> tbe_nodes;
@@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
  return !(workspace_indexs.empty() && output_indexs.empty());
 }

-bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
-  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
-  bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
-  return ret;
-}
-
 bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  TbeUtils::LoadCache();

--- a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h
+++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h
@@ -22,10 +22,6 @@
 namespace mindspore {
 namespace device {
 namespace ascend {
-/**
- * @brief kernel pre build for ascend.
- */
-bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
 /**
 * @brief kernel build for ascend.
 */

--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -32,6 +32,7 @@ namespace mindspore {
 // op name. Op which not exists in operator/ops.h, so define it's name here
 constexpr auto kFour2FiveOpName = "Four2Five";
 constexpr auto kFive2FourOpName = "Five2Four";
+constexpr auto kConv2DOpName = "Conv2D";
 constexpr auto kConvBN1OpName = "ConvBN1";
 constexpr auto kBN2AddReluOpName = "BN2AddRelu";
 constexpr auto kBN2ReLUOpName = "BN2Relu";
@@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size";
 constexpr auto kAttrNumSegments = "num_segments";
 constexpr auto kAttrBegin = "begin";
 constexpr auto kAttrSize = "size";
+constexpr auto kAttrIsDynamicShape = "is_dynamic_shape";

 // attr value
 constexpr auto kValueTargetSwitch = "target_switch";

--- a/mindspore/ops/_op_impl/aicpu/__init__.py
+++ b/mindspore/ops/_op_impl/aicpu/__init__.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 """aicpu ops"""
+from .unique import _unique_aicpu
 from .init_data_set_queue import _init_data_set_queue_aicpu
 from .embedding_lookup import _embedding_lookup_aicpu
 from .padding import _padding_aicpu

--- a/mindspore/ops/_op_impl/aicpu/unique.py
+++ b/mindspore/ops/_op_impl/aicpu/unique.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Unique op"""
+from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
+
+unique_op_info = AiCPURegOp("Unique") \
+    .fusion_type("OPAQUE") \
+    .input(0, "x", "required") \
+    .output(0, "y", "required") \
+    .output(1, "idx", "required") \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \
+    .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \
+    .get_op_info()
+
+@op_info_register(unique_op_info)
+def _unique_aicpu():
+    """Unique AiCPU register"""
+    return
--- a/mindspore/ops/_op_impl/tbe/matmul.py
+++ b/mindspore/ops/_op_impl/tbe/matmul.py
@@ -17,7 +17,7 @@
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

 matmul_op_info = TBERegOp("MatMul") \
-    .fusion_type("ELEMWISE") \
+    .fusion_type("OPAQUE") \
    .async_flag(False) \
    .binfile_name("matmul.so") \
    .compute_cost(10) \

--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg
 from .sparse_ops import SparseToDense

 __all__ = [
+    'Unique',
    'ReverseSequence',
    'EditDistance',
    'CropAndResize',

--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -597,9 +597,9 @@ class Unique(Primitive):
        containing indices of elements in the input coressponding to the output tensor.

    Examples:
-        >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32)
+        >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32)
        >>> out = P.Unique()(x)
-        (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32))
+        (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32))
    """
    @prim_attr_register
    def __init__(self):

--- a/tests/ut/cpp/stub/tdt/tdt_mock.cc
+++ b/tests/ut/cpp/stub/tdt/tdt_mock.cc
@@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {}

 std::mutex& StatusFactory::GetMutex() { return GetInstance()->rwMutex_; }

-TsdClient* TsdClient::GetInstance() {
-  static TsdClient instance;
-  return &instance;
-}
-
-/**
- * @ingroup TsdClient
- * @brief 构造函数
- */
-TsdClient::TsdClient() { rankSize_ = 1; }
-
-/**
- * @ingroup TsdClient
- * @brief 析构函数
- */
-TsdClient::~TsdClient() = default;
-
-/**
- * @ingroup TsdClient
- * @brief framework发送拉起hccp和computer process的命令
- * @param [in] phyDeviceId : FMK传入物理ID
- * @param [in] phyDeviceId : FMK传入rankSize
- * @return TDT_OK:成功 或者其他错误码
- */
-TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) { return TDT_OK; }
-
-/**
- * @ingroup TsdClient
- * @brief 通知TsdClient关闭相关资源
- * @param 无
- * @return TDT_OK:成功 或者其他错误码
- */
-TDT_StatusT TsdClient::Close() { return TDT_OK; }
-
 }  // namespace tdt
 #endif  // TDT_MOCK_H