Fix some bugs.

1. Add transform for Shape and InferConv2dShape ops. 2. Remove matmul gpu kernel. 3. Fix the length of abbreviated commit id in MACE version.

Fix some bugs.
1. Add transform for Shape and InferConv2dShape ops. 2. Remove matmul gpu kernel. 3. Fix the length of abbreviated commit id in MACE version.
9f448aa0 · liuqi · 61de4dab · 9f448aa0 · 9f448aa0 · 9f448aa0
Showing with 65 addition and 85 deletion

mace/core/net_def_adapter.cc mace/core/net_def_adapter.cc +64 -76

mace/ops/matmul.cc mace/ops/matmul.cc +0 -8

mace/tools/git/gen_version_source.sh mace/tools/git/gen_version_source.sh +1 -1

未找到文件。
--- a/mace/core/net_def_adapter.cc
+++ b/mace/core/net_def_adapter.cc
@@ -50,15 +50,6 @@ std::string TransformedName(const std::string &input_name,
  return ss.str();
 }
-#ifdef MACE_ENABLE_OPENCL
-bool TransformRequiredOp(const std::string &op_type) {
-  static const std::unordered_set<std::string> kNoTransformOp = {
-      "Shape", "InferConv2dShape"
-  };
-  return kNoTransformOp.count(op_type) == 0;
-}
-#endif  // MACE_ENABLE_OPENCL
 void BuildTransposeOpDef(
    const std::string &input_name,
    const std::string &output_name,
@@ -514,76 +505,73 @@ MaceStatus NetDefAdapter::AdaptMemoryType(
  // (only support one kind of memory type for multiple outputs)
  op_registry_->GetInOutMemoryTypes(op_def->type(), context);
 #ifdef MACE_ENABLE_OPENCL
-  // if op is memory-unused op, no transformation
+  int input_size = op_def->input_size();
-  if (TransformRequiredOp(op_def->type())) {
+  for (int i = 0; i < input_size; ++i) {
-    int input_size = op_def->input_size();
+    if (output_map->count(op_def->input(i)) == 0) {
-    for (int i = 0; i < input_size; ++i) {
+      MACE_CHECK(ws_->GetTensor(op_def->input(i)) != nullptr
-      if (output_map->count(op_def->input(i)) == 0) {
+                     && ws_->GetTensor(op_def->input(i))->is_weight(),
-        MACE_CHECK(ws_->GetTensor(op_def->input(i)) != nullptr
+                 "Tensor ", op_def->input(i), " of ",
-                       && ws_->GetTensor(op_def->input(i))->is_weight(),
+                 op_def->name(), " not allocated");
-                   "Tensor ", op_def->input(i), " of ",
+      continue;
-                   op_def->name(), " not allocated");
+    }
-        continue;
+    auto &input_info = output_map->at(op_def->input(i));
-      }
+    // check whether to do transform
-      auto &input_info = output_map->at(op_def->input(i));
+    MemoryType src_mem_type = input_info.mem_type;
-      // check whether to do transform
+    MemoryType dst_mem_type = context->GetInputMemType(i);
-      MemoryType src_mem_type = input_info.mem_type;
+    auto wanted_input_dtype = context->GetInputDataType(i);
-      MemoryType dst_mem_type = context->GetInputMemType(i);
+    if (src_mem_type != dst_mem_type ||
-      auto wanted_input_dtype = context->GetInputDataType(i);
+        (input_info.dtype != wanted_input_dtype &&
-      if (src_mem_type != dst_mem_type ||
+            (src_mem_type != MemoryType::CPU_BUFFER
-          (input_info.dtype != wanted_input_dtype &&
+                || dst_mem_type != MemoryType::CPU_BUFFER))) {
-              (src_mem_type != MemoryType::CPU_BUFFER
+      auto transformed_name = TransformedName(op_def->input(i),
-                  || dst_mem_type != MemoryType::CPU_BUFFER))) {
+                                              "mem_type",
-        auto transformed_name = TransformedName(op_def->input(i),
+                                              dst_mem_type);
-                                                "mem_type",
+      // check whether the tensor has been transformed
-                                                dst_mem_type);
+      if (transformed_set->count(transformed_name) == 0) {
-        // check whether the tensor has been transformed
+        VLOG(1) << "Add Transform operation " << op_def->name()
-        if (transformed_set->count(transformed_name) == 0) {
+                << " to transform tensor "
-          VLOG(1) << "Add Transform operation " << op_def->name()
+                << op_def->input(i) << "', from memory type "
-                  << " to transform tensor "
+                << input_info.mem_type << " to "
-                  << op_def->input(i) << "', from memory type "
+                << dst_mem_type;
-                  << input_info.mem_type << " to "
+        OperatorDef *transformed_op_def = target_net_def->add_op();
-                  << dst_mem_type;
+        OpenCLUtil::BuildTransformOpDef(
-          OperatorDef *transformed_op_def = target_net_def->add_op();
+            op_def->input(i),
-          OpenCLUtil::BuildTransformOpDef(
+            input_info.shape,
-              op_def->input(i),
+            transformed_name,
-              input_info.shape,
+            wanted_input_dtype,
-              transformed_name,
+            context->GetInputOpenCLBufferType(i),
-              wanted_input_dtype,
+            dst_mem_type,
-              context->GetInputOpenCLBufferType(i),
+            input_info.data_format,
-              dst_mem_type,
+            transformed_op_def);
-              input_info.data_format,
+        // set data format arg
-              transformed_op_def);
+        SetProtoArg<int>(transformed_op_def,
-          // set data format arg
+                         "data_format",
-          SetProtoArg<int>(transformed_op_def,
+                         static_cast<int>(input_info.data_format));
-                           "data_format",
+        // set output memory type argument
-                           static_cast<int>(input_info.data_format));
+        SetProtoArg<int>(transformed_op_def,
-          // set output memory type argument
+                         OutputMemoryTypeTagName(),
-          SetProtoArg<int>(transformed_op_def,
+                         dst_mem_type);
-                           OutputMemoryTypeTagName(),
-                           dst_mem_type);
-          // update tensor consumer information
+        // update tensor consumer information
-          output_map->at(op_def->input(i)).consumer_op_indices.push_back(
+        output_map->at(op_def->input(i)).consumer_op_indices.push_back(
-              target_net_def->op_size() - 1);
+            target_net_def->op_size() - 1);
-          // update output information map
+        // update output information map
-          output_map->emplace(
+        output_map->emplace(
-              transformed_name,
+            transformed_name,
-              InternalOutputInfo(
+            InternalOutputInfo(
-                  dst_mem_type,
+                dst_mem_type,
-                  context->GetInputDataType(i),
+                context->GetInputDataType(i),
-                  input_info.data_format,
+                input_info.data_format,
-                  input_info.shape,
+                input_info.shape,
-                  target_net_def->op_size() - 1));
+                target_net_def->op_size() - 1));
-          // update tensor shape map
+        // update tensor shape map
-          tensor_shape_map->emplace(transformed_name, input_info.shape);
+        tensor_shape_map->emplace(transformed_name, input_info.shape);
-          // record transformed tensors
+        // record transformed tensors
-          transformed_set->insert(transformed_name);
+        transformed_set->insert(transformed_name);
-        }
-        // update original op_def's input
-        op_def->set_input(i, transformed_name);
      }
+      // update original op_def's input
+      op_def->set_input(i, transformed_name);
    }
  }
 #else

--- a/mace/ops/matmul.cc
+++ b/mace/ops/matmul.cc
@@ -602,14 +602,6 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
                   DeviceType::CPU, uint8_t);
 #endif  // MACE_ENABLE_QUANTIZE
-#ifdef MACE_ENABLE_OPENCL
-  MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
-                   DeviceType::GPU, float);
-  MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
-                   DeviceType::GPU, half);
-#endif  // MACE_ENABLE_OPENCL
 #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
  MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
                   DeviceType::CPU, float16_t);

--- a/mace/tools/git/gen_version_source.sh
+++ b/mace/tools/git/gen_version_source.sh
@@ -28,7 +28,7 @@ fi
 mkdir -p $OUTPUT_DIR
-GIT_VERSION=$(git --git-dir=${MACE_SOURCE_DIR}/.git --work-tree=${MACE_SOURCE_DIR} describe --long --tags)
+GIT_VERSION=$(git --git-dir=${MACE_SOURCE_DIR}/.git --work-tree=${MACE_SOURCE_DIR} describe --long --tags --abbrev=7)
 if [[ $? != 0 ]]; then
  GIT_VERSION=unknown
 else