提交 6c9f380e · 作者：卢旭辉

Merge branch 'multiple' into 'master'

fix: Support building multiple models of GPU and DSP together

See merge request applied-machine-learning/sysml/mace!1314
......@@ -32,16 +32,18 @@ HexagonDevice::HexagonDevice(DeviceType device_type,
#ifdef MACE_ENABLE_OPENCL
// Returns the GPU runtime of the embedded GPU sub-device.
// Only compiled in when OpenCL support is enabled. Aborts via
// MACE_CHECK_NOTNULL if no GPU device was supplied at construction
// (per this MR, the constructor's gpu_device argument defaults to
// nullptr, so callers must not assume one is always present).
GPURuntime *HexagonDevice::gpu_runtime() {
MACE_CHECK_NOTNULL(gpu_device_);
return gpu_device_->gpu_runtime();
}
#endif // MACE_ENABLE_OPENCL
// Returns the allocator to use for this Hexagon device: the GPU
// device's allocator when one is attached, otherwise the device's own.
//
// NOTE(review): this span is a diff hunk with the pre-change and
// post-change lines interleaved (no +/- markers survived scraping).
// The unconditional `return gpu_device_->allocator();` under #ifdef and
// the first `return allocator_.get();` under #else are the REMOVED
// lines; the null-checked `if` block plus the trailing
// `return allocator_.get();` are the ADDED replacement. Reconcile
// against the upstream merge before treating this as compilable source.
Allocator *HexagonDevice::allocator() {
#ifdef MACE_ENABLE_OPENCL
return gpu_device_->allocator();
#else
return allocator_.get();
// New behavior: fall back to the CPU allocator when gpu_device_ is
// null, so HEXAGON-only builds work without a GPU sub-device.
if (gpu_device_ != nullptr) {
return gpu_device_->allocator();
}
#endif // MACE_ENABLE_OPENCL
return allocator_.get();
}
DeviceType HexagonDevice::device_type() const {
......
......@@ -39,7 +39,7 @@ class HexagonDevice : public CPUDevice {
HexagonDevice(DeviceType device_type,
#ifdef MACE_ENABLE_OPENCL
utils::ThreadPool *thread_pool,
std::unique_ptr<GPUDevice> gpu_device);
std::unique_ptr<GPUDevice> gpu_device = nullptr);
#else
utils::ThreadPool *thread_pool);
#endif // MACE_ENABLE_OPENCL
......
......@@ -578,8 +578,9 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
}
#endif
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
if (device_type_ == DeviceType::HEXAGON
|| device_type_ == DeviceType::HTA) {
if (device_type_ == DeviceType::HEXAGON) {
device_.reset(new HexagonDevice(device_type_, thread_pool_.get()));
} else if (device_type_ == DeviceType::HTA) {
#ifdef MACE_ENABLE_OPENCL
device_.reset(new HexagonDevice(
device_type_, thread_pool_.get(),
......
......@@ -22,6 +22,7 @@
#include <cmath>
#include <functional>
#include <memory>
#include <set>
#include <utility>
#include <vector>
......@@ -1198,6 +1199,27 @@ void RegisterEltwise(OpRegistry *op_registry) {
#endif // MACE_ENABLE_QUANTIZE
MACE_REGISTER_GPU_OP(op_registry, "Eltwise", EltwiseOp);
MACE_REGISTER_OP_CONDITION(
op_registry, OpConditionBuilder("Eltwise").SetDevicePlacerFunc(
[](OpConditionContext *context) -> std::set<DeviceType> {
auto op = context->operator_def();
if (op->output_shape_size() != op->output_size()) {
return {DeviceType::CPU, DeviceType::GPU};
}
int input_size = op->input_size();
auto ws = context->workspace();
for (int i = 0; i < input_size; ++i) {
if (ws->HasTensor(op->input(i)) &&
ws->GetTensor(op->input(i))->is_weight()) {
int dims = ws->GetTensor(op->input(i))->dim_size();
if (dims != 1 && dims != 4) {
return {DeviceType::CPU};
}
}
}
return {DeviceType::CPU, DeviceType::GPU};
}));
}
} // namespace ops
......
......@@ -279,8 +279,7 @@ class DeviceWrapper:
if os.path.exists(opencl_parameter_file):
self.push(opencl_parameter_file, self.data_dir)
if self.system == SystemType.android \
and device_type == common.DeviceType.HEXAGON:
if self.system == SystemType.android:
self.push(
"third_party/nnlib/%s/libhexagon_controller.so" % abi,
self.data_dir)
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册