From 4f4882fa11d935ae1d44d13d83a586eec0c62952 Mon Sep 17 00:00:00 2001
From: luxuhui <luxuhui@xiaomi.com>
Date: Wed, 17 Apr 2019 19:02:57 +0800
Subject: [PATCH] Optimize the compile options and related code

N/A

Signed-off-by: Luxuhui <luxuhui@xiaomi.com>
---
 mace/core/quantize.cc                        |  4 ++--
 mace/core/runtime/opencl/opencl_allocator.cc |  6 +++++
 mace/core/runtime/opencl/opencl_runtime.cc   |  2 +-
 mace/libmace/mace.cc                         |  6 ++---
 mace/ops/addn.cc                             |  2 --
 mace/ops/common/transpose.h                  |  2 +-
 mace/ops/opencl/image/split.h                |  2 +-
 mace/ops/ops_test_util.cc                    | 23 ++++++++++++++++----
 mace/ops/ops_test_util.h                     | 15 ++++++++++---
 mace/ops/strided_slice.cc                    |  2 +-
 tools/bazel.rc                               |  3 +++
 11 files changed, 49 insertions(+), 18 deletions(-)
diff --git a/mace/core/quantize.cc b/mace/core/quantize.cc
index 167c6da3..ec4c65ac 100644
--- a/mace/core/quantize.cc
+++ b/mace/core/quantize.cc
@@ -77,7 +77,7 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
   thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
     for (index_t i = start; i < end; i += step) {
       uint8x16_t vi = vld1q_u8(input + i * 16);
-      float32x4x4_t vo = {
+      float32x4x4_t vo = {{
           vmulq_f32(vscale,
                     vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(
                         vget_low_u16(vmovl_u8(vget_low_u8(vi))))), vzero))),
@@ -90,7 +90,7 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
           vmulq_f32(vscale,
                     vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(
                         vget_high_u16(vmovl_u8(vget_high_u8(vi))))), vzero))),
-      };
+      }};
       vst1q_f32(output + i * 16, vo.val[0]);
       vst1q_f32(output + i * 16 + 4, vo.val[1]);
       vst1q_f32(output + i * 16 + 8, vo.val[2]);
diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc
index e63b252b..0a42e295 100644
--- a/mace/core/runtime/opencl/opencl_allocator.cc
+++ b/mace/core/runtime/opencl/opencl_allocator.cc
@@ -93,6 +93,12 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
                  << image_shape[0] << ", " << image_shape[1]
                  << "] failed because of "
                  << OpenCLErrorToString(error);
+    // CL_INVALID_IMAGE_SIZE often confuses users; log the max size as a hint.
+    if (error == CL_INVALID_IMAGE_SIZE) {
+      auto max_2d_size = opencl_runtime_->GetMaxImage2DSize();
+      LOG(WARNING) << "The allowable OpenCL image size is: "
+                   << max_2d_size[0] << "x" << max_2d_size[1];
+    }
     delete cl_image;
     *result = nullptr;
     return MaceStatus::MACE_OUT_OF_RESOURCES;
diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc
index bcf26498..e26c6048 100644
--- a/mace/core/runtime/opencl/opencl_runtime.cc
+++ b/mace/core/runtime/opencl/opencl_runtime.cc
@@ -713,7 +713,7 @@ std::vector<uint64_t> OpenCLRuntime::GetMaxImage2DSize() {
     LOG(ERROR) << "error: " << OpenCLErrorToString(err);
     return {};
   }
-  return {max_height, max_width};
+  return {max_width, max_height};
 }
 
 uint64_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) {
diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc
index c5e16b76..fe6ea488 100644
--- a/mace/libmace/mace.cc
+++ b/mace/libmace/mace.cc
@@ -391,12 +391,12 @@ class MaceEngine::Impl {
   std::unique_ptr<Workspace> ws_;
   std::unique_ptr<NetBase> net_;
   bool is_quantized_model_;
-#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
-  std::unique_ptr<HexagonControlWrapper> hexagon_controller_;
-#endif
   std::map<std::string, mace::InputOutputInfo> input_info_map_;
   std::map<std::string, mace::InputOutputInfo> output_info_map_;
   std::unique_ptr<utils::ThreadPool> thread_pool_;
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
+  std::unique_ptr<HexagonControlWrapper> hexagon_controller_;
+#endif
 
   MACE_DISABLE_COPY_AND_ASSIGN(Impl);
 };
diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc
index ea6458d4..1f0fa7a1 100644
--- a/mace/ops/addn.cc
+++ b/mace/ops/addn.cc
@@ -29,8 +29,6 @@
 namespace mace {
 namespace ops {
 
-static constexpr int kCostPerGroup = 1024;
-
 template <DeviceType D, class T>
 class AddNOp;
 
diff --git a/mace/ops/common/transpose.h b/mace/ops/common/transpose.h
index 0c075185..b7b42490 100644
--- a/mace/ops/common/transpose.h
+++ b/mace/ops/common/transpose.h
@@ -132,7 +132,7 @@ inline void TransposeNCHWToNHWCC2<float>(utils::ThreadPool *thread_pool,
       for (w = 0; w + 3 < width; w += 4) {
         float32x4_t vi0 = vld1q_f32(input + in_offset);
         float32x4_t vi1 = vld1q_f32(input + in_offset + image_size);
-        float32x4x2_t vi = {vi0, vi1};
+        float32x4x2_t vi = {{vi0, vi1}};
         vst2q_f32(output + out_offset, vi);
         in_offset += 4;
         out_offset += 8;
diff --git a/mace/ops/opencl/image/split.h b/mace/ops/opencl/image/split.h
index 04eaaa87..12755910 100644
--- a/mace/ops/opencl/image/split.h
+++ b/mace/ops/opencl/image/split.h
@@ -133,7 +133,7 @@ MaceStatus SplitKernel<T>::Compute(
     }
   }
   if (context->future() != nullptr) {
-    context->future()->wait_fn = [runtime, call_stats](CallStats *stats) {
+    context->future()->wait_fn = [call_stats](CallStats *stats) {
       if (stats != nullptr) {
         stats->start_micros = call_stats.start_micros;
         stats->end_micros = stats->start_micros + call_stats.end_micros;
diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc
index ab61e8c6..bcf1282d 100644
--- a/mace/ops/ops_test_util.cc
+++ b/mace/ops/ops_test_util.cc
@@ -101,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const {
 }
 
 namespace {
+#ifdef MACE_ENABLE_OPENCL
 std::string GetStoragePathFromEnv() {
   char *storage_path_str = getenv("MACE_INTERNAL_STORAGE_PATH");
   if (storage_path_str == nullptr) return "";
   return storage_path_str;
 }
+#endif
 }  // namespace
 
 OpTestContext *OpTestContext::Get(int num_threads,
@@ -117,15 +119,21 @@ OpTestContext *OpTestContext::Get(int num_threads,
 
 OpTestContext::OpTestContext(int num_threads,
                              CPUAffinityPolicy cpu_affinity_policy)
+#ifdef MACE_ENABLE_OPENCL
     : gpu_context_(std::make_shared<GPUContext>(GetStoragePathFromEnv())),
       opencl_mem_types_({MemoryType::GPU_IMAGE}),
       thread_pool_(make_unique<utils::ThreadPool>(num_threads,
                                                   cpu_affinity_policy)) {
+#else
+    : thread_pool_(make_unique<utils::ThreadPool>(num_threads,
+                                                  cpu_affinity_policy)) {
+#endif
   thread_pool_->Init();
 
   device_map_[DeviceType::CPU] = make_unique<CPUDevice>(
       num_threads, cpu_affinity_policy, thread_pool_.get());
 
+#ifdef MACE_ENABLE_OPENCL
   device_map_[DeviceType::GPU] = make_unique<GPUDevice>(
       gpu_context_->opencl_tuner(),
       gpu_context_->opencl_cache_storage(),
@@ -135,16 +143,18 @@ OpTestContext::OpTestContext(int num_threads,
       num_threads,
       cpu_affinity_policy,
       thread_pool_.get());
-}
-
-std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
-  return gpu_context_;
+#endif
 }
 
 Device *OpTestContext::GetDevice(DeviceType device_type) {
   return device_map_[device_type].get();
 }
 
+#ifdef MACE_ENABLE_OPENCL
+std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
+  return gpu_context_;
+}
+
 std::vector<MemoryType> OpTestContext::opencl_mem_types() {
   return opencl_mem_types_;
 }
@@ -160,6 +170,7 @@ void OpTestContext::SetOCLImageTestFlag() {
 void OpTestContext::SetOCLImageAndBufferTestFlag() {
   opencl_mem_types_ = {MemoryType::GPU_IMAGE, MemoryType::GPU_BUFFER};
 }
+#endif  // MACE_ENABLE_OPENCL
 
 bool OpsTestNet::Setup(mace::DeviceType device) {
   NetDef net_def;
@@ -231,6 +242,7 @@ MaceStatus OpsTestNet::Run() {
 
 MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
   if (device == DeviceType::GPU) {
+#ifdef MACE_ENABLE_OPENCL
     auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types();
     for (auto type : opencl_mem_types) {
       OpTestContext::Get()->GetDevice(device)
@@ -239,6 +251,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
       MACE_RETURN_IF_ERROR(Run());
     }
     return MaceStatus::MACE_SUCCESS;
+#else
+    return MaceStatus::MACE_UNSUPPORTED;
+#endif  // MACE_ENABLE_OPENCL
   } else {
     Setup(device);
     return Run();
diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h
index e9ef4d90..d2212a65 100644
--- a/mace/ops/ops_test_util.h
+++ b/mace/ops/ops_test_util.h
@@ -29,8 +29,6 @@
 #include "mace/core/types.h"
 #include "mace/core/net.h"
 #include "mace/core/device_context.h"
-#include "mace/core/runtime/opencl/gpu_device.h"
-#include "mace/core/runtime/opencl/opencl_util.h"
 #include "mace/core/tensor.h"
 #include "mace/core/workspace.h"
 #include "mace/ops/ops_registry.h"
@@ -40,6 +38,11 @@
 #include "mace/core/quantize.h"
 #include "mace/ops/testing/test_utils.h"
 
+#ifdef MACE_ENABLE_OPENCL
+#include "mace/core/runtime/opencl/gpu_device.h"
+#include "mace/core/runtime/opencl/opencl_util.h"
+#endif
+
 namespace mace {
 namespace ops {
 namespace test {
@@ -78,12 +81,14 @@ class OpTestContext {
   static OpTestContext *Get(
       int num_threads = -1,
       CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY);
-  std::shared_ptr<GPUContext> gpu_context() const;
   Device *GetDevice(DeviceType device_type);
+#ifdef MACE_ENABLE_OPENCL
+  std::shared_ptr<GPUContext> gpu_context() const;
   std::vector<MemoryType> opencl_mem_types();
   void SetOCLBufferTestFlag();
   void SetOCLImageTestFlag();
   void SetOCLImageAndBufferTestFlag();
+#endif
   utils::ThreadPool *thread_pool() {
     return thread_pool_.get();
   }
@@ -93,8 +98,10 @@ class OpTestContext {
                 CPUAffinityPolicy cpu_affinity_policy);
   MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext);
 
+#ifdef MACE_ENABLE_OPENCL
   std::shared_ptr<GPUContext> gpu_context_;
   std::vector<MemoryType> opencl_mem_types_;
+#endif
   std::map<DeviceType, std::unique_ptr<Device>> device_map_;
   std::unique_ptr<utils::ThreadPool> thread_pool_;
 };
@@ -424,7 +431,9 @@ class OpsTestBase : public ::testing::Test {
   }
 
   virtual void TearDown() {
+#ifdef MACE_ENABLE_OPENCL
     OpTestContext::Get()->SetOCLImageTestFlag();
+#endif
   }
 };
 
diff --git a/mace/ops/strided_slice.cc b/mace/ops/strided_slice.cc
index c10914f2..4218d1f7 100644
--- a/mace/ops/strided_slice.cc
+++ b/mace/ops/strided_slice.cc
@@ -199,7 +199,7 @@ class StridedSliceOp : public Operation {
             strides_indices_vec[d] > 0 ? 0 : -1,
             strides_indices_vec[d] > 0 ? dim_len : dim_len - 1};
 
-        auto format_indices = [valid_range, d, dim_len](index_t indice) {
+        auto format_indices = [valid_range, dim_len](index_t indice) {
           index_t forward = indice < 0 ? indice + dim_len : indice;
           return Clamp(forward, valid_range[0], valid_range[1]);
         };
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 629129cf..f0c6d64f 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -19,6 +19,9 @@ build:android --linkopt=-lm
 build:android --distinct_host_configuration=true
 build:android --crosstool_top=//external:android/crosstool
 build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+build:android --copt -Wall
+build:android --copt -Wno-mismatched-tags
+build:android --copt -Wno-missing-braces
 
 # Linux host build, --config linux
 build:linux --define linux=true
-- 
GitLab