diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b13f7122f65beefa99a71d1849b8a569978ccad3..9fd34aa90e8e3f939a5bc2351a021f8c43f6b3ca 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,7 +9,7 @@ cpplint:
     - master
   script:
     - curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-    - python cpplint.py --root=mace --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
+    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
 
 ops_test:
   stage: ops_test
diff --git a/WORKSPACE b/WORKSPACE
index 783ddc7acf36eea8ebeeafd8ea063e03958e663d..1f22f3276fe2cc74282ce2acf438d341298a4170 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -55,7 +55,7 @@ new_git_repository(
     name = "opencl_clhpp",
     build_file = "mace/third_party/opencl-clhpp.BUILD",
     commit = "4c6f7d56271727e37fb19a9b47649dd175df2b12",
-    remote = "https://github.com/KhronosGroup/OpenCL-CLHPP.git",
+    remote = "http://v9.git.n.xiaomi.com/deep-computing/OpenCL-CLHPP-Mirror.git",
 )
 
 new_git_repository(
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 498d1ea006d692ac10ec211fb9e99c6b25b4986d..6d83d0e417214f99a00d7c101c07016b89c7b798 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -117,9 +117,3 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
 # Download tensorflow tools
 RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \
     chmod +x transform_graph
-
-# Install gitlab runner
-RUN curl -L https://packages.gitlab.com/install/repositories/runner/gitlab-ci-multi-runner/script.deb.sh | bash
-RUN apt-get install gitlab-ci-multi-runner
-
-ENTRYPOINT gitlab-runner run
diff --git a/docker/gitlab-runner/Dockerfile b/docker/gitlab-runner/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..13984e4cde4c1caea4f96e3b0c36b4f8d1f9e60e
--- /dev/null
+++ b/docker/gitlab-runner/Dockerfile
@@ -0,0 +1,12 @@
+FROM cr.d.xiaomi.net/mace/mace-dev:latest
+
+# Update source (the mirrors.163.com rewrite below stays disabled: it appears not to work in xiaomi network)
+# RUN sed -i 's/http:\/\/archive\.ubuntu\.com\/ubuntu\//http:\/\/mirrors\.163\.com\/ubuntu\//g' /etc/apt/sources.list
+RUN apt-get update -y
+
+# Install gitlab runner; -y answers apt's confirmation prompt, which would otherwise abort a non-interactive build
+RUN curl -L https://packages.gitlab.com/install/repositories/runner/gitlab-ci-multi-runner/script.deb.sh | bash
+RUN apt-get install -y gitlab-ci-multi-runner
+
+# Exec form: gitlab-runner is started directly (no /bin/sh wrapper) so it receives stop signals itself
+ENTRYPOINT ["gitlab-runner", "run"]
diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD
index 2cdc00e180175be1237545a8b1d4e7cbfb876633..9334e7473acb0e706cdd34840228d16cc38578b3 100644
--- a/mace/benchmark/BUILD
+++ b/mace/benchmark/BUILD
@@ -16,7 +16,8 @@ cc_library(
     hdrs = ["stat_summarizer.h"],
     linkstatic = 1,
     deps = [
-        "//mace/core",
+        "//mace/public",
+        "//mace/utils",
     ],
 )
 
diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index eebbb32bec2edf4424eeff552aa21e69123a80ae..a9f76fcab7d91d8c81a76d181f9515998fb2f72a 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -7,6 +7,9 @@
 #define MACE_CORE_ALLOCATOR_H_
 
 #include <malloc.h>
+#include <map>
+#include <limits>
+#include <vector>
 
 #include "mace/core/registry.h"
 #include "mace/core/types.h"
@@ -81,7 +84,7 @@ class CPUAllocator : public Allocator {
     free(data);
   };
   void *Map(void *buffer, size_t offset, size_t nbytes) const override {
-    return (char *)buffer + offset;
+    return reinterpret_cast<char*>(buffer) + offset;
   }
   void *MapImage(void *buffer,
                  const std::vector<size_t> &image_shape,
diff --git a/mace/core/arg_helper.cc b/mace/core/arg_helper.cc
index 8b6d57fbba00ce55bd0a9bcace65fb9714379e25..207d2de91acc2a5b3efc55df95ff2b94a89c9f04 100644
--- a/mace/core/arg_helper.cc
+++ b/mace/core/arg_helper.cc
@@ -2,6 +2,9 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
+#include <string>
+#include <vector>
+
 #include "mace/core/arg_helper.h"
 #include "mace/utils/logging.h"
 
diff --git a/mace/core/arg_helper.h b/mace/core/arg_helper.h
index 25137804016c442c96dd88f683f810ff4288e5b9..ab8e14b99bf9baa078dd37129a3a089d4fb6096f 100644
--- a/mace/core/arg_helper.h
+++ b/mace/core/arg_helper.h
@@ -5,6 +5,8 @@
 #ifndef MACE_CORE_ARG_HELPER_H_
 #define MACE_CORE_ARG_HELPER_H_
 
+#include <string>
+#include <vector>
 #include <map>
 
 #include "mace/public/mace.h"
diff --git a/mace/core/buffer.h b/mace/core/buffer.h
index 38c577a582e4f2dcf668cad26fd54b82967836c7..e6d433e62dae922d62a4f554fb87b1fcfb2cb08f 100644
--- a/mace/core/buffer.h
+++ b/mace/core/buffer.h
@@ -6,6 +6,8 @@
 #define MACE_CORE_BUFFER_H_
 
 #include <vector>
+#include <functional>
+
 #include "mace/core/allocator.h"
 #include "mace/core/types.h"
 
@@ -14,7 +16,7 @@ namespace mace {
 class BufferBase {
  public:
   BufferBase() : size_(0) {}
-  BufferBase(index_t size) : size_(size) {}
+  explicit BufferBase(index_t size) : size_(size) {}
   virtual ~BufferBase() {}
 
   virtual void *buffer() = 0;
@@ -39,7 +41,7 @@ class BufferBase {
 
   virtual bool OnHost() const = 0;
 
-  virtual index_t offset() const { return 0; };
+  virtual index_t offset() const { return 0; }
 
   template <typename T>
   const T *data() const {
@@ -59,7 +61,7 @@ class BufferBase {
 
 class Buffer : public BufferBase {
  public:
-  Buffer(Allocator *allocator)
+  explicit Buffer(Allocator *allocator)
       : BufferBase(0),
         allocator_(allocator),
         buf_(nullptr),
@@ -93,7 +95,7 @@ class Buffer : public BufferBase {
   void *buffer() {
     MACE_CHECK_NOTNULL(buf_);
     return buf_;
-  };
+  }
 
   const void *raw_data() const {
     if (OnHost()) {
@@ -129,7 +131,7 @@ class Buffer : public BufferBase {
   void Map(std::vector<size_t> *pitch) {
     MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped");
     mapped_buf_ = Map(0, size_, pitch);
-  };
+  }
 
   void UnMap() {
     UnMap(mapped_buf_);
@@ -151,7 +153,7 @@ class Buffer : public BufferBase {
   void Copy(void *src, index_t offset, index_t length) {
     MACE_CHECK_NOTNULL(mapped_buf_);
     MACE_CHECK(length <= size_, "out of buffer");
-    memcpy(mapped_buf_, (char *)src + offset, length);
+    memcpy(mapped_buf_, reinterpret_cast<char*>(src) + offset, length);
   }
 
   bool OnHost() const { return allocator_->OnHost(); }
@@ -197,7 +199,7 @@ class Image : public BufferBase {
   void *buffer() {
     MACE_CHECK_NOTNULL(buf_);
     return buf_;
-  };
+  }
 
   const void *raw_data() const {
     MACE_CHECK_NOTNULL(mapped_buf_);
@@ -227,12 +229,12 @@ class Image : public BufferBase {
     MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped");
     MACE_CHECK_NOTNULL(pitch);
     mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch);
-  };
+  }
 
   void UnMap() {
     UnMap(mapped_buf_);
     mapped_buf_ = nullptr;
-  };
+  }
 
   void Resize(index_t size) { MACE_NOT_IMPLEMENTED; }
 
@@ -276,12 +278,12 @@ class BufferSlice : public BufferBase {
   void *buffer() {
     MACE_CHECK_NOTNULL(buffer_);
     return buffer_->buffer();
-  };
+  }
 
   const void *raw_data() const {
     if (OnHost()) {
       MACE_CHECK_NOTNULL(buffer_);
-      return (char *)buffer_->raw_data() + offset_;
+      return reinterpret_cast<const char*>(buffer_->raw_data()) + offset_;
     } else {
       MACE_CHECK_NOTNULL(mapped_buf_);
       return mapped_buf_;
@@ -304,13 +306,13 @@ class BufferSlice : public BufferBase {
     MACE_CHECK_NOTNULL(buffer_);
     MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null");
     mapped_buf_ = buffer_->Map(offset_, length_, pitch);
-  };
+  }
 
   void UnMap() {
     MACE_CHECK_NOTNULL(mapped_buf_);
     buffer_->UnMap(mapped_buf_);
     mapped_buf_ = nullptr;
-  };
+  }
 
   void Resize(index_t size) { MACE_NOT_IMPLEMENTED; }
 
@@ -326,6 +328,6 @@ class BufferSlice : public BufferBase {
   index_t offset_;
   index_t length_;
 };
-}
+}  // namespace mace
 
 #endif  // MACE_CORE_BUFFER_H_
diff --git a/mace/core/mace.cc b/mace/core/mace.cc
index e4d25c7d114dc5c9a28e457a6bb0888a887aa92a..f1f0d59a4716f738ee07c11598d68e4490ca0c31 100644
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -459,7 +459,7 @@ MaceEngine::~MaceEngine() {
     MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error");
     MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error");
   }
-};
+}
 
 bool MaceEngine::Run(const float *input,
                      const std::vector<index_t> &input_shape,
@@ -493,7 +493,6 @@ bool MaceEngine::Run(const float *input,
     auto shape = output_tensor->shape();
     int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1,
                                           std::multiplies<int64_t>());
-    // TODO: check for overflow exception.
     std::memcpy(output, output_tensor->data<float>(),
                 output_size * sizeof(float));
     return true;
@@ -530,7 +529,6 @@ bool MaceEngine::Run(const std::vector<MaceInputInfo> &inputs,
       int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1,
                                             std::multiplies<int64_t>());
       MACE_CHECK(!shape.empty()) << "Output's shape must greater than 0";
-      // TODO: check for overflow exception.
       std::memcpy(output.second, output_tensor->data<float>(),
                   output_size * sizeof(float));
     } else {
diff --git a/mace/core/net.cc b/mace/core/net.cc
index 2439a67fee31f1e402efe7aff4f1c87daf5ad6b2..02efc1a41ebcdde25df4b91ebe7e679d8e4434a1 100644
--- a/mace/core/net.cc
+++ b/mace/core/net.cc
@@ -2,6 +2,8 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
+#include <utility>
+
 #include "mace/core/net.h"
 #include "mace/utils/memory_logging.h"
 #include "mace/utils/timer.h"
diff --git a/mace/core/net.h b/mace/core/net.h
index 3b625393cf6fef8fd8a6a8336a60233b2c5bf087..e14297222a933f73640fff3736664d8c0f1b1f84 100644
--- a/mace/core/net.h
+++ b/mace/core/net.h
@@ -5,6 +5,10 @@
 #ifndef MACE_CORE_NET_H_
 #define MACE_CORE_NET_H_
 
+#include <memory>
+#include <string>
+#include <vector>
+
 #include "mace/core/operator.h"
 #include "mace/public/mace.h"
 
diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index c670d9aa729dc575a204eacf3789fd56675df4a9..ae6ca107e1f13e72958f401e88cdde5af6005d98 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -3,6 +3,9 @@
 //
 
 #include <sstream>
+#include <memory>
+#include <string>
+#include <vector>
 
 #include "mace/core/operator.h"
 
diff --git a/mace/core/operator.h b/mace/core/operator.h
index a163c0c89b5e4da2feba23232027474e8930f3a4..27afdadd3a3b4c8bfbbe613c36ca558025f7c606 100644
--- a/mace/core/operator.h
+++ b/mace/core/operator.h
@@ -2,8 +2,13 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
-#ifndef MACE_CORE_OPERATOR_H
-#define MACE_CORE_OPERATOR_H
+#ifndef MACE_CORE_OPERATOR_H_
+#define MACE_CORE_OPERATOR_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <map>
 
 #include "mace/core/arg_helper.h"
 #include "mace/core/future.h"
@@ -100,7 +105,7 @@ class Operator : public OperatorBase {
       }
     }
   }
-  virtual bool Run(StatsFuture *future) override = 0;
+  bool Run(StatsFuture *future) override = 0;
   ~Operator() noexcept override {}
 };
 
@@ -150,7 +155,7 @@ class OperatorRegistry {
       RegistryType;
   OperatorRegistry();
   ~OperatorRegistry() = default;
-  RegistryType *registry() { return &registry_; };
+  RegistryType *registry() { return &registry_; }
   std::unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
                                                Workspace *ws,
                                                DeviceType type,
@@ -171,4 +176,4 @@ MACE_DECLARE_REGISTRY(OpRegistry,
 
 }  // namespace mace
 
-#endif  // MACE_CORE_OPERATOR_H
+#endif  // MACE_CORE_OPERATOR_H_
diff --git a/mace/core/preallocated_pooled_allocator.h b/mace/core/preallocated_pooled_allocator.h
index 0299d2f4cf83d89f4871806e6b504985a345e374..80e7fa659bb3c566a47e4afea87dc40d114fba22 100644
--- a/mace/core/preallocated_pooled_allocator.h
+++ b/mace/core/preallocated_pooled_allocator.h
@@ -5,7 +5,10 @@
 #ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
 #define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
 
+#include <memory>
+#include <utility>
 #include <unordered_map>
+
 #include "mace/core/allocator.h"
 #include "mace/core/buffer.h"
 
diff --git a/mace/core/registry.h b/mace/core/registry.h
index 07eaa01b76b1ff2df005eadf46cce7c055b79c49..0e1e85d2e6c3d9bcfb6663ff82001206dc3de9e2 100644
--- a/mace/core/registry.h
+++ b/mace/core/registry.h
@@ -7,7 +7,7 @@
 
 #include <map>
 #include <memory>
-#include <mutex>
+#include <mutex>  // NOLINT(build/c++11)
 #include <string>
 #include <vector>
 
diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc
index 4c2cd851d4bd502a1096b8523a85d9e3cf61fcfd..555372ab8da152690237e12dd7f26a20c3db8468 100644
--- a/mace/core/runtime/cpu/cpu_runtime.cc
+++ b/mace/core/runtime/cpu/cpu_runtime.cc
@@ -2,19 +2,21 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
-#include "mace/public/mace.h"
-#include "mace/utils/logging.h"
 #include <omp.h>
 #include <sys/syscall.h>
 #include <unistd.h>
+#include <vector>
 
+#include "mace/core/runtime/cpu/cpu_runtime.h"
+#include "mace/public/mace.h"
+#include "mace/utils/logging.h"
 namespace mace {
 
 namespace {
 
-static int GetCPUMaxFreq(int cpu_id) {
+int GetCPUMaxFreq(int cpu_id) {
   char path[64];
-  sprintf(path,
+  snprintf(path, sizeof(path),
           "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
           cpu_id);
   FILE *fp = fopen(path, "rb");
@@ -26,24 +28,25 @@ static int GetCPUMaxFreq(int cpu_id) {
   return freq;
 }
 
-static void SortCPUIdsByMaxFreqAsc(std::vector<int> &cpu_ids) {
-  int cpu_count = cpu_ids.size();
+void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) {
+  MACE_CHECK_NOTNULL(cpu_ids);
+  int cpu_count = cpu_ids->size();
   std::vector<int> cpu_max_freq;
   cpu_max_freq.resize(cpu_count);
 
   // set cpu max frequency
   for (int i = 0; i < cpu_count; ++i) {
     cpu_max_freq[i] = GetCPUMaxFreq(i);
-    cpu_ids[i] = i;
+    (*cpu_ids)[i] = i;
   }
 
   // sort cpu ids by max frequency asc, bubble sort
   for (int i = 0; i < cpu_count - 1; ++i) {
     for (int j = i + 1; j < cpu_count; ++j) {
       if (cpu_max_freq[i] > cpu_max_freq[j]) {
-        int tmp = cpu_ids[i];
-        cpu_ids[i] = cpu_ids[j];
-        cpu_ids[j] = tmp;
+        int tmp = (*cpu_ids)[i];
+        (*cpu_ids)[i] = (*cpu_ids)[j];
+        (*cpu_ids)[j] = tmp;
 
         tmp = cpu_max_freq[i];
         cpu_max_freq[i] = cpu_max_freq[j];
@@ -53,11 +56,12 @@ static void SortCPUIdsByMaxFreqAsc(std::vector<int> &cpu_ids) {
   }
 }
 
-static void SetThreadAffinity(cpu_set_t mask) {
+void SetThreadAffinity(cpu_set_t mask) {
   int sys_call_res;
   pid_t pid = gettid();
 
-  // TODO: when set omp num threads to 1, sometiomes return EINVAL(22) error
+  // TODO(chenghui): when omp num threads is set to 1,
+  // this sometimes returns an EINVAL(22) error.
   // https://linux.die.net/man/2/sched_setaffinity
   sys_call_res = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
   if (sys_call_res != 0) {
@@ -68,12 +72,11 @@ static void SetThreadAffinity(cpu_set_t mask) {
 }  // namespace
 
 void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
-
   int cpu_count = omp_get_num_procs();
   LOG(INFO) << "cpu_count: " << cpu_count;
   std::vector<int> sorted_cpu_ids;
   sorted_cpu_ids.resize(cpu_count);
-  SortCPUIdsByMaxFreqAsc(sorted_cpu_ids);
+  SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids);
 
   std::vector<int> use_cpu_ids;
   if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) {
@@ -92,7 +95,7 @@ void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
   // compute mask
   cpu_set_t mask;
   CPU_ZERO(&mask);
-  for (auto cpu_id: use_cpu_ids) {
+  for (auto cpu_id : use_cpu_ids) {
     CPU_SET(cpu_id, &mask);
   }
   LOG(INFO) << "use cpus mask: " << mask.__bits[0];
diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h
index 13dfd680236663e5af516b004306f864013fe3e4..f80ca1b824bcc097b2276ae253ca2b4d702fab66 100644
--- a/mace/core/runtime/cpu/cpu_runtime.h
+++ b/mace/core/runtime/cpu/cpu_runtime.h
@@ -3,8 +3,8 @@
 //
 
 
-#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
-#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
+#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
+#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
 
 #include "mace/public/mace.h"
 
@@ -14,4 +14,4 @@ void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option);
 
 }
 
-#endif //MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
+#endif  // MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
index 2828ffa0e2056c180abbf93b6f35bb36edfcc851..138fd933a0848abb6b5300a5c97b0cc62e659f90 100644
--- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
+++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
@@ -3,8 +3,11 @@
 //
 
 #include <sys/time.h>
-#include <thread>
+#include <thread>  // NOLINT(build/c++11)
 #include <vector>
+#include <unordered_map>
+#include <string>
+#include <utility>
 
 #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
 #include "mace/core/runtime/hexagon/hexagon_nn_ops.h"
@@ -324,7 +327,7 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
   MACE_ASSERT(output_bytes == output_tensor->raw_size(),
               "wrong output bytes inferred.");
   return res == 0;
-};
+}
 
 bool HexagonControlWrapper::ExecuteGraphNew(
     const std::vector<Tensor> &input_tensors,
@@ -374,7 +377,7 @@ bool HexagonControlWrapper::ExecuteGraphNew(
   delete[] inputs;
   delete[] outputs;
   return res == 0;
-};
+}
 
 bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor,
                                                     Tensor *output_tensor) {
diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h
index 8cb3b359010b18e3afc350179d38060d7aa07b76..01f69749fcf050c83b953213f76122d676bde99c 100644
--- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h
+++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h
@@ -2,8 +2,8 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
-#ifndef MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
-#define MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_
 
 #include <vector>
 
@@ -16,7 +16,7 @@ namespace mace {
 
 class HexagonControlWrapper {
  public:
-  HexagonControlWrapper(){};
+  HexagonControlWrapper() {}
   int GetVersion();
   bool Config();
   bool Init();
@@ -53,6 +53,6 @@ class HexagonControlWrapper {
 
   DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper);
 };
-}
+}  // namespace mace
 
-#endif  // MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_
diff --git a/mace/core/runtime/hexagon/hexagon_controller.h b/mace/core/runtime/hexagon/hexagon_controller.h
index 0e7d7596ca6e36218e7d6ed7e82112e63607dd97..11f77c1d834e7a6de42574e8853fb54e6c078381 100644
--- a/mace/core/runtime/hexagon/hexagon_controller.h
+++ b/mace/core/runtime/hexagon/hexagon_controller.h
@@ -1,5 +1,9 @@
-#ifndef MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
-#define MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROLLER_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROLLER_H_
 
 #include "mace/core/runtime/hexagon/hexagon_nn.h"
 
@@ -18,4 +22,5 @@ int hexagon_controller_DeInitHexagon();
 }
 #endif  // __cplusplus
 
-#endif  // MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
\ No newline at end of file
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROLLER_H_
+
diff --git a/mace/core/runtime/hexagon/hexagon_nn.h b/mace/core/runtime/hexagon/hexagon_nn.h
index 0baafd8cc7956fed751120504a876b4da4c06b56..7bb5bdefbe2fef4dfd13283969c69f4fd66eff2a 100644
--- a/mace/core/runtime/hexagon/hexagon_nn.h
+++ b/mace/core/runtime/hexagon/hexagon_nn.h
@@ -1,8 +1,43 @@
-#ifndef _HEXAGON_NN_H
-#define _HEXAGON_NN_H
+/*
+ * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the
+ * disclaimer below) provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+ * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_H_
 #ifndef __QAIC_HEADER
 #define __QAIC_HEADER(ff) ff
-#endif  //__QAIC_HEADER
+#endif  // __QAIC_HEADER
 
 #ifndef __QAIC_HEADER_EXPORT
 #define __QAIC_HEADER_EXPORT
@@ -14,7 +49,7 @@
 
 #ifndef __QAIC_IMPL
 #define __QAIC_IMPL(ff) ff
-#endif  //__QAIC_IMPL
+#endif  // __QAIC_IMPL
 
 #ifndef __QAIC_IMPL_EXPORT
 #define __QAIC_IMPL_EXPORT
@@ -186,4 +221,4 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(
 #ifdef __cplusplus
 }
 #endif
-#endif  //_HEXAGON_NN_H
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_H_
diff --git a/mace/core/runtime/hexagon/hexagon_nn_ops.h b/mace/core/runtime/hexagon/hexagon_nn_ops.h
index 8704ce8068d9b38de4bc43a815a700b55eb2b480..faad9c4d0f256084ca3b651c96feab9b7767467e 100644
--- a/mace/core/runtime/hexagon/hexagon_nn_ops.h
+++ b/mace/core/runtime/hexagon/hexagon_nn_ops.h
@@ -2,10 +2,12 @@
 // Copyright (c) 2018 XiaoMi All rights reserved.
 //
 
-#ifndef LIBMACE_HEXAGON_NN_OPS_H
-#define LIBMACE_HEXAGON_NN_OPS_H
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_
 
+#include <string>
 #include <unordered_map>
+
 #include "mace/utils/logging.h"
 
 namespace mace {
@@ -15,7 +17,7 @@ namespace mace {
 typedef enum op_type_enum {
 #define DEF_OP(NAME, ...) OP_##NAME,
 
-#include "mace/core/runtime/hexagon/ops.h"
+#include "mace/core/runtime/hexagon/ops.h"  // NOLINT(build/include)
   NN_OPS_MAX
 
 #undef DEF_OP
@@ -26,7 +28,7 @@ class OpMap {
   void Init() {
 #define DEF_OP(NAME) op_map_[#NAME] = OP_##NAME;
 
-#include "mace/core/runtime/hexagon/ops.h"
+#include "mace/core/runtime/hexagon/ops.h"  // NOLINT(build/include)
 
 #undef DEF_OP
   }
@@ -45,4 +47,4 @@ class OpMap {
 };
 }  // namespace mace
 
-#endif  // LIBMACE_HEXAGON_NN_OPS_H
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_
diff --git a/mace/core/runtime/hexagon/ops.h b/mace/core/runtime/hexagon/ops.h
index 55b40413fdf4cbbca9ae69b93991aed9e5fe5a89..a01e71acab6cdae2fd96020d844767554307fe3d 100644
--- a/mace/core/runtime/hexagon/ops.h
+++ b/mace/core/runtime/hexagon/ops.h
@@ -1,3 +1,38 @@
+/*
+ * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the
+ * disclaimer below) provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+ * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
 /*
  * You probably want to
  *
@@ -42,6 +77,8 @@
  *
  * otherwise the interface becomes incompatible.
  */
+// NOLINT(build/header_guard)
+
 DEF_OP(INPUT)
 DEF_OP(OUTPUT)
 DEF_OP(Nop)
diff --git a/mace/core/runtime/hexagon/quantize.cc b/mace/core/runtime/hexagon/quantize.cc
index c4548bcbc4902d03ce51acd79648c6299772dcfe..7330424aab6d893c84cee0915078995148c93ba9 100644
--- a/mace/core/runtime/hexagon/quantize.cc
+++ b/mace/core/runtime/hexagon/quantize.cc
@@ -2,6 +2,8 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
+#include <algorithm>
+
 #include "mace/core/runtime/hexagon/quantize.h"
 
 namespace mace {
@@ -93,4 +95,5 @@ void Quantizer::DeQuantize(const Tensor &in_tensor,
   }
 }
 
-}  // namespace mace
\ No newline at end of file
+
+}  // namespace mace
diff --git a/mace/core/runtime/hexagon/quantize.h b/mace/core/runtime/hexagon/quantize.h
index 216e0c6b5fecf766b03ecfe2bfa57a951f0d671e..8e98f3beddf801dfa9ec0b38a547d93761f6ec7f 100644
--- a/mace/core/runtime/hexagon/quantize.h
+++ b/mace/core/runtime/hexagon/quantize.h
@@ -2,8 +2,8 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
-#ifndef MACE_DSP_UTIL_QUANTIZE_H_
-#define MACE_DSP_UTIL_QUANTIZE_H_
+#ifndef MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_
+#define MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_
 
 #include "mace/core/tensor.h"
 
@@ -40,6 +40,6 @@ class Quantizer {
   DISABLE_COPY_AND_ASSIGN(Quantizer);
 };
 
-}  // mace
+}  // namespace mace
 
-#endif  // MACE_DSP_UTIL_QUANTIZE_H_
+#endif  // MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 66c9c19571b24f4a6d85422ee1e73bfc52c7cd39..1d7d5debf9dfefaeab59205d6de67d29867d2c35 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -5,6 +5,10 @@
 #ifndef MACE_CORE_TENSOR_H_
 #define MACE_CORE_TENSOR_H_
 
+#include <string>
+#include <vector>
+#include <functional>
+
 #include "mace/core/buffer.h"
 #include "mace/core/preallocated_pooled_allocator.h"
 #include "mace/core/runtime/opencl/cl2_header.h"
@@ -60,7 +64,7 @@ inline std::ostream &operator<<(std::ostream &os, signed char c) {
 inline std::ostream &operator<<(std::ostream &os, unsigned char c) {
   return os << static_cast<unsigned int>(c);
 }
-}
+}  // namespace numerical_chars
 
 class Tensor {
  public:
@@ -69,7 +73,7 @@ class Tensor {
         dtype_(type),
         buffer_(nullptr),
         is_buffer_owner_(true),
-        name_(""){};
+        name_("") {}
 
   Tensor(BufferBase *buffer, DataType dtype)
     : dtype_(dtype),
@@ -240,7 +244,7 @@ class Tensor {
   inline void SetSourceOpName(const std::string name) { name_ = name; }
 
   inline void DebugPrint() const {
-    using namespace numerical_chars;
+    using namespace numerical_chars;  // NOLINT(build/namespaces)
     std::stringstream os;
     for (index_t i : shape_) {
       os << i << ", ";
@@ -262,7 +266,7 @@ class Tensor {
 
   class MappingGuard {
    public:
-    MappingGuard(const Tensor *tensor) : tensor_(tensor) {
+    explicit MappingGuard(const Tensor *tensor) : tensor_(tensor) {
       if (tensor_ != nullptr) {
         tensor_->buffer_->Map(&mapped_image_pitch_);
       }
@@ -301,6 +305,6 @@ class Tensor {
   DISABLE_COPY_AND_ASSIGN(Tensor);
 };
 
-}  // namespace tensor
+}  // namespace mace
 
 #endif  // MACE_CORE_TENSOR_H_
diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc
index 7dcf2a272ee342e0903b54da2f870b59d4ab8110..4a894a1d4ff39d421bc05ff1630759f4bf1bb4d7 100644
--- a/mace/core/testing/test_benchmark.cc
+++ b/mace/core/testing/test_benchmark.cc
@@ -6,7 +6,7 @@
 #include <cstdlib>
 
 #include <algorithm>
-#include <regex>
+#include <regex>  // NOLINT(build/c++11)
 #include <vector>
 
 #include "mace/core/testing/test_benchmark.h"
diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc
index 91302d7832f983f9801ccc129060176885d9fdda..76b7d15f8f25191d71c0052e5572a631eb7b6782 100644
--- a/mace/core/testing/test_benchmark_main.cc
+++ b/mace/core/testing/test_benchmark_main.cc
@@ -14,7 +14,6 @@ int main(int argc, char **argv) {
   mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH,
                             mace::GPUPriorityHint::PRIORITY_HIGH);
 
-  // TODO Use gflags
   if (argc == 2) {
     mace::testing::Benchmark::Run(argv[1]);
   } else {
diff --git a/mace/core/types.h b/mace/core/types.h
index 5eb7b536a5784df4160bed080f48feaa30efb4cc..e7a078f625fbaf869cdfbae50dcaf0be7b3b9054 100644
--- a/mace/core/types.h
+++ b/mace/core/types.h
@@ -6,6 +6,7 @@
 #define MACE_CORE_TYPES_H_
 
 #include <cstdint>
+#include <string>
 
 #include "mace/public/mace.h"
 #include "include/half.hpp"
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index 2cb5e237ab7c81e72c01df0f4850a9d3c5583389..1aabb5de70177036c054e4a2f675a0c2abfa42cb 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -4,6 +4,7 @@
 
 #include <string>
 #include <vector>
+#include <utility>
 
 #include "mace/core/arg_helper.h"
 #include "mace/core/workspace.h"
@@ -52,16 +53,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
   unsigned char *model_data_ptr = nullptr;
   for (auto &const_tensor : net_def.tensors()) {
     if (model_data_ptr == nullptr ||
-        reinterpret_cast<long long>(const_tensor.data()) <
-            reinterpret_cast<long long>(model_data_ptr)) {
+        reinterpret_cast<int64_t>(const_tensor.data()) <
+            reinterpret_cast<int64_t>(model_data_ptr)) {
       model_data_ptr = const_cast<unsigned char *>(const_tensor.data());
     }
   }
   for (auto &const_tensor : net_def.tensors()) {
     model_data_size = std::max(
         model_data_size,
-        static_cast<index_t>((reinterpret_cast<long long>(const_tensor.data()) -
-                              reinterpret_cast<long long>(model_data_ptr)) +
+        static_cast<index_t>((reinterpret_cast<int64_t>(const_tensor.data()) -
+                              reinterpret_cast<int64_t>(model_data_ptr)) +
                              const_tensor.data_size() *
                                  GetEnumTypeSize(const_tensor.data_type())));
   }
@@ -89,7 +90,8 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
       dims.push_back(d);
     }
 
-    index_t offset = (long long)const_tensor.data() - (long long)model_data_ptr;
+    index_t offset = reinterpret_cast<int64_t>(const_tensor.data())
+        - reinterpret_cast<int64_t>(model_data_ptr);
     std::unique_ptr<Tensor> tensor(
         new Tensor(BufferSlice(tensor_buffer_.get(), offset,
                                const_tensor.data_size() *
@@ -116,7 +118,7 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
   // As DSP may have different data output type for each op,
   // we stick to the same concept.
   for (auto &op : net_def.op()) {
-    if (! op.mem_id().empty()){
+    if (!op.mem_id().empty()) {
       const DataType op_dtype = static_cast<DataType>(
           ArgumentHelper::GetSingleArgument<OperatorDef, int>(
               op, "T", static_cast<int>(DT_FLOAT)));
@@ -142,11 +144,14 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
         std::unique_ptr<Tensor> tensor
             (new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
         tensor->SetSourceOpName(op.name());
-        VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")" << "; Mem: "
-                << mem_ids[i] << "; Image shape: "
-                << dynamic_cast<Image *>(tensor->UnderlyingBuffer())->image_shape()[0]
+        VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
+                << " Mem: "  << mem_ids[i]
+                << " Image shape: "
+                << dynamic_cast<Image *>(tensor->UnderlyingBuffer())
+                    ->image_shape()[0]
                 << ", "
-                << dynamic_cast<Image *>(tensor->UnderlyingBuffer())->image_shape()[1];
+                << dynamic_cast<Image *>(tensor->UnderlyingBuffer())
+                    ->image_shape()[1];
         tensor_map_[op.output(i)] = std::move(tensor);
       }
     }
diff --git a/mace/core/workspace.h b/mace/core/workspace.h
index 5e990d8210321d829f18551bd4fbe63c70e750b3..1e1012672c30d388fe34ff645b50ed36a292c16b 100644
--- a/mace/core/workspace.h
+++ b/mace/core/workspace.h
@@ -5,6 +5,11 @@
 #ifndef MACE_CORE_WORKSPACE_H_
 #define MACE_CORE_WORKSPACE_H_
 
+#include <map>
+#include <string>
+#include <vector>
+#include <memory>
+
 #include "mace/core/preallocated_pooled_allocator.h"
 #include "mace/core/tensor.h"
 #include "mace/public/mace.h"
diff --git a/mace/python/tools/memory_optimizer.py b/mace/python/tools/memory_optimizer.py
index 2e5716976b5a8cafdd22dceee0785b88a199bc11..8753b2700157416f9f078a8ee6a14b64c8fec718 100644
--- a/mace/python/tools/memory_optimizer.py
+++ b/mace/python/tools/memory_optimizer.py
@@ -43,6 +43,9 @@ class MemoryOptimizer(object):
       mem_size[1] = output_shape[0] * output_shape[1]
     return mem_size
 
+  def mem_area(self, memory_size):
+    return memory_size[0] * memory_size[1]
+
   def optimize(self):
     for op in self.net_def.op:
       if self.is_buffer_image_op(op):
@@ -54,22 +57,34 @@ class MemoryOptimizer(object):
         print('WARNING: the number of output shape is not equal to the number of output.')
         return
       for i in range(len(op.output)):
-        if len(self.idle_mem) == 0:
-          # allocate new mem
+        op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
+        mem_id = -1
+        if len(self.idle_mem) > 0:
+          best_mem_candidate_id = -1
+          best_mem_candidate_delta_area = sys.maxint
+          best_mem_candidate_shape = []
+          for mid in self.idle_mem:
+            reuse_mem_size = self.mem_block[mid]
+            resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])]
+            delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size)
+            if delta_mem_area < best_mem_candidate_delta_area:
+              best_mem_candidate_id = mid
+              best_mem_candidate_delta_area = delta_mem_area
+              best_mem_candidate_shape = resize_mem_size
+
+          if best_mem_candidate_delta_area <= self.mem_area(op_mem_size):
+            # reuse
+            self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape
+            mem_id = best_mem_candidate_id
+            self.idle_mem.remove(mem_id)
+
+        if mem_id == -1:
           mem_id = self.total_mem_count
           self.total_mem_count += 1
-        else:
-          # reuse mem
-          mem_id = self.idle_mem.pop()
+          self.mem_block[mem_id] = op_mem_size
 
         op.mem_id.extend([mem_id])
         self.op_mem[op.output[i]] = mem_id
-        if mem_id not in self.mem_block:
-          self.mem_block[mem_id] = [0, 0]
-        mem_size = self.mem_block[mem_id]
-        op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
-        mem_size[0] = max(mem_size[0], op_mem_size[0])
-        mem_size[1] = max(mem_size[1], op_mem_size[1])
 
       # de-ref input tensor mem
       for ipt in op.input:
diff --git a/tools/benchmark.sh b/tools/benchmark.sh
index e050f5b42ebe0ceb036e8699f7fa0dde69578eb1..dbcc862c431a188b8bd55cd4fe93ba0703a9f296 100644
--- a/tools/benchmark.sh
+++ b/tools/benchmark.sh
@@ -29,7 +29,8 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then
 fi
 
 if [ x"$TARGET_ABI" == x"host" ]; then
-  bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \
+  bazel build --verbose_failures -c opt --strip always \
+    //mace/benchmark:benchmark_model \
     --copt="-std=c++11" \
     --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
     --copt="-Werror=return-type" \
@@ -52,16 +53,18 @@ if [ x"$TARGET_ABI" == x"host" ]; then
       $OPTION_ARGS || exit 1
 
 else
-  bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \
+  bazel build --verbose_failures -c opt --strip always \
+    //mace/benchmark:benchmark_model \
     --crosstool_top=//external:android/crosstool \
     --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
     --cpu=${TARGET_ABI} \
     --copt="-std=c++11" \
     --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
     --copt="-Werror=return-type" \
+    --copt="-DMACE_OBFUSCATE_LITERALS" \
     --copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \
-    --copt="-O3" \
     --define openmp=true \
+    --copt="-O3" \
     --define production=true || exit 1
 
   cp bazel-bin/mace/benchmark/benchmark_model $MODEL_OUTPUT_DIR
@@ -70,11 +73,14 @@ else
   IFS=',' read -r -a INPUT_NAMES <<< "${INPUT_NODES}"
   for NAME in "${INPUT_NAMES[@]}";do
     FORMATTED_NAME=$(sed s/[^[:alnum:]]/_/g <<< ${NAME})
-    adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME}_${FORMATTED_NAME} ${PHONE_DATA_DIR} || exit 1
+    adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME}_${FORMATTED_NAME} \
+        ${PHONE_DATA_DIR} > /dev/null || exit 1
   done
-  adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/benchmark_model ${PHONE_DATA_DIR} || exit 1
+  adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/benchmark_model \
+      ${PHONE_DATA_DIR} > /dev/null || exit 1
   if [ "$EMBED_MODEL_DATA" = 0 ]; then
-    adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
+    adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data \
+        ${PHONE_DATA_DIR} > /dev/null || exit 1
   fi
 
   adb -s $DEVICE_ID </dev/null shell \