Commit 291a5ee6 authored by Liangliang He

set clang-format to BinPackParameters=false
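
The option named in the commit message controls whether clang-format may pack ("bin") several parameters onto one continuation line. A minimal sketch of the relevant .clang-format entry (the Google base style is assumed here; the project's real file may set additional keys):

    # illustrative .clang-format snippet, not the project's full config
    BasedOnStyle: Google
    BinPackParameters: false

With BinPackParameters set to false, any declaration whose parameters no longer fit on a single line is broken with one parameter per line, which is exactly the reflow seen in every hunk below; the formatting is typically reapplied in place with clang-format -i <file>.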

Parent 578b382a
@@ -30,7 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) {
 }
 
 template <typename T, typename... Args>
-inline void MakeStringInternal(std::stringstream& ss, const T& t,
+inline void MakeStringInternal(std::stringstream& ss,
+                               const T& t,
                                const Args&... args) {
   MakeStringInternal(ss, t);
   MakeStringInternal(ss, args...);
...
@@ -6,12 +6,14 @@
 namespace mace {
 
-NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def,
+                 Workspace* ws,
                  DeviceType type)
     : name_(net_def->name()) {}
 
 SimpleNet::SimpleNet(const std::shared_ptr<const NetDef>& net_def,
-                     Workspace* ws, DeviceType type)
+                     Workspace* ws,
+                     DeviceType type)
     : NetBase(net_def, ws, type) {
   VLOG(1) << "Constructing SimpleNet " << net_def->name();
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
@@ -37,14 +39,16 @@ bool SimpleNet::Run() {
   return true;
 }
 
-unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
+unique_ptr<NetBase> CreateNet(const NetDef& net_def,
+                              Workspace* ws,
                               DeviceType type) {
   std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def));
   return CreateNet(tmp_net_def, ws, type);
 }
 
 unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
-                              Workspace* ws, DeviceType type) {
+                              Workspace* ws,
+                              DeviceType type) {
   unique_ptr<NetBase> net(new SimpleNet(net_def, ws, type));
   return net;
 }
...
@@ -14,7 +14,8 @@ namespace mace {
 class NetBase {
  public:
-  NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+  NetBase(const std::shared_ptr<const NetDef>& net_def,
+          Workspace* ws,
           DeviceType type);
 
   virtual ~NetBase() noexcept {}
@@ -30,7 +31,8 @@ class NetBase {
 class SimpleNet : public NetBase {
  public:
-  SimpleNet(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+  SimpleNet(const std::shared_ptr<const NetDef>& net_def,
+            Workspace* ws,
             DeviceType type);
 
   bool Run() override;
@@ -41,10 +43,12 @@ class SimpleNet : public NetBase {
   DISABLE_COPY_AND_ASSIGN(SimpleNet);
 };
 
-unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
+unique_ptr<NetBase> CreateNet(const NetDef& net_def,
+                              Workspace* ws,
                               DeviceType type);
 
 unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
-                              Workspace* ws, DeviceType type);
+                              Workspace* ws,
+                              DeviceType type);
 
 }  // namespace mace
...
@@ -11,16 +11,21 @@ std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
   return &g_device_type_registry;
 }
 
-MACE_DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&,
+MACE_DEFINE_REGISTRY(CPUOperatorRegistry,
+                     OperatorBase,
+                     const OperatorDef&,
                      Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
 
-MACE_DEFINE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef&,
+MACE_DEFINE_REGISTRY(NEONOperatorRegistry,
+                     OperatorBase,
+                     const OperatorDef&,
                      Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
 
 unique_ptr<OperatorBase> CreateOperator(const OperatorDef& operator_def,
-                                        Workspace* ws, DeviceType type) {
+                                        Workspace* ws,
+                                        DeviceType type) {
   OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
   return registry->Create(operator_def.type(), operator_def, ws);
 }
...
@@ -140,7 +140,9 @@ struct DeviceTypeRegisterer {
       type, &registry_function);        \
   }
 
-MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
+MACE_DECLARE_REGISTRY(CPUOperatorRegistry,
+                      OperatorBase,
+                      const OperatorDef &,
                       Workspace *);
 
 #define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
@@ -148,7 +150,9 @@ MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
 #define REGISTER_CPU_OPERATOR(name, ...) \
   MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
 
-MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
+MACE_DECLARE_REGISTRY(NEONOperatorRegistry,
+                      OperatorBase,
+                      const OperatorDef &,
                       Workspace *);
 
 #define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
@@ -157,7 +161,8 @@ MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
   MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
 
 unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
-                                        Workspace *ws, DeviceType type);
+                                        Workspace *ws,
+                                        DeviceType type);
 
 }  // namespace mace
...
@@ -316,7 +316,8 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
              ProtoDebugString(def));
 }
 
-bool GetFlagArgument(const OperatorDef& def, const string& name,
+bool GetFlagArgument(const OperatorDef& def,
+                     const string& name,
                      bool def_value) {
   for (const Argument& arg : def.arg()) {
     if (arg.name() == name) {
@@ -328,7 +329,8 @@ bool GetFlagArgument(const OperatorDef& def, const string& name,
   return def_value;
 }
 
-Argument* GetMutableArgument(const string& name, const bool create_if_missing,
+Argument* GetMutableArgument(const string& name,
+                             const bool create_if_missing,
                              OperatorDef* def) {
   for (int i = 0; i < def->arg_size(); ++i) {
     if (def->arg(i).name() == name) {
...
@@ -107,7 +107,8 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) {
 template <class IterableInputs = std::initializer_list<string>,
           class IterableOutputs = std::initializer_list<string>,
           class IterableArgs = std::initializer_list<Argument>>
-OperatorDef CreateOperatorDef(const string& type, const string& name,
+OperatorDef CreateOperatorDef(const string& type,
+                              const string& name,
                               const IterableInputs& inputs,
                               const IterableOutputs& outputs,
                               const IterableArgs& args) {
@@ -130,7 +131,8 @@ OperatorDef CreateOperatorDef(const string& type, const string& name,
 // to specify args.
 template <class IterableInputs = std::initializer_list<string>,
           class IterableOutputs = std::initializer_list<string>>
-inline OperatorDef CreateOperatorDef(const string& type, const string& name,
+inline OperatorDef CreateOperatorDef(const string& type,
+                                     const string& name,
                                      const IterableInputs& inputs,
                                      const IterableOutputs& outputs) {
   return CreateOperatorDef(type, name, inputs, outputs,
@@ -153,7 +155,8 @@ class ArgumentHelper {
   }
 
   template <typename Def, typename T>
-  static T GetSingleArgument(const Def& def, const string& name,
+  static T GetSingleArgument(const Def& def,
+                             const string& name,
                              const T& default_value) {
     return ArgumentHelper(def).GetSingleArgument<T>(name, default_value);
   }
@@ -165,7 +168,8 @@ class ArgumentHelper {
   template <typename Def, typename T>
   static vector<T> GetRepeatedArgument(
-      const Def& def, const string& name,
+      const Def& def,
+      const string& name,
       const std::vector<T>& default_value = std::vector<T>()) {
     return ArgumentHelper(def).GetRepeatedArgument<T>(name, default_value);
   }
 
@@ -223,10 +227,12 @@ class ArgumentHelper {
 };
 
 const Argument& GetArgument(const OperatorDef& def, const string& name);
-bool GetFlagArgument(const OperatorDef& def, const string& name,
+bool GetFlagArgument(const OperatorDef& def,
+                     const string& name,
                      bool def_value = false);
 
-Argument* GetMutableArgument(const string& name, const bool create_if_missing,
+Argument* GetMutableArgument(const string& name,
+                             const bool create_if_missing,
                              OperatorDef* def);
 
 template <typename T>
...
@@ -101,7 +101,8 @@ class Registerer {
 #define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...)                    \
   namespace {                                                                \
   static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
-      key, RegistryName(),                                                   \
+      key,                                                                   \
+      RegistryName(),                                                        \
       Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);                \
   }
...
@@ -16,7 +16,8 @@ vector<string> Workspace::Tensors() const {
   return names;
 }
 
-Tensor* Workspace::CreateTensor(const string& name, Allocator* alloc,
+Tensor* Workspace::CreateTensor(const string& name,
+                                Allocator* alloc,
                                 DataType type) {
   if (HasTensor(name)) {
     VLOG(1) << "Tensor " << name << " already exists. Skipping.";
...
@@ -18,9 +18,15 @@ struct BatchNormFunctor {
   BatchNormFunctor(const float variance_epsilon)
       : variance_epsilon_(variance_epsilon) {}
 
-  void operator()(const T* input, const T* scale, const T* offset,
-                  const T* mean, const T* var, const index_t n,
-                  const index_t channel, const index_t sample_size, T* output) {
+  void operator()(const T* input,
+                  const T* scale,
+                  const T* offset,
+                  const T* mean,
+                  const T* var,
+                  const index_t n,
+                  const index_t channel,
+                  const index_t sample_size,
+                  T* output) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
     // The calculation formula for inference is
     // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
@@ -49,9 +55,15 @@ struct BatchNormFunctor {
 template <>
 void BatchNormFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const float* scale, const float* offset,
-    const float* mean, const float* var, const index_t n, const index_t channel,
-    const index_t sample_size, float* output);
+    const float* input,
+    const float* scale,
+    const float* offset,
+    const float* mean,
+    const float* var,
+    const index_t n,
+    const index_t channel,
+    const index_t sample_size,
+    float* output);
 
 }  // namespace kernels
 }  // namespace mace
...
@@ -102,8 +102,12 @@ class Conv2dFunctor {
 template <>
 void Conv2dFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const index_t* input_shape, const float* filter,
-    const index_t* filter_shape, const float* bias, float* output,
+    const float* input,
+    const index_t* input_shape,
+    const float* filter,
+    const index_t* filter_shape,
+    const float* bias,
+    float* output,
     const index_t* output_shape);
 
 }  // namespace kernels
...
@@ -9,8 +9,10 @@ namespace kernels {
 void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
                               const index_t *filter_shape,  // OIHW
-                              const int *dilations, const int *strides,
-                              Padding padding, index_t *output_shape,
+                              const int *dilations,
+                              const int *strides,
+                              Padding padding,
+                              index_t *output_shape,
                               int *padding_size) {
   MACE_CHECK(dilations[0] > 0 && dilations[1] > 0,
              "Invalid dilations, must >= 1");
@@ -69,8 +71,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
   output_shape[3] = output_width;
 }
 
-void ConstructInputWithPadding(const float *input, const index_t *input_shape,
-                               const int *paddings, Tensor *output_tensor) {
+void ConstructInputWithPadding(const float *input,
+                               const index_t *input_shape,
+                               const int *paddings,
+                               Tensor *output_tensor) {
   index_t batch = input_shape[0];
   index_t channels = input_shape[1];
   index_t height = input_shape[2];
...
@@ -19,12 +19,16 @@ namespace kernels {
 void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
                               const index_t *filter_shape,  // OIHW
-                              const int *dilations, const int *strides,
-                              Padding padding, index_t *output_shape,
+                              const int *dilations,
+                              const int *strides,
+                              Padding padding,
+                              index_t *output_shape,
                               int *padding_size);
 
-void ConstructInputWithPadding(const float *input, const index_t *input_shape,
-                               const int *paddings, Tensor *output_tensor);
+void ConstructInputWithPadding(const float *input,
+                               const index_t *input_shape,
+                               const int *paddings,
+                               Tensor *output_tensor);
 
 }  // namespace kernels
 }  // namespace mace
...
@@ -10,9 +10,15 @@ namespace kernels {
 template <>
 void BatchNormFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const float* scale, const float* offset,
-    const float* mean, const float* var, const index_t n, const index_t channel,
-    const index_t sample_size, float* output) {
+    const float* input,
+    const float* scale,
+    const float* offset,
+    const float* mean,
+    const float* var,
+    const index_t n,
+    const index_t channel,
+    const index_t sample_size,
+    float* output) {
   // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
   // The calculation formula for inference is
   // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
...
@@ -8,17 +8,26 @@
 namespace mace {
 namespace kernels {
 
-extern void Conv2dNeonK1x1S1(const float *input, const index_t *input_shape,
-                             const float *filter, const float *bias,
-                             float *output, const index_t *output_shape);
+extern void Conv2dNeonK1x1S1(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
 
-extern void Conv2dNeonK3x3S1(const float *input, const index_t *input_shape,
-                             const float *filter, const float *bias,
-                             float *output, const index_t *output_shape);
+extern void Conv2dNeonK3x3S1(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
 
-extern void Conv2dNeonK5x5S1(const float *input, const index_t *input_shape,
-                             const float *filter, const float *bias,
-                             float *output, const index_t *output_shape);
+extern void Conv2dNeonK5x5S1(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
 
 template <>
 void Conv2dFunctor<DeviceType::NEON,
...
@@ -60,7 +60,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
         float32x4_t sum0 = vdupq_n_f32(.0f);
         float32x4_t sum1 = vdupq_n_f32(.0f);
         float32x4_t row0_ext_0 = vld1q_f32(row[0]);  // 0123
-        float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize);  // 4567
+        float32x4_t row0_latter =
+            vld1q_f32(row[0] + kRegisterSize);  // 4567
         float32x4_t row0_ext_1 =
             vextq_f32(row0_ext_0, row0_latter, 1);  // 1234
         float32x4_t row0_ext_2 =
@@ -71,7 +72,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
         sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
 
         float32x4_t row1_ext_0 = vld1q_f32(row[1]);  // 0123
-        float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize);  // 4567
+        float32x4_t row1_latter =
+            vld1q_f32(row[1] + kRegisterSize);  // 4567
         float32x4_t row1_ext_1 =
             vextq_f32(row1_ext_0, row1_latter, 1);  // 1234
         float32x4_t row1_ext_2 =
@@ -158,7 +160,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
       for (; count > 0; --count) {
         float32x4_t sum0 = vdupq_n_f32(.0f);
         float32x4_t row0_ext_0 = vld1q_f32(row[0]);  // 0123
-        float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize);  // 4567
+        float32x4_t row0_latter =
+            vld1q_f32(row[0] + kRegisterSize);  // 4567
         float32x4_t row0_ext_1 =
             vextq_f32(row0_ext_0, row0_latter, 1);  // 1234
         float32x4_t row0_ext_2 =
@@ -169,7 +172,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
         sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
 
         float32x4_t row1_ext_0 = vld1q_f32(row[1]);  // 0123
-        float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize);  // 4567
+        float32x4_t row1_latter =
+            vld1q_f32(row[1] + kRegisterSize);  // 4567
         float32x4_t row1_ext_1 =
             vextq_f32(row1_ext_0, row1_latter, 1);  // 1234
         float32x4_t row1_ext_2 =
...
@@ -11,8 +11,10 @@
 namespace mace {
 namespace kernels {
 
-void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
-                            float *output, const index_t *out_shape,
+void PoolingMaxNeonK2x2S2x2(const float *input,
+                            const index_t *in_shape,
+                            float *output,
+                            const index_t *out_shape,
                             const int *paddings) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
@@ -101,8 +103,10 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
 }
 
 // assume the input has already been padded
-void PoolingMaxNeonK2x2S2x2Padded(const float *input, const index_t *in_shape,
-                                  float *output, const index_t *out_shape) {
+void PoolingMaxNeonK2x2S2x2Padded(const float *input,
+                                  const index_t *in_shape,
+                                  float *output,
+                                  const index_t *out_shape) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
   index_t in_height = in_shape[2];
...
@@ -11,8 +11,10 @@
 namespace mace {
 namespace kernels {
 
-void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
-                            float *output, const index_t *out_shape,
+void PoolingMaxNeonK3x3S2x2(const float *input,
+                            const index_t *in_shape,
+                            float *output,
+                            const index_t *out_shape,
                             const int *paddings) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
@@ -127,8 +129,10 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
 }
 
 // assume the input has already been padded
-void PoolingMaxNeonK3x3S2x2Padded(const float *input, const index_t *in_shape,
-                                  float *output, const index_t *out_shape) {
+void PoolingMaxNeonK3x3S2x2Padded(const float *input,
+                                  const index_t *in_shape,
+                                  float *output,
+                                  const index_t *out_shape) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
   index_t in_height = in_shape[2];
...
@@ -9,26 +9,34 @@
 namespace mace {
 namespace kernels {
 
-extern void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
-                                   float *output, const index_t *out_shape,
+extern void PoolingMaxNeonK2x2S2x2(const float *input,
+                                   const index_t *in_shape,
+                                   float *output,
+                                   const index_t *out_shape,
                                    const int *paddings);
 
-extern void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
-                                   float *output, const index_t *out_shape,
+extern void PoolingMaxNeonK3x3S2x2(const float *input,
+                                   const index_t *in_shape,
+                                   float *output,
+                                   const index_t *out_shape,
                                    const int *paddings);
 
 #ifdef __COPY_MAKE_PADDING
 extern void PoolingMaxNeonK2x2S2x2Padded(const float *input,
-                                         const index_t *in_shape, float *output,
+                                         const index_t *in_shape,
+                                         float *output,
                                          const index_t *out_shape);
 extern void PoolingMaxNeonK3x3S2x2Padded(const float *input,
-                                         const index_t *in_shape, float *output,
+                                         const index_t *in_shape,
+                                         float *output,
                                          const index_t *out_shape);
 #endif
 
 template <>
 void PoolingFunctor<DeviceType::NEON, float>::operator()(
-    const float *input, const index_t *input_shape, float *output,
+    const float *input,
+    const index_t *input_shape,
+    float *output,
     const index_t *output_shape) {
   if (kernels_[0] == 2 && kernels_[1] == 2 && strides_[0] == 2 &&
       strides_[1] == 2 && pooling_type_ == MAX) {
...
@@ -20,15 +20,20 @@ namespace kernels {
 template <DeviceType D, typename T>
 class PoolingFunctor {
  public:
-  PoolingFunctor(const PoolingType pooling_type, const int *kernels,
-                 const int *strides, const int *paddings, const int *dilations)
+  PoolingFunctor(const PoolingType pooling_type,
+                 const int *kernels,
+                 const int *strides,
+                 const int *paddings,
+                 const int *dilations)
       : pooling_type_(pooling_type),
        kernels_(kernels),
        strides_(strides),
        paddings_(paddings),
        dilations_(dilations) {}
 
-  void operator()(const T *input, const index_t *input_shape, T *output,
+  void operator()(const T *input,
+                  const index_t *input_shape,
+                  T *output,
                   const index_t *output_shape) {
     index_t batch = output_shape[0];
     index_t channels = output_shape[1];
@@ -118,7 +123,9 @@ class PoolingFunctor {
 template <>
 void PoolingFunctor<DeviceType::NEON, float>::operator()(
-    const float *input, const index_t *input_shape, float *output,
+    const float *input,
+    const index_t *input_shape,
+    float *output,
     const index_t *output_shape);
 
 }  // namespace kernels
...
@@ -19,7 +19,8 @@ struct CachedInterpolation {
   float lerp;
 };
 
-inline float CalculateResizeScale(index_t in_size, index_t out_size,
+inline float CalculateResizeScale(index_t in_size,
+                                  index_t out_size,
                                   bool align_corners) {
   return (align_corners && out_size > 1)
              ? (in_size - 1) / static_cast<float>(out_size - 1)
@@ -40,21 +41,28 @@ inline void ComputeInterpolationWeights(const index_t out_size,
   }
 }
 
-inline float ComputeLerp(const float top_left, const float top_right,
-                         const float bottom_left, const float bottom_right,
-                         const float x_lerp, const float y_lerp) {
+inline float ComputeLerp(const float top_left,
+                         const float top_right,
+                         const float bottom_left,
+                         const float bottom_right,
+                         const float x_lerp,
+                         const float y_lerp) {
   const float top = top_left + (top_right - top_left) * x_lerp;
   const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
   return top + (bottom - top) * y_lerp;
 }
 
 template <typename T>
-void ResizeImage(const T *images, const index_t batch_size,
-                 const index_t in_height, const index_t in_width,
-                 const index_t out_height, const index_t out_width,
+void ResizeImage(const T *images,
+                 const index_t batch_size,
+                 const index_t in_height,
+                 const index_t in_width,
+                 const index_t out_height,
+                 const index_t out_width,
                  const index_t channels,
                  const std::vector<CachedInterpolation> &xs_vec,
-                 const std::vector<CachedInterpolation> &ys, float *output) {
+                 const std::vector<CachedInterpolation> &ys,
+                 float *output) {
   const index_t in_channel_size = in_height * in_width;
   const index_t in_batch_num_values = channels * in_channel_size;
   const index_t out_channel_size = out_height * out_width;
@@ -98,8 +106,13 @@ struct ResizeBilinearFunctor {
   ResizeBilinearFunctor(bool align_corners) : align_corners_(align_corners) {}
 
-  void operator()(const T *input, T *output, index_t n, index_t channels,
-                  index_t in_height, index_t in_width, index_t out_height,
+  void operator()(const T *input,
+                  T *output,
+                  index_t n,
+                  index_t channels,
+                  index_t in_height,
+                  index_t in_width,
+                  index_t out_height,
                   index_t out_width) {
     if (out_height == in_height && out_width == in_width) {
       std::copy(input, input + channels * in_height * in_width, output);
...
@@ -8,8 +8,8 @@
 namespace mace {
 
 template <DeviceType D, typename T>
-static void BatchNorm(int iters, int batch, int channels, int height,
-                      int width) {
+static void BatchNorm(
+    int iters, int batch, int channels, int height, int width) {
   mace::testing::StopTiming();
 
   OpsTestNet net;
...
@@ -12,8 +12,15 @@
 namespace mace {
 
 template <DeviceType D, typename T>
-static void Conv2d(int iters, int batch, int channels, int height, int width,
-                   int kernel_h, int kernel_w, int stride, Padding padding,
+static void Conv2d(int iters,
+                   int batch,
+                   int channels,
+                   int height,
+                   int width,
+                   int kernel_h,
+                   int kernel_w,
+                   int stride,
+                   Padding padding,
                    int output_channels) {
   mace::testing::StopTiming();
...
@@ -44,7 +44,8 @@ class OpsTestNet {
   OpsTestNet() {}
 
   template <typename T>
-  void AddInputFromArray(const char *name, const std::vector<index_t> &shape,
+  void AddInputFromArray(const char *name,
+                         const std::vector<index_t> &shape,
                          const std::vector<T> &data) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -55,7 +56,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddRepeatedInput(const char *name, const std::vector<index_t> &shape,
+  void AddRepeatedInput(const char *name,
+                        const std::vector<index_t> &shape,
                         const T data) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -66,7 +68,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddRandomInput(const char *name, const std::vector<index_t> &shape,
+  void AddRandomInput(const char *name,
+                      const std::vector<index_t> &shape,
                       bool positive = false) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -84,7 +87,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddFixedInput(const char *name, const std::vector<index_t> &shape,
+  void AddFixedInput(const char *name,
+                     const std::vector<index_t> &shape,
                      T value) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
...
@@ -12,8 +12,14 @@ using namespace mace;
 using namespace mace::kernels;
 
 template <DeviceType D>
-static void Pooling(int iters, int batch, int channels, int height, int width,
-                    int kernel, int stride, Padding padding,
+static void Pooling(int iters,
+                    int batch,
+                    int channels,
+                    int height,
+                    int width,
+                    int kernel,
+                    int stride,
+                    Padding padding,
                     PoolingType pooling_type) {
   mace::testing::StopTiming();
...