diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h
index 803bc2407fa23cbc57d0114155df36d1d633dd52..472733af3656db1969e278ee9743b2510c2980ea 100644
--- a/mace/kernels/depthwise_conv2d.h
+++ b/mace/kernels/depthwise_conv2d.h
@@ -15,30 +15,30 @@ namespace kernels {
 template<DeviceType D, typename T>
 class DepthwiseConv2dFunctor {
  public:
-  DepthwiseConv2dFunctor(const index_t* input_shape,
-                         const index_t* filter_shape,
-                         const int* strides,
+  DepthwiseConv2dFunctor(const index_t *input_shape,
+                         const index_t *filter_shape,
+                         const int *strides,
                          const Padding padding,
-                         const int* dilations) :
+                         const int *dilations) :
       strides_(strides),
       paddings_(2, 0),
       dilations_(dilations) {
     CalPaddingSize(input_shape, filter_shape, dilations_, strides_, padding, paddings_.data());
   }
-  DepthwiseConv2dFunctor(const int* strides,
-                         const std::vector<int>& paddings,
-                         const int* dilations) :
+  DepthwiseConv2dFunctor(const int *strides,
+                         const std::vector<int> &paddings,
+                         const int *dilations) :
       strides_(strides),
       paddings_(paddings),
       dilations_(dilations) {}
 
-  void operator()(const T* input, // NCHW
-                  const index_t* input_shape,
-                  const T* filter, // c_out, c_in, kernel_h, kernel_w
-                  const index_t* filter_shape,
-                  const T* bias, // c_out
-                  T* output, // NCHW
-                  const index_t* output_shape) {
+  void operator()(const T *input, // NCHW
+                  const index_t *input_shape,
+                  const T *filter, // c_out, c_in, kernel_h, kernel_w
+                  const index_t *filter_shape,
+                  const T *bias, // c_out
+                  T *output, // NCHW
+                  const index_t *output_shape) {
 
     MACE_CHECK_NOTNULL(output);
 
@@ -80,7 +80,7 @@ class DepthwiseConv2dFunctor {
             index_t offset = n * channels * height * width +
                 c * height * width + h * width + w;
             T sum = 0;
-            const T* filter_ptr = filter + c * kernel_size;
+            const T *filter_ptr = filter + c * kernel_size;
             for (int kh = 0; kh < kernel_h; ++kh) {
               for (int kw = 0; kw < kernel_w; ++kw) {
                 int inh = padded_h_start + h * stride_h + dilation_h * kh;
@@ -110,19 +110,19 @@ class DepthwiseConv2dFunctor {
     }
   }
  private:
-  const int* strides_; // [stride_h, stride_w]
+  const int *strides_; // [stride_h, stride_w]
   std::vector<int> paddings_;   // [padding_h, padding_w]
-  const int* dilations_; // [dilation_h, dilation_w]
+  const int *dilations_; // [dilation_h, dilation_w]
 };
 
-template <>
-void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float* input,
-                                                        const index_t* input_shape,
-                                                        const float* filter,
-                                                        const index_t* filter_shape,
-                                                        const float* bias,
-                                                        float* output,
-                                                        const index_t* output_shape);
+template<>
+void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float *input, // NCHW
+                                                                 const index_t *input_shape,
+                                                                 const float *filter, // c_out, c_in, kernel_h, kernel_w
+                                                                 const index_t *filter_shape,
+                                                                 const float *bias, // c_out
+                                                                 float *output, // NCHW
+                                                                 const index_t *output_shape);
 } //  namespace kernels
 } //  namespace mace
 
diff --git a/mace/kernels/neon/conv_2d_neon_1x1.cc b/mace/kernels/neon/conv_2d_neon_1x1.cc
index 006864a3aeab5d0518ebe646dfe11b6d2d64a4d4..a82505e79296e7f362139643bd700584d6d89caa 100644
--- a/mace/kernels/neon/conv_2d_neon_1x1.cc
+++ b/mace/kernels/neon/conv_2d_neon_1x1.cc
@@ -8,13 +8,13 @@
 namespace mace {
 namespace kernels {
 
-void Conv2dNeonK1x1S1(const float* input,  // NCHW
-                      const index_t* input_shape,
-                      const float* filter,  // c_out, c_in, kernel_h, kernel_w
-                      const index_t* filter_shape,
-                      const float* bias,    // c_out
-                      float* output,        // NCHW
-                      const index_t* output_shape) {
+void Conv2dNeonK1x1S1(const float *input,  // NCHW
+                      const index_t *input_shape,
+                      const float *filter,  // c_out, c_in, kernel_h, kernel_w
+                      const index_t *filter_shape,
+                      const float *bias,    // c_out
+                      float *output,        // NCHW
+                      const index_t *output_shape) {
   const index_t batch = output_shape[0];
   const index_t channels = output_shape[1];
   const index_t height = output_shape[2];
@@ -26,7 +26,7 @@ void Conv2dNeonK1x1S1(const float* input,  // NCHW
   const index_t input_width = input_shape[3];
 
   MACE_CHECK(input_batch == batch && input_height == height &&
-             input_width == width);
+      input_width == width);
 
   const index_t total_pixels = height * width;
   // Process 4 * 2 = 8 pixels for each innermost loop
@@ -36,17 +36,17 @@ void Conv2dNeonK1x1S1(const float* input,  // NCHW
 
   // benchmark omp collapsed(2)
   for (index_t n = 0; n < batch; ++n) {
-    const float* filter_ptr = filter;
+    const float *filter_ptr = filter;
 #pragma omp parallel for
     for (index_t c = 0; c < channels; ++c) {
       // TODO Will GCC opt these out?
-      float* channel_output_start =
+      float *channel_output_start =
           output + n * channels * height * width + c * height * width;
-      const float* input_ptr =
+      const float *input_ptr =
           input + n * input_channels * input_height * input_width;
 
       // Fill with bias
-      float* output_ptr = channel_output_start;
+      float *output_ptr = channel_output_start;
       for (index_t ptr = 0; ptr < total_pixels; ++ptr) {
         output_ptr[ptr] = bias[c];  // TODO can we avoid this?
       }
@@ -54,15 +54,15 @@ void Conv2dNeonK1x1S1(const float* input,  // NCHW
       index_t inc = 0;
       // Process 4 input channels in batch
       for (; inc + 3 < input_channels; inc += 4) {
-        float* output_ptr = channel_output_start;
+        float *output_ptr = channel_output_start;
         // The begining of each input feature map channel
         MACE_ASSERT(input_ptr ==
-                    input + n * input_channels * input_height * input_width +
-                        inc * input_height * input_width);
+            input + n * input_channels * input_height * input_width +
+                inc * input_height * input_width);
 
-        const float* input_ptr1 = input_ptr + total_pixels;
-        const float* input_ptr2 = input_ptr1 + total_pixels;
-        const float* input_ptr3 = input_ptr2 + total_pixels;
+        const float *input_ptr1 = input_ptr + total_pixels;
+        const float *input_ptr2 = input_ptr1 + total_pixels;
+        const float *input_ptr3 = input_ptr2 + total_pixels;
 
         // filter is in c_out, c_in, 1, 1 order
         MACE_ASSERT(filter_ptr == filter + c * input_channels + inc);
@@ -140,10 +140,10 @@ void Conv2dNeonK1x1S1(const float* input,  // NCHW
       }
       // Process the remaining channels
       for (; inc < input_channels; ++inc) {
-        float* output_ptr = channel_output_start;
+        float *output_ptr = channel_output_start;
         MACE_ASSERT(input_ptr ==
-                    input + n * input_channels * input_height * input_width +
-                        inc * input_height * input_width);
+            input + n * input_channels * input_height * input_width +
+                inc * input_height * input_width);
         MACE_ASSERT(filter_ptr == filter + c * input_channels + inc);
 
         const float k0 = filter_ptr[0];
diff --git a/mace/kernels/neon/conv_2d_neon_3x3.cc b/mace/kernels/neon/conv_2d_neon_3x3.cc
index 2b29ee6363e9a9f781788fcefeb95168e82bafe0..6b62cb5937f84c7169e1da05883e9eaf40da701c 100644
--- a/mace/kernels/neon/conv_2d_neon_3x3.cc
+++ b/mace/kernels/neon/conv_2d_neon_3x3.cc
@@ -20,19 +20,18 @@ namespace kernels {
   int multiplier     = filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels);    \
   int filter_in_channels = filter_shape == nullptr ? input_channels : filter_shape[1];      \
   for (int b = 0; b < output_batch; ++b) {                                                  \
-    float* output_ptr_base = output + b * output_channels * output_height * output_width;   \
+    float *output_ptr_base = output + b * output_channels * output_height * output_width;   \
     for (int oc = 0; oc < output_channels; ++oc) {                                          \
-        const float* filter_ptr = filter + oc * filter_in_channels * kFilterSize;           \
-        const float* input_ptr = input + b * input_channels * input_height * input_width;   \
+        const float *filter_ptr = filter + oc * filter_in_channels * kFilterSize;           \
+        const float *input_ptr = input + b * input_channels * input_height * input_width;   \
         if (filter_shape != nullptr) {                                                      \
           input_ptr += (oc / multiplier) * input_height * input_width;                      \
         }                                                                                   \
-        float* output_ptr = output_ptr_base + oc * output_height * output_width;            \
+        float *output_ptr = output_ptr_base + oc * output_height * output_width;            \
         std::fill(output_ptr, output_ptr + output_height * output_width, bias[oc]);         \
         for (int ic = 0; ic < filter_in_channels; ++ic) {                                   \
           float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), vld1q_f32(filter_ptr+3), vld1q_f32(filter_ptr+6)};
 
-
 #define KERNEL_TAIL_CODE                         \
         filter_ptr += kFilterSize;               \
         input_ptr += input_height * input_width; \
diff --git a/mace/kernels/neon/conv_2d_neon_5x5.cc b/mace/kernels/neon/conv_2d_neon_5x5.cc
index 724fe3e74bb44d3627201933749b92bf0aac452f..02c5ced2a3177af71544c6ccaf324cc133f686cf 100644
--- a/mace/kernels/neon/conv_2d_neon_5x5.cc
+++ b/mace/kernels/neon/conv_2d_neon_5x5.cc
@@ -10,13 +10,13 @@
 namespace mace {
 namespace kernels {
 
-void Conv2dNeonK5x5S1(const float* input,  // NCHW
-                      const index_t* input_shape,
-                      const float* filter,  // c_out, c_in, kernel_h, kernel_w
-                      const index_t* filter_shape,
-                      const float* bias,    // c_out
-                      float* output,        // NCHW
-                      const index_t* output_shape) {
+void Conv2dNeonK5x5S1(const float *input,  // NCHW
+                      const index_t *input_shape,
+                      const float *filter,  // c_out, c_in, kernel_h, kernel_w
+                      const index_t *filter_shape,
+                      const float *bias,    // c_out
+                      float *output,        // NCHW
+                      const index_t *output_shape) {
   const index_t batch = output_shape[0];
   const index_t channels = output_shape[1];
   const index_t height = output_shape[2];
@@ -40,9 +40,9 @@ void Conv2dNeonK5x5S1(const float* input,  // NCHW
 #pragma omp parallel for collapse(2)
   for (index_t n = 0; n < batch; ++n) {
     for (index_t c = 0; c < channels; ++c) {
-      float* output_ptr = output + n * output_total_pixels_per_batch +
-                          c * output_total_pixels_per_channel;
-      const float* input_ptr = input + n * input_total_pixels_per_batch;
+      float *output_ptr = output + n * output_total_pixels_per_batch +
+          c * output_total_pixels_per_channel;
+      const float *input_ptr = input + n * input_total_pixels_per_batch;
 
       // Fill with bias
       for (index_t i = 0; i < output_total_pixels_per_channel; ++i) {
@@ -50,24 +50,24 @@ void Conv2dNeonK5x5S1(const float* input,  // NCHW
       }
 
       for (index_t inc = 0; inc < input_channels; ++inc) {
-        float* outptr = output_ptr;
-        float* outptr2 = outptr + width;
-
-        const float* inptr = input_ptr + inc * input_total_pixels_per_channel;
-        const float* filter_ptr = filter + c * patch_size + inc * 25;
-
-        const float* r0 = inptr;
-        const float* r1 = inptr + input_width;
-        const float* r2 = inptr + input_width * 2;
-        const float* r3 = inptr + input_width * 3;
-        const float* r4 = inptr + input_width * 4;
-        const float* r5 = inptr + input_width * 5;
-
-        const float* k0 = filter_ptr;
-        const float* k1 = filter_ptr + 5;
-        const float* k2 = filter_ptr + 10;
-        const float* k3 = filter_ptr + 15;
-        const float* k4 = filter_ptr + 20;
+        float *outptr = output_ptr;
+        float *outptr2 = outptr + width;
+
+        const float *inptr = input_ptr + inc * input_total_pixels_per_channel;
+        const float *filter_ptr = filter + c * patch_size + inc * 25;
+
+        const float *r0 = inptr;
+        const float *r1 = inptr + input_width;
+        const float *r2 = inptr + input_width * 2;
+        const float *r3 = inptr + input_width * 3;
+        const float *r4 = inptr + input_width * 4;
+        const float *r5 = inptr + input_width * 5;
+
+        const float *k0 = filter_ptr;
+        const float *k1 = filter_ptr + 5;
+        const float *k2 = filter_ptr + 10;
+        const float *k3 = filter_ptr + 15;
+        const float *k4 = filter_ptr + 20;
 
         float32x4_t _k0123 = vld1q_f32(filter_ptr);
         float32x4_t _k4567 = vld1q_f32(filter_ptr + 4);
diff --git a/mace/kernels/neon/depthwise_conv_neon.cc b/mace/kernels/neon/depthwise_conv_neon.cc
index 7bf0a839ab0d7db294beb3e3bf073841bc84b986..eda2325d8b371218f2dcedefd34c124e3b75a9e9 100644
--- a/mace/kernels/neon/depthwise_conv_neon.cc
+++ b/mace/kernels/neon/depthwise_conv_neon.cc
@@ -25,13 +25,13 @@ extern void Conv2dNeonK3x3S2(const float *input,
                              const index_t *output_shape);
 
 template<>
-void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // NCHW
-                                                        const index_t* input_shape,
-                                                        const float* filter, // c_out, c_in, kernel_h, kernel_w
-                                                        const index_t* filter_shape,
-                                                        const float* bias, // c_out
-                                                        float* output, // NCHW
-                                                        const index_t* output_shape) {
+void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float *input, // NCHW
+                                                                 const index_t *input_shape,
+                                                                 const float *filter, // c_out, c_in, kernel_h, kernel_w
+                                                                 const index_t *filter_shape,
+                                                                 const float *bias, // c_out
+                                                                 float *output, // NCHW
+                                                                 const index_t *output_shape) {
   typedef void (*Conv2dNeonFunction)(
       const float *input,
       const index_t *input_shape,
diff --git a/mace/ops/conv_2d.h b/mace/ops/conv_2d.h
index ad3206b0045db7be28452fcfc602ffc5da9082ff..3ac6689cd8d8a6f5198c56a70623ed50a7d6e0b7 100644
--- a/mace/ops/conv_2d.h
+++ b/mace/ops/conv_2d.h
@@ -13,17 +13,17 @@
 
 namespace mace {
 
-template <DeviceType D, typename T>
+template<DeviceType D, typename T>
 class Conv2dOp : public ConvPool2dOpBase<D, T> {
  public:
-  Conv2dOp(const OperatorDef& op_def, Workspace* ws)
-      : ConvPool2dOpBase<D, T>(op_def, ws){};
+  Conv2dOp(const OperatorDef &op_def, Workspace *ws)
+      : ConvPool2dOpBase<D, T>(op_def, ws) {};
 
   bool Run() override {
-    const Tensor* input = this->Input(INPUT);
-    const Tensor* filter = this->Input(FILTER);
-    const Tensor* bias = this->Input(BIAS);
-    Tensor* output = this->Output(OUTPUT);
+    const Tensor *input = this->Input(INPUT);
+    const Tensor *filter = this->Input(FILTER);
+    const Tensor *bias = this->Input(BIAS);
+    Tensor *output = this->Output(OUTPUT);
 
     std::vector<index_t> output_shape(4);
     std::vector<int> paddings(2);
diff --git a/mace/ops/depthwise_conv2d.h b/mace/ops/depthwise_conv2d.h
index b6a458ead4e1c8c08630772a5a7f161ace2a3cd8..cc220f3c5f5848bf5e989adc466c585153eb55d7 100644
--- a/mace/ops/depthwise_conv2d.h
+++ b/mace/ops/depthwise_conv2d.h
@@ -14,25 +14,25 @@
 
 namespace mace {
 
-template <DeviceType D, typename T>
+template<DeviceType D, typename T>
 class DepthwiseConv2dOp : public ConvPool2dOpBase<D, T> {
  public:
-  DepthwiseConv2dOp(const OperatorDef& op_def, Workspace* ws)
+  DepthwiseConv2dOp(const OperatorDef &op_def, Workspace *ws)
       : ConvPool2dOpBase<D, T>(op_def, ws),
         functor_(this->Input(INPUT)->shape().data(),
                  this->Input(FILTER)->shape().data(),
-                 this->strides_.data(), this->padding_, this->dilations_.data()){};
+                 this->strides_.data(), this->padding_, this->dilations_.data()) {};
 
   bool Run() override {
-    const Tensor* input = this->Input(INPUT);
-    const Tensor* filter = this->Input(FILTER);
-    const Tensor* bias = this->Input(BIAS);
-    Tensor* output = this->Output(OUTPUT);
+    const Tensor *input = this->Input(INPUT);
+    const Tensor *filter = this->Input(FILTER);
+    const Tensor *bias = this->Input(BIAS);
+    Tensor *output = this->Output(OUTPUT);
 
     // resize filter shape.
     std::vector<index_t> filter_shape(filter->shape().begin(), filter->shape().end());
     filter_shape[0] *= filter_shape[1];
-    filter_shape[1]  = 1;
+    filter_shape[1] = 1;
     std::vector<index_t> output_shape(4);
     this->CalOutputSize(input->shape().data(), filter_shape.data(), output_shape.data());
     output->Resize(output_shape);