diff --git a/src/operators/kernel/arm/batchnorm_kernel.cpp b/src/operators/kernel/arm/batchnorm_kernel.cpp
index 68e0c7fa1e6996534ef87f771c9f1a3fb924224f..ecebc009bc36542f54578c881716d5fa92c04b7b 100644
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once
 
 #include "operators/kernel/batchnorm_kernel.h"
-#include "operators/kernel/central-arm-func/batchnorm_func.h"
+#include "operators/kernel/central-arm-func/batchnorm_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/arm/conv_add_relu_kernel.cpp b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
index 0ff86c7344fed8e4060b8d46b7ff457b031479d6..d3c04179b37014adc6c81f32dd6c08f697283671 100644
--- a/src/operators/kernel/arm/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_RELU_OP
 
 #include "operators/kernel/conv_add_relu_kernel.h"
-#include "operators/kernel/central-arm-func/conv_add_relu_func.h"
+#include "operators/kernel/central-arm-func/conv_add_relu_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index 45fae59a838d4d5fd44a94559cdf60b615f5b924..049425d88f96a322a0b4cb47c18d85f2df03d577 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef CONV_OP
 
 #include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_func.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
index 6cd16fda0dc6ea9928ab9fcfac5cd8b3c31a15a2..4cbfa23248e87e2bf3a8d97330fa19f92985a9d0 100644
--- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp
+++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef DEPTHWISECONV_OP
 
 #include "operators/kernel/depthwise_conv_kernel.h"
-#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/depthwise_conv_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -27,91 +27,7 @@ bool DepthwiseConvKernel<CPU, float>::Init(const ConvParam &para) const {
 
 template <>
 void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
-  LOG(kLOG_DEBUG) << param;
-
-  const Tensor *input = param.Input();
-  Tensor filter = *param.Filter();
-  Tensor *output = param.Output();
-  output->mutable_data<float>();
-  int groups = param.Groups();
-  std::vector<int> strides = param.Strides();
-  std::vector<int> paddings = param.Paddings();
-  std::vector<int> dilations = param.Dilations();
-
-  // DLOG << " compute end get Attrs " << strides[0];
-
-  const int batch_size = static_cast<int>(input->dims()[0]);
-
-  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
-  size_t data_dim = filter_shape_vec.size() - 2;
-  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
-  col_shape_vec[0] = input->dims()[1] / groups;
-  for (size_t j = 0; j < data_dim; ++j) {
-    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-  }
-  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-  framework::DDim col_matrix_shape =
-      framework::flatten_to_2d(col_shape, data_dim + 1);
-
-  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
-  Tensor col;
-  Tensor col_matrix;
-  if (is_expand) {
-    col.mutable_data<float>(col_shape);
-    col_matrix.ShareDataWith(col);
-    col_matrix.Resize(col_matrix_shape);
-  }
-
-  framework::DDim input_shape = framework::slice_ddim(
-      input->dims(), 1, static_cast<int>(input->dims().size()));
-
-  framework::DDim filter_matrix_shape = {filter.dims()[0],
-                                         filter.numel() / filter.dims()[0]};
-  filter.Resize(filter_matrix_shape);
-  framework::DDim output_matrix_shape = {
-      output->dims()[1],
-      output->numel() / (output->dims()[0] * output->dims()[1])};
-
-  // convolution operator: im2col(or vol2col) + gemm
-  int in_step = static_cast<int>(input->dims()[1]) / groups;
-  int out_step = static_cast<int>(output->dims()[1]) / groups;
-
-  math::Vol2ColFunctor<CPU, float> vol2col;
-  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-
-  for (int i = 0; i < batch_size; i++) {
-    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-    for (int g = 0; g < groups; g++) {
-      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-      if (!is_expand) {
-        col.ShareDataWith(in_slice);
-        col_matrix.ShareDataWith(col);
-        col_matrix.Resize(col_matrix_shape);
-      } else if (data_dim == 2U) {
-        // im2col
-        im2col(in_slice, dilations, strides,
-               std::vector<int>{paddings[0], paddings[1], paddings[0],
-                                paddings[1]},
-               &col);
-      } else if (data_dim == 3U) {
-        // vol2col
-        vol2col(in_slice, dilations, strides, paddings, &col);
-      }
-
-      // gemm
-      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
-      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      math::matmul<float>(filter_slice, false, col_matrix, false,
-                          static_cast<float>(1), &out_slice,
-                          static_cast<float>(0));
-    }
-  }
+  DepthwiseConvCompute<float>(param);
 }
 
 template class DepthwiseConvKernel<CPU, float>;
diff --git a/src/operators/kernel/central-arm-func/batchnorm_func.h b/src/operators/kernel/central-arm-func/batchnorm_arm_func.h
similarity index 100%
rename from src/operators/kernel/central-arm-func/batchnorm_func.h
rename to src/operators/kernel/central-arm-func/batchnorm_arm_func.h
diff --git a/src/operators/kernel/central-arm-func/conv_add_relu_func.h b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
similarity index 99%
rename from src/operators/kernel/central-arm-func/conv_add_relu_func.h
rename to src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
index 416b4963aefdbd6f2c796378aec4b953a08e28cb..6aadbab95c591d4286fdbb3c3f01a291cdd90429 100644
--- a/src/operators/kernel/central-arm-func/conv_add_relu_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_RELU_OP
 
 #pragma once
+#include <vector>
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
diff --git a/src/operators/kernel/central-arm-func/conv_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h
similarity index 96%
rename from src/operators/kernel/central-arm-func/conv_func.h
rename to src/operators/kernel/central-arm-func/conv_arm_func.h
index 30cfb24043b32effb723f029ef7e5e5cdd1f1e99..d08eebe5493bd9026073c3349631a42024579b95 100644
--- a/src/operators/kernel/central-arm-func/conv_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -15,6 +15,8 @@ limitations under the License. */
 #ifdef CONV_OP
 
 #pragma once
+#include <vector>
+#include "operators/math/conv_func.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
@@ -48,7 +50,8 @@ void ConvCompute(const ConvParam &param) {
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);
 
-  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
   Tensor col_matrix;
   if (is_expand) {
diff --git a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
new file mode 100644
index 0000000000000000000000000000000000000000..e43e3664cb005bab4d3c5ec8b5b35bd6925c982d
--- /dev/null
+++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
@@ -0,0 +1,116 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef DEPTHWISECONV_OP
+
+#pragma once
+#include <vector>
+#include "operators/math/conv_func.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename P>
+void DepthwiseConvCompute(const ConvParam &param) {
+  const Tensor *input = param.Input();
+  Tensor filter = *param.Filter();
+  Tensor *output = param.Output();
+  output->mutable_data<float>();
+  int groups = param.Groups();
+  std::vector<int> strides = param.Strides();
+  std::vector<int> paddings = param.Paddings();
+  std::vector<int> dilations = param.Dilations();
+
+  // DLOG << " compute end get Attrs " << strides[0];
+
+  const int batch_size = static_cast<int>(input->dims()[0]);
+
+  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
+  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
+  size_t data_dim = filter_shape_vec.size() - 2;
+  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
+  col_shape_vec[0] = input->dims()[1] / groups;
+  for (size_t j = 0; j < data_dim; ++j) {
+    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
+    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
+  }
+  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
+
+  framework::DDim col_matrix_shape =
+      framework::flatten_to_2d(col_shape, data_dim + 1);
+
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
+  Tensor col;
+  Tensor col_matrix;
+  if (is_expand) {
+    col.mutable_data<float>(col_shape);
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+  }
+
+  framework::DDim input_shape = framework::slice_ddim(
+      input->dims(), 1, static_cast<int>(input->dims().size()));
+
+  framework::DDim filter_matrix_shape = {filter.dims()[0],
+                                         filter.numel() / filter.dims()[0]};
+  filter.Resize(filter_matrix_shape);
+  framework::DDim output_matrix_shape = {
+      output->dims()[1],
+      output->numel() / (output->dims()[0] * output->dims()[1])};
+
+  // convolution operator: im2col(or vol2col) + gemm
+  int in_step = static_cast<int>(input->dims()[1]) / groups;
+  int out_step = static_cast<int>(output->dims()[1]) / groups;
+
+  math::Vol2ColFunctor<CPU, float> vol2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
+
+  for (int i = 0; i < batch_size; i++) {
+    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+
+    for (int g = 0; g < groups; g++) {
+      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+
+      if (!is_expand) {
+        col.ShareDataWith(in_slice);
+        col_matrix.ShareDataWith(col);
+        col_matrix.Resize(col_matrix_shape);
+      } else if (data_dim == 2U) {
+        // im2col
+        im2col(in_slice, dilations, strides,
+               std::vector<int>{paddings[0], paddings[1], paddings[0],
+                                paddings[1]},
+               &col);
+      } else if (data_dim == 3U) {
+        // vol2col
+        vol2col(in_slice, dilations, strides, paddings, &col);
+      }
+
+      // gemm
+      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      math::matmul<float>(filter_slice, false, col_matrix, false,
+                          static_cast<float>(1), &out_slice,
+                          static_cast<float>(0));
+    }
+  }
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/conv_kernel.h b/src/operators/kernel/conv_kernel.h
index 8efdc0bc067927d4a0f0d7fcef56d8a732e0b848..812ddd5a441f3a24c557546c1780248a557a6eb0 100644
--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -35,21 +35,6 @@ class ConvKernel : public OpKernelBase<DeviceType, ConvParam> {
   bool Init(const ConvParam &para) const;
 };
 
-inline bool IsExpand(const std::vector<int64_t> &filter_dim,
-                     const std::vector<int> &strides,
-                     const std::vector<int> &paddings,
-                     const std::vector<int> &dilations) {
-  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
-  for (size_t j = 0; j < strides.size(); ++j) {
-    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
-    strides_1 = strides_1 && (strides[j] == 1);
-    padding_0 = padding_0 && (paddings[j] == 0);
-    dilation_1 = dilation_1 && (dilations[j] == 1);
-  }
-
-  return !(filter_1 && strides_1 && padding_0 && dilation_1);
-}
-
 }  // namespace operators
 }  // namespace paddle_mobile
 