diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7e9e272fed1fbbe0e933fb02568cf1168e97b10d
--- /dev/null
+++ b/mace/kernels/conv_pool_2d_util.cc
@@ -0,0 +1,73 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/kernels/conv_pool_2d_util.h"
+
+namespace mace {
+namespace kernels {
+
+void CalcPaddingAndOutputSize(const index_t* input_shape,   // NCHW
+                              const index_t* filter_shape,  // OIHW
+                              const int* dilations,
+                              const int* strides,
+                              Padding padding,
+                              std::vector<index_t>* output_shape,
+                              std::vector<int>* padding_size) {
+  MACE_CHECK(dilations[0] > 0 && dilations[1] > 0,
+             "Invalid dilations, must >= 1");
+  MACE_CHECK((dilations[0] == 1 || strides[0] == 1) &&
+             (dilations[1] == 1 || strides[1] == 1),
+             "If dilations > 1, strides should be 1");
+  /*
+   * Convlution/pooling arithmetic:
+   * o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1
+   * For details, see https://arxiv.org/pdf/1603.07285.pdf or
+   * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
+   */
+  *padding_size = {0, 0};
+
+  index_t output_height, output_width;
+  index_t kernel_height = filter_shape[2];
+  index_t kernel_width = filter_shape[3];
+  index_t output_channels = filter_shape[0];
+
+  index_t k_extent_height = (kernel_height - 1) * dilations[0] + 1;
+  index_t k_extent_width = (kernel_width - 1) * dilations[1] + 1;
+
+  switch (padding) {
+    case VALID:
+      output_height = (input_shape[2] - k_extent_height) / strides[0] + 1;
+      output_width = (input_shape[3] - k_extent_width) / strides[1] + 1;
+      break;
+    case SAME:
+      output_height = (input_shape[2] - 1) / strides[0] + 1;
+      output_width = (input_shape[3] - 1) / strides[1] + 1;
+      break;
+    case FULL:
+      output_height = (input_shape[2] + k_extent_height - 2) / strides[0] + 1;
+      output_width = (input_shape[3] + k_extent_width - 2) / strides[1] + 1;
+      break;
+    default:
+      MACE_CHECK(false, "Unsupported padding type: ", padding);
+  }
+
+  // Note: TensorFlow may padded one more on the right/bottom side
+  // TODO may be it's better to also truncate the left/top to
+  // utilize the more centered features. We need to benchmark
+  // based on the model accuracy.
+
+  (*padding_size)[0] = (output_height - 1) * strides[0] +
+      k_extent_height - input_shape[2];
+  (*padding_size)[1] = (output_width - 1) * strides[1] +
+      k_extent_width - input_shape[3];
+
+  *output_shape = std::vector<index_t>(4);  // NCHW
+  (*output_shape)[0] = input_shape[0];
+  (*output_shape)[1] = output_channels;
+  (*output_shape)[2] = output_height;
+  (*output_shape)[3] = output_width;
+}
+
+}  //  namespace kernels
+}  //  namespace mace
diff --git a/mace/kernels/conv_pool_2d_util.h b/mace/kernels/conv_pool_2d_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..c1c5154cdbe760529f0da74c21781f6a9fecd685
--- /dev/null
+++ b/mace/kernels/conv_pool_2d_util.h
@@ -0,0 +1,31 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_KERNELS_CONV_POOL_2D_UTIL_H_
+#define MACE_KERNELS_CONV_POOL_2D_UTIL_H_
+
+#include "mace/core/tensor.h"
+
+namespace mace {
+
+enum Padding {
+  VALID = 0,  // No padding
+  SAME = 1,   // Pads with half the filter size (rounded down) on both sides
+  FULL = 2,   // Pads with one less than the filter size on both sides
+};
+
+namespace kernels {
+
+void CalcPaddingAndOutputSize(const index_t* input_shape,   // NCHW
+                              const index_t* filter_shape,  // OIHW
+                              const int* dilations,
+                              const int* strides,
+                              Padding padding,
+                              std::vector<index_t>* output_shape,
+                              std::vector<int>* padding_size);
+
+}  //  namespace kernels
+}  //  namespace mace
+
+#endif  // MACE_KERNELS_CONV_POOL_2D_UTIL_H_
diff --git a/mace/ops/conv_2d.h b/mace/ops/conv_2d.h
index 7ce4e69a020e123c1ea0a9f3249dc979c9a958f6..14b9c8ee725e5ee629ea78b1550d60a745e123be 100644
--- a/mace/ops/conv_2d.h
+++ b/mace/ops/conv_2d.h
@@ -27,10 +27,13 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
 
     std::vector<index_t> output_shape;
     std::vector<int> paddings;
-    this->CalcPaddingAndOutputSize(input->shape().data(),
-                                   filter->shape().data(),
-                                   &output_shape,
-                                   &paddings);
+    kernels::CalcPaddingAndOutputSize(input->shape().data(),
+                                      filter->shape().data(),
+                                      this->dilations_.data(),
+                                      this->strides_.data(),
+                                      this->padding_,
+                                      &output_shape,
+                                      &paddings);
     output->Resize(output_shape);
 
     auto conv2d = kernels::Conv2dFunctor<D, T>(this->strides_.data(),
diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h
index d95668ec44b4726ec987a8ad045c30d3e2af20d8..a84e4152d6d42e514439e9037accce2eb2601db9 100644
--- a/mace/ops/conv_pool_2d_base.h
+++ b/mace/ops/conv_pool_2d_base.h
@@ -6,15 +6,10 @@
 #define MACE_OPS_CONV_POOL_2D_BASE_H_
 
 #include "mace/core/operator.h"
+#include "mace/kernels/conv_pool_2d_util.h"
 
 namespace mace {
 
-enum Padding {
-  VALID = 0,  // No padding
-  SAME = 1,   // Pads with half the filter size (rounded down) on both sides
-  FULL = 2,   // Pads with one less than the filter size on both sides
-};
-
 template <DeviceType D, class T>
 class ConvPool2dOpBase : public Operator<D, T> {
  public:
@@ -26,65 +21,6 @@ class ConvPool2dOpBase : public Operator<D, T> {
                              static_cast<int>(SAME)))),
         dilations_(OperatorBase::GetRepeatedArgument<int>("dilations")) {}
 
-  void CalcPaddingAndOutputSize(const index_t* input_shape,   // NCHW
-                                const index_t* filter_shape,  // OIHW
-                                std::vector<index_t>* output_shape,
-                                std::vector<int>* padding_size) {
-    MACE_CHECK(dilations_[0] > 0 && dilations_[1] > 0,
-               "Invalid dilations, must >= 1");
-    MACE_CHECK((dilations_[0] == 1 || strides_[0] == 1) &&
-               (dilations_[1] == 1 || strides_[1] == 1),
-               "If dilations > 1, strides should be 1");
-    /*
-     * Convlution/pooling arithmetic:
-     * o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1
-     * For details, see https://arxiv.org/pdf/1603.07285.pdf or
-     * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
-     */
-    *padding_size = {0, 0};
-
-    index_t output_height, output_width;
-    index_t kernel_height = filter_shape[2];
-    index_t kernel_width = filter_shape[3];
-    index_t output_channels = filter_shape[0];
-
-    index_t k_extent_height = (kernel_height - 1) * dilations_[0] + 1;
-    index_t k_extent_width = (kernel_width - 1) * dilations_[1] + 1;
-
-    switch (padding_) {
-      case VALID:
-        output_height = (input_shape[2] - k_extent_height) / strides_[0] + 1;
-        output_width = (input_shape[3] - k_extent_width) / strides_[1] + 1;
-        break;
-      case SAME:
-        output_height = (input_shape[2] - 1) / strides_[0] + 1;
-        output_width = (input_shape[3] - 1) / strides_[1] + 1;
-        break;
-      case FULL:
-        output_height = (input_shape[2] + k_extent_height - 2) / strides_[0] + 1;
-        output_width = (input_shape[3] + k_extent_width - 2) / strides_[1] + 1;
-        break;
-      default:
-        MACE_CHECK(false, "Unsupported padding type: ", this->padding_);
-    }
-
-    // Note: TensorFlow may padded one more on the right/bottom side
-    // TODO may be it's better to also truncate the left/top to
-    // utilize the more centered features. We need to benchmark
-    // based on the model accuracy.
-
-    (*padding_size)[0] = (output_height - 1) * strides_[0] +
-        k_extent_height - input_shape[2];
-    (*padding_size)[1] = (output_width - 1) * strides_[1] +
-        k_extent_width - input_shape[3];
-
-    *output_shape = std::vector<index_t>(4);  // NCHW
-    (*output_shape)[0] = input_shape[0];
-    (*output_shape)[1] = output_channels;
-    (*output_shape)[2] = output_height;
-    (*output_shape)[3] = output_width;
-  }
-
  protected:
   std::vector<int> strides_;
   Padding padding_;
diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h
index 042eb389c11d2d0c09c72530ead8f3b7c98da5ef..0c36b54622ab3b3e196f49206ab2a48139fc62f0 100644
--- a/mace/ops/pooling.h
+++ b/mace/ops/pooling.h
@@ -6,8 +6,8 @@
 #define MACE_OPS_POOLING_H_
 
 #include "mace/core/operator.h"
-#include "mace/ops/conv_pool_2d_base.h"
 #include "mace/kernels/pooling.h"
+#include "mace/ops/conv_pool_2d_base.h"
 
 namespace mace {
 
@@ -33,8 +33,13 @@ public:
     filter_shape[1] = in_shape[0];
     filter_shape[2] = kernels_[0];
     filter_shape[3] = kernels_[1];
-    this->CalcPaddingAndOutputSize(in_shape.data(), filter_shape.data(),
-                                   &output_shape, &paddings);
+    kernels::CalcPaddingAndOutputSize(in_shape.data(),
+                                      filter_shape.data(),
+                                      this->dilations_.data(),
+                                      this->strides_.data(),
+                                      this->padding_,
+                                      &output_shape,
+                                      &paddings);
     output->Resize(output_shape);
 
     auto pooling_func = kernels::PoolingFunctor<D, T>(pooling_type_,