From e802471c58068669de78a3eaec143b705cb654b8 Mon Sep 17 00:00:00 2001
From: luotao1 <luotao02@baidu.com>
Date: Mon, 7 Nov 2016 19:41:20 +0800
Subject: [PATCH] abstract outputSize function in CNN-related layers (#314)

---
 paddle/gserver/layers/ConvBaseLayer.cpp       |  9 ++-
 paddle/gserver/layers/ConvBaseLayer.h         | 26 +------
 paddle/gserver/layers/ConvOperator.cpp        | 72 ++++++-------------
 paddle/gserver/layers/ConvProjection.h        | 12 ++--
 paddle/gserver/layers/CudnnPoolLayer.cpp      | 20 +++---
 paddle/gserver/layers/PoolLayer.h             | 11 +--
 paddle/gserver/layers/PoolProjectionLayer.cpp | 29 ++++----
 paddle/gserver/tests/test_LayerGrad.cpp       | 56 +++++++--------
 paddle/math/MathUtils.cpp                     | 19 +++--
 paddle/math/MathUtils.h                       | 16 +++++
 python/paddle/trainer/config_parser.py        | 53 +++++++-------
 11 files changed, 139 insertions(+), 184 deletions(-)

diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp
index 040510b7ad..42ff0b70d8 100644
--- a/paddle/gserver/layers/ConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ConvBaseLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "paddle/utils/Logging.h"
 #include "ConvBaseLayer.h"
 namespace paddle {
@@ -78,10 +77,10 @@ size_t ConvBaseLayer::calOutputSize() {
       imgSizeH_[i] = config_.inputs(i).conv_conf().img_size();
     if (imgSizeW_[i] == 0)
       imgSizeW_[i] = config_.inputs(i).conv_conf().img_size();
-    outputH_.push_back(
-        outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
-    outputW_.push_back(
-        outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i]));
+    outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i],
+                                  strideY_[i], caffeMode_));
+    outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i],
+                                  stride_[i], caffeMode_));
     CHECK_EQ(outputH_[i], outputH_[0]);
     CHECK_EQ(outputW_[i], outputW_[0]);
   }
diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h
index 316514acf1..e660a6d6f5 100644
--- a/paddle/gserver/layers/ConvBaseLayer.h
+++ b/paddle/gserver/layers/ConvBaseLayer.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #pragma once
 
 #include "Layer.h"
+#include "paddle/math/MathUtils.h"
 namespace paddle {
 
 /**
@@ -87,31 +88,6 @@ public:
   virtual size_t calOutputSize();
 
   Weight& getWeight(int idx) { return *weights_[idx]; }
-
-  /**
-   * Calculate output size based on caffeMode_.
-   * - input(+padding): 0123456789
-   * - imageSize(+padding) = 10;
-   * - filterSize = 3;
-   * - stride = 2;
-   * - caffeMode_ is true:
-       - output: (012), (234), (456), (678)
-       - outputSize = 4;
-   * - caffeMode_ is false:
-   *   - output: (012), (234), (456), (678), (9)
-   *   - outputSize = 5;
-   */
-  int outputSize(int imageSize, int filterSize, int padding, int stride) {
-    int outputSize;
-    if (!caffeMode_) {
-     outputSize =
-          (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1;
-    } else {
-      outputSize = (imageSize - filterSize + 2 * padding) / stride + 1;
-    }
-    CHECK_GE(outputSize, 1);
-    return outputSize;
-  }
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/gserver/layers/ConvOperator.cpp
index 8c72c17784..2d9c892fe5 100644
--- a/paddle/gserver/layers/ConvOperator.cpp
+++ b/paddle/gserver/layers/ConvOperator.cpp
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "paddle/math/Matrix.h"
+#include "paddle/math/MathUtils.h"
 #include "Operator.h"
 
 namespace paddle {
@@ -35,8 +35,8 @@ public:
    */
   virtual ~ConvOperator() {
     if (workSpaceInBytes_ != 0) {
-        hl_free_mem_device(workSpace_);
-        workSpaceInBytes_ = 0;
+      hl_free_mem_device(workSpace_);
+      workSpaceInBytes_ = 0;
     }
 
     hl_destroy_tensor_descriptor(inputDesc_);
@@ -83,33 +83,6 @@ private:
              filterSize_ * filterSizeY_ * channels_ * numFilters_);
   }
 
-  /**
-   * Calculate output size.
-   */
-  int outputSize(int imageSize, int filterSize, int padding, int stride) {
-    int outputSize;
-    if (!caffeMode_) {
-      /* input(+padding): 0123456789
-       * imageSize(+padding) = 10;
-       * filterSize = 3;
-       * stride = 2;
-       * output: (012), (234), (456), (678), (9)
-       * outputSize = 5;
-       */
-      outputSize =
-          (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1;
-    } else {
-      /* input(+padding): 0123456789
-       * imageSize(+padding) = 10;
-       * filterSize = 3;
-       * stride = 2;
-       * output: (012), (234), (456), (678)
-       * outputSize = 4;
-       */
-      outputSize = (imageSize - filterSize + 2 * padding) / stride + 1;
-    }
-    return outputSize;
-  }
   /// Most of member variables are same with CudnnConvLayer.
   /// There is no explanation here.
   int imageH_, imageW_, outputH_, outputW_;
@@ -129,7 +102,7 @@ private:
   int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
   size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
   size_t workSpaceInBytes_;
-  void* workSpace_;
+  void *workSpace_;
   bool isSelectAlgo_;
 };
 
@@ -160,7 +133,7 @@ ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu)
 void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
   if (maxWorkSpace > workSpaceInBytes_) {
     if (workSpaceInBytes_ != 0) {
-        hl_free_mem_device(workSpace_);
+      hl_free_mem_device(workSpace_);
     }
     // total amount of storage needed
     workSpace_ = hl_malloc_device(maxWorkSpace);
@@ -168,14 +141,13 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
   }
 }
 
-
 void ConvOperator::reshape(int batchSize) {
   imageH_ = ins_[0]->getFrameHeight();
   imageW_ = ins_[0]->getFrameWidth();
   if (imageH_ == 0) imageH_ = imgSize_;
   if (imageW_ == 0) imageW_ = imgSize_;
-  outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_);
-  outputW_ = outputSize(imageW_, filterSize_, padding_, stride_);
+  outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
+  outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
 
   out_->setFrameHeight(outputH_);
   out_->setFrameWidth(outputW_);
@@ -183,10 +155,10 @@ void ConvOperator::reshape(int batchSize) {
   reshapeImageDescriptors();
 
   if (!isSelectAlgo_) {
-    hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_,
-               convDesc_, &fwdAlgo_, &fwdLimitBytes_,
-               &bwdDataAlgo_, &bwdDataLimitBytes_,
-               &bwdFilterAlgo_, &bwdFilterLimitBytes_);
+    hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, convDesc_,
+                      &fwdAlgo_, &fwdLimitBytes_, &bwdDataAlgo_,
+                      &bwdDataLimitBytes_, &bwdFilterAlgo_,
+                      &bwdFilterLimitBytes_);
 
     size_t maxWorkSpace = 0;
     maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
@@ -202,7 +174,8 @@ void ConvOperator::computeConvSizes() {
   hl_create_filter_descriptor(&filterDesc_, channels_, numFilters_,
                               filterSizeY_, filterSize_);
   hl_create_tensor_descriptor(&inputDesc_);
-  int outputX = outputSize(imgSize_, filterSize_, padding_, stride_);
+  int outputX =
+      outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
   CHECK_EQ(outputX, outputX_);
   hl_create_tensor_descriptor(&outputDesc_);
   hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_,
@@ -211,13 +184,13 @@ void ConvOperator::computeConvSizes() {
 
 void ConvOperator::reshapeImageDescriptors() {
   hl_tensor_reshape(inputDesc_, 1, channels_, imageH_, imageW_,
-                    channels_ * imageH_ * imageW_, imageH_ * imageW_,
-                    imageW_, 1);
+                    channels_ * imageH_ * imageW_, imageH_ * imageW_, imageW_,
+                    1);
   hl_tensor_reshape(outputDesc_, 1, numFilters_, outputH_, outputW_,
                     numFilters_ * outputH_ * outputW_, outputH_ * outputW_,
                     outputW_, 1);
-  hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_,
-                                  paddingY_, padding_, strideY_, stride_);
+  hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, paddingY_,
+                                  padding_, strideY_, stride_);
   inputOffset_ = channels_ * imageH_ * imageW_;
   outputOffset_ = numFilters_ * outputH_ * outputW_;
   weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_;
@@ -273,18 +246,17 @@ void ConvOperator::backward() {
         real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
         hl_convolution_backward_filter(inputDesc_, inputData, outputDesc_,
                                        outGrad, filterDesc_, weightGrad,
-                                       convDesc_, workSpace_,
-                                       workSpaceInBytes_, bwdFilterAlgo_);
+                                       convDesc_, workSpace_, workSpaceInBytes_,
+                                       bwdFilterAlgo_);
       }
 
       MatrixPtr preGrad = ins_[0]->grad;
       if (NULL != preGrad) {
         real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
         real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
-        hl_convolution_backward_data(inputDesc_, inputGrad, outputDesc_,
-                                     outGrad, filterDesc_, wgtData,
-                                     convDesc_, workSpace_,
-                                     workSpaceInBytes_, bwdDataAlgo_);
+        hl_convolution_backward_data(
+            inputDesc_, inputGrad, outputDesc_, outGrad, filterDesc_, wgtData,
+            convDesc_, workSpace_, workSpaceInBytes_, bwdDataAlgo_);
       }
     }
   }
diff --git a/paddle/gserver/layers/ConvProjection.h b/paddle/gserver/layers/ConvProjection.h
index 41a100ac3c..d0bfe9a6ed 100644
--- a/paddle/gserver/layers/ConvProjection.h
+++ b/paddle/gserver/layers/ConvProjection.h
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #pragma once
 
 #include "Projection.h"
+#include "paddle/math/MathUtils.h"
 
 namespace paddle {
 
@@ -42,17 +42,15 @@ protected:
   void reshapeTensorDesc(int batchSize);
   void reshape(int batchSize);
 
-  int outputSize(int imageSize, int filterSize, int padding, int stride) {
-    return (imageSize - filterSize + 2 * padding) / stride + 1;
-  }
-
   size_t calOutputSize() {
     imageH_ = in_->getFrameHeight();
     imageW_ = in_->getFrameWidth();
     if (imageH_ == 0) imageH_ = configImgH_;
     if (imageW_ == 0) imageW_ = configImgW_;
-    outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_);
-    outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_);
+    outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_,
+                          /* caffeMode */ true);
+    outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_,
+                          /* caffeMode */ true);
 
     const_cast<Argument*>(out_)->setFrameHeight(outputH_);
     const_cast<Argument*>(out_)->setFrameWidth(outputW_);
diff --git a/paddle/gserver/layers/CudnnPoolLayer.cpp b/paddle/gserver/layers/CudnnPoolLayer.cpp
index 4c733591b3..24adb50a98 100644
--- a/paddle/gserver/layers/CudnnPoolLayer.cpp
+++ b/paddle/gserver/layers/CudnnPoolLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/math/Matrix.h"
@@ -62,9 +61,9 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
   strideHeight = strideY_;
   strideWidth = stride_;
 
-  hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight,
-                               windowWidth, heightPadding, widthPadding,
-                               strideHeight, strideWidth);
+  hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, windowWidth,
+                               heightPadding, widthPadding, strideHeight,
+                               strideWidth);
 
   return true;
 }
@@ -80,8 +79,10 @@ void CudnnPoolLayer::reshape(int batchSize) {
   }
   CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(),
            channels_ * imageH_ * imageW_);
-  outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_);
-  outputW_ = outputSize(imageW_, sizeX_, confPadding_, stride_);
+  outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_,
+                        /* caffeMode */ false);
+  outputW_ =
+      outputSize(imageW_, sizeX_, confPadding_, stride_, /* caffeMode */ false);
   getOutput().setFrameHeight(outputH_);
   getOutput().setFrameWidth(outputW_);
 
@@ -99,8 +100,7 @@ void CudnnPoolLayer::forward(PassType passType) {
 
   real *inputData = getInputValue(0)->getData();
   real *outData = getOutputValue()->getData();
-  hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData,
-                     poolingDesc_);
+  hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, poolingDesc_);
 }
 
 void CudnnPoolLayer::backward(const UpdateCallback &callback) {
@@ -113,8 +113,8 @@ void CudnnPoolLayer::backward(const UpdateCallback &callback) {
   real *inputGrad = getInputGrad(0)->getData();
   real *outData = getOutputValue()->getData();
   real *outGrad = getOutputGrad()->getData();
-  hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_,
-                      outData, outGrad, poolingDesc_);
+  hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, outData,
+                      outGrad, poolingDesc_);
 }
 
 CudnnPoolLayer::~CudnnPoolLayer() {
diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h
index bde1f5b8dc..e87ad08251 100644
--- a/paddle/gserver/layers/PoolLayer.h
+++ b/paddle/gserver/layers/PoolLayer.h
@@ -17,6 +17,7 @@ limitations under the License. */
 
 #include "Layer.h"
 #include "paddle/math/Matrix.h"
+#include "paddle/math/MathUtils.h"
 #include <vector>
 
 namespace paddle {
@@ -47,16 +48,6 @@ public:
   static Layer* create(const LayerConfig& config);
 
   virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
-
-  /**
-   * Calculate output size according window size and padding size.
-   */
-  int outputSize(int imageSize, int windowSize, int padding, int stride) {
-    int outputSize;
-    outputSize =
-        (imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
-    return outputSize;
-  }
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/PoolProjectionLayer.cpp b/paddle/gserver/layers/PoolProjectionLayer.cpp
index 5a2e9afb6e..9e8ce77850 100644
--- a/paddle/gserver/layers/PoolProjectionLayer.cpp
+++ b/paddle/gserver/layers/PoolProjectionLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
 #include "PoolProjectionLayer.h"
@@ -31,8 +30,10 @@ size_t PoolProjectionLayer::getSize() {
     imgSizeW_ = imgSize_;
   }
 
-  outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_);
-  outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_);
+  outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_,
+                        /* caffeMode */ false);
+  outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_,
+                        /* caffeMode */ false);
 
   layerSize = outputH_ * outputW_ * channels_;
 
@@ -53,9 +54,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
 
   MatrixPtr outV = getOutputValue();
 
-  outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
-                       sizeX_, sizeY_, strideY_, stride_,
-                       outputH_, outputW_, confPaddingY_, confPadding_);
+  outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
+                       strideY_, stride_, outputH_, outputW_, confPaddingY_,
+                       confPadding_);
 }
 
 void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
@@ -72,9 +73,8 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inputGrad = getInputGrad(0);
 
   inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
-                             sizeX_, sizeY_,
-                             strideY_, stride_, outputH_, outputW_, 1, 1,
-                             confPaddingY_, confPadding_);
+                             sizeX_, sizeY_, strideY_, stride_, outputH_,
+                             outputW_, 1, 1, confPaddingY_, confPadding_);
 }
 
 void AvgPoolProjectionLayer::forward(PassType passType) {
@@ -89,9 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
 
   MatrixPtr outV = getOutputValue();
 
-  outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
-                       sizeX_, sizeY_, strideY_, stride_,
-                       outputH_, outputW_, confPaddingY_, confPadding_);
+  outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
+                       strideY_, stride_, outputH_, outputW_, confPaddingY_,
+                       confPadding_);
 }
 
 void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
@@ -103,9 +103,8 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
   /* Do derivation */
   MatrixPtr outputGrad = getOutputGrad();
   MatrixPtr inputGrad = getInputGrad(0);
-  inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_,
-                             sizeX_, sizeY_, strideY_, stride_,
-                             outputH_, outputW_, 1, 1,
+  inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_,
+                             strideY_, stride_, outputH_, outputW_, 1, 1,
                              confPaddingY_, confPadding_);
 }
 }  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index bf2c2e0499..5397b952bc 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/gserver/layers/DataLayer.h"
 #include "ModelConfig.pb.h"
 #include "paddle/trainer/Trainer.h"
+#include "paddle/math/MathUtils.h"
 
 #include "TestUtil.h"
 #include "LayerGradUtil.h"
@@ -134,7 +135,6 @@ TEST(Projection, identity) {
   }
 }
 
-
 #ifndef PADDLE_ONLY_CPU
 TEST(Projection, conv) {
   const int NUM_FILTERS = 16;
@@ -158,21 +158,23 @@ TEST(Projection, conv) {
   conv->set_groups(1);
   conv->set_filter_channels(conv->channels() / conv->groups());
   conv->set_img_size(IMAGE_SIZE);
-  int outputSize = (2 * conv->padding() + conv->img_size() -
-      conv->filter_size()) / conv->stride() + 1;
-  int outputSizeY = (2 * conv->padding_y() + conv->img_size() -
-      conv->filter_size_y()) / conv->stride_y() + 1;
-  conv->set_output_x(outputSize);
+  int output_x =
+      outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
+                 conv->stride(), /* caffeMode */ true);
+  int output_y =
+      outputSize(conv->img_size(), conv->filter_size_y(), conv->padding_y(),
+                 conv->stride_y(), /* caffeMode */ true);
+  conv->set_output_x(output_x);
   conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS);
-  conf.set_output_size(outputSize * outputSizeY * NUM_FILTERS);
+  conf.set_output_size(output_x * output_y * NUM_FILTERS);
 
-  testProjectionGrad(conf, INPUT_DATA,
+  testProjectionGrad(
+      conf, INPUT_DATA,
       /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * FILTER_SIZE_Y,
       /* batchSize */ 100, true, false, NUM_FILTERS, true);
 }
 #endif
 
-
 TEST(Layer, concat) {
   TestConfig config;
   config.biasSize = 0;
@@ -293,10 +295,9 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
   conv->set_groups(1);
   conv->set_filter_channels(conv->channels() / conv->groups());
   conv->set_img_size(16);
-  conv->set_output_x(
-      (2 * conv->padding() + conv->img_size() - conv->filter_size()) /
-          ((float)conv->stride()) +
-      1.5);
+  conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
+                                conv->padding(), conv->stride(),
+                                /* caffeMode */ true));
   config.layerConfig.set_size(conv->output_x() * conv->output_x() *
                               config.layerConfig.num_filters());
 
@@ -329,15 +330,13 @@ TEST(Layer, blockExpandLayer) {
   blockExpand->set_stride_x(2);
   blockExpand->set_stride_y(2);
   blockExpand->set_output_x(
-      1 +
-      (2 * blockExpand->padding_x() + blockExpand->img_size_x() -
-       blockExpand->block_x() + blockExpand->stride_x() - 1) /
-          blockExpand->stride_x());
+      outputSize(blockExpand->img_size_x(), blockExpand->block_x(),
+                 blockExpand->padding_x(), blockExpand->stride_x(),
+                 /* caffeMode */ false));
   blockExpand->set_output_y(
-      1 +
-      (2 * blockExpand->padding_y() + blockExpand->img_size_y() -
-       blockExpand->block_y() + blockExpand->stride_y() - 1) /
-          blockExpand->stride_y());
+      outputSize(blockExpand->img_size_y(), blockExpand->block_y(),
+                 blockExpand->padding_y(), blockExpand->stride_y(),
+                 /* caffeMode */ false));
   config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() *
                               blockExpand->channels());
 
@@ -862,8 +861,8 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
   pool->set_stride(sw);
   pool->set_stride_y(sh);
 
-  int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1;
-  int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1;
+  int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
+  int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
   pool->set_output_x(ow);
   pool->set_output_y(oh);
 }
@@ -1255,12 +1254,11 @@ TEST(Operator, conv) {
   conv->set_groups(1);
   conv->set_filter_channels(conv->channels() / conv->groups());
   conv->set_img_size(IMAGE_SIZE);
-  int outputSize =
-      int(1.0 * (2 * conv->padding() + conv->img_size() - conv->filter_size()) /
-          conv->stride()) +
-      1;
-  conv->set_output_x(outputSize);
-  config.layerConfig.set_size(outputSize * outputSize *
+  int output_x =
+      outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
+                 conv->stride(), /* caffeMode */ true);
+  conv->set_output_x(output_x);
+  config.layerConfig.set_size(output_x * output_x *
                               config.layerConfig.num_filters());
   config.layerConfig.set_size(conv->output_x() * conv->output_x() *
                               NUM_FILTERS);
diff --git a/paddle/math/MathUtils.cpp b/paddle/math/MathUtils.cpp
index 5b78ab1b07..c1af8628d0 100644
--- a/paddle/math/MathUtils.cpp
+++ b/paddle/math/MathUtils.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "MathUtils.h"
 #include <algorithm>
 #include "paddle/utils/Logging.h"
@@ -24,11 +23,7 @@ namespace paddle {
  * major is rows and minor is cols, according to
  * major value to initialize minor value"
  */
-void sparseRand(int* major,
-                int* minor,
-                int nnz,
-                int majorLen,
-                int minorMax,
+void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
                 bool useGpu) {
   CHECK(size_t(nnz) > size_t(1));
   int* cpuMajor;
@@ -72,5 +67,17 @@ void sparseRand(int* major,
   }
 }
 
+// Output extent of a conv/pool window sweep; see MathUtils.h for an example.
+int outputSize(int imageSize, int filterSize, int padding, int stride,
+               bool caffeMode) {
+  CHECK_GT(stride, 0);  // stride is a divisor below; zero would be undefined
+  // Padded input extent remaining after placing the first window.
+  const int span = imageSize - filterSize + 2 * padding;
+  // caffeMode drops a trailing partial window; otherwise it is counted.
+  const int steps = caffeMode ? span / stride : (span + stride - 1) / stride;
+  const int result = steps + 1;
+  CHECK_GE(result, 1);
+  return result;
+}
 
 }  // namespace paddle
diff --git a/paddle/math/MathUtils.h b/paddle/math/MathUtils.h
index 83375022ab..49d0c10a8f 100644
--- a/paddle/math/MathUtils.h
+++ b/paddle/math/MathUtils.h
@@ -44,4 +44,20 @@ namespace paddle {
 void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
                 bool useGpu);
 
+/**
+ * Calculate the output size along one dimension from the image size, filter
+ * (window) size, padding and stride; caffeMode selects whether a trailing
+ * partial window is counted. The result is CHECKed to be >= 1.
+ * Example: input(+padding) = 0123456789, so imageSize(+padding) = 10;
+ * filterSize = 3; stride = 2.
+ * - caffeMode is true:
+ *   - output: (012), (234), (456), (678)
+ *   - outputSize = 4;
+ * - caffeMode is false:
+ *   - output: (012), (234), (456), (678), (9)
+ *   - outputSize = 5;
+ */
+int outputSize(int imageSize, int filterSize, int padding, int stride,
+               bool caffeMode);
+
 }  // namespace paddle
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index e9038fea8a..73631602a9 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1006,6 +1006,17 @@ def TestData(data_config, async_load_data=None):
                        " Data definition")
         g_config.test_data_config.async_load_data = async_load_data
 
+def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
+    '''
+    caffe_mode: compute the output size using floor instead of ceil,
+                which is consistent with caffe and CuDNN's convention.
+    '''
+    output = (2 * padding + img_size - filter_size) / float(stride)
+    if caffe_mode:
+        return 1 + int(math.floor(output))
+    else:
+        return 1 + int(math.ceil(output))
+
 def parse_pool(pool, input_layer_name, pool_conf):
     pool_conf.pool_type = pool.pool_type
     config_assert(pool.pool_type in ['max-projection', 'avg-projection',
@@ -1036,12 +1047,10 @@ def parse_pool(pool, input_layer_name, pool_conf):
     if pool.padding is not None:
         pool_conf.padding = pool.padding
         pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
-        pool_conf.output_x = int(math.ceil((pool_conf.img_size + \
-            2*pool_conf.padding - pool_conf.size_x) / \
-            float(pool_conf.stride))) + 1
-        pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \
-            2*pool_conf.padding_y - pool_conf.size_y) / \
-            float(pool_conf.stride_y))) + 1
+        pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
+                                             pool_conf.padding, pool_conf.stride, False)
+        pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
+                                             pool_conf.padding_y, pool_conf.stride_y, False)
 
 def parse_image(image, input_layer_name, image_conf):
     image_conf.channels = image.channels
@@ -1072,10 +1081,7 @@ def parse_norm(norm, input_layer_name, norm_conf):
         norm_conf.scale /= norm.size
     else:
         norm_conf.scale /= norm.size ** 2
-'''
-caffe_mode: compute the output size using floor instead of ceil,
-            which is consistent of caffe and CuDNN's convention.
-'''
+
 def parse_conv(conv, input_layer_name, conv_conf):
     conv_conf.filter_size = conv.filter_size
     conv_conf.filter_size_y = conv.filter_size_y
@@ -1096,14 +1102,9 @@ def parse_conv(conv, input_layer_name, conv_conf):
                   ("Input layer %s: Incorrect input image size %d for input "
                    + "image pixels %d")
                   % (input_layer_name, conv_conf.img_size, img_pixels))
-    if conv.caffe_mode:
-        conv_conf.output_x = \
-            1 + int(math.floor((2 * conv.padding + conv_conf.img_size \
-            - conv.filter_size) / float(conv.stride)))
-    else:
-        conv_conf.output_x = \
-            1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \
-            - conv.filter_size) / float(conv.stride)))
+    conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size,
+                                         conv_conf.padding, conv_conf.stride,
+                                         conv_conf.caffe_mode)
 
 def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
     block_expand_conf.channels = block_expand.channels
@@ -1118,18 +1119,16 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
     if block_expand_conf.img_size_x == 0:
         block_expand_conf.output_x = 0
     else:
-        block_expand_conf.output_x = \
-            1 + \
-            int(math.ceil((2 * block_expand.padding_x + block_expand.img_size_x \
-            - block_expand.block_x) / float(block_expand.stride_x)))
+        block_expand_conf.output_x = cnn_output_size(
+            block_expand.img_size_x, block_expand.block_x, 
+            block_expand.padding_x, block_expand.stride_x, False)
 
     if block_expand_conf.img_size_y == 0:
-      block_expand_conf.output_y = 0
+        block_expand_conf.output_y = 0
     else:
-        block_expand_conf.output_y = \
-            1 + \
-            int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \
-            - block_expand.block_y) / float(block_expand.stride_y)))
+        block_expand_conf.output_y = cnn_output_size(
+            block_expand.img_size_y, block_expand.block_y, 
+            block_expand.padding_y, block_expand.stride_y, False)
 
 def parse_maxout(maxout, input_layer_name, maxout_conf):
     maxout_conf.channels = maxout.channels
-- 
GitLab