提交 e802471c 编写于 作者: L luotao1 提交者: GitHub

abstract outputSize function in CNN-related layers (#314)

上级 f9849ac9
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "ConvBaseLayer.h" #include "ConvBaseLayer.h"
namespace paddle { namespace paddle {
...@@ -78,10 +77,10 @@ size_t ConvBaseLayer::calOutputSize() { ...@@ -78,10 +77,10 @@ size_t ConvBaseLayer::calOutputSize() {
imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); imgSizeH_[i] = config_.inputs(i).conv_conf().img_size();
if (imgSizeW_[i] == 0) if (imgSizeW_[i] == 0)
imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); imgSizeW_[i] = config_.inputs(i).conv_conf().img_size();
outputH_.push_back( outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i],
outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i])); strideY_[i], caffeMode_));
outputW_.push_back( outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i],
outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i])); stride_[i], caffeMode_));
CHECK_EQ(outputH_[i], outputH_[0]); CHECK_EQ(outputH_[i], outputH_[0]);
CHECK_EQ(outputW_[i], outputW_[0]); CHECK_EQ(outputW_[i], outputW_[0]);
} }
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#pragma once #pragma once
#include "Layer.h" #include "Layer.h"
#include "paddle/math/MathUtils.h"
namespace paddle { namespace paddle {
/** /**
...@@ -87,31 +88,6 @@ public: ...@@ -87,31 +88,6 @@ public:
virtual size_t calOutputSize(); virtual size_t calOutputSize();
Weight& getWeight(int idx) { return *weights_[idx]; } Weight& getWeight(int idx) { return *weights_[idx]; }
/**
 * Compute one spatial output dimension of the convolution, honoring
 * caffeMode_.
 * - input(+padding): 0123456789
 * - imageSize(+padding) = 10;
 * - filterSize = 3;
 * - stride = 2;
 * - caffeMode_ is true (floor division, Caffe/cuDNN convention):
 *   - output: (012), (234), (456), (678)
 *   - outputSize = 4;
 * - caffeMode_ is false (ceil division, a trailing partial window counts):
 *   - output: (012), (234), (456), (678), (9)
 *   - outputSize = 5;
 */
int outputSize(int imageSize, int filterSize, int padding, int stride) {
  const int span = imageSize + 2 * padding - filterSize;
  // Ceil mode rounds the division up so leftover pixels still produce
  // one more output position; caffe mode truncates (floor).
  int result = caffeMode_ ? span / stride + 1 : (span + stride - 1) / stride + 1;
  CHECK_GE(result, 1);  // a non-positive size means inconsistent layer config
  return result;
}
}; };
} // namespace paddle } // namespace paddle
...@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/MathUtils.h"
#include "Operator.h" #include "Operator.h"
namespace paddle { namespace paddle {
...@@ -35,8 +35,8 @@ public: ...@@ -35,8 +35,8 @@ public:
*/ */
virtual ~ConvOperator() { virtual ~ConvOperator() {
if (workSpaceInBytes_ != 0) { if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_); hl_free_mem_device(workSpace_);
workSpaceInBytes_ = 0; workSpaceInBytes_ = 0;
} }
hl_destroy_tensor_descriptor(inputDesc_); hl_destroy_tensor_descriptor(inputDesc_);
...@@ -83,33 +83,6 @@ private: ...@@ -83,33 +83,6 @@ private:
filterSize_ * filterSizeY_ * channels_ * numFilters_); filterSize_ * filterSizeY_ * channels_ * numFilters_);
} }
/**
 * Compute one spatial output dimension of the convolution.
 *
 * With imageSize(+padding) = 10, filterSize = 3, stride = 2:
 * - caffeMode_ true  (floor): windows (012), (234), (456), (678)      -> 4
 * - caffeMode_ false (ceil):  windows (012), (234), (456), (678), (9) -> 5
 */
int outputSize(int imageSize, int filterSize, int padding, int stride) {
  const int span = imageSize + 2 * padding - filterSize;
  if (caffeMode_) {
    // Floor division: partial trailing windows are dropped.
    return span / stride + 1;
  }
  // Ceil division: a trailing partial window still yields an output.
  return (span + stride - 1) / stride + 1;
}
/// Most of member variables are same with CudnnConvLayer. /// Most of member variables are same with CudnnConvLayer.
/// There is no explanation here. /// There is no explanation here.
int imageH_, imageW_, outputH_, outputW_; int imageH_, imageW_, outputH_, outputW_;
...@@ -129,7 +102,7 @@ private: ...@@ -129,7 +102,7 @@ private:
int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_; int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_; size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
size_t workSpaceInBytes_; size_t workSpaceInBytes_;
void* workSpace_; void *workSpace_;
bool isSelectAlgo_; bool isSelectAlgo_;
}; };
...@@ -160,7 +133,7 @@ ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu) ...@@ -160,7 +133,7 @@ ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu)
void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
if (maxWorkSpace > workSpaceInBytes_) { if (maxWorkSpace > workSpaceInBytes_) {
if (workSpaceInBytes_ != 0) { if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_); hl_free_mem_device(workSpace_);
} }
// total amount of storage needed // total amount of storage needed
workSpace_ = hl_malloc_device(maxWorkSpace); workSpace_ = hl_malloc_device(maxWorkSpace);
...@@ -168,14 +141,13 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { ...@@ -168,14 +141,13 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
} }
} }
void ConvOperator::reshape(int batchSize) { void ConvOperator::reshape(int batchSize) {
imageH_ = ins_[0]->getFrameHeight(); imageH_ = ins_[0]->getFrameHeight();
imageW_ = ins_[0]->getFrameWidth(); imageW_ = ins_[0]->getFrameWidth();
if (imageH_ == 0) imageH_ = imgSize_; if (imageH_ == 0) imageH_ = imgSize_;
if (imageW_ == 0) imageW_ = imgSize_; if (imageW_ == 0) imageW_ = imgSize_;
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_); outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_); outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
out_->setFrameHeight(outputH_); out_->setFrameHeight(outputH_);
out_->setFrameWidth(outputW_); out_->setFrameWidth(outputW_);
...@@ -183,10 +155,10 @@ void ConvOperator::reshape(int batchSize) { ...@@ -183,10 +155,10 @@ void ConvOperator::reshape(int batchSize) {
reshapeImageDescriptors(); reshapeImageDescriptors();
if (!isSelectAlgo_) { if (!isSelectAlgo_) {
hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, convDesc_,
convDesc_, &fwdAlgo_, &fwdLimitBytes_, &fwdAlgo_, &fwdLimitBytes_, &bwdDataAlgo_,
&bwdDataAlgo_, &bwdDataLimitBytes_, &bwdDataLimitBytes_, &bwdFilterAlgo_,
&bwdFilterAlgo_, &bwdFilterLimitBytes_); &bwdFilterLimitBytes_);
size_t maxWorkSpace = 0; size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
...@@ -202,7 +174,8 @@ void ConvOperator::computeConvSizes() { ...@@ -202,7 +174,8 @@ void ConvOperator::computeConvSizes() {
hl_create_filter_descriptor(&filterDesc_, channels_, numFilters_, hl_create_filter_descriptor(&filterDesc_, channels_, numFilters_,
filterSizeY_, filterSize_); filterSizeY_, filterSize_);
hl_create_tensor_descriptor(&inputDesc_); hl_create_tensor_descriptor(&inputDesc_);
int outputX = outputSize(imgSize_, filterSize_, padding_, stride_); int outputX =
outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
CHECK_EQ(outputX, outputX_); CHECK_EQ(outputX, outputX_);
hl_create_tensor_descriptor(&outputDesc_); hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_, hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_,
...@@ -211,13 +184,13 @@ void ConvOperator::computeConvSizes() { ...@@ -211,13 +184,13 @@ void ConvOperator::computeConvSizes() {
void ConvOperator::reshapeImageDescriptors() { void ConvOperator::reshapeImageDescriptors() {
hl_tensor_reshape(inputDesc_, 1, channels_, imageH_, imageW_, hl_tensor_reshape(inputDesc_, 1, channels_, imageH_, imageW_,
channels_ * imageH_ * imageW_, imageH_ * imageW_, channels_ * imageH_ * imageW_, imageH_ * imageW_, imageW_,
imageW_, 1); 1);
hl_tensor_reshape(outputDesc_, 1, numFilters_, outputH_, outputW_, hl_tensor_reshape(outputDesc_, 1, numFilters_, outputH_, outputW_,
numFilters_ * outputH_ * outputW_, outputH_ * outputW_, numFilters_ * outputH_ * outputW_, outputH_ * outputW_,
outputW_, 1); outputW_, 1);
hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, paddingY_,
paddingY_, padding_, strideY_, stride_); padding_, strideY_, stride_);
inputOffset_ = channels_ * imageH_ * imageW_; inputOffset_ = channels_ * imageH_ * imageW_;
outputOffset_ = numFilters_ * outputH_ * outputW_; outputOffset_ = numFilters_ * outputH_ * outputW_;
weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_; weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_;
...@@ -273,18 +246,17 @@ void ConvOperator::backward() { ...@@ -273,18 +246,17 @@ void ConvOperator::backward() {
real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
hl_convolution_backward_filter(inputDesc_, inputData, outputDesc_, hl_convolution_backward_filter(inputDesc_, inputData, outputDesc_,
outGrad, filterDesc_, weightGrad, outGrad, filterDesc_, weightGrad,
convDesc_, workSpace_, convDesc_, workSpace_, workSpaceInBytes_,
workSpaceInBytes_, bwdFilterAlgo_); bwdFilterAlgo_);
} }
MatrixPtr preGrad = ins_[0]->grad; MatrixPtr preGrad = ins_[0]->grad;
if (NULL != preGrad) { if (NULL != preGrad) {
real *inputGrad = preGrad->getData() + inputOffset_ * batchId; real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
hl_convolution_backward_data(inputDesc_, inputGrad, outputDesc_, hl_convolution_backward_data(
outGrad, filterDesc_, wgtData, inputDesc_, inputGrad, outputDesc_, outGrad, filterDesc_, wgtData,
convDesc_, workSpace_, convDesc_, workSpace_, workSpaceInBytes_, bwdDataAlgo_);
workSpaceInBytes_, bwdDataAlgo_);
} }
} }
} }
......
...@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "Projection.h" #include "Projection.h"
#include "paddle/math/MathUtils.h"
namespace paddle { namespace paddle {
...@@ -42,17 +42,15 @@ protected: ...@@ -42,17 +42,15 @@ protected:
void reshapeTensorDesc(int batchSize); void reshapeTensorDesc(int batchSize);
void reshape(int batchSize); void reshape(int batchSize);
/// Caffe-convention (floor) output size:
/// floor((imageSize + 2*padding - filterSize) / stride) + 1.
int outputSize(int imageSize, int filterSize, int padding, int stride) {
  const int span = imageSize + 2 * padding - filterSize;
  return span / stride + 1;
}
size_t calOutputSize() { size_t calOutputSize() {
imageH_ = in_->getFrameHeight(); imageH_ = in_->getFrameHeight();
imageW_ = in_->getFrameWidth(); imageW_ = in_->getFrameWidth();
if (imageH_ == 0) imageH_ = configImgH_; if (imageH_ == 0) imageH_ = configImgH_;
if (imageW_ == 0) imageW_ = configImgW_; if (imageW_ == 0) imageW_ = configImgW_;
outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_); outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_,
outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_); /* caffeMode */ true);
outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_,
/* caffeMode */ true);
const_cast<Argument*>(out_)->setFrameHeight(outputH_); const_cast<Argument*>(out_)->setFrameHeight(outputH_);
const_cast<Argument*>(out_)->setFrameWidth(outputW_); const_cast<Argument*>(out_)->setFrameWidth(outputW_);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
...@@ -62,9 +61,9 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap, ...@@ -62,9 +61,9 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
strideHeight = strideY_; strideHeight = strideY_;
strideWidth = stride_; strideWidth = stride_;
hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, windowWidth,
windowWidth, heightPadding, widthPadding, heightPadding, widthPadding, strideHeight,
strideHeight, strideWidth); strideWidth);
return true; return true;
} }
...@@ -80,8 +79,10 @@ void CudnnPoolLayer::reshape(int batchSize) { ...@@ -80,8 +79,10 @@ void CudnnPoolLayer::reshape(int batchSize) {
} }
CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(), CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(),
channels_ * imageH_ * imageW_); channels_ * imageH_ * imageW_);
outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_); outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_,
outputW_ = outputSize(imageW_, sizeX_, confPadding_, stride_); /* caffeMode */ false);
outputW_ =
outputSize(imageW_, sizeX_, confPadding_, stride_, /* caffeMode */ false);
getOutput().setFrameHeight(outputH_); getOutput().setFrameHeight(outputH_);
getOutput().setFrameWidth(outputW_); getOutput().setFrameWidth(outputW_);
...@@ -99,8 +100,7 @@ void CudnnPoolLayer::forward(PassType passType) { ...@@ -99,8 +100,7 @@ void CudnnPoolLayer::forward(PassType passType) {
real *inputData = getInputValue(0)->getData(); real *inputData = getInputValue(0)->getData();
real *outData = getOutputValue()->getData(); real *outData = getOutputValue()->getData();
hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, poolingDesc_);
poolingDesc_);
} }
void CudnnPoolLayer::backward(const UpdateCallback &callback) { void CudnnPoolLayer::backward(const UpdateCallback &callback) {
...@@ -113,8 +113,8 @@ void CudnnPoolLayer::backward(const UpdateCallback &callback) { ...@@ -113,8 +113,8 @@ void CudnnPoolLayer::backward(const UpdateCallback &callback) {
real *inputGrad = getInputGrad(0)->getData(); real *inputGrad = getInputGrad(0)->getData();
real *outData = getOutputValue()->getData(); real *outData = getOutputValue()->getData();
real *outGrad = getOutputGrad()->getData(); real *outGrad = getOutputGrad()->getData();
hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, outData,
outData, outGrad, poolingDesc_); outGrad, poolingDesc_);
} }
CudnnPoolLayer::~CudnnPoolLayer() { CudnnPoolLayer::~CudnnPoolLayer() {
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include "Layer.h" #include "Layer.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/MathUtils.h"
#include <vector> #include <vector>
namespace paddle { namespace paddle {
...@@ -47,16 +48,6 @@ public: ...@@ -47,16 +48,6 @@ public:
static Layer* create(const LayerConfig& config); static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
/**
 * Compute the pooled output size for one dimension (ceil convention):
 * any leftover pixels after the last full window still produce an
 * extra output, i.e. ceil((imageSize + 2*padding - windowSize)/stride) + 1.
 */
int outputSize(int imageSize, int windowSize, int padding, int stride) {
  const int span = imageSize + 2 * padding - windowSize;
  // Add (stride - 1) before dividing to round the quotient up.
  return (span + stride - 1) / stride + 1;
}
}; };
} // namespace paddle } // namespace paddle
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
#include "PoolProjectionLayer.h" #include "PoolProjectionLayer.h"
...@@ -31,8 +30,10 @@ size_t PoolProjectionLayer::getSize() { ...@@ -31,8 +30,10 @@ size_t PoolProjectionLayer::getSize() {
imgSizeW_ = imgSize_; imgSizeW_ = imgSize_;
} }
outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_); outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_,
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_); /* caffeMode */ false);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_,
/* caffeMode */ false);
layerSize = outputH_ * outputW_ * channels_; layerSize = outputH_ * outputW_ * channels_;
...@@ -53,9 +54,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) { ...@@ -53,9 +54,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
sizeX_, sizeY_, strideY_, stride_, strideY_, stride_, outputH_, outputW_, confPaddingY_,
outputH_, outputW_, confPaddingY_, confPadding_); confPadding_);
} }
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
...@@ -72,9 +73,8 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { ...@@ -72,9 +73,8 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputGrad = getInputGrad(0); MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV, inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, sizeY_, sizeX_, sizeY_, strideY_, stride_, outputH_,
strideY_, stride_, outputH_, outputW_, 1, 1, outputW_, 1, 1, confPaddingY_, confPadding_);
confPaddingY_, confPadding_);
} }
void AvgPoolProjectionLayer::forward(PassType passType) { void AvgPoolProjectionLayer::forward(PassType passType) {
...@@ -89,9 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) { ...@@ -89,9 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
sizeX_, sizeY_, strideY_, stride_, strideY_, stride_, outputH_, outputW_, confPaddingY_,
outputH_, outputW_, confPaddingY_, confPadding_); confPadding_);
} }
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
...@@ -103,9 +103,8 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { ...@@ -103,9 +103,8 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ /* Do derivation */
MatrixPtr outputGrad = getOutputGrad(); MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0); MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_,
sizeX_, sizeY_, strideY_, stride_, strideY_, stride_, outputH_, outputW_, 1, 1,
outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_); confPaddingY_, confPadding_);
} }
} // namespace paddle } // namespace paddle
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/gserver/layers/DataLayer.h" #include "paddle/gserver/layers/DataLayer.h"
#include "ModelConfig.pb.h" #include "ModelConfig.pb.h"
#include "paddle/trainer/Trainer.h" #include "paddle/trainer/Trainer.h"
#include "paddle/math/MathUtils.h"
#include "TestUtil.h" #include "TestUtil.h"
#include "LayerGradUtil.h" #include "LayerGradUtil.h"
...@@ -134,7 +135,6 @@ TEST(Projection, identity) { ...@@ -134,7 +135,6 @@ TEST(Projection, identity) {
} }
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
TEST(Projection, conv) { TEST(Projection, conv) {
const int NUM_FILTERS = 16; const int NUM_FILTERS = 16;
...@@ -158,21 +158,23 @@ TEST(Projection, conv) { ...@@ -158,21 +158,23 @@ TEST(Projection, conv) {
conv->set_groups(1); conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(IMAGE_SIZE); conv->set_img_size(IMAGE_SIZE);
int outputSize = (2 * conv->padding() + conv->img_size() - int output_x =
conv->filter_size()) / conv->stride() + 1; outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
int outputSizeY = (2 * conv->padding_y() + conv->img_size() - conv->stride(), /* caffeMode */ true);
conv->filter_size_y()) / conv->stride_y() + 1; int output_y =
conv->set_output_x(outputSize); outputSize(conv->img_size(), conv->filter_size_y(), conv->padding_y(),
conv->stride_y(), /* caffeMode */ true);
conv->set_output_x(output_x);
conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS);
conf.set_output_size(outputSize * outputSizeY * NUM_FILTERS); conf.set_output_size(output_x * output_y * NUM_FILTERS);
testProjectionGrad(conf, INPUT_DATA, testProjectionGrad(
conf, INPUT_DATA,
/* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * FILTER_SIZE_Y, /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * FILTER_SIZE_Y,
/* batchSize */ 100, true, false, NUM_FILTERS, true); /* batchSize */ 100, true, false, NUM_FILTERS, true);
} }
#endif #endif
TEST(Layer, concat) { TEST(Layer, concat) {
TestConfig config; TestConfig config;
config.biasSize = 0; config.biasSize = 0;
...@@ -293,10 +295,9 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { ...@@ -293,10 +295,9 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
conv->set_groups(1); conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16); conv->set_img_size(16);
conv->set_output_x( conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
(2 * conv->padding() + conv->img_size() - conv->filter_size()) / conv->padding(), conv->stride(),
((float)conv->stride()) + /* caffeMode */ true));
1.5);
config.layerConfig.set_size(conv->output_x() * conv->output_x() * config.layerConfig.set_size(conv->output_x() * conv->output_x() *
config.layerConfig.num_filters()); config.layerConfig.num_filters());
...@@ -329,15 +330,13 @@ TEST(Layer, blockExpandLayer) { ...@@ -329,15 +330,13 @@ TEST(Layer, blockExpandLayer) {
blockExpand->set_stride_x(2); blockExpand->set_stride_x(2);
blockExpand->set_stride_y(2); blockExpand->set_stride_y(2);
blockExpand->set_output_x( blockExpand->set_output_x(
1 + outputSize(blockExpand->img_size_x(), blockExpand->block_x(),
(2 * blockExpand->padding_x() + blockExpand->img_size_x() - blockExpand->padding_x(), blockExpand->stride_x(),
blockExpand->block_x() + blockExpand->stride_x() - 1) / /* caffeMode */ false));
blockExpand->stride_x());
blockExpand->set_output_y( blockExpand->set_output_y(
1 + outputSize(blockExpand->img_size_y(), blockExpand->block_y(),
(2 * blockExpand->padding_y() + blockExpand->img_size_y() - blockExpand->padding_y(), blockExpand->stride_y(),
blockExpand->block_y() + blockExpand->stride_y() - 1) / /* caffeMode */ false));
blockExpand->stride_y());
config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() * config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() *
blockExpand->channels()); blockExpand->channels());
...@@ -862,8 +861,8 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool, ...@@ -862,8 +861,8 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
pool->set_stride(sw); pool->set_stride(sw);
pool->set_stride_y(sh); pool->set_stride_y(sh);
int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1; int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1; int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow); pool->set_output_x(ow);
pool->set_output_y(oh); pool->set_output_y(oh);
} }
...@@ -1255,12 +1254,11 @@ TEST(Operator, conv) { ...@@ -1255,12 +1254,11 @@ TEST(Operator, conv) {
conv->set_groups(1); conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(IMAGE_SIZE); conv->set_img_size(IMAGE_SIZE);
int outputSize = int output_x =
int(1.0 * (2 * conv->padding() + conv->img_size() - conv->filter_size()) / outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
conv->stride()) + conv->stride(), /* caffeMode */ true);
1; conv->set_output_x(output_x);
conv->set_output_x(outputSize); config.layerConfig.set_size(output_x * output_x *
config.layerConfig.set_size(outputSize * outputSize *
config.layerConfig.num_filters()); config.layerConfig.num_filters());
config.layerConfig.set_size(conv->output_x() * conv->output_x() * config.layerConfig.set_size(conv->output_x() * conv->output_x() *
NUM_FILTERS); NUM_FILTERS);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "MathUtils.h" #include "MathUtils.h"
#include <algorithm> #include <algorithm>
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
...@@ -24,11 +23,7 @@ namespace paddle { ...@@ -24,11 +23,7 @@ namespace paddle {
* major is rows and minor is cols, according to * major is rows and minor is cols, according to
* major value to initialize minor value" * major value to initialize minor value"
*/ */
void sparseRand(int* major, void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
int* minor,
int nnz,
int majorLen,
int minorMax,
bool useGpu) { bool useGpu) {
CHECK(size_t(nnz) > size_t(1)); CHECK(size_t(nnz) > size_t(1));
int* cpuMajor; int* cpuMajor;
...@@ -72,5 +67,17 @@ void sparseRand(int* major, ...@@ -72,5 +67,17 @@ void sparseRand(int* major,
} }
} }
int outputSize(int imageSize, int filterSize, int padding, int stride,
               bool caffeMode) {
  // Effective span the filter slides over once padding is applied.
  const int span = imageSize + 2 * padding - filterSize;
  // caffeMode uses floor division (Caffe/cuDNN convention); otherwise the
  // division is rounded up so a trailing partial window still counts.
  const int result =
      caffeMode ? span / stride + 1 : (span + stride - 1) / stride + 1;
  CHECK_GE(result, 1);  // a non-positive size means inconsistent parameters
  return result;
}
} // namespace paddle } // namespace paddle
...@@ -44,4 +44,20 @@ namespace paddle { ...@@ -44,4 +44,20 @@ namespace paddle {
void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
bool useGpu); bool useGpu);
/**
* Calculate output size based on caffeMode_.
* - input(+padding): 0123456789
* - imageSize(+padding) = 10;
* - filterSize = 3;
* - stride = 2;
* - caffeMode is true:
 * - output: (012), (234), (456), (678)
 * - outputSize = 4;
* - caffeMode is false:
* - output: (012), (234), (456), (678), (9)
* - outputSize = 5;
*/
int outputSize(int imageSize, int filterSize, int padding, int stride,
bool caffeMode);
} // namespace paddle } // namespace paddle
...@@ -1006,6 +1006,17 @@ def TestData(data_config, async_load_data=None): ...@@ -1006,6 +1006,17 @@ def TestData(data_config, async_load_data=None):
" Data definition") " Data definition")
g_config.test_data_config.async_load_data = async_load_data g_config.test_data_config.async_load_data = async_load_data
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent with the Caffe and cuDNN convention.
'''
def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
    """Compute one spatial output dimension of a CNN layer.

    caffe_mode rounds the division down (floor), matching the Caffe and
    cuDNN convention; otherwise it rounds up (ceil) so a trailing partial
    window still produces an output.
    """
    span = (img_size + 2 * padding - filter_size) / float(stride)
    rounded = math.floor(span) if caffe_mode else math.ceil(span)
    return int(rounded) + 1
def parse_pool(pool, input_layer_name, pool_conf): def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.pool_type = pool.pool_type pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in ['max-projection', 'avg-projection', config_assert(pool.pool_type in ['max-projection', 'avg-projection',
...@@ -1036,12 +1047,10 @@ def parse_pool(pool, input_layer_name, pool_conf): ...@@ -1036,12 +1047,10 @@ def parse_pool(pool, input_layer_name, pool_conf):
if pool.padding is not None: if pool.padding is not None:
pool_conf.padding = pool.padding pool_conf.padding = pool.padding
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.output_x = int(math.ceil((pool_conf.img_size + \ pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
2*pool_conf.padding - pool_conf.size_x) / \ pool_conf.padding, pool_conf.stride, False)
float(pool_conf.stride))) + 1 pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \ pool_conf.padding_y, pool_conf.stride_y, False)
2*pool_conf.padding_y - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
def parse_image(image, input_layer_name, image_conf): def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels image_conf.channels = image.channels
...@@ -1072,10 +1081,7 @@ def parse_norm(norm, input_layer_name, norm_conf): ...@@ -1072,10 +1081,7 @@ def parse_norm(norm, input_layer_name, norm_conf):
norm_conf.scale /= norm.size norm_conf.scale /= norm.size
else: else:
norm_conf.scale /= norm.size ** 2 norm_conf.scale /= norm.size ** 2
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention.
'''
def parse_conv(conv, input_layer_name, conv_conf): def parse_conv(conv, input_layer_name, conv_conf):
conv_conf.filter_size = conv.filter_size conv_conf.filter_size = conv.filter_size
conv_conf.filter_size_y = conv.filter_size_y conv_conf.filter_size_y = conv.filter_size_y
...@@ -1096,14 +1102,9 @@ def parse_conv(conv, input_layer_name, conv_conf): ...@@ -1096,14 +1102,9 @@ def parse_conv(conv, input_layer_name, conv_conf):
("Input layer %s: Incorrect input image size %d for input " ("Input layer %s: Incorrect input image size %d for input "
+ "image pixels %d") + "image pixels %d")
% (input_layer_name, conv_conf.img_size, img_pixels)) % (input_layer_name, conv_conf.img_size, img_pixels))
if conv.caffe_mode: conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size,
conv_conf.output_x = \ conv_conf.padding, conv_conf.stride,
1 + int(math.floor((2 * conv.padding + conv_conf.img_size \ conv_conf.caffe_mode)
- conv.filter_size) / float(conv.stride)))
else:
conv_conf.output_x = \
1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \
- conv.filter_size) / float(conv.stride)))
def parse_block_expand(block_expand, input_layer_name, block_expand_conf): def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels block_expand_conf.channels = block_expand.channels
...@@ -1118,18 +1119,16 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf): ...@@ -1118,18 +1119,16 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
if block_expand_conf.img_size_x == 0: if block_expand_conf.img_size_x == 0:
block_expand_conf.output_x = 0 block_expand_conf.output_x = 0
else: else:
block_expand_conf.output_x = \ block_expand_conf.output_x = cnn_output_size(
1 + \ block_expand.img_size_x, block_expand.block_x,
int(math.ceil((2 * block_expand.padding_x + block_expand.img_size_x \ block_expand.padding_x, block_expand.stride_x, False)
- block_expand.block_x) / float(block_expand.stride_x)))
if block_expand_conf.img_size_y == 0: if block_expand_conf.img_size_y == 0:
block_expand_conf.output_y = 0 block_expand_conf.output_y = 0
else: else:
block_expand_conf.output_y = \ block_expand_conf.output_y = cnn_output_size(
1 + \ block_expand.img_size_y, block_expand.block_y,
int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \ block_expand.padding_y, block_expand.stride_y, False)
- block_expand.block_y) / float(block_expand.stride_y)))
def parse_maxout(maxout, input_layer_name, maxout_conf): def parse_maxout(maxout, input_layer_name, maxout_conf):
maxout_conf.channels = maxout.channels maxout_conf.channels = maxout.channels
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册