diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/gserver/layers/BatchNormBaseLayer.cpp index 8052b35ec69c500b9005d4ffef882ceafa3bdab8..7bf4c1fd5ec1a61948dea22383c58cd5d1d2343a 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.cpp +++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp @@ -61,15 +61,10 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap, void BatchNormBaseLayer::calFeatureMapSize() { const ImageConfig& conf = config_.inputs(0).image_conf(); - if (inputLayers_[0]->getOutput().getFrameHeight() == 0 && - inputLayers_[0]->getOutput().getFrameWidth() == 0) { - imgSize_ = conf.img_size(); - imageH_ = imgSize_; - imageW_ = imgSize_; - } else { - imageH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imageW_ = inputLayers_[0]->getOutput().getFrameWidth(); - } + imageH_ = inputLayers_[0]->getOutput().getFrameHeight(); + imageW_ = inputLayers_[0]->getOutput().getFrameWidth(); + if (imageH_ == 0) imageH_ = conf.img_size_y(); + if (imageW_ == 0) imageW_ = conf.img_size(); imgPixels_ = imageH_ * imageW_; getOutput().setFrameHeight(imageH_); getOutput().setFrameWidth(imageW_); diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h index 2302d1a8e0b17f4b67835e65a3453f8f6e20f721..4ea493b5f5208d5e4cdba26aa4116c6c2c6ba13f 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -78,9 +78,8 @@ protected: MatrixPtr savedMean_; MatrixPtr savedInvVar_; - /// Height or width of input image feature, now height is equal to width. - /// imgSize is 1 if the input is fully-connected layer. - int imgSize_; + /// Height and width of the input image feature. + /// Both are 1 if the input is a fully-connected layer. int imageH_; int imageW_; /// Height * Width. diff --git a/paddle/gserver/layers/BilinearInterpLayer.cpp b/paddle/gserver/layers/BilinearInterpLayer.cpp index ac5f87be7af070a1146f79b633c777e77633b80b..64d3046b56547168fcba9590368937dc3470c85d 100644 --- a/paddle/gserver/layers/BilinearInterpLayer.cpp +++ b/paddle/gserver/layers/BilinearInterpLayer.cpp @@ -26,15 +26,15 @@ size_t BilinearInterpLayer::getSize() { const BilinearInterpConfig& conf = config_.inputs(0).bilinear_interp_conf(); if (inImgH_ == 0) { - inImgH_ = conf.img_size_y(); + inImgH_ = conf.image_conf().img_size_y(); } if (inImgW_ == 0) { - inImgW_ = conf.img_size_x(); + inImgW_ = conf.image_conf().img_size(); } outImgH_ = conf.out_size_y(); outImgW_ = conf.out_size_x(); - numChannels_ = conf.num_channels(); + numChannels_ = conf.image_conf().channels(); CHECK(outImgH_ > 0 && outImgW_ > 0); CHECK(inImgH_ > 0 && inImgW_ > 0); diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 6bc3b3b801796a227a7b767c8da048a3ccf88827..8f358a5e411280a47fc0a107b76a70f311420184 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -37,11 +37,13 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, filterSizeY_.push_back(conf.filter_size_y()); filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); channels_.push_back(conf.channels()); - imgSizeH_.push_back(conf.img_size()); + imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() : + conf.img_size()); imgSizeW_.push_back(conf.img_size()); groups_.push_back(conf.groups()); filterChannels_.push_back(conf.filter_channels()); - outputH_.push_back(conf.output_x()); + outputH_.push_back(conf.has_output_y() ?
conf.output_y() : + conf.output_x()); outputW_.push_back(conf.output_x()); } @@ -90,11 +92,12 @@ size_t ConvBaseLayer::calOutputSize() { for (size_t i = 0; i < inputLayers_.size(); i++) { inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + const ConvConfig& conf = config_.inputs(i).conv_conf(); if (isDeconv_) { if (inH[i] == 0) - inH[i] = config_.inputs(i).conv_conf().output_x(); + inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x(); if (inW[i] == 0) - inW[i] = config_.inputs(i).conv_conf().output_x(); + inW[i] = conf.output_x(); outH.push_back( imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); @@ -103,9 +106,9 @@ size_t ConvBaseLayer::calOutputSize() { caffeMode_)); } else { if (inH[i] == 0) - inH[i] = config_.inputs(i).conv_conf().img_size(); + inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); if (inW[i] == 0) - inW[i] = config_.inputs(i).conv_conf().img_size(); + inW[i] = conf.img_size(); outH.push_back( outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/gserver/layers/ConvOperator.cpp index 2d9c892fe595f2f4dcdc9dcc3cd392a6c29fac01..7830efab1d0f0dabeb4e5af109f2a47f01fac54c 100644 --- a/paddle/gserver/layers/ConvOperator.cpp +++ b/paddle/gserver/layers/ConvOperator.cpp @@ -93,9 +93,9 @@ private: bool caffeMode_; int inputOffset_, outputOffset_, weightOffset_; int numFilters_; - int padding_, stride_, filterSize_, channels_, imgSize_; + int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_; int paddingY_, strideY_, filterSizeY_; - int imgPixels_, filterPixels_, filterChannels_, outputX_, outputs_; + int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_; /// Following member variables are same with CudnnConvLayer. /// There is no explanation here. @@ -144,7 +144,7 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { void ConvOperator::reshape(int batchSize) { imageH_ = ins_[0]->getFrameHeight(); imageW_ = ins_[0]->getFrameWidth(); - if (imageH_ == 0) imageH_ = imgSize_; + if (imageH_ == 0) imageH_ = imgSizeY_; if (imageW_ == 0) imageW_ = imgSize_; outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_); outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_); @@ -176,7 +176,10 @@ void ConvOperator::computeConvSizes() { hl_create_tensor_descriptor(&inputDesc_); int outputX = outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_); + int outputY = + outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_); CHECK_EQ(outputX, outputX_); + CHECK_EQ(outputY, outputY_); hl_create_tensor_descriptor(&outputDesc_); hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_, paddingY_, padding_, strideY_, stride_); @@ -208,10 +211,12 @@ void ConvOperator::getConvParams() { filterPixels_ = filterSize_ * filterSizeY_; channels_ = conf.channels(); imgSize_ = conf.img_size(); - imgPixels_ = imgSize_ * imgSize_; + imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); + imgPixels_ = imgSize_ * imgSizeY_; CHECK_EQ(conf.groups(), 1U); filterChannels_ = conf.filter_channels(); outputX_ = conf.output_x(); + outputY_ = conf.has_output_y() ? 
conf.output_y() : conf.output_x(); outputs_ = outputX_ * outputX_; } diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/gserver/layers/ConvProjection.cpp index d1ce53fe26351926196a04418900a1555e0282c2..161bbad4f5c9884324fa114eafe735e74419e0f7 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/gserver/layers/ConvProjection.cpp @@ -47,7 +47,7 @@ void ConvProjection::getConvParams() { filterH_ = conf.filter_size_y(); filterW_ = conf.filter_size(); - configImgH_ = conf.img_size(); + configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); configImgW_ = conf.img_size(); channels_ = conf.channels(); diff --git a/paddle/gserver/layers/DataLayer.cpp b/paddle/gserver/layers/DataLayer.cpp index 79b9181e694f008d99bda170c562a524212b2c73..b83d4f44b0c6e03bafeb6579eafef5e154439471 100644 --- a/paddle/gserver/layers/DataLayer.cpp +++ b/paddle/gserver/layers/DataLayer.cpp @@ -48,8 +48,8 @@ void DataLayer::copyDataToOutput(Argument& output) { output.ids->copyFrom(*data_.ids); } } - output.setFrameHeight(data_.getFrameHeight()); - output.setFrameWidth(data_.getFrameWidth()); + output.setFrameHeight(config_.height()); + output.setFrameWidth(config_.width()); output.cpuSequenceDims = data_.cpuSequenceDims; output.sequenceStartPositions = data_.sequenceStartPositions; output.subSequenceStartPositions = data_.subSequenceStartPositions; diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp index 0bab0ca764f4fea7dc37f0eae096de1a79c9df21..953c9d78416d2c5f9d256160e1e9a74dceb1fb59 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -30,17 +30,19 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, * meaning as in conv, we need to swap channels_ and numFilters here for * convTrans, and in other functions too. * */ - int channel; - int numFilters; + /* Initialize the projection */ for (auto &inputConfig : config_.inputs()) { const ConvConfig &conf = inputConfig.conv_conf(); - numFilters = isDeconv_ ? conf.channels() : numFilters_; + int numFilters = isDeconv_ ? conf.channels() : numFilters_; subM_.push_back(numFilters / conf.groups()); - subN_.push_back(conf.output_x() * conf.output_x()); - channel = isDeconv_ ? numFilters_ : conf.channels(); - subK_.push_back(channel * conf.filter_size() * conf.filter_size() / - conf.groups()); + subN_.push_back(conf.output_x() * + (conf.has_output_y() ? conf.output_y() : conf.output_x())); + int channel = isDeconv_ ? numFilters_ : conf.channels(); + subK_.push_back( + channel * conf.filter_size() * + (conf.has_filter_size_y() ? 
conf.filter_size_y() : conf.filter_size()) / + conf.groups()); /* Consistent caffe mode for multiple input */ caffeMode_ = conf.caffe_mode(); } @@ -107,9 +109,9 @@ void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx, imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false, useGpu_); expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], - channel, filterSize_[inIdx], - filterSize_[inIdx], stride_[inIdx], stride_[inIdx], - padding_[inIdx], padding_[inIdx], + channel, filterSizeY_[inIdx], + filterSize_[inIdx], strideY_[inIdx], stride_[inIdx], + paddingY_[inIdx], padding_[inIdx], outputH_[inIdx], outputW_[inIdx]); imageTmp->clear(); } @@ -188,10 +190,10 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image, imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false, useGpu_); vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], - channel, filterSize_[inpIdx], - filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], - padding_[inpIdx], padding_[inpIdx], - outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); + channel, filterSizeY_[inpIdx], + filterSize_[inpIdx], strideY_[inpIdx], stride_[inpIdx], + paddingY_[inpIdx], padding_[inpIdx], outputH_[inpIdx], + outputW_[inpIdx], 1.0f, 1.0f); vTmp->clear(); oneGradTmp->clear(); diff --git a/paddle/gserver/layers/MaxOutLayer.cpp b/paddle/gserver/layers/MaxOutLayer.cpp index a3de069bf7a6c9217e4adfeb2e65409955cc569c..b7f1b98041355624edbc1b480868079887264467 100644 --- a/paddle/gserver/layers/MaxOutLayer.cpp +++ b/paddle/gserver/layers/MaxOutLayer.cpp @@ -25,10 +25,10 @@ size_t MaxOutLayer::getSize() { imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); if (imgSizeH_ == 0) { - imgSizeH_ = maxoutConf.img_size_y(); + imgSizeH_ = maxoutConf.image_conf().img_size_y(); } if (imgSizeW_ == 0) { - imgSizeW_ = maxoutConf.img_size_x(); + imgSizeW_ = maxoutConf.image_conf().img_size(); } featLen_ = imgSizeH_ * imgSizeW_; @@ -50,7 +50,7 @@ bool MaxOutLayer::init(const LayerMap& layerMap, const MaxOutConfig& conf = config_.inputs(0).maxout_conf(); groups_ = conf.groups(); - channels_ = conf.channels(); + channels_ = conf.image_conf().channels(); CHECK_EQ(channels_ % groups_, 0UL); outputChannels_ = channels_ / groups_; diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/gserver/layers/NormLayer.cpp index ad8b92d2ff72426d30f2488af7d168ffd8e5b65d..b02a542a51e54ece60c6919e9313a9b119f5a758 100644 --- a/paddle/gserver/layers/NormLayer.cpp +++ b/paddle/gserver/layers/NormLayer.cpp @@ -49,6 +49,9 @@ bool ResponseNormLayer::init(const LayerMap& layerMap, outputX_ = conf.output_x(); imgSize_ = conf.img_size(); denoms_ = NULL; + + outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); + imgSizeY_ = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); return true; } diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h index 2b05be6fcb44fc3f61f9be4e464b2100284bf5c6..9e4acffd1fdaff79a9a866788919faa50ce296eb 100644 --- a/paddle/gserver/layers/NormLayer.h +++ b/paddle/gserver/layers/NormLayer.h @@ -50,7 +50,7 @@ public: */ class ResponseNormLayer : public NormLayer { protected: - size_t channels_, size_, outputX_, imgSize_; + size_t channels_, size_, outputX_, imgSize_, outputY_, imgSizeY_; float scale_, pow_; MatrixPtr denoms_; diff --git a/paddle/gserver/layers/NormProjectionLayer.cpp b/paddle/gserver/layers/NormProjectionLayer.cpp index eab6e904ee998b876a4dd7c503eec3a9a84f7412..e33b985facdabc8a5412ac88ccbdaadf44877c0c 100644 --- a/paddle/gserver/layers/NormProjectionLayer.cpp +++ b/paddle/gserver/layers/NormProjectionLayer.cpp @@ -24,7 +24,7 @@ size_t CMRProjectionNormLayer::getSize() { imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); if (imgSizeH_ == 0) { - imgSizeH_ = imgSize_; + imgSizeH_ = imgSizeY_; } if (imgSizeW_ == 0) { imgSizeW_ = imgSize_; diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp index 2fcfc8e1ae68a47822ce8f375fb94ecdb196dea6..2675f954012b882530a8487391d1aa1c546f468d 100644 --- a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp @@ -56,14 +56,14 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW, size_t SpatialPyramidPoolLayer::getSize() { CHECK_EQ(inputLayers_.size(), 1UL); size_t layerSize = 0; - const SppConfig& sppConf = config_.inputs(0).spp_conf(); + const ImageConfig& conf = config_.inputs(0).spp_conf().image_conf(); imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); if (imgSizeH_ == 0) { - imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_; + imgSizeH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); } if (imgSizeW_ == 0) { - imgSizeW_ = sppConf.img_size(); + imgSizeW_ = conf.img_size(); } size_t outputH = 1; @@ -82,9 +82,10 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap, pyramidHeight_ = sppConf.pyramid_height(); poolType_ = sppConf.pool_type(); - channels_ = sppConf.channels(); - imgSizeW_ = sppConf.img_size(); - imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_; + const ImageConfig& imageConf = sppConf.image_conf(); + channels_ = imageConf.channels(); + imgSizeW_ = imageConf.img_size(); + imgSizeH_ = imageConf.has_img_size_y() ? 
imageConf.img_size_y() : imgSizeW_; poolProjections_.reserve(pyramidHeight_); projCol_.reserve(pyramidHeight_); projOutput_.resize(pyramidHeight_); diff --git a/paddle/gserver/tests/img_pool_a.conf b/paddle/gserver/tests/img_pool_a.conf index 5938e7611201c9a4e3b44ca8aae2f39a80b1ff3b..9bd046b533de8200e6c945d1752ce240508b6338 100644 --- a/paddle/gserver/tests/img_pool_a.conf +++ b/paddle/gserver/tests/img_pool_a.conf @@ -28,7 +28,6 @@ maxpool = img_pool_layer(input=conv, stride_y=2, padding=1, padding_y=2, - img_width=16, pool_type=MaxPooling(), ) avgpool = img_pool_layer(input=conv, @@ -39,7 +38,6 @@ avgpool = img_pool_layer(input=conv, stride_y=2, padding=1, padding_y=2, - img_width=16, pool_type=AvgPooling(), ) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index a79dfe39c9bb26c7b2acec1051699e1804494d93..e839851099057382e9ac0bcde8f530abec3c7d3f 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -194,9 +194,10 @@ TEST(Layer, BilinearInterpLayer) { LayerInputConfig* input = config.layerConfig.add_inputs(); BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); - bilinear->set_img_size_x(32); - bilinear->set_img_size_y(32); - bilinear->set_num_channels(4); + ImageConfig* image = bilinear->mutable_image_conf(); + image->set_img_size(32); + image->set_img_size_y(32); + image->set_channels(4); for (auto useGpu : {false, true}) { for (auto outSize : {32, 64}) { @@ -314,7 +315,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 288}); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(2); @@ -327,10 +328,14 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(16); + conv->set_img_size_y(8); conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), conv->padding(), conv->stride(), /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_x() * + conv->set_output_y(outputSize(conv->img_size_y(), conv->filter_size_y(), + conv->padding_y(), conv->stride_y(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * config.layerConfig.num_filters()); testLayerGrad(config, "conv", 100, trans, useGpu); @@ -427,10 +432,11 @@ TEST(Layer, maxoutLayer) { config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); LayerInputConfig* input = config.layerConfig.add_inputs(); MaxOutConfig* maxout = input->mutable_maxout_conf(); + ImageConfig* image = maxout->mutable_image_conf(); - maxout->set_img_size_x(32); - maxout->set_img_size_y(32); - maxout->set_channels(4); + image->set_img_size(32); + image->set_img_size_y(32); + image->set_channels(4); maxout->set_groups(2); for (auto useGpu : {false, true}) { @@ -902,7 +908,7 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) { config.layerConfig.set_type("norm"); config.layerConfig.set_active_type("relu"); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); LayerInputConfig* input = config.layerConfig.add_inputs(); NormConfig* norm = input->mutable_norm_conf(); 
norm->set_norm_type(normType); @@ -912,7 +918,9 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) { norm->set_pow(0.75); norm->set_blocked(0); norm->set_img_size(14); + norm->set_img_size_y(7); norm->set_output_x(norm->img_size()); + norm->set_output_y(norm->img_size_y()); if (norm->norm_type() == "cmrnorm" || norm->norm_type() == "cmrnorm-projection") { norm->set_scale(norm->scale() / norm->size()); @@ -920,7 +928,7 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) { norm->set_scale(norm->scale() / (norm->size() * norm->size())); } - config.layerConfig.set_size(norm->output_x() * norm->output_x() * + config.layerConfig.set_size(norm->output_x() * norm->output_y() * norm->channels()); config.biasSize = 0; @@ -1018,11 +1026,12 @@ void testSppLayer(const string& poolType, const int pyramidHeight, bool trans, SppConfig* sppConfig = input->mutable_spp_conf(); sppConfig->set_pool_type(poolType); sppConfig->set_pyramid_height(pyramidHeight); - sppConfig->set_channels(16); - sppConfig->set_img_size(10); - sppConfig->set_img_size_y(20); + ImageConfig* imageConfig = sppConfig->mutable_image_conf(); + imageConfig->set_channels(16); + imageConfig->set_img_size(10); + imageConfig->set_img_size_y(20); int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); - config.layerConfig.set_size(outputSize * sppConfig->channels()); + config.layerConfig.set_size(outputSize * imageConfig->channels()); testLayerGrad(config, "spp", 100, trans, useGpu); } @@ -1328,12 +1337,13 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) { TestConfig config; const int CHANNELS = 10; const int IMG_SIZE = 16; + const int IMG_SIZE_Y = 8; + size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; config.layerConfig.set_type(type); - config.layerConfig.set_size(CHANNELS * IMG_SIZE * IMG_SIZE); + config.layerConfig.set_size(size); config.layerConfig.set_active_type("sigmoid"); config.biasSize = CHANNELS; - config.inputDefs.push_back({INPUT_DATA, "layer_0", - /* dim= */ IMG_SIZE * IMG_SIZE * CHANNELS, + config.inputDefs.push_back({INPUT_DATA, "layer_0", /* dim= */ size, /* paraSize= */ CHANNELS}); config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); @@ -1348,6 +1358,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) { ImageConfig* img_conf = input->mutable_image_conf(); img_conf->set_channels(CHANNELS); img_conf->set_img_size(IMG_SIZE); + img_conf->set_img_size_y(IMG_SIZE_Y); testLayerGrad(config, "batch_norm", 64, /* trans= */ trans, useGpu, /* useWeight */ true); @@ -1370,6 +1381,7 @@ TEST(Operator, conv) { const int FILTER_SIZE_Y = 3; const int CHANNELS = 3; const int IMAGE_SIZE = 16; + const int IMAGE_SIZE_Y = 8; OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); operatorConf.set_type("conv"); ConvConfig* conv = operatorConf.mutable_conv_conf(); @@ -1384,17 +1396,18 @@ TEST(Operator, conv) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(IMAGE_SIZE); - int output_x = - outputSize(conv->img_size(), conv->filter_size(), conv->padding(), - conv->stride(), /* caffeMode */ true); - conv->set_output_x(output_x); - config.layerConfig.set_size(output_x * output_x * - config.layerConfig.num_filters()); - config.layerConfig.set_size(conv->output_x() * conv->output_x() * + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + 
conv->set_output_y(outputSize(conv->img_size_y(), conv->filter_size_y(), + conv->padding_y(), conv->stride_y(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * NUM_FILTERS); config.inputDefs.push_back( - {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE * CHANNELS, 0}); + {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); config.inputDefs.push_back( {INPUT_DATA, "layer_1", FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, 0}); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 42c74661d2b2cebe0c2f5f14d0970ab2f1fec866..2d5cd29aed7d92a993c26285e41ff537e0239970 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -203,6 +203,8 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu, } resizeAndCopy(udp, src.udp, useGpu, stream); resizeAndCopy(strs, src.strs, useGpu, stream); + frameWidth = src.frameWidth; + frameHeight = src.frameHeight; } int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq, diff --git a/paddle/trainer/tests/test_config.conf b/paddle/trainer/tests/test_config.conf index 664e18cb986811ffca2a4865c5f50045ace122e1..2a4548896ffe0770f48b6c375c41eaf452b19366 100644 --- a/paddle/trainer/tests/test_config.conf +++ b/paddle/trainer/tests/test_config.conf @@ -59,7 +59,6 @@ pool = img_pool_layer(input=fc2, padding_y=2, stride=2, stride_y=3, - img_width=3, pool_type=CudnnAvgPooling()) concat = concat_layer(input=[fc3, fc4]) diff --git a/proto/ModelConfig.proto.m4 b/proto/ModelConfig.proto.m4 index aea77248cbac0f3ee044b05894d37718e692a0fc..3c35075a92ee59b28815ab02d89040444c8e2c44 100644 --- a/proto/ModelConfig.proto.m4 +++ b/proto/ModelConfig.proto.m4 @@ -76,6 +76,12 @@ message ConvConfig { required uint32 filter_size_y = 10; required uint32 padding_y = 11; required uint32 stride_y = 12; + + // if not set, use output_x + optional uint32 output_y = 13 [default = 0]; + + // if not set, use img_size + optional uint32 img_size_y = 14 [default = 0]; } message PoolConfig { @@ -121,11 +127,9 @@ message PoolConfig { } message SppConfig { - required string pool_type = 1; - required uint32 pyramid_height = 2; - required uint32 channels = 3; - required uint32 img_size = 4; - optional uint32 img_size_y = 5; + required ImageConfig image_conf = 1; + required string pool_type = 2; + required uint32 pyramid_height = 3; } message NormConfig { @@ -155,6 +159,12 @@ message NormConfig { // fixed window: shared a fixed window for each value // sliding window: have a different window for each value optional bool blocked = 8; + + // if not set, use output_x + optional uint32 output_y = 9 [default = 0]; + + // if not set, use img_size + optional uint32 img_size_y = 10 [default = 0]; } message BlockExpandConfig { @@ -179,12 +189,8 @@ message BlockExpandConfig { } message MaxOutConfig { - required uint32 channels = 1; + required ImageConfig image_conf = 1; required uint32 groups = 2; - - // The size of input feature map. - required uint32 img_size_x = 3; - required uint32 img_size_y = 4; } message ProjectionConfig { @@ -225,12 +231,10 @@ message OperatorConfig { message BilinearInterpConfig { // The size of input feature map. - optional uint32 img_size_x = 1; - optional uint32 img_size_y = 2; + required ImageConfig image_conf = 1; // The size of output feature map. 
- required uint32 out_size_x = 3; - required uint32 out_size_y = 4; - required uint32 num_channels = 5; + required uint32 out_size_x = 2; + required uint32 out_size_y = 3; } message ImageConfig { @@ -240,6 +244,7 @@ // The size of input feature map. required uint32 img_size = 8; + required uint32 img_size_y = 9; } message LayerInputConfig { @@ -412,7 +417,10 @@ sinclude(`ModelConfigLayer.proto.m4') // string type is used for flexibility: different types can be converted // to string and reinterpreted in the user's own layer implementation. optional string user_arg = 49; - + + // to indicate rectangular image data + optional uint64 height = 50; + optional uint64 width = 51; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index dbe2f3b29278c259945564959690a3aa6c0cfbe0..a7ad40e4837b1cca769453babe6bbc608233e184 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -138,7 +138,14 @@ def init_config_environment( g_root_submodel=None, g_submodel_map={}, g_submodel_stack=[], - g_add_submodel_suffix=False, ): + g_add_submodel_suffix=False, + + # Whether the current layer needs to pass the image height and width. + # It defaults to true, but is set to false inside a recurrent_layer_group, + # where the image is converted to a sequence: the image height becomes + # the sequence length, and the image width becomes the feature length of + # each timestep. + g_pass_height_width=True, ): for k, v in locals().iteritems(): globals()[k] = copy.deepcopy(v) @@ -592,6 +599,7 @@ class DotMulProjection(Projection): def calc_parameter_dims(self, input_size, output_size): return [1, output_size] + # ScalingProjection @config_class class ScalingProjection(Projection): @@ -685,9 +693,9 @@ class ConvProjection(Projection): parse_conv(conv_conf, input_layer_name, self.proj_conf.conv_conf, num_filters) - # TODO: support rectangle input - self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x - **2) * num_filters + self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \ + self.proj_conf.conv_conf.output_y * \ + num_filters def calc_output_size(self, input_layer_config): return self.proj_conf.output_size @@ -762,8 +770,9 @@ class ConvOperator(Operator): parse_conv(conv_conf, MakeLayerNameInSubmodel(input_layer_names[0]), self.operator_conf.conv_conf, num_filters) - self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x - **2) * num_filters + self.operator_conf.output_size = self.operator_conf.conv_conf.output_x * \ + self.operator_conf.conv_conf.output_y * \ + num_filters config_assert(len(input_layer_names) == 2, "Conv is binary operator") @@ -798,14 +807,12 @@ class Conv(Cfg): config_assert(output_x <= 0) -# please refer to the comments in proto/ModelConfig.proto @config_class class BilinearInterp(Cfg): - def __init__(self, out_size_x=None, out_size_y=None, num_channels=None): + def __init__(self, out_size_x=None, out_size_y=None, channels=None): self.add_keys(locals()) -# please refer to the comments in proto/ModelConfig.proto @config_class class Pool(Cfg): def __init__(self, pool_type, channels, size_x, size_y=None, - img_width=None, start=None, stride=None, stride_y=None, padding=None, padding_y=None): self.add_keys(locals()) -# please refer to the comments in proto/ModelConfig.proto @config_class class SpatialPyramidPool(Cfg): - def __init__(self, pool_type, pyramid_height, channels, img_width=None): + def
__init__(self, pool_type, pyramid_height, channels): self.add_keys(locals()) -# please refer to the comments in proto/ModelConfig.proto @config_class class Norm(Cfg): def __init__(self, @@ -844,7 +848,6 @@ self.add_keys(locals()) -# please refer to the comments in proto/ModelConfig.proto @config_class class Image(Cfg): def __init__(self, channels, img_size=None): @@ -1051,18 +1054,8 @@ def TestData(data_config, async_load_data=None): g_config.test_data_config.async_load_data = async_load_data -def parse_bilinear(bilinear, input_layer_name, bilinear_conf): - bilinear_conf.out_size_x = bilinear.out_size_x - bilinear_conf.out_size_y = bilinear.out_size_y - bilinear_conf.num_channels = bilinear.num_channels - - ''' -caffe_mode: compute the output size using floor instead of ceil, - which is consistent of caffe and CuDNN's convention. ''' - - +#caffe_mode: compute the output size using floor instead of ceil, +# which is consistent with caffe and CuDNN's convention. def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): output = (2 * padding + img_size - filter_size) / float(stride) if caffe_mode: @@ -1071,20 +1064,34 @@ return 1 + int(math.ceil(output)) -''' -calcualte image_size based on output_size for convolution. -It is the reverse function of cnn_output_size -''' - - +#calculate image_size based on output_size for convolution. +#It is the reverse function of cnn_output_size def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): - if caffe_mode: - img_size = (output_size - 1) * stride + filter_size - 2 * padding - else: - img_size = (output_size - 2) * stride + filter_size - 2 * padding + 1 + img_size = (output_size - 1) * stride + filter_size - 2 * padding + if not caffe_mode: + img_size = img_size + 1 return img_size +def set_img_size(input_layer_name, channels): + input = g_layer_map[input_layer_name] + img_pixels = input.size / channels + img_size = input.width if input.width > 0 else int(img_pixels**0.5) + img_size_y = input.height if input.height > 0 else int(img_pixels / + img_size) + config_assert( + img_size * img_size_y == img_pixels, + "Input layer %s: Incorrect input image size %d * %d for input image pixels %d" + % (input_layer_name, img_size, img_size_y, img_pixels)) + return img_size, img_size_y + + +def parse_bilinear(bilinear, input_layer_name, bilinear_conf): + parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) + bilinear_conf.out_size_x = bilinear.out_size_x + bilinear_conf.out_size_y = bilinear.out_size_y + +
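A quick sanity check of the set_img_size() inference above; this is a standalone sketch, not part of the patch. Here `//` stands in for the Python 2 integer `/` used throughout config_parser.py, and the sample numbers mirror the test configs further down:

    def infer_img_size(size, channels, height=0, width=0):
        # Mirrors set_img_size(): prefer the explicit width/height hints,
        # otherwise fall back to a square feature map.
        img_pixels = size // channels
        img_size = width if width > 0 else int(img_pixels**0.5)
        img_size_y = height if height > 0 else img_pixels // img_size
        assert img_size * img_size_y == img_pixels
        return img_size, img_size_y

    # 3200 values in 16 channels with hints 20x10 give a 10 x 20 rectangle
    # (the test_spp_layer.py case below).
    assert infer_img_size(3200, 16, height=20, width=10) == (10, 20)
    # Without hints the fallback is square: 4096 / 4 = 1024, i.e. 32 x 32.
    assert infer_img_size(4096, 4) == (32, 32)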
def parse_pool(pool, input_layer_name, pool_conf): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in [ @@ -1100,14 +1107,8 @@ pool_conf.size_y = default(pool.size_y, pool_conf.size_x) pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) - img_pixels = g_layer_map[input_layer_name].size / pool.channels - # the img_width may be removed, - # and it can be calculated automatically later. - pool_conf.img_size = default(pool.img_width, int(img_pixels**0.5)) - pool_conf.img_size_y = img_pixels / pool_conf.img_size - config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels, - "Incorrect input image size %d for input image pixels %d" % - (pool_conf.img_size, img_pixels)) + pool_conf.img_size, pool_conf.img_size_y = \ + set_img_size(input_layer_name, pool.channels) config_assert(not pool.start, "start is deprecated in pooling.") @@ -1123,29 +1124,18 @@ def parse_spp(spp, input_layer_name, spp_conf): + parse_image(spp, input_layer_name, spp_conf.image_conf) spp_conf.pool_type = spp.pool_type config_assert(spp.pool_type in ['max-projection', 'avg-projection'], "pool-type %s is not in " "['max-projection', 'avg-projection']" % spp.pool_type) spp_conf.pyramid_height = spp.pyramid_height - spp_conf.channels = spp.channels - - img_pixels = g_layer_map[input_layer_name].size / spp_conf.channels - - spp_conf.img_size = default(spp.img_width, int(img_pixels**0.5)) - spp_conf.img_size_y = img_pixels / spp_conf.img_size - config_assert(spp_conf.img_size * spp_conf.img_size_y == img_pixels, - "Incorrect input image size %d for input image pixels %d" % - (spp_conf.img_size, img_pixels)) def parse_image(image, input_layer_name, image_conf): image_conf.channels = image.channels - image_pixels = g_layer_map[input_layer_name].size / image_conf.channels - image_conf.img_size = int(image_pixels**0.5) - config_assert((image_conf.img_size**2) == image_pixels, - "Incorrect input image size %d for input image pixels %d" % - (image_conf.img_size, image_pixels)) + image_conf.img_size, image_conf.img_size_y = \ + set_img_size(input_layer_name, image_conf.channels) def parse_norm(norm, input_layer_name, norm_conf): @@ -1159,24 +1149,18 @@ norm_conf.pow = norm.pow norm_conf.blocked = norm.blocked - img_pixels = g_layer_map[input_layer_name].size / norm.channels - norm_conf.img_size = int(img_pixels**0.5) - config_assert((norm_conf.img_size**2) == img_pixels, - "Incorrect input image size %d for input image pixels %d" % - (norm_conf.img_size, img_pixels)) + norm_conf.img_size, norm_conf.img_size_y = \ + set_img_size(input_layer_name, norm.channels) norm_conf.output_x = norm_conf.img_size + norm_conf.output_y = norm_conf.img_size_y if norm.norm_type in ['cmrnorm-projection']: norm_conf.scale /= norm.size else: norm_conf.scale /= norm.size**2
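For intuition, a worked example of the cnn_output_size()/cnn_image_size() helpers defined above, in caffe mode and with illustrative numbers only:

    # A 3x3 filter with padding 1 and stride 1 preserves the size:
    #   output = 1 + floor((2*1 + 16 - 3) / 1) = 16
    #   image  = (16 - 1) * 1 + 3 - 2*1       = 16
    assert cnn_output_size(16, 3, 1, 1, True) == 16
    assert cnn_image_size(16, 3, 1, 1, True) == 16
    # With stride 2 the mapping is many-to-one, so cnn_image_size() recovers
    # the smallest image that still yields the given output:
    assert cnn_output_size(16, 3, 1, 2, True) == 8  # 1 + floor(15 / 2)
    assert cnn_image_size(8, 3, 1, 2, True) == 15   # and 15 maps back to 8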
-''' -caffe_mode: compute the output size using floor instead of ceil, - which is consistent of caffe and CuDNN's convention. -''' - - +#caffe_mode: compute the output size using floor instead of ceil, +# which is consistent with caffe and CuDNN's convention. def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.filter_size = conv.filter_size conv_conf.filter_size_y = conv.filter_size_y @@ -1190,33 +1174,24 @@ if not trans: conv_conf.filter_channels = conv.channels / conv.groups - - img_pixels = g_layer_map[input_layer_name].size / conv.channels - print('channels=%d size=%d' % (conv.channels, - g_layer_map[input_layer_name].size)) - conv_conf.img_size = int(img_pixels**0.5) - config_assert((conv_conf.img_size**2) == img_pixels, ( - "Input layer %s: Incorrect input image size %d for input " + - "image pixels %d") % - (input_layer_name, conv_conf.img_size, img_pixels)) - + conv_conf.img_size, conv_conf.img_size_y = \ + set_img_size(input_layer_name, conv.channels) conv_conf.output_x = cnn_output_size( conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) + conv_conf.output_y = cnn_output_size( + conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, + conv_conf.stride_y, conv_conf.caffe_mode) else: conv_conf.filter_channels = num_filters / conv.groups - - outputSize = g_layer_map[input_layer_name].size / conv.channels - print('channels=%d size=%d' % (conv.channels, - g_layer_map[input_layer_name].size)) - conv_conf.output_x = int(outputSize**0.5) - config_assert((conv_conf.output_x**2) == outputSize, ( - "Input layer %s: Incorrect input image size %d for input " + - "image pixels %d") % - (input_layer_name, conv_conf.output_x, outputSize)) + conv_conf.output_x, conv_conf.output_y = \ + set_img_size(input_layer_name, conv.channels) conv_conf.img_size = cnn_image_size( conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) + conv_conf.img_size_y = cnn_image_size( + conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, + conv_conf.stride_y, conv_conf.caffe_mode) def parse_block_expand(block_expand, input_layer_name, block_expand_conf): @@ -1245,10 +1220,8 @@ def parse_maxout(maxout, input_layer_name, maxout_conf): - maxout_conf.channels = maxout.channels + parse_image(maxout, input_layer_name, maxout_conf.image_conf) maxout_conf.groups = maxout.groups - maxout_conf.img_size_x = maxout.img_size_x - maxout_conf.img_size_y = maxout.img_size_y # Define an evaluator @@ -1375,6 +1348,12 @@ class LayerBase(object): g_current_submodel.layer_names.append(self.config.name) + if self.config.type != 'data' and g_pass_height_width: + height = self.get_input_layer(0).height + width = self.get_input_layer(0).width + if height and width: + self.set_layer_height_width(height, width) + def get_input_layer(self, input_index): return g_layer_map[self.config.inputs[input_index].input_layer_name] @@ -1492,6 +1471,23 @@ class LayerBase(object): 'Different inputs result in' + 'different layer size at layer %s' % self.config.name) + def set_layer_height_width(self, height, width): + self.config.height = height + self.config.width = width + + def set_cnn_layer(self, + input_layer_name, + height, + width, + channels, + is_print=True): + size = height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(height, width) + if is_print: + print("output for %s: c = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, height, width, size)) + @config_layer('multi_class_cross_entropy_with_selfnorm') class
MultiClassCrossEntropySelfNormCostLayer(LayerBase): @@ -1581,9 +1577,11 @@ class PrintLayer(LayerBase): @config_layer('data') class DataLayer(LayerBase): - def __init__(self, name, size, device=None): + def __init__(self, name, size, height=None, width=None, device=None): super(DataLayer, self).__init__( name, 'data', size, inputs=[], device=device) + if height and width: + self.set_layer_height_width(height, width) ''' @@ -1682,14 +1680,13 @@ class ConvLayerBase(LayerBase): for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) - parse_conv(self.inputs[input_index].conv, input_layer.name, - self.config.inputs[input_index].conv_conf, num_filters) conv_conf = self.config.inputs[input_index].conv_conf + parse_conv(self.inputs[input_index].conv, input_layer.name, + conv_conf, num_filters) psize = self.calc_parameter_size(conv_conf) - print("output size for %s is %d " % (name, conv_conf.output_x)) self.create_input_parameter(input_index, psize) - self.set_layer_size( - (conv_conf.output_x**2) * self.config.num_filters) + self.set_cnn_layer(name, conv_conf.output_y, conv_conf.output_x, + self.config.num_filters) psize = self.config.size if shared_biases: @@ -1776,10 +1773,11 @@ class NormLayer(LayerBase): name, 'norm', 0, inputs=inputs, device=device) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) - parse_norm(self.inputs[input_index].norm, input_layer.name, - self.config.inputs[input_index].norm_conf) norm_conf = self.config.inputs[input_index].norm_conf - self.set_layer_size((norm_conf.output_x**2) * norm_conf.channels) + parse_norm(self.inputs[input_index].norm, input_layer.name, + norm_conf) + self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x, + norm_conf.channels, False) @config_layer('pool') @@ -1789,13 +1787,11 @@ class PoolLayer(LayerBase): name, 'pool', 0, inputs=inputs, device=device) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) - parse_pool(self.inputs[input_index].pool, input_layer.name, - self.config.inputs[input_index].pool_conf) pool_conf = self.config.inputs[input_index].pool_conf - print("output size for %s is %d*%d " % (name, pool_conf.output_y, - pool_conf.output_x)) - self.set_layer_size( - (pool_conf.output_x * pool_conf.output_y) * pool_conf.channels) + parse_pool(self.inputs[input_index].pool, input_layer.name, + pool_conf) + self.set_cnn_layer(name, pool_conf.output_y, pool_conf.output_x, + pool_conf.channels) @config_layer('spp') @@ -1805,12 +1801,10 @@ class SpatialPyramidPoolLayer(LayerBase): name, 'spp', 0, inputs=inputs, device=device) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) - parse_spp(self.inputs[input_index].spp, input_layer.name, - self.config.inputs[input_index].spp_conf) spp_conf = self.config.inputs[input_index].spp_conf - output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1) - print("output size for %s is %d " % (name, output_size)) - self.set_layer_size(output_size * spp_conf.channels) + parse_spp(self.inputs[input_index].spp, input_layer.name, spp_conf) + output_x = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1) + self.set_cnn_layer(name, 1, output_x, spp_conf.image_conf.channels) @config_layer('batch_norm') @@ -1872,10 +1866,10 @@ class BatchNormLayer(LayerBase): self.config.moving_average_fraction = moving_average_fraction input_layer = self.get_input_layer(0) - parse_image(self.inputs[0].image, input_layer.name, - self.config.inputs[0].image_conf) 
image_conf = self.config.inputs[0].image_conf - self.set_layer_size((image_conf.img_size**2) * image_conf.channels) + parse_image(self.inputs[0].image, input_layer.name, image_conf) + self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size, + image_conf.channels) psize = self.calc_parameter_size(image_conf) dims = [1, psize] @@ -1933,11 +1927,12 @@ class MaxOutLayer(LayerBase): super(MaxOutLayer, self).__init__( name, 'maxout', 0, inputs=inputs, **xargs) input_layer = self.get_input_layer(0) - parse_maxout(self.inputs[0].maxout, input_layer.name, - self.config.inputs[0].maxout_conf) maxout_conf = self.config.inputs[0].maxout_conf + parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf) self.set_layer_size(g_layer_map[input_layer.name].size / maxout_conf.groups) + self.set_layer_height_width(g_layer_map[input_layer.name].height, + g_layer_map[input_layer.name].width) # key: cost type @@ -2517,11 +2512,10 @@ class BilinearInterpLayer(LayerBase): super(BilinearInterpLayer, self).__init__( name, 'bilinear_interp', 0, inputs=inputs, **xargs) input_layer = self.get_input_layer(0) - parse_bilinear(self.inputs[0].bilinear_interp, input_layer.name, - self.config.inputs[0].bilinear_interp_conf) - conf = self.inputs[0].bilinear_interp - self.set_layer_size(conf.out_size_x * conf.out_size_y * - conf.num_channels) + conf = self.config.inputs[0].bilinear_interp_conf + parse_bilinear(self.inputs[0].bilinear_interp, input_layer.name, conf) + self.set_cnn_layer(name, conf.out_size_y, conf.out_size_x, + conf.image_conf.channels) @config_layer('sum_to_one_norm') @@ -2994,6 +2988,8 @@ class CTCLayer(LayerBase): @config_layer('recurrent_layer_group') class RecurrentLayerGroup(LayerBase): def __init__(self, name, device=None): + global g_pass_height_width + g_pass_height_width = False super(RecurrentLayerGroup, self).__init__( name, 'recurrent_layer_group', 0, inputs=[], device=device) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index d984e843204c1cd99ee5b8941dc056c091504869..fbb28e6caf694515a2aa18ef6f45cd61516dff6f 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -763,7 +763,7 @@ def mixed_layer(size=0, @layer_support() -def data_layer(name, size, layer_attr=None): +def data_layer(name, size, height=None, width=None, layer_attr=None): """ Define DataLayer For NeuralNetwork. @@ -778,6 +778,10 @@ :type name: basestring :param size: Size of this data layer. :type size: int + :param height: Height of this data layer, used for image + :type height: int|None + :param width: Width of this data layer, used for image + :type width: int|None :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object.
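With the new height and width arguments, a rectangular image input can be declared directly in the config. A minimal usage sketch, with an illustrative name and sizes:

    # A 3-channel 20x10 image; size must equal channels * height * width.
    data = data_layer(name='image', size=3 * 20 * 10, height=20, width=10)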
@@ -787,6 +791,8 @@ def data_layer(name, size, layer_attr=None): type=LayerType.DATA, name=name, size=size, + height=height, + width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name, LayerType.DATA, size=size) @@ -1480,7 +1486,7 @@ def bilinear_interp_layer(input, bilinear_interp=BilinearInterp( out_size_x=out_size_x, out_size_y=out_size_y, - num_channels=num_channels)), + channels=num_channels)), type=LayerType.BILINEAR_INTERP_LAYER, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( @@ -1908,8 +1914,7 @@ def img_pool_layer(input, layer_attr=None, pool_size_y=None, stride_y=None, - padding_y=None, - img_width=None): + padding_y=None): """ Image pooling Layer. @@ -1940,9 +1945,6 @@ def img_pool_layer(input, :type stride_y: int|None :param layer_attr: Extra Layer attribute. :type layer_attr: ExtraLayerAttribute - :param img_width: the width of input feature map. If it is None, the input feature - map should be square. - :type img_width: int|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -1978,8 +1980,7 @@ def img_pool_layer(input, padding=padding, size_y=pool_size_y, stride_y=stride_y, - padding_y=padding_y, - img_width=img_width)) + padding_y=padding_y)) ], **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( @@ -1997,7 +1998,6 @@ def spp_layer(input, num_channels=None, pool_type=None, pyramid_height=None, - img_width=None, layer_attr=None): """ Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. @@ -2014,9 +2014,6 @@ def spp_layer(input, :type scale: BasePoolingType :param pyramid_height: pyramid height. :type pyramid_height: int - :param img_width: the width of input feature map. If it is None, the input feature - map should be square. - :type img_width: int|None :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. 
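As a size check for the spp_layer change: pyramid level l contributes 4^l bins per channel, so a pyramid of height H emits (4^H - 1) / 3 values per channel. That is the output_x computed in SpatialPyramidPoolLayer above, and why test_spp_layer.protostr below reports height: 1 and width: 5 for pyramid_height=2:

    pyramid_height = 2
    bins_per_channel = (4**pyramid_height - 1) // (4 - 1)  # 1 + 4 = 5
    assert bins_per_channel == 5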
@@ -2043,8 +2040,7 @@ def spp_layer(input, spp=SpatialPyramidPool( pool_type=type_name, channels=num_channels, - pyramid_height=pyramid_height, - img_width=img_width)), + pyramid_height=pyramid_height)), **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index 1f262af21126c17eb133b92c84a1ae3bb280a1d6..1a577b8d9b1e1915236ba6afcfa97040d70c707a 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -26,11 +26,15 @@ layers { filter_size_y: 32 padding_y: 1 stride_y: 1 + output_y: 227 + img_size_y: 256 } } bias_parameter_name: "___conv_0__.wbias" num_filters: 64 shared_biases: true + height: 227 + width: 227 } layers { name: "__batch_norm_0__" @@ -43,6 +47,7 @@ layers { image_conf { channels: 64 img_size: 227 + img_size_y: 227 } } inputs { @@ -55,6 +60,8 @@ layers { } bias_parameter_name: "___batch_norm_0__.wbias" moving_average_fraction: 0.9 + height: 227 + width: 227 } layers { name: "__crmnorm_0__" @@ -72,8 +79,12 @@ layers { output_x: 227 img_size: 227 blocked: false + output_y: 227 + img_size_y: 227 } } + height: 227 + width: 227 } layers { name: "__pool_0__" @@ -97,6 +108,8 @@ layers { padding_y: 0 } } + height: 196 + width: 196 } parameters { name: "___conv_0__.w0" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index 38346354080b02bebd937fd998fd3c63c8030346..ac1e2adff5d153962cd63a9e6af6040c0d994acc 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -26,6 +26,8 @@ layers { filter_size_y: 32 padding_y: 1 stride_y: 1 + output_y: 227 + img_size_y: 198 } } bias_parameter_name: "___conv_0__.wbias" @@ -43,6 +45,7 @@ layers { image_conf { channels: 64 img_size: 256 + img_size_y: 256 } } inputs { @@ -55,6 +58,8 @@ layers { } bias_parameter_name: "___batch_norm_0__.wbias" moving_average_fraction: 0.9 + height: 256 + width: 256 } layers { name: "__crmnorm_0__" @@ -72,8 +77,12 @@ layers { output_x: 256 img_size: 256 blocked: false + output_y: 256 + img_size_y: 256 } } + height: 256 + width: 256 } layers { name: "__pool_0__" @@ -97,6 +106,8 @@ layers { padding_y: 0 } } + height: 225 + width: 225 } parameters { name: "___conv_0__.w0" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr index 2b3951c242411e0c0990a52bcb2ae6b1723a9367..2943ab130bd7d6f3b78ea611f1c35850ccaf5e92 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr @@ -177,6 +177,8 @@ layers { filter_size_y: 3 padding_y: 0 stride_y: 1 + output_y: 30 + img_size_y: 32 } num_filters: 64 } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr index 13d0d477eb58f6da887d0ad9c683caef37e00010..9fae596f281d44dc24c45cb3c750233266e95948 100644 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr @@ -26,11 +26,15 @@ layers { filter_size_y: 3 padding_y: 1 stride_y: 1 + output_y: 48 + img_size_y: 48 } } bias_parameter_name: "___conv_0__.wbias" num_filters: 16 shared_biases: true + height: 48 + width: 48 } layers { name: "__bilinear_interp_layer_0__" @@ -40,11 +44,17 @@ layers { inputs { input_layer_name: "__conv_0__" bilinear_interp_conf { + image_conf { + channels: 16 + img_size: 48 + img_size_y: 48 + } out_size_x: 64 out_size_y: 64 - num_channels: 16 } } + height: 64 + width: 64 } layers { name: "__pool_0__" @@ -55,19 +65,21 @@ layers { input_layer_name: "__bilinear_interp_layer_0__" pool_conf { pool_type: "max-projection" - channels: 4 + channels: 16 size_x: 2 stride: 2 - output_x: 64 - img_size: 128 + output_x: 32 + img_size: 64 padding: 0 size_y: 2 stride_y: 2 - output_y: 64 - img_size_y: 128 + output_y: 32 + img_size_y: 64 padding_y: 0 } } + height: 32 + width: 32 } layers { name: "__fc_layer_0__" @@ -78,6 +90,8 @@ layers { input_layer_name: "__pool_0__" input_parameter_name: "___fc_layer_0__.w0" } + height: 32 + width: 32 } parameters { name: "___conv_0__.w0" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr index 1be2a7ceebfb74d677ac056dcc3a9f72fd31ccd6..c763a95f9d1aefa022f38e0beef6d1c86ebb360d 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr @@ -4,6 +4,8 @@ layers { type: "data" size: 2304 active_type: "" + height: 48 + width: 48 } layers { name: "__conv_0__" @@ -26,11 +28,15 @@ layers { filter_size_y: 3 padding_y: 1 stride_y: 1 + output_y: 48 + img_size_y: 48 } } bias_parameter_name: "___conv_0__.wbias" num_filters: 16 shared_biases: true + height: 48 + width: 48 } layers { name: "__maxout_layer_0__" @@ -40,12 +46,16 @@ layers { inputs { input_layer_name: "__conv_0__" maxout_conf { - channels: 16 + image_conf { + channels: 16 + img_size: 48 + img_size_y: 48 + } groups: 2 - img_size_x: 0 - img_size_y: 0 } } + height: 48 + width: 48 } layers { name: "__pool_0__" @@ -69,48 +79,58 @@ layers { padding_y: 0 } } + height: 24 + width: 24 } layers { name: "__conv_1__" type: "exconv" - size: 18432 + size: 73728 active_type: "" inputs { input_layer_name: "__pool_0__" input_parameter_name: "___conv_1__.w0" conv_conf { filter_size: 3 - channels: 32 + channels: 8 stride: 1 padding: 1 groups: 1 - filter_channels: 32 - output_x: 12 - img_size: 12 + filter_channels: 8 + output_x: 24 + img_size: 24 caffe_mode: true filter_size_y: 3 padding_y: 1 stride_y: 1 + output_y: 24 + img_size_y: 24 } } bias_parameter_name: "___conv_1__.wbias" num_filters: 128 shared_biases: true + height: 24 + width: 24 } layers { name: "__maxout_layer_1__" type: "maxout" - size: 9216 + size: 18432 active_type: "" inputs { - input_layer_name: "__conv_0__" + input_layer_name: "__conv_1__" maxout_conf { - channels: 128 + image_conf { + channels: 128 + img_size: 24 + img_size_y: 24 + } groups: 4 - img_size_x: 0 - img_size_y: 0 } } + height: 24 + width: 24 } layers { name: "__block_expand_layer_0__" @@ -118,7 +138,7 @@ layers { size: 192 active_type: "" inputs { - input_layer_name: "__maxout_layer_0__" + input_layer_name: "__maxout_layer_1__" block_expand_conf { channels: 32 stride_x: 
1 @@ -133,6 +153,8 @@ layers { img_size_y: 0 } } + height: 24 + width: 24 } layers { name: "__fc_layer_0__" @@ -143,6 +165,8 @@ layers { input_layer_name: "__block_expand_layer_0__" input_parameter_name: "___fc_layer_0__.w0" } + height: 24 + width: 24 } parameters { name: "___conv_0__.w0" @@ -164,9 +188,9 @@ parameters { } parameters { name: "___conv_1__.w0" - size: 36864 + size: 9216 initial_mean: 0.0 - initial_std: 0.0833333333333 + initial_std: 0.166666666667 initial_strategy: 0 initial_smart: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr index 8b0a8f2146b709ee67981049da8061597e1716be..ca1b2d8cffd6b472dfe40feeeb762e169bc853c7 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr @@ -4,6 +4,8 @@ layers { type: "data" size: 3200 active_type: "" + height: 20 + width: 10 } layers { name: "__spp_0__" @@ -13,13 +15,17 @@ layers { inputs { input_layer_name: "data" spp_conf { + image_conf { + channels: 16 + img_size: 10 + img_size_y: 20 + } pool_type: "max-projection" pyramid_height: 2 - channels: 16 - img_size: 10 - img_size_y: 20 } } + height: 1 + width: 5 } input_layer_names: "data" output_layer_names: "__spp_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py index e15a55b412f9459ecd89a0f654256097099c1398..be83f4f83c5d05ea2ffd9e3df0c09fb1a37a3e57 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py @@ -17,7 +17,7 @@ bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64) pool = img_pool_layer( input=bilinear, - num_channels=4, + num_channels=16, pool_size=2, stride=2, pool_type=MaxPooling()) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py index 081430d716093877db6b2e44ac5417c37ede9a6e..eb14270baa0c4ca0b84d2121a80fde0b45eda54a 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py @@ -2,7 +2,7 @@ from paddle.trainer_config_helpers import * settings(batch_size=1000, learning_rate=1e-5) -data = data_layer(name='data', size=2304) +data = data_layer(name='data', size=2304, height=48, width=48) conv = img_conv_layer( input=data, @@ -21,16 +21,21 @@ pool = img_pool_layer( conv2 = img_conv_layer( input=pool, filter_size=3, - num_channels=32, + num_channels=8, num_filters=128, padding=1, act=LinearActivation(), bias_attr=True) -maxout2 = maxout_layer(input=conv, num_channels=128, groups=4) +maxout2 = maxout_layer(input=conv2, num_channels=128, groups=4) block = block_expand_layer( - input=maxout, num_channels=32, stride_x=1, stride_y=1, block_x=1, block_y=6) + input=maxout2, + num_channels=32, + stride_x=1, + stride_y=1, + block_x=1, + block_y=6) fc = fc_layer(input=block, size=384, bias_attr=False) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py index e20ffb584e8bdd86100455d4e55fe633b878e034..e0b0d0d3be252700d99f7097f0353df885efcf07 100644 --- 
a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py @@ -2,13 +2,9 @@ from paddle.trainer_config_helpers import * settings(batch_size=100, learning_rate=1e-5) -data = data_layer(name='data', size=3200) +data = data_layer(name='data', size=3200, height=20, width=10) spp = spp_layer( - input=data, - pyramid_height=2, - num_channels=16, - pool_type=MaxPooling(), - img_width=10) + input=data, pyramid_height=2, num_channels=16, pool_type=MaxPooling()) outputs(spp)
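A closing note on the proto changes: output_y and img_size_y in ConvConfig and NormConfig are optional with default 0, and every consumer falls back to the square x-value when they are unset. A sketch of the same fallback from Python, assuming a ConvConfig message read through the generated protobuf API (HasField() is valid on optional proto2 fields):

    def conv_shape(conf):
        # Mirror the C++ has_img_size_y()/has_output_y() fallbacks.
        img_size_y = conf.img_size_y if conf.HasField('img_size_y') else conf.img_size
        output_y = conf.output_y if conf.HasField('output_y') else conf.output_x
        return (conf.img_size, img_size_y), (conf.output_x, output_y)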