Modify the deconv layer implementation following luotao1's comments

fb20187a · wangyang59 · 3d72e949 · fb20187a · fb20187a · fb20187a
7 changed files
--- a/paddle/gserver/layers/ConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ConvBaseLayer.cpp
@@ -89,42 +89,41 @@ size_t ConvBaseLayer::calOutputSize() {
  clearAndReserve(&outputW_);
  size_t layerSize = 0;
-  if (!isDeconv_) {
+  auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) {
    for (size_t i = 0; i < inputLayers_.size(); i++) {
-      imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
+       inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
-      imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
+       inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
-      if (imgSizeH_[i] == 0)
+       if (isDeconv_) {
-        imgSizeH_[i] = config_.inputs(i).conv_conf().img_size();
+         if (inH[i] == 0)
-      if (imgSizeW_[i] == 0)
+           inH[i] = config_.inputs(i).conv_conf().output_x();
-        imgSizeW_[i] = config_.inputs(i).conv_conf().img_size();
+         if (inW[i] == 0)
-      outputH_.push_back(
+           inW[i] = config_.inputs(i).conv_conf().output_x();
-          outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
+         outH.push_back(
-      outputW_.push_back(
+             imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
-          outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i]));
+         outW.push_back(
-      CHECK_EQ(outputH_[i], outputH_[0]);
+             imageSize(inW[i], filterSize_[i], padding_[i], stride_[i]));
-      CHECK_EQ(outputW_[i], outputW_[0]);
+       } else {
+         if (inH[i] == 0)
+           inH[i] = config_.inputs(i).conv_conf().img_size();
+         if (inW[i] == 0)
+           inW[i] = config_.inputs(i).conv_conf().img_size();
+         outH.push_back(
+             outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
+         outW.push_back(
+             outputSize(inW[i], filterSize_[i], padding_[i], stride_[i]));
+         CHECK_EQ(outH[i], outH[0]);
+         CHECK_EQ(outW[i], outW[0]);
+       }
    }
-    getOutput().setFrameHeight(outputH_[0]);
+    getOutput().setFrameHeight(outH[0]);
-    getOutput().setFrameWidth(outputW_[0]);
+    getOutput().setFrameWidth(outW[0]);
-    layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_);
+    layerSize = outH[0] * outW[0] * size_t(numFilters_);
+  };
+  if (isDeconv_) {
+    setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_);
  } else {
-    for (size_t i = 0; i < inputLayers_.size(); i++) {
+    setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_);
-      outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
-      outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
-      if (outputH_[i] == 0)
-        outputH_[i] = config_.inputs(i).conv_conf().output_x();
-      if (outputW_[i] == 0)
-        outputW_[i] = config_.inputs(i).conv_conf().output_x();
-      imgSizeH_.push_back(
-          imageSize(outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
-      imgSizeW_.push_back(
-          imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i]));
-      CHECK_EQ(imgSizeH_[i], imgSizeH_[0]);
-      CHECK_EQ(imgSizeW_[i], imgSizeW_[0]);
-    }
-    getOutput().setFrameHeight(imgSizeH_[0]);
-    getOutput().setFrameWidth(imgSizeW_[0]);
-    layerSize = imgSizeH_[0] * imgSizeW_[0] * size_t(numFilters_);
  }
  return layerSize;

--- a/paddle/gserver/layers/ConvBaseLayer.h
+++ b/paddle/gserver/layers/ConvBaseLayer.h
@@ -78,8 +78,6 @@ protected:
  /// of output size.
  bool caffeMode_;
 public:
  explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {}

--- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp
@@ -31,14 +31,14 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
   * convTrans, and in other functions too.
   * */
  int channel;
-  int nf;
+  int numFilters;
  /* Initialize the projection */
  for (auto &inputConfig : config_.inputs()) {
    const ConvConfig &conf = inputConfig.conv_conf();
-    nf = (!isDeconv_) ? numFilters_ : conf.channels();
+    numFilters = isDeconv_ ? conf.channels() : numFilters_;
-    subM_.push_back(nf / conf.groups());
+    subM_.push_back(numFilters / conf.groups());
    subN_.push_back(conf.output_x() * conf.output_x());
-    channel = (!isDeconv_) ? conf.channels() : numFilters_;
+    channel = isDeconv_ ? numFilters_ : conf.channels();
    subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
                    conf.groups());
    /* Consistent caffe mode for multiple input */
@@ -99,7 +99,7 @@ void ExpandConvBaseLayer::addUnsharedBias() {
 void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
                                     int inIdx) {
-  int channel = (!isDeconv_) ? channels_[inIdx] : numFilters_;
+  int channel = isDeconv_ ? numFilters_ : channels_[inIdx];
  resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
  real *imgData = image->getData() + startIdx * image->getWidth();
@@ -122,10 +122,10 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
  expandOneFrame(image, startIdx, inIdx);
-  int nf = (!isDeconv_) ? numFilters_ : channels_[inIdx];
+  int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_;
  real *outData =
-      out->getData() + startIdx * subN * nf;
+      out->getData() + startIdx * subN * numFilters;
  real *wgtData = weights_[inIdx]->getW()->getData();
  real *expInData = expandInput_->getData();
@@ -147,7 +147,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
 void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
                                    int inpIdx) {
-  int channel = (!isDeconv_) ? channels_[inpIdx] : numFilters_;
+  int channel = isDeconv_ ? numFilters_ : channels_[inpIdx];
  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];

--- a/paddle/gserver/tests/test_ConvTrans.cpp
+++ b/paddle/gserver/tests/test_ConvTrans.cpp
@@ -189,58 +189,55 @@ void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride,
 }
 TEST(Layer, convTransLayerFwd2) {
-    size_t imgSize, output_x, stride, padding, filter_size;
    MatrixPtr result;
+    result = Matrix::create(1, 5 * 5, false, false);
-    imgSize = 5;
-    output_x = 1;
-    stride = 1;
-    padding = 0;
-    filter_size = 5;
-    result = Matrix::create(1, imgSize * imgSize, false, false);
    result->zeroMem();
    result->add(1.0);
-    doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+    doOneConvtTest(/* imgSize */ 5,
+                   /* output_x */ 1,
+                   /* stride */ 1,
+                   /* padding */ 0,
+                   /* filter_size */ 5,
+                   result);
-    imgSize = 5;
-    output_x = 2;
-    stride = 1;
-    padding = 0;
-    filter_size = 4;
    float resultData[] = {1, 2, 2, 2, 1,
                          2, 4, 4, 4, 2,
                          2, 4, 4, 4, 2,
                          2, 4, 4, 4, 2,
                          1, 2, 2, 2, 1};
-    result = Matrix::create(resultData, 1, imgSize * imgSize, false, false);
+    result->setData(resultData);
-    doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+    doOneConvtTest(/* imgSize */ 5,
+                   /* output_x */ 2,
-    imgSize = 5;
+                   /* stride */ 1,
-    output_x = 2;
+                   /* padding */ 0,
-    stride = 2;
+                   /* filter_size */ 4,
-    padding = 1;
+                   result);
-    filter_size = 5;
    float resultData2[] = {1, 2, 2, 2, 1,
                           2, 4, 4, 4, 2,
                           2, 4, 4, 4, 2,
                           2, 4, 4, 4, 2,
                           1, 2, 2, 2, 1};
-    result = Matrix::create(resultData2, 1, imgSize * imgSize, false, false);
+    result->setData(resultData2);
-    doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+    doOneConvtTest(/* imgSize */ 5,
+                   /* output_x */ 2,
-    imgSize = 5;
+                   /* stride */ 2,
-    output_x = 2;
+                   /* padding */ 1,
-    stride = 2;
+                   /* filter_size */ 5,
-    padding = 0;
+                   result);
-    filter_size = 3;
    float resultData3[] = {1, 1, 2, 1, 1,
                           1, 1, 2, 1, 1,
                           2, 2, 4, 2, 2,
                           1, 1, 2, 1, 1,
                           1, 1, 2, 1, 1};
-    result = Matrix::create(resultData3, 1, imgSize * imgSize, false, false);
+    result->setData(resultData3);
-    doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+    doOneConvtTest(/* imgSize */ 5,
-}
+                   /* output_x */ 2,
+                   /* stride */ 2,
+                   /* padding */ 0,
+                   /* filter_size */ 3,
+                   result);}
 int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -351,12 +351,10 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, convTransLayer) {
  testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false);
-/*
 #ifndef PADDLE_ONLY_CPU
-  testConvLayer("exconv",  trans=  false,  useGpu=  true);
+  testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ true);
-  testConvLayer("cudnn_conv",  trans=  false,  useGpu=  true);
+  // testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
 #endif
-*/
 }
 TEST(Layer, blockExpandLayer) {

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1082,7 +1082,11 @@ def parse_norm(norm, input_layer_name, norm_conf):
    else:
        norm_conf.scale /= norm.size ** 2
-def parse_conv(conv, input_layer_name, conv_conf):
+'''
+caffe_mode: compute the output size using floor instead of ceil,
+            which is consistent with caffe and CuDNN's convention.
+'''
+def parse_conv(conv, input_layer_name, conv_conf, trans=False):
    conv_conf.filter_size = conv.filter_size
    conv_conf.filter_size_y = conv.filter_size_y
    conv_conf.channels = conv.channels
@@ -1093,49 +1097,41 @@ def parse_conv(conv, input_layer_name, conv_conf):
    conv_conf.groups = conv.groups
    conv_conf.filter_channels = conv.channels / conv.groups
    conv_conf.caffe_mode = conv.caffe_mode
-    img_pixels = g_layer_map[input_layer_name].size / conv.channels
+    if not trans:
-    print('channels=%d size=%d'%(conv.channels,
+        img_pixels = g_layer_map[input_layer_name].size / conv.channels
-      g_layer_map[input_layer_name].size))
+        print('channels=%d size=%d'%(conv.channels,
-    conv_conf.img_size = int(img_pixels ** 0.5)
+          g_layer_map[input_layer_name].size))
-    config_assert((conv_conf.img_size ** 2) == img_pixels,
+        conv_conf.img_size = int(img_pixels ** 0.5)
-                  ("Input layer %s: Incorrect input image size %d for input "
+        config_assert((conv_conf.img_size ** 2) == img_pixels,
-                   + "image pixels %d")
+                      ("Input layer %s: Incorrect input image size %d for input "
-                  % (input_layer_name, conv_conf.img_size, img_pixels))
+                       + "image pixels %d")
-    conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size,
+                      % (input_layer_name, conv_conf.img_size, img_pixels))
-                                         conv_conf.padding, conv_conf.stride,
+        if conv.caffe_mode:
-                                         conv_conf.caffe_mode)
+            conv_conf.output_x = \
+                1 + int(math.floor((2 * conv.padding + conv_conf.img_size \
+                - conv.filter_size) / float(conv.stride)))
-def parse_conv_trans(conv, input_layer_name, conv_conf, num_filters):
+        else:
-    conv_conf.filter_size = conv.filter_size
+            conv_conf.output_x = \
-    conv_conf.filter_size_y = conv.filter_size_y
+                1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \
-    conv_conf.channels = conv.channels
+                - conv.filter_size) / float(conv.stride)))
-    conv_conf.padding = conv.padding
-    conv_conf.padding_y = conv.padding_y
-    conv_conf.stride = conv.stride
-    conv_conf.stride_y = conv.stride_y
-    conv_conf.groups = conv.groups
-    conv_conf.filter_channels = num_filters / conv.groups
-    conv_conf.caffe_mode = conv.caffe_mode
-    outputSize = g_layer_map[input_layer_name].size / conv.channels
-    print('channels=%d size=%d'%(conv.channels,
-      g_layer_map[input_layer_name].size))
-    conv_conf.output_x = int(outputSize ** 0.5)
-    config_assert((conv_conf.output_x ** 2) == outputSize,
-                  ("Input layer %s: Incorrect input image size %d for input "
-                   + "image pixels %d")
-                  % (input_layer_name, conv_conf.output_x, outputSize))
-    if conv.caffe_mode:
-        conv_conf.img_size = \
-            (conv_conf.output_x - 1) * conv.stride \
-            + conv.filter_size - 2 * conv.padding
    else:
-        conv_conf.img_size = \
+        outputSize = g_layer_map[input_layer_name].size / conv.channels
-            (conv_conf.output_x - 2) * conv.stride \
+        print('channels=%d size=%d'%(conv.channels,
-            + conv.filter_size - 2 * conv.padding + 1
+          g_layer_map[input_layer_name].size))
+        conv_conf.output_x = int(outputSize ** 0.5)
+        config_assert((conv_conf.output_x ** 2) == outputSize,
+                      ("Input layer %s: Incorrect input image size %d for input "
+                       + "image pixels %d")
+                      % (input_layer_name, conv_conf.output_x, outputSize))
+        if conv.caffe_mode:
+            conv_conf.img_size = \
+                (conv_conf.output_x - 1) * conv.stride \
+                + conv.filter_size - 2 * conv.padding
+        else:
+            conv_conf.img_size = \
+                (conv_conf.output_x - 2) * conv.stride \
+                + conv.filter_size - 2 * conv.padding + 1 
 def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
    block_expand_conf.channels = block_expand.channels
@@ -1685,10 +1681,11 @@ class ConvTransLayerBase(LayerBase):
        for input_index in xrange(len(self.inputs)):
            input_layer = self.get_input_layer(input_index)
-            parse_conv_trans(
+            parse_conv(
                self.inputs[input_index].conv,
                input_layer.name,
-                self.config.inputs[input_index].conv_conf, num_filters)
+                self.config.inputs[input_index].conv_conf, num_filters,
+                trans=True)
            conv_conf = self.config.inputs[input_index].conv_conf
            psize = self.calc_parameter_size(conv_conf)
            print("output size for %s is %d " % (name, conv_conf.output_x))

--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -36,7 +36,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
           "pooling_layer", "lstmemory", "last_seq", "first_seq",
           "cos_sim", "hsigmoid", "conv_projection",
           "regression_cost", 'classification_cost', "LayerOutput",
-           'img_conv_layer', 'img_convTrans_layer', 'img_pool_layer', 'batch_norm_layer',
+           'img_conv_layer', 'img_pool_layer', 'batch_norm_layer',
           'img_cmrnorm_layer', 'addto_layer',
           'concat_layer', 'lstm_step_layer', 'recurrent_group',
           'memory', 'StaticInput', 'expand_layer', 'scaling_layer',