提交 3cf01b5d 编写于 作者: G guosheng

refine ROIPoolLayer

上级 c07cbf7d
...@@ -82,6 +82,11 @@ maxout ...@@ -82,6 +82,11 @@ maxout
.. autoclass:: paddle.v2.layer.maxout .. autoclass:: paddle.v2.layer.maxout
:noindex: :noindex:
roi_pool
--------
.. autoclass:: paddle.v2.layer.roi_pool
:noindex:
Norm Layer Norm Layer
========== ==========
......
...@@ -48,7 +48,7 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -48,7 +48,7 @@ void ROIPoolLayer::forward(PassType passType) {
resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_);
MatrixPtr outputValue = getOutputValue(); MatrixPtr outputValue = getOutputValue();
if (useGpu_) { if (useGpu_) { // TODO(guosheng): implement on GPU later
MatrixPtr dataCpuBuffer; MatrixPtr dataCpuBuffer;
Matrix::resizeOrCreate(dataCpuBuffer, Matrix::resizeOrCreate(dataCpuBuffer,
dataValue->getHeight(), dataValue->getHeight(),
...@@ -90,9 +90,6 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -90,9 +90,6 @@ void ROIPoolLayer::forward(PassType passType) {
false); false);
real* argmaxData = maxIdxs_->getData(); real* argmaxData = maxIdxs_->getData();
size_t uZero = 0;
size_t uOne = 1;
for (size_t n = 0; n < numROIs; ++n) { for (size_t n = 0; n < numROIs; ++n) {
size_t roiBatchIdx = bottomROIs[0]; size_t roiBatchIdx = bottomROIs[0];
size_t roiStartW = round(bottomROIs[1] * spatialScale_); size_t roiStartW = round(bottomROIs[1] * spatialScale_);
...@@ -101,8 +98,8 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -101,8 +98,8 @@ void ROIPoolLayer::forward(PassType passType) {
size_t roiEndH = round(bottomROIs[4] * spatialScale_); size_t roiEndH = round(bottomROIs[4] * spatialScale_);
CHECK_GE(roiBatchIdx, 0); CHECK_GE(roiBatchIdx, 0);
CHECK_LT(roiBatchIdx, batchSize); CHECK_LT(roiBatchIdx, batchSize);
size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL);
size_t roiWidth = std::max(roiEndW - roiStartW + 1, uOne); size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL);
real binSizeH = real binSizeH =
static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_); static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_);
real binSizeW = real binSizeW =
...@@ -115,10 +112,10 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -115,10 +112,10 @@ void ROIPoolLayer::forward(PassType passType) {
size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW)); size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW));
size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH)); size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH));
size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW)); size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW));
hstart = std::min(std::max(hstart + roiStartH, uZero), height_); hstart = std::min(std::max(hstart + roiStartH, 0UL), height_);
wstart = std::min(std::max(wstart + roiStartW, uZero), width_); wstart = std::min(std::max(wstart + roiStartW, 0UL), width_);
hend = std::min(std::max(hend + roiStartH, uZero), height_); hend = std::min(std::max(hend + roiStartH, 0UL), height_);
wend = std::min(std::max(wend + roiStartW, uZero), width_); wend = std::min(std::max(wend + roiStartW, 0UL), width_);
bool isEmpty = (hend <= hstart) || (wend <= wstart); bool isEmpty = (hend <= hstart) || (wend <= wstart);
size_t poolIndex = ph * pooledWidth_ + pw; size_t poolIndex = ph * pooledWidth_ + pw;
......
...@@ -29,6 +29,7 @@ namespace paddle { ...@@ -29,6 +29,7 @@ namespace paddle {
* Reference: * Reference:
* Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. * Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
* Faster R-CNN: Towards Real-Time Object Detection with Region Proposal * Faster R-CNN: Towards Real-Time Object Detection with Region Proposal
* Networks
*/ */
class ROIPoolLayer : public Layer { class ROIPoolLayer : public Layer {
......
...@@ -1257,6 +1257,7 @@ def roi_pool_layer(input, ...@@ -1257,6 +1257,7 @@ def roi_pool_layer(input,
pooled_width, pooled_width,
pooled_height, pooled_height,
spatial_scale, spatial_scale,
num_channels=None,
name=None): name=None):
""" """
A layer used by Fast R-CNN to extract feature maps of ROIs from the last A layer used by Fast R-CNN to extract feature maps of ROIs from the last
...@@ -1274,8 +1275,14 @@ def roi_pool_layer(input, ...@@ -1274,8 +1275,14 @@ def roi_pool_layer(input,
:type pooled_height: int :type pooled_height: int
:param spatial_scale: The spatial scale between the image and feature map. :param spatial_scale: The spatial scale between the image and feature map.
:type spatial_scale: float :type spatial_scale: float
    :param num_channels: The number of input channels.
:type num_channels: int
:return: LayerOutput :return: LayerOutput
""" """
if num_channels is None:
assert input.num_filters is not None
num_channels = input.num_filters
size = num_channels * pooled_width * pooled_height
Layer( Layer(
name=name, name=name,
type=LayerType.ROI_POOL_LAYER, type=LayerType.ROI_POOL_LAYER,
...@@ -1283,7 +1290,8 @@ def roi_pool_layer(input, ...@@ -1283,7 +1290,8 @@ def roi_pool_layer(input,
pooled_width=pooled_width, pooled_width=pooled_width,
pooled_height=pooled_height, pooled_height=pooled_height,
spatial_scale=spatial_scale) spatial_scale=spatial_scale)
return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois]) return LayerOutput(
name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size)
@wrap_name_default("cross_channel_norm") @wrap_name_default("cross_channel_norm")
......
...@@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops ...@@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers) test_kmax_seq_socre_layer test_seq_select_layers test_roi_pool_layer)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "data"
type: "data"
size: 588
active_type: ""
height: 14
width: 14
}
layers {
name: "rois"
type: "data"
size: 10
active_type: ""
}
layers {
name: "__roi_pool_0__"
type: "roi_pool"
active_type: ""
inputs {
input_layer_name: "data"
roi_pool_conf {
pooled_width: 7
pooled_height: 7
spatial_scale: 0.0625
}
}
inputs {
input_layer_name: "rois"
}
}
input_layer_names: "data"
input_layer_names: "rois"
output_layer_names: "__roi_pool_0__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "rois"
layer_names: "__roi_pool_0__"
input_layer_names: "data"
input_layer_names: "rois"
output_layer_names: "__roi_pool_0__"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *

# Feature map input: 3 channels of 14x14 spatial resolution.
feature_map = data_layer(name='data', size=3 * 14 * 14, height=14, width=14)

# Region-of-interest input (batch_idx, x1, y1, x2, y2 per ROI).
roi_input = data_layer(name='rois', size=10)

# Pool each ROI into a fixed 7x7 grid; 1/16 maps image coords to feature coords.
pooled = roi_pool_layer(
    input=feature_map,
    rois=roi_input,
    pooled_width=7,
    pooled_height=7,
    spatial_scale=1. / 16)

outputs(pooled)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册