diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index d4e9d53e5c0955912a594fe8cd9cd41a4080a2d2..203506d7ab84e5a5be2232b077eac2d433a99766 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -82,6 +82,11 @@ maxout .. autoclass:: paddle.v2.layer.maxout :noindex: +roi_pool +-------- +.. autoclass:: paddle.v2.layer.roi_pool + :noindex: + Norm Layer ========== diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..99cfddb0cf3337745a716a8c329713c18b99eda3 --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -0,0 +1,220 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ROIPoolLayer.h" + +namespace paddle { + +REGISTER_LAYER(roi_pool, ROIPoolLayer); + +bool ROIPoolLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + pooledWidth_ = layerConf.pooled_width(); + pooledHeight_ = layerConf.pooled_height(); + spatialScale_ = layerConf.spatial_scale(); + + return true; +} + +void ROIPoolLayer::forward(PassType passType) { + Layer::forward(passType); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + height_ = getInput(0).getFrameHeight(); + if (!height_) height_ = layerConf.height(); + width_ = getInput(0).getFrameWidth(); + if (!width_) width_ = layerConf.width(); + channels_ = getInputValue(0)->getWidth() / width_ / height_; + + size_t batchSize = getInput(0).getBatchSize(); + size_t numROIs = getInput(1).getBatchSize(); + + MatrixPtr dataValue = getInputValue(0); + MatrixPtr roiValue = getInputValue(1); + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + MatrixPtr outputValue = getOutputValue(); + + if (useGpu_) { // TODO(guosheng): implement on GPU later + MatrixPtr dataCpuBuffer; + Matrix::resizeOrCreate(dataCpuBuffer, + dataValue->getHeight(), + dataValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + dataCpuBuffer->copyFrom(*dataValue); + roiCpuBuffer->copyFrom(*roiValue); + dataValue = dataCpuBuffer; + roiValue = roiCpuBuffer; + MatrixPtr outputCpuBuffer; + Matrix::resizeOrCreate(outputCpuBuffer, + outputValue->getHeight(), + outputValue->getWidth(), + false, + false); + outputCpuBuffer->copyFrom(*outputValue); + outputValue = outputCpuBuffer; + } + + real* bottomData = dataValue->getData(); + size_t batchOffset = dataValue->getWidth(); + size_t channelOffset = height_ * width_; + real* bottomROIs = roiValue->getData(); + size_t roiOffset = roiValue->getWidth(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + + real* outputData = outputValue->getData(); + Matrix::resizeOrCreate(maxIdxs_, + numROIs, + channels_ * pooledHeight_ * pooledWidth_, + false, + false); + real* argmaxData = maxIdxs_->getData(); + + for (size_t n = 0; n < numROIs; ++n) { + // the first five elememts of each RoI should be: + // batch_idx, roi_x_start, roi_y_start, roi_x_end, roi_y_end + size_t roiBatchIdx = bottomROIs[0]; + size_t roiStartW = round(bottomROIs[1] * spatialScale_); + size_t roiStartH = round(bottomROIs[2] * spatialScale_); + size_t roiEndW = round(bottomROIs[3] * spatialScale_); + size_t roiEndH = round(bottomROIs[4] * spatialScale_); + CHECK_GE(roiBatchIdx, 0); + CHECK_LT(roiBatchIdx, batchSize); + size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); + real binSizeH = + static_cast(roiHeight) / static_cast(pooledHeight_); + real binSizeW = + static_cast(roiWidth) / static_cast(pooledWidth_); + real* batchData = bottomData + batchOffset * roiBatchIdx; + for (size_t c = 0; c < channels_; ++c) { + for (size_t ph = 0; ph < pooledHeight_; ++ph) { + for (size_t pw = 0; pw < pooledWidth_; ++pw) { + size_t hstart = static_cast(std::floor(ph * binSizeH)); + size_t wstart = static_cast(std::floor(pw * binSizeW)); + size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); + size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); + hstart = std::min(std::max(hstart + roiStartH, 0UL), height_); + wstart = std::min(std::max(wstart + roiStartW, 0UL), width_); + hend = std::min(std::max(hend + roiStartH, 0UL), height_); + wend = std::min(std::max(wend + roiStartW, 0UL), width_); + + bool isEmpty = (hend <= hstart) || (wend <= wstart); + size_t poolIndex = ph * pooledWidth_ + pw; + if (isEmpty) { + outputData[poolIndex] = 0; + argmaxData[poolIndex] = -1; + } + + for (size_t h = hstart; h < hend; ++h) { + for (size_t w = wstart; w < wend; ++w) { + size_t index = h * width_ + w; + if (batchData[index] > outputData[poolIndex]) { + outputData[poolIndex] = batchData[index]; + argmaxData[poolIndex] = index; + } + } + } + } + } + batchData += channelOffset; + outputData += poolChannelOffset; + argmaxData += poolChannelOffset; + } + bottomROIs += roiOffset; + } + if (useGpu_) { + getOutputValue()->copyFrom(*outputValue); + } +} + +void ROIPoolLayer::backward(const UpdateCallback& callback) { + MatrixPtr inGradValue = getInputGrad(0); + MatrixPtr outGradValue = getOutputGrad(); + MatrixPtr roiValue = getInputValue(1); + + if (useGpu_) { + MatrixPtr inGradCpuBuffer; + Matrix::resizeOrCreate(inGradCpuBuffer, + inGradValue->getHeight(), + inGradValue->getWidth(), + false, + false); + MatrixPtr outGradCpuBuffer; + Matrix::resizeOrCreate(outGradCpuBuffer, + outGradValue->getHeight(), + outGradValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + inGradCpuBuffer->copyFrom(*inGradValue); + outGradCpuBuffer->copyFrom(*outGradValue); + roiCpuBuffer->copyFrom(*roiValue); + inGradValue = inGradCpuBuffer; + outGradValue = outGradCpuBuffer; + roiValue = roiCpuBuffer; + } + + real* bottomROIs = roiValue->getData(); + size_t numROIs = getInput(1).getBatchSize(); + size_t roiOffset = getInputValue(1)->getWidth(); + + real* inDiffData = inGradValue->getData(); + size_t batchOffset = getInputValue(0)->getWidth(); + size_t channelOffset = height_ * width_; + + real* outDiffData = outGradValue->getData(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + real* argmaxData = maxIdxs_->getData(); + + for (size_t n = 0; n < numROIs; ++n) { + size_t roiBatchIdx = bottomROIs[0]; + real* batchDiffData = inDiffData + batchOffset * roiBatchIdx; + for (size_t c = 0; c < channels_; ++c) { + for (size_t ph = 0; ph < pooledHeight_; ++ph) { + for (size_t pw = 0; pw < pooledWidth_; ++pw) { + size_t poolIndex = ph * pooledWidth_ + pw; + if (argmaxData[poolIndex] > 0) { + size_t index = static_cast(argmaxData[poolIndex]); + batchDiffData[index] += outDiffData[poolIndex]; + } + } + } + batchDiffData += channelOffset; + outDiffData += poolChannelOffset; + argmaxData += poolChannelOffset; + } + bottomROIs += roiOffset; + } + + if (useGpu_) { + getInputGrad(0)->copyFrom(*inGradValue); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..4f07e49d6fd1eda9fa7bd46e4cec771a75f571be --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * A layer used by Fast R-CNN to extract feature maps of ROIs from the last + * feature map. + * - Input: This layer needs two input layers: The first input layer is a + * convolution layer; The second input layer contains the ROI data + * which is the output of ProposalLayer in Faster R-CNN. layers for + * generating bbox location offset and the classification confidence. + * - Output: The ROIs' feature map. + * Reference: + * Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. + * Faster R-CNN: Towards Real-Time Object Detection with Region Proposal + * Networks + */ + +class ROIPoolLayer : public Layer { +protected: + size_t channels_; + size_t width_; + size_t height_; + size_t pooledWidth_; + size_t pooledHeight_; + real spatialScale_; + + // Since there is no int matrix, use real maxtrix instead. + MatrixPtr maxIdxs_; + +public: + explicit ROIPoolLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index df73e6781533def5641635e9dfa9c9e4e8a0b57f..fcbcb5b0f1f4cb07066363c9fa93fb1726459f30 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2056,6 +2056,43 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, roi_pool) { + TestConfig config; + config.layerConfig.set_type("roi_pool"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + ROIPoolConfig* roiPoolConf = input->mutable_roi_pool_conf(); + roiPoolConf->set_pooled_width(7); + roiPoolConf->set_pooled_height(7); + roiPoolConf->set_spatial_scale(1. / 16); + roiPoolConf->set_width(14); + roiPoolConf->set_height(14); + + const size_t roiNum = 10; + const size_t roiDim = 10; + const size_t batchSize = 5; + MatrixPtr roiValue = Matrix::create(roiNum, roiDim, false, false); + roiValue->zeroMem(); + real* roiData = roiValue->getData(); + for (size_t i = 0; i < roiNum; ++i) { + roiData[i * roiDim + 0] = std::rand() % batchSize; + roiData[i * roiDim + 1] = std::rand() % 224; // xMin + roiData[i * roiDim + 2] = std::rand() % 224; // yMin + size_t xMin = static_cast(roiData[i * roiDim + 1]); + size_t yMin = static_cast(roiData[i * roiDim + 2]); + roiData[i * roiDim + 3] = xMin + std::rand() % (224 - xMin); // xMax + roiData[i * roiDim + 4] = yMin + std::rand() % (224 - yMin); // yMax + } + + config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}}); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "rois", roiValue, {}}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "roi_pool", batchSize, false, useGpu, false); + } +} + TEST(Layer, SwitchOrderLayer) { TestConfig config; // config input_0 diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 2d7ff1df98a9a448b447890537f20dd416a9ae9d..2c2cc6245932d4af56a68d6399ce31f008bf3748 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -321,6 +321,14 @@ message ClipConfig { required double max = 2; } +message ROIPoolConfig { + required uint32 pooled_width = 1; + required uint32 pooled_height = 2; + required float spatial_scale = 3; + optional uint32 height = 4 [ default = 1 ]; + optional uint32 width = 5 [ default = 1 ]; +} + message ScaleSubRegionConfig { required ImageConfig image_conf = 1; required float value = 2; @@ -348,6 +356,7 @@ message LayerInputConfig { optional DetectionOutputConfig detection_output_conf = 17; optional ClipConfig clip_conf = 18; optional ScaleSubRegionConfig scale_sub_region_conf = 19; + optional ROIPoolConfig roi_pool_conf = 20; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9e2c6f59bd0af1627c79a8a29bd1515ae5c9c6b5..43d02bf70e74c3903d50a4a2177059f4f474045a 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1969,6 +1969,18 @@ class DetectionOutputLayer(LayerBase): self.config.size = size +@config_layer('roi_pool') +class ROIPoolLayer(LayerBase): + def __init__(self, name, inputs, pooled_width, pooled_height, spatial_scale, + num_channels, **xargs): + super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) + config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') + self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width + self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height + self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale + self.set_cnn_layer(name, pooled_height, pooled_width, num_channels) + + @config_layer('data') class DataLayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 36406ef86b812bcb4c671a0e1b1f29e391d79b99..617fbff948bf03098eca4a31f44d4ff05e73dbcf 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -122,6 +122,7 @@ __all__ = [ 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', + 'roi_pool_layer', 'spp_layer', 'pad_layer', 'eos_layer', @@ -221,6 +222,7 @@ class LayerType(object): PRIORBOX_LAYER = 'priorbox' MULTIBOX_LOSS_LAYER = 'multibox_loss' DETECTION_OUTPUT_LAYER = 'detection_output' + ROI_POOL_LAYER = 'roi_pool' CTC_LAYER = 'ctc' WARP_CTC_LAYER = 'warp_ctc' @@ -1305,6 +1307,50 @@ def detection_output_layer(input_loc, name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) +@wrap_name_default("roi_pool") +def roi_pool_layer(input, + rois, + pooled_width, + pooled_height, + spatial_scale, + num_channels=None, + name=None): + """ + A layer used by Fast R-CNN to extract feature maps of ROIs from the last + feature map. + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput. + :param rois: The input ROIs' data. + :type rois: LayerOutput. + :param pooled_width: The width after pooling. + :type pooled_width: int + :param pooled_height: The height after pooling. + :type pooled_height: int + :param spatial_scale: The spatial scale between the image and feature map. + :type spatial_scale: float + :param num_channels: number of input channel. + :type num_channels: int + :return: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + size = num_channels * pooled_width * pooled_height + Layer( + name=name, + type=LayerType.ROI_POOL_LAYER, + inputs=[input.name, rois.name], + pooled_width=pooled_width, + pooled_height=pooled_height, + spatial_scale=spatial_scale, + num_channels=num_channels) + return LayerOutput( + name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) + + @wrap_name_default("cross_channel_norm") def cross_channel_norm_layer(input, name=None, param_attr=None): """ diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 42aaed7a6469342086b8273eb5b80eaea905f851..1c7451e0abf5dc1b99671f292e2ffc2d2282abe9 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,7 +9,7 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer -test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer +test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer test_scale_sub_region_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..f1bc65b3aee7488700a9d24e049adb510649c475 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -0,0 +1,98 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 588 + active_type: "" + height: 14 + width: 14 +} +layers { + name: "rois" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconv" + size: 3136 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 14 + img_size: 14 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + output_y: 14 + img_size_y: 14 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 16 + shared_biases: true + height: 14 + width: 14 +} +layers { + name: "__roi_pool_0__" + type: "roi_pool" + size: 784 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + roi_pool_conf { + pooled_width: 7 + pooled_height: 7 + spatial_scale: 0.0625 + } + } + inputs { + input_layer_name: "rois" + } + height: 7 + width: 7 +} +parameters { + name: "___conv_0__.w0" + size: 432 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +input_layer_names: "rois" +output_layer_names: "__roi_pool_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "rois" + layer_names: "__conv_0__" + layer_names: "__roi_pool_0__" + input_layer_names: "data" + input_layer_names: "rois" + output_layer_names: "__roi_pool_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..b739a81b8505c94a2312ac735647fb114982f1f7 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -0,0 +1,23 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) + +rois = data_layer(name='rois', size=10) + +conv = img_conv_layer( + input=data, + filter_size=3, + num_channels=3, + num_filters=16, + padding=1, + act=LinearActivation(), + bias_attr=True) + +roi_pool = roi_pool_layer( + input=conv, + rois=rois, + pooled_width=7, + pooled_height=7, + spatial_scale=1. / 16) + +outputs(roi_pool)