From b0289de5ac92c643e134b8c59f5258e150423c4a Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Wed, 29 Sep 2021 09:27:36 +0800
Subject: [PATCH] Add roi pool (#35084) (#36154)

* add roi pool

* rename input as x
---
 python/paddle/tests/test_ops_roi_pool.py | 109 ++++++++++++++++++++
 python/paddle/vision/ops.py              | 125 +++++++++++++++++++++++
 2 files changed, 234 insertions(+)
 create mode 100644 python/paddle/tests/test_ops_roi_pool.py

diff --git a/python/paddle/tests/test_ops_roi_pool.py b/python/paddle/tests/test_ops_roi_pool.py
new file mode 100644
index 0000000000..3c84a55da1
--- /dev/null
+++ b/python/paddle/tests/test_ops_roi_pool.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+
+import paddle
+from paddle.vision.ops import roi_pool, RoIPool
+
+
+class TestRoIPool(unittest.TestCase):
+    def setUp(self):
+        self.data = np.random.rand(1, 256, 32, 32).astype('float32')
+        boxes = np.random.rand(3, 4)
+        boxes[:, 2] += boxes[:, 0] + 3
+        boxes[:, 3] += boxes[:, 1] + 4
+        self.boxes = boxes.astype('float32')
+        self.boxes_num = np.array([3], dtype=np.int32)
+
+    def roi_pool_functional(self, output_size):
+
+        if isinstance(output_size, int):
+            output_shape = (3, 256, output_size, output_size)
+        else:
+            output_shape = (3, 256, output_size[0], output_size[1])
+
+        if paddle.in_dynamic_mode():
+            data = paddle.to_tensor(self.data)
+            boxes = paddle.to_tensor(self.boxes)
+            boxes_num = paddle.to_tensor(self.boxes_num)
+
+            pool_out = roi_pool(
+                data, boxes, boxes_num=boxes_num, output_size=output_size)
+            np.testing.assert_equal(pool_out.shape, output_shape)
+
+        else:
+            data = paddle.static.data(
+                shape=self.data.shape, dtype=self.data.dtype, name='data')
+            boxes = paddle.static.data(
+                shape=self.boxes.shape, dtype=self.boxes.dtype, name='boxes')
+            boxes_num = paddle.static.data(
+                shape=self.boxes_num.shape,
+                dtype=self.boxes_num.dtype,
+                name='boxes_num')
+
+            pool_out = roi_pool(
+                data, boxes, boxes_num=boxes_num, output_size=output_size)
+
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor(place)
+
+            pool_out = exe.run(paddle.static.default_main_program(),
+                               feed={
+                                   'data': self.data,
+                                   'boxes': self.boxes,
+                                   'boxes_num': self.boxes_num
+                               },
+                               fetch_list=[pool_out])
+
+            np.testing.assert_equal(pool_out[0].shape, output_shape)
+
+    def test_roi_pool_functional_dynamic(self):
+        self.roi_pool_functional(3)
+        self.roi_pool_functional(output_size=(3, 4))
+
+    def test_roi_pool_functional_static(self):
+        paddle.enable_static()
+        self.roi_pool_functional(3)
+        paddle.disable_static()
+
+    def test_RoIPool(self):
+        roi_pool_c = RoIPool(output_size=(4, 3))
+        data = paddle.to_tensor(self.data)
+        boxes = paddle.to_tensor(self.boxes)
+        boxes_num = paddle.to_tensor(self.boxes_num)
+
+        pool_out = roi_pool_c(data, boxes, boxes_num)
+        np.testing.assert_equal(pool_out.shape, (3, 256, 4, 3))
+
+    def test_value(self, ):
+        data = np.array([i for i in range(1, 17)]).reshape(1, 1, 4,
+                                                           4).astype(np.float32)
+        boxes = np.array(
+            [[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(np.float32)
+        boxes_num = np.array([2]).astype(np.int32)
+        output = np.array([[[[11.]]], [[[16.]]]], dtype=np.float32)
+
+        data = paddle.to_tensor(data)
+        boxes = paddle.to_tensor(boxes)
+        boxes_num = paddle.to_tensor(boxes_num)
+
+        roi_pool_c = RoIPool(output_size=1)
+        pool_out = roi_pool_c(data, boxes, boxes_num)
+        np.testing.assert_almost_equal(pool_out.numpy(), output)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py
index 5f02b805a3..84dcdfa4cf 100644
--- a/python/paddle/vision/ops.py
+++ b/python/paddle/vision/ops.py
@@ -30,6 +30,8 @@ __all__ = [ #noqa
     'DeformConv2D',
     'read_file',
     'decode_jpeg',
+    'roi_pool',
+    'RoIPool',
     'psroi_pool',
     'PSRoIPool',
 ]
@@ -1013,3 +1015,126 @@ class PSRoIPool(Layer):
     def forward(self, x, boxes, boxes_num):
         return psroi_pool(x, boxes, boxes_num, self.output_size,
                           self.spatial_scale)
+
+
+def roi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):
+    """
+    This operator implements the roi_pooling layer.
+    Region of interest pooling (also known as RoI pooling) performs max pooling on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7).
+    The operator has three steps: 1. Dividing each region proposal into equal-sized sections with output_size(h, w); 2. Finding the largest value in each section; 3. Copying these max values to the output buffer.
+    For more information, please refer to https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn.
+
+    Args:
+        x (Tensor): input feature, 4D-Tensor with the shape of [N,C,H,W],
+            where N is the batch size, C is the input channel, H is the height and W is the width.
+            The data type is float32 or float64. Note that the static-graph dtype check in this function only lists float32.
+        boxes (Tensor): boxes (Regions of Interest) to pool over.
+            2D-Tensor with the shape of [num_boxes,4].
+            Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates,
+            and (x2, y2) is the bottom right coordinates.
+        boxes_num (Tensor): the number of RoIs in each image, data type is int32. Must not be None in dygraph mode; may be None in static graph mode.
+        output_size (int or tuple[int, int]): the pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
+        spatial_scale (float, optional): multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0.
+        name(str, optional): for detailed information, please refer to :ref:`api_guide_Name`. Usually there is no need to set name; it is None by default.
+
+    Returns:
+        pool_out (Tensor): the pooled feature, 4D-Tensor with the shape of [num_boxes, C, output_size[0], output_size[1]].
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.ops import roi_pool
+
+            data = paddle.rand([1, 256, 32, 32])
+            boxes = paddle.rand([3, 4])
+            boxes[:, 2] += boxes[:, 0] + 3
+            boxes[:, 3] += boxes[:, 1] + 4
+            boxes_num = paddle.to_tensor([3]).astype('int32')
+            pool_out = roi_pool(data, boxes, boxes_num=boxes_num, output_size=3)
+            assert pool_out.shape == [3, 256, 3, 3], ''
+    """
+
+    check_type(output_size, 'output_size', (int, tuple), 'roi_pool')
+    if isinstance(output_size, int):
+        output_size = (output_size, output_size)
+
+    pooled_height, pooled_width = output_size
+    if in_dygraph_mode():
+        assert boxes_num is not None, "boxes_num should not be None in dygraph mode."
+        pool_out, argmaxes = core.ops.roi_pool(
+            x, boxes, boxes_num, "pooled_height", pooled_height, "pooled_width",
+            pooled_width, "spatial_scale", spatial_scale)
+        return pool_out
+
+    else:
+        check_variable_and_dtype(x, 'x', ['float32'], 'roi_pool')
+        check_variable_and_dtype(boxes, 'boxes', ['float32'], 'roi_pool')
+        helper = LayerHelper('roi_pool', **locals())
+        dtype = helper.input_dtype()
+        pool_out = helper.create_variable_for_type_inference(dtype)
+        argmaxes = helper.create_variable_for_type_inference(dtype='int32')
+
+        inputs = {
+            "X": x,
+            "ROIs": boxes,
+        }
+        if boxes_num is not None:
+            inputs['RoisNum'] = boxes_num
+        helper.append_op(
+            type="roi_pool",
+            inputs=inputs,
+            outputs={"Out": pool_out,
+                     "Argmax": argmaxes},
+            attrs={
+                "pooled_height": pooled_height,
+                "pooled_width": pooled_width,
+                "spatial_scale": spatial_scale
+            })
+        return pool_out
+
+
+class RoIPool(Layer):
+    """
+    This interface is used to construct a callable object of the `RoIPool` class. Please
+    refer to :ref:`api_paddle_vision_ops_roi_pool`.
+
+    Args:
+        output_size (int or tuple[int, int]): the pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
+        spatial_scale (float, optional): multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0.
+
+    Returns:
+        pool_out (Tensor): the pooled feature, 4D-Tensor with the shape of [num_boxes, C, output_size[0], output_size[1]].
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.ops import RoIPool
+
+            data = paddle.rand([1, 256, 32, 32])
+            boxes = paddle.rand([3, 4])
+            boxes[:, 2] += boxes[:, 0] + 3
+            boxes[:, 3] += boxes[:, 1] + 4
+            boxes_num = paddle.to_tensor([3]).astype('int32')
+            roi_pool = RoIPool(output_size=(4, 3))
+            pool_out = roi_pool(data, boxes, boxes_num)
+            assert pool_out.shape == [3, 256, 4, 3], ''
+    """
+
+    def __init__(self, output_size, spatial_scale=1.0):
+        super(RoIPool, self).__init__()
+        self._output_size = output_size
+        self._spatial_scale = spatial_scale
+
+    def forward(self, x, boxes, boxes_num):
+        return roi_pool(
+            x=x,
+            boxes=boxes,
+            boxes_num=boxes_num,
+            output_size=self._output_size,
+            spatial_scale=self._spatial_scale)
+
+    def extra_repr(self):
+        main_str = 'output_size={_output_size}, spatial_scale={_spatial_scale}'
+        return main_str.format(**self.__dict__)
-- 
GitLab