Add roi pool (#35084) (#36154)

* add roi pool * rename input as x

Add roi pool (#35084) (#36154)
* add roi pool * rename input as x
b0289de5 · Wenyu · GitHub · 632a0064 · b0289de5 · b0289de5
隐藏空白更改
内联并排

Showing 2 changed files with 234 additions and 0 deletions

python/paddle/tests/test_ops_roi_pool.py python/paddle/tests/test_ops_roi_pool.py +109 -0

python/paddle/vision/ops.py python/paddle/vision/ops.py +125 -0

未找到文件。
--- a/python/paddle/tests/test_ops_roi_pool.py
+++ b/python/paddle/tests/test_ops_roi_pool.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+
+import paddle
+from paddle.vision.ops import roi_pool, RoIPool
+
+
+class TestRoIPool(unittest.TestCase):
+    def setUp(self):
+        self.data = np.random.rand(1, 256, 32, 32).astype('float32')
+        boxes = np.random.rand(3, 4)
+        boxes[:, 2] += boxes[:, 0] + 3
+        boxes[:, 3] += boxes[:, 1] + 4
+        self.boxes = boxes.astype('float32')
+        self.boxes_num = np.array([3], dtype=np.int32)
+
+    def roi_pool_functional(self, output_size):
+
+        if isinstance(output_size, int):
+            output_shape = (3, 256, output_size, output_size)
+        else:
+            output_shape = (3, 256, output_size[0], output_size[1])
+
+        if paddle.in_dynamic_mode():
+            data = paddle.to_tensor(self.data)
+            boxes = paddle.to_tensor(self.boxes)
+            boxes_num = paddle.to_tensor(self.boxes_num)
+
+            pool_out = roi_pool(
+                data, boxes, boxes_num=boxes_num, output_size=output_size)
+            np.testing.assert_equal(pool_out.shape, output_shape)
+
+        else:
+            data = paddle.static.data(
+                shape=self.data.shape, dtype=self.data.dtype, name='data')
+            boxes = paddle.static.data(
+                shape=self.boxes.shape, dtype=self.boxes.dtype, name='boxes')
+            boxes_num = paddle.static.data(
+                shape=self.boxes_num.shape,
+                dtype=self.boxes_num.dtype,
+                name='boxes_num')
+
+            pool_out = roi_pool(
+                data, boxes, boxes_num=boxes_num, output_size=output_size)
+
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor(place)
+
+            pool_out = exe.run(paddle.static.default_main_program(),
+                               feed={
+                                   'data': self.data,
+                                   'boxes': self.boxes,
+                                   'boxes_num': self.boxes_num
+                               },
+                               fetch_list=[pool_out])
+
+            np.testing.assert_equal(pool_out[0].shape, output_shape)
+
+    def test_roi_pool_functional_dynamic(self):
+        self.roi_pool_functional(3)
+        self.roi_pool_functional(output_size=(3, 4))
+
+    def test_roi_pool_functional_static(self):
+        paddle.enable_static()
+        self.roi_pool_functional(3)
+        paddle.disable_static()
+
+    def test_RoIPool(self):
+        roi_pool_c = RoIPool(output_size=(4, 3))
+        data = paddle.to_tensor(self.data)
+        boxes = paddle.to_tensor(self.boxes)
+        boxes_num = paddle.to_tensor(self.boxes_num)
+
+        pool_out = roi_pool_c(data, boxes, boxes_num)
+        np.testing.assert_equal(pool_out.shape, (3, 256, 4, 3))
+
+    def test_value(self, ):
+        data = np.array([i for i in range(1, 17)]).reshape(1, 1, 4,
+                                                           4).astype(np.float32)
+        boxes = np.array(
+            [[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(np.float32)
+        boxes_num = np.array([2]).astype(np.int32)
+        output = np.array([[[[11.]]], [[[16.]]]], dtype=np.float32)
+
+        data = paddle.to_tensor(data)
+        boxes = paddle.to_tensor(boxes)
+        boxes_num = paddle.to_tensor(boxes_num)
+
+        roi_pool_c = RoIPool(output_size=1)
+        pool_out = roi_pool_c(data, boxes, boxes_num)
+        np.testing.assert_almost_equal(pool_out.numpy(), output)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/vision/ops.py
+++ b/python/paddle/vision/ops.py
@@ -30,6 +30,8 @@ __all__ = [ #noqa
    'DeformConv2D',
    'read_file',
    'decode_jpeg',
+    'roi_pool',
+    'RoIPool',
    'psroi_pool',
    'PSRoIPool',
 ]
@@ -1013,3 +1015,126 @@ class PSRoIPool(Layer):
    def forward(self, x, boxes, boxes_num):
        return psroi_pool(x, boxes, boxes_num, self.output_size,
                          self.spatial_scale)
+
+
+def roi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):
+    """
+    This operator implements the roi_pooling layer.
+    Region of interest pooling (also known as RoI pooling) is to perform max pooling on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7).
+    The operator has three steps: 1. Dividing each region proposal into equal-sized sections with output_size(h, w) 2. Finding the largest value in each section 3. Copying these max values to the output buffer  
+    For more information, please refer to https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn.
+
+    Args:
+        x (Tensor): input feature, 4D-Tensor with the shape of [N,C,H,W], 
+            where N is the batch size, C is the input channel, H is Height, W is weight. 
+            The data type is float32 or float64.
+        boxes (Tensor): boxes (Regions of Interest) to pool over. 
+            2D-Tensor with the shape of [num_boxes,4]. 
+            Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates, 
+            and (x2, y2) is the bottom right coordinates.
+        boxes_num (Tensor): the number of RoIs in each image, data type is int32. Default: None
+        output_size (int or tuple[int, int]): the pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
+        spatial_scale (float, optional): multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0
+        name(str, optional): for detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default.
+
+    Returns:
+        pool_out (Tensor): the pooled feature, 4D-Tensor with the shape of [num_boxes, C, output_size[0], output_size[1]].  
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.ops import roi_pool
+
+            data = paddle.rand([1, 256, 32, 32])
+            boxes = paddle.rand([3, 4])
+            boxes[:, 2] += boxes[:, 0] + 3
+            boxes[:, 3] += boxes[:, 1] + 4
+            boxes_num = paddle.to_tensor([3]).astype('int32')
+            pool_out = roi_pool(data, boxes, boxes_num=boxes_num, output_size=3)
+            assert pool_out.shape == [3, 256, 3, 3], ''
+    """
+
+    check_type(output_size, 'output_size', (int, tuple), 'roi_pool')
+    if isinstance(output_size, int):
+        output_size = (output_size, output_size)
+
+    pooled_height, pooled_width = output_size
+    if in_dygraph_mode():
+        assert boxes_num is not None, "boxes_num should not be None in dygraph mode."
+        pool_out, argmaxes = core.ops.roi_pool(
+            x, boxes, boxes_num, "pooled_height", pooled_height, "pooled_width",
+            pooled_width, "spatial_scale", spatial_scale)
+        return pool_out
+
+    else:
+        check_variable_and_dtype(x, 'x', ['float32'], 'roi_pool')
+        check_variable_and_dtype(boxes, 'boxes', ['float32'], 'roi_pool')
+        helper = LayerHelper('roi_pool', **locals())
+        dtype = helper.input_dtype()
+        pool_out = helper.create_variable_for_type_inference(dtype)
+        argmaxes = helper.create_variable_for_type_inference(dtype='int32')
+
+        inputs = {
+            "X": x,
+            "ROIs": boxes,
+        }
+        if boxes_num is not None:
+            inputs['RoisNum'] = boxes_num
+        helper.append_op(
+            type="roi_pool",
+            inputs=inputs,
+            outputs={"Out": pool_out,
+                     "Argmax": argmaxes},
+            attrs={
+                "pooled_height": pooled_height,
+                "pooled_width": pooled_width,
+                "spatial_scale": spatial_scale
+            })
+        return pool_out
+
+
+class RoIPool(Layer):
+    """
+    This interface is used to construct a callable object of the `RoIPool` class. Please
+    refer to :ref:`api_paddle_vision_ops_roi_pool`.  
+
+    Args:
+        output_size (int or tuple[int, int]): the pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
+        spatial_scale (float, optional): multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0.
+
+    Returns:
+        pool_out (Tensor): the pooled feature, 4D-Tensor with the shape of [num_boxes, C, output_size[0], output_size[1]].  
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.ops import RoIPool
+            
+            data = paddle.rand([1, 256, 32, 32])
+            boxes = paddle.rand([3, 4])
+            boxes[:, 2] += boxes[:, 0] + 3
+            boxes[:, 3] += boxes[:, 1] + 4
+            boxes_num = paddle.to_tensor([3]).astype('int32')
+            roi_pool = RoIPool(output_size=(4, 3))
+            pool_out = roi_pool(data, boxes, boxes_num)
+            assert pool_out.shape == [3, 256, 4, 3], ''
+    """
+
+    def __init__(self, output_size, spatial_scale=1.0):
+        super(RoIPool, self).__init__()
+        self._output_size = output_size
+        self._spatial_scale = spatial_scale
+
+    def forward(self, x, boxes, boxes_num):
+        return roi_pool(
+            x=x,
+            boxes=boxes,
+            boxes_num=boxes_num,
+            output_size=self._output_size,
+            spatial_scale=self._spatial_scale)
+
+    def extra_repr(self):
+        main_str = 'output_size={_output_size}, spatial_scale={_spatial_scale}'
+        return main_str.format(**self.__dict__)