# py_precise_roi_pool.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import numpy as np


class PyPrRoIPool:
    '''
    Pure Python / NumPy reference implementation of precise RoI pooling.

    Each output bin is the integral of the bilinearly interpolated feature
    map over the bin's window, divided by the window area. Feature-map
    positions outside the map contribute zero.
    '''

    def __init__(self):
        pass

    def _PrRoIPoolingGetData(self, data, h, w, height, width):
        '''
        Read ``data[h][w]``, treating out-of-range positions as zero.

        :param data: 2-D feature map indexable as ``data[h][w]``
        :param h (Integer): row index
        :param w (Integer): column index
        :param height (Integer): number of valid rows
        :param width (Integer): number of valid columns
        :return: ``data[h][w]`` when ``0 <= h < height`` and
                 ``0 <= w < width``, otherwise ``0.0``
        '''
        overflow = (h < 0) or (w < 0) or (h >= height) or (w >= width)
        if overflow:
            return 0.0
        return data[h][w]

    @staticmethod
    def _PrRoIPoolingWeight(lower, upper):
        '''
        Integrate the linear weight ``1 - t`` for ``t`` in ``[lower, upper]``.

        The term order is kept exactly as in the expanded expression so that
        floating-point results do not change.
        '''
        return upper - 0.5 * upper * upper - lower + 0.5 * lower * lower

    def _PrRoIPoolingMatCalculation(
        self, this_data, s_h, s_w, e_h, e_w, y0, x0, y1, x1, h0, w0
    ):
        '''
        Integrate the bilinear interpolation of one grid cell over a rectangle.

        The cell has corners ``(s_h, s_w)``, ``(s_h, e_w)``, ``(e_h, s_w)``
        and ``(e_h, e_w)``; the rectangle is ``[y0, y1] x [x0, x1]``.

        :param this_data: 2-D feature map of a single channel
        :param s_h, s_w (Integer): top-left corner of the cell
        :param e_h, e_w (Integer): bottom-right corner of the cell
        :param y0, x0 (float): start of the integration rectangle
        :param y1, x1 (float): end of the integration rectangle
        :param h0, w0 (Integer): feature map height and width, used for
                                 bounds checking
        :return: sum of the four corner values, each multiplied by its
                 integrated bilinear weight
        '''
        weight = self._PrRoIPoolingWeight
        value_at = self._PrRoIPoolingGetData

        # 1-D weights measured from each edge of the cell. Each corner's 2-D
        # weight is the product of one horizontal and one vertical factor.
        w_from_start = weight(x0 - float(s_w), x1 - float(s_w))
        w_from_end = weight(float(e_w) - x1, float(e_w) - x0)
        h_from_start = weight(y0 - float(s_h), y1 - float(s_h))
        h_from_end = weight(float(e_h) - y1, float(e_h) - y0)

        sum_out = 0.0
        sum_out += value_at(this_data, s_h, s_w, h0, w0) * (
            w_from_start * h_from_start
        )
        sum_out += value_at(this_data, s_h, e_w, h0, w0) * (
            w_from_end * h_from_start
        )
        sum_out += value_at(this_data, e_h, s_w, h0, w0) * (
            w_from_start * h_from_end
        )
        sum_out += value_at(this_data, e_h, e_w, h0, w0) * (
            w_from_end * h_from_end
        )
        return sum_out

    def compute(
        self,
        x,
        rois,
        output_channels,
        spatial_scale=0.1,
        pooled_height=1,
        pooled_width=1,
    ):
        '''
        calculate the precise roi pooling values
        Note: This function is implemented in pure python without any paddle concept involved
        :param x (array): array[N, C, H, W]
        :param rois (array): ROIs[id, x1, y1, x2, y2] (Regions of Interest) to pool over.
        :param output_channels (Integer): Expected output channels
        :param spatial_scale (float): spatial scale, default = 0.1
        :param pooled_height (Integer): Expected output height, default = 1
        :param pooled_width (Integer): Expected output width, default = 1
        :return: array[len(rois), output_channels, pooled_height, pooled_width]
        :raises TypeError: if output_channels, pooled_height or pooled_width
                           is not an int, or spatial_scale is not a float
        '''
        if not isinstance(output_channels, int):
            raise TypeError("output_channels must be int type")
        if not isinstance(spatial_scale, float):
            raise TypeError("spatial_scale must be float type")
        if not isinstance(pooled_height, int):
            raise TypeError("pooled_height must be int type")
        if not isinstance(pooled_width, int):
            raise TypeError("pooled_width must be int type")

        # Only the spatial extent is needed; the unpacking still requires a
        # 4-D input.
        _, _, height, width = np.array(x).shape
        out_data = np.zeros(
            (len(rois), output_channels, pooled_height, pooled_width)
        )

        for i, roi in enumerate(rois):
            roi_batch_id = int(roi[0])
            # Map RoI corners from input coordinates to feature-map ones.
            roi_start_w = roi[1] * spatial_scale
            roi_start_h = roi[2] * spatial_scale
            roi_end_w = roi[3] * spatial_scale
            roi_end_h = roi[4] * spatial_scale

            roi_width = max(roi_end_w - roi_start_w, 0.0)
            roi_height = max(roi_end_h - roi_start_h, 0.0)
            bin_size_h = roi_height / float(pooled_height)
            bin_size_w = roi_width / float(pooled_width)

            x_i = x[roi_batch_id]

            # Every bin of a RoI has the same area. A degenerate RoI yields
            # all zeros, which out_data already holds.
            win_size = max(0.0, bin_size_w * bin_size_h)
            if win_size == 0.0:
                continue

            for c in range(output_channels):
                for ph in range(pooled_height):
                    for pw in range(pooled_width):
                        win_start_w = roi_start_w + bin_size_w * pw
                        win_start_h = roi_start_h + bin_size_h * ph
                        win_end_w = win_start_w + bin_size_w
                        win_end_h = win_start_h + bin_size_h

                        # Integer grid cells that overlap the window.
                        s_w = math.floor(win_start_w)
                        e_w = math.ceil(win_end_w)
                        s_h = math.floor(win_start_h)
                        e_h = math.ceil(win_end_h)

                        sum_out = 0.0
                        for w_iter in range(int(s_w), int(e_w)):
                            for h_iter in range(int(s_h), int(e_h)):
                                # Clip the window to the current unit cell.
                                sum_out += self._PrRoIPoolingMatCalculation(
                                    x_i[c],
                                    h_iter,
                                    w_iter,
                                    h_iter + 1,
                                    w_iter + 1,
                                    max(win_start_h, float(h_iter)),
                                    max(win_start_w, float(w_iter)),
                                    min(win_end_h, float(h_iter) + 1.0),
                                    min(win_end_w, float(w_iter + 1.0)),
                                    height,
                                    width,
                                )

                        out_data[i, c, ph, pw] = sum_out / win_size

        return out_data