# py_precise_roi_pool.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import numpy as np


class PyPrRoIPool(object):
    """Pure Python/NumPy reference implementation of precise RoI pooling.

    Every output value is the integral of the bilinearly interpolated
    feature map over one pooling bin, divided by the bin area. Feature
    values outside the map are treated as 0.
    """

    def __init__(self):
        pass

    def _PrRoIPoolingGetData(self, data, h, w, height, width):
        """Return ``data[h][w]``, or 0.0 when (h, w) lies outside the map.

        :param data: 2-D feature map indexable as ``data[h][w]``
        :param h (Integer): row index, may be out of range
        :param w (Integer): column index, may be out of range
        :param height (Integer): number of valid rows
        :param width (Integer): number of valid columns
        :return: the stored value, or 0.0 for an out-of-range position
        """
        if 0 <= h < height and 0 <= w < width:
            return data[h][w]
        return 0.0

    @staticmethod
    def _PrRoIPoolingWeight(lower, upper):
        """Return the integral of ``(1 - t)`` for t in [lower, upper].

        This is the 1-D weight a grid point receives from the part of a unit
        cell that lies between distances ``lower`` and ``upper`` from it.
        """
        return upper - 0.5 * upper * upper - lower + 0.5 * lower * lower

    def _PrRoIPoolingMatCalculation(self, this_data, s_h, s_w, e_h, e_w, y0, x0,
                                    y1, x1, h0, w0):
        """Integrate the bilinear interpolation over part of one grid cell.

        The cell has corners (s_h, s_w), (s_h, e_w), (e_h, s_w), (e_h, e_w);
        the integration region is [y0, y1] x [x0, x1].

        :param this_data: 2-D feature map indexable as ``this_data[h][w]``
        :param s_h, s_w (Integer): top-left corner of the cell
        :param e_h, e_w (Integer): bottom-right corner of the cell
        :param y0, x0 (float): start of the integration region
        :param y1, x1 (float): end of the integration region
        :param h0, w0 (Integer): height and width of the feature map
        :return: weighted sum of the four corner values
        """
        weight = self._PrRoIPoolingWeight
        get_data = self._PrRoIPoolingGetData

        # 1-D weights of the start/end corners along each axis. Distances are
        # measured from the corner that receives the weight.
        w_from_start = weight(x0 - float(s_w), x1 - float(s_w))
        w_from_end = weight(float(e_w) - x1, float(e_w) - x0)
        h_from_start = weight(y0 - float(s_h), y1 - float(s_h))
        h_from_end = weight(float(e_h) - y1, float(e_h) - y0)

        sum_out = 0.0
        sum_out += get_data(this_data, s_h, s_w, h0, w0) * (
            w_from_start * h_from_start)
        sum_out += get_data(this_data, s_h, e_w, h0, w0) * (
            w_from_end * h_from_start)
        sum_out += get_data(this_data, e_h, s_w, h0, w0) * (
            w_from_start * h_from_end)
        sum_out += get_data(this_data, e_h, e_w, h0, w0) * (
            w_from_end * h_from_end)
        return sum_out

    def compute(self,
                x,
                rois,
                output_channels,
                spatial_scale=0.1,
                pooled_height=1,
                pooled_width=1):
        '''
        Calculate the precise roi pooling values.

        Note: This function is implemented in pure python without any paddle
        concept involved.

        Channels are read position-sensitively: output channel ``c`` at bin
        ``(ph, pw)`` is pooled from input channel
        ``(c * pooled_height + ph) * pooled_width + pw``, so ``x`` needs at
        least ``output_channels * pooled_height * pooled_width`` channels.

        :param x (array): array[N, C, H, W]
        :param rois (array): ROIs[id, x1, y1, x2, y2] (Regions of Interest) to pool over.
        :param output_channels (Integer): Expected output channels
        :param spatial_scale (float): spatial scale, default = 0.1
        :param pooled_height (Integer): Expected output height, default = 1
        :param pooled_width (Integer): Expected output width, default = 1
        :return: array[len(rois), output_channels, pooled_height, pooled_width]
        :raises TypeError: if output_channels, pooled_height or pooled_width
            is not an int, or spatial_scale is not a float
        :raises ValueError: if x is not 4-dimensional
        '''
        if not isinstance(output_channels, int):
            raise TypeError("output_channels must be int type")
        if not isinstance(spatial_scale, float):
            raise TypeError("spatial_scale must be float type")
        if not isinstance(pooled_height, int):
            raise TypeError("pooled_height must be int type")
        if not isinstance(pooled_width, int):
            raise TypeError("pooled_width must be int type")

        # Only the spatial extent is needed; asarray avoids copying x when it
        # is already an ndarray.
        _, _, height, width = np.asarray(x).shape
        rois_num = len(rois)
        out_data = np.zeros(
            (rois_num, output_channels, pooled_height, pooled_width))

        for i, roi in enumerate(rois):
            roi_batch_id = int(roi[0])
            roi_start_w = roi[1] * spatial_scale
            roi_start_h = roi[2] * spatial_scale
            roi_end_w = roi[3] * spatial_scale
            roi_end_h = roi[4] * spatial_scale

            roi_width = max(roi_end_w - roi_start_w, 0.0)
            roi_height = max(roi_end_h - roi_start_h, 0.0)
            bin_size_h = roi_height / float(pooled_height)
            bin_size_w = roi_width / float(pooled_width)

            x_i = x[roi_batch_id]

            # All bins of one RoI share the same area; a degenerate RoI keeps
            # the zeros out_data was initialised with.
            win_size = max(0.0, bin_size_w * bin_size_h)
            if win_size == 0.0:
                continue

            for ph in range(pooled_height):
                win_start_h = roi_start_h + bin_size_h * ph
                win_end_h = win_start_h + bin_size_h
                s_h = int(math.floor(win_start_h))
                e_h = int(math.ceil(win_end_h))

                for pw in range(pooled_width):
                    win_start_w = roi_start_w + bin_size_w * pw
                    win_end_w = win_start_w + bin_size_w
                    s_w = int(math.floor(win_start_w))
                    e_w = int(math.ceil(win_end_w))

                    # Unit cells covered by this bin, each with the bin
                    # clipped to the cell. Independent of the channel, so
                    # built once and reused for every output channel.
                    cells = [(h_iter, w_iter,
                              max(win_start_h, float(h_iter)),
                              max(win_start_w, float(w_iter)),
                              min(win_end_h, float(h_iter) + 1.0),
                              min(win_end_w, float(w_iter) + 1.0))
                             for w_iter in range(s_w, e_w)
                             for h_iter in range(s_h, e_h)]

                    for c in range(output_channels):
                        # Position-sensitive input channel for this bin.
                        c_in = (c * pooled_height + ph) * pooled_width + pw
                        feature_map = x_i[c_in]

                        # Explicit accumulation keeps the summation order
                        # (and thus the floating point result) fixed.
                        sum_out = 0
                        for h_iter, w_iter, y0, x0, y1, x1 in cells:
                            sum_out += self._PrRoIPoolingMatCalculation(
                                feature_map, h_iter, w_iter, h_iter + 1,
                                w_iter + 1, y0, x0, y1, x1, height, width)

                        out_data[i, c, ph, pw] = sum_out / win_size

        return out_data