import numpy as np
from torch.nn import functional as F
import torch as t
from torch import nn

from model.utils.bbox_tools import generate_anchor_base
from model.utils.creator_tool import ProposalCreator


class RegionProposalNetwork(nn.Module):
    """Region Proposal Network introduced in Faster R-CNN.

    This is the Region Proposal Network introduced in Faster R-CNN [#]_.
    It takes features extracted from images and proposes
    class-agnostic bounding boxes around "objects".

    .. [#] Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. \
    Faster R-CNN: Towards Real-Time Object Detection with \
    Region Proposal Networks. NIPS 2015.

    Args:
        in_channels (int): The channel size of input.
        mid_channels (int): The channel size of the intermediate tensor.
        ratios (list of floats): Ratios of width to height of
            the anchors.
        anchor_scales (list of numbers): Areas of the anchors.
            Each area is the product of the square of an element in
            :obj:`anchor_scales` and the original area of the reference
            window.
        feat_stride (int): Stride size after extracting features from an
            image.
        proposal_creator_params (dict): Key-value parameters for
            :class:`model.utils.creator_tool.ProposalCreator`.

    .. seealso::
        :class:`~model.utils.creator_tool.ProposalCreator`

    """

    def __init__(
            self, in_channels=512, mid_channels=512, ratios=[0.5, 1, 2],
            anchor_scales=[8, 16, 32], feat_stride=16,
            proposal_creator_params=dict(),
    ):
        super(RegionProposalNetwork, self).__init__()
        self.anchor_base = generate_anchor_base(
            anchor_scales=anchor_scales, ratios=ratios)
        self.feat_stride = feat_stride
        self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
        n_anchor = self.anchor_base.shape[0]
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
        self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
        normal_init(self.conv1, 0, 0.01)
        normal_init(self.score, 0, 0.01)
        normal_init(self.loc, 0, 0.01)

    def forward(self, x, img_size, scale=1.):
        """Forward Region Proposal Network.

        Here are the notations.

        * :math:`N` is the batch size.
        * :math:`C` is the channel size of the input.
        * :math:`H` and :math:`W` are the height and width of the input feature map.
        * :math:`A` is the number of anchors assigned to each pixel.

        Args:
            x (~torch.autograd.Variable): The features extracted from images.
                Its shape is :math:`(N, C, H, W)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The amount of scaling done to the input images after
                reading them from files.

        Returns:
            (~torch.autograd.Variable, ~torch.autograd.Variable, array, array, array):

            This is a tuple of the following five values.

            * **rpn_locs**: Predicted bounding box offsets and scales for \
                anchors. Its shape is :math:`(N, H W A, 4)`.
            * **rpn_scores**: Predicted foreground/background scores for \
                anchors. Its shape is :math:`(N, H W A, 2)`.
            * **rois**: A bounding box array containing coordinates of \
                proposal boxes.  This is a concatenation of bounding box \
                arrays from multiple images in the batch. \
                Its shape is :math:`(R', 4)`. Given :math:`R_i` predicted \
                bounding boxes from the :math:`i` th image, \
                :math:`R' = \\sum _{i=1} ^ N R_i`.
            * **roi_indices**: An array containing indices of the images to \
                which the RoIs correspond. Its shape is :math:`(R',)`.
            * **anchor**: Coordinates of enumerated shifted anchors. \
                Its shape is :math:`(H W A, 4)`.

        """
        n, _, hh, ww = x.shape
        anchor = _enumerate_shifted_anchor(
            np.array(self.anchor_base),
            self.feat_stride, hh, ww)

        n_anchor = anchor.shape[0] // (hh * ww)
        h = F.relu(self.conv1(x))

        rpn_locs = self.loc(h)
        # permute returns a non-contiguous view, so .contiguous() is needed
        # before .view can reshape the tensor
        rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
        rpn_scores = self.score(h)
        rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous()
        rpn_softmax_scores = F.softmax(rpn_scores, dim=3)
        rpn_fg_scores = \
            rpn_softmax_scores.view(n, hh, ww, n_anchor, 2)[:, :, :, :, 1].contiguous()
        rpn_fg_scores = rpn_fg_scores.view(n, -1)
        rpn_scores = rpn_scores.view(n, -1, 2)

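        # Proposals are generated image-by-image: ProposalCreator operates on
        # numpy arrays, so each image's locs and scores are detached and
        # moved to the CPU first.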
        rois = list()
        roi_indices = list()
        for i in range(n):
            roi = self.proposal_layer(
                rpn_locs[i].cpu().data.numpy(),
                rpn_fg_scores[i].cpu().data.numpy(),
                anchor, img_size,
                scale=scale)
            batch_index = i * np.ones((len(roi),), dtype=np.int32)
            rois.append(roi)
            roi_indices.append(batch_index)

        rois = np.concatenate(rois, axis=0)
        roi_indices = np.concatenate(roi_indices, axis=0)
        return rpn_locs, rpn_scores, rois, roi_indices, anchor


def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    # return (K*A, 4)

    # !TODO: add support for torch.CudaTensor
    # xp = cuda.get_array_module(anchor_base)
    # it seems that it can't be boosted by using the GPU
    import numpy as xp
    shift_y = xp.arange(0, height * feat_stride, feat_stride)
    shift_x = xp.arange(0, width * feat_stride, feat_stride)
    shift_x, shift_y = xp.meshgrid(shift_x, shift_y)
    shift = xp.stack((shift_y.ravel(), shift_x.ravel(),
                      shift_y.ravel(), shift_x.ravel()), axis=1)

    A = anchor_base.shape[0]
    K = shift.shape[0]
    anchor = anchor_base.reshape((1, A, 4)) + \
             shift.reshape((1, K, 4)).transpose((1, 0, 2))
    anchor = anchor.reshape((K * A, 4)).astype(np.float32)
    return anchor
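
# A quick shape check for _enumerate_shifted_anchor (a sketch, not part of
# the original file; generate_anchor_base() yields 9 base anchors by default):
#
#   base = generate_anchor_base()                  # (9, 4)
#   anchors = _enumerate_shifted_anchor(base, feat_stride=16, height=2, width=3)
#   assert anchors.shape == (2 * 3 * 9, 4)         # (K*A, 4) with K = H*W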


def _enumerate_shifted_anchor_torch(anchor_base, feat_stride, height, width):
    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    # return (K*A, 4)

    # !TODO: add support for torch.CudaTensor
    shift_y = t.arange(0, height * feat_stride, feat_stride, dtype=t.float32)
    shift_x = t.arange(0, width * feat_stride, feat_stride, dtype=t.float32)
    # torch.meshgrid uses "ij" indexing, so pass (y, x) to match the
    # numpy "xy" version above
    shift_y, shift_x = t.meshgrid(shift_y, shift_x)
    shift = t.stack((shift_y.reshape(-1), shift_x.reshape(-1),
                     shift_y.reshape(-1), shift_x.reshape(-1)), dim=1)

    A = anchor_base.shape[0]
    K = shift.shape[0]
    anchor = t.from_numpy(anchor_base).reshape(1, A, 4) + \
             shift.reshape(1, K, 4).permute(1, 0, 2)
    anchor = anchor.reshape(K * A, 4).float()
    return anchor
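
# Sanity sketch (not in the original file): the torch variant should agree
# with the numpy one, assuming generate_anchor_base's default float32 output:
#
#   base = generate_anchor_base()
#   assert np.allclose(_enumerate_shifted_anchor(base, 16, 4, 4),
#                      _enumerate_shifted_anchor_torch(base, 16, 4, 4).numpy())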


def normal_init(m, mean, stddev, truncated=False):
    """
    weight initalizer: truncated normal and random normal.
    """
    # x is a parameter
    if truncated:
        # fmod_(2) folds samples into (-2, 2) before scaling, so this is
        # not a perfect truncated normal, but close enough for init
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)
    else:
        m.weight.data.normal_(mean, stddev)
    # zero the bias regardless of which branch initialized the weight
    m.bias.data.zero_()
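
# Usage sketch for normal_init (mirrors the calls in
# RegionProposalNetwork.__init__ above):
#
#   conv = nn.Conv2d(512, 512, 3, 1, 1)
#   normal_init(conv, 0, 0.01)                  # plain Gaussian init
#   normal_init(conv, 0, 0.01, truncated=True)  # samples folded into +/-2 stddev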