faster_rcnn_vgg16.py 7.5 KB
Newer Older
C
backup  
chenyuntc 已提交
1
import torch as t
C
init  
chenyuntc 已提交
2
from torch import nn
C
chenyuntc 已提交
3
from torchvision.models import vgg16
C
chenyuntc 已提交
4 5 6 7 8
from model.region_proposal_network import RegionProposalNetwork
from model.faster_rcnn import FasterRCNN
from model.roi_module import RoIPooling2D
from utils import array_tool as at
from utils.config import opt
C
chenyuntc 已提交
9 10


C
chenyuntc 已提交
11
def decom_vgg16():
    """Split a VGG-16 network into a feature extractor and a classifier.

    Which weights end up in the network is controlled by ``opt``:

    * ``opt.caffe_pretrain`` is truthy: the network is created without
      torchvision weights and, unless ``opt.load_path`` is set, the state
      dict stored at ``opt.caffe_pretrain_path`` is loaded into it.
    * otherwise: torchvision's pretrained weights are requested exactly
      when ``opt.load_path`` is not set.

    Returns:
        tuple: ``(extractor, classifier)``.  ``extractor`` is an
        ``nn.Sequential`` of the first 30 layers of ``model.features`` with
        the parameters of its first ten layers frozen.  ``classifier`` is an
        ``nn.Sequential`` of the VGG classifier without its last layer and,
        when ``opt.use_drop`` is falsy, without the layers at indices 2
        and 5 (the dropout layers in torchvision's VGG-16).
    """
    if not opt.caffe_pretrain:
        # NOTE(review): presumably ``load_path`` is a checkpoint restored
        # later, which would make pretrained weights redundant -- confirm.
        backbone = vgg16(not opt.load_path)
    else:
        backbone = vgg16(pretrained=False)
        if not opt.load_path:
            backbone.load_state_dict(t.load(opt.caffe_pretrain_path))

    # the 30th layer of features is relu of conv5_3
    conv_layers = list(backbone.features)[:30]

    head_layers = list(backbone.classifier)
    head_layers.pop(6)
    if not opt.use_drop:
        # Remove the higher index first so the lower one is still valid.
        for drop_idx in (5, 2):
            head_layers.pop(drop_idx)
    head = nn.Sequential(*head_layers)

    # freeze top4 conv
    frozen_params = (
        param
        for frozen_layer in conv_layers[:10]
        for param in frozen_layer.parameters()
    )
    for param in frozen_params:
        param.requires_grad = False

    extractor = nn.Sequential(*conv_layers)
    return extractor, head
C
backup  
chenyuntc 已提交
36 37


C
chenyuntc 已提交
38
class FasterRCNNVGG16(FasterRCNN):
    """Faster R-CNN based on VGG-16.

    The constructor assembles the three components handed to
    :class:`FasterRCNN`: the feature extractor returned by
    :func:`decom_vgg16`, a :class:`RegionProposalNetwork`, and a
    :class:`VGG16RoIHead` wrapping the classifier returned by
    :func:`decom_vgg16`.

    Args:
        n_fg_class (int): The number of classes excluding the background.
            The head is built with ``n_fg_class + 1`` classes.
        ratios (list of floats, optional): Anchor ratios forwarded to
            :class:`RegionProposalNetwork`.  Defaults to ``[0.5, 1, 2]``.
        anchor_scales (list of numbers, optional): Anchor scales forwarded
            to :class:`RegionProposalNetwork`.  Defaults to
            ``[8, 16, 32]``.

    """

    feat_stride = 16  # downsample 16x for output of conv5 in vgg16

    def __init__(self,
                 n_fg_class=20,
                 ratios=None,
                 anchor_scales=None
                 ):
        # Build the defaults per call: list defaults in the signature would
        # be shared by every instance and by whatever stores them downstream.
        if ratios is None:
            ratios = [0.5, 1, 2]
        if anchor_scales is None:
            anchor_scales = [8, 16, 32]

        extractor, classifier = decom_vgg16()

        # NOTE(review): the two positional 512s are presumably the RPN's
        # input and intermediate channel counts -- confirm against
        # RegionProposalNetwork's signature.
        rpn = RegionProposalNetwork(
            512, 512,
            ratios=ratios,
            anchor_scales=anchor_scales,
            feat_stride=self.feat_stride,
        )

        head = VGG16RoIHead(
            n_class=n_fg_class + 1,  # +1 for the background class
            roi_size=7,
            spatial_scale=(1. / self.feat_stride),
            classifier=classifier
        )

        super(FasterRCNNVGG16, self).__init__(
            extractor,
            rpn,
            head,
        )


class VGG16RoIHead(nn.Module):
    """Faster R-CNN head for the VGG-16 based implementation.

    For every RoI this module pools a fixed-size patch from the feature
    map, feeds the flattened patch through ``classifier`` and produces
    class-wise localizations and classification scores from the result.

    Args:
        n_class (int): The number of classes, including the background.
        roi_size (int): Height and width of the feature maps after
            RoI-pooling.
        spatial_scale (float): Scale handed to the RoI pooling layer.
        classifier (nn.Module): Module applied to the flattened pooled
            features.  Its output is fed to two ``nn.Linear(4096, ...)``
            layers, so it must produce 4096 features per RoI.

    """

    def __init__(self, n_class, roi_size, spatial_scale,
                 classifier):
        super(VGG16RoIHead, self).__init__()

        # Plain configuration values; n_class includes the background.
        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale

        # Sub-modules are registered in the order classifier, cls_loc,
        # score, roi; both linear layers exist before either is
        # re-initialised.
        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)

    def forward(self, x, rois, roi_indices):
        """Compute per-RoI localizations and scores.

        Args:
            x (Variable): 4D image variable.
            rois (Tensor): A bounding box array containing coordinates of
                proposal boxes, concatenated over all images in the batch.
                Its shape is :math:`(R', 4)`.
            roi_indices (Tensor): An array containing the index of the
                image each bounding box belongs to.  Its shape is
                :math:`(R',)`.

        Returns:
            tuple: ``(roi_cls_locs, roi_scores)``, the outputs of the
            ``cls_loc`` layer (``n_class * 4`` features) and of the
            ``score`` layer (``n_class`` features) for the pooled RoIs.

        """
        # Both inputs may arrive as ndarrays, so convert before use.
        batch_idx = at.totensor(roi_indices).float()
        boxes = at.totensor(rois).float()

        # One row per RoI: image index followed by the four coordinates.
        stacked = t.cat([batch_idx[:, None], boxes], dim=1)

        # NOTE: important: yx->xy -- swap each coordinate pair, presumably
        # into the order RoIPooling2D expects.
        column_order = [0, 2, 1, 4, 3]
        pooling_rois = t.autograd.Variable(
            stacked[:, column_order].contiguous())

        pooled = self.roi(x, pooling_rois)
        flat = pooled.view(pooled.size(0), -1)
        fc7 = self.classifier(flat)

        return self.cls_loc(fc7), self.score(fc7)

C
chenyuntc 已提交
191

C
backup  
chenyuntc 已提交
192 193 194 195 196 197
def normal_init(m, mean, stddev, truncated=False):
    """Initialise a layer's weight from a normal distribution, zero its bias.

    Args:
        m: Module exposing a ``weight`` tensor and, optionally, a ``bias``
            tensor (for example ``nn.Linear``).
        mean (float): Mean of the weight distribution.
        stddev (float): Standard deviation of the weight distribution.
        truncated (bool): If true, standard-normal samples are folded into
            ``(-2, 2)`` with ``fmod`` before being scaled by ``stddev`` and
            shifted by ``mean``.  This only approximates a truncated
            normal.  If false, weights are drawn directly from
            ``N(mean, stddev)``.

    The bias is zeroed in both modes.  A module without a bias (``bias`` is
    missing or ``None``) only has its weight initialised.
    """
    weight = m.weight.data
    if truncated:
        weight.normal_().fmod_(2).mul_(stddev).add_(mean)  # not a perfect approximation
    else:
        weight.normal_(mean, stddev)

    # Zero the bias regardless of the sampling mode; previously the
    # truncated branch left it untouched.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        bias.data.zero_()