bbox_head.py 9.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
#   
# Licensed under the Apache License, Version 2.0 (the "License");   
# you may not use this file except in compliance with the License.  
# You may obtain a copy of the License at   
#   
#     http://www.apache.org/licenses/LICENSE-2.0    
#   
# Unless required by applicable law or agreed to in writing, software   
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
# See the License for the specific language governing permissions and   
# limitations under the License.

15
import paddle
16 17
from paddle import ParamAttr
import paddle.nn as nn
18
import paddle.nn.functional as F
19 20 21 22 23
from paddle.nn import ReLU
from paddle.nn.initializer import Normal, XavierUniform
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling import ops
F
FDInSky 已提交
24

G
Guanghua Yu 已提交
25 26 27
from ..backbone.name_adapter import NameAdapter
from ..backbone.resnet import Blocks

F
FDInSky 已提交
28 29

@register
class TwoFCHead(nn.Layer):
    """Two fully connected layers (fc6 -> ReLU -> fc7 -> ReLU) per stage.

    One fc6/fc7 pair is built for every stage. Each layer is registered as
    a sublayer and additionally stored in a per-stage Python list so that
    ``forward`` can pick the pair by stage index.

    Args:
        in_dim: together with ``resolution`` determines the input width of
            fc6, which is ``in_dim * resolution * resolution``.
        mlp_dim: output width of fc6 and input/output width of fc7.
        resolution: see ``in_dim``.
        num_stages: number of fc6/fc7 pairs to create.
    """

    __shared__ = ['num_stages']

    def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1):
        super(TwoFCHead, self).__init__()
        self.in_dim = in_dim
        self.mlp_dim = mlp_dim
        self.num_stages = num_stages

        # Width of the flattened input; also used as fan_out for the fc6
        # weight initializer.
        flat_dim = in_dim * resolution * resolution

        self.fc6_list = []
        self.fc6_relu_list = []
        self.fc7_list = []
        self.fc7_relu_list = []

        for idx in range(num_stages):
            fc6_key = 'fc6_{}'.format(idx)
            fc7_key = 'fc7_{}'.format(idx)

            # Sublayers are created and registered in the order
            # fc6, fc6 act, fc7, fc7 act.
            fc6_layer = nn.Linear(
                flat_dim,
                mlp_dim,
                weight_attr=ParamAttr(
                    initializer=XavierUniform(fan_out=flat_dim)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.)))
            self.fc6_list.append(self.add_sublayer(fc6_key, fc6_layer))
            self.fc6_relu_list.append(
                self.add_sublayer(fc6_key + 'act', ReLU()))

            fc7_layer = nn.Linear(
                mlp_dim,
                mlp_dim,
                weight_attr=ParamAttr(initializer=XavierUniform()),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.)))
            self.fc7_list.append(self.add_sublayer(fc7_key, fc7_layer))
            self.fc7_relu_list.append(
                self.add_sublayer(fc7_key + 'act', ReLU()))

    def forward(self, rois_feat, stage=0):
        """Flatten ``rois_feat`` from axis 1 on and run the stage's MLP.

        Args:
            rois_feat: input tensor; every axis after the first is
                flattened into one.
            stage: index of the fc6/fc7 pair to use.

        Returns:
            Output of the stage's fc7 ReLU.
        """
        flat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
        hidden = self.fc6_list[stage](flat)
        hidden = self.fc6_relu_list[stage](hidden)
        out = self.fc7_list[stage](hidden)
        return self.fc7_relu_list[stage](out)
78 79


G
Guanghua Yu 已提交
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
@register
class Res5Head(nn.Layer):
    """Head that runs RoI features through a ``Blocks`` stack.

    Args:
        feat_in: first positional argument forwarded to ``Blocks``.
        feat_out: second positional argument forwarded to ``Blocks``;
            ``self.feat_out`` is set to four times this value.
    """

    def __init__(self, feat_in=1024, feat_out=512):
        super(Res5Head, self).__init__()
        name_adapter = NameAdapter(self)
        self.res5_conv = []
        res5_blocks = Blocks(
            feat_in,
            feat_out,
            count=3,
            name_adapter=name_adapter,
            stage_num=5)
        # Registered under the sublayer name 'res5_roi_feat' and also kept
        # as the ``res5`` attribute.
        self.res5 = self.add_sublayer('res5_roi_feat', res5_blocks)
        self.feat_out = feat_out * 4

    def forward(self, roi_feat, stage=0):
        """Apply the block stack to ``roi_feat``; ``stage`` is not used."""
        return self.res5(roi_feat)


97
@register
class BBoxFeat(nn.Layer):
    """Chains an RoI extractor with a feature head.

    Args:
        roi_extractor: callable invoked as
            ``roi_extractor(body_feats, rois, spatial_scale)``.
        head_feat: callable invoked as ``head_feat(rois_feat, stage)``.
    """

    __inject__ = ['roi_extractor', 'head_feat']

    def __init__(self, roi_extractor, head_feat):
        super(BBoxFeat, self).__init__()
        self.roi_extractor = roi_extractor
        self.head_feat = head_feat

    def forward(self, body_feats, rois, spatial_scale, stage=0):
        """Extract RoI features and pass them through the feature head.

        Returns:
            A tuple ``(bbox_feat, head_feat)``: the feature head's output
            and the feature head module itself.
        """
        pooled = self.roi_extractor(body_feats, rois, spatial_scale)
        return self.head_feat(pooled, stage), self.head_feat
F
FDInSky 已提交
110 111 112


@register
class BBoxHead(nn.Layer):
    """Per-stage classification and box-delta head on top of ``bbox_feat``.

    For every stage one score ``nn.Linear`` and one delta ``nn.Linear`` are
    registered as sublayers and kept in ``bbox_score_list`` /
    ``bbox_delta_list``.

    Args:
        bbox_feat: injected module called as
            ``bbox_feat(body_feats, rois, spatial_scale, stage)`` and
            expected to return ``(feature, head_feat_func)``.
        in_feat: input width of the score and delta layers.
        num_classes: output width of the score layer.
        cls_agnostic: if true the delta layer outputs ``4 * 2`` values,
            otherwise ``4 * num_classes``.
        num_stages: number of score/delta layer pairs.
        with_pool: if true the feature is average-pooled to 1x1 and axes
            2 and 3 are squeezed before the linear layers.
        score_stage: stage indices whose softmax scores are averaged by
            ``get_prediction`` in the multi-stage case.
        delta_stage: stage indices whose deltas and proposals are averaged
            by ``get_prediction`` in the multi-stage case.
    """

    __shared__ = ['num_classes', 'num_stages']
    __inject__ = ['bbox_feat']

    def __init__(self,
                 bbox_feat,
                 in_feat=1024,
                 num_classes=81,
                 cls_agnostic=False,
                 num_stages=1,
                 with_pool=False,
                 score_stage=[0, 1, 2],
                 delta_stage=[2]):
        super(BBoxHead, self).__init__()
        self.num_classes = num_classes
        # Stored because get_prediction branches on it.
        self.cls_agnostic = cls_agnostic
        self.delta_dim = 2 if cls_agnostic else num_classes
        self.bbox_feat = bbox_feat
        self.num_stages = num_stages
        self.bbox_score_list = []
        self.bbox_delta_list = []
        self.with_pool = with_pool
        # Copy so instances never alias the shared default list objects.
        self.score_stage = list(score_stage)
        self.delta_stage = list(delta_stage)
        for stage in range(num_stages):
            score_name = 'bbox_score_{}'.format(stage)
            delta_name = 'bbox_delta_{}'.format(stage)
            bbox_score = self.add_sublayer(
                score_name,
                nn.Linear(
                    in_feat,
                    1 * self.num_classes,
                    weight_attr=ParamAttr(initializer=Normal(
                        mean=0.0, std=0.01)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))

            bbox_delta = self.add_sublayer(
                delta_name,
                nn.Linear(
                    in_feat,
                    4 * self.delta_dim,
                    weight_attr=ParamAttr(initializer=Normal(
                        mean=0.0, std=0.001)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)

    def forward(self, body_feats, rois, spatial_scale, stage=0):
        """Compute scores and deltas for one stage.

        Returns:
            ``(bbox_feat, bbox_head_out, head_feat_func)`` where
            ``bbox_head_out`` is a one-element list holding the
            ``(scores, deltas)`` tuple of this stage. ``bbox_feat`` is the
            un-pooled feature even when ``with_pool`` is set.
        """
        bbox_feat, head_feat_func = self.bbox_feat(body_feats, rois,
                                                   spatial_scale, stage)
        head_in = bbox_feat
        if self.with_pool:
            head_in = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
            head_in = paddle.squeeze(head_in, axis=[2, 3])
        scores = self.bbox_score_list[stage](head_in)
        deltas = self.bbox_delta_list[stage](head_in)
        bbox_head_out = [(scores, deltas)]
        return bbox_feat, bbox_head_out, head_feat_func

    def _get_head_loss(self, score, delta, target):
        """Return ``(loss_bbox_cls, loss_bbox_reg)`` for one head output.

        ``target`` is read at the keys ``labels_int32``, ``bbox_targets``,
        ``bbox_inside_weights`` and ``bbox_outside_weights``.
        """
        # bbox cls
        labels_int64 = paddle.cast(x=target['labels_int32'], dtype='int64')
        labels_int64.stop_gradient = True
        loss_bbox_cls = F.softmax_with_cross_entropy(
            logits=score, label=labels_int64)
        loss_bbox_cls = paddle.mean(loss_bbox_cls)
        # bbox reg
        loss_bbox_reg = ops.smooth_l1(
            input=delta,
            label=target['bbox_targets'],
            inside_weight=target['bbox_inside_weights'],
            outside_weight=target['bbox_outside_weights'],
            sigma=1.0)
        loss_bbox_reg = paddle.mean(loss_bbox_reg)
        return loss_bbox_cls, loss_bbox_reg

    def get_loss(self, bbox_head_out, targets):
        """Pair each head output with its target and collect the losses.

        Returns:
            Dict with keys ``loss_bbox_cls_<i>`` and ``loss_bbox_reg_<i>``
            for every ``(head_out, target)`` pair at index ``i``.
        """
        loss_bbox = {}
        for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)):
            score, delta = bboxhead
            cls_name = 'loss_bbox_cls_{}'.format(lvl)
            reg_name = 'loss_bbox_reg_{}'.format(lvl)
            loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
                                                               target)
            loss_bbox[cls_name] = loss_bbox_cls
            loss_bbox[reg_name] = loss_bbox_reg
        return loss_bbox

    def get_prediction(self, bbox_head_out, rois):
        """Turn head outputs into box predictions.

        Args:
            bbox_head_out: list of ``(score, delta)`` tuples, one per stage.
            rois: with a single head output, a ``(proposal, proposal_num)``
                tuple; otherwise a sequence of such tuples, one per stage.

        Returns:
            ``(bbox_pred, bboxes)`` with ``bbox_pred = (delta, bbox_prob)``
            and ``bboxes = (proposal, proposal_num)``. In the multi-stage
            case scores are averaged over ``score_stage``, deltas and
            proposals over ``delta_stage``, and ``proposal_num`` is the one
            from the last stage iterated.
        """
        if len(bbox_head_out) == 1:
            proposal, proposal_num = rois
            score, delta = bbox_head_out[0]
            bbox_prob = F.softmax(score)
            delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
        else:
            proposal_list = []
            prob_list = []
            delta_list = []
            # enumerate supplies the stage index that is matched against
            # score_stage / delta_stage.
            for stage, (proposals, bboxhead) in enumerate(
                    zip(rois, bbox_head_out)):
                score, delta = bboxhead
                proposal, proposal_num = proposals
                if stage in self.score_stage:
                    bbox_prob = F.softmax(score)
                    prob_list.append(bbox_prob)
                if stage in self.delta_stage:
                    proposal_list.append(proposal)
                    delta_list.append(delta)
            bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0)
            delta = paddle.mean(paddle.stack(delta_list), axis=0)
            proposal = paddle.mean(paddle.stack(proposal_list), axis=0)
            delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
            if self.cls_agnostic:
                # Keep the slice at index 1 of the class axis and repeat
                # it for every class.
                N, _, M = delta.shape
                delta = delta[:, 1:2, :]
                delta = paddle.expand(delta, [N, self.num_classes, M])
        bboxes = (proposal, proposal_num)
        bbox_pred = (delta, bbox_prob)
        return bbox_pred, bboxes