diff --git a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.yml b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.yml new file mode 100644 index 0000000000000000000000000000000000000000..93c85862cab8ec8972af12535366b1521d95e987 --- /dev/null +++ b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.yml @@ -0,0 +1,124 @@ +architecture: CascadeRCNN +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final +load_static_weights: True +roi_stages: 3 + +# Model Architecture +CascadeRCNN: + # model anchor info flow + anchor: Anchor + proposal: Proposal + mask: Mask + # model feat info flow + backbone: ResNet + neck: FPN + rpn_head: RPNHead + bbox_head: BBoxHead + mask_head: MaskHead + # post process + bbox_post_process: BBoxPostProcess + mask_post_process: MaskPostProcess + +ResNet: + # index 0 stands for res2 + depth: 50 + norm_type: bn + freeze_at: 0 + return_idx: [0,1,2,3] + num_stages: 4 + +FPN: + in_channels: [256, 512, 1024, 2048] + out_channel: 256 + min_level: 0 + max_level: 4 + spatial_scale: [0.25, 0.125, 0.0625, 0.03125] + +RPNHead: + rpn_feat: + name: RPNFeat + feat_in: 256 + feat_out: 256 + anchor_per_position: 3 + rpn_channel: 256 + +Anchor: + anchor_generator: + name: AnchorGeneratorRPN + aspect_ratios: [0.5, 1.0, 2.0] + anchor_start_size: 32 + stride: [4., 4.] 
+ anchor_target_generator: + name: AnchorTargetGeneratorRPN + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + straddle_thresh: 0.0 + +Proposal: + proposal_generator: + name: ProposalGenerator + min_size: 0.0 + nms_thresh: 0.7 + train_pre_nms_top_n: 2000 + train_post_nms_top_n: 2000 + infer_pre_nms_top_n: 1000 + infer_post_nms_top_n: 1000 + proposal_target_generator: + name: ProposalTargetGenerator + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: [0.5, 0.6, 0.7] + bg_thresh_lo: [0.0, 0.0, 0.0] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + is_cls_agnostic: true + +BBoxHead: + bbox_feat: + name: BBoxFeat + roi_extractor: + name: RoIAlign + resolution: 7 + sampling_ratio: 2 + head_feat: + name: TwoFCHead + in_dim: 256 + mlp_dim: 1024 + in_feat: 1024 + cls_agnostic: true + +BBoxPostProcess: + decode: + name: RCNNBox + num_classes: 81 + batch_size: 1 + nms: + name: MultiClassNMS + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 + +Mask: + mask_target_generator: + name: MaskTargetGenerator + mask_resolution: 28 + +MaskHead: + mask_feat: + name: MaskFeat + num_convs: 4 + feat_in: 256 + feat_out: 256 + mask_roi_extractor: + name: RoIAlign + resolution: 14 + sampling_ratio: 2 + share_bbox_feat: False + feat_in: 256 + + +MaskPostProcess: + mask_resolution: 28 diff --git a/configs/_base_/models/cascade_rcnn_r50_fpn.yml b/configs/_base_/models/cascade_rcnn_r50_fpn.yml new file mode 100644 index 0000000000000000000000000000000000000000..81cdfb9216dbf54054ef77994d5c063fcc0056d9 --- /dev/null +++ b/configs/_base_/models/cascade_rcnn_r50_fpn.yml @@ -0,0 +1,99 @@ +architecture: CascadeRCNN +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final +load_static_weights: True +roi_stages: 3 + +# Model Architecture +CascadeRCNN: + # model anchor info flow + anchor: Anchor + proposal: Proposal + # 
model feat info flow + backbone: ResNet + neck: FPN + rpn_head: RPNHead + bbox_head: BBoxHead + # post process + bbox_post_process: BBoxPostProcess + +ResNet: + # index 0 stands for res2 + depth: 50 + norm_type: bn + freeze_at: 0 + return_idx: [0,1,2,3] + num_stages: 4 + +FPN: + in_channels: [256, 512, 1024, 2048] + out_channel: 256 + min_level: 0 + max_level: 4 + spatial_scale: [0.25, 0.125, 0.0625, 0.03125] + +RPNHead: + rpn_feat: + name: RPNFeat + feat_in: 256 + feat_out: 256 + anchor_per_position: 3 + rpn_channel: 256 + +Anchor: + anchor_generator: + name: AnchorGeneratorRPN + aspect_ratios: [0.5, 1.0, 2.0] + anchor_start_size: 32 + stride: [4., 4.] + anchor_target_generator: + name: AnchorTargetGeneratorRPN + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + straddle_thresh: 0.0 + +Proposal: + proposal_generator: + name: ProposalGenerator + min_size: 0.0 + nms_thresh: 0.7 + train_pre_nms_top_n: 2000 + train_post_nms_top_n: 2000 + infer_pre_nms_top_n: 1000 + infer_post_nms_top_n: 1000 + proposal_target_generator: + name: ProposalTargetGenerator + batch_size_per_im: 512 + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] + bg_thresh_hi: [0.5, 0.6, 0.7] + bg_thresh_lo: [0.0, 0.0, 0.0] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + is_cls_agnostic: true + +BBoxHead: + bbox_feat: + name: BBoxFeat + roi_extractor: + name: RoIAlign + resolution: 7 + sampling_ratio: 2 + head_feat: + name: TwoFCHead + in_dim: 256 + mlp_dim: 1024 + in_feat: 1024 + cls_agnostic: true + +BBoxPostProcess: + decode: + name: RCNNBox + num_classes: 81 + batch_size: 1 + nms: + name: MultiClassNMS + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 diff --git a/configs/_base_/models/faster_rcnn_r50.yml b/configs/_base_/models/faster_rcnn_r50.yml index f2d501387113faae7487abbe98db61d4bfa1ee43..9d5b05e873f538f370bd3cf0e8c19064cfdb3ffb 100644 --- a/configs/_base_/models/faster_rcnn_r50.yml +++ b/configs/_base_/models/faster_rcnn_r50.yml @@ -59,7 +59,7 
@@ Proposal: proposal_target_generator: name: ProposalTargetGenerator batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] bg_thresh_hi: [0.5,] bg_thresh_lo: [0.0,] fg_thresh: [0.5,] diff --git a/configs/_base_/models/faster_rcnn_r50_fpn.yml b/configs/_base_/models/faster_rcnn_r50_fpn.yml index b45dffdbf78567cd6c4ae909d4e02c4c19e3f3e7..f7f76221cd14a28c7865d5a8bb500867a838a555 100644 --- a/configs/_base_/models/faster_rcnn_r50_fpn.yml +++ b/configs/_base_/models/faster_rcnn_r50_fpn.yml @@ -66,7 +66,7 @@ Proposal: proposal_target_generator: name: ProposalTargetGenerator batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] bg_thresh_hi: [0.5,] bg_thresh_lo: [0.0,] fg_thresh: [0.5,] diff --git a/configs/_base_/models/mask_rcnn_r50.yml b/configs/_base_/models/mask_rcnn_r50.yml index 7013ecfdc2caba559dfa158a511757e8e22d203c..ffc8dab00351b457fc5c607c45207f8d7762a4b0 100644 --- a/configs/_base_/models/mask_rcnn_r50.yml +++ b/configs/_base_/models/mask_rcnn_r50.yml @@ -60,7 +60,7 @@ Proposal: proposal_target_generator: name: ProposalTargetGenerator batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] bg_thresh_hi: [0.5,] bg_thresh_lo: [0.0,] fg_thresh: [0.5,] diff --git a/configs/_base_/models/mask_rcnn_r50_fpn.yml b/configs/_base_/models/mask_rcnn_r50_fpn.yml index 35001d6ec84793fe096f3def784782cfde3f4eaa..e0464497df1c81dab95e0a5dc3456eb000f74b04 100644 --- a/configs/_base_/models/mask_rcnn_r50_fpn.yml +++ b/configs/_base_/models/mask_rcnn_r50_fpn.yml @@ -68,7 +68,7 @@ Proposal: proposal_target_generator: name: ProposalTargetGenerator batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] bg_thresh_hi: [0.5,] bg_thresh_lo: [0.0,] fg_thresh: [0.5,] diff --git a/configs/_base_/readers/faster_fpn_reader.yml b/configs/_base_/readers/faster_fpn_reader.yml index 
e205e80d850604a1c71a5e87d2efce22878e8849..63167836e84b1c722f88a4967d370df4757c5b3c 100644 --- a/configs/_base_/readers/faster_fpn_reader.yml +++ b/configs/_base_/readers/faster_fpn_reader.yml @@ -4,7 +4,6 @@ TrainReader: fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] sample_transforms: - DecodeImage: {to_rgb: true} - # check - RandomFlipImage: {prob: 0.5} - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} diff --git a/configs/_base_/runtime.yml b/configs/_base_/runtime.yml index cfc9e5b17592a33941acd3e72fc70361ee590c70..deac1cb05f9a8b3fd07b370665b8b64f60394016 100644 --- a/configs/_base_/runtime.yml +++ b/configs/_base_/runtime.yml @@ -1,4 +1,4 @@ use_gpu: true log_iter: 20 save_dir: output -snapshot_epoch: 2 +snapshot_epoch: 1 diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..5995d04b501d1f9ceb8d488799c2a0bf75c18272 --- /dev/null +++ b/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml @@ -0,0 +1,7 @@ +_BASE_: [ + './_base_/models/cascade_mask_rcnn_r50_fpn.yml', + './_base_/optimizers/rcnn_1x.yml', + './_base_/datasets/coco.yml', + './_base_/readers/mask_fpn_reader.yml', + './_base_/runtime.yml', +] diff --git a/configs/cascade_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_rcnn_r50_fpn_1x_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..adcd2230e5d1b9e5d575d51ece8e7a3204af9d74 --- /dev/null +++ b/configs/cascade_rcnn_r50_fpn_1x_coco.yml @@ -0,0 +1,7 @@ +_BASE_: [ + './_base_/models/cascade_rcnn_r50_fpn.yml', + './_base_/optimizers/rcnn_1x.yml', + './_base_/datasets/coco.yml', + './_base_/readers/faster_fpn_reader.yml', + './_base_/runtime.yml', +] diff --git a/ppdet/modeling/architecture/cascade_rcnn.py b/ppdet/modeling/architecture/cascade_rcnn.py index 
2f76616f5c0ae709eb5201676c72fcf3ddb4f773..61b389b02142874222abbee191675582912ecc7b 100644 --- a/ppdet/modeling/architecture/cascade_rcnn.py +++ b/ppdet/modeling/architecture/cascade_rcnn.py @@ -1,8 +1,22 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function -from paddle import fluid +import paddle from ppdet.core.workspace import register from .meta_arch import BaseArch @@ -12,115 +26,149 @@ __all__ = ['CascadeRCNN'] @register class CascadeRCNN(BaseArch): __category__ = 'architecture' - __shared__ = ['num_stages'] + __shared__ = ['roi_stages'] __inject__ = [ 'anchor', 'proposal', 'mask', 'backbone', + 'neck', 'rpn_head', 'bbox_head', 'mask_head', + 'bbox_post_process', + 'mask_post_process', ] def __init__(self, anchor, proposal, - mask, backbone, rpn_head, bbox_head, - mask_head, - num_stages=3, - *args, - **kwargs): - super(CascadeRCNN, self).__init__(*args, **kwargs) + bbox_post_process, + neck=None, + mask=None, + mask_head=None, + mask_post_process=None, + roi_stages=3): + super(CascadeRCNN, self).__init__() self.anchor = anchor self.proposal = proposal - self.mask = mask self.backbone = backbone self.rpn_head = rpn_head self.bbox_head = bbox_head + self.bbox_post_process = bbox_post_process + self.neck = neck + self.mask = mask self.mask_head = mask_head - self.num_stages = num_stages + 
self.mask_post_process = mask_post_process + self.roi_stages = roi_stages + self.with_mask = mask is not None def model_arch(self, ): # Backbone - bb_out = self.backbone(self.gbd) - self.gbd.update(bb_out) + body_feats = self.backbone(self.inputs) + + # Neck + if self.neck is not None: + body_feats, spatial_scale = self.neck(body_feats) # RPN - rpn_head_out = self.rpn_head(self.gbd) - self.gbd.update(rpn_head_out) + # rpn_head returns two lists: rpn_feat, rpn_head_out + # each element in rpn_feat contains rpn feature on each level, + # and the length is 1 when the neck is not applied. + # each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta) + rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats) # Anchor - anchor_out = self.anchor(self.gbd) - self.gbd.update(anchor_out) - - self.gbd['stage'] = 0 - for i in range(self.num_stages): - self.gbd.update_v('stage', i) + # anchor_out returns a list, + # each element contains (anchor, anchor_var) + self.anchor_out = self.anchor(rpn_feat) + + # Proposal RoI + # compute targets here when training + rois = None + bbox_head_out = None + max_overlap = None + self.bbox_head_list = [] + rois_list = [] + for i in range(self.roi_stages): # Proposal BBox - proposal_out = self.proposal(self.gbd) - self.gbd.update({"proposal_" + str(i): proposal_out}) - + rois = self.proposal( + self.inputs, + self.rpn_head_out, + self.anchor_out, + i, + rois, + bbox_head_out, + max_overlap=max_overlap) + rois_list.append(rois) + max_overlap = self.proposal.get_max_overlap() # BBox Head - bbox_head_out = self.bbox_head(self.gbd) - self.gbd.update({'bbox_head_' + str(i): bbox_head_out}) - - refine_bbox_out = self.proposal.refine_bbox(self.gbd) - self.gbd['proposal_' + str(i)].update(refine_bbox_out) - - if self.gbd['mode'] == 'infer': - bbox_out = self.proposal.post_process(self.gbd) - self.gbd.update(bbox_out) - - # Mask - mask_out = self.mask(self.gbd) - self.gbd.update(mask_out) - - # Mask Head - mask_head_out = 
self.mask_head(self.gbd) - self.gbd.update(mask_head_out) - - if self.gbd['mode'] == 'infer': - mask_out = self.mask.post_process(self.gbd) - self.gbd.update(mask_out) + bbox_feat, bbox_head_out, _ = self.bbox_head(body_feats, rois, + spatial_scale, i) + self.bbox_head_list.append(bbox_head_out) + + if self.inputs['mode'] == 'infer': + bbox_pred, bboxes = self.bbox_head.get_cascade_prediction( + self.bbox_head_list, rois_list) + self.bboxes = self.bbox_post_process( + bbox_pred, + bboxes, + self.inputs['im_shape'], + self.inputs['scale_factor'], + var_weight=3.) + + if self.with_mask: + rois = rois_list[-1] + rois_has_mask_int32 = None + if self.inputs['mode'] == 'train': + bbox_targets = self.proposal.get_targets()[-1] + self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois, + bbox_targets) + # Mask Head + self.mask_head_out = self.mask_head( + self.inputs, body_feats, self.bboxes, bbox_feat, + rois_has_mask_int32, spatial_scale) def get_loss(self, ): - outs = {} - losses = [] - - rpn_cls_loss, rpn_reg_loss = self.rpn_head.get_loss(self.gbd) - outs['loss_rpn_cls'] = rpn_cls_loss - outs['loss_rpn_reg'] = rpn_reg_loss - losses.extend([rpn_cls_loss, rpn_reg_loss]) - - bbox_cls_loss_list = [] - bbox_reg_loss_list = [] - for i in range(self.num_stages): - self.gbd.update_v('stage', i) - bbox_cls_loss, bbox_reg_loss = self.bbox_head.get_loss(self.gbd) - bbox_cls_loss_list.append(bbox_cls_loss) - bbox_reg_loss_list.append(bbox_reg_loss) - outs['loss_bbox_cls_' + str(i)] = bbox_cls_loss - outs['loss_bbox_reg_' + str(i)] = bbox_reg_loss - losses.extend(bbox_cls_loss_list) - losses.extend(bbox_reg_loss_list) - - mask_loss = self.mask_head.get_loss(self.gbd) - outs['mask_loss'] = mask_loss - losses.append(mask_loss) - - loss = fluid.layers.sum(losses) - outs['loss'] = loss - return outs - - def get_pred(self, ): - outs = { - 'bbox': self.gbd['predicted_bbox'].numpy(), - 'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(), - 'mask': 
self.gbd['predicted_mask'].numpy(), - 'im_id': self.gbd['im_id'].numpy(), + loss = {} + + # RPN loss + rpn_loss_inputs = self.anchor.generate_loss_inputs( + self.inputs, self.rpn_head_out, self.anchor_out) + loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs) + loss.update(loss_rpn) + + # BBox loss + bbox_targets_list = self.proposal.get_targets() + loss_bbox = self.bbox_head.get_loss(self.bbox_head_list, + bbox_targets_list) + loss.update(loss_bbox) + + if self.with_mask: + # Mask loss + mask_targets = self.mask.get_targets() + loss_mask = self.mask_head.get_loss(self.mask_head_out, + mask_targets) + loss.update(loss_mask) + + total_loss = paddle.add_n(list(loss.values())) + loss.update({'loss': total_loss}) + return loss + + def get_pred(self, return_numpy=True): + bbox, bbox_num = self.bboxes + output = { + 'bbox': bbox.numpy(), + 'bbox_num': bbox_num.numpy(), + 'im_id': self.inputs['im_id'].numpy(), } - return inputs + + if self.with_mask: + mask = self.mask_post_process(self.bboxes, self.mask_head_out, + self.inputs['im_shape'], + self.inputs['scale_factor']) + output.update(mask) + return output diff --git a/ppdet/modeling/architecture/faster_rcnn.py b/ppdet/modeling/architecture/faster_rcnn.py index 76609807ccdce0b08e5e50a73df8d78e77a32c03..956b2bd8bf0443f733cbf19e34447729588a2986 100644 --- a/ppdet/modeling/architecture/faster_rcnn.py +++ b/ppdet/modeling/architecture/faster_rcnn.py @@ -86,7 +86,7 @@ class FasterRCNN(BaseArch): # BBox loss bbox_targets = self.proposal.get_targets() - loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets) + loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets) loss.update(loss_bbox) total_loss = paddle.add_n(list(loss.values())) loss.update({'loss': total_loss}) diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index 34343a7c096a749d3715e132e37b3e70c0d8ff91..c073499e6333360b74a272398565d52ee0b47312 100644 --- 
a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -121,7 +121,7 @@ class MaskRCNN(BaseArch): # BBox loss bbox_targets = self.proposal.get_targets() - loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets) + loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets) loss.update(loss_bbox) # Mask loss diff --git a/ppdet/modeling/backbone/resnet.py b/ppdet/modeling/backbone/resnet.py index 27a5dcccae7f3de0666293a23927d8560ce0e99e..6de676808ceae8ca017b3831acc0daa56bafe588 100755 --- a/ppdet/modeling/backbone/resnet.py +++ b/ppdet/modeling/backbone/resnet.py @@ -166,16 +166,16 @@ class BottleNeck(nn.Layer): name=conv_name3) def forward(self, inputs): - if self.shortcut: - short = inputs - else: - short = self.short(inputs) out = self.branch2a(inputs) out = self.branch2b(out) out = self.branch2c(out) + if self.shortcut: + short = inputs + else: + short = self.short(inputs) - out = paddle.add(x=short, y=out) + out = paddle.add(x=out, y=short) out = F.relu(out) return out diff --git a/ppdet/modeling/bbox.py b/ppdet/modeling/bbox.py index 5e1f88644a27bd7e67e70f530304d1e4c527ed5c..771f59c7d583b330f05bd58a3711c218e5fabb33 100644 --- a/ppdet/modeling/bbox.py +++ b/ppdet/modeling/bbox.py @@ -127,7 +127,12 @@ class Proposal(object): rois_num_per_level=rpn_rois_num_list) return rois_collect, rois_num_collect - def generate_proposal_target(self, inputs, rois, rois_num, stage=0): + def generate_proposal_target(self, + inputs, + rois, + rois_num, + stage=0, + max_overlap=None): outs = self.proposal_target_generator( rpn_rois=rois, rpn_rois_num=rois_num, @@ -135,32 +140,36 @@ class Proposal(object): is_crowd=inputs['is_crowd'], gt_boxes=inputs['gt_bbox'], im_info=inputs['im_info'], - stage=stage) + stage=stage, + max_overlap=max_overlap) rois = outs[0] - rois_num = outs[-1] + max_overlap = outs[-1] + rois_num = outs[-2] targets = { 'labels_int32': outs[1], 'bbox_targets': outs[2], 'bbox_inside_weights': outs[3], 
'bbox_outside_weights': outs[4] } - return rois, rois_num, targets + return rois, rois_num, targets, max_overlap - def refine_bbox(self, rois, bbox_delta, stage=0): - out_dim = bbox_delta.shape[1] / 4 - bbox_delta_r = fluid.layers.reshape(bbox_delta, (-1, out_dim, 4)) - bbox_delta_s = fluid.layers.slice( + def refine_bbox(self, roi, bbox_delta, stage=1): + out_dim = bbox_delta.shape[1] // 4 + bbox_delta_r = paddle.reshape(bbox_delta, (-1, out_dim, 4)) + bbox_delta_s = paddle.slice( bbox_delta_r, axes=[1], starts=[1], ends=[2]) + reg_weights = [ + i / stage for i in self.proposal_target_generator.bbox_reg_weights + ] refined_bbox = ops.box_coder( - prior_box=rois, - prior_box_var=self.proposal_target_generator.bbox_reg_weights[ - stage], + prior_box=roi, + prior_box_var=reg_weights, target_box=bbox_delta_s, code_type='decode_center_size', box_normalized=False, axis=1) - refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) + refined_bbox = paddle.reshape(refined_bbox, shape=[-1, 4]) return refined_bbox def __call__(self, @@ -169,30 +178,26 @@ class Proposal(object): anchor_out, stage=0, proposal_out=None, - bbox_head_outs=None, - refined=False): - if refined: - assert proposal_out is not None, "If proposal has been refined, proposal_out should not be None." 
- return proposal_out + bbox_head_out=None, + max_overlap=None): if stage == 0: roi, rois_num = self.generate_proposal(inputs, rpn_head_out, anchor_out) - self.proposals_list = [] self.targets_list = [] + self.max_overlap = None else: - bbox_delta = bbox_head_outs[stage][0] - roi = self.refine_bbox(proposal_out[0], bbox_delta, stage - 1) + bbox_delta = bbox_head_out[1] + roi = self.refine_bbox(proposal_out[0], bbox_delta, stage) rois_num = proposal_out[1] if inputs['mode'] == 'train': - roi, rois_num, targets = self.generate_proposal_target( - inputs, roi, rois_num, stage) + roi, rois_num, targets, self.max_overlap = self.generate_proposal_target( + inputs, roi, rois_num, stage, self.max_overlap) self.targets_list.append(targets) - self.proposals_list.append((roi, rois_num)) return roi, rois_num def get_targets(self): return self.targets_list - def get_proposals(self): - return self.proposals_list + def get_max_overlap(self): + return self.max_overlap diff --git a/ppdet/modeling/head/bbox_head.py b/ppdet/modeling/head/bbox_head.py index dbccd4c58b9f371df0f5187c91b66d5b1e6d38f4..1d5c4b7b866487b167f0045e88e05a3502b6b984 100644 --- a/ppdet/modeling/head/bbox_head.py +++ b/ppdet/modeling/head/bbox_head.py @@ -29,39 +29,42 @@ from ..backbone.resnet import Blocks @register class TwoFCHead(nn.Layer): - __shared__ = ['num_stages'] + __shared__ = ['roi_stages'] - def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1): + def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, roi_stages=1): super(TwoFCHead, self).__init__() self.in_dim = in_dim self.mlp_dim = mlp_dim - self.num_stages = num_stages + self.roi_stages = roi_stages fan = in_dim * resolution * resolution self.fc6_list = [] self.fc6_relu_list = [] self.fc7_list = [] self.fc7_relu_list = [] - for stage in range(num_stages): + for stage in range(roi_stages): fc6_name = 'fc6_{}'.format(stage) fc7_name = 'fc7_{}'.format(stage) + lr_factor = 2**stage fc6 = self.add_sublayer( fc6_name, nn.Linear( 
in_dim * resolution * resolution, mlp_dim, weight_attr=ParamAttr( + learning_rate=lr_factor, initializer=XavierUniform(fan_out=fan)), bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) fc6_relu = self.add_sublayer(fc6_name + 'act', ReLU()) fc7 = self.add_sublayer( fc7_name, nn.Linear( mlp_dim, mlp_dim, - weight_attr=ParamAttr(initializer=XavierUniform()), + weight_attr=ParamAttr( + learning_rate=lr_factor, initializer=XavierUniform()), bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) fc7_relu = self.add_sublayer(fc7_name + 'act', ReLU()) self.fc6_list.append(fc6) self.fc6_relu_list.append(fc6_relu) @@ -102,16 +105,17 @@ class BBoxFeat(nn.Layer): super(BBoxFeat, self).__init__() self.roi_extractor = roi_extractor self.head_feat = head_feat + self.rois_feat_list = [] def forward(self, body_feats, rois, spatial_scale, stage=0): rois_feat = self.roi_extractor(body_feats, rois, spatial_scale) bbox_feat = self.head_feat(rois_feat, stage) - return bbox_feat, self.head_feat + return rois_feat, bbox_feat @register class BBoxHead(nn.Layer): - __shared__ = ['num_classes', 'num_stages'] + __shared__ = ['num_classes', 'roi_stages'] __inject__ = ['bbox_feat'] def __init__(self, @@ -119,49 +123,65 @@ class BBoxHead(nn.Layer): in_feat=1024, num_classes=81, cls_agnostic=False, - num_stages=1, + roi_stages=1, with_pool=False, score_stage=[0, 1, 2], delta_stage=[2]): super(BBoxHead, self).__init__() self.num_classes = num_classes + self.cls_agnostic = cls_agnostic self.delta_dim = 2 if cls_agnostic else num_classes self.bbox_feat = bbox_feat - self.num_stages = num_stages + self.roi_stages = roi_stages self.bbox_score_list = [] self.bbox_delta_list = [] + self.roi_feat_list = [[] for i in range(roi_stages)] self.with_pool = with_pool self.score_stage = score_stage self.delta_stage = delta_stage - for stage in range(num_stages): + 
for stage in range(roi_stages): score_name = 'bbox_score_{}'.format(stage) delta_name = 'bbox_delta_{}'.format(stage) + lr_factor = 2**stage bbox_score = self.add_sublayer( score_name, nn.Linear( in_feat, 1 * self.num_classes, - weight_attr=ParamAttr(initializer=Normal( - mean=0.0, std=0.01)), + weight_attr=ParamAttr( + learning_rate=lr_factor, + initializer=Normal( + mean=0.0, std=0.01)), bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) bbox_delta = self.add_sublayer( delta_name, nn.Linear( in_feat, 4 * self.delta_dim, - weight_attr=ParamAttr(initializer=Normal( - mean=0.0, std=0.001)), + weight_attr=ParamAttr( + learning_rate=lr_factor, + initializer=Normal( + mean=0.0, std=0.001)), bias_attr=ParamAttr( - learning_rate=2., regularizer=L2Decay(0.)))) + learning_rate=2. * lr_factor, regularizer=L2Decay(0.)))) self.bbox_score_list.append(bbox_score) self.bbox_delta_list.append(bbox_delta) - def forward(self, body_feats, rois, spatial_scale, stage=0): - bbox_feat, head_feat_func = self.bbox_feat(body_feats, rois, - spatial_scale, stage) - bbox_head_out = [] + def forward(self, + body_feats=None, + rois=None, + spatial_scale=None, + stage=0, + roi_stage=-1): + if rois is not None: + rois_feat, bbox_feat = self.bbox_feat(body_feats, rois, + spatial_scale, stage) + self.roi_feat_list[stage] = rois_feat + else: + rois_feat = self.roi_feat_list[roi_stage] + bbox_feat = self.bbox_feat.head_feat(rois_feat, stage) if self.with_pool: bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1) bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3]) @@ -170,8 +190,8 @@ class BBoxHead(nn.Layer): else: scores = self.bbox_score_list[stage](bbox_feat) deltas = self.bbox_delta_list[stage](bbox_feat) - bbox_head_out.append((scores, deltas)) - return bbox_feat, bbox_head_out, head_feat_func + bbox_head_out = (scores, deltas) + return bbox_feat, bbox_head_out, self.bbox_feat.head_feat def _get_head_loss(self, 
score, delta, target): # bbox cls @@ -198,38 +218,46 @@ class BBoxHead(nn.Layer): reg_name = 'loss_bbox_reg_{}'.format(lvl) loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta, target) - loss_bbox[cls_name] = loss_bbox_cls - loss_bbox[reg_name] = loss_bbox_reg + loss_weight = 1. / 2**lvl + loss_bbox[cls_name] = loss_bbox_cls * loss_weight + loss_bbox[reg_name] = loss_bbox_reg * loss_weight return loss_bbox def get_prediction(self, bbox_head_out, rois): - if len(bbox_head_out) == 1: - proposal, proposal_num = rois - score, delta = bbox_head_out[0] - bbox_prob = F.softmax(score) - delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) - else: - num_stage = len(rois) - proposal_list = [] - prob_list = [] - delta_list = [] - for stage, (proposals, bboxhead) in zip(rois, bboxheads): - score, delta = bboxhead - proposal, proposal_num = proposals - if stage in self.score_stage: - bbox_prob = F.softmax(score) - prob_list.append(bbox_prob) - if stage in self.delta_stage: - proposal_list.append(proposal) - delta_list.append(delta) - bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0) - delta = paddle.mean(paddle.stack(delta_list), axis=0) - proposal = paddle.mean(paddle.stack(proposal_list), axis=0) - delta = paddle.reshape(delta, (-1, self.out_dim, 4)) - if self.cls_agnostic: - N, C, M = delta.shape - delta = delta[:, 1:2, :] - delta = paddle.expand(delta, [N, self.num_classes, M]) + proposal, proposal_num = rois + score, delta = bbox_head_out + bbox_prob = F.softmax(score) + delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) + bbox_pred = (delta, bbox_prob) + return bbox_pred, rois + + def get_cascade_prediction(self, bbox_head_out, rois): + proposal_list = [] + prob_list = [] + delta_list = [] + for stage in range(len(rois)): + proposals = rois[stage] + bboxhead = bbox_head_out[stage] + score, delta = bboxhead + proposal, proposal_num = proposals + if stage in self.score_stage: + if stage < 2: + _, head_out, _ = self(stage=stage, roi_stage=-1) + score 
= head_out[0] + + bbox_prob = F.softmax(score) + prob_list.append(bbox_prob) + if stage in self.delta_stage: + proposal_list.append(proposal) + delta_list.append(delta) + bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0) + delta = paddle.mean(paddle.stack(delta_list), axis=0) + proposal = paddle.mean(paddle.stack(proposal_list), axis=0) + delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) + if self.cls_agnostic: + N, C, M = delta.shape + delta = delta[:, 1:2, :] + delta = paddle.expand(delta, [N, self.num_classes, M]) bboxes = (proposal, proposal_num) bbox_pred = (delta, bbox_prob) return bbox_pred, bboxes diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py index 656f52ceba39637814335ebd797ab5c97dea737f..929e2c87f051e0d9b9630df53e48cb1b073c5ff4 100644 --- a/ppdet/modeling/head/mask_head.py +++ b/ppdet/modeling/head/mask_head.py @@ -158,6 +158,7 @@ class MaskHead(Layer): stage=0, bbox_head_feat_func=None): bbox, bbox_num = bboxes + if bbox.shape[0] == 0: mask_head_out = bbox else: diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py index ac52db5b7f30d678d970b7685a07669ee76dccb2..96b2d197563fd2f9cdda2d905558c1a11686d255 100644 --- a/ppdet/modeling/layers.py +++ b/ppdet/modeling/layers.py @@ -176,11 +176,10 @@ class ProposalTargetGenerator(object): fg_thresh=[.5, ], bg_thresh_hi=[.5, ], bg_thresh_lo=[0., ], - bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]], + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], num_classes=81, use_random=True, - is_cls_agnostic=False, - is_cascade_rcnn=False): + is_cls_agnostic=False): super(ProposalTargetGenerator, self).__init__() self.batch_size_per_im = batch_size_per_im self.fg_fraction = fg_fraction @@ -191,7 +190,6 @@ class ProposalTargetGenerator(object): self.num_classes = num_classes self.use_random = use_random self.is_cls_agnostic = is_cls_agnostic - self.is_cascade_rcnn = is_cascade_rcnn def __call__(self, rpn_rois, @@ -200,19 +198,25 @@ class ProposalTargetGenerator(object): is_crowd, 
gt_boxes, im_info, - stage=0): + stage=0, + max_overlap=None): rpn_rois = rpn_rois.numpy() rpn_rois_num = rpn_rois_num.numpy() gt_classes = gt_classes.numpy() gt_boxes = gt_boxes.numpy() is_crowd = is_crowd.numpy() im_info = im_info.numpy() + max_overlap = max_overlap if max_overlap is None else max_overlap.numpy( + ) + reg_weights = [i / (stage + 1) for i in self.bbox_reg_weights] + is_cascade = True if stage > 0 else False + num_classes = 2 if is_cascade else self.num_classes outs = generate_proposal_target( rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info, self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], - self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], - self.bbox_reg_weights[stage], self.num_classes, self.use_random, - self.is_cls_agnostic, self.is_cascade_rcnn) + self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], reg_weights, + num_classes, self.use_random, self.is_cls_agnostic, is_cascade, + max_overlap) outs = [to_tensor(v) for v in outs] for v in outs: v.stop_gradient = True @@ -268,7 +272,12 @@ class RCNNBox(object): self.box_normalized = box_normalized self.axis = axis - def __call__(self, bbox_head_out, rois, im_shape, scale_factor): + def __call__(self, + bbox_head_out, + rois, + im_shape, + scale_factor, + var_weight=1.): bbox_pred, cls_prob = bbox_head_out roi, rois_num = rois origin_shape = im_shape / scale_factor @@ -287,9 +296,10 @@ class RCNNBox(object): origin_shape = paddle.concat(origin_shape_list) bbox = roi / scale + prior_box_var = [i / var_weight for i in self.prior_box_var] bbox = ops.box_coder( prior_box=bbox, - prior_box_var=self.prior_box_var, + prior_box_var=prior_box_var, target_box=bbox_pred, code_type=self.code_type, box_normalized=self.box_normalized, diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py index 0b7eca1af744ad16e052da2d61f2f984ecf383c7..f88e6c66ba79df1996d566264358877764eab7ea 100644 --- a/ppdet/modeling/post_process.py +++ b/ppdet/modeling/post_process.py 
@@ -16,8 +16,14 @@ class BBoxPostProcess(object): self.decode = decode self.nms = nms - def __call__(self, head_out, rois, im_shape, scale_factor=None): - bboxes, score = self.decode(head_out, rois, im_shape, scale_factor) + def __call__(self, + head_out, + rois, + im_shape, + scale_factor=None, + var_weight=1.): + bboxes, score = self.decode(head_out, rois, im_shape, scale_factor, + var_weight) bbox_pred, bbox_num, _ = self.nms(bboxes, score) return bbox_pred, bbox_num diff --git a/ppdet/py_op/bbox.py b/ppdet/py_op/bbox.py index 33f0c8837658d73609a6e9d6be37de2eae399b51..dec8819169df8bfbc5561d960d5660e096e08795 100755 --- a/ppdet/py_op/bbox.py +++ b/ppdet/py_op/bbox.py @@ -72,7 +72,7 @@ def expand_bbox(bboxes, scale): w_half *= scale h_half *= scale - bboxes_exp = np.zeros(bboxes.shape) + bboxes_exp = np.zeros(bboxes.shape, dtype=np.float32) bboxes_exp[:, 0] = x_c - w_half bboxes_exp[:, 2] = x_c + w_half bboxes_exp[:, 1] = y_c - h_half @@ -107,18 +107,20 @@ def bbox_overlaps(bboxes1, bboxes2): area1 = w1 * h1 area2 = w2 * h2 - overlaps = np.zeros((bboxes1.shape[0], bboxes2.shape[0])) - for ind1 in range(bboxes1.shape[0]): - for ind2 in range(bboxes2.shape[0]): - inter_x1 = np.maximum(bboxes1[ind1, 0], bboxes2[ind2, 0]) - inter_y1 = np.maximum(bboxes1[ind1, 1], bboxes2[ind2, 1]) - inter_x2 = np.minimum(bboxes1[ind1, 2], bboxes2[ind2, 2]) - inter_y2 = np.minimum(bboxes1[ind1, 3], bboxes2[ind2, 3]) - inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0) - inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0) - inter_area = inter_w * inter_h - iou = inter_area * 1.0 / (area1[ind1] + area2[ind2] - inter_area) - overlaps[ind1, ind2] = iou + boxes1_x1, boxes1_y1, boxes1_x2, boxes1_y2 = np.split(bboxes1, 4, axis=1) + boxes2_x1, boxes2_y1, boxes2_x2, boxes2_y2 = np.split(bboxes2, 4, axis=1) + + all_pairs_min_ymax = np.minimum(boxes1_y2, np.transpose(boxes2_y2)) + all_pairs_max_ymin = np.maximum(boxes1_y1, np.transpose(boxes2_y1)) + inter_h = np.maximum(all_pairs_min_ymax - 
all_pairs_max_ymin + 1, 0.) + all_pairs_min_xmax = np.minimum(boxes1_x2, np.transpose(boxes2_x2)) + all_pairs_max_xmin = np.maximum(boxes1_x1, np.transpose(boxes2_x1)) + inter_w = np.maximum(all_pairs_min_xmax - all_pairs_max_xmin + 1, 0.) + + inter_area = inter_w * inter_h + + union_area = np.expand_dims(area1, 1) + np.expand_dims(area2, 0) + overlaps = inter_area / (union_area - inter_area) return overlaps diff --git a/ppdet/py_op/target.py b/ppdet/py_op/target.py index 27f280902dcdf28321b612ba523a417cd82726ef..6278adf283ffc4c6a65690b6f4d420b7ab4a2913 100755 --- a/ppdet/py_op/target.py +++ b/ppdet/py_op/target.py @@ -90,7 +90,6 @@ def generate_rpn_anchor_target(anchors, @jit def label_anchor(anchors, gt_boxes): iou = bbox_overlaps(anchors, gt_boxes) - # every gt's anchor's index gt_bbox_anchor_inds = iou.argmax(axis=0) gt_bbox_anchor_iou = iou[gt_bbox_anchor_inds, np.arange(iou.shape[1])] @@ -148,6 +147,16 @@ def sample_anchor(anchor_gt_bbox_iou, return fg_inds, bg_inds, fg_fake_inds, fake_num +@jit +def filter_roi(rois, max_overlap): + ws = rois[:, 2] - rois[:, 0] + 1 + hs = rois[:, 3] - rois[:, 1] + 1 + keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1))[0] + if len(keep) > 0: + return rois[keep, :] + return np.zeros((1, 4)).astype('float32') + + @jit def generate_proposal_target(rpn_rois, rpn_rois_num, @@ -164,42 +173,38 @@ def generate_proposal_target(rpn_rois, class_nums=81, use_random=True, is_cls_agnostic=False, - is_cascade_rcnn=False): + is_cascade_rcnn=False, + max_overlaps=None): rois = [] tgt_labels = [] tgt_deltas = [] rois_inside_weights = [] rois_outside_weights = [] + sampled_max_overlaps = [] new_rois_num = [] st_num = 0 end_num = 0 for im_i in range(len(rpn_rois_num)): length = rpn_rois_num[im_i] end_num += length - rpn_roi = rpn_rois[st_num:end_num] + max_overlap = max_overlaps[st_num:end_num] if is_cascade_rcnn else None im_scale = im_info[im_i][2] rpn_roi = rpn_roi / im_scale gt_bbox = gt_boxes[im_i] if is_cascade_rcnn: - rpn_roi = 
rpn_roi[gt_bbox.shape[0]:, :] - bbox = np.vstack([gt_bbox, rpn_roi]) + rpn_roi = filter_roi(rpn_roi, max_overlap) + bbox = np.vstack([gt_bbox, rpn_roi]).astype('float32') # Step1: label bbox - roi_gt_bbox_inds, roi_gt_bbox_iou, labels, = label_bbox( + roi_gt_bbox_inds, labels, max_overlap = label_bbox( bbox, gt_bbox, gt_classes[im_i], is_crowd[im_i]) # Step2: sample bbox - if is_cascade_rcnn: - ws = bbox[:, 2] - bbox[:, 0] + 1 - hs = bbox[:, 3] - bbox[:, 1] + 1 - keep = np.where((ws > 0) & (hs > 0))[0] - bbox = bbox[keep] - fg_inds, bg_inds, fg_nums = sample_bbox( - roi_gt_bbox_iou, batch_size_per_im, fg_fraction, fg_thresh, + max_overlap, batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, use_random, is_cls_agnostic, is_cascade_rcnn) @@ -210,10 +215,12 @@ def generate_proposal_target(rpn_rois, sampled_labels[fg_nums:] = 0 sampled_boxes = bbox[sampled_inds] + sampled_max_overlap = max_overlap[sampled_inds] sampled_gt_boxes = gt_bbox[roi_gt_bbox_inds[sampled_inds]] - sampled_gt_boxes[fg_nums:, :] = gt_bbox[0] + sampled_gt_boxes[fg_nums:, :] = 0 sampled_deltas = compute_bbox_targets(sampled_boxes, sampled_gt_boxes, sampled_labels, bbox_reg_weights) + sampled_deltas[fg_nums:, :] = 0 sampled_deltas, bbox_inside_weights = expand_bbox_targets( sampled_deltas, class_nums, is_cls_agnostic) bbox_outside_weights = np.array( @@ -228,6 +235,7 @@ def generate_proposal_target(rpn_rois, tgt_deltas.append(sampled_deltas) rois_inside_weights.append(bbox_inside_weights) rois_outside_weights.append(bbox_outside_weights) + sampled_max_overlaps.append(sampled_max_overlap) rois = np.concatenate(rois, axis=0).astype(np.float32) tgt_labels = np.concatenate( @@ -237,23 +245,20 @@ def generate_proposal_target(rpn_rois, rois_inside_weights, axis=0).astype(np.float32) rois_outside_weights = np.concatenate( rois_outside_weights, axis=0).astype(np.float32) + sampled_max_overlaps = np.concatenate( + sampled_max_overlaps, 
axis=0).astype(np.float32) new_rois_num = np.asarray(new_rois_num, np.int32) - return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num + return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num, sampled_max_overlaps @jit -def label_bbox(boxes, - gt_boxes, - gt_classes, - is_crowd, - class_nums=81, - is_cascade_rcnn=False): +def label_bbox(boxes, gt_boxes, gt_classes, is_crowd, class_nums=81): iou = bbox_overlaps(boxes, gt_boxes) # every roi's gt box's index roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32) - roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums)) + roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums), dtype=np.float32) iou_argmax = iou.argmax(axis=1) iou_max = iou.max(axis=1) @@ -267,13 +272,14 @@ def label_bbox(boxes, crowd_ind = np.where(is_crowd)[0] roi_gt_bbox_iou[crowd_ind] = -1 + max_overlap = roi_gt_bbox_iou.max(axis=1) labels = roi_gt_bbox_iou.argmax(axis=1) - return roi_gt_bbox_inds, roi_gt_bbox_iou, labels + return roi_gt_bbox_inds, labels, max_overlap @jit -def sample_bbox(roi_gt_bbox_iou, +def sample_bbox(max_overlap, batch_size_per_im, fg_fraction, fg_thresh, @@ -285,27 +291,26 @@ def sample_bbox(roi_gt_bbox_iou, is_cls_agnostic=False, is_cascade_rcnn=False): - roi_gt_bbox_iou_max = roi_gt_bbox_iou.max(axis=1) rois_per_image = int(batch_size_per_im) fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) if is_cascade_rcnn: - fg_inds = np.where(roi_gt_bbox_iou_max >= fg_thresh)[0] - bg_inds = np.where((roi_gt_bbox_iou_max < bg_thresh_hi) & ( - roi_gt_bbox_iou_max >= bg_thresh_lo))[0] + fg_inds = np.where(max_overlap >= fg_thresh)[0] + bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >= + bg_thresh_lo))[0] fg_nums = fg_inds.shape[0] bg_nums = bg_inds.shape[0] else: # sampe fg - fg_inds = np.where(roi_gt_bbox_iou_max >= fg_thresh)[0] + fg_inds = np.where(max_overlap >= fg_thresh)[0] fg_nums = np.minimum(fg_rois_per_im, 
fg_inds.shape[0]) if (fg_inds.shape[0] > fg_nums) and use_random: fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False) fg_inds = fg_inds[:fg_nums] # sample bg - bg_inds = np.where((roi_gt_bbox_iou_max < bg_thresh_hi) & ( - roi_gt_bbox_iou_max >= bg_thresh_lo))[0] + bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >= + bg_thresh_lo))[0] bg_nums = rois_per_image - fg_nums bg_nums = np.minimum(bg_nums, bg_inds.shape[0]) if (bg_inds.shape[0] > bg_nums) and use_random: diff --git a/tools/train.py b/tools/train.py index f9383dfa6e2857f42022ce2951c2e9ea310ae3fd..0cc3d249dec6b7c407f66dd5a48137614e70fa51 100755 --- a/tools/train.py +++ b/tools/train.py @@ -171,12 +171,7 @@ def run(FLAGS, cfg, place): # Model Backward loss = outputs['loss'] - if ParallelEnv().nranks > 1: - loss = model.scale_loss(loss) - loss.backward() - model.apply_collective_grads() - else: - loss.backward() + loss.backward() optimizer.step() curr_lr = optimizer.get_lr() lr.step()