diff --git a/configs/cascade_rcnn_cls_aware_r101_vd_fpn_ms_test.yml b/configs/cascade_rcnn_cls_aware_r101_vd_fpn_ms_test.yml new file mode 100644 index 0000000000000000000000000000000000000000..c03c5474d1388383a5958a9fa71895c8c73117f0 --- /dev/null +++ b/configs/cascade_rcnn_cls_aware_r101_vd_fpn_ms_test.yml @@ -0,0 +1,157 @@ +architecture: CascadeRCNNClsAware +max_iters: 90000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar +weights: output/cascade_rcnn_cls_aware_r101_vd_fpn_ms_test/model_final +metric: COCO +num_classes: 81 + +CascadeRCNNClsAware: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + +ResNet: + norm_type: bn + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + variant: d + +FPN: + min_level: 2 + max_level: 6 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + min_level: 2 + max_level: 6 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_positive_overlap: 0.7 + rpn_negative_overlap: 0.3 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + min_level: 2 + max_level: 5 + box_resolution: 14 + sampling_ratio: 2 + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_lo: [0.0, 0.0, 0.0] + bg_thresh_hi: [0.5, 0.6, 0.7] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + class_aware: True + +CascadeBBoxHead: + head: 
CascadeTwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +CascadeTwoFCHead: + mlp_dim: 1024 + +MultiScaleTEST: + score_thresh: 0.05 + nms_thresh: 0.5 + detections_per_im: 100 + enable_voting: true + vote_thresh: 0.9 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.0 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +EvalReader: + batch_size: 1 + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + multi_scale: true + num_scales: 18 + use_flip: true + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: + - 0.485 + - 0.456 + - 0.406 + std: + - 0.229 + - 0.224 + - 0.225 + - !MultiscaleTestResize + origin_target_size: 800 + origin_max_size: 1333 + target_size: + - 400 + - 500 + - 600 + - 700 + - 900 + - 1000 + - 1100 + - 1200 + max_size: 2000 + use_flip: true + - !Permute + channel_first: true + to_bgr: false + - !PadMultiScaleTest + pad_to_stride: 32 + worker_num: 2 diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index 55f72bc58e0c4be0af2d3f6d78540a2b9bd2ff4f..5445e4f87625ce7636e2136fad6a5b626502052f 100644 --- a/docs/MODEL_ZOO.md +++ b/docs/MODEL_ZOO.md @@ -65,6 +65,7 @@ The backbone models pretrained on ImageNet are available. 
All backbone models ar | SENet154-vd-FPN | Faster | 1 | 1.44x | 3.408 | 42.9 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_se154_vd_fpn_s1x.tar) | | SENet154-vd-FPN | Mask | 1 | 1.44x | 3.233 | 44.0 | 38.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_se154_vd_fpn_s1x.tar) | | ResNet101-vd-FPN | CascadeClsAware Faster | 2 | 1x | - | 44.7(softnms) | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cls_aware_r101_vd_fpn_1x_softnms.tar) | +| ResNet101-vd-FPN | CascadeClsAware Faster | 2 | 1x | - | 46.5(multi-scale test) | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cls_aware_r101_vd_fpn_1x_softnms.tar) | ### Deformable ConvNets v2 diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index 30fcb25bf5e7d2dae918481af7fdb0840addb06e..8ac0983e7e30537a84335a8f5abed29b550861ba 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -62,7 +62,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 | SENet154-vd-FPN | Faster | 1 | 1.44x | 3.408 | 42.9 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_se154_vd_fpn_s1x.tar) | | SENet154-vd-FPN | Mask | 1 | 1.44x | 3.233 | 44.0 | 38.7 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_se154_vd_fpn_s1x.tar) | | ResNet101-vd-FPN | CascadeClsAware Faster | 2 | 1x | - | 44.7(softnms) | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cls_aware_r101_vd_fpn_1x_softnms.tar) | - +| ResNet101-vd-FPN | CascadeClsAware Faster | 2 | 1x | - | 46.5(multi-scale test) | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cls_aware_r101_vd_fpn_1x_softnms.tar) | ### Deformable 卷积网络v2 diff --git a/ppdet/modeling/architectures/cascade_rcnn.py b/ppdet/modeling/architectures/cascade_rcnn.py index b239e4d6615497c98aca00788154c0ee2920e2b2..60e15ad1c87c542227ddf265c8bb50ae42b3e3ee 100644 --- a/ppdet/modeling/architectures/cascade_rcnn.py +++ 
b/ppdet/modeling/architectures/cascade_rcnn.py
@@ -187,7 +187,6 @@ class CascadeRCNN(object):
             # backbone
             body_feats = self.backbone(im)
             result.update(body_feats)
-            body_feat_names = list(body_feats.keys())
 
             # FPN
             if self.fpn is not None:
diff --git a/ppdet/modeling/architectures/cascade_rcnn_cls_aware.py b/ppdet/modeling/architectures/cascade_rcnn_cls_aware.py
index 4c3955fd502fd9626da1a0f15b348a9b3f6c63fc..364007ba8ea7c737361a52e3b602473666101a09 100644
--- a/ppdet/modeling/architectures/cascade_rcnn_cls_aware.py
+++ b/ppdet/modeling/architectures/cascade_rcnn_cls_aware.py
@@ -23,8 +23,8 @@ from collections import OrderedDict
 import copy
 
 import paddle.fluid as fluid
-
 from ppdet.core.workspace import register
+from .input_helper import multiscale_def
 
 __all__ = ['CascadeRCNNClsAware']
 
@@ -170,6 +170,107 @@ class CascadeRCNNClsAware(object):
                 self.cascade_decoded_box, self.cascade_bbox_reg_weights)
             return pred
 
+    def build_multi_scale(self, feed_vars):
+        """Build the test-time graph for every (image, im_info) input pair.
+
+        Every pair named in ``self.im_info_names`` is run through the
+        backbone, the optional FPN, the RPN and the three cascade stages.
+        NMS is not applied here: for pair ``i`` the 'bbox' and 'score'
+        outputs of ``get_prediction_cls_aware(return_box_score=True)`` are
+        stored under ``bbox_{i}`` / ``score_{i}``, with a ``_flip`` suffix
+        when the image variable's name contains 'flip'.
+
+        Args:
+            feed_vars (dict): input variables; must hold 'image',
+                'im_shape', 'im_info' and every name in
+                ``self.im_info_names``.
+
+        Returns:
+            dict: 'im_shape', the backbone outputs and the per-pair
+                bbox/score entries.
+        """
+        required_fields = ['image', 'im_shape', 'im_info']
+        self._input_check(required_fields, feed_vars)
+
+        result = {}
+        im_shape = feed_vars['im_shape']
+        result['im_shape'] = im_shape
+
+        # The regression weights are the same for every scale, so the
+        # constant tensors are built once instead of once per input pair.
+        self.cascade_var_v = []
+        for stage in range(3):
+            var_v = np.array(
+                self.cascade_bbox_reg_weights[stage], dtype="float32")
+            prior_box_var = fluid.layers.create_tensor(dtype="float32")
+            fluid.layers.assign(input=var_v, output=prior_box_var)
+            self.cascade_var_v.append(prior_box_var)
+
+        for i in range(len(self.im_info_names) // 2):
+            im = feed_vars[self.im_info_names[2 * i]]
+            im_info = feed_vars[self.im_info_names[2 * i + 1]]
+
+            # backbone
+            body_feats = self.backbone(im)
+            result.update(body_feats)
+            # FPN
+            if self.fpn is not None:
+                body_feats, spatial_scale = self.fpn.get_output(body_feats)
+
+            # rpn proposals
+            rpn_rois = self.rpn_head.get_proposals(
+                body_feats, im_info, mode="test")
+
+            # reset the per-pair lists passed to the prediction head below
+            self.cascade_decoded_box = []
+            self.cascade_cls_prob = []
+
+            for stage in range(3):
+                # stage 0 pools from the RPN proposals, later stages from
+                # the boxes decoded by the previous stage
+                if stage > 0:
+                    pool_rois = decoded_assign_box
+                else:
+                    pool_rois = rpn_rois
+
+                # extract roi features
+                roi_feat = self.roi_extractor(body_feats, pool_rois,
+                                              spatial_scale)
+
+                # bbox head
+                cls_score, bbox_pred = self.bbox_head.get_output(
+                    roi_feat,
+                    cls_agnostic_bbox_reg=self.bbox_head.num_classes,
+                    wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
+                    name='_' + str(stage + 1))
+
+                cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
+
+                decoded_box, decoded_assign_box = fluid.layers.box_decoder_and_assign(
+                    pool_rois, self.cascade_var_v[stage], bbox_pred, cls_prob,
+                    self.bbox_clip)
+
+                self.cascade_cls_prob.append(cls_prob)
+                self.cascade_decoded_box.append(decoded_box)
+
+            pred = self.bbox_head.get_prediction_cls_aware(
+                im_info,
+                im_shape,
+                self.cascade_cls_prob,
+                self.cascade_decoded_box,
+                self.cascade_bbox_reg_weights,
+                return_box_score=True)
+
+            bbox_name = 'bbox_' + str(i)
+            score_name = 'score_' + str(i)
+            if 'flip' in im.name:
+                bbox_name += '_flip'
+                score_name += '_flip'
+            result[bbox_name] = pred['bbox']
+            result[score_name] = pred['score']
+
+        return result
+
     def _inputs_def(self, image_shape):
         im_shape = [None] + image_shape
         # yapf: disable
@@ -192,9 +293,20 @@ class CascadeRCNNClsAware(object):
                 'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd',
                 'gt_mask'
             ],
+            multi_scale=False,
+            num_scales=-1,
+            use_flip=None,
             use_dataloader=True,
             iterable=False):
         inputs_def = self._inputs_def(image_shape)
+        fields = copy.deepcopy(fields)
+        if multi_scale:
+            ms_def, ms_fields = multiscale_def(image_shape, num_scales,
+                                               use_flip)
+            inputs_def.update(ms_def)
+            fields += ms_fields
+            self.im_info_names = ['image', 'im_info'] + ms_fields
+
         feed_vars = OrderedDict([(key, fluid.data(
             name=key,
             shape=inputs_def[key]['shape'],
@@ -207,10 +319,19 @@ class CascadeRCNNClsAware(object):
             iterable=iterable) if use_dataloader else None
         return feed_vars, loader
 
+    def _input_check(self, require_fields, feed_vars):
+        """Assert that every name in require_fields is in feed_vars."""
+        for var in require_fields:
+            assert var in feed_vars, \
+                "{} has no {} field".format(feed_vars, var)
+
     def train(self, feed_vars):
         return self.build(feed_vars, 'train')
 
-    def eval(self, feed_vars):
+    def eval(self, feed_vars, multi_scale=None):
+        """Build the eval graph; multi-scale one if multi_scale is truthy."""
+        if multi_scale:
+            return self.build_multi_scale(feed_vars)
         return self.build(feed_vars, 'test')
 
     def test(self, feed_vars):
diff --git a/ppdet/modeling/roi_heads/cascade_head.py b/ppdet/modeling/roi_heads/cascade_head.py
index e32584bd3c0374b6d3a5c39ed78b1032b1493f50..279db089747f5fe9195953c38dfd95081941642e 100644
--- a/ppdet/modeling/roi_heads/cascade_head.py
+++ b/ppdet/modeling/roi_heads/cascade_head.py
@@ -220,8 +220,13 @@ class CascadeBBoxHead(object):
         pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
         return {"bbox": pred_result}
 
-    def get_prediction_cls_aware(self, im_info, im_shape, cascade_cls_prob,
-                                 cascade_decoded_box, cascade_bbox_reg_weights):
+    def get_prediction_cls_aware(self,
+                                 im_info,
+                                 im_shape,
+                                 cascade_cls_prob,
+                                 cascade_decoded_box,
+                                 cascade_bbox_reg_weights,
+                                 return_box_score=False):
         '''
         get_prediction_cls_aware: predict bbox for each class
         '''
@@ -247,6 +252,8 @@ class CascadeBBoxHead(object):
             decoded_bbox, shape=(-1, self.num_classes, 4))
 
         box_out = fluid.layers.box_clip(input=decoded_bbox, im_info=im_shape)
+        if return_box_score:
+            return {'bbox': box_out, 'score': sum_cascade_cls_prob}
         pred_result = self.nms(bboxes=box_out, scores=sum_cascade_cls_prob)
         return {"bbox": pred_result}
 