From 06e6afcf262ebd8cc843b7372e014a19ba4a2eca Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Fri, 6 Nov 2020 13:49:54 +0800 Subject: [PATCH] update roi extractor & post_process (#1664) --- configs/mask_rcnn_r50_fpn_1x.yml | 86 ++++++------ configs/yolov3_darknet.yml | 34 ++--- ppdet/modeling/__init__.py | 2 + ppdet/modeling/architecture/mask_rcnn.py | 14 +- ppdet/modeling/architecture/yolo.py | 10 +- ppdet/modeling/bbox.py | 118 +---------------- ppdet/modeling/head/__init__.py | 2 + ppdet/modeling/head/bbox_head.py | 40 +++++- ppdet/modeling/head/roi_extractor.py | 72 ++++++++++ ppdet/modeling/layers.py | 162 ++++++++++++++--------- ppdet/modeling/mask.py | 32 +---- ppdet/modeling/ops.py | 5 +- ppdet/modeling/post_process.py | 50 +++++++ ppdet/utils/eval_utils.py | 2 +- 14 files changed, 358 insertions(+), 271 deletions(-) create mode 100644 ppdet/modeling/head/roi_extractor.py create mode 100644 ppdet/modeling/post_process.py diff --git a/configs/mask_rcnn_r50_fpn_1x.yml b/configs/mask_rcnn_r50_fpn_1x.yml index c68d726cc..26bb1df9d 100644 --- a/configs/mask_rcnn_r50_fpn_1x.yml +++ b/configs/mask_rcnn_r50_fpn_1x.yml @@ -13,7 +13,7 @@ load_static_weights: True # Model Achitecture MaskRCNN: # model anchor info flow - anchor: AnchorRPN + anchor: Anchor proposal: Proposal mask: Mask # model feat info flow @@ -22,6 +22,9 @@ MaskRCNN: rpn_head: RPNHead bbox_head: BBoxHead mask_head: MaskHead + # post process + bbox_post_process: BBoxPostProcess + mask_post_process: MaskPostProcess ResNet: # index 0 stands for res2 @@ -38,7 +41,6 @@ FPN: max_level: 4 spatial_scale: [0.25, 0.125, 0.0625, 0.03125] - RPNHead: rpn_feat: name: RPNFeat @@ -47,33 +49,7 @@ RPNHead: anchor_per_position: 3 rpn_channel: 256 -BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: - name: RoIExtractor - resolution: 7 - sampling_ratio: 2 - head_feat: - name: TwoFCHead - in_dim: 256 - mlp_dim: 1024 - in_feat: 1024 - -MaskHead: - mask_feat: - name: MaskFeat - num_convs: 4 - feat_in: 256 - 
feat_out: 256 - mask_roi_extractor: - name: RoIExtractor - resolution: 14 - sampling_ratio: 2 - share_bbox_feat: False - feat_in: 256 - -AnchorRPN: +Anchor: anchor_generator: name: AnchorGeneratorRPN aspect_ratios: [0.5, 1.0, 2.0] @@ -104,22 +80,52 @@ Proposal: bg_thresh_lo: [0.0,] fg_thresh: [0.5,] fg_fraction: 0.25 - bbox_post_process: # used in infer - name: BBoxPostProcess - # decode -> clip -> nms - decode_clip_nms: - name: DecodeClipNms - keep_top_k: 100 - score_threshold: 0.05 - nms_threshold: 0.5 + +BBoxHead: + bbox_feat: + name: BBoxFeat + roi_extractor: + name: RoIAlign + resolution: 7 + sampling_ratio: 2 + head_feat: + name: TwoFCHead + in_dim: 256 + mlp_dim: 1024 + in_feat: 1024 + +BBoxPostProcess: + decode: + name: RCNNBox + num_classes: 81 + batch_size: 1 + nms: + name: MultiClassNMS + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 Mask: mask_target_generator: name: MaskTargetGenerator mask_resolution: 28 - mask_post_process: - name: MaskPostProcess - mask_resolution: 28 + +MaskHead: + mask_feat: + name: MaskFeat + num_convs: 4 + feat_in: 256 + feat_out: 256 + mask_roi_extractor: + name: RoIAlign + resolution: 14 + sampling_ratio: 2 + share_bbox_feat: False + feat_in: 256 + + +MaskPostProcess: + mask_resolution: 28 # Train diff --git a/configs/yolov3_darknet.yml b/configs/yolov3_darknet.yml index ff0b88fa6..47392330e 100644 --- a/configs/yolov3_darknet.yml +++ b/configs/yolov3_darknet.yml @@ -15,6 +15,7 @@ YOLOv3: anchor: AnchorYOLO backbone: DarkNet yolo_head: YOLOv3Head + post_process: BBoxPostProcess DarkNet: depth: 53 @@ -29,27 +30,28 @@ YOLOv3Head: label_smooth: true anchor_per_position: 3 +BBoxPostProcess: + decode: + name: YOLOBox + conf_thresh: 0.005 + downsample_ratio: 32 + clip_bbox: True + nms: + name: MultiClassNMS + keep_top_k: 100 + score_threshold: 0.01 + nms_threshold: 0.45 + nms_top_k: 1000 + normalized: false + background_label: -1 + + AnchorYOLO: anchor_generator: name: AnchorGeneratorYOLO anchors: [10, 13, 16, 30, 33, 
23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchor_post_process: - name: BBoxPostProcessYOLO - # decode -> clip - yolo_box: - name: YOLOBox - conf_thresh: 0.005 - downsample_ratio: 32 - clip_bbox: True - nms: - name: MultiClassNMS - keep_top_k: 100 - score_threshold: 0.01 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - background_label: -1 + LearningRate: base_lr: 0.001 diff --git a/ppdet/modeling/__init__.py b/ppdet/modeling/__init__.py index 1bcf4e779..8819fdf04 100644 --- a/ppdet/modeling/__init__.py +++ b/ppdet/modeling/__init__.py @@ -5,6 +5,7 @@ from . import backbone from . import neck from . import head from . import architecture +from . import post_process from .ops import * from .bbox import * @@ -13,3 +14,4 @@ from .backbone import * from .neck import * from .head import * from .architecture import * +from .post_process import * diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index 6880a55f9..615e88d0d 100644 --- a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -21,6 +21,8 @@ class MaskRCNN(BaseArch): 'rpn_head', 'bbox_head', 'mask_head', + 'bbox_post_process', + 'mask_post_process', ] def __init__(self, @@ -31,6 +33,8 @@ class MaskRCNN(BaseArch): rpn_head, bbox_head, mask_head, + bbox_post_process, + mask_post_process, neck=None): super(MaskRCNN, self).__init__() self.anchor = anchor @@ -41,6 +45,8 @@ class MaskRCNN(BaseArch): self.rpn_head = rpn_head self.bbox_head = bbox_head self.mask_head = mask_head + self.bbox_post_process = bbox_post_process + self.mask_post_process = mask_post_process def model_arch(self): # Backbone @@ -72,9 +78,11 @@ class MaskRCNN(BaseArch): rois_has_mask_int32 = None if self.inputs['mode'] == 'infer': + bbox_pred, bboxes = self.bbox_head.get_prediction( + self.bbox_head_out, rois) # Refine bbox by the output from bbox_head at test stage - self.bboxes = 
self.proposal.post_process(self.inputs, - self.bbox_head_out, rois) + self.bboxes = self.bbox_post_process(bbox_pred, bboxes, + self.inputs['im_info']) else: # Proposal RoI for Mask branch # bboxes update at training stage only @@ -111,7 +119,7 @@ class MaskRCNN(BaseArch): return loss def infer(self, ): - mask = self.mask.post_process(self.bboxes, self.mask_head_out, + mask = self.mask_post_process(self.bboxes, self.mask_head_out, self.inputs['im_info']) bbox, bbox_num = self.bboxes output = { diff --git a/ppdet/modeling/architecture/yolo.py b/ppdet/modeling/architecture/yolo.py index f5045f60e..7f274bc48 100644 --- a/ppdet/modeling/architecture/yolo.py +++ b/ppdet/modeling/architecture/yolo.py @@ -15,13 +15,15 @@ class YOLOv3(BaseArch): 'anchor', 'backbone', 'yolo_head', + 'post_process', ] - def __init__(self, anchor, backbone, yolo_head): + def __init__(self, anchor, backbone, yolo_head, post_process): super(YOLOv3, self).__init__() self.anchor = anchor self.backbone = backbone self.yolo_head = yolo_head + self.post_process = post_process def model_arch(self, ): # Backbone @@ -40,11 +42,11 @@ class YOLOv3(BaseArch): return yolo_loss def infer(self, ): - bbox, bbox_num = self.anchor.post_process( - self.inputs['im_size'], self.yolo_head_out, self.mask_anchors) + bbox, bbox_num = self.post_process( + self.yolo_head_out, self.mask_anchors, self.inputs['im_size']) outs = { "bbox": bbox.numpy(), - "bbox_num": bbox_num, + "bbox_num": bbox_num.numpy(), 'im_id': self.inputs['im_id'].numpy() } return outs diff --git a/ppdet/modeling/bbox.py b/ppdet/modeling/bbox.py index 68fb55246..78b7e787a 100644 --- a/ppdet/modeling/bbox.py +++ b/ppdet/modeling/bbox.py @@ -8,105 +8,11 @@ from . 
import ops @register -class BBoxPostProcess(object): - __shared__ = ['num_classes'] - __inject__ = ['decode_clip_nms'] - - def __init__(self, - decode_clip_nms, - num_classes=81, - cls_agnostic=False, - decode=None, - clip=None, - nms=None, - score_stage=[0, 1, 2], - delta_stage=[2]): - super(BBoxPostProcess, self).__init__() - self.num_classes = num_classes - self.decode = decode - self.clip = clip - self.nms = nms - self.decode_clip_nms = decode_clip_nms - self.score_stage = score_stage - self.delta_stage = delta_stage - self.out_dim = 2 if cls_agnostic else num_classes - self.cls_agnostic = cls_agnostic - - def __call__(self, inputs, bboxheads, rois): - # TODO: split into 3 steps - # TODO: modify related ops for deploying - # decode - # clip - # nms - if isinstance(rois, tuple): - proposal, proposal_num = rois - score, delta = bboxheads[0] - bbox_prob = fluid.layers.softmax(score) - delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4)) - else: - num_stage = len(rois) - proposal_list = [] - prob_list = [] - delta_list = [] - for stage, (proposals, bboxhead) in zip(rois, bboxheads): - score, delta = bboxhead - proposal, proposal_num = proposals - if stage in self.score_stage: - bbox_prob = fluid.layers.softmax(score) - prob_list.append(bbox_prob) - if stage in self.delta_stage: - proposal_list.append(proposal) - delta_list.append(delta) - bbox_prob = fluid.layers.mean(prob_list) - delta = fluid.layers.mean(delta_list) - proposal = fluid.layers.mean(proposal_list) - delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4)) - if self.cls_agnostic: - delta = delta[:, 1:2, :] - delta = fluid.layers.expand(delta, [1, self.num_classes, 1]) - bboxes = (proposal, proposal_num) - bboxes, bbox_nums = self.decode_clip_nms(bboxes, bbox_prob, delta, - inputs['im_info']) - return bboxes, bbox_nums - - -@register -class BBoxPostProcessYOLO(object): - __shared__ = ['num_classes'] - __inject__ = ['yolo_box', 'nms'] - - def __init__(self, yolo_box, nms, num_classes=80, 
decode=None, clip=None): - super(BBoxPostProcessYOLO, self).__init__() - self.yolo_box = yolo_box - self.nms = nms - self.num_classes = num_classes - self.decode = decode - self.clip = clip - - def __call__(self, im_size, yolo_head_out, mask_anchors): - # TODO: split yolo_box into 2 steps - # decode - # clip - boxes_list = [] - scores_list = [] - for i, head_out in enumerate(yolo_head_out): - boxes, scores = self.yolo_box(head_out, im_size, mask_anchors[i], - self.num_classes, i) - - boxes_list.append(boxes) - scores_list.append(paddle.transpose(scores, perm=[0, 2, 1])) - yolo_boxes = paddle.concat(boxes_list, axis=1) - yolo_scores = paddle.concat(scores_list, axis=2) - bbox, bbox_num = self.nms(bboxes=yolo_boxes, scores=yolo_scores) - return bbox, bbox_num - - -@register -class AnchorRPN(object): +class Anchor(object): __inject__ = ['anchor_generator', 'anchor_target_generator'] def __init__(self, anchor_generator, anchor_target_generator): - super(AnchorRPN, self).__init__() + super(Anchor, self).__init__() self.anchor_generator = anchor_generator self.anchor_target_generator = anchor_target_generator @@ -167,32 +73,24 @@ class AnchorRPN(object): @register class AnchorYOLO(object): - __inject__ = ['anchor_generator', 'anchor_post_process'] + __inject__ = ['anchor_generator'] - def __init__(self, anchor_generator, anchor_post_process): + def __init__(self, anchor_generator): super(AnchorYOLO, self).__init__() self.anchor_generator = anchor_generator - self.anchor_post_process = anchor_post_process def __call__(self): return self.anchor_generator() - def post_process(self, im_size, yolo_head_out, mask_anchors): - return self.anchor_post_process(im_size, yolo_head_out, mask_anchors) - @register class Proposal(object): - __inject__ = [ - 'proposal_generator', 'proposal_target_generator', 'bbox_post_process' - ] + __inject__ = ['proposal_generator', 'proposal_target_generator'] - def __init__(self, proposal_generator, proposal_target_generator, - bbox_post_process): + 
def __init__(self, proposal_generator, proposal_target_generator): super(Proposal, self).__init__() self.proposal_generator = proposal_generator self.proposal_target_generator = proposal_target_generator - self.bbox_post_process = bbox_post_process def generate_proposal(self, inputs, rpn_head_out, anchor_out): rpn_rois_list = [] @@ -294,7 +192,3 @@ class Proposal(object): def get_proposals(self): return self.proposals_list - - def post_process(self, inputs, bbox_head_out, rois): - bboxes = self.bbox_post_process(inputs, bbox_head_out, rois) - return bboxes diff --git a/ppdet/modeling/head/__init__.py b/ppdet/modeling/head/__init__.py index c60ce5de0..42324f0f4 100644 --- a/ppdet/modeling/head/__init__.py +++ b/ppdet/modeling/head/__init__.py @@ -2,8 +2,10 @@ from . import rpn_head from . import bbox_head from . import mask_head from . import yolo_head +from . import roi_extractor from .rpn_head import * from .bbox_head import * from .mask_head import * from .yolo_head import * +from .roi_extractor import * diff --git a/ppdet/modeling/head/bbox_head.py b/ppdet/modeling/head/bbox_head.py index af768a2e6..7678039e8 100644 --- a/ppdet/modeling/head/bbox_head.py +++ b/ppdet/modeling/head/bbox_head.py @@ -1,3 +1,4 @@ +import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Layer from paddle.fluid.param_attr import ParamAttr @@ -5,6 +6,7 @@ from paddle.fluid.initializer import Normal, Xavier from paddle.fluid.regularizer import L2Decay from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear from ppdet.core.workspace import register +import paddle.nn.functional as F @register @@ -85,7 +87,9 @@ class BBoxHead(Layer): num_classes=81, cls_agnostic=False, num_stages=1, - with_pool=False): + with_pool=False, + score_stage=[0, 1, 2], + delta_stage=[2]): super(BBoxHead, self).__init__() self.num_classes = num_classes self.delta_dim = 2 if cls_agnostic else num_classes @@ -94,6 +98,8 @@ class BBoxHead(Layer): self.bbox_score_list = [] self.bbox_delta_list = 
[] self.with_pool = with_pool + self.score_stage = score_stage + self.delta_stage = delta_stage for stage in range(num_stages): score_name = 'bbox_score_{}'.format(stage) delta_name = 'bbox_delta_{}'.format(stage) @@ -169,3 +175,35 @@ class BBoxHead(Layer): loss_bbox[cls_name] = loss_bbox_cls loss_bbox[reg_name] = loss_bbox_reg return loss_bbox + + def get_prediction(self, bbox_head_out, rois): + if len(bbox_head_out) == 1: + proposal, proposal_num = rois + score, delta = bbox_head_out[0] + bbox_prob = F.softmax(score) + delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) + else: + num_stage = len(rois) + proposal_list = [] + prob_list = [] + delta_list = [] + for stage, (proposals, bboxhead) in enumerate(zip(rois, bbox_head_out)): + score, delta = bboxhead + proposal, proposal_num = proposals + if stage in self.score_stage: + bbox_prob = F.softmax(score) + prob_list.append(bbox_prob) + if stage in self.delta_stage: + proposal_list.append(proposal) + delta_list.append(delta) + bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0) + delta = paddle.mean(paddle.stack(delta_list), axis=0) + proposal = paddle.mean(paddle.stack(proposal_list), axis=0) + delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) + if self.cls_agnostic: + N, C, M = delta.shape + delta = delta[:, 1:2, :] + delta = paddle.expand(delta, [N, self.num_classes, M]) + bboxes = (proposal, proposal_num) + bbox_pred = (delta, bbox_prob) + return bbox_pred, bboxes diff --git a/ppdet/modeling/head/roi_extractor.py b/ppdet/modeling/head/roi_extractor.py new file mode 100644 index 000000000..65bfb979c --- /dev/null +++ b/ppdet/modeling/head/roi_extractor.py @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from ppdet.core.workspace import register +from ppdet.modeling import ops + + +@register +class RoIAlign(object): + def __init__(self, + resolution=14, + sampling_ratio=0, + canconical_level=4, + canonical_size=224, + start_level=0, + end_level=3): + super(RoIAlign, self).__init__() + self.resolution = resolution + self.sampling_ratio = sampling_ratio + self.canconical_level = canconical_level + self.canonical_size = canonical_size + self.start_level = start_level + self.end_level = end_level + + def __call__(self, feats, rois, spatial_scale): + roi, rois_num = rois + cur_l = 0 + if self.start_level == self.end_level: + rois_feat = ops.roi_align( + feats[self.start_level], + roi, + self.resolution, + spatial_scale, + rois_num=rois_num) + return rois_feat + offset = 2 + k_min = self.start_level + offset + k_max = self.end_level + offset + rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals( + roi, + k_min, + k_max, + self.canconical_level, + self.canonical_size, + rois_num=rois_num) + + rois_feat_list = [] + for lvl in range(self.start_level, self.end_level + 1): + roi_feat = ops.roi_align( + feats[lvl], + rois_dist[lvl], + self.resolution, + spatial_scale[lvl], + sampling_ratio=self.sampling_ratio, + rois_num=rois_num_dist[lvl]) + rois_feat_list.append(roi_feat) + rois_feat_shuffle = paddle.concat(rois_feat_list) + rois_feat = paddle.gather(rois_feat_shuffle, restore_index) + + return rois_feat diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py index f61992b11..d3da712a3 100644 --- 
a/ppdet/modeling/layers.py +++ b/ppdet/modeling/layers.py @@ -14,12 +14,15 @@ import numpy as np from numbers import Integral + +import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable from ppdet.core.workspace import register, serializable from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target from ppdet.py_op.post_process import bbox_post_process from . import ops +import paddle.nn.functional as F @register @@ -278,58 +281,71 @@ class MaskTargetGenerator(object): @register -class RoIExtractor(object): +@serializable +class RCNNBox(object): + __shared__ = ['num_classes', 'batch_size'] + def __init__(self, - resolution=14, - sampling_ratio=0, - canconical_level=4, - canonical_size=224, - start_level=0, - end_level=3): - super(RoIExtractor, self).__init__() - self.resolution = resolution - self.sampling_ratio = sampling_ratio - self.canconical_level = canconical_level - self.canonical_size = canonical_size - self.start_level = start_level - self.end_level = end_level - - def __call__(self, feats, rois, spatial_scale): + num_classes=81, + batch_size=1, + prior_box_var=[0.1, 0.1, 0.2, 0.2], + code_type="decode_center_size", + box_normalized=False, + axis=1): + super(RCNNBox, self).__init__() + self.num_classes = num_classes + self.batch_size = batch_size + self.prior_box_var = prior_box_var + self.code_type = code_type + self.box_normalized = box_normalized + self.axis = axis + + def __call__(self, bbox_head_out, rois, im_shape, scale_factor): + bbox_pred, cls_prob = bbox_head_out roi, rois_num = rois - cur_l = 0 - if self.start_level == self.end_level: - rois_feat = ops.roi_align( - feats[self.start_level], - roi, - self.resolution, - spatial_scale, - rois_num=rois_num) - return rois_feat - offset = 2 - k_min = self.start_level + offset - k_max = self.end_level + offset - rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals( - roi, - k_min, - k_max, - 
self.canconical_level, - self.canonical_size, - rois_num=rois_num) - - rois_feat_list = [] - for lvl in range(self.start_level, self.end_level + 1): - roi_feat = ops.roi_align( - feats[lvl], - rois_dist[lvl], - self.resolution, - spatial_scale[lvl], - sampling_ratio=self.sampling_ratio, - rois_num=rois_num_dist[lvl]) - rois_feat_list.append(roi_feat) - rois_feat_shuffle = fluid.layers.concat(rois_feat_list) - rois_feat = fluid.layers.gather(rois_feat_shuffle, restore_index) - - return rois_feat + origin_shape = im_shape / scale_factor + scale_list = [] + origin_shape_list = [] + for idx in range(self.batch_size): + scale = scale_factor[idx, :] + rois_num_per_im = rois_num[idx] + expand_scale = paddle.expand(scale, [rois_num_per_im, 1]) + scale_list.append(expand_scale) + expand_im_shape = paddle.expand(origin_shape[idx, :], + [rois_num_per_im, 2]) + origin_shape_list.append(expand_im_shape) + + scale = paddle.concat(scale_list) + origin_shape = paddle.concat(origin_shape_list) + + bbox = roi / scale + bbox = ops.box_coder( + prior_box=bbox, + prior_box_var=self.prior_box_var, + target_box=bbox_pred, + code_type=self.code_type, + box_normalized=self.box_normalized, + axis=self.axis) + # TODO: Updata box_clip + origin_h = origin_shape[:, 0] - 1 + origin_w = origin_shape[:, 1] - 1 + zeros = paddle.zeros(origin_h.shape, 'float32') + x1 = paddle.maximum( + paddle.minimum( + bbox[:, :, 0], origin_w, axis=0), zeros, axis=0) + y1 = paddle.maximum( + paddle.minimum( + bbox[:, :, 1], origin_h, axis=0), zeros, axis=0) + x2 = paddle.maximum( + paddle.minimum( + bbox[:, :, 2], origin_w, axis=0), zeros, axis=0) + y2 = paddle.maximum( + paddle.minimum( + bbox[:, :, 3], origin_h, axis=0), zeros, axis=0) + bbox = paddle.stack([x1, y1, x2, y2], axis=-1) + + bboxes = (bbox, rois_num) + return bboxes, cls_prob @register @@ -367,9 +383,6 @@ class DecodeClipNms(object): @register @serializable class MultiClassNMS(object): - __op__ = ops.multiclass_nms - __append_doc__ = True - def 
__init__(self, score_threshold=.05, nms_top_k=-1, @@ -387,6 +400,13 @@ class MultiClassNMS(object): self.nms_eta = nms_eta self.background_label = background_label + def __call__(self, bboxes, score): + kwargs = self.__dict__.copy() + if isinstance(bboxes, tuple): + bboxes, bbox_num = bboxes + kwargs.update({'rois_num': bbox_num}) + return ops.multiclass_nms(bboxes, score, **kwargs) + @register @serializable @@ -417,19 +437,37 @@ class MatrixNMS(object): @register @serializable class YOLOBox(object): - def __init__( - self, - conf_thresh=0.005, - downsample_ratio=32, - clip_bbox=True, ): + __shared__ = ['num_classes'] + + def __init__(self, + num_classes=80, + conf_thresh=0.005, + downsample_ratio=32, + clip_bbox=True, + scale_x_y=1.): + self.num_classes = num_classes self.conf_thresh = conf_thresh self.downsample_ratio = downsample_ratio self.clip_bbox = clip_bbox + self.scale_x_y = scale_x_y - def __call__(self, x, img_size, anchors, num_classes, stage=0): - outs = ops.yolo_box(x, img_size, anchors, num_classes, self.conf_thresh, - self.downsample_ratio // 2**stage, self.clip_bbox) - return outs + def __call__(self, yolo_head_out, anchors, im_shape, scale_factor=None): + boxes_list = [] + scores_list = [] + if scale_factor is not None: + origin_shape = im_shape / scale_factor + else: + origin_shape = im_shape + for i, head_out in enumerate(yolo_head_out): + boxes, scores = ops.yolo_box(head_out, origin_shape, anchors[i], + self.num_classes, self.conf_thresh, + self.downsample_ratio // 2**i, + self.clip_bbox, self.scale_x_y) + boxes_list.append(boxes) + scores_list.append(paddle.transpose(scores, perm=[0, 2, 1])) + yolo_boxes = paddle.concat(boxes_list, axis=1) + yolo_scores = paddle.concat(scores_list, axis=2) + return yolo_boxes, yolo_scores @register diff --git a/ppdet/modeling/mask.py b/ppdet/modeling/mask.py index e8dcf20e3..b8b17fdb0 100644 --- a/ppdet/modeling/mask.py +++ b/ppdet/modeling/mask.py @@ -2,38 +2,14 @@ import numpy as np import paddle.fluid as 
fluid from ppdet.core.workspace import register -# TODO: regitster mask_post_process op -from ppdet.py_op.post_process import mask_post_process - - -@register -class MaskPostProcess(object): - __shared__ = ['mask_resolution'] - - def __init__(self, mask_resolution=28, binary_thresh=0.5): - super(MaskPostProcess, self).__init__() - self.mask_resolution = mask_resolution - self.binary_thresh = binary_thresh - - def __call__(self, bboxes, mask_head_out, im_info): - # TODO: modify related ops for deploying - bboxes_np = (i.numpy() for i in bboxes) - mask = mask_post_process(bboxes_np, - mask_head_out.numpy(), - im_info.numpy(), self.mask_resolution, - self.binary_thresh) - mask = {'mask': mask} - return mask - @register class Mask(object): - __inject__ = ['mask_target_generator', 'mask_post_process'] + __inject__ = ['mask_target_generator'] - def __init__(self, mask_target_generator, mask_post_process): + def __init__(self, mask_target_generator): super(Mask, self).__init__() self.mask_target_generator = mask_target_generator - self.mask_post_process = mask_post_process def __call__(self, inputs, rois, targets): mask_rois, rois_has_mask_int32 = self.generate_mask_target(inputs, rois, @@ -56,7 +32,3 @@ class Mask(object): def get_targets(self): return self.mask_int32 - - def post_process(self, bboxes, mask_head_out, im_info): - mask = self.mask_post_process(bboxes, mask_head_out, im_info) - return mask diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 457ef1088..6a2d5941d 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -1337,8 +1337,9 @@ def box_coder(prior_box, elif isinstance(prior_box_var, list): output_box = core.ops.box_coder( - prior_box, target_box, "code_type", code_type, "box_normalized", - box_normalized, "axis", axis, "variance", prior_box_var) + prior_box, None, target_box, "code_type", code_type, + "box_normalized", box_normalized, "axis", axis, "variance", + prior_box_var) else: raise TypeError( "Input variance of box_coder 
must be Variable or list") diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py new file mode 100644 index 000000000..5dd686d3a --- /dev/null +++ b/ppdet/modeling/post_process.py @@ -0,0 +1,50 @@ +import numpy as np +import paddle.fluid as fluid +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from ppdet.core.workspace import register +from ppdet.py_op.post_process import mask_post_process +from . import ops + + +@register +class BBoxPostProcess(object): + __inject__ = ['decode', 'nms'] + + def __init__(self, decode=None, nms=None): + super(BBoxPostProcess, self).__init__() + self.decode = decode + self.nms = nms + + def __call__(self, head_out, rois, im_shape, scale_factor=None): + # TODO: compatible for im_info + # remove after unify the im_shape. scale_factor + if im_shape.shape[1] > 2: + origin_shape = im_shape[:, :2] + scale_factor = im_shape[:, 2:] + else: + origin_shape = im_shape + bboxes, score = self.decode(head_out, rois, origin_shape, scale_factor) + bbox_pred, bbox_num = self.nms(bboxes, score) + return bbox_pred, bbox_num + + +@register +class MaskPostProcess(object): + __shared__ = ['mask_resolution'] + + def __init__(self, mask_resolution=28, binary_thresh=0.5): + super(MaskPostProcess, self).__init__() + self.mask_resolution = mask_resolution + self.binary_thresh = binary_thresh + + def __call__(self, bboxes, mask_head_out, im_info): + # TODO: modify related ops for deploying + bboxes_np = (i.numpy() for i in bboxes) + mask = mask_post_process(bboxes_np, + mask_head_out.numpy(), + im_info.numpy(), self.mask_resolution, + self.binary_thresh) + mask = {'mask': mask} + return mask diff --git a/ppdet/utils/eval_utils.py b/ppdet/utils/eval_utils.py index 91a725615..f6eba837d 100644 --- a/ppdet/utils/eval_utils.py +++ b/ppdet/utils/eval_utils.py @@ -85,7 +85,7 @@ def eval_results(res, metric, anno_file): json.dump(res['mask'], f) logger.info('The mask result is saved to mask.json.') - seg_stats = 
cocoapi_eval('mask.json', 'mask', anno_file=anno_file) + seg_stats = cocoapi_eval('mask.json', 'segm', anno_file=anno_file) eval_res.append(seg_stats) sys.stdout.flush() else: -- GitLab