未验证 提交 30d00a24 编写于 作者: W wangguanzhong 提交者: GitHub

cherry-pick update_cascade, test=dygraph (#2206)

上级 4a272e67
worker_num: 2 worker_num: 2
TrainReader: TrainReader:
sample_transforms: sample_transforms:
- DecodeOp: { } - DecodeOp: {}
- RandomFlipImage: {prob: 0.5} - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - RandomFlipOp: {prob: 0.5}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {to_bgr: false, channel_first: true} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true} - PadBatchOp: {pad_to_stride: 32, pad_gt: true}
batch_size: 1 batch_size: 1
shuffle: true shuffle: true
drop_last: true drop_last: true
...@@ -15,12 +15,12 @@ TrainReader: ...@@ -15,12 +15,12 @@ TrainReader:
EvalReader: EvalReader:
sample_transforms: sample_transforms:
- DecodeOp: { } - DecodeOp: {}
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True } - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: { } - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatchOp: { pad_to_stride: 32, pad_gt: false } - PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
...@@ -29,12 +29,12 @@ EvalReader: ...@@ -29,12 +29,12 @@ EvalReader:
TestReader: TestReader:
sample_transforms: sample_transforms:
- DecodeOp: { } - DecodeOp: {}
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } - ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True } - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: { } - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatchOp: { pad_to_stride: 32, pad_gt: false } - PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
...@@ -2,12 +2,12 @@ worker_num: 2 ...@@ -2,12 +2,12 @@ worker_num: 2
TrainReader: TrainReader:
sample_transforms: sample_transforms:
- DecodeOp: {} - DecodeOp: {}
- RandomFlipImage: {prob: 0.5, is_mask_flip: true} - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - RandomFlipOp: {prob: 0.5}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {to_bgr: false, channel_first: true} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true} - PadBatchOp: {pad_to_stride: 32, pad_gt: true}
batch_size: 1 batch_size: 1
shuffle: true shuffle: true
drop_last: true drop_last: true
...@@ -16,8 +16,8 @@ TrainReader: ...@@ -16,8 +16,8 @@ TrainReader:
EvalReader: EvalReader:
sample_transforms: sample_transforms:
- DecodeOp: {} - DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false} - PadBatchOp: {pad_to_stride: 32, pad_gt: false}
...@@ -30,8 +30,8 @@ EvalReader: ...@@ -30,8 +30,8 @@ EvalReader:
TestReader: TestReader:
sample_transforms: sample_transforms:
- DecodeOp: {} - DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false} - PadBatchOp: {pad_to_stride: 32, pad_gt: false}
......
architecture: CascadeRCNN architecture: CascadeRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
load_static_weights: True load_static_weights: True
roi_stages: 3
# Model Achitecture
CascadeRCNN: CascadeRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet backbone: ResNet
neck: FPN neck: FPN
rpn_head: RPNHead rpn_head: RPNHead
bbox_head: BBoxHead bbox_head: CascadeHead
mask_head: MaskHead mask_head: MaskHead
# post process # post process
bbox_post_process: BBoxPostProcess bbox_post_process: BBoxPostProcess
...@@ -28,97 +22,78 @@ ResNet: ...@@ -28,97 +22,78 @@ ResNet:
num_stages: 4 num_stages: 4
FPN: FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256 out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead: RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator: anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0] aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32 anchor_sizes: [[32], [64], [128], [256], [512]]
stride: [4., 4.] strides: [4, 8, 16, 32, 64]
anchor_target_generator: rpn_target_assign:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256 batch_size_per_im: 256
fg_fraction: 0.5 fg_fraction: 0.5
negative_overlap: 0.3 negative_overlap: 0.3
positive_overlap: 0.7 positive_overlap: 0.7
straddle_thresh: 0.0 use_random: True
train_proposal:
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0 min_size: 0.0
nms_thresh: 0.7 nms_thresh: 0.7
train_pre_nms_top_n: 2000 pre_nms_top_n: 2000
train_post_nms_top_n: 2000 post_nms_top_n: 2000
infer_pre_nms_top_n: 1000 topk_after_collect: True
infer_post_nms_top_n: 1000 test_proposal:
proposal_target_generator: min_size: 0.0
name: ProposalTargetGenerator nms_thresh: 0.7
batch_size_per_im: 512 pre_nms_top_n: 1000
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] post_nms_top_n: 1000
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
is_cls_agnostic: true
BBoxHead: CascadeHead:
bbox_feat: head: CascadeTwoFCHead
name: BBoxFeat roi_extractor:
roi_extractor: resolution: 7
name: RoIAlign sampling_ratio: 0
resolution: 7 aligned: True
sampling_ratio: 2 bbox_assigner: BBoxAssigner
head_feat:
name: TwoFCHead BBoxAssigner:
in_dim: 256 batch_size_per_im: 512
mlp_dim: 1024 bg_thresh: 0.5
in_feat: 1024 fg_thresh: 0.5
cls_agnostic: true fg_fraction: 0.25
cascade_iou: [0.5, 0.6, 0.7]
use_random: True
CascadeTwoFCHead:
mlp_dim: 1024
BBoxPostProcess: BBoxPostProcess:
decode: decode:
name: RCNNBox name: RCNNBox
num_classes: 81 prior_box_var: [30.0, 30.0, 15.0, 15.0]
batch_size: 1
var_weight: 3.
nms: nms:
name: MultiClassNMS name: MultiClassNMS
keep_top_k: 100 keep_top_k: 100
score_threshold: 0.05 score_threshold: 0.05
nms_threshold: 0.5 nms_threshold: 0.5
normalized: true
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 28
MaskHead: MaskHead:
mask_feat: head: MaskFeat
name: MaskFeat roi_extractor:
num_convs: 4 resolution: 14
feat_in: 256 sampling_ratio: 0
feat_out: 256 aligned: True
mask_roi_extractor: mask_assigner: MaskAssigner
name: RoIAlign share_bbox_feat: False
resolution: 14
sampling_ratio: 2
share_bbox_feat: False
feat_in: 256
MaskFeat:
num_convs: 4
out_channels: 256
MaskPostProcess: MaskAssigner:
mask_resolution: 28 mask_resolution: 28
MaskPostProcess:
binary_thresh: 0.5
architecture: CascadeRCNN architecture: CascadeRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
load_static_weights: True load_static_weights: True
roi_stages: 3
# Model Achitecture
CascadeRCNN: CascadeRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
# model feat info flow
backbone: ResNet backbone: ResNet
neck: FPN neck: FPN
rpn_head: RPNHead rpn_head: RPNHead
bbox_head: BBoxHead bbox_head: CascadeHead
# post process # post process
bbox_post_process: BBoxPostProcess bbox_post_process: BBoxPostProcess
...@@ -25,75 +20,58 @@ ResNet: ...@@ -25,75 +20,58 @@ ResNet:
num_stages: 4 num_stages: 4
FPN: FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256 out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead: RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator: anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0] aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32 anchor_sizes: [[32], [64], [128], [256], [512]]
stride: [4., 4.] strides: [4, 8, 16, 32, 64]
anchor_target_generator: rpn_target_assign:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256 batch_size_per_im: 256
fg_fraction: 0.5 fg_fraction: 0.5
negative_overlap: 0.3 negative_overlap: 0.3
positive_overlap: 0.7 positive_overlap: 0.7
straddle_thresh: 0.0 use_random: True
train_proposal:
Proposal: min_size: 0.0
proposal_generator: nms_thresh: 0.7
name: ProposalGenerator pre_nms_top_n: 2000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0 min_size: 0.0
nms_thresh: 0.7 nms_thresh: 0.7
train_pre_nms_top_n: 2000 pre_nms_top_n: 1000
train_post_nms_top_n: 2000 post_nms_top_n: 1000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator: CascadeHead:
name: ProposalTargetGenerator head: CascadeTwoFCHead
batch_size_per_im: 512 roi_extractor:
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] resolution: 7
bg_thresh_hi: [0.5, 0.6, 0.7] sampling_ratio: 0
bg_thresh_lo: [0.0, 0.0, 0.0] aligned: True
fg_thresh: [0.5, 0.6, 0.7] bbox_assigner: BBoxAssigner
fg_fraction: 0.25
is_cls_agnostic: true BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
cascade_iou: [0.5, 0.6, 0.7]
use_random: True
BBoxHead: CascadeTwoFCHead:
bbox_feat: mlp_dim: 1024
name: BBoxFeat
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
mlp_dim: 1024
in_feat: 1024
cls_agnostic: true
BBoxPostProcess: BBoxPostProcess:
decode: decode:
name: RCNNBox name: RCNNBox
num_classes: 81 prior_box_var: [30.0, 30.0, 15.0, 15.0]
batch_size: 1
var_weight: 3.
nms: nms:
name: MultiClassNMS name: MultiClassNMS
keep_top_k: 100 keep_top_k: 100
score_threshold: 0.05 score_threshold: 0.05
nms_threshold: 0.5 nms_threshold: 0.5
normalized: true
...@@ -7,8 +7,8 @@ LearningRate: ...@@ -7,8 +7,8 @@ LearningRate:
gamma: 0.1 gamma: 0.1
milestones: [8, 11] milestones: [8, 11]
- !LinearWarmup - !LinearWarmup
start_factor: 0.3333333333333333 start_factor: 0.001
steps: 500 steps: 1000
OptimizerBuilder: OptimizerBuilder:
optimizer: optimizer:
......
...@@ -53,8 +53,8 @@ BBoxHead: ...@@ -53,8 +53,8 @@ BBoxHead:
BBoxAssigner: BBoxAssigner:
batch_size_per_im: 512 batch_size_per_im: 512
bg_thresh: [0.5,] bg_thresh: 0.5
fg_thresh: [0.5,] fg_thresh: 0.5
fg_fraction: 0.25 fg_fraction: 0.25
use_random: True use_random: True
......
...@@ -56,8 +56,8 @@ BBoxHead: ...@@ -56,8 +56,8 @@ BBoxHead:
BBoxAssigner: BBoxAssigner:
batch_size_per_im: 512 batch_size_per_im: 512
bg_thresh: [0.5,] bg_thresh: 0.5
fg_thresh: [0.5,] fg_thresh: 0.5
fg_fraction: 0.25 fg_fraction: 0.25
use_random: True use_random: True
......
...@@ -3,7 +3,7 @@ TrainReader: ...@@ -3,7 +3,7 @@ TrainReader:
sample_transforms: sample_transforms:
- DecodeOp: {} - DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5, is_mask_flip: true} - RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {} - PermuteOp: {}
batch_transforms: batch_transforms:
......
...@@ -54,8 +54,8 @@ BBoxHead: ...@@ -54,8 +54,8 @@ BBoxHead:
BBoxAssigner: BBoxAssigner:
batch_size_per_im: 512 batch_size_per_im: 512
bg_thresh: [0.5,] bg_thresh: 0.5
fg_thresh: [0.5,] fg_thresh: 0.5
fg_fraction: 0.25 fg_fraction: 0.25
use_random: True use_random: True
......
...@@ -56,8 +56,8 @@ BBoxHead: ...@@ -56,8 +56,8 @@ BBoxHead:
BBoxAssigner: BBoxAssigner:
batch_size_per_im: 512 batch_size_per_im: 512
bg_thresh: [0.5,] bg_thresh: 0.5
fg_thresh: [0.5,] fg_thresh: 0.5
fg_fraction: 0.25 fg_fraction: 0.25
use_random: True use_random: True
......
...@@ -3,7 +3,7 @@ TrainReader: ...@@ -3,7 +3,7 @@ TrainReader:
sample_transforms: sample_transforms:
- DecodeOp: {} - DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} - RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5, is_mask_flip: true} - RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {} - PermuteOp: {}
batch_transforms: batch_transforms:
......
...@@ -484,17 +484,14 @@ class AutoAugmentOp(BaseOperator): ...@@ -484,17 +484,14 @@ class AutoAugmentOp(BaseOperator):
@register_op @register_op
class RandomFlipOp(BaseOperator): class RandomFlipOp(BaseOperator):
def __init__(self, prob=0.5, is_mask_flip=False): def __init__(self, prob=0.5):
""" """
Args: Args:
prob (float): the probability of flipping image prob (float): the probability of flipping image
is_mask_flip (bool): whether flip the segmentation
""" """
super(RandomFlipOp, self).__init__() super(RandomFlipOp, self).__init__()
self.prob = prob self.prob = prob
self.is_mask_flip = is_mask_flip if not isinstance(self.prob, float):
if not (isinstance(self.prob, float) and
isinstance(self.is_mask_flip, bool)):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def apply_segm(self, segms, height, width): def apply_segm(self, segms, height, width):
...@@ -557,8 +554,7 @@ class RandomFlipOp(BaseOperator): ...@@ -557,8 +554,7 @@ class RandomFlipOp(BaseOperator):
im = self.apply_image(im) im = self.apply_image(im)
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0: if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width) sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
if self.is_mask_flip and 'gt_poly' in sample and len(sample[ if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
'gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height, sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
width) width)
if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0: if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
......
...@@ -17,7 +17,7 @@ from __future__ import division ...@@ -17,7 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle import paddle
from ppdet.core.workspace import register from ppdet.core.workspace import register, create
from .meta_arch import BaseArch from .meta_arch import BaseArch
__all__ = ['CascadeRCNN'] __all__ = ['CascadeRCNN']
...@@ -26,142 +26,106 @@ __all__ = ['CascadeRCNN'] ...@@ -26,142 +26,106 @@ __all__ = ['CascadeRCNN']
@register @register
class CascadeRCNN(BaseArch): class CascadeRCNN(BaseArch):
__category__ = 'architecture' __category__ = 'architecture'
__shared__ = ['roi_stages']
__inject__ = [ __inject__ = [
'anchor',
'proposal',
'mask',
'backbone',
'neck',
'rpn_head',
'bbox_head',
'mask_head',
'bbox_post_process', 'bbox_post_process',
'mask_post_process', 'mask_post_process',
] ]
def __init__(self, def __init__(self,
anchor,
proposal,
backbone, backbone,
rpn_head, rpn_head,
bbox_head, bbox_head,
bbox_post_process, bbox_post_process,
neck=None, neck=None,
mask=None,
mask_head=None, mask_head=None,
mask_post_process=None, mask_post_process=None):
roi_stages=3):
super(CascadeRCNN, self).__init__() super(CascadeRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.backbone = backbone self.backbone = backbone
self.rpn_head = rpn_head self.rpn_head = rpn_head
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.bbox_post_process = bbox_post_process self.bbox_post_process = bbox_post_process
self.neck = neck self.neck = neck
self.mask = mask
self.mask_head = mask_head self.mask_head = mask_head
self.mask_post_process = mask_post_process self.mask_post_process = mask_post_process
self.roi_stages = roi_stages self.with_mask = mask_head is not None
self.with_mask = mask is not None
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
out_shape = neck and out_shape or bbox_head.get_head().out_shape
kwargs = {'input_shape': out_shape}
mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
"mask_head": mask_head,
}
def model_arch(self, ): def _forward(self):
# Backbone
body_feats = self.backbone(self.inputs) body_feats = self.backbone(self.inputs)
# Neck
if self.neck is not None: if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats) body_feats = self.neck(body_feats)
# RPN if self.training:
# rpn_head returns two list: rpn_feat, rpn_head_out rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
# each element in rpn_feats contains rpn feature on each level, bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
# and the length is 1 when the neck is not applied. self.inputs)
# each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta) rois, rois_num = self.bbox_head.get_assigned_rois()
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats) bbox_targets = self.bbox_head.get_assigned_targets()
if self.with_mask:
# Anchor mask_loss = self.mask_head(body_feats, rois, rois_num,
# anchor_out returns a list, self.inputs, bbox_targets, bbox_feat)
# each element contains (anchor, anchor_var) return rpn_loss, bbox_loss, mask_loss
self.anchor_out = self.anchor(rpn_feat) else:
return rpn_loss, bbox_loss, {}
# Proposal RoI else:
# compute targets here when training rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
rois = None preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
bbox_head_out = None refined_rois = self.bbox_head.get_refined_rois()
max_overlap = None
self.bbox_head_list = [] im_shape = self.inputs['im_shape']
rois_list = [] scale_factor = self.inputs['scale_factor']
for i in range(self.roi_stages):
# Proposal BBox bbox, bbox_num = self.bbox_post_process(
rois = self.proposal( preds, (refined_rois, rois_num), im_shape, scale_factor)
self.inputs, # rescale the prediction back to origin image
self.rpn_head_out, bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
self.anchor_out, im_shape, scale_factor)
self.training, if not self.with_mask:
i, return bbox_pred, bbox_num, None
rois, mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
bbox_head_out, origin_shape = self.bbox_post_process.get_origin_shape()
max_overlap=max_overlap) mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
rois_list.append(rois) bbox_num, origin_shape)
max_overlap = self.proposal.get_max_overlap() return bbox_pred, bbox_num, mask_pred
# BBox Head
bbox_feat, bbox_head_out, _ = self.bbox_head(body_feats, rois,
spatial_scale, i)
self.bbox_head_list.append(bbox_head_out)
if not self.training:
bbox_pred, bboxes = self.bbox_head.get_cascade_prediction(
self.bbox_head_list, rois_list)
self.bboxes = self.bbox_post_process(bbox_pred, bboxes,
self.inputs['im_shape'],
self.inputs['scale_factor'])
if self.with_mask:
rois = rois_list[-1]
rois_has_mask_int32 = None
if self.training:
bbox_targets = self.proposal.get_targets()[-1]
self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois,
bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(
self.inputs, body_feats, self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale)
def get_loss(self, ): def get_loss(self, ):
rpn_loss, bbox_loss, mask_loss = self._forward()
loss = {} loss = {}
loss.update(rpn_loss)
# RPN loss loss.update(bbox_loss)
rpn_loss_inputs = self.anchor.generate_loss_inputs(
self.inputs, self.rpn_head_out, self.anchor_out)
loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs)
loss.update(loss_rpn)
# BBox loss
bbox_targets_list = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss(self.bbox_head_list,
bbox_targets_list)
loss.update(loss_bbox)
if self.with_mask: if self.with_mask:
# Mask loss loss.update(mask_loss)
mask_targets = self.mask.get_targets()
loss_mask = self.mask_head.get_loss(self.mask_head_out,
mask_targets)
loss.update(loss_mask)
total_loss = paddle.add_n(list(loss.values())) total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss}) loss.update({'loss': total_loss})
return loss return loss
def get_pred(self): def get_pred(self):
bbox, bbox_num = self.bboxes bbox_pred, bbox_num, mask_pred = self._forward()
output = { output = {
'bbox': bbox, 'bbox': bbox_pred,
'bbox_num': bbox_num, 'bbox_num': bbox_num,
} }
if self.with_mask: if self.with_mask:
output.update({'mask': self.mask_head_out}) output.update({'mask': mask_pred})
return output return output
...@@ -547,11 +547,8 @@ class Res5Head(nn.Layer): ...@@ -547,11 +547,8 @@ class Res5Head(nn.Layer):
if depth < 50: if depth < 50:
feat_in = 256 feat_in = 256
na = NameAdapter(self) na = NameAdapter(self)
self.res5 = self.add_sublayer( self.res5 = Blocks(
'res5_roi_feat', depth, feat_in, feat_out, count=3, name_adapter=na, stage_num=5)
Blocks(
depth, feat_in, feat_out, count=3, name_adapter=na,
stage_num=5))
self.feat_out = feat_out if depth < 50 else feat_out * 4 self.feat_out = feat_out if depth < 50 else feat_out * 4
@property @property
......
...@@ -64,7 +64,7 @@ def delta2bbox(deltas, boxes, weights): ...@@ -64,7 +64,7 @@ def delta2bbox(deltas, boxes, weights):
pred_boxes.append(pred_ctr_y - 0.5 * pred_h) pred_boxes.append(pred_ctr_y - 0.5 * pred_h)
pred_boxes.append(pred_ctr_x + 0.5 * pred_w) pred_boxes.append(pred_ctr_x + 0.5 * pred_w)
pred_boxes.append(pred_ctr_y + 0.5 * pred_h) pred_boxes.append(pred_ctr_y + 0.5 * pred_h)
pred_boxes = paddle.stack(pred_boxes, axis=-1) pred_boxes = paddle.concat(pred_boxes, axis=-1)
return pred_boxes return pred_boxes
...@@ -88,7 +88,7 @@ def expand_bbox(bboxes, scale): ...@@ -88,7 +88,7 @@ def expand_bbox(bboxes, scale):
def clip_bbox(boxes, im_shape): def clip_bbox(boxes, im_shape):
h, w = im_shape h, w = im_shape[0], im_shape[1]
x1 = boxes[:, 0].clip(0, w) x1 = boxes[:, 0].clip(0, w)
y1 = boxes[:, 1].clip(0, h) y1 = boxes[:, 1].clip(0, h)
x2 = boxes[:, 2].clip(0, w) x2 = boxes[:, 2].clip(0, w)
......
...@@ -20,6 +20,7 @@ from . import ssd_head ...@@ -20,6 +20,7 @@ from . import ssd_head
from . import fcos_head from . import fcos_head
from . import solov2_head from . import solov2_head
from . import ttf_head from . import ttf_head
from . import cascade_head
from .bbox_head import * from .bbox_head import *
from .mask_head import * from .mask_head import *
...@@ -29,3 +30,4 @@ from .ssd_head import * ...@@ -29,3 +30,4 @@ from .ssd_head import *
from .fcos_head import * from .fcos_head import *
from .solov2_head import * from .solov2_head import *
from .ttf_head import * from .ttf_head import *
from .cascade_head import *
...@@ -33,19 +33,16 @@ class TwoFCHead(nn.Layer): ...@@ -33,19 +33,16 @@ class TwoFCHead(nn.Layer):
self.in_dim = in_dim self.in_dim = in_dim
self.mlp_dim = mlp_dim self.mlp_dim = mlp_dim
fan = in_dim * resolution * resolution fan = in_dim * resolution * resolution
lr_factor = 1.
self.fc6 = nn.Linear( self.fc6 = nn.Linear(
in_dim * resolution * resolution, in_dim * resolution * resolution,
mlp_dim, mlp_dim,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
learning_rate=lr_factor,
initializer=XavierUniform(fan_out=fan))) initializer=XavierUniform(fan_out=fan)))
self.fc7 = nn.Linear( self.fc7 = nn.Linear(
mlp_dim, mlp_dim,
mlp_dim, mlp_dim,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(initializer=XavierUniform()))
learning_rate=lr_factor, initializer=XavierUniform()))
@classmethod @classmethod
def from_config(cls, cfg, input_shape): def from_config(cls, cfg, input_shape):
...@@ -73,6 +70,12 @@ class BBoxHead(nn.Layer): ...@@ -73,6 +70,12 @@ class BBoxHead(nn.Layer):
""" """
head (nn.Layer): Extract feature in bbox head head (nn.Layer): Extract feature in bbox head
in_channel (int): Input channel after RoI extractor in_channel (int): Input channel after RoI extractor
roi_extractor (object): The module of RoI Extractor
bbox_assigner (object): The module of Box Assigner, label and sample the
box.
with_pool (bool): Whether to use pooling for the RoI feature.
num_classes (int): The number of classes
bbox_weight (List[float]): The weight to get the decode box
""" """
def __init__(self, def __init__(self,
...@@ -98,17 +101,14 @@ class BBoxHead(nn.Layer): ...@@ -98,17 +101,14 @@ class BBoxHead(nn.Layer):
self.bbox_score = nn.Linear( self.bbox_score = nn.Linear(
in_channel, in_channel,
self.num_classes + 1, self.num_classes + 1,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(initializer=Normal(
learning_rate=lr_factor, initializer=Normal( mean=0.0, std=0.01)))
mean=0.0, std=0.01)))
self.bbox_delta = nn.Linear( self.bbox_delta = nn.Linear(
in_channel, in_channel,
4 * self.num_classes, 4 * self.num_classes,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(initializer=Normal(
learning_rate=lr_factor, mean=0.0, std=0.001)))
initializer=Normal(
mean=0.0, std=0.001)))
self.assigned_label = None self.assigned_label = None
self.assigned_rois = None self.assigned_rois = None
...@@ -128,14 +128,13 @@ class BBoxHead(nn.Layer): ...@@ -128,14 +128,13 @@ class BBoxHead(nn.Layer):
def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None): def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
""" """
body_feats (list[Tensor]): body_feats (list[Tensor]): Feature maps from backbone
rois (Tensor): rois (Tensor): RoIs generated from RPN module
rois_num (Tensor): rois_num (Tensor): The number of RoIs in each image
inputs (dict{Tensor}): inputs (dict{Tensor}): The ground-truth of image
""" """
if self.training: if self.training:
rois, rois_num, _, targets = self.bbox_assigner(rois, rois_num, rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
inputs)
self.assigned_rois = (rois, rois_num) self.assigned_rois = (rois, rois_num)
self.assigned_targets = targets self.assigned_targets = targets
...@@ -150,13 +149,14 @@ class BBoxHead(nn.Layer): ...@@ -150,13 +149,14 @@ class BBoxHead(nn.Layer):
deltas = self.bbox_delta(feat) deltas = self.bbox_delta(feat)
if self.training: if self.training:
loss = self.get_loss(scores, deltas, targets, rois) loss = self.get_loss(scores, deltas, targets, rois,
self.bbox_weight)
return loss, bbox_feat return loss, bbox_feat
else: else:
pred = self.get_prediction(scores, deltas) pred = self.get_prediction(scores, deltas)
return pred, self.head return pred, self.head
def get_loss(self, scores, deltas, targets, rois): def get_loss(self, scores, deltas, targets, rois, bbox_weight):
""" """
scores (Tensor): scores from bbox head outputs scores (Tensor): scores from bbox head outputs
deltas (Tensor): deltas from bbox head outputs deltas (Tensor): deltas from bbox head outputs
...@@ -179,6 +179,14 @@ class BBoxHead(nn.Layer): ...@@ -179,6 +179,14 @@ class BBoxHead(nn.Layer):
paddle.logical_and(tgt_labels >= 0, tgt_labels < paddle.logical_and(tgt_labels >= 0, tgt_labels <
self.num_classes)).flatten() self.num_classes)).flatten()
cls_name = 'loss_bbox_cls'
reg_name = 'loss_bbox_reg'
loss_bbox = {}
if fg_inds.numel() == 0:
loss_bbox[cls_name] = paddle.to_tensor(0., dtype='float32')
loss_bbox[reg_name] = paddle.to_tensor(0., dtype='float32')
return loss_bbox
if cls_agnostic_bbox_reg: if cls_agnostic_bbox_reg:
reg_delta = paddle.gather(deltas, fg_inds) reg_delta = paddle.gather(deltas, fg_inds)
else: else:
...@@ -198,16 +206,13 @@ class BBoxHead(nn.Layer): ...@@ -198,16 +206,13 @@ class BBoxHead(nn.Layer):
tgt_bboxes = paddle.concat(tgt_bboxes) if len( tgt_bboxes = paddle.concat(tgt_bboxes) if len(
tgt_bboxes) > 1 else tgt_bboxes[0] tgt_bboxes) > 1 else tgt_bboxes[0]
reg_target = bbox2delta(rois, tgt_bboxes, self.bbox_weight) reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
reg_target = paddle.gather(reg_target, fg_inds) reg_target = paddle.gather(reg_target, fg_inds)
reg_target.stop_gradient = True reg_target.stop_gradient = True
loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum( loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum(
) / tgt_labels.shape[0] ) / tgt_labels.shape[0]
cls_name = 'loss_bbox_cls'
reg_name = 'loss_bbox_reg'
loss_bbox = {}
loss_bbox[cls_name] = loss_bbox_cls loss_bbox[cls_name] = loss_bbox_cls
loss_bbox[reg_name] = loss_bbox_reg loss_bbox[reg_name] = loss_bbox_reg
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, XavierUniform
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, create
from ppdet.modeling import ops
from .bbox_head import BBoxHead, TwoFCHead
from .roi_extractor import RoIAlign
from ..shape_spec import ShapeSpec
from ..bbox_utils import bbox2delta, delta2bbox, clip_bbox, nonempty_bbox
@register
class CascadeTwoFCHead(nn.Layer):
    """Holds one ``TwoFCHead`` per cascade stage and dispatches RoI features
    to the head that belongs to the requested stage.

    Args:
        in_dim (int): forwarded to every ``TwoFCHead`` as its first argument
        mlp_dim (int): forwarded to every ``TwoFCHead`` as its second
            argument; also reported as the channel count in ``out_shape``
        resolution (int): forwarded to every ``TwoFCHead`` as its third
            argument
        num_cascade_stage (int): how many per-stage heads are created
    """
    # NOTE(review): CascadeHead in this file shares 'num_cascade_stages'
    # (plural) while this class shares 'num_cascade_stage' -- confirm that
    # the two are meant to be configured independently.
    __shared__ = ['num_cascade_stage']

    def __init__(self,
                 in_dim=256,
                 mlp_dim=1024,
                 resolution=7,
                 num_cascade_stage=3):
        super(CascadeTwoFCHead, self).__init__()
        self.in_dim = in_dim
        self.mlp_dim = mlp_dim
        # Sublayers are registered under the names "0", "1", ...; the plain
        # list keeps them addressable by stage index in forward().
        self.head_list = [
            self.add_sublayer(
                str(idx), TwoFCHead(in_dim, mlp_dim, resolution))
            for idx in range(num_cascade_stage)
        ]

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive ``in_dim`` from ``input_shape``.

        When ``input_shape`` is a list or tuple only its first entry is
        used; the entry's ``channels`` attribute becomes ``in_dim``.
        """
        if isinstance(input_shape, (list, tuple)):
            spec = input_shape[0]
        else:
            spec = input_shape
        return {'in_dim': spec.channels}

    @property
    def out_shape(self):
        """Single-element list with a ``ShapeSpec`` of ``mlp_dim`` channels."""
        return [ShapeSpec(channels=self.mlp_dim, )]

    def forward(self, rois_feat, stage=0):
        """Run ``rois_feat`` through the head registered for ``stage``."""
        stage_head = self.head_list[stage]
        return stage_head(rois_feat)
@register
class CascadeHead(BBoxHead):
    __shared__ = ['num_classes', 'num_cascade_stages']
    __inject__ = ['bbox_assigner']
    # NOTE(review): the string below follows the class attributes, so Python
    # does not bind it to __doc__; it is kept in place so that the class's
    # __doc__ stays unchanged -- confirm before moving it.
    """
    head (nn.Layer): Extract feature in bbox head
    in_channel (int): Input channel after RoI extractor
    roi_extractor (object): The module of RoI Extractor
    bbox_assigner (object): The module of Box Assigner, label and sample the
                            box.
    num_classes (int): The number of classes
    bbox_weight (List[List[float]]): The weight to get the decode box and the
                                     length of weight is the number of cascade
                                     stage
    num_cascade_stages (int): The number of stage to refine the box
    """

    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 num_classes=80,
                 bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
                              [30.0, 30.0, 15.0, 15.0]],
                 num_cascade_stages=3):
        # Only nn.Layer.__init__ is invoked; BBoxHead.__init__ is not called.
        nn.Layer.__init__(self, )
        self.head = head
        self.roi_extractor = roi_extractor
        # A dict is treated as keyword arguments for building a RoIAlign.
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner

        self.num_classes = num_classes
        self.bbox_weight = bbox_weight
        self.num_cascade_stages = num_cascade_stages

        # One score layer (num_classes + 1 outputs) and one delta layer
        # (4 outputs) per stage, each added as a named sublayer.
        self.bbox_score_list = []
        self.bbox_delta_list = []
        for i in range(num_cascade_stages):
            score_name = 'bbox_score_stage{}'.format(i)
            delta_name = 'bbox_delta_stage{}'.format(i)
            bbox_score = self.add_sublayer(
                score_name,
                nn.Linear(
                    in_channel,
                    self.num_classes + 1,
                    weight_attr=paddle.ParamAttr(initializer=Normal(
                        mean=0.0, std=0.01))))
            bbox_delta = self.add_sublayer(
                delta_name,
                nn.Linear(
                    in_channel,
                    4,
                    weight_attr=paddle.ParamAttr(initializer=Normal(
                        mean=0.0, std=0.001))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)

        self.assigned_label = None
        self.assigned_rois = None
        # forward() writes these two attributes; give them a defined value up
        # front so that reading them (e.g. get_refined_rois) before the first
        # forward pass returns None instead of raising AttributeError.
        self.assigned_targets = None
        self.refined_rois = None

    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
        """
        body_feats (list[Tensor]): Feature maps from backbone
        rois (Tensor): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image

        Returns:
            In training mode: ``(loss, bbox_feat)`` where ``loss`` maps
            ``"<name>_stage<i>"`` to each per-stage loss divided by
            ``num_cascade_stages`` and ``bbox_feat`` is the head output of the
            last stage.
            Otherwise: ``((deltas, scores), self.head)``; the RoIs of the last
            stage are stored in ``self.refined_rois``.
        """
        if self.training:
            # Stage 0 uses the assigner with its default stage arguments.
            rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
            targets_list = [targets]
            self.assigned_rois = (rois, rois_num)
            self.assigned_targets = targets

        pred_bbox = None
        head_out_list = []
        for i in range(self.num_cascade_stages):
            if i > 0:
                # Later stages take the boxes predicted by the previous stage
                # as their RoIs.
                rois, rois_num = self._get_rois_from_boxes(pred_bbox,
                                                           inputs['im_shape'])
                if self.training:
                    rois, rois_num, targets = self.bbox_assigner(
                        rois, rois_num, inputs, i, is_cascade=True)
                    targets_list.append(targets)

            rois_feat = self.roi_extractor(body_feats, rois, rois_num)
            bbox_feat = self.head(rois_feat, i)
            scores = self.bbox_score_list[i](bbox_feat)
            deltas = self.bbox_delta_list[i](bbox_feat)
            head_out_list.append([scores, deltas, rois])
            pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])

        if self.training:
            loss = {}
            for stage, value in enumerate(zip(head_out_list, targets_list)):
                (scores, deltas, rois), targets = value
                loss_stage = self.get_loss(scores, deltas, targets, rois,
                                           self.bbox_weight[stage])
                # Suffix every loss name with its stage and average the
                # contribution over the number of stages.
                for k, v in loss_stage.items():
                    loss[k + "_stage{}".format(
                        stage)] = v / self.num_cascade_stages

            return loss, bbox_feat
        else:
            scores, deltas, self.refined_rois = self.get_prediction(
                head_out_list)
            return (deltas, scores), self.head

    def _get_rois_from_boxes(self, boxes, im_shape):
        """Turn per-image predicted boxes into RoIs for the next stage.

        Each image's boxes are passed through ``clip_bbox`` with that image's
        ``im_shape`` entry; in training mode the rows selected by
        ``nonempty_bbox`` are then gathered. Returns the list of per-image
        RoIs and a tensor concatenating each entry's first-dimension size.
        """
        rois = []
        for i, boxes_per_image in enumerate(boxes):
            clip_box = clip_bbox(boxes_per_image, im_shape[i])
            if self.training:
                keep = nonempty_bbox(clip_box)
                clip_box = paddle.gather(clip_box, keep)
            rois.append(clip_box)
        rois_num = paddle.concat([paddle.shape(r)[0] for r in rois])
        return rois, rois_num

    def _get_pred_bbox(self, deltas, proposals, weights):
        """Decode ``deltas`` against ``proposals`` and split back per image.

        ``proposals`` is a sequence of per-image tensors; they are concatenated
        (when there is more than one) before calling ``delta2bbox`` and the
        result is split using each proposal tensor's first-dimension size.
        """
        pred_proposals = paddle.concat(proposals) if len(
            proposals) > 1 else proposals[0]
        pred_bbox = delta2bbox(deltas, pred_proposals, weights)
        num_prop = [p.shape[0] for p in proposals]
        return pred_bbox.split(num_prop)

    def get_prediction(self, head_out_list):
        """
        head_out_list(List[Tensor]): scores, deltas, rois

        Returns the softmax scores averaged over all stages together with the
        deltas and rois of the last stage.
        """
        scores_list = [F.softmax(head[0]) for head in head_out_list]
        scores = paddle.add_n(scores_list) / self.num_cascade_stages
        # Get deltas and rois from the last stage
        _, deltas, rois = head_out_list[-1]
        return scores, deltas, rois

    def get_refined_rois(self, ):
        """Return the RoIs stored by the last inference-mode forward pass
        (None if no such pass has run yet)."""
        return self.refined_rois
...@@ -291,14 +291,18 @@ class AnchorGeneratorSSD(object): ...@@ -291,14 +291,18 @@ class AnchorGeneratorSSD(object):
@register @register
@serializable @serializable
class RCNNBox(object): class RCNNBox(object):
__shared__ = ['num_classes']
def __init__(self, def __init__(self,
prior_box_var=[10., 10., 5., 5.], prior_box_var=[10., 10., 5., 5.],
code_type="decode_center_size", code_type="decode_center_size",
box_normalized=False): box_normalized=False,
num_classes=80):
super(RCNNBox, self).__init__() super(RCNNBox, self).__init__()
self.prior_box_var = prior_box_var self.prior_box_var = prior_box_var
self.code_type = code_type self.code_type = code_type
self.box_normalized = box_normalized self.box_normalized = box_normalized
self.num_classes = num_classes
def __call__(self, bbox_head_out, rois, im_shape, scale_factor): def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
bbox_pred, cls_prob = bbox_head_out bbox_pred, cls_prob = bbox_head_out
...@@ -322,6 +326,13 @@ class RCNNBox(object): ...@@ -322,6 +326,13 @@ class RCNNBox(object):
bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var) bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
scores = cls_prob[:, :-1] scores = cls_prob[:, :-1]
# [N*C, 4]
bbox_num_class = bbox.shape[1] // 4
bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4])
if bbox_num_class == 1:
bbox = paddle.tile(bbox, [1, self.num_classes, 1])
origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1) origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1) origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
zeros = paddle.zeros_like(origin_h) zeros = paddle.zeros_like(origin_h)
......
...@@ -239,7 +239,7 @@ def roi_align(input, ...@@ -239,7 +239,7 @@ def roi_align(input,
align_out = core.ops.roi_align( align_out = core.ops.roi_align(
input, rois, rois_num, "pooled_height", pooled_height, input, rois, rois_num, "pooled_height", pooled_height,
"pooled_width", pooled_width, "spatial_scale", spatial_scale, "pooled_width", pooled_width, "spatial_scale", spatial_scale,
"sampling_ratio", sampling_ratio) #, "aligned", aligned) "sampling_ratio", sampling_ratio, "aligned", aligned)
return align_out return align_out
else: else:
...@@ -265,7 +265,7 @@ def roi_align(input, ...@@ -265,7 +265,7 @@ def roi_align(input,
"pooled_width": pooled_width, "pooled_width": pooled_width,
"spatial_scale": spatial_scale, "spatial_scale": spatial_scale,
"sampling_ratio": sampling_ratio, "sampling_ratio": sampling_ratio,
#"aligned": aligned, "aligned": aligned,
}) })
return align_out return align_out
......
...@@ -37,7 +37,7 @@ def rpn_anchor_target(anchors, ...@@ -37,7 +37,7 @@ def rpn_anchor_target(anchors,
gt_bbox = gt_boxes[i] gt_bbox = gt_boxes[i]
# Step1: match anchor and gt_bbox # Step1: match anchor and gt_bbox
matches, match_labels, matched_vals = label_box( matches, match_labels = label_box(
anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True) anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True)
# Step2: sample anchor # Step2: sample anchor
fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im, fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
...@@ -84,8 +84,7 @@ def label_box(anchors, gt_boxes, positive_overlap, negative_overlap, ...@@ -84,8 +84,7 @@ def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,
matches = matches.flatten() matches = matches.flatten()
match_labels = match_labels.flatten() match_labels = match_labels.flatten()
matched_vals = matched_vals.flatten() return matches, match_labels
return matches, match_labels, matched_vals
def subsample_labels(labels, def subsample_labels(labels,
...@@ -118,16 +117,6 @@ def subsample_labels(labels, ...@@ -118,16 +117,6 @@ def subsample_labels(labels,
return fg_inds, bg_inds return fg_inds, bg_inds
def filter_roi(rois, max_overlap):
    """Select the RoIs that have positive width and height and whose
    ``max_overlap`` value is below 1.

    Args:
        rois (Tensor): boxes; columns 0..3 are read as the two corner
            coordinates (width = col 2 - col 0, height = col 3 - col 1).
        max_overlap (Tensor): one value per RoI -- assumed to have the same
            leading shape as a column of ``rois``; TODO confirm with caller.

    Returns:
        Tensor: the selected rows of ``rois``, or a ``(1, 4)`` float32 tensor
        of zeros when no row qualifies.
    """
    ws = rois[:, 2] - rois[:, 0]
    hs = rois[:, 3] - rois[:, 1]
    # paddle.logical_and is a binary op whose third positional parameter is
    # `out`, so the three conditions must be combined pairwise; passing all
    # three at once leaves the overlap test out of the mask.
    positive_size = paddle.logical_and(ws > 0, hs > 0)
    valid_mask = paddle.logical_and(positive_size, max_overlap < 1)
    keep = paddle.nonzero(valid_mask)
    if keep.numel() > 0:
        # nonzero yields one column per mask dimension; the mask built above
        # is 1-D (assuming rois is [N, 4] -- TODO confirm), so the row indices
        # live in column 0.
        return paddle.gather(rois, keep[:, 0])
    return paddle.zeros((1, 4), dtype='float32')
def generate_proposal_target(rpn_rois, def generate_proposal_target(rpn_rois,
gt_classes, gt_classes,
gt_boxes, gt_boxes,
...@@ -137,67 +126,68 @@ def generate_proposal_target(rpn_rois, ...@@ -137,67 +126,68 @@ def generate_proposal_target(rpn_rois,
bg_thresh, bg_thresh,
num_classes, num_classes,
use_random=True, use_random=True,
is_cascade_rcnn=False, is_cascade=False,
max_overlaps=None): cascade_iou=0.5):
rois_with_gt = [] rois_with_gt = []
tgt_labels = [] tgt_labels = []
tgt_bboxes = [] tgt_bboxes = []
sampled_max_overlaps = []
tgt_gt_inds = [] tgt_gt_inds = []
new_rois_num = [] new_rois_num = []
fg_thresh = cascade_iou if is_cascade else fg_thresh
bg_thresh = cascade_iou if is_cascade else bg_thresh
for i, rpn_roi in enumerate(rpn_rois): for i, rpn_roi in enumerate(rpn_rois):
max_overlap = max_overlaps[i] if is_cascade_rcnn else None
gt_bbox = gt_boxes[i] gt_bbox = gt_boxes[i]
gt_class = gt_classes[i] gt_class = gt_classes[i]
if is_cascade_rcnn: if not is_cascade:
rpn_roi = filter_roi(rpn_roi, max_overlap) bbox = paddle.concat([rpn_roi, gt_bbox])
bbox = paddle.concat([rpn_roi, gt_bbox]) else:
bbox = rpn_roi
# Step1: label bbox
matches, match_labels, matched_vals = label_box( # Step1: label bbox
bbox, gt_bbox, fg_thresh, bg_thresh, False) matches, match_labels = label_box(bbox, gt_bbox, fg_thresh, bg_thresh,
False)
# Step2: sample bbox # Step2: sample bbox
sampled_inds, sampled_gt_classes = sample_bbox( sampled_inds, sampled_gt_classes = sample_bbox(
matches, match_labels, gt_class, batch_size_per_im, fg_fraction, matches, match_labels, gt_class, batch_size_per_im, fg_fraction,
num_classes, use_random) num_classes, use_random, is_cascade)
# Step3: make output # Step3: make output
rois_per_image = paddle.gather(bbox, sampled_inds) rois_per_image = bbox if is_cascade else paddle.gather(bbox,
sampled_gt_ind = paddle.gather(matches, sampled_inds) sampled_inds)
sampled_gt_ind = matches if is_cascade else paddle.gather(matches,
sampled_inds)
sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind) sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind)
sampled_overlap = paddle.gather(matched_vals, sampled_inds)
rois_per_image.stop_gradient = True rois_per_image.stop_gradient = True
sampled_gt_ind.stop_gradient = True sampled_gt_ind.stop_gradient = True
sampled_bbox.stop_gradient = True sampled_bbox.stop_gradient = True
sampled_overlap.stop_gradient = True
tgt_labels.append(sampled_gt_classes) tgt_labels.append(sampled_gt_classes)
tgt_bboxes.append(sampled_bbox) tgt_bboxes.append(sampled_bbox)
rois_with_gt.append(rois_per_image) rois_with_gt.append(rois_per_image)
sampled_max_overlaps.append(sampled_overlap)
tgt_gt_inds.append(sampled_gt_ind) tgt_gt_inds.append(sampled_gt_ind)
new_rois_num.append(paddle.shape(sampled_inds)[0]) new_rois_num.append(paddle.shape(sampled_inds)[0])
new_rois_num = paddle.concat(new_rois_num) new_rois_num = paddle.concat(new_rois_num)
return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num, sampled_max_overlaps return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num
def sample_bbox( def sample_bbox(matches,
matches, match_labels,
match_labels, gt_classes,
gt_classes, batch_size_per_im,
batch_size_per_im, fg_fraction,
fg_fraction, num_classes,
num_classes, use_random=True,
use_random=True, ): is_cascade=False):
gt_classes = paddle.gather(gt_classes, matches) gt_classes = paddle.gather(gt_classes, matches)
gt_classes = paddle.where(match_labels == 0, gt_classes = paddle.where(match_labels == 0,
paddle.ones_like(gt_classes) * num_classes, paddle.ones_like(gt_classes) * num_classes,
gt_classes) gt_classes)
gt_classes = paddle.where(match_labels == -1, gt_classes = paddle.where(match_labels == -1,
paddle.ones_like(gt_classes) * -1, gt_classes) paddle.ones_like(gt_classes) * -1, gt_classes)
if is_cascade:
return matches, gt_classes
rois_per_image = int(batch_size_per_im) rois_per_image = int(batch_size_per_im)
fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction, fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction,
......
...@@ -58,10 +58,11 @@ class BBoxAssigner(object): ...@@ -58,10 +58,11 @@ class BBoxAssigner(object):
def __init__(self, def __init__(self,
batch_size_per_im=512, batch_size_per_im=512,
fg_fraction=.25, fg_fraction=.25,
fg_thresh=[.5, ], fg_thresh=.5,
bg_thresh=[.5, ], bg_thresh=.5,
use_random=True, use_random=True,
is_cls_agnostic=False, is_cls_agnostic=False,
cascade_iou=[0.5, 0.6, 0.7],
num_classes=80): num_classes=80):
super(BBoxAssigner, self).__init__() super(BBoxAssigner, self).__init__()
self.batch_size_per_im = batch_size_per_im self.batch_size_per_im = batch_size_per_im
...@@ -70,6 +71,7 @@ class BBoxAssigner(object): ...@@ -70,6 +71,7 @@ class BBoxAssigner(object):
self.bg_thresh = bg_thresh self.bg_thresh = bg_thresh
self.use_random = use_random self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic self.is_cls_agnostic = is_cls_agnostic
self.cascade_iou = cascade_iou
self.num_classes = num_classes self.num_classes = num_classes
def __call__(self, def __call__(self,
...@@ -77,22 +79,21 @@ class BBoxAssigner(object): ...@@ -77,22 +79,21 @@ class BBoxAssigner(object):
rpn_rois_num, rpn_rois_num,
inputs, inputs,
stage=0, stage=0,
max_overlap=None): is_cascade=False):
is_cascade = True if stage > 0 else False is_cascade = True if stage > 0 else False
gt_classes = inputs['gt_class'] gt_classes = inputs['gt_class']
gt_boxes = inputs['gt_bbox'] gt_boxes = inputs['gt_bbox']
# rois, tgt_labels, tgt_bboxes, tgt_gt_inds # rois, tgt_labels, tgt_bboxes, tgt_gt_inds
# new_rois_num, sampled_max_overlaps # new_rois_num
outs = generate_proposal_target( outs = generate_proposal_target(
rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im, rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im,
self.fg_fraction, self.fg_thresh[stage], self.bg_thresh[stage], self.fg_fraction, self.fg_thresh, self.bg_thresh, self.num_classes,
self.num_classes, self.use_random, is_cascade, max_overlap) self.use_random, is_cascade, self.cascade_iou[stage])
rois = outs[0] rois = outs[0]
rois_num = outs[-2] rois_num = outs[-1]
max_overlaps = outs[-1]
# tgt_labels, tgt_bboxes, tgt_gt_inds # tgt_labels, tgt_bboxes, tgt_gt_inds
targets = outs[1:4] targets = outs[1:4]
return rois, rois_num, max_overlaps, targets return rois, rois_num, targets
@register @register
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册