未验证 提交 9b279ee3 编写于 作者: W wangguanzhong 提交者: GitHub

[Dygraph]Add cascade (#1859)

* add cascade mask

* add cascade rcnn

* update code

* remove comments
上级 1c7592a0
architecture: CascadeRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
load_static_weights: True
roi_stages: 3
# Model Architecture
CascadeRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
is_cls_agnostic: true
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
mlp_dim: 1024
in_feat: 1024
cls_agnostic: true
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 28
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 4
feat_in: 256
feat_out: 256
mask_roi_extractor:
name: RoIAlign
resolution: 14
sampling_ratio: 2
share_bbox_feat: False
feat_in: 256
MaskPostProcess:
mask_resolution: 28
architecture: CascadeRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
load_static_weights: True
roi_stages: 3
# Model Architecture
CascadeRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
is_cls_agnostic: true
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
mlp_dim: 1024
in_feat: 1024
cls_agnostic: true
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
...@@ -59,7 +59,7 @@ Proposal: ...@@ -59,7 +59,7 @@ Proposal:
proposal_target_generator: proposal_target_generator:
name: ProposalTargetGenerator name: ProposalTargetGenerator
batch_size_per_im: 512 batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,] bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,] bg_thresh_lo: [0.0,]
fg_thresh: [0.5,] fg_thresh: [0.5,]
......
...@@ -66,7 +66,7 @@ Proposal: ...@@ -66,7 +66,7 @@ Proposal:
proposal_target_generator: proposal_target_generator:
name: ProposalTargetGenerator name: ProposalTargetGenerator
batch_size_per_im: 512 batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,] bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,] bg_thresh_lo: [0.0,]
fg_thresh: [0.5,] fg_thresh: [0.5,]
......
...@@ -60,7 +60,7 @@ Proposal: ...@@ -60,7 +60,7 @@ Proposal:
proposal_target_generator: proposal_target_generator:
name: ProposalTargetGenerator name: ProposalTargetGenerator
batch_size_per_im: 512 batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,] bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,] bg_thresh_lo: [0.0,]
fg_thresh: [0.5,] fg_thresh: [0.5,]
......
...@@ -68,7 +68,7 @@ Proposal: ...@@ -68,7 +68,7 @@ Proposal:
proposal_target_generator: proposal_target_generator:
name: ProposalTargetGenerator name: ProposalTargetGenerator
batch_size_per_im: 512 batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,] bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,] bg_thresh_lo: [0.0,]
fg_thresh: [0.5,] fg_thresh: [0.5,]
......
...@@ -4,7 +4,6 @@ TrainReader: ...@@ -4,7 +4,6 @@ TrainReader:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
sample_transforms: sample_transforms:
- DecodeImage: {to_rgb: true} - DecodeImage: {to_rgb: true}
# check
- RandomFlipImage: {prob: 0.5} - RandomFlipImage: {prob: 0.5}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
......
use_gpu: true use_gpu: true
log_iter: 20 log_iter: 20
save_dir: output save_dir: output
snapshot_epoch: 2 snapshot_epoch: 1
_BASE_: [
'./_base_/models/cascade_mask_rcnn_r50_fpn.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/mask_fpn_reader.yml',
'./_base_/runtime.yml',
]
_BASE_: [
'./_base_/models/cascade_rcnn_r50_fpn.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/faster_fpn_reader.yml',
'./_base_/runtime.yml',
]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from paddle import fluid import paddle
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .meta_arch import BaseArch from .meta_arch import BaseArch
...@@ -12,115 +26,149 @@ __all__ = ['CascadeRCNN'] ...@@ -12,115 +26,149 @@ __all__ = ['CascadeRCNN']
@register @register
class CascadeRCNN(BaseArch): class CascadeRCNN(BaseArch):
__category__ = 'architecture' __category__ = 'architecture'
__shared__ = ['num_stages'] __shared__ = ['roi_stages']
__inject__ = [ __inject__ = [
'anchor', 'anchor',
'proposal', 'proposal',
'mask', 'mask',
'backbone', 'backbone',
'neck',
'rpn_head', 'rpn_head',
'bbox_head', 'bbox_head',
'mask_head', 'mask_head',
'bbox_post_process',
'mask_post_process',
] ]
def __init__(self, def __init__(self,
anchor, anchor,
proposal, proposal,
mask,
backbone, backbone,
rpn_head, rpn_head,
bbox_head, bbox_head,
mask_head, bbox_post_process,
num_stages=3, neck=None,
*args, mask=None,
**kwargs): mask_head=None,
super(CascadeRCNN, self).__init__(*args, **kwargs) mask_post_process=None,
roi_stages=3):
super(CascadeRCNN, self).__init__()
self.anchor = anchor self.anchor = anchor
self.proposal = proposal self.proposal = proposal
self.mask = mask
self.backbone = backbone self.backbone = backbone
self.rpn_head = rpn_head self.rpn_head = rpn_head
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.bbox_post_process = bbox_post_process
self.neck = neck
self.mask = mask
self.mask_head = mask_head self.mask_head = mask_head
self.num_stages = num_stages self.mask_post_process = mask_post_process
self.roi_stages = roi_stages
self.with_mask = mask is not None
def model_arch(self, ): def model_arch(self, ):
# Backbone # Backbone
bb_out = self.backbone(self.gbd) body_feats = self.backbone(self.inputs)
self.gbd.update(bb_out)
# Neck
if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats)
# RPN # RPN
rpn_head_out = self.rpn_head(self.gbd) # rpn_head returns two lists: rpn_feat, rpn_head_out
self.gbd.update(rpn_head_out) # each element in rpn_feat contains the rpn feature on each level,
# and the length is 1 when the neck is not applied.
# each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta)
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats)
# Anchor # Anchor
anchor_out = self.anchor(self.gbd) # anchor_out returns a list,
self.gbd.update(anchor_out) # each element contains (anchor, anchor_var)
self.anchor_out = self.anchor(rpn_feat)
self.gbd['stage'] = 0
for i in range(self.num_stages): # Proposal RoI
self.gbd.update_v('stage', i) # compute targets here when training
rois = None
bbox_head_out = None
max_overlap = None
self.bbox_head_list = []
rois_list = []
for i in range(self.roi_stages):
# Proposal BBox # Proposal BBox
proposal_out = self.proposal(self.gbd) rois = self.proposal(
self.gbd.update({"proposal_" + str(i): proposal_out}) self.inputs,
self.rpn_head_out,
self.anchor_out,
i,
rois,
bbox_head_out,
max_overlap=max_overlap)
rois_list.append(rois)
max_overlap = self.proposal.get_max_overlap()
# BBox Head # BBox Head
bbox_head_out = self.bbox_head(self.gbd) bbox_feat, bbox_head_out, _ = self.bbox_head(body_feats, rois,
self.gbd.update({'bbox_head_' + str(i): bbox_head_out}) spatial_scale, i)
self.bbox_head_list.append(bbox_head_out)
refine_bbox_out = self.proposal.refine_bbox(self.gbd)
self.gbd['proposal_' + str(i)].update(refine_bbox_out) if self.inputs['mode'] == 'infer':
bbox_pred, bboxes = self.bbox_head.get_cascade_prediction(
if self.gbd['mode'] == 'infer': self.bbox_head_list, rois_list)
bbox_out = self.proposal.post_process(self.gbd) self.bboxes = self.bbox_post_process(
self.gbd.update(bbox_out) bbox_pred,
bboxes,
# Mask self.inputs['im_shape'],
mask_out = self.mask(self.gbd) self.inputs['scale_factor'],
self.gbd.update(mask_out) var_weight=3.)
# Mask Head if self.with_mask:
mask_head_out = self.mask_head(self.gbd) rois = rois_list[-1]
self.gbd.update(mask_head_out) rois_has_mask_int32 = None
if self.inputs['mode'] == 'train':
if self.gbd['mode'] == 'infer': bbox_targets = self.proposal.get_targets()[-1]
mask_out = self.mask.post_process(self.gbd) self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois,
self.gbd.update(mask_out) bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(
self.inputs, body_feats, self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale)
def get_loss(self, ): def get_loss(self, ):
outs = {} loss = {}
losses = []
# RPN loss
rpn_cls_loss, rpn_reg_loss = self.rpn_head.get_loss(self.gbd) rpn_loss_inputs = self.anchor.generate_loss_inputs(
outs['loss_rpn_cls'] = rpn_cls_loss self.inputs, self.rpn_head_out, self.anchor_out)
outs['loss_rpn_reg'] = rpn_reg_loss loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs)
losses.extend([rpn_cls_loss, rpn_reg_loss]) loss.update(loss_rpn)
bbox_cls_loss_list = [] # BBox loss
bbox_reg_loss_list = [] bbox_targets_list = self.proposal.get_targets()
for i in range(self.num_stages): loss_bbox = self.bbox_head.get_loss(self.bbox_head_list,
self.gbd.update_v('stage', i) bbox_targets_list)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.get_loss(self.gbd) loss.update(loss_bbox)
bbox_cls_loss_list.append(bbox_cls_loss)
bbox_reg_loss_list.append(bbox_reg_loss) if self.with_mask:
outs['loss_bbox_cls_' + str(i)] = bbox_cls_loss # Mask loss
outs['loss_bbox_reg_' + str(i)] = bbox_reg_loss mask_targets = self.mask.get_targets()
losses.extend(bbox_cls_loss_list) loss_mask = self.mask_head.get_loss(self.mask_head_out,
losses.extend(bbox_reg_loss_list) mask_targets)
loss.update(loss_mask)
mask_loss = self.mask_head.get_loss(self.gbd)
outs['mask_loss'] = mask_loss total_loss = paddle.add_n(list(loss.values()))
losses.append(mask_loss) loss.update({'loss': total_loss})
return loss
loss = fluid.layers.sum(losses)
outs['loss'] = loss def get_pred(self, return_numpy=True):
return outs bbox, bbox_num = self.bboxes
output = {
def get_pred(self, ): 'bbox': bbox.numpy(),
outs = { 'bbox_num': bbox_num.numpy(),
'bbox': self.gbd['predicted_bbox'].numpy(), 'im_id': self.inputs['im_id'].numpy(),
'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(),
'mask': self.gbd['predicted_mask'].numpy(),
'im_id': self.gbd['im_id'].numpy(),
} }
return inputs
if self.with_mask:
mask = self.mask_post_process(self.bboxes, self.mask_head_out,
self.inputs['im_shape'],
self.inputs['scale_factor'])
output.update(mask)
return output
...@@ -86,7 +86,7 @@ class FasterRCNN(BaseArch): ...@@ -86,7 +86,7 @@ class FasterRCNN(BaseArch):
# BBox loss # BBox loss
bbox_targets = self.proposal.get_targets() bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets) loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets)
loss.update(loss_bbox) loss.update(loss_bbox)
total_loss = paddle.add_n(list(loss.values())) total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss}) loss.update({'loss': total_loss})
......
...@@ -121,7 +121,7 @@ class MaskRCNN(BaseArch): ...@@ -121,7 +121,7 @@ class MaskRCNN(BaseArch):
# BBox loss # BBox loss
bbox_targets = self.proposal.get_targets() bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets) loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets)
loss.update(loss_bbox) loss.update(loss_bbox)
# Mask loss # Mask loss
......
...@@ -166,16 +166,16 @@ class BottleNeck(nn.Layer): ...@@ -166,16 +166,16 @@ class BottleNeck(nn.Layer):
name=conv_name3) name=conv_name3)
def forward(self, inputs): def forward(self, inputs):
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
out = self.branch2a(inputs) out = self.branch2a(inputs)
out = self.branch2b(out) out = self.branch2b(out)
out = self.branch2c(out) out = self.branch2c(out)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
out = paddle.add(x=short, y=out) out = paddle.add(x=out, y=short)
out = F.relu(out) out = F.relu(out)
return out return out
......
...@@ -127,7 +127,12 @@ class Proposal(object): ...@@ -127,7 +127,12 @@ class Proposal(object):
rois_num_per_level=rpn_rois_num_list) rois_num_per_level=rpn_rois_num_list)
return rois_collect, rois_num_collect return rois_collect, rois_num_collect
def generate_proposal_target(self, inputs, rois, rois_num, stage=0): def generate_proposal_target(self,
inputs,
rois,
rois_num,
stage=0,
max_overlap=None):
outs = self.proposal_target_generator( outs = self.proposal_target_generator(
rpn_rois=rois, rpn_rois=rois,
rpn_rois_num=rois_num, rpn_rois_num=rois_num,
...@@ -135,32 +140,36 @@ class Proposal(object): ...@@ -135,32 +140,36 @@ class Proposal(object):
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
gt_boxes=inputs['gt_bbox'], gt_boxes=inputs['gt_bbox'],
im_info=inputs['im_info'], im_info=inputs['im_info'],
stage=stage) stage=stage,
max_overlap=max_overlap)
rois = outs[0] rois = outs[0]
rois_num = outs[-1] max_overlap = outs[-1]
rois_num = outs[-2]
targets = { targets = {
'labels_int32': outs[1], 'labels_int32': outs[1],
'bbox_targets': outs[2], 'bbox_targets': outs[2],
'bbox_inside_weights': outs[3], 'bbox_inside_weights': outs[3],
'bbox_outside_weights': outs[4] 'bbox_outside_weights': outs[4]
} }
return rois, rois_num, targets return rois, rois_num, targets, max_overlap
def refine_bbox(self, rois, bbox_delta, stage=0): def refine_bbox(self, roi, bbox_delta, stage=1):
out_dim = bbox_delta.shape[1] / 4 out_dim = bbox_delta.shape[1] // 4
bbox_delta_r = fluid.layers.reshape(bbox_delta, (-1, out_dim, 4)) bbox_delta_r = paddle.reshape(bbox_delta, (-1, out_dim, 4))
bbox_delta_s = fluid.layers.slice( bbox_delta_s = paddle.slice(
bbox_delta_r, axes=[1], starts=[1], ends=[2]) bbox_delta_r, axes=[1], starts=[1], ends=[2])
reg_weights = [
i / stage for i in self.proposal_target_generator.bbox_reg_weights
]
refined_bbox = ops.box_coder( refined_bbox = ops.box_coder(
prior_box=rois, prior_box=roi,
prior_box_var=self.proposal_target_generator.bbox_reg_weights[ prior_box_var=reg_weights,
stage],
target_box=bbox_delta_s, target_box=bbox_delta_s,
code_type='decode_center_size', code_type='decode_center_size',
box_normalized=False, box_normalized=False,
axis=1) axis=1)
refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) refined_bbox = paddle.reshape(refined_bbox, shape=[-1, 4])
return refined_bbox return refined_bbox
def __call__(self, def __call__(self,
...@@ -169,30 +178,26 @@ class Proposal(object): ...@@ -169,30 +178,26 @@ class Proposal(object):
anchor_out, anchor_out,
stage=0, stage=0,
proposal_out=None, proposal_out=None,
bbox_head_outs=None, bbox_head_out=None,
refined=False): max_overlap=None):
if refined:
assert proposal_out is not None, "If proposal has been refined, proposal_out should not be None."
return proposal_out
if stage == 0: if stage == 0:
roi, rois_num = self.generate_proposal(inputs, rpn_head_out, roi, rois_num = self.generate_proposal(inputs, rpn_head_out,
anchor_out) anchor_out)
self.proposals_list = []
self.targets_list = [] self.targets_list = []
self.max_overlap = None
else: else:
bbox_delta = bbox_head_outs[stage][0] bbox_delta = bbox_head_out[1]
roi = self.refine_bbox(proposal_out[0], bbox_delta, stage - 1) roi = self.refine_bbox(proposal_out[0], bbox_delta, stage)
rois_num = proposal_out[1] rois_num = proposal_out[1]
if inputs['mode'] == 'train': if inputs['mode'] == 'train':
roi, rois_num, targets = self.generate_proposal_target( roi, rois_num, targets, self.max_overlap = self.generate_proposal_target(
inputs, roi, rois_num, stage) inputs, roi, rois_num, stage, self.max_overlap)
self.targets_list.append(targets) self.targets_list.append(targets)
self.proposals_list.append((roi, rois_num))
return roi, rois_num return roi, rois_num
def get_targets(self): def get_targets(self):
return self.targets_list return self.targets_list
def get_proposals(self): def get_max_overlap(self):
return self.proposals_list return self.max_overlap
...@@ -29,39 +29,42 @@ from ..backbone.resnet import Blocks ...@@ -29,39 +29,42 @@ from ..backbone.resnet import Blocks
@register @register
class TwoFCHead(nn.Layer): class TwoFCHead(nn.Layer):
__shared__ = ['num_stages'] __shared__ = ['roi_stages']
def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1): def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, roi_stages=1):
super(TwoFCHead, self).__init__() super(TwoFCHead, self).__init__()
self.in_dim = in_dim self.in_dim = in_dim
self.mlp_dim = mlp_dim self.mlp_dim = mlp_dim
self.num_stages = num_stages self.roi_stages = roi_stages
fan = in_dim * resolution * resolution fan = in_dim * resolution * resolution
self.fc6_list = [] self.fc6_list = []
self.fc6_relu_list = [] self.fc6_relu_list = []
self.fc7_list = [] self.fc7_list = []
self.fc7_relu_list = [] self.fc7_relu_list = []
for stage in range(num_stages): for stage in range(roi_stages):
fc6_name = 'fc6_{}'.format(stage) fc6_name = 'fc6_{}'.format(stage)
fc7_name = 'fc7_{}'.format(stage) fc7_name = 'fc7_{}'.format(stage)
lr_factor = 2**stage
fc6 = self.add_sublayer( fc6 = self.add_sublayer(
fc6_name, fc6_name,
nn.Linear( nn.Linear(
in_dim * resolution * resolution, in_dim * resolution * resolution,
mlp_dim, mlp_dim,
weight_attr=ParamAttr( weight_attr=ParamAttr(
learning_rate=lr_factor,
initializer=XavierUniform(fan_out=fan)), initializer=XavierUniform(fan_out=fan)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.)))) learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
fc6_relu = self.add_sublayer(fc6_name + 'act', ReLU()) fc6_relu = self.add_sublayer(fc6_name + 'act', ReLU())
fc7 = self.add_sublayer( fc7 = self.add_sublayer(
fc7_name, fc7_name,
nn.Linear( nn.Linear(
mlp_dim, mlp_dim,
mlp_dim, mlp_dim,
weight_attr=ParamAttr(initializer=XavierUniform()), weight_attr=ParamAttr(
learning_rate=lr_factor, initializer=XavierUniform()),
bias_attr=ParamAttr( bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.)))) learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
fc7_relu = self.add_sublayer(fc7_name + 'act', ReLU()) fc7_relu = self.add_sublayer(fc7_name + 'act', ReLU())
self.fc6_list.append(fc6) self.fc6_list.append(fc6)
self.fc6_relu_list.append(fc6_relu) self.fc6_relu_list.append(fc6_relu)
...@@ -102,16 +105,17 @@ class BBoxFeat(nn.Layer): ...@@ -102,16 +105,17 @@ class BBoxFeat(nn.Layer):
super(BBoxFeat, self).__init__() super(BBoxFeat, self).__init__()
self.roi_extractor = roi_extractor self.roi_extractor = roi_extractor
self.head_feat = head_feat self.head_feat = head_feat
self.rois_feat_list = []
def forward(self, body_feats, rois, spatial_scale, stage=0): def forward(self, body_feats, rois, spatial_scale, stage=0):
rois_feat = self.roi_extractor(body_feats, rois, spatial_scale) rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_feat = self.head_feat(rois_feat, stage) bbox_feat = self.head_feat(rois_feat, stage)
return bbox_feat, self.head_feat return rois_feat, bbox_feat
@register @register
class BBoxHead(nn.Layer): class BBoxHead(nn.Layer):
__shared__ = ['num_classes', 'num_stages'] __shared__ = ['num_classes', 'roi_stages']
__inject__ = ['bbox_feat'] __inject__ = ['bbox_feat']
def __init__(self, def __init__(self,
...@@ -119,49 +123,65 @@ class BBoxHead(nn.Layer): ...@@ -119,49 +123,65 @@ class BBoxHead(nn.Layer):
in_feat=1024, in_feat=1024,
num_classes=81, num_classes=81,
cls_agnostic=False, cls_agnostic=False,
num_stages=1, roi_stages=1,
with_pool=False, with_pool=False,
score_stage=[0, 1, 2], score_stage=[0, 1, 2],
delta_stage=[2]): delta_stage=[2]):
super(BBoxHead, self).__init__() super(BBoxHead, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.cls_agnostic = cls_agnostic
self.delta_dim = 2 if cls_agnostic else num_classes self.delta_dim = 2 if cls_agnostic else num_classes
self.bbox_feat = bbox_feat self.bbox_feat = bbox_feat
self.num_stages = num_stages self.roi_stages = roi_stages
self.bbox_score_list = [] self.bbox_score_list = []
self.bbox_delta_list = [] self.bbox_delta_list = []
self.roi_feat_list = [[] for i in range(roi_stages)]
self.with_pool = with_pool self.with_pool = with_pool
self.score_stage = score_stage self.score_stage = score_stage
self.delta_stage = delta_stage self.delta_stage = delta_stage
for stage in range(num_stages): for stage in range(roi_stages):
score_name = 'bbox_score_{}'.format(stage) score_name = 'bbox_score_{}'.format(stage)
delta_name = 'bbox_delta_{}'.format(stage) delta_name = 'bbox_delta_{}'.format(stage)
lr_factor = 2**stage
bbox_score = self.add_sublayer( bbox_score = self.add_sublayer(
score_name, score_name,
nn.Linear( nn.Linear(
in_feat, in_feat,
1 * self.num_classes, 1 * self.num_classes,
weight_attr=ParamAttr(initializer=Normal( weight_attr=ParamAttr(
mean=0.0, std=0.01)), learning_rate=lr_factor,
initializer=Normal(
mean=0.0, std=0.01)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.)))) learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
bbox_delta = self.add_sublayer( bbox_delta = self.add_sublayer(
delta_name, delta_name,
nn.Linear( nn.Linear(
in_feat, in_feat,
4 * self.delta_dim, 4 * self.delta_dim,
weight_attr=ParamAttr(initializer=Normal( weight_attr=ParamAttr(
mean=0.0, std=0.001)), learning_rate=lr_factor,
initializer=Normal(
mean=0.0, std=0.001)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.)))) learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
self.bbox_score_list.append(bbox_score) self.bbox_score_list.append(bbox_score)
self.bbox_delta_list.append(bbox_delta) self.bbox_delta_list.append(bbox_delta)
def forward(self, body_feats, rois, spatial_scale, stage=0): def forward(self,
bbox_feat, head_feat_func = self.bbox_feat(body_feats, rois, body_feats=None,
spatial_scale, stage) rois=None,
bbox_head_out = [] spatial_scale=None,
stage=0,
roi_stage=-1):
if rois is not None:
rois_feat, bbox_feat = self.bbox_feat(body_feats, rois,
spatial_scale, stage)
self.roi_feat_list[stage] = rois_feat
else:
rois_feat = self.roi_feat_list[roi_stage]
bbox_feat = self.bbox_feat.head_feat(rois_feat, stage)
if self.with_pool: if self.with_pool:
bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1) bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3]) bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3])
...@@ -170,8 +190,8 @@ class BBoxHead(nn.Layer): ...@@ -170,8 +190,8 @@ class BBoxHead(nn.Layer):
else: else:
scores = self.bbox_score_list[stage](bbox_feat) scores = self.bbox_score_list[stage](bbox_feat)
deltas = self.bbox_delta_list[stage](bbox_feat) deltas = self.bbox_delta_list[stage](bbox_feat)
bbox_head_out.append((scores, deltas)) bbox_head_out = (scores, deltas)
return bbox_feat, bbox_head_out, head_feat_func return bbox_feat, bbox_head_out, self.bbox_feat.head_feat
def _get_head_loss(self, score, delta, target): def _get_head_loss(self, score, delta, target):
# bbox cls # bbox cls
...@@ -198,38 +218,46 @@ class BBoxHead(nn.Layer): ...@@ -198,38 +218,46 @@ class BBoxHead(nn.Layer):
reg_name = 'loss_bbox_reg_{}'.format(lvl) reg_name = 'loss_bbox_reg_{}'.format(lvl)
loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta, loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
target) target)
loss_bbox[cls_name] = loss_bbox_cls loss_weight = 1. / 2**lvl
loss_bbox[reg_name] = loss_bbox_reg loss_bbox[cls_name] = loss_bbox_cls * loss_weight
loss_bbox[reg_name] = loss_bbox_reg * loss_weight
return loss_bbox return loss_bbox
def get_prediction(self, bbox_head_out, rois): def get_prediction(self, bbox_head_out, rois):
if len(bbox_head_out) == 1: proposal, proposal_num = rois
proposal, proposal_num = rois score, delta = bbox_head_out
score, delta = bbox_head_out[0] bbox_prob = F.softmax(score)
bbox_prob = F.softmax(score) delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
delta = paddle.reshape(delta, (-1, self.delta_dim, 4)) bbox_pred = (delta, bbox_prob)
else: return bbox_pred, rois
num_stage = len(rois)
proposal_list = [] def get_cascade_prediction(self, bbox_head_out, rois):
prob_list = [] proposal_list = []
delta_list = [] prob_list = []
for stage, (proposals, bboxhead) in zip(rois, bboxheads): delta_list = []
score, delta = bboxhead for stage in range(len(rois)):
proposal, proposal_num = proposals proposals = rois[stage]
if stage in self.score_stage: bboxhead = bbox_head_out[stage]
bbox_prob = F.softmax(score) score, delta = bboxhead
prob_list.append(bbox_prob) proposal, proposal_num = proposals
if stage in self.delta_stage: if stage in self.score_stage:
proposal_list.append(proposal) if stage < 2:
delta_list.append(delta) _, head_out, _ = self(stage=stage, roi_stage=-1)
bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0) score = head_out[0]
delta = paddle.mean(paddle.stack(delta_list), axis=0)
proposal = paddle.mean(paddle.stack(proposal_list), axis=0) bbox_prob = F.softmax(score)
delta = paddle.reshape(delta, (-1, self.out_dim, 4)) prob_list.append(bbox_prob)
if self.cls_agnostic: if stage in self.delta_stage:
N, C, M = delta.shape proposal_list.append(proposal)
delta = delta[:, 1:2, :] delta_list.append(delta)
delta = paddle.expand(delta, [N, self.num_classes, M]) bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0)
delta = paddle.mean(paddle.stack(delta_list), axis=0)
proposal = paddle.mean(paddle.stack(proposal_list), axis=0)
delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
if self.cls_agnostic:
N, C, M = delta.shape
delta = delta[:, 1:2, :]
delta = paddle.expand(delta, [N, self.num_classes, M])
bboxes = (proposal, proposal_num) bboxes = (proposal, proposal_num)
bbox_pred = (delta, bbox_prob) bbox_pred = (delta, bbox_prob)
return bbox_pred, bboxes return bbox_pred, bboxes
...@@ -158,6 +158,7 @@ class MaskHead(Layer): ...@@ -158,6 +158,7 @@ class MaskHead(Layer):
stage=0, stage=0,
bbox_head_feat_func=None): bbox_head_feat_func=None):
bbox, bbox_num = bboxes bbox, bbox_num = bboxes
if bbox.shape[0] == 0: if bbox.shape[0] == 0:
mask_head_out = bbox mask_head_out = bbox
else: else:
......
...@@ -176,11 +176,10 @@ class ProposalTargetGenerator(object): ...@@ -176,11 +176,10 @@ class ProposalTargetGenerator(object):
fg_thresh=[.5, ], fg_thresh=[.5, ],
bg_thresh_hi=[.5, ], bg_thresh_hi=[.5, ],
bg_thresh_lo=[0., ], bg_thresh_lo=[0., ],
bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]], bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
num_classes=81, num_classes=81,
use_random=True, use_random=True,
is_cls_agnostic=False, is_cls_agnostic=False):
is_cascade_rcnn=False):
super(ProposalTargetGenerator, self).__init__() super(ProposalTargetGenerator, self).__init__()
self.batch_size_per_im = batch_size_per_im self.batch_size_per_im = batch_size_per_im
self.fg_fraction = fg_fraction self.fg_fraction = fg_fraction
...@@ -191,7 +190,6 @@ class ProposalTargetGenerator(object): ...@@ -191,7 +190,6 @@ class ProposalTargetGenerator(object):
self.num_classes = num_classes self.num_classes = num_classes
self.use_random = use_random self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic self.is_cls_agnostic = is_cls_agnostic
self.is_cascade_rcnn = is_cascade_rcnn
def __call__(self, def __call__(self,
rpn_rois, rpn_rois,
...@@ -200,19 +198,25 @@ class ProposalTargetGenerator(object): ...@@ -200,19 +198,25 @@ class ProposalTargetGenerator(object):
is_crowd, is_crowd,
gt_boxes, gt_boxes,
im_info, im_info,
stage=0): stage=0,
max_overlap=None):
rpn_rois = rpn_rois.numpy() rpn_rois = rpn_rois.numpy()
rpn_rois_num = rpn_rois_num.numpy() rpn_rois_num = rpn_rois_num.numpy()
gt_classes = gt_classes.numpy() gt_classes = gt_classes.numpy()
gt_boxes = gt_boxes.numpy() gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy() is_crowd = is_crowd.numpy()
im_info = im_info.numpy() im_info = im_info.numpy()
max_overlap = max_overlap if max_overlap is None else max_overlap.numpy(
)
reg_weights = [i / (stage + 1) for i in self.bbox_reg_weights]
is_cascade = True if stage > 0 else False
num_classes = 2 if is_cascade else self.num_classes
outs = generate_proposal_target( outs = generate_proposal_target(
rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info, rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info,
self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], reg_weights,
self.bbox_reg_weights[stage], self.num_classes, self.use_random, num_classes, self.use_random, self.is_cls_agnostic, is_cascade,
self.is_cls_agnostic, self.is_cascade_rcnn) max_overlap)
outs = [to_tensor(v) for v in outs] outs = [to_tensor(v) for v in outs]
for v in outs: for v in outs:
v.stop_gradient = True v.stop_gradient = True
...@@ -268,7 +272,12 @@ class RCNNBox(object): ...@@ -268,7 +272,12 @@ class RCNNBox(object):
self.box_normalized = box_normalized self.box_normalized = box_normalized
self.axis = axis self.axis = axis
def __call__(self, bbox_head_out, rois, im_shape, scale_factor): def __call__(self,
bbox_head_out,
rois,
im_shape,
scale_factor,
var_weight=1.):
bbox_pred, cls_prob = bbox_head_out bbox_pred, cls_prob = bbox_head_out
roi, rois_num = rois roi, rois_num = rois
origin_shape = im_shape / scale_factor origin_shape = im_shape / scale_factor
...@@ -287,9 +296,10 @@ class RCNNBox(object): ...@@ -287,9 +296,10 @@ class RCNNBox(object):
origin_shape = paddle.concat(origin_shape_list) origin_shape = paddle.concat(origin_shape_list)
bbox = roi / scale bbox = roi / scale
prior_box_var = [i / var_weight for i in self.prior_box_var]
bbox = ops.box_coder( bbox = ops.box_coder(
prior_box=bbox, prior_box=bbox,
prior_box_var=self.prior_box_var, prior_box_var=prior_box_var,
target_box=bbox_pred, target_box=bbox_pred,
code_type=self.code_type, code_type=self.code_type,
box_normalized=self.box_normalized, box_normalized=self.box_normalized,
......
...@@ -16,8 +16,14 @@ class BBoxPostProcess(object): ...@@ -16,8 +16,14 @@ class BBoxPostProcess(object):
self.decode = decode self.decode = decode
self.nms = nms self.nms = nms
def __call__(self, head_out, rois, im_shape, scale_factor=None): def __call__(self,
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor) head_out,
rois,
im_shape,
scale_factor=None,
var_weight=1.):
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor,
var_weight)
bbox_pred, bbox_num, _ = self.nms(bboxes, score) bbox_pred, bbox_num, _ = self.nms(bboxes, score)
return bbox_pred, bbox_num return bbox_pred, bbox_num
......
...@@ -72,7 +72,7 @@ def expand_bbox(bboxes, scale): ...@@ -72,7 +72,7 @@ def expand_bbox(bboxes, scale):
w_half *= scale w_half *= scale
h_half *= scale h_half *= scale
bboxes_exp = np.zeros(bboxes.shape) bboxes_exp = np.zeros(bboxes.shape, dtype=np.float32)
bboxes_exp[:, 0] = x_c - w_half bboxes_exp[:, 0] = x_c - w_half
bboxes_exp[:, 2] = x_c + w_half bboxes_exp[:, 2] = x_c + w_half
bboxes_exp[:, 1] = y_c - h_half bboxes_exp[:, 1] = y_c - h_half
...@@ -107,18 +107,20 @@ def bbox_overlaps(bboxes1, bboxes2): ...@@ -107,18 +107,20 @@ def bbox_overlaps(bboxes1, bboxes2):
area1 = w1 * h1 area1 = w1 * h1
area2 = w2 * h2 area2 = w2 * h2
overlaps = np.zeros((bboxes1.shape[0], bboxes2.shape[0])) boxes1_x1, boxes1_y1, boxes1_x2, boxes1_y2 = np.split(bboxes1, 4, axis=1)
for ind1 in range(bboxes1.shape[0]): boxes2_x1, boxes2_y1, boxes2_x2, boxes2_y2 = np.split(bboxes2, 4, axis=1)
for ind2 in range(bboxes2.shape[0]):
inter_x1 = np.maximum(bboxes1[ind1, 0], bboxes2[ind2, 0]) all_pairs_min_ymax = np.minimum(boxes1_y2, np.transpose(boxes2_y2))
inter_y1 = np.maximum(bboxes1[ind1, 1], bboxes2[ind2, 1]) all_pairs_max_ymin = np.maximum(boxes1_y1, np.transpose(boxes2_y1))
inter_x2 = np.minimum(bboxes1[ind1, 2], bboxes2[ind2, 2]) inter_h = np.maximum(all_pairs_min_ymax - all_pairs_max_ymin + 1, 0.)
inter_y2 = np.minimum(bboxes1[ind1, 3], bboxes2[ind2, 3]) all_pairs_min_xmax = np.minimum(boxes1_x2, np.transpose(boxes2_x2))
inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0) all_pairs_max_xmin = np.maximum(boxes1_x1, np.transpose(boxes2_x1))
inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0) inter_w = np.maximum(all_pairs_min_xmax - all_pairs_max_xmin + 1, 0.)
inter_area = inter_w * inter_h
iou = inter_area * 1.0 / (area1[ind1] + area2[ind2] - inter_area) inter_area = inter_w * inter_h
overlaps[ind1, ind2] = iou
union_area = np.expand_dims(area1, 1) + np.expand_dims(area2, 0)
overlaps = inter_area / (union_area - inter_area)
return overlaps return overlaps
......
...@@ -90,7 +90,6 @@ def generate_rpn_anchor_target(anchors, ...@@ -90,7 +90,6 @@ def generate_rpn_anchor_target(anchors,
@jit @jit
def label_anchor(anchors, gt_boxes): def label_anchor(anchors, gt_boxes):
iou = bbox_overlaps(anchors, gt_boxes) iou = bbox_overlaps(anchors, gt_boxes)
# every gt's anchor's index # every gt's anchor's index
gt_bbox_anchor_inds = iou.argmax(axis=0) gt_bbox_anchor_inds = iou.argmax(axis=0)
gt_bbox_anchor_iou = iou[gt_bbox_anchor_inds, np.arange(iou.shape[1])] gt_bbox_anchor_iou = iou[gt_bbox_anchor_inds, np.arange(iou.shape[1])]
...@@ -148,6 +147,16 @@ def sample_anchor(anchor_gt_bbox_iou, ...@@ -148,6 +147,16 @@ def sample_anchor(anchor_gt_bbox_iou,
return fg_inds, bg_inds, fg_fake_inds, fake_num return fg_inds, bg_inds, fg_fake_inds, fake_num
@jit
def filter_roi(rois, max_overlap):
    """Drop degenerate or fully-overlapping RoIs before the next stage.

    Args:
        rois: 2-D array indexed as ``[:, 0..3]`` = (x1, y1, x2, y2).
        max_overlap: per-RoI overlap values, compared element-wise with 1.
            NOTE(review): presumably the max IoU against ground truth from
            the previous cascade stage, so ``< 1`` removes RoIs that are
            themselves ground-truth boxes -- confirm against the caller.

    Returns:
        The rows of ``rois`` with positive width, positive height and
        ``max_overlap < 1``. If no row qualifies, returns a single all-zero
        ``(1, 4)`` float32 box so the result is never empty.
    """
    # Width/height use the inclusive-pixel "+ 1" convention.
    ws = rois[:, 2] - rois[:, 0] + 1
    hs = rois[:, 3] - rois[:, 1] + 1
    keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1))[0]
    if len(keep) > 0:
        return rois[keep, :]
    # Fallback placeholder: one zero box instead of an empty array.
    return np.zeros((1, 4)).astype('float32')
@jit @jit
def generate_proposal_target(rpn_rois, def generate_proposal_target(rpn_rois,
rpn_rois_num, rpn_rois_num,
...@@ -164,42 +173,38 @@ def generate_proposal_target(rpn_rois, ...@@ -164,42 +173,38 @@ def generate_proposal_target(rpn_rois,
class_nums=81, class_nums=81,
use_random=True, use_random=True,
is_cls_agnostic=False, is_cls_agnostic=False,
is_cascade_rcnn=False): is_cascade_rcnn=False,
max_overlaps=None):
rois = [] rois = []
tgt_labels = [] tgt_labels = []
tgt_deltas = [] tgt_deltas = []
rois_inside_weights = [] rois_inside_weights = []
rois_outside_weights = [] rois_outside_weights = []
sampled_max_overlaps = []
new_rois_num = [] new_rois_num = []
st_num = 0 st_num = 0
end_num = 0 end_num = 0
for im_i in range(len(rpn_rois_num)): for im_i in range(len(rpn_rois_num)):
length = rpn_rois_num[im_i] length = rpn_rois_num[im_i]
end_num += length end_num += length
rpn_roi = rpn_rois[st_num:end_num] rpn_roi = rpn_rois[st_num:end_num]
max_overlap = max_overlaps[st_num:end_num] if is_cascade_rcnn else None
im_scale = im_info[im_i][2] im_scale = im_info[im_i][2]
rpn_roi = rpn_roi / im_scale rpn_roi = rpn_roi / im_scale
gt_bbox = gt_boxes[im_i] gt_bbox = gt_boxes[im_i]
if is_cascade_rcnn: if is_cascade_rcnn:
rpn_roi = rpn_roi[gt_bbox.shape[0]:, :] rpn_roi = filter_roi(rpn_roi, max_overlap)
bbox = np.vstack([gt_bbox, rpn_roi]) bbox = np.vstack([gt_bbox, rpn_roi]).astype('float32')
# Step1: label bbox # Step1: label bbox
roi_gt_bbox_inds, roi_gt_bbox_iou, labels, = label_bbox( roi_gt_bbox_inds, labels, max_overlap = label_bbox(
bbox, gt_bbox, gt_classes[im_i], is_crowd[im_i]) bbox, gt_bbox, gt_classes[im_i], is_crowd[im_i])
# Step2: sample bbox # Step2: sample bbox
if is_cascade_rcnn:
ws = bbox[:, 2] - bbox[:, 0] + 1
hs = bbox[:, 3] - bbox[:, 1] + 1
keep = np.where((ws > 0) & (hs > 0))[0]
bbox = bbox[keep]
fg_inds, bg_inds, fg_nums = sample_bbox( fg_inds, bg_inds, fg_nums = sample_bbox(
roi_gt_bbox_iou, batch_size_per_im, fg_fraction, fg_thresh, max_overlap, batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
use_random, is_cls_agnostic, is_cascade_rcnn) use_random, is_cls_agnostic, is_cascade_rcnn)
...@@ -210,10 +215,12 @@ def generate_proposal_target(rpn_rois, ...@@ -210,10 +215,12 @@ def generate_proposal_target(rpn_rois,
sampled_labels[fg_nums:] = 0 sampled_labels[fg_nums:] = 0
sampled_boxes = bbox[sampled_inds] sampled_boxes = bbox[sampled_inds]
sampled_max_overlap = max_overlap[sampled_inds]
sampled_gt_boxes = gt_bbox[roi_gt_bbox_inds[sampled_inds]] sampled_gt_boxes = gt_bbox[roi_gt_bbox_inds[sampled_inds]]
sampled_gt_boxes[fg_nums:, :] = gt_bbox[0] sampled_gt_boxes[fg_nums:, :] = 0
sampled_deltas = compute_bbox_targets(sampled_boxes, sampled_gt_boxes, sampled_deltas = compute_bbox_targets(sampled_boxes, sampled_gt_boxes,
sampled_labels, bbox_reg_weights) sampled_labels, bbox_reg_weights)
sampled_deltas[fg_nums:, :] = 0
sampled_deltas, bbox_inside_weights = expand_bbox_targets( sampled_deltas, bbox_inside_weights = expand_bbox_targets(
sampled_deltas, class_nums, is_cls_agnostic) sampled_deltas, class_nums, is_cls_agnostic)
bbox_outside_weights = np.array( bbox_outside_weights = np.array(
...@@ -228,6 +235,7 @@ def generate_proposal_target(rpn_rois, ...@@ -228,6 +235,7 @@ def generate_proposal_target(rpn_rois,
tgt_deltas.append(sampled_deltas) tgt_deltas.append(sampled_deltas)
rois_inside_weights.append(bbox_inside_weights) rois_inside_weights.append(bbox_inside_weights)
rois_outside_weights.append(bbox_outside_weights) rois_outside_weights.append(bbox_outside_weights)
sampled_max_overlaps.append(sampled_max_overlap)
rois = np.concatenate(rois, axis=0).astype(np.float32) rois = np.concatenate(rois, axis=0).astype(np.float32)
tgt_labels = np.concatenate( tgt_labels = np.concatenate(
...@@ -237,23 +245,20 @@ def generate_proposal_target(rpn_rois, ...@@ -237,23 +245,20 @@ def generate_proposal_target(rpn_rois,
rois_inside_weights, axis=0).astype(np.float32) rois_inside_weights, axis=0).astype(np.float32)
rois_outside_weights = np.concatenate( rois_outside_weights = np.concatenate(
rois_outside_weights, axis=0).astype(np.float32) rois_outside_weights, axis=0).astype(np.float32)
sampled_max_overlaps = np.concatenate(
sampled_max_overlaps, axis=0).astype(np.float32)
new_rois_num = np.asarray(new_rois_num, np.int32) new_rois_num = np.asarray(new_rois_num, np.int32)
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num, sampled_max_overlaps
@jit @jit
def label_bbox(boxes, def label_bbox(boxes, gt_boxes, gt_classes, is_crowd, class_nums=81):
gt_boxes,
gt_classes,
is_crowd,
class_nums=81,
is_cascade_rcnn=False):
iou = bbox_overlaps(boxes, gt_boxes) iou = bbox_overlaps(boxes, gt_boxes)
# every roi's gt box's index # every roi's gt box's index
roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32) roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32)
roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums)) roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums), dtype=np.float32)
iou_argmax = iou.argmax(axis=1) iou_argmax = iou.argmax(axis=1)
iou_max = iou.max(axis=1) iou_max = iou.max(axis=1)
...@@ -267,13 +272,14 @@ def label_bbox(boxes, ...@@ -267,13 +272,14 @@ def label_bbox(boxes,
crowd_ind = np.where(is_crowd)[0] crowd_ind = np.where(is_crowd)[0]
roi_gt_bbox_iou[crowd_ind] = -1 roi_gt_bbox_iou[crowd_ind] = -1
max_overlap = roi_gt_bbox_iou.max(axis=1)
labels = roi_gt_bbox_iou.argmax(axis=1) labels = roi_gt_bbox_iou.argmax(axis=1)
return roi_gt_bbox_inds, roi_gt_bbox_iou, labels return roi_gt_bbox_inds, labels, max_overlap
@jit @jit
def sample_bbox(roi_gt_bbox_iou, def sample_bbox(max_overlap,
batch_size_per_im, batch_size_per_im,
fg_fraction, fg_fraction,
fg_thresh, fg_thresh,
...@@ -285,27 +291,26 @@ def sample_bbox(roi_gt_bbox_iou, ...@@ -285,27 +291,26 @@ def sample_bbox(roi_gt_bbox_iou,
is_cls_agnostic=False, is_cls_agnostic=False,
is_cascade_rcnn=False): is_cascade_rcnn=False):
roi_gt_bbox_iou_max = roi_gt_bbox_iou.max(axis=1)
rois_per_image = int(batch_size_per_im) rois_per_image = int(batch_size_per_im)
fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
if is_cascade_rcnn: if is_cascade_rcnn:
fg_inds = np.where(roi_gt_bbox_iou_max >= fg_thresh)[0] fg_inds = np.where(max_overlap >= fg_thresh)[0]
bg_inds = np.where((roi_gt_bbox_iou_max < bg_thresh_hi) & ( bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
roi_gt_bbox_iou_max >= bg_thresh_lo))[0] bg_thresh_lo))[0]
fg_nums = fg_inds.shape[0] fg_nums = fg_inds.shape[0]
bg_nums = bg_inds.shape[0] bg_nums = bg_inds.shape[0]
else: else:
# sample fg # sample fg
fg_inds = np.where(roi_gt_bbox_iou_max >= fg_thresh)[0] fg_inds = np.where(max_overlap >= fg_thresh)[0]
fg_nums = np.minimum(fg_rois_per_im, fg_inds.shape[0]) fg_nums = np.minimum(fg_rois_per_im, fg_inds.shape[0])
if (fg_inds.shape[0] > fg_nums) and use_random: if (fg_inds.shape[0] > fg_nums) and use_random:
fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False) fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False)
fg_inds = fg_inds[:fg_nums] fg_inds = fg_inds[:fg_nums]
# sample bg # sample bg
bg_inds = np.where((roi_gt_bbox_iou_max < bg_thresh_hi) & ( bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
roi_gt_bbox_iou_max >= bg_thresh_lo))[0] bg_thresh_lo))[0]
bg_nums = rois_per_image - fg_nums bg_nums = rois_per_image - fg_nums
bg_nums = np.minimum(bg_nums, bg_inds.shape[0]) bg_nums = np.minimum(bg_nums, bg_inds.shape[0])
if (bg_inds.shape[0] > bg_nums) and use_random: if (bg_inds.shape[0] > bg_nums) and use_random:
......
...@@ -171,12 +171,7 @@ def run(FLAGS, cfg, place): ...@@ -171,12 +171,7 @@ def run(FLAGS, cfg, place):
# Model Backward # Model Backward
loss = outputs['loss'] loss = outputs['loss']
if ParallelEnv().nranks > 1: loss.backward()
loss = model.scale_loss(loss)
loss.backward()
model.apply_collective_grads()
else:
loss.backward()
optimizer.step() optimizer.step()
curr_lr = optimizer.get_lr() curr_lr = optimizer.get_lr()
lr.step() lr.step()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册