未验证 提交 9b279ee3 编写于 作者: W wangguanzhong 提交者: GitHub

[Dygraph]Add cascade (#1859)

* add cascade mask

* add cascade rcnn

* update code

* remove comments
上级 1c7592a0
architecture: CascadeRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
load_static_weights: True
roi_stages: 3
# Model Architecture
CascadeRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
is_cls_agnostic: true
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
mlp_dim: 1024
in_feat: 1024
cls_agnostic: true
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 28
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 4
feat_in: 256
feat_out: 256
mask_roi_extractor:
name: RoIAlign
resolution: 14
sampling_ratio: 2
share_bbox_feat: False
feat_in: 256
MaskPostProcess:
mask_resolution: 28
architecture: CascadeRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
load_static_weights: True
roi_stages: 3
# Model Architecture
CascadeRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
is_cls_agnostic: true
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor:
name: RoIAlign
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
mlp_dim: 1024
in_feat: 1024
cls_agnostic: true
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
......@@ -59,7 +59,7 @@ Proposal:
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
......
......@@ -66,7 +66,7 @@ Proposal:
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
......
......@@ -60,7 +60,7 @@ Proposal:
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
......
......@@ -68,7 +68,7 @@ Proposal:
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
......
......@@ -4,7 +4,6 @@ TrainReader:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
sample_transforms:
- DecodeImage: {to_rgb: true}
# check
- RandomFlipImage: {prob: 0.5}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
......
use_gpu: true
log_iter: 20
save_dir: output
snapshot_epoch: 2
snapshot_epoch: 1
_BASE_: [
'./_base_/models/cascade_mask_rcnn_r50_fpn.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/mask_fpn_reader.yml',
'./_base_/runtime.yml',
]
_BASE_: [
'./_base_/models/cascade_rcnn_r50_fpn.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/faster_fpn_reader.yml',
'./_base_/runtime.yml',
]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
import paddle
from ppdet.core.workspace import register
from .meta_arch import BaseArch
......@@ -12,115 +26,149 @@ __all__ = ['CascadeRCNN']
@register
class CascadeRCNN(BaseArch):
__category__ = 'architecture'
__shared__ = ['num_stages']
__shared__ = ['roi_stages']
__inject__ = [
'anchor',
'proposal',
'mask',
'backbone',
'neck',
'rpn_head',
'bbox_head',
'mask_head',
'bbox_post_process',
'mask_post_process',
]
def __init__(self,
anchor,
proposal,
mask,
backbone,
rpn_head,
bbox_head,
mask_head,
num_stages=3,
*args,
**kwargs):
super(CascadeRCNN, self).__init__(*args, **kwargs)
bbox_post_process,
neck=None,
mask=None,
mask_head=None,
mask_post_process=None,
roi_stages=3):
super(CascadeRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.mask = mask
self.backbone = backbone
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.bbox_post_process = bbox_post_process
self.neck = neck
self.mask = mask
self.mask_head = mask_head
self.num_stages = num_stages
self.mask_post_process = mask_post_process
self.roi_stages = roi_stages
self.with_mask = mask is not None
def model_arch(self, ):
# Backbone
bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out)
body_feats = self.backbone(self.inputs)
# Neck
if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats)
# RPN
rpn_head_out = self.rpn_head(self.gbd)
self.gbd.update(rpn_head_out)
# rpn_head returns two lists: rpn_feat and rpn_head_out.
# Each element of rpn_feat holds the RPN feature of one level;
# the list has length 1 when the neck is not applied.
# Each element of rpn_head_out holds (rpn_rois_score, rpn_rois_delta).
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats)
# Anchor
anchor_out = self.anchor(self.gbd)
self.gbd.update(anchor_out)
self.gbd['stage'] = 0
for i in range(self.num_stages):
self.gbd.update_v('stage', i)
# anchor_out returns a list,
# each element contains (anchor, anchor_var)
self.anchor_out = self.anchor(rpn_feat)
# Proposal RoI
# compute targets here when training
rois = None
bbox_head_out = None
max_overlap = None
self.bbox_head_list = []
rois_list = []
for i in range(self.roi_stages):
# Proposal BBox
proposal_out = self.proposal(self.gbd)
self.gbd.update({"proposal_" + str(i): proposal_out})
rois = self.proposal(
self.inputs,
self.rpn_head_out,
self.anchor_out,
i,
rois,
bbox_head_out,
max_overlap=max_overlap)
rois_list.append(rois)
max_overlap = self.proposal.get_max_overlap()
# BBox Head
bbox_head_out = self.bbox_head(self.gbd)
self.gbd.update({'bbox_head_' + str(i): bbox_head_out})
refine_bbox_out = self.proposal.refine_bbox(self.gbd)
self.gbd['proposal_' + str(i)].update(refine_bbox_out)
if self.gbd['mode'] == 'infer':
bbox_out = self.proposal.post_process(self.gbd)
self.gbd.update(bbox_out)
# Mask
mask_out = self.mask(self.gbd)
self.gbd.update(mask_out)
# Mask Head
mask_head_out = self.mask_head(self.gbd)
self.gbd.update(mask_head_out)
if self.gbd['mode'] == 'infer':
mask_out = self.mask.post_process(self.gbd)
self.gbd.update(mask_out)
bbox_feat, bbox_head_out, _ = self.bbox_head(body_feats, rois,
spatial_scale, i)
self.bbox_head_list.append(bbox_head_out)
if self.inputs['mode'] == 'infer':
bbox_pred, bboxes = self.bbox_head.get_cascade_prediction(
self.bbox_head_list, rois_list)
self.bboxes = self.bbox_post_process(
bbox_pred,
bboxes,
self.inputs['im_shape'],
self.inputs['scale_factor'],
var_weight=3.)
if self.with_mask:
rois = rois_list[-1]
rois_has_mask_int32 = None
if self.inputs['mode'] == 'train':
bbox_targets = self.proposal.get_targets()[-1]
self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois,
bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(
self.inputs, body_feats, self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale)
def get_loss(self, ):
outs = {}
losses = []
rpn_cls_loss, rpn_reg_loss = self.rpn_head.get_loss(self.gbd)
outs['loss_rpn_cls'] = rpn_cls_loss
outs['loss_rpn_reg'] = rpn_reg_loss
losses.extend([rpn_cls_loss, rpn_reg_loss])
bbox_cls_loss_list = []
bbox_reg_loss_list = []
for i in range(self.num_stages):
self.gbd.update_v('stage', i)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.get_loss(self.gbd)
bbox_cls_loss_list.append(bbox_cls_loss)
bbox_reg_loss_list.append(bbox_reg_loss)
outs['loss_bbox_cls_' + str(i)] = bbox_cls_loss
outs['loss_bbox_reg_' + str(i)] = bbox_reg_loss
losses.extend(bbox_cls_loss_list)
losses.extend(bbox_reg_loss_list)
mask_loss = self.mask_head.get_loss(self.gbd)
outs['mask_loss'] = mask_loss
losses.append(mask_loss)
loss = fluid.layers.sum(losses)
outs['loss'] = loss
return outs
def get_pred(self, ):
outs = {
'bbox': self.gbd['predicted_bbox'].numpy(),
'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(),
'mask': self.gbd['predicted_mask'].numpy(),
'im_id': self.gbd['im_id'].numpy(),
loss = {}
# RPN loss
rpn_loss_inputs = self.anchor.generate_loss_inputs(
self.inputs, self.rpn_head_out, self.anchor_out)
loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs)
loss.update(loss_rpn)
# BBox loss
bbox_targets_list = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss(self.bbox_head_list,
bbox_targets_list)
loss.update(loss_bbox)
if self.with_mask:
# Mask loss
mask_targets = self.mask.get_targets()
loss_mask = self.mask_head.get_loss(self.mask_head_out,
mask_targets)
loss.update(loss_mask)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self, return_numpy=True):
bbox, bbox_num = self.bboxes
output = {
'bbox': bbox.numpy(),
'bbox_num': bbox_num.numpy(),
'im_id': self.inputs['im_id'].numpy(),
}
return inputs
if self.with_mask:
mask = self.mask_post_process(self.bboxes, self.mask_head_out,
self.inputs['im_shape'],
self.inputs['scale_factor'])
output.update(mask)
return output
......@@ -86,7 +86,7 @@ class FasterRCNN(BaseArch):
# BBox loss
bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets)
loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets)
loss.update(loss_bbox)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
......
......@@ -121,7 +121,7 @@ class MaskRCNN(BaseArch):
# BBox loss
bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets)
loss_bbox = self.bbox_head.get_loss([self.bbox_head_out], bbox_targets)
loss.update(loss_bbox)
# Mask loss
......
......@@ -166,16 +166,16 @@ class BottleNeck(nn.Layer):
name=conv_name3)
def forward(self, inputs):
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
out = self.branch2a(inputs)
out = self.branch2b(out)
out = self.branch2c(out)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
out = paddle.add(x=short, y=out)
out = paddle.add(x=out, y=short)
out = F.relu(out)
return out
......
......@@ -127,7 +127,12 @@ class Proposal(object):
rois_num_per_level=rpn_rois_num_list)
return rois_collect, rois_num_collect
def generate_proposal_target(self, inputs, rois, rois_num, stage=0):
def generate_proposal_target(self,
inputs,
rois,
rois_num,
stage=0,
max_overlap=None):
outs = self.proposal_target_generator(
rpn_rois=rois,
rpn_rois_num=rois_num,
......@@ -135,32 +140,36 @@ class Proposal(object):
is_crowd=inputs['is_crowd'],
gt_boxes=inputs['gt_bbox'],
im_info=inputs['im_info'],
stage=stage)
stage=stage,
max_overlap=max_overlap)
rois = outs[0]
rois_num = outs[-1]
max_overlap = outs[-1]
rois_num = outs[-2]
targets = {
'labels_int32': outs[1],
'bbox_targets': outs[2],
'bbox_inside_weights': outs[3],
'bbox_outside_weights': outs[4]
}
return rois, rois_num, targets
return rois, rois_num, targets, max_overlap
def refine_bbox(self, rois, bbox_delta, stage=0):
out_dim = bbox_delta.shape[1] / 4
bbox_delta_r = fluid.layers.reshape(bbox_delta, (-1, out_dim, 4))
bbox_delta_s = fluid.layers.slice(
def refine_bbox(self, roi, bbox_delta, stage=1):
out_dim = bbox_delta.shape[1] // 4
bbox_delta_r = paddle.reshape(bbox_delta, (-1, out_dim, 4))
bbox_delta_s = paddle.slice(
bbox_delta_r, axes=[1], starts=[1], ends=[2])
reg_weights = [
i / stage for i in self.proposal_target_generator.bbox_reg_weights
]
refined_bbox = ops.box_coder(
prior_box=rois,
prior_box_var=self.proposal_target_generator.bbox_reg_weights[
stage],
prior_box=roi,
prior_box_var=reg_weights,
target_box=bbox_delta_s,
code_type='decode_center_size',
box_normalized=False,
axis=1)
refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4])
refined_bbox = paddle.reshape(refined_bbox, shape=[-1, 4])
return refined_bbox
def __call__(self,
......@@ -169,30 +178,26 @@ class Proposal(object):
anchor_out,
stage=0,
proposal_out=None,
bbox_head_outs=None,
refined=False):
if refined:
assert proposal_out is not None, "If proposal has been refined, proposal_out should not be None."
return proposal_out
bbox_head_out=None,
max_overlap=None):
if stage == 0:
roi, rois_num = self.generate_proposal(inputs, rpn_head_out,
anchor_out)
self.proposals_list = []
self.targets_list = []
self.max_overlap = None
else:
bbox_delta = bbox_head_outs[stage][0]
roi = self.refine_bbox(proposal_out[0], bbox_delta, stage - 1)
bbox_delta = bbox_head_out[1]
roi = self.refine_bbox(proposal_out[0], bbox_delta, stage)
rois_num = proposal_out[1]
if inputs['mode'] == 'train':
roi, rois_num, targets = self.generate_proposal_target(
inputs, roi, rois_num, stage)
roi, rois_num, targets, self.max_overlap = self.generate_proposal_target(
inputs, roi, rois_num, stage, self.max_overlap)
self.targets_list.append(targets)
self.proposals_list.append((roi, rois_num))
return roi, rois_num
def get_targets(self):
return self.targets_list
def get_proposals(self):
return self.proposals_list
def get_max_overlap(self):
return self.max_overlap
......@@ -29,39 +29,42 @@ from ..backbone.resnet import Blocks
@register
class TwoFCHead(nn.Layer):
__shared__ = ['num_stages']
__shared__ = ['roi_stages']
def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1):
def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, roi_stages=1):
super(TwoFCHead, self).__init__()
self.in_dim = in_dim
self.mlp_dim = mlp_dim
self.num_stages = num_stages
self.roi_stages = roi_stages
fan = in_dim * resolution * resolution
self.fc6_list = []
self.fc6_relu_list = []
self.fc7_list = []
self.fc7_relu_list = []
for stage in range(num_stages):
for stage in range(roi_stages):
fc6_name = 'fc6_{}'.format(stage)
fc7_name = 'fc7_{}'.format(stage)
lr_factor = 2**stage
fc6 = self.add_sublayer(
fc6_name,
nn.Linear(
in_dim * resolution * resolution,
mlp_dim,
weight_attr=ParamAttr(
learning_rate=lr_factor,
initializer=XavierUniform(fan_out=fan)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
fc6_relu = self.add_sublayer(fc6_name + 'act', ReLU())
fc7 = self.add_sublayer(
fc7_name,
nn.Linear(
mlp_dim,
mlp_dim,
weight_attr=ParamAttr(initializer=XavierUniform()),
weight_attr=ParamAttr(
learning_rate=lr_factor, initializer=XavierUniform()),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
fc7_relu = self.add_sublayer(fc7_name + 'act', ReLU())
self.fc6_list.append(fc6)
self.fc6_relu_list.append(fc6_relu)
......@@ -102,16 +105,17 @@ class BBoxFeat(nn.Layer):
super(BBoxFeat, self).__init__()
self.roi_extractor = roi_extractor
self.head_feat = head_feat
self.rois_feat_list = []
def forward(self, body_feats, rois, spatial_scale, stage=0):
rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_feat = self.head_feat(rois_feat, stage)
return bbox_feat, self.head_feat
return rois_feat, bbox_feat
@register
class BBoxHead(nn.Layer):
__shared__ = ['num_classes', 'num_stages']
__shared__ = ['num_classes', 'roi_stages']
__inject__ = ['bbox_feat']
def __init__(self,
......@@ -119,49 +123,65 @@ class BBoxHead(nn.Layer):
in_feat=1024,
num_classes=81,
cls_agnostic=False,
num_stages=1,
roi_stages=1,
with_pool=False,
score_stage=[0, 1, 2],
delta_stage=[2]):
super(BBoxHead, self).__init__()
self.num_classes = num_classes
self.cls_agnostic = cls_agnostic
self.delta_dim = 2 if cls_agnostic else num_classes
self.bbox_feat = bbox_feat
self.num_stages = num_stages
self.roi_stages = roi_stages
self.bbox_score_list = []
self.bbox_delta_list = []
self.roi_feat_list = [[] for i in range(roi_stages)]
self.with_pool = with_pool
self.score_stage = score_stage
self.delta_stage = delta_stage
for stage in range(num_stages):
for stage in range(roi_stages):
score_name = 'bbox_score_{}'.format(stage)
delta_name = 'bbox_delta_{}'.format(stage)
lr_factor = 2**stage
bbox_score = self.add_sublayer(
score_name,
nn.Linear(
in_feat,
1 * self.num_classes,
weight_attr=ParamAttr(initializer=Normal(
mean=0.0, std=0.01)),
weight_attr=ParamAttr(
learning_rate=lr_factor,
initializer=Normal(
mean=0.0, std=0.01)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
bbox_delta = self.add_sublayer(
delta_name,
nn.Linear(
in_feat,
4 * self.delta_dim,
weight_attr=ParamAttr(initializer=Normal(
mean=0.0, std=0.001)),
weight_attr=ParamAttr(
learning_rate=lr_factor,
initializer=Normal(
mean=0.0, std=0.001)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
learning_rate=2. * lr_factor, regularizer=L2Decay(0.))))
self.bbox_score_list.append(bbox_score)
self.bbox_delta_list.append(bbox_delta)
def forward(self, body_feats, rois, spatial_scale, stage=0):
bbox_feat, head_feat_func = self.bbox_feat(body_feats, rois,
spatial_scale, stage)
bbox_head_out = []
def forward(self,
body_feats=None,
rois=None,
spatial_scale=None,
stage=0,
roi_stage=-1):
if rois is not None:
rois_feat, bbox_feat = self.bbox_feat(body_feats, rois,
spatial_scale, stage)
self.roi_feat_list[stage] = rois_feat
else:
rois_feat = self.roi_feat_list[roi_stage]
bbox_feat = self.bbox_feat.head_feat(rois_feat, stage)
if self.with_pool:
bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3])
......@@ -170,8 +190,8 @@ class BBoxHead(nn.Layer):
else:
scores = self.bbox_score_list[stage](bbox_feat)
deltas = self.bbox_delta_list[stage](bbox_feat)
bbox_head_out.append((scores, deltas))
return bbox_feat, bbox_head_out, head_feat_func
bbox_head_out = (scores, deltas)
return bbox_feat, bbox_head_out, self.bbox_feat.head_feat
def _get_head_loss(self, score, delta, target):
# bbox cls
......@@ -198,38 +218,46 @@ class BBoxHead(nn.Layer):
reg_name = 'loss_bbox_reg_{}'.format(lvl)
loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
target)
loss_bbox[cls_name] = loss_bbox_cls
loss_bbox[reg_name] = loss_bbox_reg
loss_weight = 1. / 2**lvl
loss_bbox[cls_name] = loss_bbox_cls * loss_weight
loss_bbox[reg_name] = loss_bbox_reg * loss_weight
return loss_bbox
def get_prediction(self, bbox_head_out, rois):
if len(bbox_head_out) == 1:
proposal, proposal_num = rois
score, delta = bbox_head_out[0]
bbox_prob = F.softmax(score)
delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
else:
num_stage = len(rois)
proposal_list = []
prob_list = []
delta_list = []
for stage, (proposals, bboxhead) in zip(rois, bboxheads):
score, delta = bboxhead
proposal, proposal_num = proposals
if stage in self.score_stage:
bbox_prob = F.softmax(score)
prob_list.append(bbox_prob)
if stage in self.delta_stage:
proposal_list.append(proposal)
delta_list.append(delta)
bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0)
delta = paddle.mean(paddle.stack(delta_list), axis=0)
proposal = paddle.mean(paddle.stack(proposal_list), axis=0)
delta = paddle.reshape(delta, (-1, self.out_dim, 4))
if self.cls_agnostic:
N, C, M = delta.shape
delta = delta[:, 1:2, :]
delta = paddle.expand(delta, [N, self.num_classes, M])
proposal, proposal_num = rois
score, delta = bbox_head_out
bbox_prob = F.softmax(score)
delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
bbox_pred = (delta, bbox_prob)
return bbox_pred, rois
def get_cascade_prediction(self, bbox_head_out, rois):
# Fuse the per-stage head outputs into one (bbox_pred, bboxes) pair.
#   bbox_head_out: indexable by stage; each entry unpacks to (score, delta).
#   rois:          indexable by stage; each entry unpacks to
#                  (proposal, proposal_num).
# Returns ((delta, bbox_prob), (proposal, proposal_num)).
# NOTE(review): this listing has lost its indentation; the nesting implied by
# the comments below is inferred from the statements and should be confirmed
# against the real file.
proposal_list = []
prob_list = []
delta_list = []
for stage in range(len(rois)):
proposals = rois[stage]
bboxhead = bbox_head_out[stage]
score, delta = bboxhead
proposal, proposal_num = proposals
# Only stages listed in self.score_stage contribute a class probability.
if stage in self.score_stage:
# For stages below 2 the score is recomputed by calling this layer again
# without rois. Per the forward() shown in this listing, that path reads
# self.roi_feat_list[roi_stage] (the last stored entry for roi_stage=-1)
# and applies this stage's head to it.
# NOTE(review): the literal 2 presumably means "every stage except the last
# of a three-stage cascade" - confirm before using another stage count.
if stage < 2:
_, head_out, _ = self(stage=stage, roi_stage=-1)
score = head_out[0]
bbox_prob = F.softmax(score)
prob_list.append(bbox_prob)
# Only stages listed in self.delta_stage contribute deltas and proposals.
if stage in self.delta_stage:
proposal_list.append(proposal)
delta_list.append(delta)
# Average the collected tensors across stages (stack, then mean over axis 0).
bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0)
delta = paddle.mean(paddle.stack(delta_list), axis=0)
proposal = paddle.mean(paddle.stack(proposal_list), axis=0)
delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
# Class-agnostic case: keep the slice at index 1 of axis 1 and expand it to
# self.num_classes entries along that axis.
if self.cls_agnostic:
N, C, M = delta.shape
delta = delta[:, 1:2, :]
delta = paddle.expand(delta, [N, self.num_classes, M])
# proposal_num is whatever the final loop iteration unpacked.
bboxes = (proposal, proposal_num)
bbox_pred = (delta, bbox_prob)
return bbox_pred, bboxes
......@@ -158,6 +158,7 @@ class MaskHead(Layer):
stage=0,
bbox_head_feat_func=None):
bbox, bbox_num = bboxes
if bbox.shape[0] == 0:
mask_head_out = bbox
else:
......
......@@ -176,11 +176,10 @@ class ProposalTargetGenerator(object):
fg_thresh=[.5, ],
bg_thresh_hi=[.5, ],
bg_thresh_lo=[0., ],
bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]],
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
num_classes=81,
use_random=True,
is_cls_agnostic=False,
is_cascade_rcnn=False):
is_cls_agnostic=False):
super(ProposalTargetGenerator, self).__init__()
self.batch_size_per_im = batch_size_per_im
self.fg_fraction = fg_fraction
......@@ -191,7 +190,6 @@ class ProposalTargetGenerator(object):
self.num_classes = num_classes
self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic
self.is_cascade_rcnn = is_cascade_rcnn
def __call__(self,
rpn_rois,
......@@ -200,19 +198,25 @@ class ProposalTargetGenerator(object):
is_crowd,
gt_boxes,
im_info,
stage=0):
stage=0,
max_overlap=None):
rpn_rois = rpn_rois.numpy()
rpn_rois_num = rpn_rois_num.numpy()
gt_classes = gt_classes.numpy()
gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy()
im_info = im_info.numpy()
max_overlap = max_overlap if max_overlap is None else max_overlap.numpy(
)
reg_weights = [i / (stage + 1) for i in self.bbox_reg_weights]
is_cascade = True if stage > 0 else False
num_classes = 2 if is_cascade else self.num_classes
outs = generate_proposal_target(
rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info,
self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage],
self.bbox_reg_weights[stage], self.num_classes, self.use_random,
self.is_cls_agnostic, self.is_cascade_rcnn)
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], reg_weights,
num_classes, self.use_random, self.is_cls_agnostic, is_cascade,
max_overlap)
outs = [to_tensor(v) for v in outs]
for v in outs:
v.stop_gradient = True
......@@ -268,7 +272,12 @@ class RCNNBox(object):
self.box_normalized = box_normalized
self.axis = axis
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
def __call__(self,
bbox_head_out,
rois,
im_shape,
scale_factor,
var_weight=1.):
bbox_pred, cls_prob = bbox_head_out
roi, rois_num = rois
origin_shape = im_shape / scale_factor
......@@ -287,9 +296,10 @@ class RCNNBox(object):
origin_shape = paddle.concat(origin_shape_list)
bbox = roi / scale
prior_box_var = [i / var_weight for i in self.prior_box_var]
bbox = ops.box_coder(
prior_box=bbox,
prior_box_var=self.prior_box_var,
prior_box_var=prior_box_var,
target_box=bbox_pred,
code_type=self.code_type,
box_normalized=self.box_normalized,
......
......@@ -16,8 +16,14 @@ class BBoxPostProcess(object):
self.decode = decode
self.nms = nms
def __call__(self, head_out, rois, im_shape, scale_factor=None):
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
def __call__(self,
head_out,
rois,
im_shape,
scale_factor=None,
var_weight=1.):
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor,
var_weight)
bbox_pred, bbox_num, _ = self.nms(bboxes, score)
return bbox_pred, bbox_num
......
......@@ -72,7 +72,7 @@ def expand_bbox(bboxes, scale):
w_half *= scale
h_half *= scale
bboxes_exp = np.zeros(bboxes.shape)
bboxes_exp = np.zeros(bboxes.shape, dtype=np.float32)
bboxes_exp[:, 0] = x_c - w_half
bboxes_exp[:, 2] = x_c + w_half
bboxes_exp[:, 1] = y_c - h_half
......@@ -107,18 +107,20 @@ def bbox_overlaps(bboxes1, bboxes2):
area1 = w1 * h1
area2 = w2 * h2
overlaps = np.zeros((bboxes1.shape[0], bboxes2.shape[0]))
for ind1 in range(bboxes1.shape[0]):
for ind2 in range(bboxes2.shape[0]):
inter_x1 = np.maximum(bboxes1[ind1, 0], bboxes2[ind2, 0])
inter_y1 = np.maximum(bboxes1[ind1, 1], bboxes2[ind2, 1])
inter_x2 = np.minimum(bboxes1[ind1, 2], bboxes2[ind2, 2])
inter_y2 = np.minimum(bboxes1[ind1, 3], bboxes2[ind2, 3])
inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0)
inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0)
inter_area = inter_w * inter_h
iou = inter_area * 1.0 / (area1[ind1] + area2[ind2] - inter_area)
overlaps[ind1, ind2] = iou
boxes1_x1, boxes1_y1, boxes1_x2, boxes1_y2 = np.split(bboxes1, 4, axis=1)
boxes2_x1, boxes2_y1, boxes2_x2, boxes2_y2 = np.split(bboxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(boxes1_y2, np.transpose(boxes2_y2))
all_pairs_max_ymin = np.maximum(boxes1_y1, np.transpose(boxes2_y1))
inter_h = np.maximum(all_pairs_min_ymax - all_pairs_max_ymin + 1, 0.)
all_pairs_min_xmax = np.minimum(boxes1_x2, np.transpose(boxes2_x2))
all_pairs_max_xmin = np.maximum(boxes1_x1, np.transpose(boxes2_x1))
inter_w = np.maximum(all_pairs_min_xmax - all_pairs_max_xmin + 1, 0.)
inter_area = inter_w * inter_h
union_area = np.expand_dims(area1, 1) + np.expand_dims(area2, 0)
overlaps = inter_area / (union_area - inter_area)
return overlaps
......
......@@ -90,7 +90,6 @@ def generate_rpn_anchor_target(anchors,
@jit
def label_anchor(anchors, gt_boxes):
iou = bbox_overlaps(anchors, gt_boxes)
# every gt's anchor's index
gt_bbox_anchor_inds = iou.argmax(axis=0)
gt_bbox_anchor_iou = iou[gt_bbox_anchor_inds, np.arange(iou.shape[1])]
......@@ -148,6 +147,16 @@ def sample_anchor(anchor_gt_bbox_iou,
return fg_inds, bg_inds, fg_fake_inds, fake_num
@jit
def filter_roi(rois, max_overlap):
    """Drop degenerate RoIs and RoIs whose max overlap is not below 1.

    Args:
        rois: 2-D array indexed as ``rois[:, 0..3]``; columns 2-0 and 3-1
            are differenced (plus 1) to obtain widths and heights.
        max_overlap: per-RoI values compared element-wise against 1.

    Returns:
        The rows of ``rois`` that have positive width, positive height and
        ``max_overlap < 1``. When no row qualifies, a single all-zero
        ``float32`` array of shape (1, 4) is returned instead.
    """
    widths = rois[:, 2] - rois[:, 0] + 1
    heights = rois[:, 3] - rois[:, 1] + 1
    is_valid = (widths > 0) & (heights > 0) & (max_overlap < 1)
    selected = np.nonzero(is_valid)[0]
    if len(selected) == 0:
        # Nothing survived: hand back one placeholder box.
        return np.zeros((1, 4)).astype('float32')
    return rois[selected, :]
@jit
def generate_proposal_target(rpn_rois,
rpn_rois_num,
......@@ -164,42 +173,38 @@ def generate_proposal_target(rpn_rois,
class_nums=81,
use_random=True,
is_cls_agnostic=False,
is_cascade_rcnn=False):
is_cascade_rcnn=False,
max_overlaps=None):
rois = []
tgt_labels = []
tgt_deltas = []
rois_inside_weights = []
rois_outside_weights = []
sampled_max_overlaps = []
new_rois_num = []
st_num = 0
end_num = 0
for im_i in range(len(rpn_rois_num)):
length = rpn_rois_num[im_i]
end_num += length
rpn_roi = rpn_rois[st_num:end_num]
max_overlap = max_overlaps[st_num:end_num] if is_cascade_rcnn else None
im_scale = im_info[im_i][2]
rpn_roi = rpn_roi / im_scale
gt_bbox = gt_boxes[im_i]
if is_cascade_rcnn:
rpn_roi = rpn_roi[gt_bbox.shape[0]:, :]
bbox = np.vstack([gt_bbox, rpn_roi])
rpn_roi = filter_roi(rpn_roi, max_overlap)
bbox = np.vstack([gt_bbox, rpn_roi]).astype('float32')
# Step1: label bbox
roi_gt_bbox_inds, roi_gt_bbox_iou, labels, = label_bbox(
roi_gt_bbox_inds, labels, max_overlap = label_bbox(
bbox, gt_bbox, gt_classes[im_i], is_crowd[im_i])
# Step2: sample bbox
if is_cascade_rcnn:
ws = bbox[:, 2] - bbox[:, 0] + 1
hs = bbox[:, 3] - bbox[:, 1] + 1
keep = np.where((ws > 0) & (hs > 0))[0]
bbox = bbox[keep]
fg_inds, bg_inds, fg_nums = sample_bbox(
roi_gt_bbox_iou, batch_size_per_im, fg_fraction, fg_thresh,
max_overlap, batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
use_random, is_cls_agnostic, is_cascade_rcnn)
......@@ -210,10 +215,12 @@ def generate_proposal_target(rpn_rois,
sampled_labels[fg_nums:] = 0
sampled_boxes = bbox[sampled_inds]
sampled_max_overlap = max_overlap[sampled_inds]
sampled_gt_boxes = gt_bbox[roi_gt_bbox_inds[sampled_inds]]
sampled_gt_boxes[fg_nums:, :] = gt_bbox[0]
sampled_gt_boxes[fg_nums:, :] = 0
sampled_deltas = compute_bbox_targets(sampled_boxes, sampled_gt_boxes,
sampled_labels, bbox_reg_weights)
sampled_deltas[fg_nums:, :] = 0
sampled_deltas, bbox_inside_weights = expand_bbox_targets(
sampled_deltas, class_nums, is_cls_agnostic)
bbox_outside_weights = np.array(
......@@ -228,6 +235,7 @@ def generate_proposal_target(rpn_rois,
tgt_deltas.append(sampled_deltas)
rois_inside_weights.append(bbox_inside_weights)
rois_outside_weights.append(bbox_outside_weights)
sampled_max_overlaps.append(sampled_max_overlap)
rois = np.concatenate(rois, axis=0).astype(np.float32)
tgt_labels = np.concatenate(
......@@ -237,23 +245,20 @@ def generate_proposal_target(rpn_rois,
rois_inside_weights, axis=0).astype(np.float32)
rois_outside_weights = np.concatenate(
rois_outside_weights, axis=0).astype(np.float32)
sampled_max_overlaps = np.concatenate(
sampled_max_overlaps, axis=0).astype(np.float32)
new_rois_num = np.asarray(new_rois_num, np.int32)
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num, sampled_max_overlaps
@jit
def label_bbox(boxes,
gt_boxes,
gt_classes,
is_crowd,
class_nums=81,
is_cascade_rcnn=False):
def label_bbox(boxes, gt_boxes, gt_classes, is_crowd, class_nums=81):
iou = bbox_overlaps(boxes, gt_boxes)
# every roi's gt box's index
roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32)
roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums))
roi_gt_bbox_iou = np.zeros((boxes.shape[0], class_nums), dtype=np.float32)
iou_argmax = iou.argmax(axis=1)
iou_max = iou.max(axis=1)
......@@ -267,13 +272,14 @@ def label_bbox(boxes,
crowd_ind = np.where(is_crowd)[0]
roi_gt_bbox_iou[crowd_ind] = -1
max_overlap = roi_gt_bbox_iou.max(axis=1)
labels = roi_gt_bbox_iou.argmax(axis=1)
return roi_gt_bbox_inds, roi_gt_bbox_iou, labels
return roi_gt_bbox_inds, labels, max_overlap
@jit
def sample_bbox(roi_gt_bbox_iou,
def sample_bbox(max_overlap,
batch_size_per_im,
fg_fraction,
fg_thresh,
......@@ -285,27 +291,26 @@ def sample_bbox(roi_gt_bbox_iou,
is_cls_agnostic=False,
is_cascade_rcnn=False):
roi_gt_bbox_iou_max = roi_gt_bbox_iou.max(axis=1)
rois_per_image = int(batch_size_per_im)
fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
if is_cascade_rcnn:
fg_inds = np.where(roi_gt_bbox_iou_max >= fg_thresh)[0]
bg_inds = np.where((roi_gt_bbox_iou_max < bg_thresh_hi) & (
roi_gt_bbox_iou_max >= bg_thresh_lo))[0]
fg_inds = np.where(max_overlap >= fg_thresh)[0]
bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
bg_thresh_lo))[0]
fg_nums = fg_inds.shape[0]
bg_nums = bg_inds.shape[0]
else:
# sample fg
fg_inds = np.where(roi_gt_bbox_iou_max >= fg_thresh)[0]
fg_inds = np.where(max_overlap >= fg_thresh)[0]
fg_nums = np.minimum(fg_rois_per_im, fg_inds.shape[0])
if (fg_inds.shape[0] > fg_nums) and use_random:
fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False)
fg_inds = fg_inds[:fg_nums]
# sample bg
bg_inds = np.where((roi_gt_bbox_iou_max < bg_thresh_hi) & (
roi_gt_bbox_iou_max >= bg_thresh_lo))[0]
bg_inds = np.where((max_overlap < bg_thresh_hi) & (max_overlap >=
bg_thresh_lo))[0]
bg_nums = rois_per_image - fg_nums
bg_nums = np.minimum(bg_nums, bg_inds.shape[0])
if (bg_inds.shape[0] > bg_nums) and use_random:
......
......@@ -171,12 +171,7 @@ def run(FLAGS, cfg, place):
# Model Backward
loss = outputs['loss']
if ParallelEnv().nranks > 1:
loss = model.scale_loss(loss)
loss.backward()
model.apply_collective_grads()
else:
loss.backward()
loss.backward()
optimizer.step()
curr_lr = optimizer.get_lr()
lr.step()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册