提交 3a877a6f 编写于 作者: W wangguanzhong 提交者: GitHub

Refine cascade models (#3219)

* refine cascade models
上级 b8069475
......@@ -53,6 +53,7 @@ class CascadeMaskRCNN(object):
bbox_assigner='CascadeBBoxAssigner',
mask_assigner='MaskAssigner',
mask_head='MaskHead',
rpn_only=False,
fpn='FPN'):
super(CascadeMaskRCNN, self).__init__()
assert fpn is not None, "cascade RCNN requires FPN"
......@@ -64,6 +65,7 @@ class CascadeMaskRCNN(object):
self.bbox_head = bbox_head
self.mask_assigner = mask_assigner
self.mask_head = mask_head
self.rpn_only = rpn_only
# Cascade local cfg
self.cls_agnostic_bbox_reg = 2
(brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights
......@@ -191,8 +193,9 @@ class CascadeMaskRCNN(object):
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_pred = self.bbox_head.get_prediction(
im_info, roi_feat_list, rcnn_pred_list, proposal_list,
self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg)
im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
proposal_list, self.cascade_bbox_reg_weights,
self.cls_agnostic_bbox_reg)
bbox_pred = bbox_pred['bbox']
......@@ -204,7 +207,11 @@ class CascadeMaskRCNN(object):
cond = fluid.layers.less_than(x=bbox_size, y=size)
mask_pred = fluid.layers.create_global_var(
shape=[1], value=0.0, dtype='float32', persistable=False)
shape=[1],
value=0.0,
dtype='float32',
persistable=False,
name='mask_pred')
with fluid.layers.control_flow.Switch() as switch:
with switch.case(cond):
......
......@@ -72,7 +72,19 @@ class CascadeRCNN(object):
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
assert mode in ['train', 'test'], \
"only 'train' and 'test' mode is supported"
if mode == 'train':
required_fields = [
'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info'
]
else:
required_fields = ['im_shape', 'im_info']
for var in required_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
im_info = feed_vars['im_info']
if mode == 'train':
gt_box = feed_vars['gt_box']
is_crowd = feed_vars['is_crowd']
......@@ -92,7 +104,8 @@ class CascadeRCNN(object):
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
......@@ -143,8 +156,9 @@ class CascadeRCNN(object):
return loss
else:
pred = self.bbox_head.get_prediction(
im_info, roi_feat_list, rcnn_pred_list, proposal_list,
self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg)
im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
proposal_list, self.cascade_bbox_reg_weights,
self.cls_agnostic_bbox_reg)
return pred
def _decode_box(self, proposals, bbox_pred, curr_stage):
......
......@@ -274,11 +274,14 @@ class BBoxHead(object):
Get prediction bounding box in test stage.
Args:
roi_feat (Variable): RoI feature from RoIExtractor.
rois (Variable): Output of generate_proposals in rpn head.
im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
number of input images, each element consists of im_height,
im_width, im_scale.
cls_score (Variable), bbox_pred(Variable): Output of get_output.
im_shape (Variable): Actual shape of the original image, with shape
[B, 3]. B is the number of images; each element consists of
original_height, original_width, 1.
Returns:
pred_result(Variable): Prediction result with shape [N, 6]. Each
......
......@@ -139,6 +139,7 @@ class CascadeBBoxHead(object):
def get_prediction(self,
im_info,
im_shape,
roi_feat_list,
rcnn_pred_list,
proposal_list,
......@@ -151,6 +152,9 @@ class CascadeBBoxHead(object):
im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
number of input images, each element consists
of im_height, im_width, im_scale.
im_shape (Variable): Actual shape of the original image, with shape
[B, 3]. B is the number of images; each element consists of
original_height, original_width, 1.
roi_feat_list (List): RoI features from RoIExtractor.
rcnn_pred_list (Variable): Output of the Cascade R-CNN heads,
including bbox_pred and cls_score.
......@@ -197,7 +201,8 @@ class CascadeBBoxHead(object):
# only use fg box delta to decode box
bbox_pred_new = fluid.layers.slice(
bbox_pred_new, axes=[1], starts=[1], ends=[2])
bbox_pred_new = fluid.layers.expand(bbox_pred_new, [1, self.num_classes, 1])
bbox_pred_new = fluid.layers.expand(bbox_pred_new,
[1, self.num_classes, 1])
decoded_box = fluid.layers.box_coder(
prior_box=proposals_boxes,
prior_box_var=bbox_reg_w,
......@@ -206,8 +211,7 @@ class CascadeBBoxHead(object):
box_normalized=False,
axis=1)
# TODO: note that Detectron uses img.shape here
box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_info)
box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
return {"bbox": pred_result}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册