未验证 提交 2ef93ad2 编写于 作者: W wangguanzhong 提交者: GitHub

Refine cascade models (#3219)

* refine cascade models
上级 19122bd5
...@@ -53,6 +53,7 @@ class CascadeMaskRCNN(object): ...@@ -53,6 +53,7 @@ class CascadeMaskRCNN(object):
bbox_assigner='CascadeBBoxAssigner', bbox_assigner='CascadeBBoxAssigner',
mask_assigner='MaskAssigner', mask_assigner='MaskAssigner',
mask_head='MaskHead', mask_head='MaskHead',
rpn_only=False,
fpn='FPN'): fpn='FPN'):
super(CascadeMaskRCNN, self).__init__() super(CascadeMaskRCNN, self).__init__()
assert fpn is not None, "cascade RCNN requires FPN" assert fpn is not None, "cascade RCNN requires FPN"
...@@ -64,6 +65,7 @@ class CascadeMaskRCNN(object): ...@@ -64,6 +65,7 @@ class CascadeMaskRCNN(object):
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.mask_assigner = mask_assigner self.mask_assigner = mask_assigner
self.mask_head = mask_head self.mask_head = mask_head
self.rpn_only = rpn_only
# Cascade local cfg # Cascade local cfg
self.cls_agnostic_bbox_reg = 2 self.cls_agnostic_bbox_reg = 2
(brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights
...@@ -191,8 +193,9 @@ class CascadeMaskRCNN(object): ...@@ -191,8 +193,9 @@ class CascadeMaskRCNN(object):
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_pred = self.bbox_head.get_prediction( bbox_pred = self.bbox_head.get_prediction(
im_info, roi_feat_list, rcnn_pred_list, proposal_list, im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg) proposal_list, self.cascade_bbox_reg_weights,
self.cls_agnostic_bbox_reg)
bbox_pred = bbox_pred['bbox'] bbox_pred = bbox_pred['bbox']
...@@ -204,7 +207,11 @@ class CascadeMaskRCNN(object): ...@@ -204,7 +207,11 @@ class CascadeMaskRCNN(object):
cond = fluid.layers.less_than(x=bbox_size, y=size) cond = fluid.layers.less_than(x=bbox_size, y=size)
mask_pred = fluid.layers.create_global_var( mask_pred = fluid.layers.create_global_var(
shape=[1], value=0.0, dtype='float32', persistable=False) shape=[1],
value=0.0,
dtype='float32',
persistable=False,
name='mask_pred')
with fluid.layers.control_flow.Switch() as switch: with fluid.layers.control_flow.Switch() as switch:
with switch.case(cond): with switch.case(cond):
......
...@@ -72,7 +72,19 @@ class CascadeRCNN(object): ...@@ -72,7 +72,19 @@ class CascadeRCNN(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
assert mode in ['train', 'test'], \
"only 'train' and 'test' mode is supported"
if mode == 'train':
required_fields = [
'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info'
]
else:
required_fields = ['im_shape', 'im_info']
for var in required_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_box = feed_vars['gt_box']
is_crowd = feed_vars['is_crowd'] is_crowd = feed_vars['is_crowd']
...@@ -92,7 +104,8 @@ class CascadeRCNN(object): ...@@ -92,7 +104,8 @@ class CascadeRCNN(object):
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
else: else:
if self.rpn_only: if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois) im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale rois = rois / im_scale
return {'proposal': rois} return {'proposal': rois}
...@@ -143,8 +156,9 @@ class CascadeRCNN(object): ...@@ -143,8 +156,9 @@ class CascadeRCNN(object):
return loss return loss
else: else:
pred = self.bbox_head.get_prediction( pred = self.bbox_head.get_prediction(
im_info, roi_feat_list, rcnn_pred_list, proposal_list, im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg) proposal_list, self.cascade_bbox_reg_weights,
self.cls_agnostic_bbox_reg)
return pred return pred
def _decode_box(self, proposals, bbox_pred, curr_stage): def _decode_box(self, proposals, bbox_pred, curr_stage):
......
...@@ -274,11 +274,14 @@ class BBoxHead(object): ...@@ -274,11 +274,14 @@ class BBoxHead(object):
Get prediction bounding box in test stage. Get prediction bounding box in test stage.
Args: Args:
roi_feat (Variable): RoI feature from RoIExtractor.
rois (Variable): Output of generate_proposals in rpn head. rois (Variable): Output of generate_proposals in rpn head.
im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
number of input images, each element consists of im_height, number of input images, each element consists of im_height,
im_width, im_scale. im_width, im_scale.
cls_score (Variable), bbox_pred(Variable): Output of get_output. im_shape (Variable): Actual shape of original image with shape
[B, 3]. B is the number of images, each element consists of
original_height, original_width, 1
Returns: Returns:
pred_result(Variable): Prediction result with shape [N, 6]. Each pred_result(Variable): Prediction result with shape [N, 6]. Each
......
...@@ -139,6 +139,7 @@ class CascadeBBoxHead(object): ...@@ -139,6 +139,7 @@ class CascadeBBoxHead(object):
def get_prediction(self, def get_prediction(self,
im_info, im_info,
im_shape,
roi_feat_list, roi_feat_list,
rcnn_pred_list, rcnn_pred_list,
proposal_list, proposal_list,
...@@ -151,6 +152,9 @@ class CascadeBBoxHead(object): ...@@ -151,6 +152,9 @@ class CascadeBBoxHead(object):
im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
number of input images, each element consists number of input images, each element consists
of im_height, im_width, im_scale. of im_height, im_width, im_scale.
im_shape (Variable): Actual shape of original image with shape
[B, 3]. B is the number of images, each element consists of
original_height, original_width, 1
roi_feat_list (List): RoI feature from RoIExtractor. roi_feat_list (List): RoI feature from RoIExtractor.
rcnn_pred_list (Variable): Cascade rcnn's head's output rcnn_pred_list (Variable): Cascade rcnn's head's output
including bbox_pred and cls_score including bbox_pred and cls_score
...@@ -197,7 +201,8 @@ class CascadeBBoxHead(object): ...@@ -197,7 +201,8 @@ class CascadeBBoxHead(object):
# only use fg box delta to decode box # only use fg box delta to decode box
bbox_pred_new = fluid.layers.slice( bbox_pred_new = fluid.layers.slice(
bbox_pred_new, axes=[1], starts=[1], ends=[2]) bbox_pred_new, axes=[1], starts=[1], ends=[2])
bbox_pred_new = fluid.layers.expand(bbox_pred_new, [1, self.num_classes, 1]) bbox_pred_new = fluid.layers.expand(bbox_pred_new,
[1, self.num_classes, 1])
decoded_box = fluid.layers.box_coder( decoded_box = fluid.layers.box_coder(
prior_box=proposals_boxes, prior_box=proposals_boxes,
prior_box_var=bbox_reg_w, prior_box_var=bbox_reg_w,
...@@ -206,8 +211,7 @@ class CascadeBBoxHead(object): ...@@ -206,8 +211,7 @@ class CascadeBBoxHead(object):
box_normalized=False, box_normalized=False,
axis=1) axis=1)
# TODO: notice detectron use img.shape box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_info)
pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean) pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
return {"bbox": pred_result} return {"bbox": pred_result}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册