提交 6b09ec35，作者：Yang Zhang，提交者：qingqing01

Unify interface of detectors (#2503)

上级 44c2837e
...@@ -16,8 +16,6 @@ from __future__ import absolute_import ...@@ -16,8 +16,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import OrderedDict
from paddle import fluid from paddle import fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
...@@ -64,11 +62,16 @@ class MaskRCNN(object): ...@@ -64,11 +62,16 @@ class MaskRCNN(object):
self.mask_head = mask_head self.mask_head = mask_head
self.fpn = fpn self.fpn = fpn
def train(self, feed_vars): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
assert mode in ['train', 'test'], "only support 'train' and 'test' mode"
if mode == 'train':
required_fields = ['gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info']
else:
required_fields = ['im_shape', 'im_info']
for var in required_fields:
assert var in feed_vars, "{} has no {} field".format(feed_vars, var)
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
gt_box = feed_vars['gt_box']
is_crowd = feed_vars['is_crowd']
body_feats = self.backbone(im) body_feats = self.backbone(im)
...@@ -76,117 +79,94 @@ class MaskRCNN(object): ...@@ -76,117 +79,94 @@ class MaskRCNN(object):
if self.fpn is not None: if self.fpn is not None:
body_feats, spatial_scale = self.fpn.get_output(body_feats) body_feats, spatial_scale = self.fpn.get_output(body_feats)
# rpn proposals # RPN proposals
rois = self.rpn_head.get_proposals(body_feats, im_info) rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
for var in ['gt_label', 'is_crowd', 'gt_box', 'im_info']:
assert var in feed_vars, "{} has no {}".format(feed_vars, var)
outs = self.bbox_assigner(
rpn_rois=rois,
gt_classes=feed_vars['gt_label'],
is_crowd=feed_vars['is_crowd'],
gt_boxes=feed_vars['gt_box'],
im_info=feed_vars['im_info'])
rois = outs[0]
labels_int32 = outs[1]
bbox_targets = outs[2]
bbox_inside_weights = outs[3]
bbox_outside_weights = outs[4]
if self.fpn is None: if self.fpn is None:
# in models without FPN, roi extractor only uses the last level of
# feature maps. And list(body_feats.keys())[-1] represents the name of
# last feature map.
last_feat = body_feats[list(body_feats.keys())[-1]] last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois) roi_feat = self.roi_extractor(last_feat, rois)
else: else:
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
loss = self.bbox_head.get_loss(roi_feat, labels_int32, bbox_targets, if mode == 'train':
bbox_inside_weights, rpn_loss = self.rpn_head.get_loss(im_info, feed_vars['gt_box'],
bbox_outside_weights) feed_vars['is_crowd'])
loss.update(rpn_loss)
outs = self.bbox_assigner(
assert 'gt_mask' in feed_vars, "{} has no gt_mask".format(feed_vars) rpn_rois=rois,
outs = self.mask_assigner( gt_classes=feed_vars['gt_label'],
rois=rois, is_crowd=feed_vars['is_crowd'],
gt_classes=feed_vars['gt_label'], gt_boxes=feed_vars['gt_box'],
is_crowd=feed_vars['is_crowd'], im_info=feed_vars['im_info'])
gt_segms=feed_vars['gt_mask'], rois = outs[0]
im_info=feed_vars['im_info'], labels_int32 = outs[1]
labels_int32=labels_int32)
mask_rois, roi_has_mask_int32, mask_int32 = outs loss = self.bbox_head.get_loss(roi_feat, labels_int32, *outs[2:])
if self.fpn is None: loss.update(rpn_loss)
bbox_head_feat = self.bbox_head.get_head_feat()
feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32) mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
rois=rois,
gt_classes=feed_vars['gt_label'],
is_crowd=feed_vars['is_crowd'],
gt_segms=feed_vars['gt_mask'],
im_info=feed_vars['im_info'],
labels_int32=labels_int32)
if self.fpn is None:
bbox_head_feat = self.bbox_head.get_head_feat()
feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32)
else:
feat = self.roi_extractor(body_feats, mask_rois, spatial_scale,
is_mask=True)
mask_loss = self.mask_head.get_loss(feat, mask_int32)
loss.update(mask_loss)
total_loss = fluid.layers.sum(list(loss.values()))
loss.update({'loss': total_loss})
return loss
else: else:
feat = self.roi_extractor(body_feats, mask_rois, spatial_scale, bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info,
True) feed_vars['im_shape'])
bbox_pred = bbox_pred['bbox']
# share weight
bbox_shape = fluid.layers.shape(bbox_pred)
bbox_size = fluid.layers.reduce_prod(bbox_shape)
bbox_size = fluid.layers.reshape(bbox_size, [1, 1])
size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32')
cond = fluid.layers.less_than(x=bbox_size, y=size)
mask_pred = fluid.layers.create_global_var(
shape=[1], value=0.0, dtype='float32', persistable=False)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(cond):
fluid.layers.assign(input=bbox_pred, output=mask_pred)
with switch.default():
bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6])
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, bbox)
mask_rois = bbox * im_scale
if self.fpn is None:
mask_feat = self.roi_extractor(last_feat, mask_rois)
mask_feat = self.bbox_head.get_head_feat(mask_feat)
else:
mask_feat = self.roi_extractor(body_feats, mask_rois,
spatial_scale, is_mask=True)
mask_out = self.mask_head.get_prediction(mask_feat, bbox)
fluid.layers.assign(input=mask_out, output=mask_pred)
return {'bbox': bbox_pred, 'mask': mask_pred}
mask_loss = self.mask_head.get_loss(feat, mask_int32) def train(self, feed_vars):
loss.update(mask_loss) return self.build(feed_vars, 'train')
total_loss = fluid.layers.sum(list(loss.values())) def eval(self, feed_vars):
loss.update({'loss': total_loss}) return self.build(feed_vars, 'test')
return loss
def test(self, feed_vars): def test(self, feed_vars):
im = feed_vars['image'] return self.build(feed_vars, 'test')
im_info = feed_vars['im_info']
im_shape = feed_vars['im_shape']
body_feats = self.backbone(im)
# FPN
if self.fpn is not None:
body_feats, spatial_scale = self.fpn.get_output(body_feats)
rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
if self.fpn is None:
body_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(body_feat, rois)
else:
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale,
False)
bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info,
im_shape)
bbox_pred = bbox_pred['bbox']
# share weight
bbox_shape = fluid.layers.shape(bbox_pred)
bbox_size = fluid.layers.reduce_prod(bbox_shape)
bbox_size = fluid.layers.reshape(bbox_size, [1, 1])
size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32')
cond = fluid.layers.less_than(x=bbox_size, y=size)
mask_pred = fluid.layers.create_global_var(
shape=[1], value=0.0, dtype='float32', persistable=False)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(cond):
fluid.layers.assign(input=bbox_pred, output=mask_pred)
with switch.default():
bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6])
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, bbox)
mask_rois = bbox * im_scale
if self.fpn is None:
mask_feat = self.roi_extractor(body_feat, mask_rois)
mask_feat = self.bbox_head.get_head_feat(mask_feat)
else:
mask_feat = self.roi_extractor(body_feats, mask_rois,
spatial_scale, True)
mask_out = self.mask_head.get_prediction(mask_feat, bbox)
fluid.layers.assign(input=mask_out, output=mask_pred)
return {'bbox': bbox_pred, 'mask': mask_pred}
def eval(self, feed_vars):
self.test(feed_vars)
...@@ -43,7 +43,7 @@ class RetinaNet(object): ...@@ -43,7 +43,7 @@ class RetinaNet(object):
self.fpn = fpn self.fpn = fpn
self.retina_head = retina_head self.retina_head = retina_head
def _forward(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
if mode == 'train': if mode == 'train':
...@@ -69,10 +69,10 @@ class RetinaNet(object): ...@@ -69,10 +69,10 @@ class RetinaNet(object):
return pred return pred
def train(self, feed_vars): def train(self, feed_vars):
return self._forward(feed_vars, 'train') return self.build(feed_vars, 'train')
def eval(self, feed_vars): def eval(self, feed_vars):
return self._forward(feed_vars, 'test') return self.build(feed_vars, 'test')
def test(self, feed_vars): def test(self, feed_vars):
return self._forward(feed_vars, 'test') return self.build(feed_vars, 'test')
...@@ -41,7 +41,7 @@ class YOLOv3(object): ...@@ -41,7 +41,7 @@ class YOLOv3(object):
self.backbone = backbone self.backbone = backbone
self.yolo_head = yolo_head self.yolo_head = yolo_head
def _forward(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
body_feats = self.backbone(im) body_feats = self.backbone(im)
...@@ -63,10 +63,10 @@ class YOLOv3(object): ...@@ -63,10 +63,10 @@ class YOLOv3(object):
return self.yolo_head.get_prediction(body_feats, im_shape) return self.yolo_head.get_prediction(body_feats, im_shape)
def train(self, feed_vars): def train(self, feed_vars):
return self._forward(feed_vars, mode='train') return self.build(feed_vars, mode='train')
def eval(self, feed_vars): def eval(self, feed_vars):
return self._forward(feed_vars, mode='test') return self.build(feed_vars, mode='test')
def test(self, feed_vars): def test(self, feed_vars):
return self._forward(feed_vars, mode='test') return self.build(feed_vars, mode='test')
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册