未验证 提交 dcf97ccd 编写于 作者: G Guanghua Yu 提交者: GitHub

adapt new im_shape im Mask R-CNN-FPN (#1760)

* adapt new im_shape im Mask R-CNN-FPN

* fix training
上级 50599e26
...@@ -17,14 +17,14 @@ TrainReader: ...@@ -17,14 +17,14 @@ TrainReader:
EvalReader: EvalReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id'] fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms: sample_transforms:
- DecodeImage: {to_rgb: true} - DecodeOp: {}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true} - ResizeOp: {interp: 1, target_size: [800, 1333]}
- Permute: {channel_first: true, to_bgr: false} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: false} - PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
......
...@@ -141,7 +141,7 @@ class DecodeOp(BaseOperator): ...@@ -141,7 +141,7 @@ class DecodeOp(BaseOperator):
"image width.".format(im.shape[1], sample['w'])) "image width.".format(im.shape[1], sample['w']))
sample['w'] = im.shape[1] sample['w'] = im.shape[1]
sample['im_shape'] = np.array(im.shape[:2], dtype=np.int32) sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
sample['scale_factor'] = np.array([1., 1.], dtype=np.float32) sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return sample return sample
...@@ -666,8 +666,8 @@ class ResizeOp(BaseOperator): ...@@ -666,8 +666,8 @@ class ResizeOp(BaseOperator):
im_scale = min(target_size_min / im_size_min, im_scale = min(target_size_min / im_size_min,
target_size_max / im_size_max) target_size_max / im_size_max)
resize_h = int(im_scale * im_shape[0]) resize_h = im_scale * float(im_shape[0])
resize_w = int(im_scale * im_shape[1]) resize_w = im_scale * float(im_shape[1])
im_scale_x = im_scale im_scale_x = im_scale
im_scale_y = im_scale im_scale_y = im_scale
...@@ -678,14 +678,14 @@ class ResizeOp(BaseOperator): ...@@ -678,14 +678,14 @@ class ResizeOp(BaseOperator):
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y]) im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
sample['image'] = im sample['image'] = im
sample['im_shape'] = np.array([resize_h, resize_w], dtype=np.int32) sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
if 'scale_factor' in sample: if 'scale_factor' in sample:
scale_factor = sample['scale_factor'] scale_factor = sample['scale_factor']
sample['scale_factor'] = np.array( sample['scale_factor'] = np.asarray(
[scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x], [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
dtype=np.float32) dtype=np.float32)
else: else:
sample['scale_factor'] = np.array( sample['scale_factor'] = np.asarray(
[im_scale_y, im_scale_x], dtype=np.float32) [im_scale_y, im_scale_x], dtype=np.float32)
# apply bbox # apply bbox
...@@ -1397,8 +1397,8 @@ class RandomScaledCropOp(BaseOperator): ...@@ -1397,8 +1397,8 @@ class RandomScaledCropOp(BaseOperator):
random_dim = int(dim * random_scale) random_dim = int(dim * random_scale)
dim_max = max(h, w) dim_max = max(h, w)
scale = random_dim / dim_max scale = random_dim / dim_max
resize_w = int(round(w * scale)) resize_w = w * scale
resize_h = int(round(h * scale)) resize_h = h * scale
offset_x = int(max(0, np.random.uniform(0., resize_w - dim))) offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
offset_y = int(max(0, np.random.uniform(0., resize_h - dim))) offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
...@@ -1408,9 +1408,9 @@ class RandomScaledCropOp(BaseOperator): ...@@ -1408,9 +1408,9 @@ class RandomScaledCropOp(BaseOperator):
canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[ canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
offset_y:offset_y + dim, offset_x:offset_x + dim, :] offset_y:offset_y + dim, offset_x:offset_x + dim, :]
sample['image'] = canvas sample['image'] = canvas
sample['im_shape'] = np.array([resize_h, resize_w], dtype=np.int32) sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
scale_factor = sample['sacle_factor'] scale_factor = sample['sacle_factor']
sample['scale_factor'] = np.array( sample['scale_factor'] = np.asarray(
[scale_factor[0] * scale, scale_factor[1] * scale], [scale_factor[0] * scale, scale_factor[1] * scale],
dtype=np.float32) dtype=np.float32)
......
...@@ -96,7 +96,8 @@ class MaskRCNN(BaseArch): ...@@ -96,7 +96,8 @@ class MaskRCNN(BaseArch):
self.bbox_head_out, rois) self.bbox_head_out, rois)
# Refine bbox by the output from bbox_head at test stage # Refine bbox by the output from bbox_head at test stage
self.bboxes = self.bbox_post_process(bbox_pred, bboxes, self.bboxes = self.bbox_post_process(bbox_pred, bboxes,
self.inputs['im_info']) self.inputs['im_shape'],
self.inputs['scale_factor'])
else: else:
# Proposal RoI for Mask branch # Proposal RoI for Mask branch
# bboxes update at training stage only # bboxes update at training stage only
...@@ -134,7 +135,8 @@ class MaskRCNN(BaseArch): ...@@ -134,7 +135,8 @@ class MaskRCNN(BaseArch):
def get_pred(self, ): def get_pred(self, ):
mask = self.mask_post_process(self.bboxes, self.mask_head_out, mask = self.mask_post_process(self.bboxes, self.mask_head_out,
self.inputs['im_info']) self.inputs['im_shape'],
self.inputs['scale_factor'])
bbox, bbox_num = self.bboxes bbox, bbox_num = self.bboxes
output = { output = {
'bbox': bbox.numpy(), 'bbox': bbox.numpy(),
......
...@@ -93,6 +93,11 @@ class Proposal(object): ...@@ -93,6 +93,11 @@ class Proposal(object):
self.proposal_target_generator = proposal_target_generator self.proposal_target_generator = proposal_target_generator
def generate_proposal(self, inputs, rpn_head_out, anchor_out): def generate_proposal(self, inputs, rpn_head_out, anchor_out):
# TODO: delete im_info
try:
im_shape = inputs['im_info']
except:
im_shape = inputs['im_shape']
rpn_rois_list = [] rpn_rois_list = []
rpn_prob_list = [] rpn_prob_list = []
rpn_rois_num_list = [] rpn_rois_num_list = []
...@@ -104,7 +109,7 @@ class Proposal(object): ...@@ -104,7 +109,7 @@ class Proposal(object):
bbox_deltas=rpn_delta, bbox_deltas=rpn_delta,
anchors=anchor, anchors=anchor,
variances=var, variances=var,
im_info=inputs['im_info'], im_shape=im_shape,
mode=inputs['mode']) mode=inputs['mode'])
if len(rpn_head_out) == 1: if len(rpn_head_out) == 1:
return rpn_rois, rpn_rois_num return rpn_rois, rpn_rois_num
......
...@@ -138,7 +138,7 @@ class MaskHead(Layer): ...@@ -138,7 +138,7 @@ class MaskHead(Layer):
return mask_head_out return mask_head_out
def forward_test(self, def forward_test(self,
im_info, scale_factor,
body_feats, body_feats,
bboxes, bboxes,
bbox_feat, bbox_feat,
...@@ -149,12 +149,14 @@ class MaskHead(Layer): ...@@ -149,12 +149,14 @@ class MaskHead(Layer):
if bbox.shape[0] == 0: if bbox.shape[0] == 0:
mask_head_out = bbox mask_head_out = bbox
else: else:
im_info_expand = [] scale_factor_list = []
for idx, num in enumerate(bbox_num): for idx, num in enumerate(bbox_num):
for n in range(num): for n in range(num):
im_info_expand.append(im_info[idx, -1]) scale_factor_list.append(scale_factor[idx, 0])
im_info_expand = paddle.concat(im_info_expand) scale_factor_list = paddle.cast(
scaled_bbox = paddle.multiply(bbox[:, 2:], im_info_expand, axis=0) paddle.concat(scale_factor_list), 'float32')
scaled_bbox = paddle.multiply(
bbox[:, 2:], scale_factor_list, axis=0)
scaled_bboxes = (scaled_bbox, bbox_num) scaled_bboxes = (scaled_bbox, bbox_num)
mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat, mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat,
mask_index, spatial_scale, stage) mask_index, spatial_scale, stage)
...@@ -174,8 +176,8 @@ class MaskHead(Layer): ...@@ -174,8 +176,8 @@ class MaskHead(Layer):
mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat, mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat,
mask_index, spatial_scale, stage) mask_index, spatial_scale, stage)
else: else:
im_info = inputs['im_info'] scale_factor = inputs['scale_factor']
mask_head_out = self.forward_test(im_info, body_feats, bboxes, mask_head_out = self.forward_test(scale_factor, body_feats, bboxes,
bbox_feat, mask_index, bbox_feat, mask_index,
spatial_scale, stage) spatial_scale, stage)
return mask_head_out return mask_head_out
......
...@@ -16,8 +16,7 @@ import numpy as np ...@@ -16,8 +16,7 @@ import numpy as np
from numbers import Integral from numbers import Integral
import paddle import paddle
import paddle.fluid as fluid from paddle import to_tensor
from paddle.fluid.dygraph.base import to_variable
from ppdet.core.workspace import register, serializable from ppdet.core.workspace import register, serializable
from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target
from ppdet.py_op.post_process import bbox_post_process from ppdet.py_op.post_process import bbox_post_process
...@@ -86,20 +85,20 @@ class AnchorTargetGeneratorRPN(object): ...@@ -86,20 +85,20 @@ class AnchorTargetGeneratorRPN(object):
self.batch_size_per_im, self.positive_overlap, self.batch_size_per_im, self.positive_overlap,
self.negative_overlap, self.fg_fraction, self.use_random) self.negative_overlap, self.fg_fraction, self.use_random)
loc_indexes = to_variable(loc_indexes) loc_indexes = to_tensor(loc_indexes)
score_indexes = to_variable(score_indexes) score_indexes = to_tensor(score_indexes)
tgt_labels = to_variable(tgt_labels) tgt_labels = to_tensor(tgt_labels)
tgt_bboxes = to_variable(tgt_bboxes) tgt_bboxes = to_tensor(tgt_bboxes)
bbox_inside_weights = to_variable(bbox_inside_weights) bbox_inside_weights = to_tensor(bbox_inside_weights)
loc_indexes.stop_gradient = True loc_indexes.stop_gradient = True
score_indexes.stop_gradient = True score_indexes.stop_gradient = True
tgt_labels.stop_gradient = True tgt_labels.stop_gradient = True
cls_logits = fluid.layers.reshape(x=cls_logits, shape=(-1, )) cls_logits = paddle.reshape(x=cls_logits, shape=(-1, ))
bbox_pred = fluid.layers.reshape(x=bbox_pred, shape=(-1, 4)) bbox_pred = paddle.reshape(x=bbox_pred, shape=(-1, 4))
pred_cls_logits = fluid.layers.gather(cls_logits, score_indexes) pred_cls_logits = paddle.gather(cls_logits, score_indexes)
pred_bbox_pred = fluid.layers.gather(bbox_pred, loc_indexes) pred_bbox_pred = paddle.gather(bbox_pred, loc_indexes)
return pred_cls_logits, pred_bbox_pred, tgt_labels, tgt_bboxes, bbox_inside_weights return pred_cls_logits, pred_bbox_pred, tgt_labels, tgt_bboxes, bbox_inside_weights
...@@ -131,14 +130,30 @@ class ProposalGenerator(object): ...@@ -131,14 +130,30 @@ class ProposalGenerator(object):
bbox_deltas, bbox_deltas,
anchors, anchors,
variances, variances,
im_info, im_shape,
mode='train'): mode='train'):
pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n
post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n
# TODO delete im_info
if im_shape.shape[1] > 2:
import paddle.fluid as fluid
rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals( rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals(
scores, scores,
bbox_deltas, bbox_deltas,
im_info, im_shape,
anchors,
variances,
pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n,
nms_thresh=self.nms_thresh,
min_size=self.min_size,
eta=self.eta,
return_rois_num=True)
else:
rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals(
scores,
bbox_deltas,
im_shape,
anchors, anchors,
variances, variances,
pre_nms_top_n=pre_nms_top_n, pre_nms_top_n=pre_nms_top_n,
...@@ -198,7 +213,7 @@ class ProposalTargetGenerator(object): ...@@ -198,7 +213,7 @@ class ProposalTargetGenerator(object):
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], self.bg_thresh_hi[stage], self.bg_thresh_lo[stage],
self.bbox_reg_weights[stage], self.num_classes, self.use_random, self.bbox_reg_weights[stage], self.num_classes, self.use_random,
self.is_cls_agnostic, self.is_cascade_rcnn) self.is_cls_agnostic, self.is_cascade_rcnn)
outs = [to_variable(v) for v in outs] outs = [to_tensor(v) for v in outs]
for v in outs: for v in outs:
v.stop_gradient = True v.stop_gradient = True
return outs return outs
...@@ -227,7 +242,7 @@ class MaskTargetGenerator(object): ...@@ -227,7 +242,7 @@ class MaskTargetGenerator(object):
rois, rois_num, labels_int32, rois, rois_num, labels_int32,
self.num_classes, self.mask_resolution) self.num_classes, self.mask_resolution)
outs = [to_variable(v) for v in outs] outs = [to_tensor(v) for v in outs]
for v in outs: for v in outs:
v.stop_gradient = True v.stop_gradient = True
return outs return outs
...@@ -260,7 +275,7 @@ class RCNNBox(object): ...@@ -260,7 +275,7 @@ class RCNNBox(object):
scale_list = [] scale_list = []
origin_shape_list = [] origin_shape_list = []
for idx in range(self.batch_size): for idx in range(self.batch_size):
scale = scale_factor[idx, :] scale = scale_factor[idx, :][0]
rois_num_per_im = rois_num[idx] rois_num_per_im = rois_num[idx]
expand_scale = paddle.expand(scale, [rois_num_per_im, 1]) expand_scale = paddle.expand(scale, [rois_num_per_im, 1])
scale_list.append(expand_scale) scale_list.append(expand_scale)
...@@ -327,7 +342,7 @@ class DecodeClipNms(object): ...@@ -327,7 +342,7 @@ class DecodeClipNms(object):
im_info.numpy(), self.keep_top_k, im_info.numpy(), self.keep_top_k,
self.score_threshold, self.nms_threshold, self.score_threshold, self.nms_threshold,
self.num_classes) self.num_classes)
outs = [to_variable(v) for v in outs] outs = [to_tensor(v) for v in outs]
for v in outs: for v in outs:
v.stop_gradient = True v.stop_gradient = True
return outs return outs
...@@ -407,7 +422,6 @@ class YOLOBox(object): ...@@ -407,7 +422,6 @@ class YOLOBox(object):
def __call__(self, yolo_head_out, anchors, im_shape, scale_factor=None): def __call__(self, yolo_head_out, anchors, im_shape, scale_factor=None):
boxes_list = [] boxes_list = []
scores_list = [] scores_list = []
im_shape = paddle.cast(im_shape, 'float32')
if scale_factor is not None: if scale_factor is not None:
origin_shape = im_shape / scale_factor origin_shape = im_shape / scale_factor
else: else:
......
...@@ -17,14 +17,7 @@ class BBoxPostProcess(object): ...@@ -17,14 +17,7 @@ class BBoxPostProcess(object):
self.nms = nms self.nms = nms
def __call__(self, head_out, rois, im_shape, scale_factor=None): def __call__(self, head_out, rois, im_shape, scale_factor=None):
# TODO: compatible for im_info bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
# remove after unify the im_shape. scale_factor
if im_shape.shape[1] > 2:
origin_shape = im_shape[:, :2]
scale_factor = im_shape[:, 2:]
else:
origin_shape = im_shape
bboxes, score = self.decode(head_out, rois, origin_shape, scale_factor)
bbox_pred, bbox_num = self.nms(bboxes, score) bbox_pred, bbox_num = self.nms(bboxes, score)
return bbox_pred, bbox_num return bbox_pred, bbox_num
...@@ -38,12 +31,12 @@ class MaskPostProcess(object): ...@@ -38,12 +31,12 @@ class MaskPostProcess(object):
self.mask_resolution = mask_resolution self.mask_resolution = mask_resolution
self.binary_thresh = binary_thresh self.binary_thresh = binary_thresh
def __call__(self, bboxes, mask_head_out, im_info): def __call__(self, bboxes, mask_head_out, im_shape, scale_factor=None):
# TODO: modify related ops for deploying # TODO: modify related ops for deploying
bboxes_np = (i.numpy() for i in bboxes) bboxes_np = (i.numpy() for i in bboxes)
mask = mask_post_process(bboxes_np, mask = mask_post_process(bboxes_np,
mask_head_out.numpy(), mask_head_out.numpy(),
im_info.numpy(), self.mask_resolution, im_shape.numpy(), scale_factor[:, 0].numpy(),
self.binary_thresh) self.mask_resolution, self.binary_thresh)
mask = {'mask': mask} mask = {'mask': mask}
return mask return mask
...@@ -10,7 +10,8 @@ import cv2 ...@@ -10,7 +10,8 @@ import cv2
def bbox_post_process(bboxes, def bbox_post_process(bboxes,
bbox_prob, bbox_prob,
bbox_deltas, bbox_deltas,
im_info, im_shape,
scale_factor,
keep_top_k=100, keep_top_k=100,
score_thresh=0.05, score_thresh=0.05,
nms_thresh=0.5, nms_thresh=0.5,
...@@ -27,14 +28,14 @@ def bbox_post_process(bboxes, ...@@ -27,14 +28,14 @@ def bbox_post_process(bboxes,
end_num += box_num end_num += box_num
boxes = bbox[st_num:end_num, :] # bbox boxes = bbox[st_num:end_num, :] # bbox
boxes = boxes / im_info[i][2] # scale boxes = boxes / scale_factor[i] # scale
bbox_delta = bbox_deltas[st_num:end_num, :, :] # bbox delta bbox_delta = bbox_deltas[st_num:end_num, :, :] # bbox delta
bbox_delta = np.reshape(bbox_delta, (box_num, -1)) bbox_delta = np.reshape(bbox_delta, (box_num, -1))
# step1: decode # step1: decode
boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights) boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights)
# step2: clip # step2: clip
boxes = clip_bbox(boxes, im_info[i][:2] / im_info[i][2]) boxes = clip_bbox(boxes, im_shape[i][:2] / scale_factor[i])
# step3: nms # step3: nms
cls_boxes = [[] for _ in range(class_nums)] cls_boxes = [[] for _ in range(class_nums)]
scores_n = bbox_prob[st_num:end_num, :] scores_n = bbox_prob[st_num:end_num, :]
...@@ -72,7 +73,12 @@ def bbox_post_process(bboxes, ...@@ -72,7 +73,12 @@ def bbox_post_process(bboxes,
@jit @jit
def mask_post_process(bboxes, masks, im_info, resolution=14, binary_thresh=0.5): def mask_post_process(bboxes,
masks,
im_shape,
scale_factor,
resolution=14,
binary_thresh=0.5):
if masks.shape[0] == 0: if masks.shape[0] == 0:
return masks return masks
bbox, bbox_nums = bboxes bbox, bbox_nums = bboxes
...@@ -93,8 +99,8 @@ def mask_post_process(bboxes, masks, im_info, resolution=14, binary_thresh=0.5): ...@@ -93,8 +99,8 @@ def mask_post_process(bboxes, masks, im_info, resolution=14, binary_thresh=0.5):
labels_n = labels[st_num:end_num] labels_n = labels[st_num:end_num]
masks_n = masks[st_num:end_num] masks_n = masks[st_num:end_num]
im_h = int(round(im_info[i][0] / im_info[i][2])) im_h = int(round(im_shape[i][0] / scale_factor[i]))
im_w = int(round(im_info[i][1] / im_info[i][2])) im_w = int(round(im_shape[i][1] / scale_factor[i]))
boxes_n = expand_bbox(boxes_n, scale) boxes_n = expand_bbox(boxes_n, scale)
boxes_n = boxes_n.astype(np.int32) boxes_n = boxes_n.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册