diff --git a/ppdet/py_op/__init__.py b/ppdet/py_op/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ceb1cef677e14329a66c492bd652a48632d4119 --- /dev/null +++ b/ppdet/py_op/__init__.py @@ -0,0 +1,4 @@ +from .bbox import * +from .mask import * +from .target import * +from .post_process import * diff --git a/ppdet/py_op/bbox.py b/ppdet/py_op/bbox.py new file mode 100755 index 0000000000000000000000000000000000000000..83b68a78222cf553e84ba52e52586d7b3aefd944 --- /dev/null +++ b/ppdet/py_op/bbox.py @@ -0,0 +1,261 @@ +import numpy as np +from numba import jit + + +@jit +def bbox2delta(bboxes1, bboxes2, weights): + ex_w = bboxes1[:, 2] - bboxes1[:, 0] + 1 + ex_h = bboxes1[:, 3] - bboxes1[:, 1] + 1 + ex_ctr_x = bboxes1[:, 0] + 0.5 * ex_w + ex_ctr_y = bboxes1[:, 1] + 0.5 * ex_h + + gt_w = bboxes2[:, 2] - bboxes2[:, 0] + 1 + gt_h = bboxes2[:, 3] - bboxes2[:, 1] + 1 + gt_ctr_x = bboxes2[:, 0] + 0.5 * gt_w + gt_ctr_y = bboxes2[:, 1] + 0.5 * gt_h + + dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0] + dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1] + dw = (np.log(gt_w / ex_w)) / weights[2] + dh = (np.log(gt_h / ex_h)) / weights[3] + + deltas = np.vstack([dx, dy, dw, dh]).transpose() + return deltas + + +@jit +def delta2bbox(deltas, boxes, weights, bbox_clip=4.13): + if boxes.shape[0] == 0: + return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) + boxes = boxes.astype(deltas.dtype, copy=False) + + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = weights + dx = deltas[:, 0::4] * wx + dy = deltas[:, 1::4] * wy + dw = deltas[:, 2::4] * ww + dh = deltas[:, 3::4] * wh + + # Prevent sending too large values into np.exp() + dw = np.minimum(dw, bbox_clip) + dh = np.minimum(dh, bbox_clip) + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 + # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 + + return pred_boxes + + +@jit +def expand_bbox(bboxes, scale): + w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5 + h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5 + x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5 + y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5 + + w_half *= scale + h_half *= scale + + bboxes_exp = np.zeros(bboxes.shape) + bboxes_exp[:, 0] = x_c - w_half + bboxes_exp[:, 2] = x_c + w_half + bboxes_exp[:, 1] = y_c - h_half + bboxes_exp[:, 3] = y_c + h_half + + return bboxes_exp + + +@jit +def clip_bbox(boxes, im_shape): + assert boxes.shape[1] % 4 == 0, \ + 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( + boxes.shape[1] + ) + # x1 >= 0 + boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes + + +@jit +def 
bbox_overlaps(bboxes1, bboxes2): + w1 = np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0) + h1 = np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0) + w2 = np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0) + h2 = np.maximum(bboxes2[:, 3] - bboxes2[:, 1] + 1, 0) + area1 = w1 * h1 + area2 = w2 * h2 + + overlaps = np.zeros((bboxes1.shape[0], bboxes2.shape[0])) + for ind1 in range(bboxes1.shape[0]): + for ind2 in range(bboxes2.shape[0]): + inter_x1 = np.maximum(bboxes1[ind1, 0], bboxes2[ind2, 0]) + inter_y1 = np.maximum(bboxes1[ind1, 1], bboxes2[ind2, 1]) + inter_x2 = np.minimum(bboxes1[ind1, 2], bboxes2[ind2, 2]) + inter_y2 = np.minimum(bboxes1[ind1, 3], bboxes2[ind2, 3]) + inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0) + inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0) + inter_area = inter_w * inter_h + iou = inter_area * 1.0 / (area1[ind1] + area2[ind2] - inter_area) + overlaps[ind1, ind2] = iou + return overlaps + + +@jit +def nms(dets, thresh): + if dets.shape[0] == 0: + return [] + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + ndets = dets.shape[0] + suppressed = np.zeros((ndets), dtype=np.int) + + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return np.where(suppressed == 0)[0] + + +def nms_with_decode(bboxes, + bbox_probs, + bbox_deltas, + im_info, + keep_top_k=100, + score_thresh=0.05, + nms_thresh=0.5, + class_nums=81, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]): + bboxes_num = [0, bboxes.shape[0]] + bboxes_v = np.array(bboxes) + bbox_probs_v = np.array(bbox_probs) + bbox_deltas_v = np.array(bbox_deltas) + variance_v = np.array(bbox_reg_weights) + im_results = [[] for _ in range(len(bboxes_num) - 1)] + new_bboxes_num = [0] + for i in range(len(bboxes_num) - 1): + start = bboxes_num[i] + end = bboxes_num[i + 1] + if start == end: + continue + + bbox_deltas_n = bbox_deltas_v[start:end, :] # box delta + rois_n = bboxes_v[start:end, :] # box + rois_n = rois_n / im_info[i][2] # scale + rois_n = delta2bbox(bbox_deltas_n, rois_n, variance_v) + rois_n = clip_bbox(rois_n, im_info[i][:2] / im_info[i][2]) + cls_boxes = [[] for _ in range(class_nums)] + scores_n = bbox_probs_v[start:end, :] + for j in range(1, class_nums): + inds = np.where(scores_n[:, j] > score_thresh)[0] + scores_j = scores_n[inds, j] + rois_j = rois_n[inds, j * 4:(j + 1) * 4] + dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( + np.float32, copy=False) + keep = nms(dets_j, nms_thresh) + nms_dets = dets_j[keep, :] + #add labels + label = np.array([j for _ in range(len(keep))]) + nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( + np.float32, copy=False) + cls_boxes[j] = nms_dets + + # Limit to max_per_image detections **over all classes** + image_scores = np.hstack( + [cls_boxes[j][:, 1] for j in range(1, class_nums)]) + if len(image_scores) > keep_top_k: + image_thresh = np.sort(image_scores)[-keep_top_k] + for j in range(1, class_nums): + keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] + cls_boxes[j] = cls_boxes[j][keep, :] + 
im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)]) + im_results[i] = im_results_n + new_bboxes_num.append(len(im_results_n) + new_bboxes_num[-1]) + labels = im_results_n[:, 0] + scores = im_results_n[:, 1] + boxes = im_results_n[:, 2:] + im_results = np.vstack([im_results[k] for k in range(len(bboxes_num) - 1)]) + new_bboxes_num = np.array(new_bboxes_num) + return new_bboxes_num, im_results + + +@jit +def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights): + assert bboxes1.shape[0] == bboxes2.shape[0] + assert bboxes1.shape[1] == 4 + assert bboxes2.shape[1] == 4 + + bbox_reg_weights = np.asarray(bbox_reg_weights) + targets = bbox2delta( + bboxes1=bboxes1, bboxes2=bboxes2, weights=bbox_reg_weights) + + return np.hstack([labels[:, np.newaxis], targets]).astype( + np.float32, copy=False) + + +@jit +def expand_bbox_targets(bbox_targets_input, + class_nums=81, + is_cls_agnostic=False): + class_labels = bbox_targets_input[:, 0] + fg_inds = np.where(class_labels > 0)[0] + # class-agnostic regression only needs two 4-wide slots: background and foreground + if is_cls_agnostic: + class_nums = 2 + bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums)) + bbox_inside_weights = np.zeros(bbox_targets.shape) + for ind in fg_inds: + class_label = int(class_labels[ind]) if not is_cls_agnostic else 1 + start_ind = class_label * 4 + end_ind = class_label * 4 + 4 + bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:] + bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0) + return bbox_targets, bbox_inside_weights diff --git a/ppdet/py_op/mask.py b/ppdet/py_op/mask.py new file mode 100755 index 0000000000000000000000000000000000000000..07ff76a9a4e34d67b826d5254ce6fb11160083f8 --- /dev/null +++ b/ppdet/py_op/mask.py @@ -0,0 +1,202 @@ +import six +import math +import numpy as np +from numba import jit + + +@jit +def decode(cnts, m): + v = 0 + mask = [] + for j in range(m): + for k in range(cnts[j]): + mask.append(v) + v = 1 - v + return mask + + +#@jit +def poly2mask(xy, k, h, w): + scale = 5.
+ x = [int(scale * p + 0.5) for p in xy[::2]] + x = x + [x[0]] + y = [int(scale * p + 0.5) for p in xy[1::2]] + y = y + [y[0]] + m = sum([ + int(max(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1]))) + int(1) + for j in range(k) + ]) + u, v = [], [] + for j in range(k): + xs = x[j] + xe = x[j + 1] + ys = y[j] + ye = y[j + 1] + dx = abs(xe - xs) + dy = abs(ys - ye) + flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye) + if flip: + xs, xe = xe, xs + ys, ye = ye, ys + + if dx >= dy: + if (dx == 0): + assert ye - ys == 0 + + s = 0 if dx == 0 else float(ye - ys) / dx + else: + if (dy == 0): + assert xe - xs == 0 + s = 0 if dy == 0 else float(xe - xs) / dy + + if dx >= dy: + ts = [dx - d if flip else d for d in range(dx + 1)] + u.extend([xs + t for t in ts]) + v.extend([int(ys + s * t + .5) for t in ts]) + else: + ts = [dy - d if flip else d for d in range(dy + 1)] + v.extend([t + ys for t in ts]) + u.extend([int(xs + s * t + .5) for t in ts]) + + k = len(u) + x = np.zeros((k), np.int) + y = np.zeros((k), np.int) + m = 0 + for j in six.moves.xrange(1, k): + if u[j] != u[j - 1]: + xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1)) + xd = (xd + .5) / scale - .5 + if (math.floor(xd) != xd or xd < 0 or xd > (w - 1)): + continue + yd = float(v[j] if v[j] < v[j - 1] else v[j - 1]) + yd = (yd + .5) / scale - .5 + yd = math.ceil(0 if yd < 0 else (h if yd > h else yd)) + x[m] = int(xd) + y[m] = int(yd) + m += 1 + k = m + a = [int(x[i] * h + y[i]) for i in range(k)] + a.append(h * w) + a.sort() + b = [0] + a[:len(a) - 1] + a = [c - d for (c, d) in zip(a, b)] + + k += 1 + b = [0 for i in range(k)] + b[0] = a[0] + m, j = 1, 1 + while (j < k): + if a[j] > 0: + b[m] = a[j] + m += 1 + j += 1 + else: + j += 1 + if (j < k): + b[m - 1] += a[j] + j += 1 + mask = decode(b, m) + mask = np.array(mask, dtype=np.int).reshape((w, h)) + mask = mask.transpose((1, 0)) + return mask + + +def polys_to_boxes(polys): + """Convert a list of polygons into an array of tight bounding boxes.""" + boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) + for j in range(len(polys)): + x_min, y_min = 10000000, 10000000 + x_max, y_max = 0, 0 + for i in range(len(polys[j])): + poly = polys[j][i] + x0 = min(min(p[::2]) for p in poly) + x_min = min(x0, x_min) + y0 = min(min(p[1::2]) for p in poly) + y_min = min(y0, y_min) + x1 = max(max(p[::2]) for p in poly) + x_max = max(x_max, x1) + y1 = max(max(p[1::2]) for p in poly) + y_max = max(y1, y_max) + boxes_from_polys[j, :] = [x_min, y_min, x_max, y_max] + return boxes_from_polys + + +@jit +def bbox_overlaps_mask(boxes, query_boxes): + N = boxes.shape[0] + K = query_boxes.shape[0] + overlaps = np.zeros((N, K), dtype=boxes.dtype) + for k in range(K): + box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) *\ + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + for n in range(N): + iw = min(boxes[n, 2], query_boxes[k, 2]) -\ + max(boxes[n, 0], query_boxes[k, 0]) + 1 + if iw > 0: + ih = min(boxes[n, 3], query_boxes[k, 3]) -\ + max(boxes[n, 1], query_boxes[k, 1]) + 1 + if ih > 0: + ua = float( + (boxes[n, 2] - boxes[n, 0] + 1) *\ + (boxes[n, 3] - boxes[n, 1] + 1) +\ + box_area - iw * ih) + overlaps[n, k] = iw * ih / ua + return overlaps + + +@jit +def polys_to_mask_wrt_box(polygons, box, M): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed in the given box and rasterized to an M x M + mask. The resulting mask is therefore of shape (M, M). 
+ """ + w = box[2] - box[0] + h = box[3] - box[1] + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + polygons_norm = [] + i = 0 + for poly in polygons: + p = np.array(poly, dtype=np.float32) + p = p.reshape(-1) + p[0::2] = (p[0::2] - box[0]) * M / w + p[1::2] = (p[1::2] - box[1]) * M / h + polygons_norm.append(p) + + mask = [] + for polygons in polygons_norm: + assert polygons.shape[0] % 2 == 0, polygons.shape + k = polygons.shape[0] // 2 + + one_msk = poly2mask(polygons, k, M, M) + mask.append(one_msk) + + mask = np.array(mask) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=0) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +@jit +def expand_mask_targets(masks, mask_class_labels, resolution, num_classes): + """Expand masks from shape (#masks, resolution ** 2) + to (#masks, #classes * resolution ** 2) to encode class + specific mask targets. + """ + assert masks.shape[0] == mask_class_labels.shape[0] + # Target values of -1 are "don't care" / ignore labels + mask_targets = -np.ones( + (masks.shape[0], num_classes * resolution**2), dtype=np.int32) + for i in range(masks.shape[0]): + cls = int(mask_class_labels[i]) + start = resolution**2 * cls + end = start + resolution**2 + # Ignore background instance + # (only happens when there is no fg samples in an image) + if cls > 0: + mask_targets[i, start:end] = masks[i, :] + + return mask_targets diff --git a/ppdet/py_op/post_process.py b/ppdet/py_op/post_process.py new file mode 100755 index 0000000000000000000000000000000000000000..bcbb027caac6d460c395942b0154a487f50e3d12 --- /dev/null +++ b/ppdet/py_op/post_process.py @@ -0,0 +1,185 @@ +import six +import os +import numpy as np +from numba import jit +from .bbox import delta2bbox, clip_bbox, expand_bbox, nms + + +def bbox_post_process(bboxes, + bbox_probs, + bbox_deltas, + im_info, + keep_top_k=100, + score_thresh=0.05, + nms_thresh=0.5, + class_nums=81, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]): + bbox_nums = [0, bboxes.shape[0]] + bboxes_v = np.array(bboxes) + bbox_probs_v = np.array(bbox_probs) + bbox_deltas_v = np.array(bbox_deltas) + variance_v = np.array(bbox_reg_weights) + new_bboxes = [[] for _ in range(len(bbox_nums) - 1)] + new_bbox_nums = [0] + for i in range(len(bbox_nums) - 1): + start = bbox_nums[i] + end = bbox_nums[i + 1] + if start == end: + continue + + bbox_deltas_n = bbox_deltas_v[start:end, :] # box delta + rois_n = bboxes_v[start:end, :] # box + rois_n = rois_n / im_info[i][2] # scale + rois_n = delta2bbox(bbox_deltas_n, rois_n, variance_v) + rois_n = clip_bbox(rois_n, im_info[i][:2] / im_info[i][2]) + cls_boxes = [[] for _ in range(class_nums)] + scores_n = bbox_probs_v[start:end, :] + for j in range(1, class_nums): + inds = np.where(scores_n[:, j] > score_thresh)[0] + scores_j = scores_n[inds, j] + rois_j = rois_n[inds, j * 4:(j + 1) * 4] + dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( + np.float32, copy=False) + keep = nms(dets_j, nms_thresh) + nms_dets = dets_j[keep, :] + #add labels + label = np.array([j for _ in range(len(keep))]) + nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( + np.float32, copy=False) + cls_boxes[j] = nms_dets + + # Limit to max_per_image detections **over all classes** + image_scores = np.hstack( + [cls_boxes[j][:, 1] for j in range(1, class_nums)]) + if len(image_scores) > keep_top_k: + image_thresh = np.sort(image_scores)[-keep_top_k] + for j in range(1, class_nums): + keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] + cls_boxes[j] = cls_boxes[j][keep, :] + new_bboxes_n = 
np.vstack([cls_boxes[j] for j in range(1, class_nums)]) + new_bboxes[i] = new_bboxes_n + new_bbox_nums.append(len(new_bboxes_n) + new_bbox_nums[-1]) + labels = new_bboxes_n[:, 0] + scores = new_bboxes_n[:, 1] + boxes = new_bboxes_n[:, 2:] + new_bboxes = np.vstack([new_bboxes[k] for k in range(len(bbox_nums) - 1)]) + new_bbox_nums = np.array(new_bbox_nums) + return new_bbox_nums, new_bboxes + + +@jit +def mask_post_process(bbox_nums, bboxes, masks, im_info, binarize_thresh=0.5): + # binarize_thresh: probability threshold used to turn the resized soft mask into a 0/1 mask + import cv2 + import pycocotools.mask as mask_util + bboxes = np.array(bboxes) + masks_v = np.array(masks) + M = masks_v.shape[-1] # mask head resolution (masks are N x num_classes x M x M) + scale = (M + 2.0) / M + boxes = bboxes[:, 2:] + labels = bboxes[:, 0] + segms_results = [[] for _ in range(len(bbox_nums) - 1)] + for i in range(len(bbox_nums) - 1): + bboxes_n = bboxes[bbox_nums[i]:bbox_nums[i + 1]] + cls_segms = [] + masks_n = masks_v[bbox_nums[i]:bbox_nums[i + 1]] + boxes_n = boxes[bbox_nums[i]:bbox_nums[i + 1]] + labels_n = labels[bbox_nums[i]:bbox_nums[i + 1]] + im_h = int(round(im_info[i][0] / im_info[i][2])) + im_w = int(round(im_info[i][1] / im_info[i][2])) + boxes_n = expand_bbox(boxes_n, scale) + boxes_n = boxes_n.astype(np.int32) + padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) + for j in range(len(bboxes_n)): + class_id = int(labels_n[j]) + padded_mask[1:-1, 1:-1] = masks_n[j, class_id, :, :] + + ref_box = boxes_n[j, :] + w = ref_box[2] - ref_box[0] + 1 + h = ref_box[3] - ref_box[1] + 1 + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + mask = cv2.resize(padded_mask, (w, h)) + mask = np.array(mask > binarize_thresh, dtype=np.uint8) + im_mask = np.zeros((im_h, im_w), dtype=np.uint8) + + x_0 = max(ref_box[0], 0) + x_1 = min(ref_box[2] + 1, im_w) + y_0 = max(ref_box[1], 0) + y_1 = min(ref_box[3] + 1, im_h) + im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[ + 1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])] + rle = mask_util.encode( + np.array( + im_mask[:, :, np.newaxis], order='F'))[0] + cls_segms.append(rle) + segms_results[i] = np.array(cls_segms)[:, np.newaxis] + segms_results = np.vstack([segms_results[k] for k in range(len(bbox_nums) - 1)]) + bboxes = np.hstack([segms_results, bboxes]) + return bboxes[:, :3] + + +@jit +def get_det_res(bbox_nums, + bbox, + image_id, + image_shape, + num_id_to_cat_id_map, + batch_size=1): + det_res = [] + bbox_v = np.array(bbox) + if bbox_v.shape == ( + 1, + 1, ): + return det_res + assert (len(bbox_nums) == batch_size + 1), \ + "Error bbox_nums Tensor offset dimension. bbox_nums({}) vs. 
batch_size({})"\ + .format(len(bbox_nums), batch_size) + k = 0 + for i in range(batch_size): + dt_num_this_img = bbox_nums[i + 1] - bbox_nums[i] + image_id = int(image_id[i][0]) + image_width = int(image_shape[i][1]) #int(data[i][-1][1]) + image_height = int(image_shape[i][2]) #int(data[i][-1][2]) + for j in range(dt_num_this_img): + dt = bbox_v[k] + k = k + 1 + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + category_id = num_id_to_cat_id_map[num_id] + w = xmax - xmin + 1 + h = ymax - ymin + 1 + bbox = [xmin, ymin, w, h] + dt_res = { + 'image_id': image_id, + 'category_id': category_id, + 'bbox': bbox, + 'score': score + } + det_res.append(dt_res) + return det_res + + +@jit +def get_seg_res(mask_nums, mask, image_id, num_id_to_cat_id_map, batch_size=1): + seg_res = [] + mask_v = np.array(mask) + k = 0 + for i in range(batch_size): + image_id = int(image_id[i][0]) + dt_num_this_img = mask_nums[i + 1] - mask_nums[i] + for j in range(dt_num_this_img): + dt = mask_v[k] + k = k + 1 + sg, num_id, score = dt.tolist() + cat_id = num_id_to_cat_id_map[num_id] + if six.PY3: + if 'counts' in sg: + sg['counts'] = sg['counts'].decode("utf8") + sg_res = { + 'image_id': image_id, + 'category_id': cat_id, + 'segmentation': sg, + 'score': score + } + seg_res.append(sg_res) + return seg_res diff --git a/ppdet/py_op/post_processing.py b/ppdet/py_op/post_processing.py new file mode 100755 index 0000000000000000000000000000000000000000..841b500aee568898063ccb6613e300f327c7d89e --- /dev/null +++ b/ppdet/py_op/post_processing.py @@ -0,0 +1,187 @@ +import six +import os +import numpy as np +from numba import jit +from .bbox import nms + + +@jit +def box_decoder(deltas, boxes, weights, bbox_clip=4.13): + if boxes.shape[0] == 0: + return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) + boxes = boxes.astype(deltas.dtype, copy=False) + + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = weights + dx = deltas[:, 0::4] * wx + dy = deltas[:, 1::4] * wy + dw = deltas[:, 2::4] * ww + dh = deltas[:, 3::4] * wh + + # Prevent sending too large values into np.exp() + dw = np.minimum(dw, bbox_clip) + dh = np.minimum(dh, bbox_clip) + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 + # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 + + return pred_boxes + + +@jit +def clip_tiled_boxes(boxes, im_shape): + """Clip boxes to image boundaries. 
im_shape is [height, width] and boxes + has shape (N, 4 * num_tiled_boxes).""" + assert boxes.shape[1] % 4 == 0, \ + 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( + boxes.shape[1] + ) + # x1 >= 0 + boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes + + +#@jit +def get_nmsed_box(rpn_rois, + confs, + locs, + class_nums, + im_info, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + score_thresh=0.05, + nms_thresh=0.5, + detections_per_im=100): + box_nums = [0, rpn_rois.shape[0]] + variance_v = np.array(bbox_reg_weights) + rpn_rois_v = np.array(rpn_rois) + confs_v = np.array(confs) + locs_v = np.array(locs) + + im_results = [[] for _ in range(len(box_nums) - 1)] + new_box_nums = [0] + for i in range(len(box_nums) - 1): + start = box_nums[i] + end = box_nums[i + 1] + if start == end: + continue + + locs_n = locs_v[start:end, :] # box delta + rois_n = rpn_rois_v[start:end, :] # box + rois_n = rois_n / im_info[i][2] # scale + rois_n = box_decoder(locs_n, rois_n, variance_v) + rois_n = clip_tiled_boxes(rois_n, im_info[i][:2] / im_info[i][2]) + cls_boxes = [[] for _ in range(class_nums)] + scores_n = confs_v[start:end, :] + for j in range(1, class_nums): + inds = np.where(scores_n[:, j] > score_thresh)[0] + scores_j = scores_n[inds, j] + rois_j = rois_n[inds, j * 4:(j + 1) * 4] + dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( + np.float32, copy=False) + keep = nms(dets_j, nms_thresh) + nms_dets = dets_j[keep, :] + #add labels + label = np.array([j for _ in range(len(keep))]) + nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( + np.float32, copy=False) + cls_boxes[j] = nms_dets + + # Limit to max_per_image detections **over all classes** + image_scores = np.hstack( + [cls_boxes[j][:, 1] for j in range(1, class_nums)]) + if len(image_scores) > detections_per_im: + image_thresh = np.sort(image_scores)[-detections_per_im] + for j in range(1, class_nums): + keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] + cls_boxes[j] = cls_boxes[j][keep, :] + im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)]) + im_results[i] = im_results_n + new_box_nums.append(len(im_results_n) + new_box_nums[-1]) + labels = im_results_n[:, 0] + scores = im_results_n[:, 1] + boxes = im_results_n[:, 2:] + im_results = np.vstack([im_results[k] for k in range(len(box_nums) - 1)]) + return new_box_nums, im_results + + +@jit +def get_dt_res(batch_size, box_nums, nmsed_out, data, num_id_to_cat_id_map): + dts_res = [] + nmsed_out_v = np.array(nmsed_out) + if nmsed_out_v.shape == ( + 1, + 1, ): + return dts_res + assert (len(box_nums) == batch_size + 1), \ + "Error Tensor offset dimension. Box Nums({}) vs. 
batch_size({})"\ + .format(len(box_nums), batch_size) + k = 0 + for i in range(batch_size): + dt_num_this_img = box_nums[i + 1] - box_nums[i] + image_id = int(data[i][-1]) + image_width = int(data[i][1][1]) + image_height = int(data[i][1][2]) + for j in range(dt_num_this_img): + dt = nmsed_out_v[k] + k = k + 1 + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + category_id = num_id_to_cat_id_map[num_id] + w = xmax - xmin + 1 + h = ymax - ymin + 1 + bbox = [xmin, ymin, w, h] + dt_res = { + 'image_id': image_id, + 'category_id': category_id, + 'bbox': bbox, + 'score': score + } + dts_res.append(dt_res) + return dts_res + + +@jit +def get_segms_res(batch_size, box_nums, segms_out, data, num_id_to_cat_id_map): + segms_res = [] + segms_out_v = np.array(segms_out) + k = 0 + for i in range(batch_size): + dt_num_this_img = box_nums[i + 1] - box_nums[i] + image_id = int(data[i][-1]) + for j in range(dt_num_this_img): + dt = segms_out_v[k] + k = k + 1 + segm, num_id, score = dt.tolist() + cat_id = num_id_to_cat_id_map[num_id] + if six.PY3: + if 'counts' in segm: + segm['counts'] = segm['counts'].decode("utf8") + segm_res = { + 'image_id': image_id, + 'category_id': cat_id, + 'segmentation': segm, + 'score': score + } + segms_res.append(segm_res) + return segms_res diff --git a/ppdet/py_op/target.py b/ppdet/py_op/target.py new file mode 100755 index 0000000000000000000000000000000000000000..04329681a77da3f24d7a4991dbaba6bc29bf5a37 --- /dev/null +++ b/ppdet/py_op/target.py @@ -0,0 +1,398 @@ +import six +import math +import numpy as np +from numba import jit +from .bbox import * +from .mask import * + + +@jit +def generate_rpn_anchor_target(anchor_box, + gt_boxes, + is_crowd, + im_info, + rpn_straddle_thresh, + rpn_batch_size_per_im, + rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, + use_random=True): + anchor_num = anchor_box.shape[0] + batch_size = gt_boxes.shape[0] + + for i in range(batch_size): + im_height = im_info[i][0] + im_width = im_info[i][1] + im_scale = im_info[i][2] + if rpn_straddle_thresh >= 0: + # Only keep anchors inside the image by a margin of straddle_thresh + inds_inside = np.where( + (anchor_box[:, 0] >= -rpn_straddle_thresh + ) & (anchor_box[:, 1] >= -rpn_straddle_thresh) & ( + anchor_box[:, 2] < im_width + rpn_straddle_thresh) & ( + anchor_box[:, 3] < im_height + rpn_straddle_thresh))[0] + # keep only inside anchors + inside_anchors = anchor_box[inds_inside, :] + else: + inds_inside = np.arange(anchor_box.shape[0]) + inside_anchors = anchor_box + gt_boxes_slice = gt_boxes[i] * im_scale + is_crowd_slice = is_crowd[i] + + not_crowd_inds = np.where(is_crowd_slice == 0)[0] + gt_boxes_slice = gt_boxes_slice[not_crowd_inds] + iou = bbox_overlaps(inside_anchors, gt_boxes_slice) + + loc_inds, score_inds, labels, gt_inds, bbox_inside_weight = _sample_anchor( + iou, rpn_batch_size_per_im, rpn_positive_overlap, + rpn_negative_overlap, rpn_fg_fraction, use_random) + # unmap to all anchor + loc_inds = inds_inside[loc_inds] + score_inds = inds_inside[score_inds] + sampled_anchor = anchor_box[loc_inds] + sampled_gt = gt_boxes_slice[gt_inds] + box_deltas = bbox2delta(sampled_anchor, sampled_gt, [1., 1., 1., 1.]) + + if i == 0: + loc_indexes = loc_inds + score_indexes = score_inds + tgt_labels = labels + tgt_bboxes = box_deltas + bbox_inside_weights = bbox_inside_weight + else: + loc_indexes = np.concatenate( + [loc_indexes, loc_inds + i * anchor_num]) + score_indexes = np.concatenate( + [score_indexes, score_inds + i * anchor_num]) + tgt_labels = 
np.concatenate([tgt_labels, labels]) + tgt_bboxes = np.vstack([tgt_bboxes, box_deltas]) + bbox_inside_weights = np.vstack([bbox_inside_weights, \ + bbox_inside_weight]) + tgt_labels = tgt_labels.astype('float32') + tgt_bboxes = tgt_bboxes.astype('float32') + return loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights + + +@jit +def _sample_anchor(anchor_by_gt_overlap, + rpn_batch_size_per_im, + rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, + use_random=True): + + anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) + anchor_to_gt_max = anchor_by_gt_overlap[np.arange( + anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax] + + gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) + gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange( + anchor_by_gt_overlap.shape[1])] + anchors_with_max_overlap = np.where( + anchor_by_gt_overlap == gt_to_anchor_max)[0] + + labels = np.ones((anchor_by_gt_overlap.shape[0], ), dtype=np.int32) * -1 + labels[anchors_with_max_overlap] = 1 + labels[anchor_to_gt_max >= rpn_positive_overlap] = 1 + + num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg and use_random: + disable_inds = np.random.choice( + fg_inds, size=(len(fg_inds) - num_fg), replace=False) + else: + disable_inds = fg_inds[num_fg:] + + labels[disable_inds] = -1 + fg_inds = np.where(labels == 1)[0] + + num_bg = rpn_batch_size_per_im - np.sum(labels == 1) + bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0] + if len(bg_inds) > num_bg and use_random: + enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)] + else: + enable_inds = bg_inds[:num_bg] + + fg_fake_inds = np.array([], np.int32) + fg_value = np.array([fg_inds[0]], np.int32) + fake_num = 0 + for bg_id in enable_inds: + if bg_id in fg_inds: + fake_num += 1 + fg_fake_inds = np.hstack([fg_fake_inds, fg_value]) + labels[enable_inds] = 0 + + fg_inds = np.where(labels == 1)[0] + bg_inds = np.where(labels == 0)[0] + + loc_index = np.hstack([fg_fake_inds, fg_inds]) + score_index = np.hstack([fg_inds, bg_inds]) + labels = labels[score_index] + + gt_inds = anchor_to_gt_argmax[loc_index] + + bbox_inside_weight = np.zeros((len(loc_index), 4), dtype=np.float32) + bbox_inside_weight[fake_num:, :] = 1 + return loc_index, score_index, labels, gt_inds, bbox_inside_weight + + +@jit +def generate_proposal_target(rpn_rois, + rpn_rois_nums, + gt_classes, + is_crowd, + gt_boxes, + im_info, + batch_size_per_im, + fg_fraction, + fg_thresh, + bg_thresh_hi, + bg_thresh_lo, + bbox_reg_weights, + class_nums=81, + use_random=True, + is_cls_agnostic=False, + is_cascade_rcnn=False): + + rois = [] + labels_int32 = [] + bbox_targets = [] + bbox_inside_weights = [] + bbox_outside_weights = [] + rois_nums = [] + batch_size = gt_boxes.shape[0] + # TODO: modify here + # rpn_rois = rpn_rois.reshape(batch_size, -1, 4) + st_num = 0 + print("debug: ", rpn_rois_nums) + for im_i in range(len(rpn_rois_nums)): + rpn_rois_num = rpn_rois_nums[im_i] + frcn_blobs = _sample_rois( + rpn_rois[st_num:rpn_rois_num], gt_classes[im_i], is_crowd[im_i], + gt_boxes[im_i], im_info[im_i], batch_size_per_im, fg_fraction, + fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, + use_random, is_cls_agnostic, is_cascade_rcnn) + st_num = rpn_rois_num + + rois.append(frcn_blobs['rois']) + labels_int32.append(frcn_blobs['labels_int32']) + bbox_targets.append(frcn_blobs['bbox_targets']) + 
bbox_inside_weights.append(frcn_blobs['bbox_inside_weights']) + bbox_outside_weights.append(frcn_blobs['bbox_outside_weights']) + rois_nums.append(frcn_blobs['rois'].shape[0]) + + rois = np.concatenate(rois, axis=0).astype(np.float32) + bbox_labels = np.concatenate( + labels_int32, axis=0).astype(np.int32).reshape(-1, 1) + bbox_gts = np.concatenate(bbox_targets, axis=0).astype(np.float32) + bbox_inside_weights = np.concatenate( + bbox_inside_weights, axis=0).astype(np.float32) + bbox_outside_weights = np.concatenate( + bbox_outside_weights, axis=0).astype(np.float32) + rois_nums = np.asarray(rois_nums, np.int32) + + return rois, bbox_labels, bbox_gts, bbox_inside_weights, bbox_outside_weights, rois_nums + + +@jit +def _sample_rois(rpn_rois, + gt_classes, + is_crowd, + gt_boxes, + im_info, + batch_size_per_im, + fg_fraction, + fg_thresh, + bg_thresh_hi, + bg_thresh_lo, + bbox_reg_weights, + class_nums, + use_random=True, + is_cls_agnostic=False, + is_cascade_rcnn=False): + rois_per_image = int(batch_size_per_im) + fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) + + # Roidb + im_scale = im_info[2] + inv_im_scale = 1. / im_scale + rpn_rois = rpn_rois * inv_im_scale + if is_cascade_rcnn: + rpn_rois = rpn_rois[gt_boxes.shape[0]:, :] + boxes = np.vstack([gt_boxes, rpn_rois]) + gt_overlaps = np.zeros((boxes.shape[0], class_nums)) + box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32) + if len(gt_boxes) > 0: + proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes) + overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1) + overlaps_max = proposal_to_gt_overlaps.max(axis=1) + # Boxes which with non-zero overlap with gt boxes + overlapped_boxes_ind = np.where(overlaps_max > 0)[0].astype('int32') + overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[ + overlapped_boxes_ind]].astype('int32') + gt_overlaps[overlapped_boxes_ind, + overlapped_boxes_gt_classes] = overlaps_max[ + overlapped_boxes_ind] + box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ + overlapped_boxes_ind] + + crowd_ind = np.where(is_crowd)[0] + gt_overlaps[crowd_ind] = -1 + + max_overlaps = gt_overlaps.max(axis=1) + max_classes = gt_overlaps.argmax(axis=1) + + # Cascade RCNN Decode Filter + if is_cascade_rcnn: + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws > 0) & (hs > 0))[0] + boxes = boxes[keep] + fg_inds = np.where(max_overlaps >= fg_thresh)[0] + bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= + bg_thresh_lo))[0] + fg_rois_per_this_image = fg_inds.shape[0] + bg_rois_per_this_image = bg_inds.shape[0] + else: + # Foreground + fg_inds = np.where(max_overlaps >= fg_thresh)[0] + fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) + # Sample foreground if there are too many + if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random: + fg_inds = np.random.choice( + fg_inds, size=fg_rois_per_this_image, replace=False) + fg_inds = fg_inds[:fg_rois_per_this_image] + # Background + bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= + bg_thresh_lo))[0] + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, + bg_inds.shape[0]) + # Sample background if there are too many + if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random: + bg_inds = np.random.choice( + bg_inds, size=bg_rois_per_this_image, replace=False) + bg_inds = bg_inds[:bg_rois_per_this_image] + + keep_inds = np.append(fg_inds, bg_inds) + sampled_labels = 
max_classes[keep_inds] + sampled_labels[fg_rois_per_this_image:] = 0 + sampled_boxes = boxes[keep_inds] + sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]] + sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0] + bbox_label_targets = compute_bbox_targets(sampled_boxes, sampled_gts, + sampled_labels, bbox_reg_weights) + bbox_targets, bbox_inside_weights = expand_bbox_targets( + bbox_label_targets, class_nums, is_cls_agnostic) + bbox_outside_weights = np.array( + bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) + + # Scale rois + sampled_rois = sampled_boxes * im_scale + + # Faster RCNN blobs + frcn_blobs = dict( + rois=sampled_rois, + labels_int32=sampled_labels, + bbox_targets=bbox_targets, + bbox_inside_weights=bbox_inside_weights, + bbox_outside_weights=bbox_outside_weights) + return frcn_blobs + + +@jit +def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois, + rois_nums, labels_int32, num_classes, resolution): + mask_rois = [] + rois_has_mask_int32 = [] + mask_int32 = [] + st_num = 0 + for i in range(len(rois_nums)): + rois_num = rois_nums[i] + mask_blob = _sample_mask( + rois[st_num:rois_num], labels_int32[st_num:rois_num], gt_segms[i], + im_info[i], gt_classes[i], is_crowd[i], num_classes, resolution) + + st_num = rois_num + mask_rois.append(mask_blob['mask_rois']) + rois_has_mask_int32.append(mask_blob['roi_has_mask_int32']) + mask_int32.append(mask_blob['mask_int32']) + mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32) + rois_has_mask_int32 = np.concatenate( + rois_has_mask_int32, axis=0).astype(np.int32) + mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32) + + return mask_rois, rois_has_mask_int32, mask_int32 + + +@jit +def _sample_mask( + rois, + label_int32, + gt_polys, + im_info, + gt_classes, + is_crowd, + num_classes, + resolution, ): + + # remove padding + new_gt_polys = [] + for i in range(gt_polys.shape[0]): + gt_segs = [] + for j in range(gt_polys[i].shape[0]): + new_poly = [] + polys = gt_polys[i][j] + for ii in range(polys.shape[0]): + x, y = polys[ii] + if (x == -1 and y == -1): + continue + elif (x >= 0 and y >= 0): + new_poly.append([x, y]) # array, one poly + if len(new_poly) > 0: + gt_segs.append(new_poly) + new_gt_polys.append(gt_segs) + + im_scale = im_info[2] + sample_boxes = rois / im_scale + + polys_gt_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0] + + polys_gt = [new_gt_polys[i] for i in polys_gt_inds] + boxes_from_polys = polys_to_boxes(polys_gt) + fg_inds = np.where(label_int32 > 0)[0] + roi_has_mask = fg_inds.copy() + + if fg_inds.shape[0] > 0: + mask_class_labels = label_int32[fg_inds] + masks = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32) + rois_fg = sample_boxes[fg_inds] + + overlaps_bbfg_bbpolys = bbox_overlaps_mask(rois_fg, boxes_from_polys) + fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) + + for i in range(rois_fg.shape[0]): + fg_polys_ind = fg_polys_inds[i] + poly_gt = polys_gt[fg_polys_ind] + roi_fg = rois_fg[i] + + mask = polys_to_mask_wrt_box(poly_gt, roi_fg, resolution) + mask = np.array(mask > 0, dtype=np.int32) + masks[i, :] = np.reshape(mask, resolution**2) + else: + bg_inds = np.where(label_int32 == 0)[0] + rois_fg = sample_boxes[bg_inds[0]].reshape((1, -1)) + masks = -np.ones((1, resolution**2), dtype=np.int32) + mask_class_labels = np.zeros((1, )) + roi_has_mask = np.append(roi_has_mask, 0) + + masks = expand_mask_targets(masks, mask_class_labels, resolution, + num_classes) + + rois_fg *= im_scale + mask_blob = dict() + mask_blob['mask_rois'] = 
rois_fg + mask_blob['roi_has_mask_int32'] = roi_has_mask + mask_blob['mask_int32'] = masks + + return mask_blob
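
Usage sketch for the box codec added in ppdet/py_op/bbox.py (not part of the patch). It assumes this patch is installed so that ppdet.py_op.bbox is importable, that numba is available (these modules import it at load time) and is of the same era as this code (bare @jit falling back to object mode); the sample boxes below are made up.

import numpy as np
from ppdet.py_op.bbox import bbox2delta, delta2bbox, bbox_overlaps

# Made-up anchors and ground-truth boxes in [x1, y1, x2, y2] form.
anchors = np.array([[10., 10., 50., 60.], [20., 30., 80., 90.]])
gts = np.array([[12., 14., 48., 58.], [25., 28., 78., 95.]])
weights = [1.0, 1.0, 1.0, 1.0]

# Encoding against an anchor and decoding again is an exact round trip: both
# functions use the "+1" width/height convention, which is why delta2bbox
# subtracts 1 when reconstructing x2/y2.
deltas = bbox2delta(anchors, gts, weights)
decoded = delta2bbox(deltas, anchors, weights)
assert np.allclose(decoded, gts)

# Pairwise IoU between the two box sets; the diagonal entries dominate here.
print(bbox_overlaps(anchors, gts))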