未验证 提交 90e8a508 编写于 作者: F FDInSky 提交者: GitHub

add python ops for dygraph (#897)

add python ops for dygraph
上级 8ef4a22d
from .bbox import *
from .mask import *
from .target import *
from .post_process import *
import numpy as np
from numba import jit
@jit
def bbox2delta(bboxes1, bboxes2, weights):
    """Encode the offsets that map ``bboxes1`` onto ``bboxes2``.

    Both box arrays are indexed as ``[:, 0..3] = x1, y1, x2, y2`` and sizes
    use the inclusive "+ 1" pixel convention.

    Args:
        bboxes1: source boxes, one row per box.
        bboxes2: target boxes, row-aligned with ``bboxes1``.
        weights: four divisors applied to (dx, dy, dw, dh) respectively.

    Returns:
        Array with one ``(dx, dy, dw, dh)`` row per box pair.
    """
    src_w = bboxes1[:, 2] - bboxes1[:, 0] + 1
    src_h = bboxes1[:, 3] - bboxes1[:, 1] + 1
    src_cx = bboxes1[:, 0] + 0.5 * src_w
    src_cy = bboxes1[:, 1] + 0.5 * src_h

    dst_w = bboxes2[:, 2] - bboxes2[:, 0] + 1
    dst_h = bboxes2[:, 3] - bboxes2[:, 1] + 1
    dst_cx = bboxes2[:, 0] + 0.5 * dst_w
    dst_cy = bboxes2[:, 1] + 0.5 * dst_h

    # Centre shifts are normalised by the source size; sizes go to log-space.
    shift_x = (dst_cx - src_cx) / src_w / weights[0]
    shift_y = (dst_cy - src_cy) / src_h / weights[1]
    log_w = (np.log(dst_w / src_w)) / weights[2]
    log_h = (np.log(dst_h / src_h)) / weights[3]

    return np.vstack([shift_x, shift_y, log_w, log_h]).transpose()
@jit
def delta2bbox(deltas, boxes, weights, bbox_clip=4.13):
    """Apply regression deltas to reference boxes.

    ``deltas`` holds groups of four columns ``(dx, dy, dw, dh)``; every group
    is decoded against the same reference box of its row.

    Args:
        deltas: array whose column count is a multiple of 4.
        boxes: reference boxes indexed as ``x1, y1, x2, y2``.
        weights: four multipliers applied to (dx, dy, dw, dh).
        bbox_clip: upper bound for the scaled dw/dh before ``np.exp``.

    Returns:
        Array shaped like ``deltas`` holding decoded ``x1, y1, x2, y2`` groups
        (an empty ``(0, deltas.shape[1])`` array when ``boxes`` has no rows).
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    ref_w = boxes[:, 2] - boxes[:, 0] + 1.0
    ref_h = boxes[:, 3] - boxes[:, 1] + 1.0
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h

    scale_x, scale_y, scale_w, scale_h = weights
    shift_x = deltas[:, 0::4] * scale_x
    shift_y = deltas[:, 1::4] * scale_y
    # Prevent sending too large values into np.exp()
    log_w = np.minimum(deltas[:, 2::4] * scale_w, bbox_clip)
    log_h = np.minimum(deltas[:, 3::4] * scale_h, bbox_clip)

    new_cx = shift_x * ref_w[:, np.newaxis] + ref_cx[:, np.newaxis]
    new_cy = shift_y * ref_h[:, np.newaxis] + ref_cy[:, np.newaxis]
    new_w = np.exp(log_w) * ref_w[:, np.newaxis]
    new_h = np.exp(log_h) * ref_h[:, np.newaxis]

    decoded = np.zeros(deltas.shape, dtype=deltas.dtype)
    decoded[:, 0::4] = new_cx - 0.5 * new_w  # x1
    decoded[:, 1::4] = new_cy - 0.5 * new_h  # y1
    # The "- 1" on x2/y2 mirrors the inclusive "+ 1" used for widths/heights.
    decoded[:, 2::4] = new_cx + 0.5 * new_w - 1  # x2
    decoded[:, 3::4] = new_cy + 0.5 * new_h - 1  # y2
    return decoded
@jit
def expand_bbox(bboxes, scale):
    """Scale boxes about their centres.

    Args:
        bboxes: array indexed as ``[:, 0..3] = x1, y1, x2, y2``.
        scale: factor applied to each box's half-width and half-height.

    Returns:
        New array of shape ``bboxes.shape`` (default float dtype) whose first
        four columns hold the expanded boxes; any further columns are zero.
    """
    x1, y1 = bboxes[:, 0], bboxes[:, 1]
    x2, y2 = bboxes[:, 2], bboxes[:, 3]

    half_w = (x2 - x1) * .5
    half_h = (y2 - y1) * .5
    center_x = (x2 + x1) * .5
    center_y = (y2 + y1) * .5

    half_w *= scale
    half_h *= scale

    expanded = np.zeros(bboxes.shape)
    expanded[:, 0] = center_x - half_w
    expanded[:, 1] = center_y - half_h
    expanded[:, 2] = center_x + half_w
    expanded[:, 3] = center_y + half_h
    return expanded
@jit
def clip_bbox(boxes, im_shape):
    """Clamp tiled boxes to the image, modifying ``boxes`` in place.

    Args:
        boxes: array whose columns repeat as ``x1, y1, x2, y2`` groups.
        im_shape: indexable where ``im_shape[0]`` bounds y and ``im_shape[1]``
            bounds x (i.e. height first, then width).

    Returns:
        The same ``boxes`` array, with x limited to ``[0, im_shape[1] - 1]``
        and y limited to ``[0, im_shape[0] - 1]``.
    """
    assert boxes.shape[1] % 4 == 0, \
        'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
            boxes.shape[1]
        )
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    # Column offsets 0/2 are x coordinates, 1/3 are y coordinates.
    for offset, limit in ((0, x_limit), (1, y_limit), (2, x_limit),
                          (3, y_limit)):
        boxes[:, offset::4] = np.maximum(
            np.minimum(boxes[:, offset::4], limit), 0)
    return boxes
@jit
def bbox_overlaps(bboxes1, bboxes2):
    """Compute the pairwise IoU between two box sets.

    Boxes are indexed as ``x1, y1, x2, y2`` and sizes use the inclusive
    "+ 1" pixel convention.

    Returns:
        Array of shape ``(len(bboxes1), len(bboxes2))`` (default float dtype)
        where entry ``[i, j]`` is the IoU of ``bboxes1[i]`` and ``bboxes2[j]``.
    """
    widths_a = np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0)
    heights_a = np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0)
    widths_b = np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0)
    heights_b = np.maximum(bboxes2[:, 3] - bboxes2[:, 1] + 1, 0)
    area_a = widths_a * heights_a
    area_b = widths_b * heights_b

    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    overlaps = np.zeros((rows, cols))
    for r in range(rows):
        for c in range(cols):
            left = np.maximum(bboxes1[r, 0], bboxes2[c, 0])
            top = np.maximum(bboxes1[r, 1], bboxes2[c, 1])
            right = np.minimum(bboxes1[r, 2], bboxes2[c, 2])
            bottom = np.minimum(bboxes1[r, 3], bboxes2[c, 3])
            # Negative extents mean the boxes do not intersect.
            common_w = np.maximum(right - left + 1, 0)
            common_h = np.maximum(bottom - top + 1, 0)
            common = common_w * common_h
            overlaps[r, c] = common * 1.0 / (area_a[r] + area_b[c] - common)
    return overlaps
@jit
def nms(dets, thresh):
    """Greedy non-maximum suppression.

    Args:
        dets: array whose columns are read as ``x1, y1, x2, y2, score``
            (columns 0-3 are the box, column 4 is the score).
        thresh: a lower-scored box is suppressed when its IoU with an
            already-kept box is ``>= thresh``.

    Returns:
        Indices into ``dets`` of the kept rows, in ascending index order
        (not score order). An empty list when ``dets`` has no rows.
    """
    if dets.shape[0] == 0:
        return []
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Visit boxes from the highest score to the lowest.
    order = scores.argsort()[::-1]
    ndets = dets.shape[0]
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    suppressed = np.zeros((ndets), dtype=int)
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1
    return np.where(suppressed == 0)[0]
def nms_with_decode(bboxes,
                    bbox_probs,
                    bbox_deltas,
                    im_info,
                    keep_top_k=100,
                    score_thresh=0.05,
                    nms_thresh=0.5,
                    class_nums=81,
                    bbox_reg_weights=(0.1, 0.1, 0.2, 0.2)):
    """Decode box deltas, run per-class NMS and keep the top detections.

    All rows of ``bboxes`` are treated as belonging to a single image
    (offsets ``[0, bboxes.shape[0]]``).

    Args:
        bboxes: proposal boxes, one ``x1, y1, x2, y2`` row per proposal.
        bbox_probs: per-proposal class scores; column ``j`` is class ``j``
            and column 0 is skipped.
        bbox_deltas: per-proposal deltas, four columns per class.
        im_info: per-image rows; ``im_info[i][:2]`` is used as the clip shape
            (height, width) and ``im_info[i][2]`` as the image scale.
        keep_top_k: maximum detections kept per image over all classes.
        score_thresh: detections must score strictly above this value.
        nms_thresh: IoU threshold handed to ``nms``.
        class_nums: number of classes including class 0.
        bbox_reg_weights: multipliers handed to ``delta2bbox``.

    Returns:
        ``(offsets, detections)``: ``offsets`` is an int array of cumulative
        detection counts starting at 0, and ``detections`` is a float32 array
        with rows ``[label, score, x1, y1, x2, y2]``.
    """
    bboxes_num = [0, bboxes.shape[0]]
    bboxes_v = np.array(bboxes)
    bbox_probs_v = np.array(bbox_probs)
    bbox_deltas_v = np.array(bbox_deltas)
    variance_v = np.array(bbox_reg_weights)
    im_results = []
    new_bboxes_num = [0]
    for i in range(len(bboxes_num) - 1):
        start = bboxes_num[i]
        end = bboxes_num[i + 1]
        if start == end:
            # No proposals: still emit an (empty) result and an offset entry
            # so the offsets keep one entry per image plus the leading 0.
            im_results.append(np.zeros((0, 6), dtype=np.float32))
            new_bboxes_num.append(new_bboxes_num[-1])
            continue
        bbox_deltas_n = bbox_deltas_v[start:end, :]  # box delta
        rois_n = bboxes_v[start:end, :]  # box
        rois_n = rois_n / im_info[i][2]  # scale
        rois_n = delta2bbox(bbox_deltas_n, rois_n, variance_v)
        rois_n = clip_bbox(rois_n, im_info[i][:2] / im_info[i][2])
        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = bbox_probs_v[start:end, :]
        for j in range(1, class_nums):
            inds = np.where(scores_n[:, j] > score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = rois_n[inds, j * 4:(j + 1) * 4]
            # nms() reads its input as [x1, y1, x2, y2, score], so the score
            # column must come last there; the emitted rows stay score-first.
            nms_input = np.hstack((rois_j, scores_j[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = nms(nms_input, nms_thresh)
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            nms_dets = dets_j[keep, :]
            # add labels
            label = np.full((len(keep), 1), j)
            cls_boxes[j] = np.hstack((label, nms_dets)).astype(
                np.float32, copy=False)
        # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, 1] for j in range(1, class_nums)])
        if len(image_scores) > keep_top_k:
            image_thresh = np.sort(image_scores)[-keep_top_k]
            for j in range(1, class_nums):
                keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
        im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)])
        im_results.append(im_results_n)
        new_bboxes_num.append(len(im_results_n) + new_bboxes_num[-1])
    im_results = np.vstack(im_results)
    new_bboxes_num = np.array(new_bboxes_num)
    return new_bboxes_num, im_results
@jit
def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights):
    """Build ``[label, dx, dy, dw, dh]`` regression rows.

    Args:
        bboxes1: sampled boxes, shape ``(N, 4)``.
        bboxes2: matched target boxes, shape ``(N, 4)``.
        labels: one label per row, prepended as the first output column.
        bbox_reg_weights: weights forwarded to ``bbox2delta``.

    Returns:
        float32 array of shape ``(N, 5)``.
    """
    assert bboxes1.shape[0] == bboxes2.shape[0]
    assert bboxes1.shape[1] == 4
    assert bboxes2.shape[1] == 4

    reg_weights = np.asarray(bbox_reg_weights)
    deltas = bbox2delta(bboxes1, bboxes2, reg_weights)
    labelled = np.hstack([labels[:, np.newaxis], deltas])
    return labelled.astype(np.float32, copy=False)
@jit
def expand_bbox_targets(bbox_targets_input,
                        class_nums=81,
                        is_cls_agnostic=False):
    """Spread compact regression targets into per-class column groups.

    Args:
        bbox_targets_input: rows of ``[label, dx, dy, dw, dh]``.
        class_nums: number of classes (including class 0) used when the
            regression is class specific.
        is_cls_agnostic: when True only two groups (background, foreground)
            are produced and every foreground row is written to group 1.

    Returns:
        ``(bbox_targets, bbox_inside_weights)``, both of shape
        ``(N, 4 * groups)``. Rows with label ``<= 0`` stay all zero; for the
        others the four targets are written to the columns of their class
        and the matching weights are set to 1.
    """
    class_labels = bbox_targets_input[:, 0]
    fg_inds = np.where(class_labels > 0)[0]
    # Only the class-agnostic mode collapses to 2 groups. The condition used
    # to be inverted, which left class-specific targets with 8 columns and
    # made any label > 1 fail on assignment.
    if is_cls_agnostic:
        class_nums = 2
    bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums))
    bbox_inside_weights = np.zeros(bbox_targets.shape)
    for ind in fg_inds:
        class_label = 1 if is_cls_agnostic else int(class_labels[ind])
        start_ind = class_label * 4
        end_ind = start_ind + 4
        bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
        bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights
import six
import math
import numpy as np
from numba import jit
@jit
def decode(cnts, m):
    """Expand run lengths into a flat list of 0/1 values.

    The first ``m`` entries of ``cnts`` are run lengths; runs alternate
    between 0 and 1, starting with 0.

    Returns:
        List of ints with ``cnts[0]`` zeros, then ``cnts[1]`` ones, and so on.
    """
    mask = []
    for run in range(m):
        bit = run % 2  # even runs are 0, odd runs are 1
        for _ in range(cnts[run]):
            mask.append(bit)
    return mask
#@jit
def poly2mask(xy, k, h, w):
    """Rasterize one polygon into an ``(h, w)`` binary mask.

    Args:
        xy: flat sequence ``[x0, y0, x1, y1, ...]`` of polygon vertices.
        k: number of vertices in ``xy``.
        h: mask height.
        w: mask width.

    Returns:
        Integer array of shape ``(h, w)`` with 1 inside the polygon and 0
        elsewhere.
    """
    # Vertices are upsampled by `scale` so edges are traced on a finer grid.
    scale = 5.
    x = [int(scale * p + 0.5) for p in xy[::2]]
    x = x + [x[0]]
    y = [int(scale * p + 0.5) for p in xy[1::2]]
    y = y + [y[0]]

    # Trace every edge into the dense boundary point lists (u, v).
    u, v = [], []
    for j in range(k):
        xs = x[j]
        xe = x[j + 1]
        ys = y[j]
        ye = y[j + 1]
        dx = abs(xe - xs)
        dy = abs(ys - ye)
        flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye)
        if flip:
            xs, xe = xe, xs
            ys, ye = ye, ys

        if dx >= dy:
            if (dx == 0):
                assert ye - ys == 0
            s = 0 if dx == 0 else float(ye - ys) / dx
        else:
            if (dy == 0):
                assert xe - xs == 0
            s = 0 if dy == 0 else float(xe - xs) / dy

        if dx >= dy:
            ts = [dx - d if flip else d for d in range(dx + 1)]
            u.extend([xs + t for t in ts])
            v.extend([int(ys + s * t + .5) for t in ts])
        else:
            ts = [dy - d if flip else d for d in range(dy + 1)]
            v.extend([t + ys for t in ts])
            u.extend([int(xs + s * t + .5) for t in ts])

    # Keep the boundary points that fall exactly on a pixel column of the
    # original (downsampled) grid.
    cross_x, cross_y = [], []
    for j in range(1, len(u)):
        if u[j] != u[j - 1]:
            xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1))
            xd = (xd + .5) / scale - .5
            if (math.floor(xd) != xd or xd < 0 or xd > (w - 1)):
                continue
            yd = float(v[j] if v[j] < v[j - 1] else v[j - 1])
            yd = (yd + .5) / scale - .5
            yd = math.ceil(0 if yd < 0 else (h if yd > h else yd))
            cross_x.append(int(xd))
            cross_y.append(int(yd))

    # Column-major positions of the crossings, turned into run lengths.
    a = [int(cx * h + cy) for cx, cy in zip(cross_x, cross_y)]
    a.append(h * w)
    a.sort()
    b = [0] + a[:len(a) - 1]
    a = [c - d for (c, d) in zip(a, b)]

    # Drop zero-length runs by merging their neighbours.
    k = len(a)
    b = [0 for _ in range(k)]
    b[0] = a[0]
    m, j = 1, 1
    while (j < k):
        if a[j] > 0:
            b[m] = a[j]
            m += 1
            j += 1
        else:
            j += 1
            if (j < k):
                b[m - 1] += a[j]
                j += 1

    # Run-length decode: runs alternate 0, 1, 0, ... starting with 0.
    mask = np.repeat(np.arange(m) % 2, b[:m]).astype(int, copy=False)
    # Runs are column-major, so reshape as (w, h) and transpose.
    mask = mask.reshape((w, h))
    mask = mask.transpose((1, 0))
    return mask
def polys_to_boxes(polys):
    """Convert a list of polygons into an array of tight bounding boxes.

    ``polys[j]`` is the list of polygons of instance ``j``; each polygon is a
    sequence of ``[x, y]`` points.

    Returns:
        float32 array of shape ``(len(polys), 4)`` with rows
        ``[x_min, y_min, x_max, y_max]``. The minima start at 10000000 and
        the maxima at 0, which is what an instance without polygons yields.
    """
    boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
    for row, instance in enumerate(polys):
        lo_x = lo_y = 10000000
        hi_x = hi_y = 0
        for poly in instance:
            lo_x = min(min(min(pt[::2]) for pt in poly), lo_x)
            lo_y = min(min(min(pt[1::2]) for pt in poly), lo_y)
            hi_x = max(hi_x, max(max(pt[::2]) for pt in poly))
            hi_y = max(max(max(pt[1::2]) for pt in poly), hi_y)
        boxes_from_polys[row, :] = [lo_x, lo_y, hi_x, hi_y]
    return boxes_from_polys
@jit
def bbox_overlaps_mask(boxes, query_boxes):
    """Compute the IoU of every box against every query box.

    Boxes are indexed as ``x1, y1, x2, y2`` with inclusive "+ 1" sizes.

    Returns:
        Array of shape ``(len(boxes), len(query_boxes))`` with dtype
        ``boxes.dtype``; pairs that do not intersect are left at 0.
    """
    num_boxes = boxes.shape[0]
    num_queries = query_boxes.shape[0]
    overlaps = np.zeros((num_boxes, num_queries), dtype=boxes.dtype)
    for q in range(num_queries):
        query_area = (query_boxes[q, 2] - query_boxes[q, 0] + 1) * \
            (query_boxes[q, 3] - query_boxes[q, 1] + 1)
        for b in range(num_boxes):
            inter_w = min(boxes[b, 2], query_boxes[q, 2]) - \
                max(boxes[b, 0], query_boxes[q, 0]) + 1
            if not inter_w > 0:
                continue
            inter_h = min(boxes[b, 3], query_boxes[q, 3]) - \
                max(boxes[b, 1], query_boxes[q, 1]) + 1
            if not inter_h > 0:
                continue
            box_area = (boxes[b, 2] - boxes[b, 0] + 1) * \
                (boxes[b, 3] - boxes[b, 1] + 1)
            union = float(box_area + query_area - inter_w * inter_h)
            overlaps[b, q] = inter_w * inter_h / union
    return overlaps
@jit
def polys_to_mask_wrt_box(polygons, box, M):
    """Rasterize polygons into an ``M x M`` mask relative to ``box``.

    Every polygon is shifted by the box origin and rescaled so that the box
    spans ``M`` units along each axis, then rasterized with ``poly2mask``.
    The per-polygon masks are merged with a logical OR.

    Args:
        polygons: iterable of polygons; each is flattened to
            ``[x0, y0, x1, y1, ...]``.
        box: indexable ``x1, y1, x2, y2``.
        M: output resolution.

    Returns:
        float32 array that is 1 where any polygon covers the cell, else 0.
    """
    # Guard against degenerate boxes so the rescaling never divides by < 1.
    box_w = np.maximum(box[2] - box[0], 1)
    box_h = np.maximum(box[3] - box[1], 1)

    normalized = []
    for poly in polygons:
        flat = np.array(poly, dtype=np.float32).reshape(-1)
        flat[0::2] = (flat[0::2] - box[0]) * M / box_w
        flat[1::2] = (flat[1::2] - box[1]) * M / box_h
        normalized.append(flat)

    rasters = []
    for flat in normalized:
        assert flat.shape[0] % 2 == 0, flat.shape
        num_points = flat.shape[0] // 2
        rasters.append(poly2mask(flat, num_points, M, M))

    # Sum over polygons, then binarize.
    merged = np.sum(np.array(rasters), axis=0)
    return np.array(merged > 0, dtype=np.float32)
@jit
def expand_mask_targets(masks, mask_class_labels, resolution, num_classes):
    """Expand masks from shape (#masks, resolution ** 2)
    to (#masks, #classes * resolution ** 2) to encode class
    specific mask targets.

    Each mask is copied into the column slot of its class; every other
    entry is -1. Rows whose class is not positive stay entirely -1.
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    cells = resolution**2
    # Target values of -1 are "don't care" / ignore labels
    mask_targets = -np.ones(
        (masks.shape[0], num_classes * cells), dtype=np.int32)
    for row in range(masks.shape[0]):
        cls = int(mask_class_labels[row])
        # Ignore background instance
        # (only happens when there is no fg samples in an image)
        if cls <= 0:
            continue
        begin = cells * cls
        mask_targets[row, begin:begin + cells] = masks[row, :]
    return mask_targets
import six
import os
import numpy as np
from numba import jit
from .bbox import delta2bbox, clip_bbox, expand_bbox, nms
def bbox_post_process(bboxes,
                      bbox_probs,
                      bbox_deltas,
                      im_info,
                      keep_top_k=100,
                      score_thresh=0.05,
                      nms_thresh=0.5,
                      class_nums=81,
                      bbox_reg_weights=(0.1, 0.1, 0.2, 0.2)):
    """Decode box deltas, run per-class NMS and keep the top detections.

    All rows of ``bboxes`` are treated as belonging to a single image
    (offsets ``[0, bboxes.shape[0]]``).

    Args:
        bboxes: proposal boxes, one ``x1, y1, x2, y2`` row per proposal.
        bbox_probs: per-proposal class scores; column ``j`` is class ``j``
            and column 0 is skipped.
        bbox_deltas: per-proposal deltas, four columns per class.
        im_info: per-image rows; ``im_info[i][:2]`` is used as the clip shape
            (height, width) and ``im_info[i][2]`` as the image scale.
        keep_top_k: maximum detections kept per image over all classes.
        score_thresh: detections must score strictly above this value.
        nms_thresh: IoU threshold handed to ``nms``.
        class_nums: number of classes including class 0.
        bbox_reg_weights: multipliers handed to ``delta2bbox``.

    Returns:
        ``(offsets, detections)``: ``offsets`` is an int array of cumulative
        detection counts starting at 0, and ``detections`` is a float32 array
        with rows ``[label, score, x1, y1, x2, y2]``.
    """
    bbox_nums = [0, bboxes.shape[0]]
    bboxes_v = np.array(bboxes)
    bbox_probs_v = np.array(bbox_probs)
    bbox_deltas_v = np.array(bbox_deltas)
    variance_v = np.array(bbox_reg_weights)
    new_bboxes = []
    new_bbox_nums = [0]
    for i in range(len(bbox_nums) - 1):
        start = bbox_nums[i]
        end = bbox_nums[i + 1]
        if start == end:
            # No proposals: still emit an (empty) result and an offset entry
            # so the offsets keep one entry per image plus the leading 0.
            new_bboxes.append(np.zeros((0, 6), dtype=np.float32))
            new_bbox_nums.append(new_bbox_nums[-1])
            continue
        bbox_deltas_n = bbox_deltas_v[start:end, :]  # box delta
        rois_n = bboxes_v[start:end, :]  # box
        rois_n = rois_n / im_info[i][2]  # scale
        rois_n = delta2bbox(bbox_deltas_n, rois_n, variance_v)
        rois_n = clip_bbox(rois_n, im_info[i][:2] / im_info[i][2])
        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = bbox_probs_v[start:end, :]
        for j in range(1, class_nums):
            inds = np.where(scores_n[:, j] > score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = rois_n[inds, j * 4:(j + 1) * 4]
            # nms() reads its input as [x1, y1, x2, y2, score], so the score
            # column must come last there; the emitted rows stay score-first.
            nms_input = np.hstack((rois_j, scores_j[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = nms(nms_input, nms_thresh)
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            nms_dets = dets_j[keep, :]
            # add labels
            label = np.full((len(keep), 1), j)
            cls_boxes[j] = np.hstack((label, nms_dets)).astype(
                np.float32, copy=False)
        # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, 1] for j in range(1, class_nums)])
        if len(image_scores) > keep_top_k:
            image_thresh = np.sort(image_scores)[-keep_top_k]
            for j in range(1, class_nums):
                keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
        new_bboxes_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)])
        new_bboxes.append(new_bboxes_n)
        new_bbox_nums.append(len(new_bboxes_n) + new_bbox_nums[-1])
    new_bboxes = np.vstack(new_bboxes)
    new_bbox_nums = np.array(new_bbox_nums)
    return new_bbox_nums, new_bboxes
@jit
def mask_post_process(bbox_nums, bboxes, masks, im_info):
    """Paste predicted masks into image space and RLE-encode them.

    Args:
        bbox_nums: cumulative detection offsets, one entry per image plus a
            leading 0.
        bboxes: detections with rows ``[label, score, x1, y1, x2, y2]``.
        masks: per-detection masks indexed as ``[det, class, :, :]``.
        im_info: per-image rows; entries 0 and 1 divided by entry 2 give the
            output mask height and width.

    Returns:
        Object array with rows ``[rle, label, score]``.
    """
    # NOTE(review): `cfg`, `cv2` and `mask_util` are not imported in the
    # visible part of this module; they must be in scope for this function
    # to run. TODO confirm where they are expected to come from.
    bboxes = np.array(bboxes)
    M = cfg.resolution
    # Masks are padded by one cell per side, so boxes grow by the same ratio.
    scale = (M + 2.0) / M
    masks_v = np.array(masks)
    boxes = bboxes[:, 2:]
    labels = bboxes[:, 0]
    num_images = len(bbox_nums) - 1
    segms_results = [[] for _ in range(num_images)]
    for i in range(num_images):
        start = bbox_nums[i]
        end = bbox_nums[i + 1]
        cls_segms = []
        masks_n = masks_v[start:end]
        boxes_n = boxes[start:end]
        labels_n = labels[start:end]
        im_h = int(round(im_info[i][0] / im_info[i][2]))
        im_w = int(round(im_info[i][1] / im_info[i][2]))
        # The helper imported from .bbox is `expand_bbox` (the previously
        # called name `expand_boxes` is not defined).
        boxes_n = expand_bbox(boxes_n, scale)
        boxes_n = boxes_n.astype(np.int32)
        padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
        for j in range(len(boxes_n)):
            class_id = int(labels_n[j])
            padded_mask[1:-1, 1:-1] = masks_n[j, class_id, :, :]
            ref_box = boxes_n[j, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)
            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > cfg.mrcnn_thresh_binarize, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
            # Clip the paste window to the image bounds.
            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)
            im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[
                1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])]
            rle = mask_util.encode(
                np.array(
                    im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms.append(rle)
        segms_results[i] = np.array(cls_segms)[:, np.newaxis]
    # Stack per-image results; the loop bound is the number of images
    # (the previously referenced name `lod` is not defined).
    segms_results = np.vstack(segms_results)
    bboxes = np.hstack([segms_results, bboxes])
    return bboxes[:, :3]
@jit
def get_det_res(bbox_nums,
                bbox,
                image_id,
                image_shape,
                num_id_to_cat_id_map,
                batch_size=1):
    """Convert detection rows into a list of result dicts.

    Args:
        bbox_nums: cumulative detection offsets; must have
            ``batch_size + 1`` entries.
        bbox: detections with rows
            ``[num_id, score, xmin, ymin, xmax, ymax]``.
        image_id: per-image ids; ``image_id[i][0]`` is the id of image ``i``.
        image_shape: accepted for interface compatibility; not used.
        num_id_to_cat_id_map: maps ``num_id`` to the reported category id.
        batch_size: number of images in the batch.

    Returns:
        List of dicts with keys ``image_id``, ``category_id``, ``bbox``
        (``[xmin, ymin, w, h]`` with inclusive "+ 1" sizes) and ``score``.
        Empty when ``bbox`` has shape ``(1, 1)``.
    """
    det_res = []
    bbox_v = np.array(bbox)
    if bbox_v.shape == (
            1,
            1, ):
        return det_res
    assert (len(bbox_nums) == batch_size + 1), \
        "Error bbox_nums Tensor offset dimension. bbox_nums({}) vs. batch_size({})"\
        .format(len(bbox_nums), batch_size)
    k = 0
    for i in range(batch_size):
        dt_num_this_img = bbox_nums[i + 1] - bbox_nums[i]
        # Use a separate local: rebinding the `image_id` parameter would
        # break the lookup for every image after the first.
        cur_image_id = int(image_id[i][0])
        for _ in range(dt_num_this_img):
            dt = bbox_v[k]
            k = k + 1
            num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
            category_id = num_id_to_cat_id_map[num_id]
            w = xmax - xmin + 1
            h = ymax - ymin + 1
            det_res.append({
                'image_id': cur_image_id,
                'category_id': category_id,
                'bbox': [xmin, ymin, w, h],
                'score': score
            })
    return det_res
@jit
def get_seg_res(mask_nums, mask, image_id, num_id_to_cat_id_map, batch_size=1):
    """Convert segmentation rows into a list of result dicts.

    Args:
        mask_nums: cumulative offsets, one entry per image plus a leading 0.
        mask: rows of ``[segmentation, num_id, score]``.
        image_id: per-image ids; ``image_id[i][0]`` is the id of image ``i``.
        num_id_to_cat_id_map: maps ``num_id`` to the reported category id.
        batch_size: number of images in the batch.

    Returns:
        List of dicts with keys ``image_id``, ``category_id``,
        ``segmentation`` and ``score``. A bytes ``counts`` entry of the
        segmentation is decoded to text in place.
    """
    seg_res = []
    mask_v = np.array(mask)
    k = 0
    for i in range(batch_size):
        # Use a separate local: rebinding the `image_id` parameter would
        # break the lookup for every image after the first.
        cur_image_id = int(image_id[i][0])
        dt_num_this_img = mask_nums[i + 1] - mask_nums[i]
        for _ in range(dt_num_this_img):
            dt = mask_v[k]
            k = k + 1
            sg, num_id, score = dt.tolist()
            cat_id = num_id_to_cat_id_map[num_id]
            # Decode only when the counts are still bytes, so records that
            # were already decoded do not raise.
            if 'counts' in sg and isinstance(sg['counts'], bytes):
                sg['counts'] = sg['counts'].decode("utf8")
            seg_res.append({
                'image_id': cur_image_id,
                'category_id': cat_id,
                'segmentation': sg,
                'score': score
            })
    return seg_res
import six
import os
import numpy as np
from numba import jit
from .bbox import nms
@jit
def box_decoder(deltas, boxes, weights, bbox_clip=4.13):
    """Decode regression deltas against reference boxes.

    ``deltas`` is laid out in groups of four columns ``(dx, dy, dw, dh)``;
    each group of a row is applied to that row's reference box.

    Args:
        deltas: array whose column count is a multiple of 4.
        boxes: reference boxes indexed as ``x1, y1, x2, y2``.
        weights: four multipliers for (dx, dy, dw, dh).
        bbox_clip: cap applied to the scaled dw/dh before exponentiation.

    Returns:
        Array shaped like ``deltas`` with decoded ``x1, y1, x2, y2`` groups,
        or an empty ``(0, deltas.shape[1])`` array when there are no boxes.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    anchor_w = boxes[:, 2] - boxes[:, 0] + 1.0
    anchor_h = boxes[:, 3] - boxes[:, 1] + 1.0
    anchor_cx = boxes[:, 0] + 0.5 * anchor_w
    anchor_cy = boxes[:, 1] + 0.5 * anchor_h

    wx, wy, ww, wh = weights
    off_x = deltas[:, 0::4] * wx
    off_y = deltas[:, 1::4] * wy
    grow_w = deltas[:, 2::4] * ww
    grow_h = deltas[:, 3::4] * wh

    # Prevent sending too large values into np.exp()
    grow_w = np.minimum(grow_w, bbox_clip)
    grow_h = np.minimum(grow_h, bbox_clip)

    center_x = off_x * anchor_w[:, np.newaxis] + anchor_cx[:, np.newaxis]
    center_y = off_y * anchor_h[:, np.newaxis] + anchor_cy[:, np.newaxis]
    out_w = np.exp(grow_w) * anchor_w[:, np.newaxis]
    out_h = np.exp(grow_h) * anchor_h[:, np.newaxis]

    result = np.zeros(deltas.shape, dtype=deltas.dtype)
    result[:, 0::4] = center_x - 0.5 * out_w  # x1
    result[:, 1::4] = center_y - 0.5 * out_h  # y1
    # "- 1" keeps x2/y2 consistent with the inclusive "+ 1" sizes above.
    result[:, 2::4] = center_x + 0.5 * out_w - 1  # x2
    result[:, 3::4] = center_y + 0.5 * out_h - 1  # y2
    return result
@jit
def clip_tiled_boxes(boxes, im_shape):
    """Clip boxes to image boundaries. im_shape is [height, width] and boxes
    has shape (N, 4 * num_tiled_boxes).

    The array is clipped in place and also returned.
    """
    assert boxes.shape[1] % 4 == 0, \
        'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
            boxes.shape[1]
        )
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    # Even column offsets hold x coordinates, odd ones hold y coordinates.
    for offset in range(4):
        bound = max_x if offset % 2 == 0 else max_y
        boxes[:, offset::4] = np.maximum(
            np.minimum(boxes[:, offset::4], bound), 0)
    return boxes
#@jit
def get_nmsed_box(rpn_rois,
                  confs,
                  locs,
                  class_nums,
                  im_info,
                  bbox_reg_weights=(0.1, 0.1, 0.2, 0.2),
                  score_thresh=0.05,
                  nms_thresh=0.5,
                  detections_per_im=100):
    """Decode box deltas, run per-class NMS and keep the top detections.

    All rows of ``rpn_rois`` are treated as belonging to a single image
    (offsets ``[0, rpn_rois.shape[0]]``).

    Args:
        rpn_rois: proposal boxes, one ``x1, y1, x2, y2`` row per proposal.
        confs: per-proposal class scores; column ``j`` is class ``j`` and
            column 0 is skipped.
        locs: per-proposal deltas, four columns per class.
        class_nums: number of classes including class 0.
        im_info: per-image rows; ``im_info[i][:2]`` is used as the clip shape
            (height, width) and ``im_info[i][2]`` as the image scale.
        bbox_reg_weights: multipliers handed to ``box_decoder``.
        score_thresh: detections must score strictly above this value.
        nms_thresh: IoU threshold handed to ``nms``.
        detections_per_im: maximum detections kept per image.

    Returns:
        ``(offsets, detections)``: ``offsets`` is a list of cumulative
        detection counts starting at 0, and ``detections`` is a float32 array
        with rows ``[label, score, x1, y1, x2, y2]``.
    """
    box_nums = [0, rpn_rois.shape[0]]
    variance_v = np.array(bbox_reg_weights)
    rpn_rois_v = np.array(rpn_rois)
    confs_v = np.array(confs)
    locs_v = np.array(locs)
    im_results = []
    new_box_nums = [0]
    for i in range(len(box_nums) - 1):
        start = box_nums[i]
        end = box_nums[i + 1]
        if start == end:
            # No proposals: still emit an (empty) result and an offset entry
            # so the offsets keep one entry per image plus the leading 0.
            im_results.append(np.zeros((0, 6), dtype=np.float32))
            new_box_nums.append(new_box_nums[-1])
            continue
        locs_n = locs_v[start:end, :]  # box delta
        rois_n = rpn_rois_v[start:end, :]  # box
        rois_n = rois_n / im_info[i][2]  # scale
        rois_n = box_decoder(locs_n, rois_n, variance_v)
        rois_n = clip_tiled_boxes(rois_n, im_info[i][:2] / im_info[i][2])
        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = confs_v[start:end, :]
        for j in range(1, class_nums):
            # Use the function's own thresholds; the previously referenced
            # `TEST` object is not defined in this module.
            inds = np.where(scores_n[:, j] > score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = rois_n[inds, j * 4:(j + 1) * 4]
            # nms() reads its input as [x1, y1, x2, y2, score], so the score
            # column must come last there; the emitted rows stay score-first.
            nms_input = np.hstack((rois_j, scores_j[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = nms(nms_input, nms_thresh)
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            nms_dets = dets_j[keep, :]
            # add labels
            label = np.full((len(keep), 1), j)
            cls_boxes[j] = np.hstack((label, nms_dets)).astype(
                np.float32, copy=False)
        # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, 1] for j in range(1, class_nums)])
        if len(image_scores) > detections_per_im:
            image_thresh = np.sort(image_scores)[-detections_per_im]
            for j in range(1, class_nums):
                keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
        im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)])
        im_results.append(im_results_n)
        new_box_nums.append(len(im_results_n) + new_box_nums[-1])
    im_results = np.vstack(im_results)
    return new_box_nums, im_results
@jit
def get_dt_res(batch_size, box_nums, nmsed_out, data, num_id_to_cat_id_map):
    """Convert detection rows into a list of result dicts.

    Args:
        batch_size: number of images in the batch.
        box_nums: cumulative detection offsets; must have
            ``batch_size + 1`` entries.
        nmsed_out: detections with rows
            ``[num_id, score, xmin, ymin, xmax, ymax]``.
        data: per-image records; the last entry of ``data[i]`` is the
            image id.
        num_id_to_cat_id_map: maps ``num_id`` to the reported category id.

    Returns:
        List of dicts with keys ``image_id``, ``category_id``, ``bbox``
        (``[xmin, ymin, w, h]`` with inclusive "+ 1" sizes) and ``score``.
        Empty when ``nmsed_out`` has shape ``(1, 1)``.
    """
    dts_res = []
    nmsed_out_v = np.array(nmsed_out)
    if nmsed_out_v.shape == (
            1,
            1, ):
        return dts_res
    assert (len(box_nums) == batch_size + 1), \
        "Error Tensor offset dimension. Box Nums({}) vs. batch_size({})"\
        .format(len(box_nums), batch_size)
    k = 0
    for i in range(batch_size):
        dt_num_this_img = box_nums[i + 1] - box_nums[i]
        image_id = int(data[i][-1])
        # The image width/height were read here before but never used, so
        # those lookups were dropped.
        for _ in range(dt_num_this_img):
            dt = nmsed_out_v[k]
            k = k + 1
            num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
            category_id = num_id_to_cat_id_map[num_id]
            w = xmax - xmin + 1
            h = ymax - ymin + 1
            dts_res.append({
                'image_id': image_id,
                'category_id': category_id,
                'bbox': [xmin, ymin, w, h],
                'score': score
            })
    return dts_res
@jit
def get_segms_res(batch_size, box_nums, segms_out, data, num_id_to_cat_id_map):
    """Convert segmentation rows into a list of result dicts.

    Args:
        batch_size: number of images in the batch.
        box_nums: cumulative offsets, one entry per image plus a leading 0.
        segms_out: rows of ``[segmentation, num_id, score]``.
        data: per-image records; the last entry of ``data[i]`` is the
            image id.
        num_id_to_cat_id_map: maps ``num_id`` to the reported category id.

    Returns:
        List of dicts with keys ``image_id``, ``category_id``,
        ``segmentation`` and ``score``. A bytes ``counts`` entry of the
        segmentation is decoded to text in place.
    """
    segms_res = []
    segms_out_v = np.array(segms_out)
    k = 0
    for i in range(batch_size):
        dt_num_this_img = box_nums[i + 1] - box_nums[i]
        image_id = int(data[i][-1])
        for _ in range(dt_num_this_img):
            dt = segms_out_v[k]
            k = k + 1
            segm, num_id, score = dt.tolist()
            cat_id = num_id_to_cat_id_map[num_id]
            # Decode only when the counts are still bytes: the dict is
            # updated in place, so a second pass over the same records would
            # otherwise call .decode on text and raise.
            if 'counts' in segm and isinstance(segm['counts'], bytes):
                segm['counts'] = segm['counts'].decode("utf8")
            segms_res.append({
                'image_id': image_id,
                'category_id': cat_id,
                'segmentation': segm,
                'score': score
            })
    return segms_res
import six
import math
import numpy as np
from numba import jit
from .bbox import *
from .mask import *
@jit
def generate_rpn_anchor_target(anchor_box,
                               gt_boxes,
                               is_crowd,
                               im_info,
                               rpn_straddle_thresh,
                               rpn_batch_size_per_im,
                               rpn_positive_overlap,
                               rpn_negative_overlap,
                               rpn_fg_fraction,
                               use_random=True):
    """Sample RPN training targets for every image of a batch.

    Args:
        anchor_box: anchors, one ``x1, y1, x2, y2`` row per anchor.
        gt_boxes: per-image ground-truth boxes (first axis is the batch).
        is_crowd: per-image crowd flags; only entries equal to 0 are used.
        im_info: per-image rows read as ``[height, width, scale]``.
        rpn_straddle_thresh: when ``>= 0``, only anchors lying inside the
            image within this margin are considered.
        rpn_batch_size_per_im, rpn_positive_overlap, rpn_negative_overlap,
        rpn_fg_fraction, use_random: forwarded to ``_sample_anchor``.

    Returns:
        ``(loc_indexes, score_indexes, tgt_labels, tgt_bboxes,
        bbox_inside_weights)`` concatenated over the batch. Anchor indexes of
        image ``i`` are offset by ``i * number_of_anchors``; labels and box
        targets are float32.
    """
    anchor_num = anchor_box.shape[0]
    batch_size = gt_boxes.shape[0]

    loc_parts, score_parts = [], []
    label_parts, delta_parts, weight_parts = [], [], []
    for i in range(batch_size):
        im_height = im_info[i][0]
        im_width = im_info[i][1]
        im_scale = im_info[i][2]

        if rpn_straddle_thresh >= 0:
            # Only keep anchors inside the image by a margin of straddle_thresh
            inside = (
                (anchor_box[:, 0] >= -rpn_straddle_thresh) &
                (anchor_box[:, 1] >= -rpn_straddle_thresh) &
                (anchor_box[:, 2] < im_width + rpn_straddle_thresh) &
                (anchor_box[:, 3] < im_height + rpn_straddle_thresh))
            inds_inside = np.where(inside)[0]
            inside_anchors = anchor_box[inds_inside, :]
        else:
            inds_inside = np.arange(anchor_box.shape[0])
            inside_anchors = anchor_box

        # Ground truth is brought to the scaled image; crowd boxes are dropped.
        gt_scaled = gt_boxes[i] * im_scale
        gt_scaled = gt_scaled[np.where(is_crowd[i] == 0)[0]]

        iou = bbox_overlaps(inside_anchors, gt_scaled)
        loc_inds, score_inds, labels, gt_inds, inside_weight = _sample_anchor(
            iou, rpn_batch_size_per_im, rpn_positive_overlap,
            rpn_negative_overlap, rpn_fg_fraction, use_random)

        # unmap to all anchor
        loc_inds = inds_inside[loc_inds]
        score_inds = inds_inside[score_inds]
        box_deltas = bbox2delta(anchor_box[loc_inds], gt_scaled[gt_inds],
                                [1., 1., 1., 1.])

        image_offset = i * anchor_num
        loc_parts.append(loc_inds + image_offset)
        score_parts.append(score_inds + image_offset)
        label_parts.append(labels)
        delta_parts.append(box_deltas)
        weight_parts.append(inside_weight)

    loc_indexes = np.concatenate(loc_parts)
    score_indexes = np.concatenate(score_parts)
    tgt_labels = np.concatenate(label_parts).astype('float32')
    tgt_bboxes = np.vstack(delta_parts).astype('float32')
    bbox_inside_weights = np.vstack(weight_parts)
    return loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights
@jit
def _sample_anchor(anchor_by_gt_overlap,
                   rpn_batch_size_per_im,
                   rpn_positive_overlap,
                   rpn_negative_overlap,
                   rpn_fg_fraction,
                   use_random=True):
    """Pick foreground/background anchors from an anchor-by-gt IoU matrix.

    Args:
        anchor_by_gt_overlap: IoU matrix, rows are anchors, columns are gts.
        rpn_batch_size_per_im: total number of anchors to sample.
        rpn_positive_overlap: anchors whose best IoU is ``>=`` this value are
            foreground (as are the anchors holding a gt's best IoU).
        rpn_negative_overlap: anchors whose best IoU is ``<`` this value are
            background candidates.
        rpn_fg_fraction: fraction of the batch reserved for foreground.
        use_random: sample randomly when there are too many candidates;
            otherwise take the leading ones.

    Returns:
        ``(loc_index, score_index, labels, gt_inds, bbox_inside_weight)``.
        ``loc_index`` starts with one placeholder entry for every sampled
        background anchor that was also foreground; those rows get a zero
        inside weight, all other rows get weight 1.
    """
    num_anchors = anchor_by_gt_overlap.shape[0]

    # Best gt for every anchor, and the best IoU reached for every gt.
    anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
    anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_anchors),
                                            anchor_to_gt_argmax]
    gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
    gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange(
        anchor_by_gt_overlap.shape[1])]
    anchors_with_max_overlap = np.where(
        anchor_by_gt_overlap == gt_to_anchor_max)[0]

    # -1 means "ignored" until an anchor is marked fg (1) or bg (0).
    labels = np.full((num_anchors, ), -1, dtype=np.int32)
    labels[anchors_with_max_overlap] = 1
    labels[anchor_to_gt_max >= rpn_positive_overlap] = 1

    # Cap the number of foreground anchors.
    num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg and use_random:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    else:
        disable_inds = fg_inds[num_fg:]
    labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # Fill the rest of the batch with background anchors.
    num_bg = rpn_batch_size_per_im - len(fg_inds)
    bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
    if len(bg_inds) > num_bg and use_random:
        enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
    else:
        enable_inds = bg_inds[:num_bg]

    # Every sampled background anchor that was also foreground is replaced in
    # the location list by a placeholder pointing at the first foreground.
    first_fg = fg_inds[0]
    fake_num = sum(1 for bg_id in enable_inds if bg_id in fg_inds)
    fg_fake_inds = np.full((fake_num, ), first_fg, dtype=np.int32)
    labels[enable_inds] = 0

    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]
    loc_index = np.hstack([fg_fake_inds, fg_inds])
    score_index = np.hstack([fg_inds, bg_inds])
    labels = labels[score_index]
    gt_inds = anchor_to_gt_argmax[loc_index]

    bbox_inside_weight = np.zeros((len(loc_index), 4), dtype=np.float32)
    bbox_inside_weight[fake_num:, :] = 1
    return loc_index, score_index, labels, gt_inds, bbox_inside_weight
@jit
def generate_proposal_target(rpn_rois,
                             rpn_rois_nums,
                             gt_classes,
                             is_crowd,
                             gt_boxes,
                             im_info,
                             batch_size_per_im,
                             fg_fraction,
                             fg_thresh,
                             bg_thresh_hi,
                             bg_thresh_lo,
                             bbox_reg_weights,
                             class_nums=81,
                             use_random=True,
                             is_cls_agnostic=False,
                             is_cascade_rcnn=False):
    """Sample RoIs and build box-head training targets for a batch.

    Args:
        rpn_rois: proposals of all images stacked along the first axis.
        rpn_rois_nums: number of proposals of each image, in order
            (presumably per-image counts, matching the ``rois_nums`` this
            function returns - verify against the caller).
        gt_classes, is_crowd, gt_boxes, im_info: per-image inputs, indexed
            by image.
        batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
        bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
        is_cls_agnostic, is_cascade_rcnn: forwarded to ``_sample_rois``.

    Returns:
        ``(rois, bbox_labels, bbox_gts, bbox_inside_weights,
        bbox_outside_weights, rois_nums)`` concatenated over the batch;
        ``rois_nums`` holds the number of sampled RoIs per image.
    """
    rois = []
    labels_int32 = []
    bbox_targets = []
    bbox_inside_weights = []
    bbox_outside_weights = []
    rois_nums = []
    # TODO: modify here
    # rpn_rois = rpn_rois.reshape(batch_size, -1, 4)
    st_num = 0
    for im_i in range(len(rpn_rois_nums)):
        rpn_rois_num = rpn_rois_nums[im_i]
        # Each image owns `rpn_rois_num` consecutive rows, so the slice ends
        # at start + count (using the count as an absolute end index was
        # only right for the first image).
        end_num = st_num + rpn_rois_num
        frcn_blobs = _sample_rois(
            rpn_rois[st_num:end_num], gt_classes[im_i], is_crowd[im_i],
            gt_boxes[im_i], im_info[im_i], batch_size_per_im, fg_fraction,
            fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
            use_random, is_cls_agnostic, is_cascade_rcnn)
        st_num = end_num
        rois.append(frcn_blobs['rois'])
        labels_int32.append(frcn_blobs['labels_int32'])
        bbox_targets.append(frcn_blobs['bbox_targets'])
        bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
        bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])
        rois_nums.append(frcn_blobs['rois'].shape[0])

    rois = np.concatenate(rois, axis=0).astype(np.float32)
    bbox_labels = np.concatenate(
        labels_int32, axis=0).astype(np.int32).reshape(-1, 1)
    bbox_gts = np.concatenate(bbox_targets, axis=0).astype(np.float32)
    bbox_inside_weights = np.concatenate(
        bbox_inside_weights, axis=0).astype(np.float32)
    bbox_outside_weights = np.concatenate(
        bbox_outside_weights, axis=0).astype(np.float32)
    rois_nums = np.asarray(rois_nums, np.int32)
    return rois, bbox_labels, bbox_gts, bbox_inside_weights, bbox_outside_weights, rois_nums
@jit
def _sample_rois(rpn_rois,
                 gt_classes,
                 is_crowd,
                 gt_boxes,
                 im_info,
                 batch_size_per_im,
                 fg_fraction,
                 fg_thresh,
                 bg_thresh_hi,
                 bg_thresh_lo,
                 bbox_reg_weights,
                 class_nums,
                 use_random=True,
                 is_cls_agnostic=False,
                 is_cascade_rcnn=False):
    """Sample foreground/background RoIs of one image and build its targets.

    Proposals are rescaled by ``1 / im_info[2]``, stacked behind the
    ground-truth boxes and matched to ground truth by IoU. RoIs whose best
    overlap is ``>= fg_thresh`` are foreground; those in
    ``[bg_thresh_lo, bg_thresh_hi)`` are background.

    Returns:
        Dict with keys ``rois`` (scaled back by ``im_info[2]``),
        ``labels_int32``, ``bbox_targets``, ``bbox_inside_weights`` and
        ``bbox_outside_weights``.
    """
    rois_per_image = int(batch_size_per_im)
    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))

    # Roidb
    im_scale = im_info[2]
    rpn_rois = rpn_rois * (1. / im_scale)
    if is_cascade_rcnn:
        rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]

    boxes = np.vstack([gt_boxes, rpn_rois])
    num_boxes = boxes.shape[0]
    gt_overlaps = np.zeros((num_boxes, class_nums))
    box_to_gt_ind_map = np.zeros((num_boxes), dtype=np.int32)
    if len(gt_boxes) > 0:
        proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)
        best_gt = proposal_to_gt_overlaps.argmax(axis=1)
        best_iou = proposal_to_gt_overlaps.max(axis=1)
        # Boxes which with non-zero overlap with gt boxes
        hit_inds = np.where(best_iou > 0)[0].astype('int32')
        hit_classes = gt_classes[best_gt[hit_inds]].astype('int32')
        gt_overlaps[hit_inds, hit_classes] = best_iou[hit_inds]
        box_to_gt_ind_map[hit_inds] = best_gt[hit_inds]

    # Rows selected by the crowd flags are excluded from sampling.
    gt_overlaps[np.where(is_crowd)[0]] = -1
    max_overlaps = gt_overlaps.max(axis=1)
    max_classes = gt_overlaps.argmax(axis=1)

    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
                                                        bg_thresh_lo))[0]
    if is_cascade_rcnn:
        # Cascade RCNN Decode Filter: drop degenerate boxes, keep every
        # foreground/background candidate without subsampling.
        box_ws = boxes[:, 2] - boxes[:, 0] + 1
        box_hs = boxes[:, 3] - boxes[:, 1] + 1
        boxes = boxes[np.where((box_ws > 0) & (box_hs > 0))[0]]
        fg_rois_per_this_image = fg_inds.shape[0]
    else:
        # Foreground: sample if there are too many
        fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
        if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random:
            fg_inds = np.random.choice(
                fg_inds, size=fg_rois_per_this_image, replace=False)
        fg_inds = fg_inds[:fg_rois_per_this_image]
        # Background: fill what is left of the batch, sample if too many
        bg_rois_per_this_image = np.minimum(
            rois_per_image - fg_rois_per_this_image, bg_inds.shape[0])
        if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random:
            bg_inds = np.random.choice(
                bg_inds, size=bg_rois_per_this_image, replace=False)
        bg_inds = bg_inds[:bg_rois_per_this_image]

    keep_inds = np.append(fg_inds, bg_inds)
    sampled_labels = max_classes[keep_inds]
    # Everything after the foreground block is background (label 0).
    sampled_labels[fg_rois_per_this_image:] = 0
    sampled_boxes = boxes[keep_inds]
    sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
    sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]

    bbox_label_targets = compute_bbox_targets(sampled_boxes, sampled_gts,
                                              sampled_labels, bbox_reg_weights)
    bbox_targets, bbox_inside_weights = expand_bbox_targets(
        bbox_label_targets, class_nums, is_cls_agnostic)
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)

    # Faster RCNN blobs (rois are scaled back to the network input size)
    return dict(
        rois=sampled_boxes * im_scale,
        labels_int32=sampled_labels,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights)
@jit
def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
                         rois_nums, labels_int32, num_classes, resolution):
    """Build mask-head training targets for a batch.

    Args:
        im_info, gt_classes, is_crowd, gt_segms: per-image inputs, indexed
            by image.
        rois: sampled RoIs of all images stacked along the first axis.
        rois_nums: number of RoIs of each image, in order (presumably
            per-image counts as returned by ``generate_proposal_target`` -
            verify against the caller).
        labels_int32: labels row-aligned with ``rois``.
        num_classes, resolution: forwarded to ``_sample_mask``.

    Returns:
        ``(mask_rois, rois_has_mask_int32, mask_int32)`` concatenated over
        the batch.
    """
    mask_rois = []
    rois_has_mask_int32 = []
    mask_int32 = []
    st_num = 0
    for i in range(len(rois_nums)):
        rois_num = rois_nums[i]
        # Each image owns `rois_num` consecutive rows, so the slice ends at
        # start + count (using the count as an absolute end index was only
        # right for the first image).
        end_num = st_num + rois_num
        mask_blob = _sample_mask(
            rois[st_num:end_num], labels_int32[st_num:end_num], gt_segms[i],
            im_info[i], gt_classes[i], is_crowd[i], num_classes, resolution)
        st_num = end_num
        mask_rois.append(mask_blob['mask_rois'])
        rois_has_mask_int32.append(mask_blob['roi_has_mask_int32'])
        mask_int32.append(mask_blob['mask_int32'])
    mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32)
    rois_has_mask_int32 = np.concatenate(
        rois_has_mask_int32, axis=0).astype(np.int32)
    mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32)
    return mask_rois, rois_has_mask_int32, mask_int32
@jit
def _sample_mask(
        rois,
        label_int32,
        gt_polys,
        im_info,
        gt_classes,
        is_crowd,
        num_classes,
        resolution, ):
    """Build the mask targets of one image.

    Padded polygon points are removed, every foreground RoI is matched to
    the ground-truth instance whose polygon bounding box overlaps it most,
    and that instance's polygons are rasterized inside the RoI.

    Returns:
        Dict with keys ``mask_rois`` (foreground RoIs, multiplied back by
        ``im_info[2]``), ``roi_has_mask_int32`` (indices of those RoIs) and
        ``mask_int32`` (class-expanded targets). When there is no
        foreground RoI, a single background RoI with an all -1 target is
        returned instead.
    """
    # remove padding: keep only points whose coordinates are both >= 0
    new_gt_polys = []
    for inst in range(gt_polys.shape[0]):
        instance_segs = []
        for seg in range(gt_polys[inst].shape[0]):
            padded = gt_polys[inst][seg]
            points = []
            for pt in range(padded.shape[0]):
                x, y = padded[pt]
                if x >= 0 and y >= 0:
                    points.append([x, y])  # array, one poly
            if len(points) > 0:
                instance_segs.append(points)
        new_gt_polys.append(instance_segs)

    im_scale = im_info[2]
    sample_boxes = rois / im_scale

    # Only non-crowd instances with a positive class provide polygons.
    polys_gt_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
    polys_gt = [new_gt_polys[i] for i in polys_gt_inds]
    boxes_from_polys = polys_to_boxes(polys_gt)

    fg_inds = np.where(label_int32 > 0)[0]
    roi_has_mask = fg_inds.copy()
    cells = resolution**2
    if fg_inds.shape[0] > 0:
        mask_class_labels = label_int32[fg_inds]
        masks = np.zeros((fg_inds.shape[0], cells), dtype=np.int32)
        rois_fg = sample_boxes[fg_inds]
        overlaps_bbfg_bbpolys = bbox_overlaps_mask(rois_fg, boxes_from_polys)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)
        for row in range(rois_fg.shape[0]):
            matched_polys = polys_gt[fg_polys_inds[row]]
            raster = polys_to_mask_wrt_box(matched_polys, rois_fg[row],
                                           resolution)
            raster = np.array(raster > 0, dtype=np.int32)
            masks[row, :] = np.reshape(raster, cells)
    else:
        # No foreground: emit one background RoI with an ignore-only target.
        bg_inds = np.where(label_int32 == 0)[0]
        rois_fg = sample_boxes[bg_inds[0]].reshape((1, -1))
        masks = -np.ones((1, cells), dtype=np.int32)
        mask_class_labels = np.zeros((1, ))
        roi_has_mask = np.append(roi_has_mask, 0)

    masks = expand_mask_targets(masks, mask_class_labels, resolution,
                                num_classes)
    rois_fg *= im_scale

    return {
        'mask_rois': rois_fg,
        'roi_has_mask_int32': roi_has_mask,
        'mask_int32': masks,
    }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册