# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import math import paddle import paddle.nn.functional as F import math def bbox2delta(src_boxes, tgt_boxes, weights): src_w = src_boxes[:, 2] - src_boxes[:, 0] src_h = src_boxes[:, 3] - src_boxes[:, 1] src_ctr_x = src_boxes[:, 0] + 0.5 * src_w src_ctr_y = src_boxes[:, 1] + 0.5 * src_h tgt_w = tgt_boxes[:, 2] - tgt_boxes[:, 0] tgt_h = tgt_boxes[:, 3] - tgt_boxes[:, 1] tgt_ctr_x = tgt_boxes[:, 0] + 0.5 * tgt_w tgt_ctr_y = tgt_boxes[:, 1] + 0.5 * tgt_h wx, wy, ww, wh = weights dx = wx * (tgt_ctr_x - src_ctr_x) / src_w dy = wy * (tgt_ctr_y - src_ctr_y) / src_h dw = ww * paddle.log(tgt_w / src_w) dh = wh * paddle.log(tgt_h / src_h) deltas = paddle.stack((dx, dy, dw, dh), axis=1) return deltas def delta2bbox(deltas, boxes, weights): clip_scale = math.log(1000.0 / 16) widths = boxes[:, 2] - boxes[:, 0] heights = boxes[:, 3] - boxes[:, 1] ctr_x = boxes[:, 0] + 0.5 * widths ctr_y = boxes[:, 1] + 0.5 * heights wx, wy, ww, wh = weights dx = deltas[:, 0::4] / wx dy = deltas[:, 1::4] / wy dw = deltas[:, 2::4] / ww dh = deltas[:, 3::4] / wh # Prevent sending too large values into paddle.exp() dw = paddle.clip(dw, max=clip_scale) dh = paddle.clip(dh, max=clip_scale) pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1) pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1) pred_w = paddle.exp(dw) * widths.unsqueeze(1) pred_h = paddle.exp(dh) * heights.unsqueeze(1) pred_boxes = [] pred_boxes.append(pred_ctr_x - 0.5 * pred_w) pred_boxes.append(pred_ctr_y - 0.5 * pred_h) pred_boxes.append(pred_ctr_x + 0.5 * pred_w) pred_boxes.append(pred_ctr_y + 0.5 * pred_h) pred_boxes = paddle.stack(pred_boxes, axis=-1) return pred_boxes def expand_bbox(bboxes, scale): w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5 h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5 x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5 y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5 w_half *= scale h_half *= scale bboxes_exp = np.zeros(bboxes.shape, dtype=np.float32) bboxes_exp[:, 0] = x_c - w_half bboxes_exp[:, 2] = x_c + w_half bboxes_exp[:, 1] = y_c - h_half bboxes_exp[:, 3] = y_c + h_half return bboxes_exp def clip_bbox(boxes, im_shape): h, w = im_shape[0], im_shape[1] x1 = boxes[:, 0].clip(0, w) y1 = boxes[:, 1].clip(0, h) x2 = boxes[:, 2].clip(0, w) y2 = boxes[:, 3].clip(0, h) return paddle.stack([x1, y1, x2, y2], axis=1) def nonempty_bbox(boxes, min_size=0, return_mask=False): w = boxes[:, 2] - boxes[:, 0] h = boxes[:, 3] - boxes[:, 1] mask = paddle.logical_and(w > min_size, w > min_size) if return_mask: return mask keep = paddle.nonzero(mask).flatten() return keep def bbox_area(boxes): return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) def bbox_overlaps(boxes1, boxes2): """ Calculate overlaps between boxes1 and boxes2 Args: boxes1 (Tensor): boxes with shape [M, 4] boxes2 (Tensor): boxes with shape [N, 4] Return: overlaps (Tensor): overlaps between boxes1 and boxes2 with shape [M, N] """ area1 = bbox_area(boxes1) area2 = bbox_area(boxes2) xy_max = paddle.minimum( paddle.unsqueeze(boxes1, 1)[:, :, 2:], boxes2[:, 2:]) xy_min = paddle.maximum( paddle.unsqueeze(boxes1, 1)[:, :, :2], boxes2[:, :2]) width_height = xy_max - xy_min width_height = width_height.clip(min=0) inter = width_height.prod(axis=2) overlaps = paddle.where(inter > 0, inter / (paddle.unsqueeze(area1, 1) + area2 - inter), paddle.zeros_like(inter)) return overlaps def xywh2xyxy(box): x, y, w, h = box x1 = x - w * 0.5 y1 = y - h * 0.5 x2 = x + w * 0.5 y2 = y + h * 0.5 return [x1, y1, x2, y2] def make_grid(h, w, dtype): yv, xv = paddle.meshgrid([paddle.arange(h), paddle.arange(w)]) return paddle.stack((xv, yv), 2).cast(dtype=dtype) def decode_yolo(box, anchor, downsample_ratio): """decode yolo box Args: box (list): [x, y, w, h], all have the shape [b, na, h, w, 1] anchor (list): anchor with the shape [na, 2] downsample_ratio (int): downsample ratio, default 32 scale (float): scale, default 1. Return: box (list): decoded box, [x, y, w, h], all have the shape [b, na, h, w, 1] """ x, y, w, h = box na, grid_h, grid_w = x.shape[1:4] grid = make_grid(grid_h, grid_w, x.dtype).reshape((1, 1, grid_h, grid_w, 2)) x1 = (x + grid[:, :, :, :, 0:1]) / grid_w y1 = (y + grid[:, :, :, :, 1:2]) / grid_h anchor = paddle.to_tensor(anchor) anchor = paddle.cast(anchor, x.dtype) anchor = anchor.reshape((1, na, 1, 1, 2)) w1 = paddle.exp(w) * anchor[:, :, :, :, 0:1] / (downsample_ratio * grid_w) h1 = paddle.exp(h) * anchor[:, :, :, :, 1:2] / (downsample_ratio * grid_h) return [x1, y1, w1, h1] def iou_similarity(box1, box2, eps=1e-9): """Calculate iou of box1 and box2 Args: box1 (Tensor): box with the shape [N, M1, 4] box2 (Tensor): box with the shape [N, M2, 4] Return: iou (Tensor): iou between box1 and box2 with the shape [N, M1, M2] """ box1 = box1.unsqueeze(2) # [N, M1, 4] -> [N, M1, 1, 4] box2 = box2.unsqueeze(1) # [N, M2, 4] -> [N, 1, M2, 4] px1y1, px2y2 = box1[:, :, :, 0:2], box1[:, :, :, 2:4] gx1y1, gx2y2 = box2[:, :, :, 0:2], box2[:, :, :, 2:4] x1y1 = paddle.maximum(px1y1, gx1y1) x2y2 = paddle.minimum(px2y2, gx2y2) overlap = (x2y2 - x1y1).clip(0).prod(-1) area1 = (px2y2 - px1y1).clip(0).prod(-1) area2 = (gx2y2 - gx1y1).clip(0).prod(-1) union = area1 + area2 - overlap + eps return overlap / union def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9): """calculate the iou of box1 and box2 Args: box1 (list): [x, y, w, h], all have the shape [b, na, h, w, 1] box2 (list): [x, y, w, h], all have the shape [b, na, h, w, 1] giou (bool): whether use giou or not, default False diou (bool): whether use diou or not, default False ciou (bool): whether use ciou or not, default False eps (float): epsilon to avoid divide by zero Return: iou (Tensor): iou of box1 and box1, with the shape [b, na, h, w, 1] """ px1, py1, px2, py2 = box1 gx1, gy1, gx2, gy2 = box2 x1 = paddle.maximum(px1, gx1) y1 = paddle.maximum(py1, gy1) x2 = paddle.minimum(px2, gx2) y2 = paddle.minimum(py2, gy2) overlap = ((x2 - x1).clip(0)) * ((y2 - y1).clip(0)) area1 = (px2 - px1) * (py2 - py1) area1 = area1.clip(0) area2 = (gx2 - gx1) * (gy2 - gy1) area2 = area2.clip(0) union = area1 + area2 - overlap + eps iou = overlap / union if giou or ciou or diou: # convex w, h cw = paddle.maximum(px2, gx2) - paddle.minimum(px1, gx1) ch = paddle.maximum(py2, gy2) - paddle.minimum(py1, gy1) if giou: c_area = cw * ch + eps return iou - (c_area - union) / c_area else: # convex diagonal squared c2 = cw**2 + ch**2 + eps # center distance rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4 if diou: return iou - rho2 / c2 else: w1, h1 = px2 - px1, py2 - py1 + eps w2, h2 = gx2 - gx1, gy2 - gy1 + eps delta = paddle.atan(w1 / h1) - paddle.atan(w2 / h2) v = (4 / math.pi**2) * paddle.pow(delta, 2) alpha = v / (1 + eps - iou + v) alpha.stop_gradient = True return iou - (rho2 / c2 + v * alpha) else: return iou