# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging

import numpy as np

# OpenCV is only needed by mask_encode(); the NMS / voting helpers below are
# pure NumPy, so a missing cv2 must not prevent importing them.
try:
    import cv2
except ImportError:
    cv2 = None

# ``fluid`` is not referenced by any function in this file.  The import is
# kept so the module-level name stays available where it exists, but it is
# guarded so that environments without ``paddle.fluid`` can still import
# this module.
try:
    import paddle.fluid as fluid
except ImportError:
    fluid = None

__all__ = ['nms']

logger = logging.getLogger(__name__)


def box_flip(boxes, im_shape):
    """Mirror boxes horizontally inside an image.

    Args:
        boxes: 2-D array whose columns repeat ``(x1, y1, x2, y2)`` in groups
            of four (one group per class).
        im_shape: indexable such that ``im_shape[0][1]`` is the image width.

    Returns:
        A copy of ``boxes`` with every x1/x2 pair mirrored; y values are
        untouched.
    """
    im_width = im_shape[0][1]
    flipped_boxes = boxes.copy()
    # The new left edge is the mirrored old right edge, and vice versa.
    flipped_boxes[:, 0::4] = im_width - boxes[:, 2::4] - 1
    flipped_boxes[:, 2::4] = im_width - boxes[:, 0::4] - 1
    return flipped_boxes


def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS.

    Args:
        dets: array of shape (N, 5) with columns
            ``(score, x1, y1, x2, y2)``.
        thresh: a box is suppressed when its IoU with a higher-scoring kept
            box is ``>= thresh``.

    Returns:
        ``[]`` when ``dets`` has no rows, otherwise a 1-D array with the row
        indices (ascending) of the boxes that survive suppression.
    """
    if dets.shape[0] == 0:
        return []
    scores = dets[:, 0]
    x1 = dets[:, 1]
    y1 = dets[:, 2]
    x2 = dets[:, 3]
    y2 = dets[:, 4]

    # Pixel-inclusive box areas (hence the +1).
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    ndets = dets.shape[0]
    # ``np.int`` was removed from NumPy; a boolean mask expresses the same
    # "already suppressed" flag without depending on the removed alias.
    suppressed = np.zeros(ndets, dtype=bool)

    # _i, _j are positions in the score-sorted order; i, j are the
    # corresponding row indices in ``dets``.
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i]:
            continue
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Compare box i against every lower-scoring box that is still alive.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j]:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = True

    return np.where(~suppressed)[0]


def bbox_area(box):
    """Return the pixel-inclusive area of one ``(x1, y1, x2, y2)`` box."""
    w = box[2] - box[0] + 1
    h = box[3] - box[1] + 1
    return w * h


def bbox_overlaps(x, y):
    """Compute pairwise IoU between two sets of boxes.

    Args:
        x: array of shape (N, 4) with ``(x1, y1, x2, y2)`` rows.
        y: array of shape (K, 4) with ``(x1, y1, x2, y2)`` rows.

    Returns:
        float32 array of shape (N, K); entry ``[n, k]`` is the IoU of
        ``x[n]`` and ``y[k]``, or 0 when the boxes do not intersect.
    """
    N = x.shape[0]
    K = y.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float32)
    for k in range(K):
        y_area = bbox_area(y[k])
        for n in range(N):
            iw = min(x[n, 2], y[k, 2]) - max(x[n, 0], y[k, 0]) + 1
            if iw > 0:
                ih = min(x[n, 3], y[k, 3]) - max(x[n, 1], y[k, 1]) + 1
                if ih > 0:
                    x_area = bbox_area(x[n])
                    ua = x_area + y_area - iw * ih
                    overlaps[n, k] = iw * ih / ua
    return overlaps


def box_voting(nms_dets, dets, vote_thresh):
    """Refine NMS-kept boxes by score-weighted averaging of their neighbours.

    Args:
        nms_dets: array of shape (M, 5), ``(score, x1, y1, x2, y2)`` rows
            kept by NMS.
        dets: array of shape (N, 5) with all candidate detections, same
            column layout.
        vote_thresh: candidates whose IoU with a kept box is
            ``>= vote_thresh`` take part in that box's vote.

    Returns:
        A copy of ``nms_dets`` whose coordinates are replaced by the
        score-weighted mean of the voting candidates; scores are unchanged.
    """
    top_dets = nms_dets.copy()
    top_boxes = nms_dets[:, 1:]
    all_boxes = dets[:, 1:]
    all_scores = dets[:, 0]
    top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes)
    for k in range(nms_dets.shape[0]):
        inds_to_vote = np.where(top_to_all_overlaps[k] >= vote_thresh)[0]
        boxes_to_vote = all_boxes[inds_to_vote, :]
        ws = all_scores[inds_to_vote]
        top_dets[k, 1:] = np.average(boxes_to_vote, axis=0, weights=ws)

    return top_dets


def get_nms_result(boxes, scores, cfg):
    """Run per-class NMS (and optional box voting) and merge the results.

    Args:
        boxes: array of shape (N, 4 * cfg.num_classes); columns
            ``[4 * j, 4 * j + 4)`` hold the box for class ``j``.
        scores: array of shape (N, cfg.num_classes).
        cfg: config object exposing ``num_classes`` and a ``MultiScaleTEST``
            mapping with the keys ``score_thresh``, ``nms_thresh``,
            ``enable_voting``, ``vote_thresh`` and ``detections_per_im``.

    Returns:
        float32 array with one row per detection:
        ``(label, score, x1, y1, x2, y2)``.
    """
    ms_cfg = cfg.MultiScaleTEST
    cls_boxes = [[] for _ in range(cfg.num_classes)]
    # Class 0 is skipped (presumably the background class).
    for j in range(1, cfg.num_classes):
        inds = np.where(scores[:, j] > ms_cfg['score_thresh'])[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((scores_j[:, np.newaxis], boxes_j)).astype(
            np.float32, copy=False)
        keep = nms(dets_j, ms_cfg['nms_thresh'])
        nms_dets = dets_j[keep, :]
        if ms_cfg['enable_voting']:
            nms_dets = box_voting(nms_dets, dets_j, ms_cfg['vote_thresh'])
        # Prepend the class label as the first column.
        label = np.full(len(keep), j)
        nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype(
            np.float32, copy=False)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    image_scores = np.hstack(
        [cls_boxes[j][:, 1] for j in range(1, cfg.num_classes)])
    if len(image_scores) > ms_cfg['detections_per_im']:
        # Score of the detections_per_im-th best detection across classes.
        image_thresh = np.sort(image_scores)[-ms_cfg['detections_per_im']]
        for j in range(1, cfg.num_classes):
            keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
            cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, cfg.num_classes)])
    return im_results


def mstest_box_post_process(result, cfg):
    """Merge the box predictions of a multi-scale test run.

    Only available for batch_size=1 now.

    Args:
        result: mapping holding ``'im_shape'`` plus, for every key that
            contains ``'bbox'``, a matching ``'score' + key[4:]`` entry.
            Keys containing ``'flip'`` are un-flipped before merging.
        cfg: config object, see ``get_nms_result``.

    Returns:
        dict with ``'bbox'`` mapped to ``(detections, [[count]])`` and, when
        any flipped input was seen, ``'bbox_flip'`` holding the same
        detections with horizontally mirrored coordinates.
    """
    post_bbox = {}
    use_flip = False
    ms_boxes = []
    ms_scores = []
    im_shape = result['im_shape'][0]
    for k in result:
        if 'bbox' not in k:
            continue
        boxes = result[k][0]
        boxes = np.reshape(boxes, (-1, 4 * cfg.num_classes))
        scores = result['score' + k[4:]][0]
        if 'flip' in k:
            # Map boxes predicted on the mirrored image back to the original.
            boxes = box_flip(boxes, im_shape)
            use_flip = True
        ms_boxes.append(boxes)
        ms_scores.append(scores)

    ms_boxes = np.concatenate(ms_boxes)
    ms_scores = np.concatenate(ms_scores)
    bbox_pred = get_nms_result(ms_boxes, ms_scores, cfg)
    post_bbox.update({'bbox': (bbox_pred, [[len(bbox_pred)]])})
    if use_flip:
        # Columns 0-1 are (label, score); the rest are box coordinates.
        bbox = bbox_pred[:, 2:]
        bbox_flip = np.append(
            bbox_pred[:, :2], box_flip(bbox, im_shape), axis=1)
        post_bbox.update({'bbox_flip': (bbox_flip, [[len(bbox_flip)]])})
    return post_bbox


def mstest_mask_post_process(result, cfg):
    """Average the mask predictions of a multi-scale test run.

    Args:
        result: mapping holding ``'im_shape'`` and one entry per key that
            contains ``'mask'``; keys containing ``'flip'`` have their last
            axis reversed before averaging.
        cfg: config object exposing ``FPNRoIAlign['mask_resolution']``.

    Returns:
        dict with ``'mask'`` mapped to ``(mean_mask, [[count]])``.
    """
    mask_list = []
    im_shape = result['im_shape'][0]
    M = cfg.FPNRoIAlign['mask_resolution']
    for k in result:
        if 'mask' not in k:
            continue
        masks = result[k][0]
        if len(masks.shape) != 4:
            # Non 4-D output is replaced by an empty (0, M, M) placeholder.
            masks = np.zeros((0, M, M))
            mask_list.append(masks)
            continue
        if 'flip' in k:
            masks = masks[:, :, :, ::-1]
        mask_list.append(masks)

    mask_pred = np.mean(mask_list, axis=0)
    return {'mask': (mask_pred, [[len(mask_pred)]])}


def mask_encode(results, resolution, thresh_binarize=0.5):
    """Paste predicted masks into full images and RLE-encode them.

    Args:
        results: mapping with ``'bbox'``, ``'mask'`` and ``'im_shape'``
            entries; ``results['mask'][1][0]`` lists the number of
            detections per sample.
        resolution: side length of the square predicted masks.
        thresh_binarize: threshold applied to the resized mask.

    Returns:
        list of encoded masks as produced by ``pycocotools.mask.encode``,
        one per detection; empty when there are no boxes.

    Raises:
        ImportError: if OpenCV, pycocotools or ppdet is not importable.
    """
    import pycocotools.mask as mask_util
    from ppdet.utils.coco_eval import expand_boxes
    if cv2 is None:
        raise ImportError("mask_encode requires OpenCV (cv2)")

    scale = (resolution + 2.0) / resolution
    bboxes = results['bbox'][0]
    masks = results['mask'][0]
    lengths = results['mask'][1][0]
    im_shapes = results['im_shape'][0]
    segms = []
    # Check for None first: reading ``.shape`` on None would raise.
    if bboxes is None or bboxes.shape == (1, 1):
        return segms
    if len(bboxes) == 0:
        return segms

    s = 0
    # for each sample
    for i in range(len(lengths)):
        num = lengths[i]
        im_shape = im_shapes[i]

        # Rows are (label, score, x1, y1, x2, y2).
        bbox = bboxes[s:s + num][:, 2:]
        clsid_scores = bboxes[s:s + num][:, 0:2]
        mask = masks[s:s + num]
        s += num

        im_h = int(im_shape[0])
        im_w = int(im_shape[1])
        expand_bbox = expand_boxes(bbox, scale)
        expand_bbox = expand_bbox.astype(np.int32)
        # One-pixel zero border around the mask; the buffer is reused for
        # every detection because its interior is fully overwritten.
        padded_mask = np.zeros(
            (resolution + 2, resolution + 2), dtype=np.float32)

        for j in range(num):
            xmin, ymin, xmax, ymax = expand_bbox[j].tolist()
            clsid, score = clsid_scores[j].tolist()
            clsid = int(clsid)
            padded_mask[1:-1, 1:-1] = mask[j, clsid, :, :]
            # Builtin max keeps w/h plain Python ints for cv2.resize.
            w = max(xmax - xmin + 1, 1)
            h = max(ymax - ymin + 1, 1)
            resized_mask = cv2.resize(padded_mask, (w, h))
            resized_mask = np.array(
                resized_mask > thresh_binarize, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
            # Clip the paste region to the image bounds.
            x0 = min(max(xmin, 0), im_w)
            x1 = min(max(xmax + 1, 0), im_w)
            y0 = min(max(ymin, 0), im_h)
            y1 = min(max(ymax + 1, 0), im_h)
            im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
                x0 - xmin):(x1 - xmin)]
            segm = mask_util.encode(
                np.array(
                    im_mask[:, :, np.newaxis], order='F'))[0]
            segms.append(segm)
    return segms