diff --git a/util/eval_tool.py b/util/eval_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..91a3365895951a90c6c3f36d7af99b3cc4c01766
--- /dev/null
+++ b/util/eval_tool.py
@@ -0,0 +1,304 @@
+from __future__ import division
+
+from collections import defaultdict
+import itertools
+import numpy as np
+import six
+
+from model.utils.bbox_tools import bbox_iou
+
+
+def eval_detection_voc(
+        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
+        gt_difficults=None,
+        iou_thresh=0.5, use_07_metric=False):
+    """Calculate average precisions based on evaluation code of PASCAL VOC.
+
+    This function evaluates predicted bounding boxes obtained from a dataset
+    which has :math:`N` images by using average precision for each class.
+    The code is based on the evaluation code used in the PASCAL VOC Challenge.
+
+    Args:
+        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
+            sets of bounding boxes.
+            Its index corresponds to an index for the base dataset.
+            Each element of :obj:`pred_bboxes` is a set of coordinates
+            of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
+            where :math:`R` corresponds to the number of bounding boxes,
+            which may vary among images.
+            The second axis corresponds to
+            :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
+        pred_labels (iterable of numpy.ndarray): An iterable of labels.
+            Similar to :obj:`pred_bboxes`, its index corresponds to an
+            index for the base dataset. Its length is :math:`N`.
+        pred_scores (iterable of numpy.ndarray): An iterable of confidence
+            scores for predicted bounding boxes. Similar to
+            :obj:`pred_bboxes`, its index corresponds to an index for the
+            base dataset. Its length is :math:`N`.
+        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
+            bounding boxes whose length is :math:`N`. An element of
+            :obj:`gt_bboxes` is an array of bounding boxes whose shape is
+            :math:`(R, 4)`. Note that the number of bounding boxes in each
+            image does not need to be the same as the number of
+            corresponding predicted boxes.
+        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
+            labels which are organized similarly to :obj:`gt_bboxes`.
+        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
+            arrays which is organized similarly to :obj:`gt_bboxes`.
+            This tells whether the corresponding ground truth bounding box
+            is difficult or not.
+            By default, this is :obj:`None`. In that case, this function
+            considers all bounding boxes to be not difficult.
+        iou_thresh (float): A prediction is correct if its Intersection over
+            Union with the ground truth is above this value.
+        use_07_metric (bool): Whether to use the PASCAL VOC 2007 evaluation
+            metric for calculating average precision. The default value is
+            :obj:`False`.
+
+    Returns:
+        dict:
+
+        The keys, value-types and the description of the values are listed
+        below.
+
+        * **ap** (*numpy.ndarray*): An array of average precisions. \
+            The :math:`l`-th value corresponds to the average precision \
+            for class :math:`l`. If class :math:`l` does not exist in \
+            either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
+            value is set to :obj:`numpy.nan`.
+        * **map** (*float*): The average of Average Precisions over classes.
+
+    """
+
+    prec, rec = calc_detection_voc_prec_rec(
+        pred_bboxes, pred_labels, pred_scores,
+        gt_bboxes, gt_labels, gt_difficults,
+        iou_thresh=iou_thresh)
+
+    ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric)
+
+    return {'ap': ap, 'map': np.nanmean(ap)}
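+
+
+# A minimal usage sketch (illustrative only; the toy boxes, labels and
+# scores below are made up, not taken from any dataset). Every argument is
+# a per-image iterable, so evaluating a single image looks like:
+#
+#     pred_bboxes = [np.array([[50., 50., 150., 150.]])]
+#     pred_labels = [np.array([0])]
+#     pred_scores = [np.array([0.9])]
+#     gt_bboxes = [np.array([[48., 52., 148., 148.]])]
+#     gt_labels = [np.array([0])]
+#     result = eval_detection_voc(
+#         pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels)
+#     # result['ap'] -> array([1.]); result['map'] -> 1.0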
+ + """ + + prec, rec = calc_detection_voc_prec_rec( + pred_bboxes, pred_labels, pred_scores, + gt_bboxes, gt_labels, gt_difficults, + iou_thresh=iou_thresh) + + ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric) + + return {'ap': ap, 'map': np.nanmean(ap)} + + +def calc_detection_voc_prec_rec( + pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, + gt_difficults=None, + iou_thresh=0.5): + """Calculate precision and recall based on evaluation code of PASCAL VOC. + + This function calculates precision and recall of + predicted bounding boxes obtained from a dataset which has :math:`N` + images. + The code is based on the evaluation code used in PASCAL VOC Challenge. + + Args: + pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` + sets of bounding boxes. + Its index corresponds to an index for the base dataset. + Each element of :obj:`pred_bboxes` is a set of coordinates + of bounding boxes. This is an array whose shape is :math:`(R, 4)`, + where :math:`R` corresponds + to the number of bounding boxes, which may vary among boxes. + The second axis corresponds to + :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. + pred_labels (iterable of numpy.ndarray): An iterable of labels. + Similar to :obj:`pred_bboxes`, its index corresponds to an + index for the base dataset. Its length is :math:`N`. + pred_scores (iterable of numpy.ndarray): An iterable of confidence + scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, + its index corresponds to an index for the base dataset. + Its length is :math:`N`. + gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth + bounding boxes + whose length is :math:`N`. An element of :obj:`gt_bboxes` is a + bounding box whose shape is :math:`(R, 4)`. Note that the number of + bounding boxes in each image does not need to be same as the number + of corresponding predicted boxes. + gt_labels (iterable of numpy.ndarray): An iterable of ground truth + labels which are organized similarly to :obj:`gt_bboxes`. + gt_difficults (iterable of numpy.ndarray): An iterable of boolean + arrays which is organized similarly to :obj:`gt_bboxes`. + This tells whether the + corresponding ground truth bounding box is difficult or not. + By default, this is :obj:`None`. In that case, this function + considers all bounding boxes to be not difficult. + iou_thresh (float): A prediction is correct if its Intersection over + Union with the ground truth is above this value.. + + Returns: + tuple of two lists: + This function returns two lists: :obj:`prec` and :obj:`rec`. + + * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \ + for class :math:`l`. If class :math:`l` does not exist in \ + either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \ + set to :obj:`None`. + * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \ + for class :math:`l`. If class :math:`l` that is not marked as \ + difficult does not exist in \ + :obj:`gt_labels`, :obj:`rec[l]` is \ + set to :obj:`None`. 
+ + """ + + pred_bboxes = iter(pred_bboxes) + pred_labels = iter(pred_labels) + pred_scores = iter(pred_scores) + gt_bboxes = iter(gt_bboxes) + gt_labels = iter(gt_labels) + if gt_difficults is None: + gt_difficults = itertools.repeat(None) + else: + gt_difficults = iter(gt_difficults) + + n_pos = defaultdict(int) + score = defaultdict(list) + match = defaultdict(list) + + for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \ + six.moves.zip( + pred_bboxes, pred_labels, pred_scores, + gt_bboxes, gt_labels, gt_difficults): + + if gt_difficult is None: + gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) + + for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): + pred_mask_l = pred_label == l + pred_bbox_l = pred_bbox[pred_mask_l] + pred_score_l = pred_score[pred_mask_l] + # sort by score + order = pred_score_l.argsort()[::-1] + pred_bbox_l = pred_bbox_l[order] + pred_score_l = pred_score_l[order] + + gt_mask_l = gt_label == l + gt_bbox_l = gt_bbox[gt_mask_l] + gt_difficult_l = gt_difficult[gt_mask_l] + + n_pos[l] += np.logical_not(gt_difficult_l).sum() + score[l].extend(pred_score_l) + + if len(pred_bbox_l) == 0: + continue + if len(gt_bbox_l) == 0: + match[l].extend((0,) * pred_bbox_l.shape[0]) + continue + + # VOC evaluation follows integer typed bounding boxes. + pred_bbox_l = pred_bbox_l.copy() + pred_bbox_l[:, 2:] += 1 + gt_bbox_l = gt_bbox_l.copy() + gt_bbox_l[:, 2:] += 1 + + iou = bbox_iou(pred_bbox_l, gt_bbox_l) + gt_index = iou.argmax(axis=1) + # set -1 if there is no matching ground truth + gt_index[iou.max(axis=1) < iou_thresh] = -1 + del iou + + selec = np.zeros(gt_bbox_l.shape[0], dtype=bool) + for gt_idx in gt_index: + if gt_idx >= 0: + if gt_difficult_l[gt_idx]: + match[l].append(-1) + else: + if not selec[gt_idx]: + match[l].append(1) + else: + match[l].append(0) + selec[gt_idx] = True + else: + match[l].append(0) + + for iter_ in ( + pred_bboxes, pred_labels, pred_scores, + gt_bboxes, gt_labels, gt_difficults): + if next(iter_, None) is not None: + raise ValueError('Length of input iterables need to be same.') + + n_fg_class = max(n_pos.keys()) + 1 + prec = [None] * n_fg_class + rec = [None] * n_fg_class + + for l in n_pos.keys(): + score_l = np.array(score[l]) + match_l = np.array(match[l], dtype=np.int8) + + order = score_l.argsort()[::-1] + match_l = match_l[order] + + tp = np.cumsum(match_l == 1) + fp = np.cumsum(match_l == 0) + + # If an element of fp + tp is 0, + # the corresponding element of prec[l] is nan. + prec[l] = tp / (fp + tp) + # If n_pos[l] is 0, rec[l] is None. + if n_pos[l] > 0: + rec[l] = tp / n_pos[l] + + return prec, rec + + +def calc_detection_voc_ap(prec, rec, use_07_metric=False): + """Calculate average precisions based on evaluation code of PASCAL VOC. + + This function calculates average precisions + from given precisions and recalls. + The code is based on the evaluation code used in PASCAL VOC Challenge. + + Args: + prec (list of numpy.array): A list of arrays. + :obj:`prec[l]` indicates precision for class :math:`l`. + If :obj:`prec[l]` is :obj:`None`, this function returns + :obj:`numpy.nan` for class :math:`l`. + rec (list of numpy.array): A list of arrays. + :obj:`rec[l]` indicates recall for class :math:`l`. + If :obj:`rec[l]` is :obj:`None`, this function returns + :obj:`numpy.nan` for class :math:`l`. + use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric + for calculating average precision. The default value is + :obj:`False`. 
+
+
+def calc_detection_voc_ap(prec, rec, use_07_metric=False):
+    """Calculate average precisions based on evaluation code of PASCAL VOC.
+
+    This function calculates average precisions
+    from given precisions and recalls.
+    The code is based on the evaluation code used in the PASCAL VOC Challenge.
+
+    Args:
+        prec (list of numpy.array): A list of arrays.
+            :obj:`prec[l]` indicates precision for class :math:`l`.
+            If :obj:`prec[l]` is :obj:`None`, this function returns
+            :obj:`numpy.nan` for class :math:`l`.
+        rec (list of numpy.array): A list of arrays.
+            :obj:`rec[l]` indicates recall for class :math:`l`.
+            If :obj:`rec[l]` is :obj:`None`, this function returns
+            :obj:`numpy.nan` for class :math:`l`.
+        use_07_metric (bool): Whether to use the PASCAL VOC 2007 evaluation
+            metric for calculating average precision. The default value is
+            :obj:`False`.
+
+    Returns:
+        ~numpy.ndarray:
+        This function returns an array of average precisions.
+        The :math:`l`-th value corresponds to the average precision
+        for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
+        :obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
+
+    """
+
+    n_fg_class = len(prec)
+    ap = np.empty(n_fg_class)
+    for l in six.moves.range(n_fg_class):
+        if prec[l] is None or rec[l] is None:
+            ap[l] = np.nan
+            continue
+
+        if use_07_metric:
+            # 11-point metric: average the maximum precision at the
+            # eleven recall thresholds 0.0, 0.1, ..., 1.0.
+            ap[l] = 0
+            for t in np.arange(0., 1.1, 0.1):
+                if np.sum(rec[l] >= t) == 0:
+                    p = 0
+                else:
+                    p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
+                ap[l] += p / 11
+        else:
+            # correct AP calculation:
+            # first append sentinel values at both ends
+            mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
+            mrec = np.concatenate(([0], rec[l], [1]))
+
+            # make the precision envelope monotonically non-increasing
+            mpre = np.maximum.accumulate(mpre[::-1])[::-1]
+
+            # to calculate the area under the PR curve, look for points
+            # where the x axis (recall) changes value
+            i = np.where(mrec[1:] != mrec[:-1])[0]
+
+            # and sum (\Delta recall) * precision
+            ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
+
+    return ap
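+
+
+if __name__ == '__main__':
+    # Illustrative smoke test (a hypothetical addition for demonstration,
+    # with made-up precision/recall values). A perfect precision curve
+    # should yield AP = 1.0 under both the standard and the 2007 metric.
+    prec = [np.array([1.0, 1.0])]
+    rec = [np.array([0.5, 1.0])]
+    print(calc_detection_voc_ap(prec, rec, use_07_metric=False))  # [1.]
+    print(calc_detection_voc_ap(prec, rec, use_07_metric=True))   # [1.]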