Commit 4437d7b4 authored by Jonathan Huang, committed by TF Object Detection Team

Add support for LVIS metrics.

PiperOrigin-RevId: 339190667
Parent b1809d94
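The high-level flow exercised by the new tests (a sketch using only names added in this commit; `categories` is a list of dicts like the one built by `_get_categories_list` in the tests below):

    lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(categories)
    lvis_evaluator.add_single_ground_truth_image_info(image_id, groundtruth_dict)
    lvis_evaluator.add_single_detected_image_info(image_id, detections_dict)
    metrics = lvis_evaluator.evaluate()  # e.g. metrics['DetectionMasks_AP']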
......@@ -70,6 +70,9 @@ class InputDataFields(object):
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
    groundtruth_verified_neg_classes: groundtruth verified negative classes.
groundtruth_not_exhaustive_classes: groundtruth not-exhaustively labeled
classes.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
......@@ -120,6 +123,8 @@ class InputDataFields(object):
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_verified_neg_classes = 'groundtruth_verified_neg_classes'
groundtruth_not_exhaustive_classes = 'groundtruth_not_exhaustive_classes'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
......
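The LVIS tests in this commit populate both new fields as per-class 0/1 vectors whose length equals the number of categories; a hypothetical sketch for a three-class label map:

    # A 1 marks a class verified absent (or not exhaustively labeled) in the image.
    groundtruth_dict[fields.InputDataFields.groundtruth_verified_neg_classes] = (
        np.array([0, 0, 0]))
    groundtruth_dict[fields.InputDataFields.groundtruth_not_exhaustive_classes] = (
        np.array([0, 0, 0]))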
......@@ -85,6 +85,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
groundtruth_classes = tf.constant([1])
groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
original_image_spatial_shapes = tf.constant([[20, 20]], dtype=tf.int32)
groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
if resized_groundtruth_masks:
groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
......@@ -100,6 +102,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
groundtruth_keypoints = tf.tile(
tf.expand_dims(groundtruth_keypoints, 0),
multiples=[batch_size, 1, 1])
original_image_spatial_shapes = tf.tile(original_image_spatial_shapes,
multiples=[batch_size, 1])
detections = {
detection_fields.detection_boxes: detection_boxes,
......@@ -112,7 +116,10 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_keypoints: groundtruth_keypoints,
-        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
+        input_data_fields.groundtruth_instance_masks:
+            groundtruth_instance_masks,
+        input_data_fields.original_image_spatial_shape:
+            original_image_spatial_shapes
}
if batch_size > 1:
return eval_util.result_dict_for_batched_example(
......
......@@ -1191,18 +1191,20 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
-                detection_masks_batched, num_det_boxes_per_image):
+                detection_masks_batched, num_det_boxes_per_image,
+                original_image_spatial_shape):
"""Update op for metrics."""
for (image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
detection_scores, detection_classes,
-         detection_masks, num_det_box) in zip(
+         detection_masks, num_det_box, original_image_shape) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
-             detection_masks_batched, num_det_boxes_per_image):
+             detection_masks_batched, num_det_boxes_per_image,
+             original_image_spatial_shape):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes':
......@@ -1210,7 +1212,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'groundtruth_classes':
groundtruth_classes[:num_gt_box],
'groundtruth_instance_masks':
-                  groundtruth_instance_masks[:num_gt_box],
+                  groundtruth_instance_masks[
+                      :num_gt_box, :original_image_shape[0],
+                      :original_image_shape[1]],
'groundtruth_is_crowd':
groundtruth_is_crowd[:num_gt_box]
})
......@@ -1218,13 +1221,16 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, {
'detection_scores': detection_scores[:num_det_box],
'detection_classes': detection_classes[:num_det_box],
-              'detection_masks': detection_masks[:num_det_box]
+              'detection_masks': detection_masks[
+                  :num_det_box, :original_image_shape[0],
+                  :original_image_shape[1]]
})
# Unpack items from the evaluation dictionary.
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
image_id = eval_dict[input_data_fields.key]
original_image_spatial_shape = eval_dict[
input_data_fields.original_image_spatial_shape]
groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes]
groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
groundtruth_instance_masks = eval_dict[
......@@ -1276,7 +1282,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd,
num_gt_boxes_per_image, detection_scores, detection_classes,
-        detection_masks, num_det_boxes_per_image
+        detection_masks, num_det_boxes_per_image, original_image_spatial_shape
], [])
def get_estimator_eval_metric_ops(self, eval_dict):
......
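Why the mask tensors are cropped to original_image_spatial_shape: batched evaluation zero-pads every mask to the largest image in the batch, and scoring the padded region would corrupt the mask metrics. A minimal numpy illustration (hypothetical shapes):

    padded = np.zeros([1, 120, 120], dtype=np.uint8)  # batch-padded mask
    padded[0, :70, :70] = 1                           # real image content
    height, width = 70, 70                            # original spatial shape
    cropped = padded[:, :height, :width]              # shape (1, 70, 70)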
......@@ -1601,6 +1601,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
......@@ -1612,6 +1613,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shape,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
......@@ -1637,6 +1640,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]),
original_image_spatial_shape: np.array([[120, 120]]),
detection_scores:
np.array([.9, .8]),
detection_classes:
......@@ -1661,6 +1665,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
......@@ -1672,6 +1677,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shape,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
......@@ -1701,6 +1708,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]),
original_image_spatial_shape: np.array([[120, 120], [120, 120]]),
detection_scores:
np.array([.9, .8]),
detection_classes:
......@@ -1725,6 +1733,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant'),
original_image_spatial_shape: np.array([[70, 70]]),
detection_scores: np.array([.8]),
detection_classes: np.array([1]),
detection_masks: np.pad(np.ones([1, 50, 50], dtype=np.uint8),
......@@ -1740,6 +1749,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant'),
original_image_spatial_shape: np.array([[45, 45]]),
detection_scores: np.array([.8]),
detection_classes: np.array([1]),
detection_masks: np.pad(np.ones([1, 25, 25],
......@@ -1778,6 +1788,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
groundtruth_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_masks = tf.placeholder(
......@@ -1790,6 +1801,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shape,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
......@@ -1826,6 +1839,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
mode='constant')
],
axis=0),
original_image_spatial_shape: np.array(
[[100, 100], [100, 100], [100, 100]]),
detection_scores:
np.array([[.8], [.8], [.8]]),
detection_classes:
......
(This diff is collapsed.)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.metrics.coco_evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields as fields
from object_detection.metrics import lvis_evaluation
from object_detection.utils import tf_version
def _get_categories_list():
return [{
'id': 1,
'name': 'person',
'frequency': 'f'
}, {
'id': 2,
'name': 'dog',
'frequency': 'c'
}, {
'id': 3,
'name': 'cat',
'frequency': 'r'
}]
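# Note: LVIS buckets categories by annotation frequency ('f' = frequent,
# 'c' = common, 'r' = rare) and reports per-bucket AP alongside overall AP.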
class LvisMaskEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
"""Tests that mAP is calculated correctly on GT and Detections."""
masks1 = np.expand_dims(np.pad(
np.ones([100, 100], dtype=np.uint8),
((100, 56), (100, 56)), mode='constant'), axis=0)
masks2 = np.expand_dims(np.pad(
np.ones([50, 50], dtype=np.uint8),
((50, 156), (50, 156)), mode='constant'), axis=0)
masks3 = np.expand_dims(np.pad(
np.ones([25, 25], dtype=np.uint8),
((25, 206), (25, 206)), mode='constant'), axis=0)
lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(
_get_categories_list())
lvis_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
fields.InputDataFields.groundtruth_boxes:
np.array([[100., 100., 200., 200.]]),
fields.InputDataFields.groundtruth_classes: np.array([1]),
fields.InputDataFields.groundtruth_instance_masks: masks1,
fields.InputDataFields.groundtruth_verified_neg_classes:
np.array([0, 0, 0]),
fields.InputDataFields.groundtruth_not_exhaustive_classes:
np.array([0, 0, 0])
})
lvis_evaluator.add_single_detected_image_info(
image_id='image1',
detections_dict={
fields.DetectionResultFields.detection_masks: masks1,
fields.DetectionResultFields.detection_scores:
np.array([.8]),
fields.DetectionResultFields.detection_classes:
np.array([1])
})
lvis_evaluator.add_single_ground_truth_image_info(
image_id='image2',
groundtruth_dict={
fields.InputDataFields.groundtruth_boxes:
np.array([[50., 50., 100., 100.]]),
fields.InputDataFields.groundtruth_classes: np.array([1]),
fields.InputDataFields.groundtruth_instance_masks: masks2,
fields.InputDataFields.groundtruth_verified_neg_classes:
np.array([0, 0, 0]),
fields.InputDataFields.groundtruth_not_exhaustive_classes:
np.array([0, 0, 0])
})
lvis_evaluator.add_single_detected_image_info(
image_id='image2',
detections_dict={
fields.DetectionResultFields.detection_masks: masks2,
fields.DetectionResultFields.detection_scores:
np.array([.8]),
fields.DetectionResultFields.detection_classes:
np.array([1])
})
lvis_evaluator.add_single_ground_truth_image_info(
image_id='image3',
groundtruth_dict={
fields.InputDataFields.groundtruth_boxes:
np.array([[25., 25., 50., 50.]]),
fields.InputDataFields.groundtruth_classes: np.array([1]),
fields.InputDataFields.groundtruth_instance_masks: masks3,
fields.InputDataFields.groundtruth_verified_neg_classes:
np.array([0, 0, 0]),
fields.InputDataFields.groundtruth_not_exhaustive_classes:
np.array([0, 0, 0])
})
lvis_evaluator.add_single_detected_image_info(
image_id='image3',
detections_dict={
fields.DetectionResultFields.detection_masks: masks3,
fields.DetectionResultFields.detection_scores:
np.array([.8]),
fields.DetectionResultFields.detection_classes:
np.array([1])
})
metrics = lvis_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_AP'], 1.0)
@unittest.skipIf(tf_version.is_tf1(), 'Only Supported in TF2.X')
class LVISMaskEvaluationPyFuncTest(tf.test.TestCase):
def testAddEvalDict(self):
lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(_get_categories_list())
image_id = tf.constant('image1', dtype=tf.string)
groundtruth_boxes = tf.constant(
np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]),
dtype=tf.float32)
groundtruth_classes = tf.constant(np.array([1, 2]), dtype=tf.float32)
groundtruth_masks = tf.constant(np.stack([
np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)),
mode='constant'),
np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]), dtype=tf.uint8)
original_image_spatial_shapes = tf.constant([[120, 120], [120, 120]],
dtype=tf.int32)
groundtruth_verified_neg_classes = tf.constant(np.array([0, 0, 0]),
dtype=tf.float32)
groundtruth_not_exhaustive_classes = tf.constant(np.array([0, 0, 0]),
dtype=tf.float32)
detection_scores = tf.constant(np.array([.9, .8]), dtype=tf.float32)
detection_classes = tf.constant(np.array([2, 1]), dtype=tf.float32)
detection_masks = tf.constant(np.stack([
np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant'),
np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)),
mode='constant'),
]), dtype=tf.uint8)
input_data_fields = fields.InputDataFields
detection_fields = fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.groundtruth_verified_neg_classes:
groundtruth_verified_neg_classes,
input_data_fields.groundtruth_not_exhaustive_classes:
groundtruth_not_exhaustive_classes,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shapes,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks
}
lvis_evaluator.add_eval_dict(eval_dict)
self.assertLen(lvis_evaluator._groundtruth_list, 2)
self.assertLen(lvis_evaluator._detection_masks_list, 2)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrappers for third party lvis to be used within object_detection.
Usage example: given a set of images with ids in the list image_ids
and corresponding lists of numpy arrays encoding groundtruth (boxes,
masks and classes) and detections (masks, scores and classes), where
elements of each list correspond to detections/annotations of a single image,
then evaluation can be invoked as follows:
groundtruth = lvis_tools.LVISWrapper(groundtruth_dict)
detections = lvis_results.LVISResults(groundtruth, detections_list)
evaluator = lvis_tools.LVISEvalWrapper(groundtruth, detections,
iou_type='segm')
summary_metrics = evaluator.ComputeMetrics()
TODO(jonathanhuang): Add support for exporting to JSON.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from lvis import eval as lvis_eval
from lvis import lvis
import numpy as np
from pycocotools import mask
import six
from six.moves import range
def RleCompress(masks):
"""Compresses mask using Run-length encoding provided by pycocotools.
Args:
masks: uint8 numpy array of shape [mask_height, mask_width] with values in
{0, 1}.
Returns:
A pycocotools Run-length encoding of the mask.
"""
rle = mask.encode(np.asfortranarray(masks))
rle['counts'] = six.ensure_str(rle['counts'])
return rle
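# Example (round-trips through pycocotools, as verified in lvis_tools_test):
#   >>> m = np.array([[1, 1], [0, 1]], dtype=np.uint8)
#   >>> rle = RleCompress(m)
#   >>> mask.decode(rle).tolist()
#   [[1, 1], [0, 1]]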
def _ConvertBoxToCOCOFormat(box):
"""Converts a box in [ymin, xmin, ymax, xmax] format to COCO format.
This is a utility function for converting from our internal
[ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
i.e., [xmin, ymin, width, height].
Args:
box: a [ymin, xmin, ymax, xmax] numpy array
Returns:
a list of floats representing [xmin, ymin, width, height]
"""
return [float(box[1]), float(box[0]), float(box[3] - box[1]),
float(box[2] - box[0])]
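# Example: [ymin, xmin, ymax, xmax] = [10., 20., 50., 80.]
#          maps to [xmin, ymin, width, height] = [20., 10., 60., 40.]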
class LVISWrapper(lvis.LVIS):
"""Wrapper for the lvis.LVIS class."""
def __init__(self, dataset, detection_type='bbox'):
"""LVISWrapper constructor.
See https://www.lvisdataset.org/dataset for a description of the format.
    By default, the lvis.LVIS class constructor reads from a JSON file.
This function duplicates the same behavior but loads from a dictionary,
allowing us to perform evaluation without writing to external storage.
Args:
dataset: a dictionary holding bounding box annotations in the COCO format.
detection_type: type of detections being wrapped. Can be one of ['bbox',
'segmentation']
Raises:
ValueError: if detection_type is unsupported.
"""
    if detection_type not in ['bbox', 'segmentation']:
      raise ValueError('Unsupported detection type: %s' % detection_type)
    self.logger = logging.getLogger(__name__)
    self.logger.info('Loading annotations.')
    self.dataset = dataset
    self._create_index()
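# A minimal `dataset` dict accepted above (structure mirrored from
# lvis_tools_test below):
#   {'annotations': [{'id': 1, 'image_id': 'first', 'category_id': 1,
#                     'bbox': [...], 'area': ..., 'segmentation': rle}, ...],
#    'images': [{'id': 'first', 'neg_category_ids': [],
#                'not_exhaustive_category_ids': [], 'height': 256,
#                'width': 256}, ...],
#    'categories': [{'id': 1, 'name': 'person', 'frequency': 'f'}, ...]}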
class LVISEvalWrapper(lvis_eval.LVISEval):
"""LVISEval wrapper."""
def __init__(self, groundtruth=None, detections=None, iou_type='bbox'):
lvis_eval.LVISEval.__init__(
self, groundtruth, detections, iou_type=iou_type)
self._iou_type = iou_type
  def ComputeMetrics(self):
    self.run()
    return self.results
def ExportSingleImageGroundtruthToLVIS(image_id,
next_annotation_id,
category_id_set,
groundtruth_boxes,
groundtruth_classes,
groundtruth_masks=None,
groundtruth_area=None):
"""Export groundtruth of a single image to LVIS format.
This function converts groundtruth detection annotations represented as numpy
arrays to dictionaries that can be ingested by the LVIS evaluation API. Note
that the image_ids provided here must match the ones given to
ExportSingleImageDetectionMasksToLVIS. We assume that boxes, classes and masks
are in correspondence - that is, e.g., groundtruth_boxes[i, :], and
groundtruth_classes[i] are associated with the same groundtruth annotation.
In the exported result, "area" fields are set to the area of the groundtruth
bounding box unless groundtruth_area is provided.
Args:
image_id: a unique image identifier either of type integer or string.
next_annotation_id: integer specifying the first id to use for the
groundtruth annotations. All annotations are assigned a continuous integer
id starting from this value.
category_id_set: A set of valid class ids. Groundtruth with classes not in
category_id_set are dropped.
groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
    groundtruth_masks: optional uint8 numpy array of shape [num_gt_boxes,
      image_height, image_width] containing groundtruth masks.
groundtruth_area: numpy array (float32) with shape [num_gt_boxes]. If
provided, then the area values (in the original absolute coordinates) will
be populated instead of calculated from bounding box coordinates.
Returns:
    a list of groundtruth annotations for a single image in the LVIS format.
Raises:
    ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have
      the right lengths or (2) if any of the elements inside these lists do not
      have the correct shapes.
"""
if len(groundtruth_classes.shape) != 1:
raise ValueError('groundtruth_classes is '
'expected to be of rank 1.')
if len(groundtruth_boxes.shape) != 2:
raise ValueError('groundtruth_boxes is expected to be of '
'rank 2.')
if groundtruth_boxes.shape[1] != 4:
raise ValueError('groundtruth_boxes should have '
'shape[1] == 4.')
num_boxes = groundtruth_classes.shape[0]
if num_boxes != groundtruth_boxes.shape[0]:
raise ValueError('Corresponding entries in groundtruth_classes, '
'and groundtruth_boxes should have '
                     'compatible shapes (i.e., agree on the 0th dimension). '
'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
groundtruth_classes.shape[0],
groundtruth_boxes.shape[0], image_id))
groundtruth_list = []
for i in range(num_boxes):
if groundtruth_classes[i] in category_id_set:
if groundtruth_area is not None and groundtruth_area[i] > 0:
area = float(groundtruth_area[i])
else:
area = float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
(groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]))
export_dict = {
'id':
next_annotation_id + i,
'image_id':
image_id,
'category_id':
int(groundtruth_classes[i]),
'bbox':
list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
'area': area,
}
if groundtruth_masks is not None:
export_dict['segmentation'] = RleCompress(groundtruth_masks[i])
groundtruth_list.append(export_dict)
return groundtruth_list
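# Illustrative export (values match lvis_tools_test below): a groundtruth box
# [0., 0., .5, .5] ([ymin, xmin, ymax, xmax]) with class 2 becomes
#   {'id': next_annotation_id, 'image_id': image_id, 'category_id': 2,
#    'bbox': [0., 0., .5, .5], 'area': 0.25}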
def ExportSingleImageDetectionMasksToLVIS(image_id,
category_id_set,
detection_masks,
detection_scores,
detection_classes):
"""Export detection masks of a single image to LVIS format.
This function converts detections represented as numpy arrays to dictionaries
that can be ingested by the LVIS evaluation API. We assume that
detection_masks, detection_scores, and detection_classes are in correspondence
- that is: detection_masks[i, :], detection_classes[i] and detection_scores[i]
are associated with the same annotation.
Args:
image_id: unique image identifier either of type integer or string.
category_id_set: A set of valid class ids. Detections with classes not in
category_id_set are dropped.
detection_masks: uint8 numpy array of shape [num_detections, image_height,
image_width] containing detection_masks.
detection_scores: float numpy array of shape [num_detections] containing
scores for detection masks.
detection_classes: integer numpy array of shape [num_detections] containing
the classes for detection masks.
Returns:
    a list of detection mask annotations for a single image in the LVIS format.
Raises:
ValueError: if (1) detection_masks, detection_scores and detection_classes
do not have the right lengths or (2) if each of the elements inside these
lists do not have the correct shapes or (3) if image_ids are not integers.
"""
if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
    raise ValueError('All entries in detection_classes and detection_scores '
                     'are expected to be of rank 1.')
num_boxes = detection_classes.shape[0]
if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
raise ValueError('Corresponding entries in detection_classes, '
'detection_scores and detection_masks should have '
                     'compatible lengths and shapes. '
'Classes length: %d. Masks length: %d. '
'Scores length: %d' % (
detection_classes.shape[0], len(detection_masks),
detection_scores.shape[0]
))
detections_list = []
for i in range(num_boxes):
if detection_classes[i] in category_id_set:
detections_list.append({
'image_id': image_id,
'category_id': int(detection_classes[i]),
'segmentation': RleCompress(detection_masks[i]),
'score': float(detection_scores[i])
})
return detections_list
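# Illustrative export (mirroring the tests below): a detection of class 1 with
# score .8 becomes
#   {'image_id': image_id, 'category_id': 1,
#    'segmentation': <compressed RLE>, 'score': 0.8}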
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_model.object_detection.metrics.lvis_tools."""
from lvis import results as lvis_results
import numpy as np
from pycocotools import mask
import tensorflow.compat.v1 as tf
from object_detection.metrics import lvis_tools
class LVISToolsTest(tf.test.TestCase):
def setUp(self):
super(LVISToolsTest, self).setUp()
mask1 = np.pad(
np.ones([100, 100], dtype=np.uint8),
((100, 56), (100, 56)), mode='constant')
mask2 = np.pad(
np.ones([50, 50], dtype=np.uint8),
((50, 156), (50, 156)), mode='constant')
mask1_rle = lvis_tools.RleCompress(mask1)
mask2_rle = lvis_tools.RleCompress(mask2)
groundtruth_annotations_list = [
{
'id': 1,
'image_id': 'first',
'category_id': 1,
'bbox': [100., 100., 100., 100.],
'area': 100.**2,
'segmentation': mask1_rle
},
{
'id': 2,
'image_id': 'second',
'category_id': 1,
'bbox': [50., 50., 50., 50.],
'area': 50.**2,
'segmentation': mask2_rle
},
]
image_list = [
{
'id': 'first',
'neg_category_ids': [],
'not_exhaustive_category_ids': [],
'height': 256,
'width': 256
},
{
'id': 'second',
'neg_category_ids': [],
'not_exhaustive_category_ids': [],
'height': 256,
'width': 256
}
]
category_list = [{'id': 0, 'name': 'person', 'frequency': 'f'},
{'id': 1, 'name': 'cat', 'frequency': 'c'},
{'id': 2, 'name': 'dog', 'frequency': 'r'}]
self._groundtruth_dict = {
'annotations': groundtruth_annotations_list,
'images': image_list,
'categories': category_list
}
self._detections_list = [
{
'image_id': 'first',
'category_id': 1,
'segmentation': mask1_rle,
'score': .8
},
{
'image_id': 'second',
'category_id': 1,
'segmentation': mask2_rle,
'score': .7
},
]
def testLVISWrappers(self):
groundtruth = lvis_tools.LVISWrapper(self._groundtruth_dict)
detections = lvis_results.LVISResults(groundtruth, self._detections_list)
evaluator = lvis_tools.LVISEvalWrapper(groundtruth, detections,
iou_type='segm')
summary_metrics = evaluator.ComputeMetrics()
self.assertAlmostEqual(1.0, summary_metrics['AP'])
def testSingleImageDetectionMaskExport(self):
masks = np.array(
[[[1, 1,], [1, 1]],
[[0, 0], [0, 1]],
[[0, 0], [0, 0]]], dtype=np.uint8)
classes = np.array([1, 2, 3], dtype=np.int32)
scores = np.array([0.8, 0.2, 0.7], dtype=np.float32)
lvis_annotations = lvis_tools.ExportSingleImageDetectionMasksToLVIS(
image_id='first_image',
category_id_set=set([1, 2, 3]),
detection_classes=classes,
detection_scores=scores,
detection_masks=masks)
expected_counts = ['04', '31', '4']
for i, mask_annotation in enumerate(lvis_annotations):
self.assertEqual(mask_annotation['segmentation']['counts'],
expected_counts[i])
self.assertTrue(np.all(np.equal(mask.decode(
mask_annotation['segmentation']), masks[i])))
self.assertEqual(mask_annotation['image_id'], 'first_image')
self.assertEqual(mask_annotation['category_id'], classes[i])
self.assertAlmostEqual(mask_annotation['score'], scores[i])
def testSingleImageGroundtruthExport(self):
masks = np.array(
[[[1, 1,], [1, 1]],
[[0, 0], [0, 1]],
[[0, 0], [0, 0]]], dtype=np.uint8)
boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5],
[.5, .5, 1, 1]], dtype=np.float32)
lvis_boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5],
[.5, .5, .5, .5]], dtype=np.float32)
classes = np.array([1, 2, 3], dtype=np.int32)
next_annotation_id = 1
expected_counts = ['04', '31', '4']
lvis_annotations = lvis_tools.ExportSingleImageGroundtruthToLVIS(
image_id='first_image',
category_id_set=set([1, 2, 3]),
next_annotation_id=next_annotation_id,
groundtruth_boxes=boxes,
groundtruth_classes=classes,
groundtruth_masks=masks)
for i, annotation in enumerate(lvis_annotations):
self.assertEqual(annotation['segmentation']['counts'],
expected_counts[i])
self.assertTrue(np.all(np.equal(mask.decode(
annotation['segmentation']), masks[i])))
self.assertTrue(np.all(np.isclose(annotation['bbox'], lvis_boxes[i])))
self.assertEqual(annotation['image_id'], 'first_image')
self.assertEqual(annotation['category_id'], classes[i])
self.assertEqual(annotation['id'], i + next_annotation_id)
if __name__ == '__main__':
tf.test.main()
......@@ -4,8 +4,8 @@ from setuptools import find_packages
from setuptools import setup
REQUIRED_PACKAGES = ['pillow', 'lxml', 'matplotlib', 'Cython',
-                     'contextlib2', 'tf-slim', 'six', 'pycocotools', 'scipy',
-                     'pandas']
+                     'contextlib2', 'tf-slim', 'six', 'pycocotools', 'lvis',
+                     'scipy', 'pandas']
setup(
name='object_detection',
......
......@@ -18,6 +18,7 @@ REQUIRED_PACKAGES = [
'tf-slim',
'six',
'pycocotools',
'lvis',
'scipy',
'pandas',
'tf-models-official'
......