Commit 4437d7b4 authored by Jonathan Huang, committed by TF Object Detection Team

Add support for LVIS metrics.

PiperOrigin-RevId: 339190667
Parent b1809d94
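The high-level flow exercised by the new tests (a sketch using only names added in this commit; `categories` is a list of dicts like the one built by `_get_categories_list` in the tests below):

    lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(categories)
    lvis_evaluator.add_single_ground_truth_image_info(image_id, groundtruth_dict)
    lvis_evaluator.add_single_detected_image_info(image_id, detections_dict)
    metrics = lvis_evaluator.evaluate()  # e.g. metrics['DetectionMasks_AP']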
......@@ -70,6 +70,9 @@ class InputDataFields(object):
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
    groundtruth_verified_neg_classes: groundtruth verified negative classes.
groundtruth_not_exhaustive_classes: groundtruth not-exhaustively labeled
classes.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
......@@ -120,6 +123,8 @@ class InputDataFields(object):
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_verified_neg_classes = 'groundtruth_verified_neg_classes'
groundtruth_not_exhaustive_classes = 'groundtruth_not_exhaustive_classes'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
......
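The LVIS tests in this commit populate both new fields as per-class 0/1 vectors whose length equals the number of categories; a hypothetical sketch for a three-class label map:

    # A 1 marks a class verified absent (or not exhaustively labeled) in the image.
    groundtruth_dict[fields.InputDataFields.groundtruth_verified_neg_classes] = (
        np.array([0, 0, 0]))
    groundtruth_dict[fields.InputDataFields.groundtruth_not_exhaustive_classes] = (
        np.array([0, 0, 0]))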
......@@ -85,6 +85,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
groundtruth_classes = tf.constant([1])
groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
original_image_spatial_shapes = tf.constant([[20, 20]], dtype=tf.int32)
groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
if resized_groundtruth_masks:
groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
......@@ -100,6 +102,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
groundtruth_keypoints = tf.tile(
tf.expand_dims(groundtruth_keypoints, 0),
multiples=[batch_size, 1, 1])
original_image_spatial_shapes = tf.tile(original_image_spatial_shapes,
multiples=[batch_size, 1])
detections = {
detection_fields.detection_boxes: detection_boxes,
......@@ -112,7 +116,10 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_keypoints: groundtruth_keypoints,
-        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
+        input_data_fields.groundtruth_instance_masks:
+            groundtruth_instance_masks,
+        input_data_fields.original_image_spatial_shape:
+            original_image_spatial_shapes
}
if batch_size > 1:
return eval_util.result_dict_for_batched_example(
......
......@@ -1191,18 +1191,20 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
-                detection_masks_batched, num_det_boxes_per_image):
+                detection_masks_batched, num_det_boxes_per_image,
+                original_image_spatial_shape):
"""Update op for metrics."""
for (image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
detection_scores, detection_classes,
-         detection_masks, num_det_box) in zip(
+         detection_masks, num_det_box, original_image_shape) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
detection_scores_batched, detection_classes_batched,
-             detection_masks_batched, num_det_boxes_per_image):
+             detection_masks_batched, num_det_boxes_per_image,
+             original_image_spatial_shape):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_boxes':
......@@ -1210,7 +1212,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'groundtruth_classes':
groundtruth_classes[:num_gt_box],
'groundtruth_instance_masks':
-                  groundtruth_instance_masks[:num_gt_box],
+                  groundtruth_instance_masks[
+                      :num_gt_box, :original_image_shape[0],
+                      :original_image_shape[1]],
'groundtruth_is_crowd':
groundtruth_is_crowd[:num_gt_box]
})
......@@ -1218,13 +1221,16 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, {
'detection_scores': detection_scores[:num_det_box],
'detection_classes': detection_classes[:num_det_box],
-              'detection_masks': detection_masks[:num_det_box]
+              'detection_masks': detection_masks[
+                  :num_det_box, :original_image_shape[0],
+                  :original_image_shape[1]]
})
# Unpack items from the evaluation dictionary.
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
image_id = eval_dict[input_data_fields.key]
original_image_spatial_shape = eval_dict[
input_data_fields.original_image_spatial_shape]
groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes]
groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
groundtruth_instance_masks = eval_dict[
......@@ -1276,7 +1282,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, groundtruth_boxes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_is_crowd,
num_gt_boxes_per_image, detection_scores, detection_classes,
-        detection_masks, num_det_boxes_per_image
+        detection_masks, num_det_boxes_per_image, original_image_spatial_shape
], [])
def get_estimator_eval_metric_ops(self, eval_dict):
......
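Why the mask tensors are cropped to original_image_spatial_shape: batched evaluation zero-pads every mask to the largest image in the batch, and scoring the padded region would corrupt the mask metrics. A minimal numpy illustration (hypothetical shapes):

    padded = np.zeros([1, 120, 120], dtype=np.uint8)  # batch-padded mask
    padded[0, :70, :70] = 1                           # real image content
    height, width = 70, 70                            # original spatial shape
    cropped = padded[:, :height, :width]              # shape (1, 70, 70)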
......@@ -1601,6 +1601,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
......@@ -1612,6 +1613,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shape,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
......@@ -1637,6 +1640,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]),
original_image_spatial_shape: np.array([[120, 120]]),
detection_scores:
np.array([.9, .8]),
detection_classes:
......@@ -1661,6 +1665,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
......@@ -1672,6 +1677,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shape,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
......@@ -1701,6 +1708,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]),
original_image_spatial_shape: np.array([[120, 120], [120, 120]]),
detection_scores:
np.array([.9, .8]),
detection_classes:
......@@ -1725,6 +1733,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant'),
original_image_spatial_shape: np.array([[70, 70]]),
detection_scores: np.array([.8]),
detection_classes: np.array([1]),
detection_masks: np.pad(np.ones([1, 50, 50], dtype=np.uint8),
......@@ -1740,6 +1749,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
dtype=np.uint8),
((0, 0), (10, 10), (10, 10)),
mode='constant'),
original_image_spatial_shape: np.array([[45, 45]]),
detection_scores: np.array([.8]),
detection_classes: np.array([1]),
detection_masks: np.pad(np.ones([1, 25, 25],
......@@ -1778,6 +1788,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
groundtruth_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
detection_masks = tf.placeholder(
......@@ -1790,6 +1801,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shape,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
......@@ -1826,6 +1839,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
mode='constant')
],
axis=0),
original_image_spatial_shape: np.array(
[[100, 100], [100, 100], [100, 100]]),
detection_scores:
np.array([[.8], [.8], [.8]]),
detection_classes:
......
(This diff is collapsed.)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.metrics.coco_evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields as fields
from object_detection.metrics import lvis_evaluation
from object_detection.utils import tf_version
def _get_categories_list():
return [{
'id': 1,
'name': 'person',
'frequency': 'f'
}, {
'id': 2,
'name': 'dog',
'frequency': 'c'
}, {
'id': 3,
'name': 'cat',
'frequency': 'r'
}]
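# Note: LVIS buckets categories by annotation frequency ('f' = frequent,
# 'c' = common, 'r' = rare) and reports per-bucket AP alongside overall AP.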
class LvisMaskEvaluationTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
"""Tests that mAP is calculated correctly on GT and Detections."""
masks1 = np.expand_dims(np.pad(
np.ones([100, 100], dtype=np.uint8),
((100, 56), (100, 56)), mode='constant'), axis=0)
masks2 = np.expand_dims(np.pad(
np.ones([50, 50], dtype=np.uint8),
((50, 156), (50, 156)), mode='constant'), axis=0)
masks3 = np.expand_dims(np.pad(
np.ones([25, 25], dtype=np.uint8),
((25, 206), (25, 206)), mode='constant'), axis=0)
lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(
_get_categories_list())
lvis_evaluator.add_single_ground_truth_image_info(
image_id='image1',
groundtruth_dict={
fields.InputDataFields.groundtruth_boxes:
np.array([[100., 100., 200., 200.]]),
fields.InputDataFields.groundtruth_classes: np.array([1]),
fields.InputDataFields.groundtruth_instance_masks: masks1,
fields.InputDataFields.groundtruth_verified_neg_classes:
np.array([0, 0, 0]),
fields.InputDataFields.groundtruth_not_exhaustive_classes:
np.array([0, 0, 0])
})
lvis_evaluator.add_single_detected_image_info(
image_id='image1',
detections_dict={
fields.DetectionResultFields.detection_masks: masks1,
fields.DetectionResultFields.detection_scores:
np.array([.8]),
fields.DetectionResultFields.detection_classes:
np.array([1])
})
lvis_evaluator.add_single_ground_truth_image_info(
image_id='image2',
groundtruth_dict={
fields.InputDataFields.groundtruth_boxes:
np.array([[50., 50., 100., 100.]]),
fields.InputDataFields.groundtruth_classes: np.array([1]),
fields.InputDataFields.groundtruth_instance_masks: masks2,
fields.InputDataFields.groundtruth_verified_neg_classes:
np.array([0, 0, 0]),
fields.InputDataFields.groundtruth_not_exhaustive_classes:
np.array([0, 0, 0])
})
lvis_evaluator.add_single_detected_image_info(
image_id='image2',
detections_dict={
fields.DetectionResultFields.detection_masks: masks2,
fields.DetectionResultFields.detection_scores:
np.array([.8]),
fields.DetectionResultFields.detection_classes:
np.array([1])
})
lvis_evaluator.add_single_ground_truth_image_info(
image_id='image3',
groundtruth_dict={
fields.InputDataFields.groundtruth_boxes:
np.array([[25., 25., 50., 50.]]),
fields.InputDataFields.groundtruth_classes: np.array([1]),
fields.InputDataFields.groundtruth_instance_masks: masks3,
fields.InputDataFields.groundtruth_verified_neg_classes:
np.array([0, 0, 0]),
fields.InputDataFields.groundtruth_not_exhaustive_classes:
np.array([0, 0, 0])
})
lvis_evaluator.add_single_detected_image_info(
image_id='image3',
detections_dict={
fields.DetectionResultFields.detection_masks: masks3,
fields.DetectionResultFields.detection_scores:
np.array([.8]),
fields.DetectionResultFields.detection_classes:
np.array([1])
})
metrics = lvis_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_AP'], 1.0)
@unittest.skipIf(tf_version.is_tf1(), 'Only Supported in TF2.X')
class LVISMaskEvaluationPyFuncTest(tf.test.TestCase):
def testAddEvalDict(self):
lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(_get_categories_list())
image_id = tf.constant('image1', dtype=tf.string)
groundtruth_boxes = tf.constant(
np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]),
dtype=tf.float32)
groundtruth_classes = tf.constant(np.array([1, 2]), dtype=tf.float32)
groundtruth_masks = tf.constant(np.stack([
np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)),
mode='constant'),
np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant')
]), dtype=tf.uint8)
original_image_spatial_shapes = tf.constant([[120, 120], [120, 120]],
dtype=tf.int32)
groundtruth_verified_neg_classes = tf.constant(np.array([0, 0, 0]),
dtype=tf.float32)
groundtruth_not_exhaustive_classes = tf.constant(np.array([0, 0, 0]),
dtype=tf.float32)
detection_scores = tf.constant(np.array([.9, .8]), dtype=tf.float32)
detection_classes = tf.constant(np.array([2, 1]), dtype=tf.float32)
detection_masks = tf.constant(np.stack([
np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
mode='constant'),
np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)),
mode='constant'),
]), dtype=tf.uint8)
input_data_fields = fields.InputDataFields
detection_fields = fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.groundtruth_verified_neg_classes:
groundtruth_verified_neg_classes,
input_data_fields.groundtruth_not_exhaustive_classes:
groundtruth_not_exhaustive_classes,
input_data_fields.original_image_spatial_shape:
original_image_spatial_shapes,
detection_fields.detection_scores: detection_scores,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks
}
lvis_evaluator.add_eval_dict(eval_dict)
self.assertLen(lvis_evaluator._groundtruth_list, 2)
self.assertLen(lvis_evaluator._detection_masks_list, 2)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrappers for third party lvis to be used within object_detection.
Usage example: given a set of images with ids in the list image_ids
and corresponding lists of numpy arrays encoding groundtruth (boxes,
masks and classes) and detections (masks, scores and classes), where
elements of each list correspond to detections/annotations of a single image,
then evaluation can be invoked as follows:
groundtruth = lvis_tools.LVISWrapper(groundtruth_dict)
detections = lvis_results.LVISResults(groundtruth, detections_list)
evaluator = lvis_tools.LVISEvalWrapper(groundtruth, detections,
iou_type='segm')
summary_metrics = evaluator.ComputeMetrics()
TODO(jonathanhuang): Add support for exporting to JSON.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from lvis import eval as lvis_eval
from lvis import lvis
import numpy as np
from pycocotools import mask
import six
from six.moves import range
def RleCompress(masks):
"""Compresses mask using Run-length encoding provided by pycocotools.
Args:
masks: uint8 numpy array of shape [mask_height, mask_width] with values in
{0, 1}.
Returns:
A pycocotools Run-length encoding of the mask.
"""
rle = mask.encode(np.asfortranarray(masks))
rle['counts'] = six.ensure_str(rle['counts'])
return rle
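# Example (round-trips through pycocotools, as verified in lvis_tools_test):
#   >>> m = np.array([[1, 1], [0, 1]], dtype=np.uint8)
#   >>> rle = RleCompress(m)
#   >>> mask.decode(rle).tolist()
#   [[1, 1], [0, 1]]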
def _ConvertBoxToCOCOFormat(box):
"""Converts a box in [ymin, xmin, ymax, xmax] format to COCO format.
This is a utility function for converting from our internal
[ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
i.e., [xmin, ymin, width, height].
Args:
box: a [ymin, xmin, ymax, xmax] numpy array
Returns:
a list of floats representing [xmin, ymin, width, height]
"""
return [float(box[1]), float(box[0]), float(box[3] - box[1]),
float(box[2] - box[0])]
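# Example: [ymin, xmin, ymax, xmax] = [10., 20., 50., 80.]
#          maps to [xmin, ymin, width, height] = [20., 10., 60., 40.]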
class LVISWrapper(lvis.LVIS):
"""Wrapper for the lvis.LVIS class."""
def __init__(self, dataset, detection_type='bbox'):
"""LVISWrapper constructor.
See https://www.lvisdataset.org/dataset for a description of the format.
    By default, the lvis.LVIS class constructor reads from a JSON file.
This function duplicates the same behavior but loads from a dictionary,
allowing us to perform evaluation without writing to external storage.
Args:
dataset: a dictionary holding bounding box annotations in the COCO format.
detection_type: type of detections being wrapped. Can be one of ['bbox',
'segmentation']
Raises:
ValueError: if detection_type is unsupported.
"""
    if detection_type not in ['bbox', 'segmentation']:
      raise ValueError('Unsupported detection type: %s' % detection_type)
    self.logger = logging.getLogger(__name__)
    self.logger.info('Loading annotations.')
    self.dataset = dataset
    self._create_index()
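# A minimal `dataset` dict accepted above (structure mirrored from
# lvis_tools_test below):
#   {'annotations': [{'id': 1, 'image_id': 'first', 'category_id': 1,
#                     'bbox': [...], 'area': ..., 'segmentation': rle}, ...],
#    'images': [{'id': 'first', 'neg_category_ids': [],
#                'not_exhaustive_category_ids': [], 'height': 256,
#                'width': 256}, ...],
#    'categories': [{'id': 1, 'name': 'person', 'frequency': 'f'}, ...]}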
class LVISEvalWrapper(lvis_eval.LVISEval):
"""LVISEval wrapper."""
def __init__(self, groundtruth=None, detections=None, iou_type='bbox'):
lvis_eval.LVISEval.__init__(
self, groundtruth, detections, iou_type=iou_type)
self._iou_type = iou_type
  def ComputeMetrics(self):
    self.run()
    return self.results
def ExportSingleImageGroundtruthToLVIS(image_id,
next_annotation_id,
category_id_set,
groundtruth_boxes,
groundtruth_classes,
groundtruth_masks=None,
groundtruth_area=None):
"""Export groundtruth of a single image to LVIS format.
This function converts groundtruth detection annotations represented as numpy
arrays to dictionaries that can be ingested by the LVIS evaluation API. Note
that the image_ids provided here must match the ones given to
ExportSingleImageDetectionMasksToLVIS. We assume that boxes, classes and masks
are in correspondence - that is, e.g., groundtruth_boxes[i, :], and
groundtruth_classes[i] are associated with the same groundtruth annotation.
In the exported result, "area" fields are set to the area of the groundtruth
bounding box unless groundtruth_area is provided.
Args:
image_id: a unique image identifier either of type integer or string.
next_annotation_id: integer specifying the first id to use for the
groundtruth annotations. All annotations are assigned a continuous integer
id starting from this value.
category_id_set: A set of valid class ids. Groundtruth with classes not in
category_id_set are dropped.
groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
    groundtruth_masks: optional uint8 numpy array of shape [num_gt_boxes,
      image_height, image_width] containing groundtruth masks.
groundtruth_area: numpy array (float32) with shape [num_gt_boxes]. If
provided, then the area values (in the original absolute coordinates) will
be populated instead of calculated from bounding box coordinates.
Returns:
    a list of groundtruth annotations for a single image in the LVIS format.
Raises:
    ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have
      the right lengths or (2) if any of the elements inside these lists do not
      have the correct shapes.
"""
if len(groundtruth_classes.shape) != 1:
raise ValueError('groundtruth_classes is '
'expected to be of rank 1.')
if len(groundtruth_boxes.shape) != 2:
raise ValueError('groundtruth_boxes is expected to be of '
'rank 2.')
if groundtruth_boxes.shape[1] != 4:
raise ValueError('groundtruth_boxes should have '
'shape[1] == 4.')
num_boxes = groundtruth_classes.shape[0]
if num_boxes != groundtruth_boxes.shape[0]:
raise ValueError('Corresponding entries in groundtruth_classes, '
'and groundtruth_boxes should have '
                     'compatible shapes (i.e., agree on the 0th dimension). '
'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
groundtruth_classes.shape[0],
groundtruth_boxes.shape[0], image_id))
groundtruth_list = []
for i in range(num_boxes):
if groundtruth_classes[i] in category_id_set:
if groundtruth_area is not None and groundtruth_area[i] > 0:
area = float(groundtruth_area[i])
else:
area = float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
(groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]))
export_dict = {
'id':
next_annotation_id + i,
'image_id':
image_id,
'category_id':
int(groundtruth_classes[i]),
'bbox':
list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
'area': area,
}
if groundtruth_masks is not None:
export_dict['segmentation'] = RleCompress(groundtruth_masks[i])
groundtruth_list.append(export_dict)
return groundtruth_list
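# Illustrative export (values match lvis_tools_test below): a groundtruth box
# [0., 0., .5, .5] ([ymin, xmin, ymax, xmax]) with class 2 becomes
#   {'id': next_annotation_id, 'image_id': image_id, 'category_id': 2,
#    'bbox': [0., 0., .5, .5], 'area': 0.25}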
def ExportSingleImageDetectionMasksToLVIS(image_id,
category_id_set,
detection_masks,
detection_scores,
detection_classes):
"""Export detection masks of a single image to LVIS format.
This function converts detections represented as numpy arrays to dictionaries
that can be ingested by the LVIS evaluation API. We assume that
detection_masks, detection_scores, and detection_classes are in correspondence
- that is: detection_masks[i, :], detection_classes[i] and detection_scores[i]
are associated with the same annotation.
Args:
image_id: unique image identifier either of type integer or string.
category_id_set: A set of valid class ids. Detections with classes not in
category_id_set are dropped.
detection_masks: uint8 numpy array of shape [num_detections, image_height,
image_width] containing detection_masks.
detection_scores: float numpy array of shape [num_detections] containing
scores for detection masks.
detection_classes: integer numpy array of shape [num_detections] containing
the classes for detection masks.
Returns:
    a list of detection mask annotations for a single image in the LVIS format.
Raises:
ValueError: if (1) detection_masks, detection_scores and detection_classes
do not have the right lengths or (2) if each of the elements inside these
lists do not have the correct shapes or (3) if image_ids are not integers.
"""
if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
    raise ValueError('All entries in detection_classes and detection_scores '
                     'are expected to be of rank 1.')
num_boxes = detection_classes.shape[0]
if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
raise ValueError('Corresponding entries in detection_classes, '
'detection_scores and detection_masks should have '
                     'compatible lengths and shapes. '
'Classes length: %d. Masks length: %d. '
'Scores length: %d' % (
detection_classes.shape[0], len(detection_masks),
detection_scores.shape[0]
))
detections_list = []
for i in range(num_boxes):
if detection_classes[i] in category_id_set:
detections_list.append({
'image_id': image_id,
'category_id': int(detection_classes[i]),
'segmentation': RleCompress(detection_masks[i]),
'score': float(detection_scores[i])
})
return detections_list
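# Illustrative export (mirroring the tests below): a detection of class 1 with
# score .8 becomes
#   {'image_id': image_id, 'category_id': 1,
#    'segmentation': <compressed RLE>, 'score': 0.8}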
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_model.object_detection.metrics.lvis_tools."""
from lvis import results as lvis_results
import numpy as np
from pycocotools import mask
import tensorflow.compat.v1 as tf
from object_detection.metrics import lvis_tools
class LVISToolsTest(tf.test.TestCase):
def setUp(self):
super(LVISToolsTest, self).setUp()
mask1 = np.pad(
np.ones([100, 100], dtype=np.uint8),
((100, 56), (100, 56)), mode='constant')
mask2 = np.pad(
np.ones([50, 50], dtype=np.uint8),
((50, 156), (50, 156)), mode='constant')
mask1_rle = lvis_tools.RleCompress(mask1)
mask2_rle = lvis_tools.RleCompress(mask2)
groundtruth_annotations_list = [
{
'id': 1,
'image_id': 'first',
'category_id': 1,
'bbox': [100., 100., 100., 100.],
'area': 100.**2,
'segmentation': mask1_rle
},
{
'id': 2,
'image_id': 'second',
'category_id': 1,
'bbox': [50., 50., 50., 50.],
'area': 50.**2,
'segmentation': mask2_rle
},
]
image_list = [
{
'id': 'first',
'neg_category_ids': [],
'not_exhaustive_category_ids': [],
'height': 256,
'width': 256
},
{
'id': 'second',
'neg_category_ids': [],
'not_exhaustive_category_ids': [],
'height': 256,
'width': 256
}
]
category_list = [{'id': 0, 'name': 'person', 'frequency': 'f'},
{'id': 1, 'name': 'cat', 'frequency': 'c'},
{'id': 2, 'name': 'dog', 'frequency': 'r'}]
self._groundtruth_dict = {
'annotations': groundtruth_annotations_list,
'images': image_list,
'categories': category_list
}
self._detections_list = [
{
'image_id': 'first',
'category_id': 1,
'segmentation': mask1_rle,
'score': .8
},
{
'image_id': 'second',
'category_id': 1,
'segmentation': mask2_rle,
'score': .7
},
]
def testLVISWrappers(self):
groundtruth = lvis_tools.LVISWrapper(self._groundtruth_dict)
detections = lvis_results.LVISResults(groundtruth, self._detections_list)
evaluator = lvis_tools.LVISEvalWrapper(groundtruth, detections,
iou_type='segm')
summary_metrics = evaluator.ComputeMetrics()
self.assertAlmostEqual(1.0, summary_metrics['AP'])
def testSingleImageDetectionMaskExport(self):
masks = np.array(
[[[1, 1,], [1, 1]],
[[0, 0], [0, 1]],
[[0, 0], [0, 0]]], dtype=np.uint8)
classes = np.array([1, 2, 3], dtype=np.int32)
scores = np.array([0.8, 0.2, 0.7], dtype=np.float32)
lvis_annotations = lvis_tools.ExportSingleImageDetectionMasksToLVIS(
image_id='first_image',
category_id_set=set([1, 2, 3]),
detection_classes=classes,
detection_scores=scores,
detection_masks=masks)
expected_counts = ['04', '31', '4']
for i, mask_annotation in enumerate(lvis_annotations):
self.assertEqual(mask_annotation['segmentation']['counts'],
expected_counts[i])
self.assertTrue(np.all(np.equal(mask.decode(
mask_annotation['segmentation']), masks[i])))
self.assertEqual(mask_annotation['image_id'], 'first_image')
self.assertEqual(mask_annotation['category_id'], classes[i])
self.assertAlmostEqual(mask_annotation['score'], scores[i])
def testSingleImageGroundtruthExport(self):
masks = np.array(
[[[1, 1,], [1, 1]],
[[0, 0], [0, 1]],
[[0, 0], [0, 0]]], dtype=np.uint8)
boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5],
[.5, .5, 1, 1]], dtype=np.float32)
lvis_boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5],
[.5, .5, .5, .5]], dtype=np.float32)
classes = np.array([1, 2, 3], dtype=np.int32)
next_annotation_id = 1
expected_counts = ['04', '31', '4']
lvis_annotations = lvis_tools.ExportSingleImageGroundtruthToLVIS(
image_id='first_image',
category_id_set=set([1, 2, 3]),
next_annotation_id=next_annotation_id,
groundtruth_boxes=boxes,
groundtruth_classes=classes,
groundtruth_masks=masks)
for i, annotation in enumerate(lvis_annotations):
self.assertEqual(annotation['segmentation']['counts'],
expected_counts[i])
self.assertTrue(np.all(np.equal(mask.decode(
annotation['segmentation']), masks[i])))
self.assertTrue(np.all(np.isclose(annotation['bbox'], lvis_boxes[i])))
self.assertEqual(annotation['image_id'], 'first_image')
self.assertEqual(annotation['category_id'], classes[i])
self.assertEqual(annotation['id'], i + next_annotation_id)
if __name__ == '__main__':
tf.test.main()
......@@ -4,8 +4,8 @@ from setuptools import find_packages
from setuptools import setup
REQUIRED_PACKAGES = ['pillow', 'lxml', 'matplotlib', 'Cython',
-                     'contextlib2', 'tf-slim', 'six', 'pycocotools', 'scipy',
-                     'pandas']
+                     'contextlib2', 'tf-slim', 'six', 'pycocotools', 'lvis',
+                     'scipy', 'pandas']
setup(
name='object_detection',
......
......@@ -18,6 +18,7 @@ REQUIRED_PACKAGES = [
'tf-slim',
'six',
'pycocotools',
'lvis',
'scipy',
'pandas',
'tf-models-official'
......