Unverified commit 0f0c7745, authored by vivek rathod, committed by GitHub

Merged commit includes the following changes: (#8739)

318417714  by jonathanhuang:

    Internal change.

--
318367213  by sbeery:

    Pointing users to more documentation for Beam

--
318358685  by sbeery:

    Context R-CNN sample config for GPU

--
318309800  by rathodv:

    Internal

--
318303364  by ronnyvotel:

    Adding the option for parsing and including DensePose annotations. http://densepose.org/

--
318291319  by aom:

    Adds conv_bn_act conv_block option, and naming convention changes for BiFPN utils.

--
318200598  by ronnyvotel:

    Updating the TF Example Decoder to parse DensePose annotations.

--
318174065  by jonathanhuang:

    Internal change.

--
318167805  by rathodv:

    Add use_tpu flag to TF2 binary.

--
318145285  by aom:

    Adds option for convolutional Keras box predictor to force use_bias.

--

PiperOrigin-RevId: 318417714
Parent 1e4fd825
......@@ -391,7 +391,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
pass
@abc.abstractmethod
def restore_map(self, fine_tune_checkpoint_type='detection'):
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
"""Returns a map of variables to load from a foreign checkpoint.
Returns a map of variable names to load from a checkpoint to variables in
......@@ -407,6 +409,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
......@@ -414,6 +419,36 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
@abc.abstractmethod
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of variables to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
error for both restore_map and restore_from_objects depending on version.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
pass
@abc.abstractmethod
def updates(self):
"""Returns a list of update operators for this model.
......
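The `restore_map` / `restore_from_objects` split above is easiest to read from the caller's side. A minimal sketch of how a `restore_from_objects` result is consumed (the helper name is illustrative; the logic mirrors the `load_fine_tune_checkpoint` change later in this commit):

```python
import tensorflow as tf

def restore_from_v2_checkpoint(model, checkpoint_path,
                               checkpoint_type='detection'):
  """Restores `model` from an object-based (TF2) checkpoint."""
  restore_dict = model.restore_from_objects(
      fine_tune_checkpoint_type=checkpoint_type)
  # The dict keys become attribute names in the checkpoint's object graph,
  # so they must match how the foreign checkpoint was originally written.
  ckpt = tf.train.Checkpoint(**restore_dict)
  ckpt.restore(checkpoint_path).assert_existing_objects_matched()
```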
......@@ -57,6 +57,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self):
return {}
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def regularization_losses(self):
return []
......
......@@ -66,6 +66,11 @@ class InputDataFields(object):
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
groundtruth_dp_part_ids: Part indices for DensePose points.
groundtruth_dp_surface_coords: Image locations and UV coordinates for
DensePose points.
num_groundtruth_boxes: number of groundtruth boxes.
is_annotated: whether an image has been labeled or not.
true_image_shapes: true shapes of images in the resized images, as resized
......@@ -108,6 +113,9 @@ class InputDataFields(object):
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
num_groundtruth_boxes = 'num_groundtruth_boxes'
is_annotated = 'is_annotated'
true_image_shape = 'true_image_shape'
......
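For concreteness, a hypothetical image with three instances would populate the new DensePose fields with shapes like these (values are illustrative only, not taken from any dataset):

```python
import numpy as np

# Shapes of the new DensePose groundtruth fields, assuming 3 instances and at
# most 4 sampled points on any single instance.
groundtruth_dp_num_points = np.array([0, 4, 2], np.int32)        # [num_instances]
groundtruth_dp_part_ids = np.zeros((3, 4), np.int32)             # [num_instances, num_points]
groundtruth_dp_surface_coords = np.zeros((3, 4, 4), np.float32)  # points are (y, x, v, u)
```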
......@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
......@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False,
expand_hierarchy_labels=False):
expand_hierarchy_labels=False,
load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants.
load_dense_pose: Whether to load DensePose annotations.
Raises:
ValueError: If `instance_mask_type` option is not one of
......@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if load_dense_pose:
self.keys_to_features['image/object/densepose/num'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/part_index'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/x'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/y'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/u'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/v'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_num_points] = (
slim_example_decoder.Tensor('image/object/densepose/num'))
self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/part_index',
'image/object/densepose/num'], self._dense_pose_part_indices))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_surface_coords] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/x', 'image/object/densepose/y',
'image/object/densepose/u', 'image/object/densepose/v',
'image/object/densepose/num'],
self._dense_pose_surface_coordinates))
if label_map_proto_file:
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
......@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
dtype=tf.int32)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
......@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _dense_pose_part_indices(self, keys_to_tensors):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with less sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
part_index = keys_to_tensors['image/object/densepose/part_index']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(part_index, tf.SparseTensor):
part_index = tf.sparse_tensor_to_dense(part_index)
part_index = tf.cast(part_index, dtype=tf.int32)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_parts_tensor(instance_ind):
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
part_inds = part_index[points_range_start:points_range_end]
return shape_utils.pad_or_clip_nd(part_inds,
output_shape=[max_points_per_instance])
return tf.map_fn(pad_parts_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.int32)
def _dense_pose_surface_coordinates(self, keys_to_tensors):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
    A 3-D float32 tensor of shape [num_instances, num_points, 4], where each
    sampled DensePose point is encoded as (y, x, v, u). The (y, x) pair is the
    normalized image location of the point, and (v, u) is the (also
    normalized) surface coordinate within the part. The value
`num_points` corresponds to the maximum number of sampled points across
    all instances in the image. Note that instances with fewer sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
dp_y = keys_to_tensors['image/object/densepose/y']
dp_x = keys_to_tensors['image/object/densepose/x']
dp_v = keys_to_tensors['image/object/densepose/v']
dp_u = keys_to_tensors['image/object/densepose/u']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(dp_y, tf.SparseTensor):
dp_y = tf.sparse_tensor_to_dense(dp_y)
if isinstance(dp_x, tf.SparseTensor):
dp_x = tf.sparse_tensor_to_dense(dp_x)
if isinstance(dp_v, tf.SparseTensor):
dp_v = tf.sparse_tensor_to_dense(dp_v)
if isinstance(dp_u, tf.SparseTensor):
dp_u = tf.sparse_tensor_to_dense(dp_u)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_surface_coordinates_tensor(instance_ind):
"""Pads DensePose surface coordinates for each instance."""
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
y = dp_y[points_range_start:points_range_end]
x = dp_x[points_range_start:points_range_end]
v = dp_v[points_range_start:points_range_end]
u = dp_u[points_range_start:points_range_end]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor = tf.stack([y, x, v, u], axis=1)
return shape_utils.pad_or_clip_nd(
unpadded_tensor, output_shape=[max_points_per_instance, 4])
return tf.map_fn(pad_surface_coordinates_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.float32)
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
......
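The two DensePose handlers above share one trick: per-point features arrive as a single flat vector concatenated across instances, and `image/object/densepose/num` supplies the split points. A standalone NumPy sketch of the ragged-to-padded conversion (inputs chosen to match the unit test below):

```python
import numpy as np

num_points = np.array([0, 4, 2])           # sampled points per instance
part_index = np.array([2, 2, 3, 4, 2, 9])  # flat, concatenated over instances
max_points = num_points.max()
offsets = np.concatenate([[0], np.cumsum(num_points)])

padded = np.zeros((len(num_points), max_points), dtype=np.int32)
for i in range(len(num_points)):
  row = part_index[offsets[i]:offsets[i + 1]]
  padded[i, :len(row)] = row
# padded == [[0, 0, 0, 0], [2, 2, 3, 4], [2, 9, 0, 0]]
```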
......@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertTrue(
fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
......@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_1, [])
self.assertTrue(
fields.InputDataFields.groundtruth_image_classes in tensor_dict)
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 2]))
......@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_2, [])
self.assertTrue(
fields.InputDataFields.groundtruth_image_classes in tensor_dict)
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 3]))
......@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
expected_image_confidence,
tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
def testDecodeDensePose(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
bbox_ymins = [0.0, 4.0, 2.0]
bbox_xmins = [1.0, 5.0, 8.0]
bbox_ymaxs = [2.0, 6.0, 1.0]
bbox_xmaxs = [3.0, 7.0, 3.3]
densepose_num = [0, 4, 2]
densepose_part_index = [2, 2, 3, 4, 2, 9]
densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/densepose/num':
dataset_util.int64_list_feature(densepose_num),
'image/object/densepose/part_index':
dataset_util.int64_list_feature(densepose_part_index),
'image/object/densepose/x':
dataset_util.float_list_feature(densepose_x),
'image/object/densepose/y':
dataset_util.float_list_feature(densepose_y),
'image/object/densepose/u':
dataset_util.float_list_feature(densepose_u),
'image/object/densepose/v':
dataset_util.float_list_feature(densepose_v),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_dense_pose=True)
output = example_decoder.decode(tf.convert_to_tensor(example))
dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = output[
fields.InputDataFields.groundtruth_dp_surface_coords]
return dp_num_points, dp_part_ids, dp_surface_coords
dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
graph_fn, [])
expected_dp_num_points = [0, 4, 2]
expected_dp_part_ids = [
[0, 0, 0, 0],
[2, 2, 3, 4],
[2, 9, 0, 0]
]
expected_dp_surface_coords = np.array(
[
# Instance 0 (no points).
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
# Instance 1 (4 points).
[[0.9, 0.1, 0.99, 0.01],
[0.8, 0.2, 0.98, 0.02],
[0.7, 0.3, 0.97, 0.03],
[0.6, 0.4, 0.96, 0.04]],
# Instance 2 (2 points).
[[0.5, 0.5, 0.95, 0.05],
[0.4, 0.6, 0.94, 0.06],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
], dtype=np.float32)
self.assertAllEqual(dp_num_points, expected_dp_num_points)
self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
if __name__ == '__main__':
tf.test.main()
......@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -73,6 +73,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.
Example usage:
......@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
# DensePose annotations are only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to keep only person-class images/annotations (for the keypoint and
# DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS
......@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
_DP_PART_ID_OFFSET = 1
def clip_to_unit(x):
return min(max(x, 0.0), 1.0)
def create_tf_example(image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=None):
keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto.
Args:
......@@ -108,10 +142,23 @@ def create_tf_example(image,
    dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns:
key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
......@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
......@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8'))
category_names.append(category_name)
area.append(object_annotations['area'])
if include_masks:
......@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose surface coordinates are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in range
# [0., 256.]). The following converts the coordinates
# so that they are expressed in normalized image coordinates.
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
......@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
num_shards,
keypoint_annotations_file=''):
keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
......@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
......@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count)
logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {}
if keypoint_annotations_file:
......@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
......@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped,
num_keypoint_annotations_skipped) = create_tf_example(
densepose_annotations_dict = None
if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict)
keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
def main(_):
......@@ -347,20 +492,26 @@ def main(_):
train_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file,
FLAGS.test_image_dir,
testdev_output_path,
FLAGS.include_masks,
num_shards=100)
num_shards=50)
if __name__ == '__main__':
......
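The surface-coordinate conversion in `create_tf_example` is worth a worked example: raw `dp_x`/`dp_y` live on a [0, 256] grid relative to the instance box, are clipped to a box-relative fraction, and are then mapped into normalized image coordinates. Using the same geometry as the unit test below (a [64, 64, 128, 128] box in a 256x256 image):

```python
box_x, box_width, image_width = 64.0, 128.0, 256.0

dp_x = 128.0                                  # raw value on the [0, 256] box grid
x_box_rel = min(max(dp_x / 256.0, 0.0), 1.0)  # clip_to_unit -> 0.5 of box width
x_norm = (box_x + x_box_rel * box_width) / image_width
assert x_norm == (64.0 + 0.5 * 128.0) / 256.0 == 0.5
```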
......@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0)
......@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0)
......@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
}
(_, example, _,
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict)
_, example, _, num_keypoint_annotation_skipped, _ = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual(
......@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
......@@ -74,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -76,6 +76,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -105,6 +105,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -30,9 +30,12 @@ pip install apache-beam
```
and can be run locally, or on a cluster for efficient processing of large
amounts of data. See the
amounts of data. Note that generate_detection_data.py and
generate_embedding_data.py both involve running inference, and may be very slow
to run locally. See the
[Apache Beam documentation](https://beam.apache.org/documentation/runners/dataflow/)
for more information.
for more information, and the Google Cloud documentation for a tutorial on
[running Beam jobs on Dataflow](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python).
### Generating TfRecords from a set of images and a COCO-CameraTraps style JSON
......@@ -191,3 +194,6 @@ python export_inference_graph.py \
--side_input_types float,int
```
If you have questions about Context R-CNN, please contact
[Sara Beery](https://beerys.github.io/).
......@@ -185,6 +185,9 @@ class FakeDetectionModel(model.DetectionModel):
"""
return {var.op.name: var for var in tf.global_variables()}
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def updates(self):
"""Returns a list of update operators for this model.
......
......@@ -2330,8 +2330,39 @@ class CenterNetMetaArch(model.DetectionModel):
def regularization_losses(self):
return []
def restore_map(self, fine_tune_checkpoint_type='classification',
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
raise RuntimeError('CenterNetMetaArch not supported under TF1.x.')
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (not implemented
in CenterNet) is intended to be used to restore Slim-based models when
running Tensorflow 1.x.
TODO(jonathanhuang): Make this function consistent with other
meta-architectures.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self._feature_extractor.get_base_model()}
......@@ -2340,7 +2371,7 @@ class CenterNetMetaArch(model.DetectionModel):
return {'feature_extractor': self._feature_extractor.get_model()}
else:
raise ValueError('Unknown fine tune checkpoint type - {}'.format(
raise ValueError('Not supported fine tune checkpoint type - {}'.format(
fine_tune_checkpoint_type))
def updates(self):
......
......@@ -1574,8 +1574,9 @@ class CenterNetMetaArchRestoreTest(test_case.TestCase):
"""Test restore map for a resnet backbone."""
model = build_center_net_meta_arch(build_resnet=True)
restore_map = model.restore_map('classification')
self.assertIsInstance(restore_map['feature_extractor'], tf.keras.Model)
restore_from_objects_map = model.restore_from_objects('classification')
self.assertIsInstance(restore_from_objects_map['feature_extractor'],
tf.keras.Model)
class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
......@@ -1601,9 +1602,6 @@ class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
def postprocess(self):
pass
def restore_map(self):
pass
def call(self, inputs):
batch_size, input_height, input_width, _ = inputs.shape
fake_output = tf.ones([
......
......@@ -261,31 +261,6 @@ class FasterRCNNKerasFeatureExtractor(object):
"""Get model that extracts second stage box classifier features."""
pass
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor.
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in variables_helper.get_global_variables_safely():
for scope_name in [first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope]:
if variable.op.name.startswith(scope_name):
var_name = variable.op.name.replace(scope_name + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
......@@ -2801,6 +2776,43 @@ class FasterRCNNMetaArch(model.DetectionModel):
variables_to_restore, include_patterns=include_patterns)
return {var.op.name: var for var in feature_extractor_variables}
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self.classification_backbone}
elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
_feature_extractor_for_box_classifier_features=
self._feature_extractor_for_box_classifier_features,
_feature_extractor_for_proposal_features=
self._feature_extractor_for_proposal_features)
return {'model': fake_model}
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def updates(self):
"""Returns a list of update operators for this model.
......
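In the `'detection'` branch above, the throwaway `tf.train.Checkpoint` exists only so the attribute names in the restored object graph line up with those of the saved detection model. A sketch of the restore under that assumption (function and argument names are ours, not part of the API):

```python
import tensorflow as tf

def restore_detection_backbones(proposal_extractor, box_classifier_extractor,
                                checkpoint_path):
  # Mimic the attribute layout of FasterRCNNMetaArch so the object graph in
  # the detection checkpoint resolves onto the two feature extractors.
  fake_model = tf.train.Checkpoint(
      _feature_extractor_for_proposal_features=proposal_extractor,
      _feature_extractor_for_box_classifier_features=box_classifier_extractor)
  ckpt = tf.train.Checkpoint(model=fake_model)
  ckpt.restore(checkpoint_path).assert_existing_objects_matched()
```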
......@@ -250,35 +250,6 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
def call(self, inputs, **kwargs):
return self._extract_features(inputs)
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
if tf.executing_eagerly():
for variable in self.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
else:
# b/137854499: use global_variables.
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class SSDMetaArch(model.DetectionModel):
"""SSD Meta-architecture definition."""
......@@ -1295,8 +1266,8 @@ class SSDMetaArch(model.DetectionModel):
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type='detection'`). If False, only variables
within the appropriate scopes are included. Default False.
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
......@@ -1311,36 +1282,53 @@ class SSDMetaArch(model.DetectionModel):
elif fine_tune_checkpoint_type == 'detection':
variables_to_restore = {}
if tf.executing_eagerly():
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
# Grab all detection vars by name
for variable in self.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
variables_to_restore[var_name] = variable
variables_to_restore[var_name] = variable
else:
# Grab just the feature extractor vars by name
for variable in self._feature_extractor.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
variables_to_restore[var_name] = variable
else:
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
return variables_to_restore
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self.classification_backbone}
elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor)
return {'model': fake_model}
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def updates(self):
"""Returns a list of update operators for this model.
......
......@@ -123,6 +123,9 @@ class SimpleModel(model.DetectionModel):
return []
def restore_map(self, *args, **kwargs):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
return {'model': self}
def preprocess(self, _):
......@@ -174,7 +177,7 @@ class ModelCheckpointTest(tf.test.TestCase):
class IncompatibleModel(SimpleModel):
def restore_map(self, *args, **kwargs):
def restore_from_objects(self, *args, **kwargs):
return {'weight': self.weight}
......@@ -207,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
self._model, self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
np.testing.assert_allclose(self._model.weight.numpy(), 42)
......@@ -220,7 +222,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
IncompatibleModel(), self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
......
......@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vutils
# pylint: disable=g-import-not-at-top
......@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP
### NOTE: This file is a wip.
### TODO(kaftan): Explore adding unit tests for individual methods
### TODO(kaftan): Add unit test that checks training on a single image w/
#### groundtruth, and verify that loss goes to zero.
#### Possibly have version that takes it as the whole train & eval dataset,
#### & verify the loss output from the eval_loop method.
### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
RESTORE_MAP_ERROR_TEMPLATE = (
'Since we are restoring a v2 style checkpoint'
......@@ -277,14 +269,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for key, value in checkpoint_restore_map.items():
if not (isinstance(key, str) and isinstance(value, tf.Module)):
if not (isinstance(key, str) and
(isinstance(value, tf.Module)
or isinstance(value, tf.train.Checkpoint))):
raise TypeError(RESTORE_MAP_ERROR_TEMPLATE.format(
key.__class__.__name__, value.__class__.__name__))
def is_object_based_checkpoint(checkpoint_path):
"""Returns true if `checkpoint_path` points to an object-based checkpoint."""
var_names = [var[0] for var in tf.train.list_variables(checkpoint_path)]
return '_CHECKPOINTABLE_OBJECT_GRAPH' in var_names
def load_fine_tune_checkpoint(
model, checkpoint_path, checkpoint_type, checkpoint_version,
load_all_detection_checkpoint_vars, input_dataset,
model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset,
unpad_groundtruth_tensors):
"""Load a fine tuning classification or detection checkpoint.
......@@ -292,8 +291,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
It then loads a variable-name based classification or detection checkpoint
that comes from converted TF 1.x slim model checkpoints.
It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
......@@ -306,14 +304,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
whether to load checkpoints in V1 style or V2 style.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scopes are included. Default False.
whether to load checkpoints in V1 style or V2 style. In this binary
we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
Raises:
IOError: if `checkpoint_path` does not point at a valid object-based
checkpoint
ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
if not is_object_based_checkpoint(checkpoint_path):
raise IOError('Checkpoint is expected to be an object-based checkpoint.')
if checkpoint_version == train_pb2.CheckpointVersion.V1:
raise ValueError('Checkpoint version should be V2')
features, labels = iter(input_dataset).next()
@tf.function
......@@ -336,26 +342,11 @@ def load_fine_tune_checkpoint(
labels,
))
if checkpoint_version == train_pb2.CheckpointVersion.V1:
var_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
available_var_map = variables_helper.get_variables_available_in_checkpoint(
var_map,
checkpoint_path,
include_global_step=False)
tf.train.init_from_checkpoint(checkpoint_path,
available_var_map)
elif checkpoint_version == train_pb2.CheckpointVersion.V2:
restore_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
validate_tf_v2_checkpoint_restore_map(restore_map)
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
restore_from_objects_dict = model.restore_from_objects(
fine_tune_checkpoint_type=checkpoint_type)
validate_tf_v2_checkpoint_restore_map(restore_from_objects_dict)
ckpt = tf.train.Checkpoint(**restore_from_objects_dict)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
def get_filepath(strategy, filepath):
......@@ -464,8 +455,10 @@ def train_loop(
if kwargs['use_bfloat16']:
tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
load_all_detection_checkpoint_vars = (
train_config.load_all_detection_checkpoint_vars)
if train_config.load_all_detection_checkpoint_vars:
raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
'unsupported in TF2')
config_util.update_fine_tune_checkpoint_type(train_config)
fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version
......@@ -533,7 +526,6 @@ def train_loop(
train_config.fine_tune_checkpoint,
fine_tune_checkpoint_type,
fine_tune_checkpoint_version,
load_all_detection_checkpoint_vars,
train_input,
unpad_groundtruth_tensors)
......@@ -807,8 +799,10 @@ def eager_eval_loop(
eval_metrics[loss_key] = loss_metrics[loss_key].result()
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
tf.logging.info('Eval metrics at step %d', global_step)
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
......
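The new `is_object_based_checkpoint` guard keys off the sentinel variable that TF2 object-based checkpoints always contain. A small standalone check in the same spirit (helper name is ours):

```python
import tensorflow as tf

def describe_checkpoint(checkpoint_path):
  names = [name for name, _ in tf.train.list_variables(checkpoint_path)]
  kind = ('object-based (V2)' if '_CHECKPOINTABLE_OBJECT_GRAPH' in names
          else 'name-based (V1)')
  print('%s: %s, %d variables' % (checkpoint_path, kind, len(names)))
```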
......@@ -16,14 +16,6 @@
r"""Creates and runs TF2 object detection models.
##################################
NOTE: This module has not been fully tested; please bear with us while we iron
out the kinks.
##################################
When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
GPUS with MirroredStrategy/MultiWorkerMirroredStrategy.
For local training/evaluation run:
PIPELINE_CONFIG_PATH=path/to/pipeline.config
MODEL_DIR=/tmp/model_outputs
......@@ -60,6 +52,8 @@ flags.DEFINE_string(
flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an '
'evaluation checkpoint before exiting.')
flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_integer(
'num_workers', 1, 'When num_workers > 1, training uses '
'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
......@@ -84,7 +78,7 @@ def main(unused_argv):
checkpoint_dir=FLAGS.checkpoint_dir,
wait_interval=300, timeout=FLAGS.eval_timeout)
else:
if tf.config.get_visible_devices('TPU'):
if FLAGS.use_tpu:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
......
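The net effect of the new `--use_tpu` flag, condensed from the hunk above into a single helper (the function name is ours; `num_workers` handling is elided):

```python
import tensorflow as tf

def make_strategy(use_tpu):
  if use_tpu:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    return tf.distribute.experimental.TPUStrategy(resolver)
  return tf.distribute.MirroredStrategy()
```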
......@@ -73,7 +73,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
proposal_classifier_features = (
model(proposal_feature_maps))
features_shape = tf.shape(proposal_classifier_features)
self.assertAllEqual(features_shape.numpy(), [2, 8, 8, 1536])
self.assertAllEqual(features_shape.numpy(), [2, 9, 9, 1536])
if __name__ == '__main__':
......
......@@ -175,23 +175,6 @@ class FasterRCNNResnetKerasFeatureExtractor(
self._variable_dict[variable.name[:-2]] = variable
return keras_model
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor (unused).
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
class FasterRCNNResnet50KerasFeatureExtractor(
FasterRCNNResnetKerasFeatureExtractor):
......
......@@ -163,14 +163,3 @@ class SSDMobileNetV1KerasFeatureExtractor(
'Conv2d_13_pointwise': image_features[1]})
return list(feature_maps.values())
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
......@@ -241,14 +241,3 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
last_feature_map = layer(last_feature_map)
feature_maps.append(last_feature_map)
return feature_maps
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
......@@ -166,14 +166,3 @@ class SSDMobileNetV2KerasFeatureExtractor(
'layer_19': image_features[1]})
return list(feature_maps.values())
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
......@@ -246,17 +246,6 @@ class SSDResNetV1FpnKerasFeatureExtractor(
feature_maps.append(last_feature_map)
return feature_maps
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
class SSDResNet50V1FpnKerasFeatureExtractor(
SSDResNetV1FpnKerasFeatureExtractor):
......
......@@ -314,7 +314,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
self, inserted_layer_counter, target_channel):
projection_layers = []
if inserted_layer_counter >= 0:
use_bias = False if self._apply_batch_norm else True
use_bias = False if (self._apply_batch_norm and not
self._conv_hyperparams.force_use_bias()) else True
projection_layers.append(keras.Conv2D(
target_channel, [1, 1], strides=1, padding='SAME',
name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter),
......@@ -331,7 +332,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
conv_layers = []
batch_norm_layers = []
activation_layers = []
use_bias = False if self._apply_batch_norm else True
use_bias = False if (self._apply_batch_norm and not
self._conv_hyperparams.force_use_bias()) else True
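# Sketch, not part of the commit: the conditional above is equivalent to the
# shorter form
#   use_bias = (not self._apply_batch_norm
#               or self._conv_hyperparams.force_use_bias())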
for additional_conv_layer_idx in range(self._num_layers_before_predictor):
layer_name = '{}/conv2d_{}'.format(
tower_name_scope, additional_conv_layer_idx)
......@@ -363,7 +365,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
training=(self._is_training and not self._freeze_batchnorm),
name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
tower_name_scope, additional_conv_layer_idx, feature_index)))
activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
activation_layers.append(self._conv_hyperparams.build_activation_layer(
name='{}/conv2d_{}/activation_{}'.format(
tower_name_scope, additional_conv_layer_idx, feature_index)))
# Set conv layers as the shared conv layers for different feature maps with
# the same tower_name_scope.
......
......@@ -31,7 +31,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
// Next id: 31
// Next id: 32
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
......@@ -119,6 +119,10 @@ message InputReader {
// Type of instance mask.
optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];
// Whether to load DensePose data. If set, must also set load_instance_masks
// to true.
optional bool load_dense_pose = 31 [default = false];
// Whether to use the display name when decoding examples. This is only used
// when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false];
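A hedged pbtxt sketch of an input reader enabling the new field (paths
hypothetical); note that `load_instance_masks` must also be set, as the
comment above requires:

  train_input_reader {
    label_map_path: "PATH_TO_BE_CONFIGURED/label_map.pbtxt"
    load_instance_masks: true
    load_dense_pose: true
    tf_record_input_reader {
      input_path: "PATH_TO_BE_CONFIGURED/train.record-?????-of-?????"
    }
  }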
......
......@@ -59,7 +59,8 @@ message TrainConfig {
// Whether to load all checkpoint vars that match model variable names and
// sizes. This option is only available if `from_detection_checkpoint` is
// True.
// True. This option is *not* supported in TF2; setting it to true
// will raise an error.
optional bool load_all_detection_checkpoint_vars = 19 [default = false];
// Number of steps to train the DetectionModel for. If 0, will train the model
......
# Context R-CNN configuration for the Snapshot Serengeti Dataset, using
# sequence example input data that includes context_features.
# This model attends to contextual features within the Faster R-CNN
# object detection framework to improve detection performance.
# See https://arxiv.org/abs/1912.03538 for more information.
# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
# configured.
model {
faster_rcnn {
num_classes: 48
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
feature_extractor {
type: "faster_rcnn_resnet101"
first_stage_features_stride: 16
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
height_stride: 16
width_stride: 16
scales: 0.25
scales: 0.5
scales: 1.0
scales: 2.0
aspect_ratios: 0.5
aspect_ratios: 1.0
aspect_ratios: 2.0
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.00999999977648
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.699999988079
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
use_dropout: false
dropout_keep_probability: 1.0
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_matmul_gather_in_matcher: true
use_static_balanced_label_sampler: true
use_static_shapes: true
context_config {
max_num_context_features: 2000
context_feature_length: 2057
}
}
}
train_config {
batch_size: 8
data_augmentation_options {
random_horizontal_flip {
}
}
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
manual_step_learning_rate {
initial_learning_rate: 0.0
schedule {
step: 400000
learning_rate: 0.002
}
schedule {
step: 500000
learning_rate: 0.0002
}
schedule {
step: 600000
learning_rate: 0.00002
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
from_detection_checkpoint: true
num_steps: 5000000
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
eval_config {
max_evals: 50
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
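A hedged launch sketch for this sample config, following the local-run
convention shown in the model_main_tf2.py docstring earlier in this commit
(binary path and config filename assumed):

  PIPELINE_CONFIG_PATH=path/to/context_rcnn_resnet101_snapshot_serengeti.config
  MODEL_DIR=/tmp/model_outputs
  python object_detection/model_main_tf2.py \
    --pipeline_config_path=$PIPELINE_CONFIG_PATH \
    --model_dir=$MODEL_DIR \
    --alsologtostderr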
......@@ -26,7 +26,8 @@ from object_detection.utils import shape_utils
def create_conv_block(name, num_filters, kernel_size, strides, padding,
use_separable, apply_batchnorm, apply_activation,
conv_hyperparams, is_training, freeze_batchnorm):
conv_hyperparams, is_training, freeze_batchnorm,
conv_bn_act_pattern=True):
"""Create Keras layers for regular or separable convolutions.
Args:
......@@ -50,6 +51,9 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
conv_bn_act_pattern: Bool. When True (the default), the returned layers are
ordered [conv, batchnorm, activation]; when False, the order is
[activation, conv, batchnorm].
Returns:
A list of keras layers, including (regular or separable) convolution, and
......@@ -73,7 +77,7 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
depth_multiplier=1,
padding=padding,
strides=strides,
name=name + '_separable_conv',
name=name + 'separable_conv',
**kwargs))
else:
layers.append(
......@@ -82,18 +86,22 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
kernel_size=kernel_size,
padding=padding,
strides=strides,
name=name + '_conv',
name=name + 'conv',
**conv_hyperparams.params()))
if apply_batchnorm:
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=name + '_batchnorm'))
name=name + 'batchnorm'))
if apply_activation:
layers.append(
conv_hyperparams.build_activation_layer(name=name + '_activation'))
activation_layer = conv_hyperparams.build_activation_layer(
name=name + 'activation')
if conv_bn_act_pattern:
layers.append(activation_layer)
else:
layers = [activation_layer] + layers
return layers
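Hedged usage sketch (assuming `conv_hyperparams` is a KerasLayerHyperparams
instance built elsewhere); note that after the naming change above the
caller's `name` is concatenated directly, so it should already end in a
separator such as '/':

  block_layers = create_conv_block(
      name='fpn_node_0/', num_filters=64, kernel_size=3, strides=1,
      padding='SAME', use_separable=True, apply_batchnorm=True,
      apply_activation=True, conv_hyperparams=conv_hyperparams,
      is_training=True, freeze_batchnorm=False, conv_bn_act_pattern=False)
  # With conv_bn_act_pattern=False the order is [activation, conv, batchnorm].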
......@@ -133,28 +141,28 @@ def create_downsample_feature_map_ops(scale, downsample_method,
pool_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_max_x{}'.format(stride)))
name=name + 'downsample_max_x{}'.format(stride)))
elif downsample_method == 'avg_pooling':
layers.append(
tf.keras.layers.AveragePooling2D(
pool_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_avg_x{}'.format(stride)))
name=name + 'downsample_avg_x{}'.format(stride)))
elif downsample_method == 'depthwise_conv':
layers.append(
tf.keras.layers.DepthwiseConv2D(
kernel_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_depthwise_x{}'.format(stride)))
name=name + 'downsample_depthwise_x{}'.format(stride)))
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=name + '_downsample_batchnorm'))
name=name + 'downsample_batchnorm'))
layers.append(
conv_hyperparams.build_activation_layer(name=name +
'_downsample_activation'))
'downsample_activation'))
else:
raise ValueError('Unknown downsample method: {}'.format(downsample_method))
......
......@@ -147,6 +147,7 @@ def clear_fine_tune_checkpoint(pipeline_config_path,
"""Clears fine_tune_checkpoint and writes a new pipeline config file."""
configs = get_configs_from_pipeline_file(pipeline_config_path)
configs["train_config"].fine_tune_checkpoint = ""
configs["train_config"].load_all_detection_checkpoint_vars = False
pipeline_proto = create_pipeline_proto_from_configs(configs)
with tf.gfile.Open(new_pipeline_config_path, "wb") as f:
f.write(text_format.MessageToString(pipeline_proto))
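Hedged usage sketch (paths hypothetical): after this change the helper also
resets the TF2-unsupported flag while clearing the checkpoint reference:

  clear_fine_tune_checkpoint("path/to/pipeline.config",
                             "path/to/cleared_pipeline.config")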
......