Unverified commit 0f0c7745, authored by vivek rathod, committed by GitHub

Merged commit includes the following changes: (#8739)

318417714  by jonathanhuang:

    Internal change.

--
318367213  by sbeery:

    Pointing users to more documentation for Beam

--
318358685  by sbeery:

    Context R-CNN sample config for GPU

--
318309800  by rathodv:

    Internal

--
318303364  by ronnyvotel:

    Adding the option for parsing and including DensePose annotations. http://densepose.org/

--
318291319  by aom:

    Adds conv_bn_act conv_block option, and naming convention changes for BiFPN utils.

--
318200598  by ronnyvotel:

    Updating the TF Example Decoder to parse DensePose annotations.

--
318174065  by jonathanhuang:

    Internal change.

--
318167805  by rathodv:

    Add use_tpu flag to TF2 binary.

--
318145285  by aom:

    Adds option for convolutional Keras box predictor to force use_bias.

--

PiperOrigin-RevId: 318417714
Parent 1e4fd825
......@@ -391,7 +391,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
pass
@abc.abstractmethod
def restore_map(self, fine_tune_checkpoint_type='detection'):
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
"""Returns a map of variables to load from a foreign checkpoint.
Returns a map of variable names to load from a checkpoint to variables in
......@@ -407,6 +409,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
......@@ -414,6 +419,36 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
@abc.abstractmethod
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of variables to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
error for both restore_map and restore_from_objects depending on version.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
pass
@abc.abstractmethod
def updates(self):
"""Returns a list of update operators for this model.
......
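The `restore_map` / `restore_from_objects` split above is easiest to read from the caller's side. A minimal sketch of how a `restore_from_objects` result is consumed (the helper name is illustrative; the logic mirrors the `load_fine_tune_checkpoint` change later in this commit):

```python
import tensorflow as tf

def restore_from_v2_checkpoint(model, checkpoint_path,
                               checkpoint_type='detection'):
  """Restores `model` from an object-based (TF2) checkpoint."""
  restore_dict = model.restore_from_objects(
      fine_tune_checkpoint_type=checkpoint_type)
  # The dict keys become attribute names in the checkpoint's object graph,
  # so they must match how the foreign checkpoint was originally written.
  ckpt = tf.train.Checkpoint(**restore_dict)
  ckpt.restore(checkpoint_path).assert_existing_objects_matched()
```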
......@@ -57,6 +57,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self):
return {}
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def regularization_losses(self):
return []
......
......@@ -66,6 +66,11 @@ class InputDataFields(object):
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
groundtruth_dp_part_ids: Part indices for DensePose points.
groundtruth_dp_surface_coords: Image locations and UV coordinates for
DensePose points.
num_groundtruth_boxes: number of groundtruth boxes.
is_annotated: whether an image has been labeled or not.
true_image_shapes: true shapes of images in the resized images, as resized
......@@ -108,6 +113,9 @@ class InputDataFields(object):
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
num_groundtruth_boxes = 'num_groundtruth_boxes'
is_annotated = 'is_annotated'
true_image_shape = 'true_image_shape'
......
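For concreteness, a hypothetical image with three instances would populate the new DensePose fields with shapes like these (values are illustrative only, not taken from any dataset):

```python
import numpy as np

# Shapes of the new DensePose groundtruth fields, assuming 3 instances and at
# most 4 sampled points on any single instance.
groundtruth_dp_num_points = np.array([0, 4, 2], np.int32)        # [num_instances]
groundtruth_dp_part_ids = np.zeros((3, 4), np.int32)             # [num_instances, num_points]
groundtruth_dp_surface_coords = np.zeros((3, 4, 4), np.float32)  # points are (y, x, v, u)
```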
......@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
......@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False,
expand_hierarchy_labels=False):
expand_hierarchy_labels=False,
load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants.
load_dense_pose: Whether to load DensePose annotations.
Raises:
ValueError: If `instance_mask_type` option is not one of
......@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if load_dense_pose:
self.keys_to_features['image/object/densepose/num'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/part_index'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/x'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/y'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/u'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/v'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_num_points] = (
slim_example_decoder.Tensor('image/object/densepose/num'))
self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/part_index',
'image/object/densepose/num'], self._dense_pose_part_indices))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_surface_coords] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/x', 'image/object/densepose/y',
'image/object/densepose/u', 'image/object/densepose/v',
'image/object/densepose/num'],
self._dense_pose_surface_coordinates))
if label_map_proto_file:
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
......@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
dtype=tf.int32)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
......@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _dense_pose_part_indices(self, keys_to_tensors):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with less sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
part_index = keys_to_tensors['image/object/densepose/part_index']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(part_index, tf.SparseTensor):
part_index = tf.sparse_tensor_to_dense(part_index)
part_index = tf.cast(part_index, dtype=tf.int32)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_parts_tensor(instance_ind):
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
part_inds = part_index[points_range_start:points_range_end]
return shape_utils.pad_or_clip_nd(part_inds,
output_shape=[max_points_per_instance])
return tf.map_fn(pad_parts_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.int32)
def _dense_pose_surface_coordinates(self, keys_to_tensors):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
    A 3-D float32 tensor of shape [num_instances, num_points, 4], where each
    sampled DensePose point is encoded as (y, x, v, u). The (y, x) pair is the
    normalized image location of the point, and (v, u) is the (also
    normalized) surface coordinate within the part. The value
`num_points` corresponds to the maximum number of sampled points across
    all instances in the image. Note that instances with fewer sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
dp_y = keys_to_tensors['image/object/densepose/y']
dp_x = keys_to_tensors['image/object/densepose/x']
dp_v = keys_to_tensors['image/object/densepose/v']
dp_u = keys_to_tensors['image/object/densepose/u']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(dp_y, tf.SparseTensor):
dp_y = tf.sparse_tensor_to_dense(dp_y)
if isinstance(dp_x, tf.SparseTensor):
dp_x = tf.sparse_tensor_to_dense(dp_x)
if isinstance(dp_v, tf.SparseTensor):
dp_v = tf.sparse_tensor_to_dense(dp_v)
if isinstance(dp_u, tf.SparseTensor):
dp_u = tf.sparse_tensor_to_dense(dp_u)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_surface_coordinates_tensor(instance_ind):
"""Pads DensePose surface coordinates for each instance."""
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
y = dp_y[points_range_start:points_range_end]
x = dp_x[points_range_start:points_range_end]
v = dp_v[points_range_start:points_range_end]
u = dp_u[points_range_start:points_range_end]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor = tf.stack([y, x, v, u], axis=1)
return shape_utils.pad_or_clip_nd(
unpadded_tensor, output_shape=[max_points_per_instance, 4])
return tf.map_fn(pad_surface_coordinates_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.float32)
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
......
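The two DensePose handlers above share one trick: per-point features arrive as a single flat vector concatenated across instances, and `image/object/densepose/num` supplies the split points. A standalone NumPy sketch of the ragged-to-padded conversion (inputs chosen to match the unit test below):

```python
import numpy as np

num_points = np.array([0, 4, 2])           # sampled points per instance
part_index = np.array([2, 2, 3, 4, 2, 9])  # flat, concatenated over instances
max_points = num_points.max()
offsets = np.concatenate([[0], np.cumsum(num_points)])

padded = np.zeros((len(num_points), max_points), dtype=np.int32)
for i in range(len(num_points)):
  row = part_index[offsets[i]:offsets[i + 1]]
  padded[i, :len(row)] = row
# padded == [[0, 0, 0, 0], [2, 2, 3, 4], [2, 9, 0, 0]]
```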
......@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertTrue(
fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
......@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_1, [])
self.assertTrue(
fields.InputDataFields.groundtruth_image_classes in tensor_dict)
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 2]))
......@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_2, [])
self.assertTrue(
fields.InputDataFields.groundtruth_image_classes in tensor_dict)
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 3]))
......@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
expected_image_confidence,
tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
def testDecodeDensePose(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
bbox_ymins = [0.0, 4.0, 2.0]
bbox_xmins = [1.0, 5.0, 8.0]
bbox_ymaxs = [2.0, 6.0, 1.0]
bbox_xmaxs = [3.0, 7.0, 3.3]
densepose_num = [0, 4, 2]
densepose_part_index = [2, 2, 3, 4, 2, 9]
densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/densepose/num':
dataset_util.int64_list_feature(densepose_num),
'image/object/densepose/part_index':
dataset_util.int64_list_feature(densepose_part_index),
'image/object/densepose/x':
dataset_util.float_list_feature(densepose_x),
'image/object/densepose/y':
dataset_util.float_list_feature(densepose_y),
'image/object/densepose/u':
dataset_util.float_list_feature(densepose_u),
'image/object/densepose/v':
dataset_util.float_list_feature(densepose_v),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_dense_pose=True)
output = example_decoder.decode(tf.convert_to_tensor(example))
dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = output[
fields.InputDataFields.groundtruth_dp_surface_coords]
return dp_num_points, dp_part_ids, dp_surface_coords
dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
graph_fn, [])
expected_dp_num_points = [0, 4, 2]
expected_dp_part_ids = [
[0, 0, 0, 0],
[2, 2, 3, 4],
[2, 9, 0, 0]
]
expected_dp_surface_coords = np.array(
[
# Instance 0 (no points).
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
# Instance 1 (4 points).
[[0.9, 0.1, 0.99, 0.01],
[0.8, 0.2, 0.98, 0.02],
[0.7, 0.3, 0.97, 0.03],
[0.6, 0.4, 0.96, 0.04]],
# Instance 2 (2 points).
[[0.5, 0.5, 0.95, 0.05],
[0.4, 0.6, 0.94, 0.06],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
], dtype=np.float32)
self.assertAllEqual(dp_num_points, expected_dp_num_points)
self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
if __name__ == '__main__':
tf.test.main()
......@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -73,6 +73,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.
Example usage:
......@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
# DensePose annotations are only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to keep only person-class images/annotations (for the keypoint and
# DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS
......@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
_DP_PART_ID_OFFSET = 1
def clip_to_unit(x):
return min(max(x, 0.0), 1.0)
def create_tf_example(image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=None):
keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto.
Args:
......@@ -108,10 +142,23 @@ def create_tf_example(image,
    dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns:
key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
......@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
......@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8'))
category_names.append(category_name)
area.append(object_annotations['area'])
if include_masks:
......@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose surface coordinates are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in range
# [0., 256.]). The following converts the coordinates
# so that they are expressed in normalized image coordinates.
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
......@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
num_shards,
keypoint_annotations_file=''):
keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
......@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
......@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count)
logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {}
if keypoint_annotations_file:
......@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
......@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped,
num_keypoint_annotations_skipped) = create_tf_example(
densepose_annotations_dict = None
if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict)
keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
def main(_):
......@@ -347,20 +492,26 @@ def main(_):
train_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file,
FLAGS.test_image_dir,
testdev_output_path,
FLAGS.include_masks,
num_shards=100)
num_shards=50)
if __name__ == '__main__':
......
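The surface-coordinate conversion in `create_tf_example` is worth a worked example: raw `dp_x`/`dp_y` live on a [0, 256] grid relative to the instance box, are clipped to a box-relative fraction, and are then mapped into normalized image coordinates. Using the same geometry as the unit test below (a [64, 64, 128, 128] box in a 256x256 image):

```python
box_x, box_width, image_width = 64.0, 128.0, 256.0

dp_x = 128.0                                  # raw value on the [0, 256] box grid
x_box_rel = min(max(dp_x / 256.0, 0.0), 1.0)  # clip_to_unit -> 0.5 of box width
x_norm = (box_x + x_box_rel * box_width) / image_width
assert x_norm == (64.0 + 0.5 * 128.0) / 256.0 == 0.5
```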
......@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0)
......@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0)
......@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
}
(_, example, _,
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict)
_, example, _, num_keypoint_annotation_skipped, _ = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual(
......@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
......@@ -74,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -76,6 +76,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -105,6 +105,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......
......@@ -30,9 +30,12 @@ pip install apache-beam
```
and can be run locally, or on a cluster for efficient processing of large
amounts of data. See the
amounts of data. Note that generate_detection_data.py and
generate_embedding_data.py both involve running inference, and may be very slow
to run locally. See the
[Apache Beam documentation](https://beam.apache.org/documentation/runners/dataflow/)
for more information.
for more information, and the Google Cloud documentation for a tutorial on
[running Beam jobs on Dataflow](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python).
### Generating TfRecords from a set of images and a COCO-CameraTraps style JSON
......@@ -191,3 +194,6 @@ python export_inference_graph.py \
--side_input_types float,int
```
If you have questions about Context R-CNN, please contact
[Sara Beery](https://beerys.github.io/).
......@@ -185,6 +185,9 @@ class FakeDetectionModel(model.DetectionModel):
"""
return {var.op.name: var for var in tf.global_variables()}
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def updates(self):
"""Returns a list of update operators for this model.
......
......@@ -2330,8 +2330,39 @@ class CenterNetMetaArch(model.DetectionModel):
def regularization_losses(self):
return []
def restore_map(self, fine_tune_checkpoint_type='classification',
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
raise RuntimeError('CenterNetMetaArch not supported under TF1.x.')
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (not implemented
in CenterNet) is intended to be used to restore Slim-based models when
running Tensorflow 1.x.
TODO(jonathanhuang): Make this function consistent with other
meta-architectures.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self._feature_extractor.get_base_model()}
......@@ -2340,7 +2371,7 @@ class CenterNetMetaArch(model.DetectionModel):
return {'feature_extractor': self._feature_extractor.get_model()}
else:
raise ValueError('Unknown fine tune checkpoint type - {}'.format(
raise ValueError('Not supported fine tune checkpoint type - {}'.format(
fine_tune_checkpoint_type))
def updates(self):
......
......@@ -1574,8 +1574,9 @@ class CenterNetMetaArchRestoreTest(test_case.TestCase):
"""Test restore map for a resnet backbone."""
model = build_center_net_meta_arch(build_resnet=True)
restore_map = model.restore_map('classification')
self.assertIsInstance(restore_map['feature_extractor'], tf.keras.Model)
restore_from_objects_map = model.restore_from_objects('classification')
self.assertIsInstance(restore_from_objects_map['feature_extractor'],
tf.keras.Model)
class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
......@@ -1601,9 +1602,6 @@ class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
def postprocess(self):
pass
def restore_map(self):
pass
def call(self, inputs):
batch_size, input_height, input_width, _ = inputs.shape
fake_output = tf.ones([
......
......@@ -261,31 +261,6 @@ class FasterRCNNKerasFeatureExtractor(object):
"""Get model that extracts second stage box classifier features."""
pass
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor.
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in variables_helper.get_global_variables_safely():
for scope_name in [first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope]:
if variable.op.name.startswith(scope_name):
var_name = variable.op.name.replace(scope_name + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
......@@ -2801,6 +2776,43 @@ class FasterRCNNMetaArch(model.DetectionModel):
variables_to_restore, include_patterns=include_patterns)
return {var.op.name: var for var in feature_extractor_variables}
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self.classification_backbone}
elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
_feature_extractor_for_box_classifier_features=
self._feature_extractor_for_box_classifier_features,
_feature_extractor_for_proposal_features=
self._feature_extractor_for_proposal_features)
return {'model': fake_model}
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def updates(self):
"""Returns a list of update operators for this model.
......
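In the `'detection'` branch above, the throwaway `tf.train.Checkpoint` exists only so the attribute names in the restored object graph line up with those of the saved detection model. A sketch of the restore under that assumption (function and argument names are ours, not part of the API):

```python
import tensorflow as tf

def restore_detection_backbones(proposal_extractor, box_classifier_extractor,
                                checkpoint_path):
  # Mimic the attribute layout of FasterRCNNMetaArch so the object graph in
  # the detection checkpoint resolves onto the two feature extractors.
  fake_model = tf.train.Checkpoint(
      _feature_extractor_for_proposal_features=proposal_extractor,
      _feature_extractor_for_box_classifier_features=box_classifier_extractor)
  ckpt = tf.train.Checkpoint(model=fake_model)
  ckpt.restore(checkpoint_path).assert_existing_objects_matched()
```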
......@@ -250,35 +250,6 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
def call(self, inputs, **kwargs):
return self._extract_features(inputs)
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
if tf.executing_eagerly():
for variable in self.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
else:
# b/137854499: use global_variables.
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if var_name.startswith(feature_extractor_scope + '/'):
var_name = var_name.replace(feature_extractor_scope + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
class SSDMetaArch(model.DetectionModel):
"""SSD Meta-architecture definition."""
......@@ -1295,8 +1266,8 @@ class SSDMetaArch(model.DetectionModel):
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type='detection'`). If False, only variables
within the appropriate scopes are included. Default False.
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
......@@ -1311,36 +1282,53 @@ class SSDMetaArch(model.DetectionModel):
elif fine_tune_checkpoint_type == 'detection':
variables_to_restore = {}
if tf.executing_eagerly():
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
# Grab all detection vars by name
for variable in self.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
variables_to_restore[var_name] = variable
variables_to_restore[var_name] = variable
else:
# Grab just the feature extractor vars by name
for variable in self._feature_extractor.variables:
# variable.name includes ":0" at the end, but the names in the
# checkpoint do not have the suffix ":0". So, we strip it here.
var_name = variable.name[:-2]
variables_to_restore[var_name] = variable
else:
for variable in variables_helper.get_global_variables_safely():
var_name = variable.op.name
if load_all_detection_checkpoint_vars:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
else:
if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
return variables_to_restore
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of Trackable objects to load from a foreign checkpoint.
Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
or Checkpoint). This enables the model to initialize based on weights from
another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model with exception of
the num_classes parameter.
Note that this function is intended to be used to restore Keras-based
models when running Tensorflow 2, whereas restore_map (above) is intended
to be used to restore Slim-based models when running Tensorflow 1.x.
Args:
fine_tune_checkpoint_type: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
"""
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self.classification_backbone}
elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor)
return {'model': fake_model}
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
def updates(self):
"""Returns a list of update operators for this model.
......
......@@ -123,6 +123,9 @@ class SimpleModel(model.DetectionModel):
return []
def restore_map(self, *args, **kwargs):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
return {'model': self}
def preprocess(self, _):
......@@ -174,7 +177,7 @@ class ModelCheckpointTest(tf.test.TestCase):
class IncompatibleModel(SimpleModel):
def restore_map(self, *args, **kwargs):
def restore_from_objects(self, *args, **kwargs):
return {'weight': self.weight}
......@@ -207,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
self._model, self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
np.testing.assert_allclose(self._model.weight.numpy(), 42)
......@@ -220,7 +222,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
IncompatibleModel(), self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
......
......@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vutils
# pylint: disable=g-import-not-at-top
......@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP
### NOTE: This file is a wip.
### TODO(kaftan): Explore adding unit tests for individual methods
### TODO(kaftan): Add unit test that checks training on a single image w/
#### groundtruth, and verify that loss goes to zero.
#### Possibly have version that takes it as the whole train & eval dataset,
#### & verify the loss output from the eval_loop method.
### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
RESTORE_MAP_ERROR_TEMPLATE = (
'Since we are restoring a v2 style checkpoint'
......@@ -277,14 +269,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for key, value in checkpoint_restore_map.items():
if not (isinstance(key, str) and isinstance(value, tf.Module)):
if not (isinstance(key, str) and
(isinstance(value, tf.Module)
or isinstance(value, tf.train.Checkpoint))):
raise TypeError(RESTORE_MAP_ERROR_TEMPLATE.format(
key.__class__.__name__, value.__class__.__name__))
def is_object_based_checkpoint(checkpoint_path):
"""Returns true if `checkpoint_path` points to an object-based checkpoint."""
var_names = [var[0] for var in tf.train.list_variables(checkpoint_path)]
return '_CHECKPOINTABLE_OBJECT_GRAPH' in var_names
def load_fine_tune_checkpoint(
model, checkpoint_path, checkpoint_type, checkpoint_version,
load_all_detection_checkpoint_vars, input_dataset,
model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset,
unpad_groundtruth_tensors):
"""Load a fine tuning classification or detection checkpoint.
......@@ -292,8 +291,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
It then loads a variable-name based classification or detection checkpoint
that comes from converted TF 1.x slim model checkpoints.
It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
......@@ -306,14 +304,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
whether to load checkpoints in V1 style or V2 style.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scopes are included. Default False.
whether to load checkpoints in V1 style or V2 style. In this binary
we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
Raises:
IOError: if `checkpoint_path` does not point at a valid object-based
checkpoint
ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
if not is_object_based_checkpoint(checkpoint_path):
raise IOError('Checkpoint is expected to be an object-based checkpoint.')
if checkpoint_version == train_pb2.CheckpointVersion.V1:
raise ValueError('Checkpoint version should be V2')
features, labels = iter(input_dataset).next()
@tf.function
......@@ -336,26 +342,11 @@ def load_fine_tune_checkpoint(
labels,
))
if checkpoint_version == train_pb2.CheckpointVersion.V1:
var_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
available_var_map = variables_helper.get_variables_available_in_checkpoint(
var_map,
checkpoint_path,
include_global_step=False)
tf.train.init_from_checkpoint(checkpoint_path,
available_var_map)
elif checkpoint_version == train_pb2.CheckpointVersion.V2:
restore_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
validate_tf_v2_checkpoint_restore_map(restore_map)
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
restore_from_objects_dict = model.restore_from_objects(
fine_tune_checkpoint_type=checkpoint_type)
validate_tf_v2_checkpoint_restore_map(restore_from_objects_dict)
ckpt = tf.train.Checkpoint(**restore_from_objects_dict)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
def get_filepath(strategy, filepath):
......@@ -464,8 +455,10 @@ def train_loop(
if kwargs['use_bfloat16']:
tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
load_all_detection_checkpoint_vars = (
train_config.load_all_detection_checkpoint_vars)
if train_config.load_all_detection_checkpoint_vars:
raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
'unsupported in TF2')
config_util.update_fine_tune_checkpoint_type(train_config)
fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version
......@@ -533,7 +526,6 @@ def train_loop(
train_config.fine_tune_checkpoint,
fine_tune_checkpoint_type,
fine_tune_checkpoint_version,
load_all_detection_checkpoint_vars,
train_input,
unpad_groundtruth_tensors)
......@@ -807,8 +799,10 @@ def eager_eval_loop(
eval_metrics[loss_key] = loss_metrics[loss_key].result()
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
tf.logging.info('Eval metrics at step %d', global_step)
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
......
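The new `is_object_based_checkpoint` guard keys off the sentinel variable that TF2 object-based checkpoints always contain. A small standalone check in the same spirit (helper name is ours):

```python
import tensorflow as tf

def describe_checkpoint(checkpoint_path):
  names = [name for name, _ in tf.train.list_variables(checkpoint_path)]
  kind = ('object-based (V2)' if '_CHECKPOINTABLE_OBJECT_GRAPH' in names
          else 'name-based (V1)')
  print('%s: %s, %d variables' % (checkpoint_path, kind, len(names)))
```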
......@@ -16,14 +16,6 @@
r"""Creates and runs TF2 object detection models.
##################################
NOTE: This module has not been fully tested; please bear with us while we iron
out the kinks.
##################################
When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
GPUS with MirroredStrategy/MultiWorkerMirroredStrategy.
For local training/evaluation run:
PIPELINE_CONFIG_PATH=path/to/pipeline.config
MODEL_DIR=/tmp/model_outputs
......@@ -60,6 +52,8 @@ flags.DEFINE_string(
flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an '
'evaluation checkpoint before exiting.')
flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_integer(
'num_workers', 1, 'When num_workers > 1, training uses '
'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
......@@ -84,7 +78,7 @@ def main(unused_argv):
checkpoint_dir=FLAGS.checkpoint_dir,
wait_interval=300, timeout=FLAGS.eval_timeout)
else:
if tf.config.get_visible_devices('TPU'):
if FLAGS.use_tpu:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
......
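The net effect of the new `--use_tpu` flag, condensed from the hunk above into a single helper (the function name is ours; `num_workers` handling is elided):

```python
import tensorflow as tf

def make_strategy(use_tpu):
  if use_tpu:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    return tf.distribute.experimental.TPUStrategy(resolver)
  return tf.distribute.MirroredStrategy()
```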
......@@ -73,7 +73,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
proposal_classifier_features = (
model(proposal_feature_maps))
features_shape = tf.shape(proposal_classifier_features)
self.assertAllEqual(features_shape.numpy(), [2, 8, 8, 1536])
self.assertAllEqual(features_shape.numpy(), [2, 9, 9, 1536])
if __name__ == '__main__':
......
......@@ -175,23 +175,6 @@ class FasterRCNNResnetKerasFeatureExtractor(
self._variable_dict[variable.name[:-2]] = variable
return keras_model
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor (unused).
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
class FasterRCNNResnet50KerasFeatureExtractor(
FasterRCNNResnetKerasFeatureExtractor):
......
......@@ -163,14 +163,3 @@ class SSDMobileNetV1KerasFeatureExtractor(
'Conv2d_13_pointwise': image_features[1]})
return list(feature_maps.values())
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
......@@ -241,14 +241,3 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
last_feature_map = layer(last_feature_map)
feature_maps.append(last_feature_map)
return feature_maps
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
......@@ -166,14 +166,3 @@ class SSDMobileNetV2KerasFeatureExtractor(
'layer_19': image_features[1]})
return list(feature_maps.values())
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
......@@ -246,17 +246,6 @@ class SSDResNetV1FpnKerasFeatureExtractor(
feature_maps.append(last_feature_map)
return feature_maps
def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
"""Returns a map for restoring from an (object-based) checkpoint.
Args:
feature_extractor_scope: A scope name for the feature extractor (unused).
Returns:
A dict mapping keys to Keras models
"""
return {'feature_extractor': self.classification_backbone}
class SSDResNet50V1FpnKerasFeatureExtractor(
SSDResNetV1FpnKerasFeatureExtractor):
......
......@@ -314,7 +314,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
self, inserted_layer_counter, target_channel):
projection_layers = []
if inserted_layer_counter >= 0:
use_bias = False if self._apply_batch_norm else True
use_bias = False if (self._apply_batch_norm and not
self._conv_hyperparams.force_use_bias()) else True
projection_layers.append(keras.Conv2D(
target_channel, [1, 1], strides=1, padding='SAME',
name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter),
......@@ -331,7 +332,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
conv_layers = []
batch_norm_layers = []
activation_layers = []
use_bias = False if self._apply_batch_norm else True
use_bias = False if (self._apply_batch_norm and not
self._conv_hyperparams.force_use_bias()) else True
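# Sketch, not part of the commit: the conditional above is equivalent to the
# shorter form
#   use_bias = (not self._apply_batch_norm
#               or self._conv_hyperparams.force_use_bias())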
for additional_conv_layer_idx in range(self._num_layers_before_predictor):
layer_name = '{}/conv2d_{}'.format(
tower_name_scope, additional_conv_layer_idx)
......@@ -363,7 +365,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
training=(self._is_training and not self._freeze_batchnorm),
name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
tower_name_scope, additional_conv_layer_idx, feature_index)))
activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
activation_layers.append(self._conv_hyperparams.build_activation_layer(
name='{}/conv2d_{}/activation_{}'.format(
tower_name_scope, additional_conv_layer_idx, feature_index)))
# Set conv layers as the shared conv layers for different feature maps with
# the same tower_name_scope.
......
......@@ -31,7 +31,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
// Next id: 31
// Next id: 32
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
......@@ -119,6 +119,10 @@ message InputReader {
// Type of instance mask.
optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];
// Whether to load DensePose data. If set, must also set load_instance_masks
// to true.
optional bool load_dense_pose = 31 [default = false];
// Whether to use the display name when decoding examples. This is only used
// when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false];
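A hedged pbtxt sketch of an input reader enabling the new field (paths
hypothetical); note that `load_instance_masks` must also be set, as the
comment above requires:

  train_input_reader {
    label_map_path: "PATH_TO_BE_CONFIGURED/label_map.pbtxt"
    load_instance_masks: true
    load_dense_pose: true
    tf_record_input_reader {
      input_path: "PATH_TO_BE_CONFIGURED/train.record-?????-of-?????"
    }
  }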
......
......@@ -59,7 +59,8 @@ message TrainConfig {
// Whether to load all checkpoint vars that match model variable names and
// sizes. This option is only available if `from_detection_checkpoint` is
// True.
// True. This option is *not* supported in TF2; setting it to true
// will raise an error.
optional bool load_all_detection_checkpoint_vars = 19 [default = false];
// Number of steps to train the DetectionModel for. If 0, will train the model
......
# Context R-CNN configuration for the Snapshot Serengeti Dataset, using
# sequence example input data that includes context_features.
# This model attends to contextual features within the Faster R-CNN
# object detection framework to improve detection performance.
# See https://arxiv.org/abs/1912.03538 for more information.
# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
# configured.
model {
faster_rcnn {
num_classes: 48
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
feature_extractor {
type: "faster_rcnn_resnet101"
first_stage_features_stride: 16
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
height_stride: 16
width_stride: 16
scales: 0.25
scales: 0.5
scales: 1.0
scales: 2.0
aspect_ratios: 0.5
aspect_ratios: 1.0
aspect_ratios: 2.0
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.00999999977648
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.699999988079
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
use_dropout: false
dropout_keep_probability: 1.0
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_matmul_gather_in_matcher: true
use_static_balanced_label_sampler: true
use_static_shapes: true
context_config {
max_num_context_features: 2000
context_feature_length: 2057
}
}
}
train_config {
batch_size: 8
data_augmentation_options {
random_horizontal_flip {
}
}
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
manual_step_learning_rate {
initial_learning_rate: 0.0
schedule {
step: 400000
learning_rate: 0.002
}
schedule {
step: 500000
learning_rate: 0.0002
}
schedule {
step: 600000
learning_rate: 0.00002
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
from_detection_checkpoint: true
num_steps: 5000000
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
eval_config {
max_evals: 50
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
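A hedged launch sketch for this sample config, following the local-run
convention shown in the model_main_tf2.py docstring earlier in this commit
(binary path and config filename assumed):

  PIPELINE_CONFIG_PATH=path/to/context_rcnn_resnet101_snapshot_serengeti.config
  MODEL_DIR=/tmp/model_outputs
  python object_detection/model_main_tf2.py \
    --pipeline_config_path=$PIPELINE_CONFIG_PATH \
    --model_dir=$MODEL_DIR \
    --alsologtostderr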
......@@ -26,7 +26,8 @@ from object_detection.utils import shape_utils
def create_conv_block(name, num_filters, kernel_size, strides, padding,
use_separable, apply_batchnorm, apply_activation,
conv_hyperparams, is_training, freeze_batchnorm):
conv_hyperparams, is_training, freeze_batchnorm,
conv_bn_act_pattern=True):
"""Create Keras layers for regular or separable convolutions.
Args:
......@@ -50,6 +51,9 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
conv_bn_act_pattern: Bool. When True (the default), the returned layers are
ordered [conv, batchnorm, activation]; when False, the order is
[activation, conv, batchnorm].
Returns:
A list of keras layers, including (regular or separable) convolution, and
......@@ -73,7 +77,7 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
depth_multiplier=1,
padding=padding,
strides=strides,
name=name + '_separable_conv',
name=name + 'separable_conv',
**kwargs))
else:
layers.append(
......@@ -82,18 +86,22 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
kernel_size=kernel_size,
padding=padding,
strides=strides,
name=name + '_conv',
name=name + 'conv',
**conv_hyperparams.params()))
if apply_batchnorm:
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=name + '_batchnorm'))
name=name + 'batchnorm'))
if apply_activation:
layers.append(
conv_hyperparams.build_activation_layer(name=name + '_activation'))
activation_layer = conv_hyperparams.build_activation_layer(
name=name + 'activation')
if conv_bn_act_pattern:
layers.append(activation_layer)
else:
layers = [activation_layer] + layers
return layers
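Hedged usage sketch (assuming `conv_hyperparams` is a KerasLayerHyperparams
instance built elsewhere); note that after the naming change above the
caller's `name` is concatenated directly, so it should already end in a
separator such as '/':

  block_layers = create_conv_block(
      name='fpn_node_0/', num_filters=64, kernel_size=3, strides=1,
      padding='SAME', use_separable=True, apply_batchnorm=True,
      apply_activation=True, conv_hyperparams=conv_hyperparams,
      is_training=True, freeze_batchnorm=False, conv_bn_act_pattern=False)
  # With conv_bn_act_pattern=False the order is [activation, conv, batchnorm].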
......@@ -133,28 +141,28 @@ def create_downsample_feature_map_ops(scale, downsample_method,
pool_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_max_x{}'.format(stride)))
name=name + 'downsample_max_x{}'.format(stride)))
elif downsample_method == 'avg_pooling':
layers.append(
tf.keras.layers.AveragePooling2D(
pool_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_avg_x{}'.format(stride)))
name=name + 'downsample_avg_x{}'.format(stride)))
elif downsample_method == 'depthwise_conv':
layers.append(
tf.keras.layers.DepthwiseConv2D(
kernel_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_depthwise_x{}'.format(stride)))
name=name + 'downsample_depthwise_x{}'.format(stride)))
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=name + '_downsample_batchnorm'))
name=name + 'downsample_batchnorm'))
layers.append(
conv_hyperparams.build_activation_layer(name=name +
'_downsample_activation'))
'downsample_activation'))
else:
raise ValueError('Unknown downsample method: {}'.format(downsample_method))
......
......@@ -147,6 +147,7 @@ def clear_fine_tune_checkpoint(pipeline_config_path,
"""Clears fine_tune_checkpoint and writes a new pipeline config file."""
configs = get_configs_from_pipeline_file(pipeline_config_path)
configs["train_config"].fine_tune_checkpoint = ""
configs["train_config"].load_all_detection_checkpoint_vars = False
pipeline_proto = create_pipeline_proto_from_configs(configs)
with tf.gfile.Open(new_pipeline_config_path, "wb") as f:
f.write(text_format.MessageToString(pipeline_proto))
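Hedged usage sketch (paths hypothetical): after this change the helper also
resets the TF2-unsupported flag while clearing the checkpoint reference:

  clear_fine_tune_checkpoint("path/to/pipeline.config",
                             "path/to/cleared_pipeline.config")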
......