提交 c9d2886a 编写于 作者: Y Yu-hui Chen 提交者: TF Object Detection Team

Updated the input pipeline and data augmentation to process the new keypoint depth and weight fields.

PiperOrigin-RevId: 353975078
上级 d04c9e9b
......@@ -315,7 +315,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
is_annotated_list=None,
groundtruth_labeled_classes=None,
groundtruth_verified_neg_classes=None,
groundtruth_not_exhaustive_classes=None):
groundtruth_not_exhaustive_classes=None,
groundtruth_keypoint_depths_list=None,
groundtruth_keypoint_depth_weights_list=None):
"""Provide groundtruth tensors.
Args:
......@@ -379,6 +381,11 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_not_exhaustive_classes: A list of 1-D tf.float32 tensors of
shape [num_classes], containing a K-hot representation of classes
which don't have all of their instances marked exhaustively.
groundtruth_keypoint_depths_list: a list of 2-D tf.float32 tensors
of shape [num_boxes, num_keypoints] containing keypoint relative depths.
groundtruth_keypoint_depth_weights_list: a list of 2-D tf.float32 tensors
of shape [num_boxes, num_keypoints] containing the weights of the
relative depths.
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
......@@ -399,6 +406,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.BoxListFields.keypoint_visibilities] = (
groundtruth_keypoint_visibilities_list)
if groundtruth_keypoint_depths_list:
self._groundtruth_lists[
fields.BoxListFields.keypoint_depths] = (
groundtruth_keypoint_depths_list)
if groundtruth_keypoint_depth_weights_list:
self._groundtruth_lists[
fields.BoxListFields.keypoint_depth_weights] = (
groundtruth_keypoint_depth_weights_list)
if groundtruth_dp_num_points_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_num_points] = (
......
......@@ -571,6 +571,8 @@ def random_horizontal_flip(image,
keypoint_visibilities=None,
densepose_part_ids=None,
densepose_surface_coords=None,
keypoint_depths=None,
keypoint_depth_weights=None,
keypoint_flip_permutation=None,
probability=0.5,
seed=None,
......@@ -602,6 +604,12 @@ def random_horizontal_flip(image,
(y, x) are the normalized image coordinates for a
sampled point, and (v, u) is the surface
coordinate for the part.
keypoint_depths: (optional) rank 2 float32 tensor with shape [num_instances,
num_keypoints] representing the relative depth of the
keypoints.
keypoint_depth_weights: (optional) rank 2 float32 tensor with shape
[num_instances, num_keypoints] representing the
weights of the relative depth of the keypoints.
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
probability: the probability of performing this augmentation.
......@@ -631,6 +639,10 @@ def random_horizontal_flip(image,
[num_instances, num_points].
densepose_surface_coords: rank 3 float32 tensor with shape
[num_instances, num_points, 4].
keypoint_depths: rank 2 float32 tensor with shape [num_instances,
num_keypoints]
keypoint_depth_weights: rank 2 float32 tensor with shape [num_instances,
num_keypoints].
Raises:
ValueError: if keypoints are provided but keypoint_flip_permutation is not.
......@@ -708,6 +720,21 @@ def random_horizontal_flip(image,
lambda: (densepose_part_ids, densepose_surface_coords))
result.extend(densepose_tensors)
# flip keypoint depths and weights.
if (keypoint_depths is not None and
keypoint_flip_permutation is not None):
kpt_flip_perm = keypoint_flip_permutation
keypoint_depths = tf.cond(
do_a_flip_random,
lambda: tf.gather(keypoint_depths, kpt_flip_perm, axis=1),
lambda: keypoint_depths)
keypoint_depth_weights = tf.cond(
do_a_flip_random,
lambda: tf.gather(keypoint_depth_weights, kpt_flip_perm, axis=1),
lambda: keypoint_depth_weights)
result.append(keypoint_depths)
result.append(keypoint_depth_weights)
return tuple(result)
......@@ -4293,7 +4320,8 @@ def get_default_func_arg_map(include_label_weights=True,
include_instance_masks=False,
include_keypoints=False,
include_keypoint_visibilities=False,
include_dense_pose=False):
include_dense_pose=False,
include_keypoint_depths=False):
"""Returns the default mapping from a preprocessor function to its args.
Args:
......@@ -4311,6 +4339,8 @@ def get_default_func_arg_map(include_label_weights=True,
the keypoint visibilities, too.
include_dense_pose: If True, preprocessing functions will modify the
DensePose labels, too.
include_keypoint_depths: If True, preprocessing functions will modify the
keypoint depth labels, too.
Returns:
A map from preprocessing functions to the arguments they receive.
......@@ -4353,6 +4383,13 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_dp_part_ids)
groundtruth_dp_surface_coords = (
fields.InputDataFields.groundtruth_dp_surface_coords)
groundtruth_keypoint_depths = None
groundtruth_keypoint_depth_weights = None
if include_keypoint_depths:
groundtruth_keypoint_depths = (
fields.InputDataFields.groundtruth_keypoint_depths)
groundtruth_keypoint_depth_weights = (
fields.InputDataFields.groundtruth_keypoint_depth_weights)
prep_func_arg_map = {
normalize_image: (fields.InputDataFields.image,),
......@@ -4364,6 +4401,8 @@ def get_default_func_arg_map(include_label_weights=True,
groundtruth_keypoint_visibilities,
groundtruth_dp_part_ids,
groundtruth_dp_surface_coords,
groundtruth_keypoint_depths,
groundtruth_keypoint_depth_weights,
),
random_vertical_flip: (
fields.InputDataFields.image,
......
......@@ -105,6 +105,17 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
])
return keypoints, keypoint_visibilities
def createTestKeypointDepths(self):
  """Builds fixture depth and depth-weight tensors for keypoint tests.

  Returns:
    A (keypoint_depths, keypoint_depth_weights) tuple of tf.float32
    constants, each created from a 2 x 3 nested list.
  """
  depth_rows = [[1.0, 0.9, 0.8], [0.7, 0.6, 0.5]]
  weight_rows = [[0.5, 0.6, 0.7], [0.8, 0.9, 1.0]]
  # Depths are created before weights, matching the returned order.
  depths = tf.constant(depth_rows, dtype=tf.float32)
  weights = tf.constant(weight_rows, dtype=tf.float32)
  return depths, weights
def createTestKeypointsInsideCrop(self):
keypoints = np.array([
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
......@@ -713,6 +724,59 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_keypoints=True)
def testRunRandomHorizontalFlipWithKeypointDepth(self):
  """Checks that a forced horizontal flip permutes the keypoint depth fields.

  Runs `preprocessor.random_horizontal_flip` with probability 1.0 through
  `preprocessor.preprocess` and compares the resulting keypoint depths and
  keypoint depth weights against hard-coded expectations.
  """

  def graph_fn():
    image_height = 3
    image_width = 3
    images = tf.random_uniform([1, image_height, image_width, 3])
    boxes = self.createTestBoxes()
    masks = self.createTestMasks()
    keypoints, keypoint_visibilities = self.createTestKeypoints()
    keypoint_depths, keypoint_depth_weights = self.createTestKeypointDepths()
    keypoint_flip_permutation = self.createKeypointFlipPermutation()
    tensor_dict = {
        fields.InputDataFields.image:
            images,
        fields.InputDataFields.groundtruth_boxes:
            boxes,
        fields.InputDataFields.groundtruth_instance_masks:
            masks,
        fields.InputDataFields.groundtruth_keypoints:
            keypoints,
        fields.InputDataFields.groundtruth_keypoint_visibilities:
            keypoint_visibilities,
        fields.InputDataFields.groundtruth_keypoint_depths:
            keypoint_depths,
        fields.InputDataFields.groundtruth_keypoint_depth_weights:
            keypoint_depth_weights,
    }

    # probability=1.0 makes the flip unconditional, so the expected values
    # asserted below are deterministic.
    preprocess_options = [(preprocessor.random_horizontal_flip, {
        'keypoint_flip_permutation': keypoint_flip_permutation,
        'probability': 1.0
    })]
    # include_keypoint_depths=True adds the two depth fields to the argument
    # map handed to the preprocessing functions.
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
        include_instance_masks=True,
        include_keypoints=True,
        include_keypoint_visibilities=True,
        include_dense_pose=False,
        include_keypoint_depths=True)
    tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
    keypoint_depths = tensor_dict[
        fields.InputDataFields.groundtruth_keypoint_depths]
    keypoint_depth_weights = tensor_dict[
        fields.InputDataFields.groundtruth_keypoint_depth_weights]
    output_tensors = [keypoint_depths, keypoint_depth_weights]
    return output_tensors

  output_tensors = self.execute_cpu(graph_fn, [])
  # The expectations equal the rows from createTestKeypointDepths with the
  # last two columns swapped. NOTE(review): presumably
  # createKeypointFlipPermutation returns [0, 2, 1] -- confirm.
  expected_keypoint_depths = [[1.0, 0.8, 0.9], [0.7, 0.5, 0.6]]
  expected_keypoint_depth_weights = [[0.5, 0.7, 0.6], [0.8, 1.0, 0.9]]
  self.assertAllClose(expected_keypoint_depths, output_tensors[0])
  self.assertAllClose(expected_keypoint_depth_weights, output_tensors[1])
def testRandomVerticalFlip(self):
def graph_fn():
......
......@@ -307,6 +307,14 @@ def transform_input_data(tensor_dict,
out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
out_tensor_dict[flds_gt_kpt][:, :, 0],
dtype=tf.bool)
flds_gt_kpt_depth = fields.InputDataFields.groundtruth_keypoint_depths
flds_gt_kpt_depth_weight = (
fields.InputDataFields.groundtruth_keypoint_depth_weights)
if flds_gt_kpt_depth in out_tensor_dict:
out_tensor_dict[flds_gt_kpt_depth] = out_tensor_dict[flds_gt_kpt_depth]
out_tensor_dict[flds_gt_kpt_depth_weight] = out_tensor_dict[
flds_gt_kpt_depth_weight]
out_tensor_dict[flds_gt_kpt_weights] = (
keypoint_ops.keypoint_weights_from_visibilities(
out_tensor_dict[flds_gt_kpt_vis],
......@@ -506,6 +514,15 @@ def pad_input_data_to_static_shapes(tensor_dict,
padding_shapes[input_fields.
groundtruth_keypoint_visibilities] = padding_shape
if fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict:
tensor_shape = tensor_dict[fields.InputDataFields.
groundtruth_keypoint_depths].shape
padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
padding_shapes[fields.InputDataFields.
groundtruth_keypoint_depths] = padding_shape
padding_shapes[fields.InputDataFields.
groundtruth_keypoint_depth_weights] = padding_shape
if input_fields.groundtruth_keypoint_weights in tensor_dict:
tensor_shape = (
tensor_dict[input_fields.groundtruth_keypoint_weights].shape)
......@@ -587,6 +604,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
in tensor_dict)
include_keypoint_visibilities = (
fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict)
include_keypoint_depths = (
fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict)
include_label_weights = (fields.InputDataFields.groundtruth_weights
in tensor_dict)
include_label_confidences = (fields.InputDataFields.groundtruth_confidences
......@@ -606,7 +625,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints,
include_keypoint_visibilities=include_keypoint_visibilities,
include_dense_pose=include_dense_pose))
include_dense_pose=include_dense_pose,
include_keypoint_depths=include_keypoint_depths))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
tensor_dict[fields.InputDataFields.image], axis=0)
return tensor_dict
......@@ -628,6 +648,8 @@ def _get_labels_dict(input_dict):
fields.InputDataFields.groundtruth_confidences,
fields.InputDataFields.groundtruth_labeled_classes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_depths,
fields.InputDataFields.groundtruth_keypoint_depth_weights,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_is_crowd,
......
......@@ -1420,6 +1420,49 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase):
[[[0., 0., 0., 0.,], [0., 0., 0., 0.,]],
[[0.1, 0.1, 0.3, 0.4,], [0.6, 0.4, 0.6, 0.7,]]])
def test_groundtruth_keypoint_depths(self):
  """Verifies keypoint depth fields come out of transform_input_data intact.

  The depth and depth-weight values fed in are the same values asserted on
  the transformed output.
  """
  depth_values = [[1.0, 0.9], [0.8, 0.7]]
  depth_weight_values = [[0.7, 0.8], [0.9, 1.0]]

  def graph_fn():
    input_fields = fields.InputDataFields
    # Tensors are created in the same order as the dictionary entries below.
    image = tf.constant(np.random.rand(100, 50, 3).astype(np.float32))
    boxes = tf.constant(
        np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]], np.float32))
    classes = tf.constant(np.array([1, 2], np.int32))
    keypoints = tf.constant([[[0.1, 0.2], [0.3, 0.4]],
                             [[0.5, 0.6], [0.7, 0.8]]])
    visibilities = tf.constant([[True, False], [True, True]])
    depths = tf.constant(depth_values)
    depth_weights = tf.constant(depth_weight_values)
    features = {
        input_fields.image: image,
        input_fields.groundtruth_boxes: boxes,
        input_fields.groundtruth_classes: classes,
        input_fields.groundtruth_keypoints: keypoints,
        input_fields.groundtruth_keypoint_visibilities: visibilities,
        input_fields.groundtruth_keypoint_depths: depths,
        input_fields.groundtruth_keypoint_depth_weights: depth_weights,
    }

    # Direct keyword call; equivalent to binding these arguments with
    # functools.partial and then supplying tensor_dict.
    outputs = inputs.transform_input_data(
        tensor_dict=features,
        model_preprocess_fn=_fake_resize50_preprocess_fn,
        image_resizer_fn=_fake_image_resizer_fn,
        num_classes=3,
        keypoint_type_weight=[1.0, 2.0])
    return (outputs[input_fields.groundtruth_keypoint_depths],
            outputs[input_fields.groundtruth_keypoint_depth_weights])

  result_depths, result_depth_weights = self.execute_cpu(graph_fn, [])
  self.assertAllClose(result_depths, depth_values)
  self.assertAllClose(result_depth_weights, depth_weight_values)
class PadInputDataToStaticShapesFnTest(test_case.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册