提交 c1d142fe 编写于 作者: W Waleed Abdulla

Add image shape and scaling factor to image_meta

The image_meta tensor stores meta data about the
image. This update adds the scale by which the
image was resized and it’s final shape.
上级 0a3d49fc
......@@ -789,7 +789,7 @@ class DetectionLayer(KE.Layer):
image_meta = inputs[3]
# Run detection refinement graph on each item in the batch
_, _, window, _ = parse_image_meta_graph(image_meta)
window = parse_image_meta_graph(image_meta)['window']
window = norm_boxes_graph(window, self.config.IMAGE_SHAPE[:2])
detections_batch = utils.batch_slice(
[rois, mrcnn_class, mrcnn_bbox, window],
......@@ -1193,7 +1193,7 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
# Load image and mask
image = dataset.load_image(image_id)
mask, class_ids = dataset.load_mask(image_id)
shape = image.shape
original_shape = image.shape
image, window, scale, padding = utils.resize_image(
image,
min_dim=config.IMAGE_MIN_DIM,
......@@ -1262,7 +1262,8 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)
# Image meta data
image_meta = compose_image_meta(image_id, shape, window, active_class_ids)
image_meta = compose_image_meta(image_id, original_shape, image.shape,
window, scale, active_class_ids)
return image, image_meta, class_ids, bbox, mask
......@@ -1926,8 +1927,9 @@ class MaskRCNN():
if mode == "training":
# Class ID mask to mark class IDs supported by the dataset the image
# came from.
_, _, _, active_class_ids = KL.Lambda(lambda x: parse_image_meta_graph(x),
mask=[None, None, None, None])(input_image_meta)
active_class_ids = KL.Lambda(
lambda x: parse_image_meta_graph(x)["active_class_ids"]
)(input_image_meta)
if not config.USE_RPN_ROIS:
# Ignore predicted ROIs and use ROIs provided as an input.
......@@ -2321,7 +2323,7 @@ class MaskRCNN():
molded_image = mold_image(molded_image, self.config)
# Build image_meta
image_meta = compose_image_meta(
0, image.shape, window,
0, image.shape, molded_image.shape, window, scale,
np.zeros([self.config.NUM_CLASSES], dtype=np.int32))
# Append
molded_images.append(molded_image)
......@@ -2544,22 +2546,27 @@ class MaskRCNN():
# Data Formatting
############################################################
def compose_image_meta(image_id, image_shape, window, active_class_ids):
def compose_image_meta(image_id, original_image_shape, image_shape,
window, scale, active_class_ids):
"""Takes attributes of an image and puts them in one 1D array.
image_id: An int ID of the image. Useful for debugging.
image_shape: [height, width, channels]
original_image_shape: [H, W, C] before resizing or padding.
image_shape: [H, W, C] after resizing and padding
window: (y1, x1, y2, x2) in pixels. The area of the image where the real
image is (excluding the padding)
scale: The scaling factor applied to the original image (float32)
active_class_ids: List of class_ids available in the dataset from which
the image came. Useful if training on images from multiple datasets
where not all classes are present in all datasets.
"""
meta = np.array(
[image_id] + # size=1
list(image_shape) + # size=3
list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates
list(active_class_ids) # size=num_classes
[image_id] + # size=1
list(original_image_shape) + # size=3
list(image_shape) + # size=3
list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates
[scale] + # size=1
list(active_class_ids) # size=num_classes
)
return meta
......@@ -2569,12 +2576,23 @@ def parse_image_meta_graph(meta):
See compose_image_meta() for more details.
meta: [batch, meta length] where meta length depends on NUM_CLASSES
Returns a dict of the parsed tensors.
"""
image_id = meta[:, 0]
image_shape = meta[:, 1:4]
window = meta[:, 4:8] # (y1, x1, y2, x2) window of image in in pixels
active_class_ids = meta[:, 8:]
return [image_id, image_shape, window, active_class_ids]
original_image_shape = meta[:, 1:4]
image_shape = meta[:, 4:7]
window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels
scale = meta[:, 11]
active_class_ids = meta[:, 12:]
return {
"image_id": image_id,
"original_image_shape": original_image_shape,
"image_shape": image_shape,
"window": window,
"scale": scale,
"active_class_ids": active_class_ids,
}
def mold_image(images, config):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册