Add image shape and scaling factor to image_meta

The image_meta tensor stores meta data about the image. This update adds the scale by which the image was resized and it’s final shape.

Add image shape and scaling factor to image_meta
The image_meta tensor stores meta data about the image. This update adds the scale by which the image was resized and it’s final shape.
c1d142fe · Waleed Abdulla · 0a3d49fc · c1d142fe
隐藏空白更改
内联并排

Showing with 34 addition and 16 deletion

model.py model.py +34 -16

未找到文件。
--- a/model.py
+++ b/model.py
@@ -789,7 +789,7 @@ class DetectionLayer(KE.Layer):
        image_meta = inputs[3]

        # Run detection refinement graph on each item in the batch
-        _, _, window, _ = parse_image_meta_graph(image_meta)
+        window = parse_image_meta_graph(image_meta)['window']
        window = norm_boxes_graph(window, self.config.IMAGE_SHAPE[:2])
        detections_batch = utils.batch_slice(
            [rois, mrcnn_class, mrcnn_bbox, window],
@@ -1193,7 +1193,7 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
-    shape = image.shape
+    original_shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
@@ -1262,7 +1262,8 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
-    image_meta = compose_image_meta(image_id, shape, window, active_class_ids)
+    image_meta = compose_image_meta(image_id, original_shape, image.shape,
+                                    window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask

@@ -1926,8 +1927,9 @@ class MaskRCNN():
        if mode == "training":
            # Class ID mask to mark class IDs supported by the dataset the image
            # came from.
-            _, _, _, active_class_ids = KL.Lambda(lambda x: parse_image_meta_graph(x),
-                                                  mask=[None, None, None, None])(input_image_meta)
+            active_class_ids = KL.Lambda(
+                lambda x: parse_image_meta_graph(x)["active_class_ids"]
+                )(input_image_meta)

            if not config.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
@@ -2321,7 +2323,7 @@ class MaskRCNN():
            molded_image = mold_image(molded_image, self.config)
            # Build image_meta
            image_meta = compose_image_meta(
-                0, image.shape, window,
+                0, image.shape, molded_image.shape, window, scale,
                np.zeros([self.config.NUM_CLASSES], dtype=np.int32))
            # Append
            molded_images.append(molded_image)
@@ -2544,22 +2546,27 @@ class MaskRCNN():
 #  Data Formatting
 ############################################################

-def compose_image_meta(image_id, image_shape, window, active_class_ids):
+def compose_image_meta(image_id, original_image_shape, image_shape,
+                       window, scale, active_class_ids):
    """Takes attributes of an image and puts them in one 1D array.

    image_id: An int ID of the image. Useful for debugging.
-    image_shape: [height, width, channels]
+    original_image_shape: [H, W, C] before resizing or padding.
+    image_shape: [H, W, C] after resizing and padding
    window: (y1, x1, y2, x2) in pixels. The area of the image where the real
            image is (excluding the padding)
+    scale: The scaling factor applied to the original image (float32)
    active_class_ids: List of class_ids available in the dataset from which
        the image came. Useful if training on images from multiple datasets
        where not all classes are present in all datasets.
    """
    meta = np.array(
-        [image_id] +            # size=1
-        list(image_shape) +     # size=3
-        list(window) +          # size=4 (y1, x1, y2, x2) in image cooredinates
-        list(active_class_ids)  # size=num_classes
+        [image_id] +                  # size=1
+        list(original_image_shape) +  # size=3
+        list(image_shape) +           # size=3
+        list(window) +                # size=4 (y1, x1, y2, x2) in image cooredinates
+        [scale] +                     # size=1
+        list(active_class_ids)        # size=num_classes
    )
    return meta

@@ -2569,12 +2576,23 @@ def parse_image_meta_graph(meta):
    See compose_image_meta() for more details.

    meta: [batch, meta length] where meta length depends on NUM_CLASSES
+
+    Returns a dict of the parsed tensors.
    """
    image_id = meta[:, 0]
-    image_shape = meta[:, 1:4]
-    window = meta[:, 4:8]   # (y1, x1, y2, x2) window of image in in pixels
-    active_class_ids = meta[:, 8:]
-    return [image_id, image_shape, window, active_class_ids]
+    original_image_shape = meta[:, 1:4]
+    image_shape = meta[:, 4:7]
+    window = meta[:, 7:11]  # (y1, x1, y2, x2) window of image in in pixels
+    scale = meta[:, 11]
+    active_class_ids = meta[:, 12:]
+    return {
+        "image_id": image_id,
+        "original_image_shape": original_image_shape,
+        "image_shape": image_shape,
+        "window": window,
+        "scale": scale,
+        "active_class_ids": active_class_ids,
+    }


 def mold_image(images, config):