Commit 13701da8 authored by Waleed Abdulla

Refactoring and clean up. No functionality change

Parent 7444fbaa
@@ -115,6 +115,9 @@ class Config(object):
# Pooled ROIs
POOL_SIZE = 7
MASK_POOL_SIZE = 14
# Shape of output mask
# To change this you also need to change the neural network mask branch
MASK_SHAPE = [28, 28]
# Maximum number of ground truth instances to use in one image
......
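As a side note, a minimal sketch of how such values are usually overridden (the subclass below is illustrative, not part of this commit, and assumes the repo-root config.py module of this revision). Changing MASK_SHAPE here alone is not enough, since the mask head in model.py is built to emit 28x28 masks:

    from config import Config  # repo-root config.py in this revision

    class TwoClassConfig(Config):
        # Illustrative subclass: keep MASK_POOL_SIZE and MASK_SHAPE consistent
        # with the mask branch architecture in model.py.
        NAME = "two_class"
        NUM_CLASSES = 1 + 1          # background + one object class
        MASK_POOL_SIZE = 14
        MASK_SHAPE = [28, 28]        # must match the mask head's output resolution

    config = TwoClassConfig()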
@@ -1728,12 +1728,9 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
(batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
batch_gt_boxes = np.zeros(
(batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
if config.USE_MINI_MASK:
batch_gt_masks = np.zeros((batch_size, config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1],
config.MAX_GT_INSTANCES))
else:
batch_gt_masks = np.zeros(
(batch_size, image.shape[0], image.shape[1], config.MAX_GT_INSTANCES))
batch_gt_masks = np.zeros(
(batch_size, gt_masks.shape[0], gt_masks.shape[1],
config.MAX_GT_INSTANCES), dtype=gt_masks.dtype)
if random_rois:
batch_rpn_rois = np.zeros(
(batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype)
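For context on the simplification above: the branch on config.USE_MINI_MASK is no longer needed because the batch buffer can be sized from whatever masks load_image_gt() actually returned. A standalone numpy sketch (the helper name and shapes below are illustrative, not from the repo):

    import numpy as np

    def make_batch_gt_masks(gt_masks, batch_size, max_gt_instances):
        # Size the batch buffer from the masks actually returned, so the same
        # line handles both mini masks and full-image masks.
        return np.zeros(
            (batch_size, gt_masks.shape[0], gt_masks.shape[1], max_gt_instances),
            dtype=gt_masks.dtype)

    mini_masks = np.zeros((56, 56, 3), dtype=bool)      # e.g. mini-mask resolution
    full_masks = np.zeros((1024, 1024, 3), dtype=bool)  # e.g. padded image size
    assert make_batch_gt_masks(mini_masks, 2, 100).shape == (2, 56, 56, 100)
    assert make_batch_gt_masks(full_masks, 2, 100).shape == (2, 1024, 1024, 100)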
@@ -2446,15 +2443,15 @@ class MaskRCNN():
for image in images:
log("image", image)
# Mold inputs to format expected by the neural network
molded_images, image_metas, windows = self.mold_inputs(images)
# Validate image sizes
if self.config.IMAGE_RESIZE_MODE == "square":
image_shape = self.config.IMAGE_SHAPE
else:
# All images MUST be of the same size
image_shape = images[0].shape
for g in images[1:]:
assert g.shape == image_shape,\
"Images must have the same size unless IMAGE_RESIZE_MODE is 'square'"
# All images in a batch MUST be of the same size
image_shape = molded_images[0].shape
for g in molded_images[1:]:
assert g.shape == image_shape,\
"After resizing, all images must have the same size. Check IMAGE_RESIZE_MODE and image sizes."
# Anchors
anchors = self.get_anchors(image_shape)
@@ -2462,8 +2459,6 @@ class MaskRCNN():
# TODO: can this be optimized to avoid duplicating the anchors?
anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape)
# Mold inputs to format expected by the neural network
molded_images, image_metas, windows = self.mold_inputs(images)
if verbose:
log("molded_images", molded_images)
log("image_metas", image_metas)
......
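A minimal sketch of the size check moved above, extracted as a free function for illustration (the function name is made up; in the repo this logic runs inside MaskRCNN.detect() right after mold_inputs()):

    import numpy as np

    def validate_molded_shapes(molded_images):
        # All images in a batch must share one shape so a single set of anchors
        # can be generated and broadcast across the batch.
        image_shape = molded_images[0].shape
        for g in molded_images[1:]:
            assert g.shape == image_shape, \
                "After resizing, all images must have the same size. " \
                "Check IMAGE_RESIZE_MODE and image sizes."
        return image_shape

    batch = [np.zeros((1024, 1024, 3)), np.zeros((1024, 1024, 3))]
    print(validate_molded_shapes(batch))  # (1024, 1024, 3)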
@@ -602,23 +602,25 @@ def trim_zeros(x):
return x[~np.all(x == 0, axis=1)]
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold=0.5):
"""Compute Average Precision at a set IoU threshold (default 0.5).
def compute_matches(gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold=0.5, score_threshold=0.0):
"""Finds matches between prediction and ground truth instances.
Returns:
mAP: Mean Average Precision
precisions: List of precisions at different class score thresholds.
recalls: List of recall values at different class score thresholds.
overlaps: [pred_boxes, gt_boxes] IoU overlaps.
gt_match: 1-D array. For each GT box it has the index of the matched
predicted box.
pred_match: 1-D array. For each predicted box, it has the index of
the matched ground truth box.
overlaps: [pred_boxes, gt_boxes] IoU overlaps.
"""
# Trim zero padding and sort predictions by score from high to low
# Trim zero padding
# TODO: cleaner to do zero unpadding upstream
gt_boxes = trim_zeros(gt_boxes)
gt_masks = gt_masks[..., :gt_boxes.shape[0]]
pred_boxes = trim_zeros(pred_boxes)
pred_scores = pred_scores[:pred_boxes.shape[0]]
# Sort predictions by score from high to low
indices = np.argsort(pred_scores)[::-1]
pred_boxes = pred_boxes[indices]
pred_class_ids = pred_class_ids[indices]
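A toy walk-through of the trimming and sorting steps above, with invented boxes and scores: padded all-zero rows are dropped using the same test as trim_zeros(), then predictions are reordered by descending score before matching:

    import numpy as np

    pred_boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15], [0, 0, 0, 0]])  # last row is padding
    pred_scores = np.array([0.6, 0.9, 0.0])

    pred_boxes = pred_boxes[~np.all(pred_boxes == 0, axis=1)]  # same test as trim_zeros()
    pred_scores = pred_scores[:pred_boxes.shape[0]]
    order = np.argsort(pred_scores)[::-1]
    print(pred_boxes[order])   # [[ 5  5 15 15]
                               #  [ 0  0 10 10]]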
@@ -634,10 +636,16 @@ def compute_ap(gt_boxes, gt_class_ids, gt_masks,
gt_match = np.zeros([gt_boxes.shape[0]])
for i in range(len(pred_boxes)):
# Find best matching ground truth box
# 1. Sort matches by score
sorted_ixs = np.argsort(overlaps[i])[::-1]
# 2. Remove low scores
low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
if low_score_idx.size > 0:
sorted_ixs = sorted_ixs[:low_score_idx[0]]
# 3. Find the match
for j in sorted_ixs:
# If ground truth box is already matched, go to next one
if gt_match[j] == 1:
if gt_match[j] > 0:
continue
# If we reach IoU smaller than the threshold, end the loop
iou = overlaps[i, j]
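A small numeric example (invented IoU values) of steps 1 and 2 above for a single prediction i: candidate ground truth boxes are sorted by IoU, then everything below score_threshold is cut off before the class check:

    import numpy as np

    overlaps_i = np.array([0.10, 0.75, 0.40])   # IoU of prediction i with each GT box
    score_threshold = 0.3

    sorted_ixs = np.argsort(overlaps_i)[::-1]                         # [1, 2, 0]
    low_score_idx = np.where(overlaps_i[sorted_ixs] < score_threshold)[0]
    if low_score_idx.size > 0:
        sorted_ixs = sorted_ixs[:low_score_idx[0]]
    print(sorted_ixs)  # [1 2] -- only GT boxes with IoU >= 0.3 remain as candidates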
@@ -646,13 +654,33 @@ def compute_ap(gt_boxes, gt_class_ids, gt_masks,
# Do we have a match?
if pred_class_ids[i] == gt_class_ids[j]:
match_count += 1
gt_match[j] = 1
pred_match[i] = 1
gt_match[j] = i
pred_match[i] = j
break
return gt_match, pred_match, overlaps
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold=0.5):
"""Compute Average Precision at a set IoU threshold (default 0.5).
Returns:
mAP: Mean Average Precision
precisions: List of precisions at different class score thresholds.
recalls: List of recall values at different class score thresholds.
overlaps: [pred_boxes, gt_boxes] IoU overlaps.
"""
# Get matches and overlaps
gt_match, pred_match, overlaps = compute_matches(
gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold)
# Compute precision and recall at each prediction box step
precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
precisions = np.cumsum(pred_match > 0) / (np.arange(len(pred_match)) + 1)
recalls = np.cumsum(pred_match > 0).astype(np.float32) / len(gt_match)
# Pad with start and end values to simplify the math
precisions = np.concatenate([[0], precisions, [0]])
......
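To make the precision/recall step concrete, a toy example with invented match values (under this version's convention, pred_match stores the matched GT index and, because it starts as np.zeros, 0 also stands for "unmatched"):

    import numpy as np

    pred_match = np.array([2, 0, 1, 0])   # 4 predictions sorted by score, 2 matched (to GT 2 and GT 1)
    gt_match = np.array([0, 2, 0])        # 3 ground truth boxes

    precisions = np.cumsum(pred_match > 0) / (np.arange(len(pred_match)) + 1)
    recalls = np.cumsum(pred_match > 0).astype(np.float32) / len(gt_match)
    print(precisions)  # approx. [1.0, 0.5, 0.667, 0.5]
    print(recalls)     # approx. [0.333, 0.333, 0.667, 0.667]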