diff --git a/config.py b/config.py
index b85bd66d2cccdb136b41fadc1440e398fafd61fe..ff280f82ef7d8bcab9dbc9a4236f9d31c833480b 100644
--- a/config.py
+++ b/config.py
@@ -72,7 +72,7 @@ class Config(object):
     RPN_ANCHOR_STRIDE = 1

     # Non-max suppression threshold to filter RPN proposals.
-    # You can reduce this during training to generate more propsals.
+    # You can increase this during training to generate more proposals.
     RPN_NMS_THRESHOLD = 0.7

     # How many anchors per image to use for RPN training
diff --git a/model.py b/model.py
index e45dae91fdbf2f62ce07dc9aae377df5ae81b479..53faa54c80a43c0bae8a0fcd0e2126bcd4d8ab21 100644
--- a/model.py
+++ b/model.py
@@ -808,7 +808,9 @@ class DetectionLayer(KE.Layer):
         return (None, self.config.DETECTION_MAX_INSTANCES, 6)


-# Region Proposal Network (RPN)
+############################################################
+# Region Proposal Network (RPN)
+############################################################

 def rpn_graph(feature_map, anchors_per_location, anchor_stride):
     """Builds the computation graph of Region Proposal Network.
@@ -1223,7 +1225,7 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
             """Determines which augmenters to apply to masks."""
             return (augmenter.__class__.__name__ in MASK_AUGMENTERS)

-        # Store original shapes to compare
+        # Store shapes before augmentation to compare
         image_shape = image.shape
         mask_shape = mask.shape
         # Make augmenters deterministic to apply similarly to images and masks
@@ -1390,7 +1392,7 @@ def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config):
     # Normalize bbox refinements
     bboxes /= config.BBOX_STD_DEV

-    # Generate class-specific target masks.
+    # Generate class-specific target masks
     masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0], config.MASK_SHAPE[1], config.NUM_CLASSES),
                      dtype=np.float32)
     for i in pos_ids:
@@ -2223,9 +2225,17 @@ class MaskRCNN():
               3+: Train Resnet stage 3 and up
               4+: Train Resnet stage 4 and up
               5+: Train Resnet stage 5 and up
-        augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
-            For example, passing imgaug.augmenters.Fliplr(0.5) flips images
-            right/left 50% of the time.
+        augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
+            augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
+            flips images right/left 50% of the time. You can pass complex
+            augmentations as well. The example below applies 50% of the
+            time, and when it does it flips images right/left half the time
+            and adds a Gaussian blur with a random sigma in the range 0 to 5.
+
+            augmentation = imgaug.augmenters.Sometimes(0.5, [
+                imgaug.augmenters.Fliplr(0.5),
+                imgaug.augmenters.GaussianBlur(sigma=(0.0, 5.0))
+            ])
         """
         assert self.mode == "training", "Create model in training mode."
diff --git a/utils.py b/utils.py
index 7ed2fe15a3e2e636381965b9bd6c2be8f8c25ca0..ba8b09555343a49df747c067b2921e1e5b8c0959 100644
--- a/utils.py
+++ b/utils.py
@@ -456,8 +456,8 @@ def resize_mask(mask, scale, padding):


 def minimize_mask(bbox, mask, mini_shape):
-    """Resize masks to a smaller version to cut memory load.
-    Mini-masks can then resized back to image scale using expand_masks()
+    """Resize masks to a smaller version to reduce memory load.
+    Mini-masks can be resized back to image scale using expand_masks()

     See inspect_data.ipynb notebook for more details.
     """
@@ -498,8 +498,8 @@ def mold_mask(mask, config):


 def unmold_mask(mask, bbox, image_shape):
-    """Converts a mask generated by the neural network into a format similar
-    to it's original shape.
+    """Converts a mask generated by the neural network to a format similar
+    to its original shape.
     mask: [height, width] of type float. A small, typically 28x28 mask.
     bbox: [y1, x1, y2, x2]. The box to fit the mask in.