提交 7f16ff44 编写于 作者: W Waleed Abdulla

Cleanup and prepare branch to be merged.

上级 bb98305a
...@@ -666,6 +666,7 @@ def clip_to_window(window, boxes): ...@@ -666,6 +666,7 @@ def clip_to_window(window, boxes):
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], window[3]), window[1]) boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], window[3]), window[1])
return boxes return boxes
def refine_detections_graph(rois, probs, deltas, window, config): def refine_detections_graph(rois, probs, deltas, window, config):
"""Refine classified proposals and filter overlaps and return final """Refine classified proposals and filter overlaps and return final
detections. detections.
...@@ -678,25 +679,20 @@ def refine_detections_graph(rois, probs, deltas, window, config): ...@@ -678,25 +679,20 @@ def refine_detections_graph(rois, probs, deltas, window, config):
window: (y1, x1, y2, x2) in image coordinates. The part of the image window: (y1, x1, y2, x2) in image coordinates. The part of the image
that contains the image excluding the padding. that contains the image excluding the padding.
Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)] Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)] where
coordinates are in image domain.
""" """
# Class IDs per ROI # Class IDs per ROI
class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
# Class probability of the top class of each ROI # Class probability of the top class of each ROI
scores_select_size = class_ids.shape[0] indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
scores_select = tf.range(scores_select_size) class_scores = tf.gather_nd(probs, indices)
score_indices = tf.stack([scores_select,class_ids], axis=1)
class_scores = tf.gather_nd(probs, score_indices)
# Class-specific bounding box deltas # Class-specific bounding box deltas
deltas_range_size = deltas.shape[0] deltas_specific = tf.gather_nd(deltas, indices)
deltas_range = tf.range(deltas_range_size)
deltas_indices = tf.stack([deltas_range, class_ids], axis=1)
deltas_specific = tf.gather_nd(deltas, deltas_indices)
# Apply bounding box deltas # Apply bounding box deltas
# Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
refined_rois = apply_box_deltas_graph( refined_rois = apply_box_deltas_graph(
rois, deltas_specific * config.BBOX_STD_DEV) rois, deltas_specific * config.BBOX_STD_DEV)
# Convert coordinates to image domain # Convert coordinates to image domain
# TODO: better to keep them normalized until later # TODO: better to keep them normalized until later
height, width = config.IMAGE_SHAPE[:2] height, width = config.IMAGE_SHAPE[:2]
...@@ -705,97 +701,83 @@ def refine_detections_graph(rois, probs, deltas, window, config): ...@@ -705,97 +701,83 @@ def refine_detections_graph(rois, probs, deltas, window, config):
refined_rois = clip_boxes_graph(refined_rois, window) refined_rois = clip_boxes_graph(refined_rois, window)
# Round and cast to int since we're dealing with pixels now # Round and cast to int since we're dealing with pixels now
refined_rois = tf.to_int32(tf.rint(refined_rois)) refined_rois = tf.to_int32(tf.rint(refined_rois))
# TODO: Filter out boxes with zero area # TODO: Filter out boxes with zero area
# Filter out background boxes # Filter out background boxes
keep = tf.where(class_ids > 0)[:,0] keep = tf.where(class_ids > 0)[:, 0]
# Filter out low confidence boxes # Filter out low confidence boxes
conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:,0]
if config.DETECTION_MIN_CONFIDENCE: if config.DETECTION_MIN_CONFIDENCE:
keep = tf.sparse_tensor_to_dense(tf.sets.set_intersection( conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0)))[0] keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
tf.expand_dims(conf_keep, 0))
keep = tf.sparse_tensor_to_dense(keep)[0]
# Apply per-class NMS # Apply per-class NMS
# 1. Prepare variables
pre_nms_class_ids = tf.gather(class_ids, keep) pre_nms_class_ids = tf.gather(class_ids, keep)
pre_nms_scores = tf.gather(class_scores, keep) pre_nms_scores = tf.gather(class_scores, keep)
pre_nms_rois = tf.gather(refined_rois, keep) pre_nms_rois = tf.gather(refined_rois, keep)
unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]
uniq_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] def nms_keep_map(class_id):
"""Apply Non-Maximum Suppression on ROIs of the given class."""
# sort unique class ids # Indices of ROIs of the given class
_,max_index = tf.nn.top_k(-uniq_pre_nms_class_ids, tf.size(uniq_pre_nms_class_ids)) ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
uniq_pre_nms_class_ids = tf.gather(uniq_pre_nms_class_ids,max_index)
nms_keep = []
def nms_keep_map(i, ret):
class_id = uniq_pre_nms_class_ids[i]
scale = tf.fill(tf.shape(pre_nms_class_ids), class_id)
ixs = tf.cast(tf.where(tf.equal(scale, pre_nms_class_ids))[:,0], tf.int32)
# Apply NMS # Apply NMS
class_keep = tf.image.non_max_suppression( class_keep = tf.image.non_max_suppression(
tf.to_float(tf.gather(pre_nms_rois,ixs)), tf.to_float(tf.gather(pre_nms_rois, ixs)),
tf.gather(pre_nms_scores, ixs), tf.gather(pre_nms_scores, ixs),
max_output_size=tf.shape(ixs)[0], max_output_size=config.DETECTION_MAX_INSTANCES,
iou_threshold=config.DETECTION_NMS_THRESHOLD) iou_threshold=config.DETECTION_NMS_THRESHOLD)
# Map indices # Map indices
cur_keep_indexes = tf.gather(tf.cast(keep,tf.int32), tf.gather(ixs, class_keep)) class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
return i+1, tf.concat([ret,cur_keep_indexes], axis=0) # Pad with -1 so returned tensors have the same shape
gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
nums_iters = tf.shape(uniq_pre_nms_class_ids)[0] # unique class ids class_keep = tf.pad(class_keep, [(0, gap)],
i = tf.constant(0) mode='CONSTANT', constant_values=-1)
ret = tf.ones([1], dtype=tf.int32) # Set shape so map_fn() can infer result shape
c = lambda i, unique_pre_nms:tf.less(i, nums_iters) class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
b = nms_keep_map return class_keep
r = tf.while_loop(c, b, [i, -ret],
shape_invariants=[i.get_shape(), tf.TensorShape([None])]) # 2. Map over class IDs
nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
nms_keep = r[1] dtype=tf.int64)
# 3. Merge results into one list, and remove -1 padding
# remove initial_value background nms_keep = tf.reshape(nms_keep, [-1])
nms_keep = tf.gather(nms_keep, tf.where(nms_keep >= 0)[:,0]) nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
keep = tf.cast(keep, tf.int32) # 4. Compute intersection between keep and nms_keep
keep = tf.sparse_tensor_to_dense(tf.sets.set_intersection(tf.expand_dims(keep,0), tf.expand_dims(nms_keep,0)))[0] keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
tf.expand_dims(nms_keep, 0))
keep = tf.sparse_tensor_to_dense(keep)[0]
# Keep top detections # Keep top detections
roi_count = tf.convert_to_tensor(config.DETECTION_MAX_INSTANCES) roi_count = config.DETECTION_MAX_INSTANCES
class_scores_keep = tf.gather(class_scores, keep) class_scores_keep = tf.gather(class_scores, keep)
num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
keep = tf.gather(keep, top_ids) keep = tf.gather(keep, top_ids)
refined_rois_keep = tf.gather(tf.to_float(refined_rois), keep)
class_ids_keep = tf.gather(tf.to_float(class_ids), keep)[..., tf.newaxis]
class_scores_keep = tf.gather(class_scores, keep)[..., tf.newaxis]
# Arrange output as [N, (y1, x1, y2, x2, class_id, score)] # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
# Coordinates are in image domain. # Coordinates are in image domain.
detections = tf.concat((refined_rois_keep, class_ids_keep, detections = tf.concat([
class_scores_keep), axis=1) tf.to_float(tf.gather(refined_rois, keep)),
tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
tf.gather(class_scores, keep)[..., tf.newaxis]
], axis=1)
# Pad with zeros if detections < DETECTION_MAX_INSTANCES # Pad with zeros if detections < DETECTION_MAX_INSTANCES
num_detections = tf.shape(detections)[0] gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
gap = roi_count - num_detections detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
pred = tf.less(tf.constant(0), gap) return detections
def pad_detections():
return tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
detections = tf.cond(pred, pad_detections, lambda: detections)
return tf.to_float(detections)
class DetectionLayer(KE.Layer): class DetectionLayer(KE.Layer):
"""Takes classified proposal boxes and their bounding box deltas and """Takes classified proposal boxes and their bounding box deltas and
returns the final detection boxes. returns the final detection boxes.
Returns: Returns:
[batch, num_detections, (y1, x1, y2, x2, class_score)] in pixels [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where
coordinates are in image domain
""" """
def __init__(self, config=None, **kwargs): def __init__(self, config=None, **kwargs):
...@@ -803,13 +785,12 @@ class DetectionLayer(KE.Layer): ...@@ -803,13 +785,12 @@ class DetectionLayer(KE.Layer):
self.config = config self.config = config
def call(self, inputs): def call(self, inputs):
config = self.config
rois = inputs[0] rois = inputs[0]
mrcnn_class = inputs[1] mrcnn_class = inputs[1]
mrcnn_bbox = inputs[2] mrcnn_bbox = inputs[2]
image_meta = inputs[3] image_meta = inputs[3]
#parse_image_meta can be reused as slicing works same way in TF & numpy # Run detection refinement graph on each item in the batch
_, _, window, _ = parse_image_meta_graph(image_meta) _, _, window, _ = parse_image_meta_graph(image_meta)
detections_batch = utils.batch_slice( detections_batch = utils.batch_slice(
[rois, mrcnn_class, mrcnn_bbox, window], [rois, mrcnn_class, mrcnn_bbox, window],
...@@ -822,7 +803,6 @@ class DetectionLayer(KE.Layer): ...@@ -822,7 +803,6 @@ class DetectionLayer(KE.Layer):
detections_batch, detections_batch,
[self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6])
def compute_output_shape(self, input_shape): def compute_output_shape(self, input_shape):
return (None, self.config.DETECTION_MAX_INSTANCES, 6) return (None, self.config.DETECTION_MAX_INSTANCES, 6)
...@@ -839,7 +819,7 @@ def rpn_graph(feature_map, anchors_per_location, anchor_stride): ...@@ -839,7 +819,7 @@ def rpn_graph(feature_map, anchors_per_location, anchor_stride):
Returns: Returns:
rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax) rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
rpn_probs: [batch, W, W, 2] Anchor classifier probabilities. rpn_probs: [batch, H, W, 2] Anchor classifier probabilities.
rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
applied to anchors. applied to anchors.
""" """
...@@ -2504,8 +2484,7 @@ class MaskRCNN(): ...@@ -2504,8 +2484,7 @@ class MaskRCNN():
############################################################ ############################################################
def compose_image_meta(image_id, image_shape, window, active_class_ids): def compose_image_meta(image_id, image_shape, window, active_class_ids):
"""Takes attributes of an image and puts them in one 1D array. Use """Takes attributes of an image and puts them in one 1D array.
parse_image_meta() to parse the values back.
image_id: An int ID of the image. Useful for debugging. image_id: An int ID of the image. Useful for debugging.
image_shape: [height, width, channels] image_shape: [height, width, channels]
...@@ -2523,6 +2502,7 @@ def compose_image_meta(image_id, image_shape, window, active_class_ids): ...@@ -2523,6 +2502,7 @@ def compose_image_meta(image_id, image_shape, window, active_class_ids):
) )
return meta return meta
def parse_image_meta_graph(meta): def parse_image_meta_graph(meta):
"""Parses a tensor that contains image attributes to its components. """Parses a tensor that contains image attributes to its components.
See compose_image_meta() for more details. See compose_image_meta() for more details.
...@@ -2535,6 +2515,7 @@ def parse_image_meta_graph(meta): ...@@ -2535,6 +2515,7 @@ def parse_image_meta_graph(meta):
active_class_ids = meta[:, 8:] active_class_ids = meta[:, 8:]
return [image_id, image_shape, window, active_class_ids] return [image_id, image_shape, window, active_class_ids]
def mold_image(images, config): def mold_image(images, config):
"""Takes RGB images with 0-255 values and subtracts """Takes RGB images with 0-255 values and subtracts
the mean pixel and converts it to float. Expects image the mean pixel and converts it to float. Expects image
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册