Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
27号BigBang
Mask_RCNN
提交
7f16ff44
M
Mask_RCNN
项目概览
27号BigBang
/
Mask_RCNN
与 Fork 源项目一致
从无法访问的项目Fork
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
Mask_RCNN
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
GitCode(gitcode.net)2024年7月9日维护升级公告
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
7f16ff44
编写于
2月 04, 2018
作者:
W
Waleed Abdulla
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Cleanup and prepare branch to be merged.
上级
bb98305a
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
63 addition
and
82 deletion
+63
-82
model.py
model.py
+63
-82
未找到文件。
model.py
浏览文件 @
7f16ff44
...
@@ -666,6 +666,7 @@ def clip_to_window(window, boxes):
...
@@ -666,6 +666,7 @@ def clip_to_window(window, boxes):
boxes
[:,
3
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
3
],
window
[
3
]),
window
[
1
])
boxes
[:,
3
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
3
],
window
[
3
]),
window
[
1
])
return
boxes
return
boxes
def
refine_detections_graph
(
rois
,
probs
,
deltas
,
window
,
config
):
def
refine_detections_graph
(
rois
,
probs
,
deltas
,
window
,
config
):
"""Refine classified proposals and filter overlaps and return final
"""Refine classified proposals and filter overlaps and return final
detections.
detections.
...
@@ -678,25 +679,20 @@ def refine_detections_graph(rois, probs, deltas, window, config):
...
@@ -678,25 +679,20 @@ def refine_detections_graph(rois, probs, deltas, window, config):
window: (y1, x1, y2, x2) in image coordinates. The part of the image
window: (y1, x1, y2, x2) in image coordinates. The part of the image
that contains the image excluding the padding.
that contains the image excluding the padding.
Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)]
Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)] where
coordinates are in image domain.
"""
"""
# Class IDs per ROI
# Class IDs per ROI
class_ids
=
tf
.
argmax
(
probs
,
axis
=
1
,
output_type
=
tf
.
int32
)
class_ids
=
tf
.
argmax
(
probs
,
axis
=
1
,
output_type
=
tf
.
int32
)
# Class probability of the top class of each ROI
# Class probability of the top class of each ROI
scores_select_size
=
class_ids
.
shape
[
0
]
indices
=
tf
.
stack
([
tf
.
range
(
probs
.
shape
[
0
]),
class_ids
],
axis
=
1
)
scores_select
=
tf
.
range
(
scores_select_size
)
class_scores
=
tf
.
gather_nd
(
probs
,
indices
)
score_indices
=
tf
.
stack
([
scores_select
,
class_ids
],
axis
=
1
)
class_scores
=
tf
.
gather_nd
(
probs
,
score_indices
)
# Class-specific bounding box deltas
# Class-specific bounding box deltas
deltas_range_size
=
deltas
.
shape
[
0
]
deltas_specific
=
tf
.
gather_nd
(
deltas
,
indices
)
deltas_range
=
tf
.
range
(
deltas_range_size
)
deltas_indices
=
tf
.
stack
([
deltas_range
,
class_ids
],
axis
=
1
)
deltas_specific
=
tf
.
gather_nd
(
deltas
,
deltas_indices
)
# Apply bounding box deltas
# Apply bounding box deltas
# Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
# Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
refined_rois
=
apply_box_deltas_graph
(
refined_rois
=
apply_box_deltas_graph
(
rois
,
deltas_specific
*
config
.
BBOX_STD_DEV
)
rois
,
deltas_specific
*
config
.
BBOX_STD_DEV
)
# Convert coordiates to image domain
# Convert coordiates to image domain
# TODO: better to keep them normalized until later
# TODO: better to keep them normalized until later
height
,
width
=
config
.
IMAGE_SHAPE
[:
2
]
height
,
width
=
config
.
IMAGE_SHAPE
[:
2
]
...
@@ -705,97 +701,83 @@ def refine_detections_graph(rois, probs, deltas, window, config):
...
@@ -705,97 +701,83 @@ def refine_detections_graph(rois, probs, deltas, window, config):
refined_rois
=
clip_boxes_graph
(
refined_rois
,
window
)
refined_rois
=
clip_boxes_graph
(
refined_rois
,
window
)
# Round and cast to int since we're deadling with pixels now
# Round and cast to int since we're deadling with pixels now
refined_rois
=
tf
.
to_int32
(
tf
.
rint
(
refined_rois
))
refined_rois
=
tf
.
to_int32
(
tf
.
rint
(
refined_rois
))
# TODO: Filter out boxes with zero area
# TODO: Filter out boxes with zero area
# Filter out background boxes
# Filter out background boxes
keep
=
tf
.
where
(
class_ids
>
0
)[:,
0
]
keep
=
tf
.
where
(
class_ids
>
0
)[:,
0
]
# Filter out low confidence boxes
# Filter out low confidence boxes
conf_keep
=
tf
.
where
(
class_scores
>=
config
.
DETECTION_MIN_CONFIDENCE
)[:,
0
]
if
config
.
DETECTION_MIN_CONFIDENCE
:
if
config
.
DETECTION_MIN_CONFIDENCE
:
keep
=
tf
.
sparse_tensor_to_dense
(
tf
.
sets
.
set_intersection
(
conf_keep
=
tf
.
where
(
class_scores
>=
config
.
DETECTION_MIN_CONFIDENCE
)[:,
0
]
tf
.
expand_dims
(
keep
,
0
),
tf
.
expand_dims
(
conf_keep
,
0
)))[
0
]
keep
=
tf
.
sets
.
set_intersection
(
tf
.
expand_dims
(
keep
,
0
),
tf
.
expand_dims
(
conf_keep
,
0
))
keep
=
tf
.
sparse_tensor_to_dense
(
keep
)[
0
]
# Apply per-class NMS
# Apply per-class NMS
# 1. Prepare variables
pre_nms_class_ids
=
tf
.
gather
(
class_ids
,
keep
)
pre_nms_class_ids
=
tf
.
gather
(
class_ids
,
keep
)
pre_nms_scores
=
tf
.
gather
(
class_scores
,
keep
)
pre_nms_scores
=
tf
.
gather
(
class_scores
,
keep
)
pre_nms_rois
=
tf
.
gather
(
refined_rois
,
keep
)
pre_nms_rois
=
tf
.
gather
(
refined_rois
,
keep
)
unique_pre_nms_class_ids
=
tf
.
unique
(
pre_nms_class_ids
)[
0
]
uniq_pre_nms_class_ids
=
tf
.
unique
(
pre_nms_class_ids
)[
0
]
def
nms_keep_map
(
class_id
):
"""Apply Non-Maximum Suppression on ROIs of the given class."""
# sort unique class ids
# Indices of ROIs of the given class
_
,
max_index
=
tf
.
nn
.
top_k
(
-
uniq_pre_nms_class_ids
,
tf
.
size
(
uniq_pre_nms_class_ids
))
ixs
=
tf
.
where
(
tf
.
equal
(
pre_nms_class_ids
,
class_id
))[:,
0
]
uniq_pre_nms_class_ids
=
tf
.
gather
(
uniq_pre_nms_class_ids
,
max_index
)
nms_keep
=
[]
def
nms_keep_map
(
i
,
ret
):
class_id
=
uniq_pre_nms_class_ids
[
i
]
scale
=
tf
.
fill
(
tf
.
shape
(
pre_nms_class_ids
),
class_id
)
ixs
=
tf
.
cast
(
tf
.
where
(
tf
.
equal
(
scale
,
pre_nms_class_ids
))[:,
0
],
tf
.
int32
)
# Apply NMS
# Apply NMS
class_keep
=
tf
.
image
.
non_max_suppression
(
class_keep
=
tf
.
image
.
non_max_suppression
(
tf
.
to_float
(
tf
.
gather
(
pre_nms_rois
,
ixs
)),
tf
.
to_float
(
tf
.
gather
(
pre_nms_rois
,
ixs
)),
tf
.
gather
(
pre_nms_scores
,
ixs
),
tf
.
gather
(
pre_nms_scores
,
ixs
),
max_output_size
=
tf
.
shape
(
ixs
)[
0
]
,
max_output_size
=
config
.
DETECTION_MAX_INSTANCES
,
iou_threshold
=
config
.
DETECTION_NMS_THRESHOLD
)
iou_threshold
=
config
.
DETECTION_NMS_THRESHOLD
)
# Map indicies
# Map indicies
c
ur_keep_indexes
=
tf
.
gather
(
tf
.
cast
(
keep
,
tf
.
int32
)
,
tf
.
gather
(
ixs
,
class_keep
))
c
lass_keep
=
tf
.
gather
(
keep
,
tf
.
gather
(
ixs
,
class_keep
))
return
i
+
1
,
tf
.
concat
([
ret
,
cur_keep_indexes
],
axis
=
0
)
# Pad with -1 so returned tensors have the same shape
gap
=
config
.
DETECTION_MAX_INSTANCES
-
tf
.
shape
(
class_keep
)[
0
]
nums_iters
=
tf
.
shape
(
uniq_pre_nms_class_ids
)[
0
]
# unique class ids
class_keep
=
tf
.
pad
(
class_keep
,
[(
0
,
gap
)],
i
=
tf
.
constant
(
0
)
mode
=
'CONSTANT'
,
constant_values
=-
1
)
ret
=
tf
.
ones
([
1
],
dtype
=
tf
.
int32
)
# Set shape so map_fn() can infer result shape
c
=
lambda
i
,
unique_pre_nms
:
tf
.
less
(
i
,
nums_iters
)
class_keep
.
set_shape
([
config
.
DETECTION_MAX_INSTANCES
]
)
b
=
nms_keep_ma
p
return
class_kee
p
r
=
tf
.
while_loop
(
c
,
b
,
[
i
,
-
ret
],
shape_invariants
=
[
i
.
get_shape
(),
tf
.
TensorShape
([
None
])])
# 2. Map over class IDs
nms_keep
=
tf
.
map_fn
(
nms_keep_map
,
unique_pre_nms_class_ids
,
nms_keep
=
r
[
1
]
dtype
=
tf
.
int64
)
# 3. Merge results into one list, and remove -1 padding
# remove initial_value background
nms_keep
=
tf
.
reshape
(
nms_keep
,
[
-
1
])
nms_keep
=
tf
.
gather
(
nms_keep
,
tf
.
where
(
nms_keep
>
=
0
)[:,
0
])
nms_keep
=
tf
.
gather
(
nms_keep
,
tf
.
where
(
nms_keep
>
-
1
)[:,
0
])
keep
=
tf
.
cast
(
keep
,
tf
.
int32
)
# 4. Compute intersection between keep and nms_keep
keep
=
tf
.
s
parse_tensor_to_dense
(
tf
.
sets
.
set_intersection
(
tf
.
expand_dims
(
keep
,
0
),
tf
.
expand_dims
(
nms_keep
,
0
)))[
0
]
keep
=
tf
.
s
ets
.
set_intersection
(
tf
.
expand_dims
(
keep
,
0
),
tf
.
expand_dims
(
nms_keep
,
0
))
keep
=
tf
.
sparse_tensor_to_dense
(
keep
)[
0
]
# Keep top detections
# Keep top detections
roi_count
=
tf
.
convert_to_tensor
(
config
.
DETECTION_MAX_INSTANCES
)
roi_count
=
config
.
DETECTION_MAX_INSTANCES
class_scores_keep
=
tf
.
gather
(
class_scores
,
keep
)
class_scores_keep
=
tf
.
gather
(
class_scores
,
keep
)
num_keep
=
tf
.
minimum
(
tf
.
shape
(
class_scores_keep
)[
0
],
roi_count
)
num_keep
=
tf
.
minimum
(
tf
.
shape
(
class_scores_keep
)[
0
],
roi_count
)
top_ids
=
tf
.
nn
.
top_k
(
class_scores_keep
,
k
=
num_keep
,
sorted
=
True
)[
1
]
top_ids
=
tf
.
nn
.
top_k
(
class_scores_keep
,
k
=
num_keep
,
sorted
=
True
)[
1
]
keep
=
tf
.
gather
(
keep
,
top_ids
)
keep
=
tf
.
gather
(
keep
,
top_ids
)
refined_rois_keep
=
tf
.
gather
(
tf
.
to_float
(
refined_rois
),
keep
)
class_ids_keep
=
tf
.
gather
(
tf
.
to_float
(
class_ids
),
keep
)[...,
tf
.
newaxis
]
class_scores_keep
=
tf
.
gather
(
class_scores
,
keep
)[...,
tf
.
newaxis
]
# Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
# Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
# Coordinates are in image domain.
# Coordinates are in image domain.
detections
=
tf
.
concat
((
refined_rois_keep
,
class_ids_keep
,
detections
=
tf
.
concat
([
class_scores_keep
),
axis
=
1
)
tf
.
to_float
(
tf
.
gather
(
refined_rois
,
keep
)),
tf
.
to_float
(
tf
.
gather
(
class_ids
,
keep
))[...,
tf
.
newaxis
],
tf
.
gather
(
class_scores
,
keep
)[...,
tf
.
newaxis
]
],
axis
=
1
)
# Pad with zeros if detections < DETECTION_MAX_INSTANCES
# Pad with zeros if detections < DETECTION_MAX_INSTANCES
num_detections
=
tf
.
shape
(
detections
)[
0
]
gap
=
config
.
DETECTION_MAX_INSTANCES
-
tf
.
shape
(
detections
)[
0
]
gap
=
roi_count
-
num_detections
detections
=
tf
.
pad
(
detections
,
[(
0
,
gap
),
(
0
,
0
)],
"CONSTANT"
)
pred
=
tf
.
less
(
tf
.
constant
(
0
),
gap
)
return
detections
def
pad_detections
():
return
tf
.
pad
(
detections
,
[(
0
,
gap
),
(
0
,
0
)],
"CONSTANT"
)
detections
=
tf
.
cond
(
pred
,
pad_detections
,
lambda
:
detections
)
return
tf
.
to_float
(
detections
)
class
DetectionLayer
(
KE
.
Layer
):
class
DetectionLayer
(
KE
.
Layer
):
"""Takes classified proposal boxes and their bounding box deltas and
"""Takes classified proposal boxes and their bounding box deltas and
returns the final detection boxes.
returns the final detection boxes.
Returns:
Returns:
[batch, num_detections, (y1, x1, y2, x2, class_score)] in pixels
[batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where
coordinates are in image domain
"""
"""
def
__init__
(
self
,
config
=
None
,
**
kwargs
):
def
__init__
(
self
,
config
=
None
,
**
kwargs
):
...
@@ -803,13 +785,12 @@ class DetectionLayer(KE.Layer):
...
@@ -803,13 +785,12 @@ class DetectionLayer(KE.Layer):
self
.
config
=
config
self
.
config
=
config
def
call
(
self
,
inputs
):
def
call
(
self
,
inputs
):
config
=
self
.
config
rois
=
inputs
[
0
]
rois
=
inputs
[
0
]
mrcnn_class
=
inputs
[
1
]
mrcnn_class
=
inputs
[
1
]
mrcnn_bbox
=
inputs
[
2
]
mrcnn_bbox
=
inputs
[
2
]
image_meta
=
inputs
[
3
]
image_meta
=
inputs
[
3
]
#
parse_image_meta can be reused as slicing works same way in TF & numpy
#
Run detection refinement graph on each item in the batch
_
,
_
,
window
,
_
=
parse_image_meta_graph
(
image_meta
)
_
,
_
,
window
,
_
=
parse_image_meta_graph
(
image_meta
)
detections_batch
=
utils
.
batch_slice
(
detections_batch
=
utils
.
batch_slice
(
[
rois
,
mrcnn_class
,
mrcnn_bbox
,
window
],
[
rois
,
mrcnn_class
,
mrcnn_bbox
,
window
],
...
@@ -822,7 +803,6 @@ class DetectionLayer(KE.Layer):
...
@@ -822,7 +803,6 @@ class DetectionLayer(KE.Layer):
detections_batch
,
detections_batch
,
[
self
.
config
.
BATCH_SIZE
,
self
.
config
.
DETECTION_MAX_INSTANCES
,
6
])
[
self
.
config
.
BATCH_SIZE
,
self
.
config
.
DETECTION_MAX_INSTANCES
,
6
])
def
compute_output_shape
(
self
,
input_shape
):
def
compute_output_shape
(
self
,
input_shape
):
return
(
None
,
self
.
config
.
DETECTION_MAX_INSTANCES
,
6
)
return
(
None
,
self
.
config
.
DETECTION_MAX_INSTANCES
,
6
)
...
@@ -839,7 +819,7 @@ def rpn_graph(feature_map, anchors_per_location, anchor_stride):
...
@@ -839,7 +819,7 @@ def rpn_graph(feature_map, anchors_per_location, anchor_stride):
Returns:
Returns:
rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
rpn_probs: [batch,
W
, W, 2] Anchor classifier probabilities.
rpn_probs: [batch,
H
, W, 2] Anchor classifier probabilities.
rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
applied to anchors.
applied to anchors.
"""
"""
...
@@ -2504,8 +2484,7 @@ class MaskRCNN():
...
@@ -2504,8 +2484,7 @@ class MaskRCNN():
############################################################
############################################################
def
compose_image_meta
(
image_id
,
image_shape
,
window
,
active_class_ids
):
def
compose_image_meta
(
image_id
,
image_shape
,
window
,
active_class_ids
):
"""Takes attributes of an image and puts them in one 1D array. Use
"""Takes attributes of an image and puts them in one 1D array.
parse_image_meta() to parse the values back.
image_id: An int ID of the image. Useful for debugging.
image_id: An int ID of the image. Useful for debugging.
image_shape: [height, width, channels]
image_shape: [height, width, channels]
...
@@ -2523,6 +2502,7 @@ def compose_image_meta(image_id, image_shape, window, active_class_ids):
...
@@ -2523,6 +2502,7 @@ def compose_image_meta(image_id, image_shape, window, active_class_ids):
)
)
return
meta
return
meta
def
parse_image_meta_graph
(
meta
):
def
parse_image_meta_graph
(
meta
):
"""Parses a tensor that contains image attributes to its components.
"""Parses a tensor that contains image attributes to its components.
See compose_image_meta() for more details.
See compose_image_meta() for more details.
...
@@ -2535,6 +2515,7 @@ def parse_image_meta_graph(meta):
...
@@ -2535,6 +2515,7 @@ def parse_image_meta_graph(meta):
active_class_ids
=
meta
[:,
8
:]
active_class_ids
=
meta
[:,
8
:]
return
[
image_id
,
image_shape
,
window
,
active_class_ids
]
return
[
image_id
,
image_shape
,
window
,
active_class_ids
]
def
mold_image
(
images
,
config
):
def
mold_image
(
images
,
config
):
"""Takes RGB images with 0-255 values and subtraces
"""Takes RGB images with 0-255 values and subtraces
the mean pixel and converts it to float. Expects image
the mean pixel and converts it to float. Expects image
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录