Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
27ec95d7
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
27ec95d7
编写于
10月 14, 2019
作者:
W
wangguanzhong
提交者:
GitHub
10月 14, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add multi-scale test (#3376)
* add multi-scale test
上级
bdf0fdc1
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
1523 addition
and
196 deletion
+1523
-196
configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
...s/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
+255
-0
configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
+177
-0
ppdet/core/workspace.py
ppdet/core/workspace.py
+23
-5
ppdet/data/data_feed.py
ppdet/data/data_feed.py
+40
-9
ppdet/data/reader.py
ppdet/data/reader.py
+12
-7
ppdet/data/transform/arrange_sample.py
ppdet/data/transform/arrange_sample.py
+17
-7
ppdet/data/transform/operators.py
ppdet/data/transform/operators.py
+129
-23
ppdet/data/transform/post_map.py
ppdet/data/transform/post_map.py
+39
-5
ppdet/modeling/architectures/cascade_mask_rcnn.py
ppdet/modeling/architectures/cascade_mask_rcnn.py
+161
-53
ppdet/modeling/architectures/cascade_rcnn.py
ppdet/modeling/architectures/cascade_rcnn.py
+98
-7
ppdet/modeling/architectures/faster_rcnn.py
ppdet/modeling/architectures/faster_rcnn.py
+59
-1
ppdet/modeling/architectures/mask_rcnn.py
ppdet/modeling/architectures/mask_rcnn.py
+123
-49
ppdet/modeling/backbones/fpn.py
ppdet/modeling/backbones/fpn.py
+7
-6
ppdet/modeling/model_input.py
ppdet/modeling/model_input.py
+54
-1
ppdet/modeling/roi_heads/bbox_head.py
ppdet/modeling/roi_heads/bbox_head.py
+8
-1
ppdet/modeling/roi_heads/cascade_head.py
ppdet/modeling/roi_heads/cascade_head.py
+4
-2
ppdet/utils/cli.py
ppdet/utils/cli.py
+1
-1
ppdet/utils/eval_utils.py
ppdet/utils/eval_utils.py
+69
-1
ppdet/utils/post_process.py
ppdet/utils/post_process.py
+212
-0
tools/configure.py
tools/configure.py
+1
-13
tools/eval.py
tools/eval.py
+29
-4
tools/train.py
tools/train.py
+5
-1
未找到文件。
configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
0 → 100644
浏览文件 @
27ec95d7
architecture
:
CascadeMaskRCNN
train_feed
:
MaskRCNNTrainFeed
eval_feed
:
MaskRCNNEvalFeed
test_feed
:
MaskRCNNTestFeed
max_iters
:
300000
snapshot_iter
:
10000
use_gpu
:
true
log_iter
:
20
log_smooth_window
:
20
save_dir
:
output
pretrain_weights
:
https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
weights
:
output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
metric
:
COCO
num_classes
:
81
CascadeMaskRCNN
:
backbone
:
SENet
fpn
:
FPN
rpn_head
:
FPNRPNHead
roi_extractor
:
FPNRoIAlign
bbox_head
:
CascadeBBoxHead
bbox_assigner
:
CascadeBBoxAssigner
mask_assigner
:
MaskAssigner
mask_head
:
MaskHead
SENet
:
depth
:
152
feature_maps
:
[
2
,
3
,
4
,
5
]
freeze_at
:
2
group_width
:
4
groups
:
64
norm_type
:
bn
freeze_norm
:
True
variant
:
d
dcn_v2_stages
:
[
3
,
4
,
5
]
std_senet
:
True
FPN
:
max_level
:
6
min_level
:
2
num_chan
:
256
spatial_scale
:
[
0.03125
,
0.0625
,
0.125
,
0.25
]
freeze_norm
:
False
norm_type
:
gn
FPNRPNHead
:
anchor_generator
:
aspect_ratios
:
[
0.5
,
1.0
,
2.0
]
variance
:
[
1.0
,
1.0
,
1.0
,
1.0
]
anchor_start_size
:
32
max_level
:
6
min_level
:
2
num_chan
:
256
rpn_target_assign
:
rpn_batch_size_per_im
:
256
rpn_fg_fraction
:
0.5
rpn_negative_overlap
:
0.3
rpn_positive_overlap
:
0.7
rpn_straddle_thresh
:
0.0
train_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
2000
post_nms_top_n
:
2000
test_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
1000
post_nms_top_n
:
1000
FPNRoIAlign
:
canconical_level
:
4
canonical_size
:
224
max_level
:
5
min_level
:
2
box_resolution
:
7
sampling_ratio
:
2
mask_resolution
:
14
MaskHead
:
dilation
:
1
conv_dim
:
256
num_convs
:
4
resolution
:
28
norm_type
:
gn
CascadeBBoxAssigner
:
batch_size_per_im
:
512
bbox_reg_weights
:
[
10
,
20
,
30
]
bg_thresh_hi
:
[
0.5
,
0.6
,
0.7
]
bg_thresh_lo
:
[
0.0
,
0.0
,
0.0
]
fg_fraction
:
0.25
fg_thresh
:
[
0.5
,
0.6
,
0.7
]
MaskAssigner
:
resolution
:
28
CascadeBBoxHead
:
head
:
CascadeXConvNormHead
nms
:
keep_top_k
:
100
nms_threshold
:
0.5
score_threshold
:
0.05
CascadeXConvNormHead
:
norm_type
:
gn
MultiScaleTEST
:
score_thresh
:
0.05
nms_thresh
:
0.5
detections_per_im
:
100
enable_voting
:
true
vote_thresh
:
0.9
LearningRate
:
base_lr
:
0.01
schedulers
:
-
!PiecewiseDecay
gamma
:
0.1
milestones
:
[
240000
,
280000
]
-
!LinearWarmup
start_factor
:
0.01
steps
:
2000
OptimizerBuilder
:
optimizer
:
momentum
:
0.9
type
:
Momentum
regularizer
:
factor
:
0.0001
type
:
L2
MaskRCNNTrainFeed
:
# batch size per device
batch_size
:
1
dataset
:
dataset_dir
:
dataset/coco
image_dir
:
train2017
annotation
:
annotations/instances_train2017.json
sample_transforms
:
-
!DecodeImage
to_rgb
:
False
with_mixup
:
False
-
!RandomFlipImage
is_mask_flip
:
true
is_normalized
:
false
prob
:
0.5
-
!NormalizeImage
is_channel_first
:
false
is_scale
:
False
mean
:
-
102.9801
-
115.9465
-
122.7717
std
:
-
1.0
-
1.0
-
1.0
-
!ResizeImage
interp
:
1
target_size
:
-
416
-
448
-
480
-
512
-
544
-
576
-
608
-
640
-
672
-
704
-
736
-
768
-
800
-
832
-
864
-
896
-
928
-
960
-
992
-
1024
-
1056
-
1088
-
1120
-
1152
-
1184
-
1216
-
1248
-
1280
-
1312
-
1344
-
1376
-
1408
max_size
:
1600
use_cv2
:
true
-
!Permute
channel_first
:
true
to_bgr
:
false
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
num_workers
:
8
MaskRCNNEvalFeed
:
batch_size
:
1
dataset
:
dataset_dir
:
dataset/coco
annotation
:
annotations/instances_val2017.json
image_dir
:
val2017
sample_transforms
:
-
!DecodeImage
to_rgb
:
False
-
!NormalizeImage
is_channel_first
:
false
is_scale
:
False
mean
:
-
102.9801
-
115.9465
-
122.7717
std
:
-
1.0
-
1.0
-
1.0
-
!MultiscaleTestResize
origin_target_size
:
800
origin_max_size
:
1333
target_size
:
-
400
-
500
-
600
-
700
-
900
-
1000
-
1100
-
1200
max_size
:
2000
use_flip
:
true
-
!Permute
channel_first
:
true
to_bgr
:
false
batch_transforms
:
-
!PadMSTest
pad_to_stride
:
32
# num_scale = (len(target_size) + 1) * (1 + use_flip)
num_scale
:
18
num_workers
:
2
MaskRCNNTestFeed
:
batch_size
:
1
dataset
:
annotation
:
dataset/coco/annotations/instances_val2017.json
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
num_workers
:
2
configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
0 → 100644
浏览文件 @
27ec95d7
architecture
:
CascadeRCNN
train_feed
:
FasterRCNNTrainFeed
eval_feed
:
FasterRCNNEvalFeed
test_feed
:
FasterRCNNTestFeed
max_iters
:
90000
snapshot_iter
:
10000
use_gpu
:
true
log_smooth_window
:
20
save_dir
:
output
pretrain_weights
:
https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights
:
output/cascade_rcnn_r50_fpn_1x/model_final
metric
:
COCO
num_classes
:
81
CascadeRCNN
:
backbone
:
ResNet
fpn
:
FPN
rpn_head
:
FPNRPNHead
roi_extractor
:
FPNRoIAlign
bbox_head
:
CascadeBBoxHead
bbox_assigner
:
CascadeBBoxAssigner
ResNet
:
norm_type
:
affine_channel
depth
:
50
feature_maps
:
[
2
,
3
,
4
,
5
]
freeze_at
:
2
variant
:
b
FPN
:
min_level
:
2
max_level
:
6
num_chan
:
256
spatial_scale
:
[
0.03125
,
0.0625
,
0.125
,
0.25
]
FPNRPNHead
:
anchor_generator
:
anchor_sizes
:
[
32
,
64
,
128
,
256
,
512
]
aspect_ratios
:
[
0.5
,
1.0
,
2.0
]
stride
:
[
16.0
,
16.0
]
variance
:
[
1.0
,
1.0
,
1.0
,
1.0
]
anchor_start_size
:
32
min_level
:
2
max_level
:
6
num_chan
:
256
rpn_target_assign
:
rpn_batch_size_per_im
:
256
rpn_fg_fraction
:
0.5
rpn_positive_overlap
:
0.7
rpn_negative_overlap
:
0.3
rpn_straddle_thresh
:
0.0
train_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
2000
post_nms_top_n
:
2000
test_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
1000
post_nms_top_n
:
1000
FPNRoIAlign
:
canconical_level
:
4
canonical_size
:
224
min_level
:
2
max_level
:
5
box_resolution
:
7
sampling_ratio
:
2
CascadeBBoxAssigner
:
batch_size_per_im
:
512
bbox_reg_weights
:
[
10
,
20
,
30
]
bg_thresh_lo
:
[
0.0
,
0.0
,
0.0
]
bg_thresh_hi
:
[
0.5
,
0.6
,
0.7
]
fg_thresh
:
[
0.5
,
0.6
,
0.7
]
fg_fraction
:
0.25
CascadeBBoxHead
:
head
:
CascadeTwoFCHead
nms
:
keep_top_k
:
100
nms_threshold
:
0.5
score_threshold
:
0.05
CascadeTwoFCHead
:
mlp_dim
:
1024
MultiScaleTEST
:
score_thresh
:
0.05
nms_thresh
:
0.5
detections_per_im
:
100
enable_voting
:
true
vote_thresh
:
0.9
LearningRate
:
base_lr
:
0.02
schedulers
:
-
!PiecewiseDecay
gamma
:
0.1
milestones
:
[
60000
,
80000
]
-
!LinearWarmup
start_factor
:
0.3333333333333333
steps
:
500
OptimizerBuilder
:
optimizer
:
momentum
:
0.9
type
:
Momentum
regularizer
:
factor
:
0.0001
type
:
L2
FasterRCNNTrainFeed
:
batch_size
:
2
dataset
:
dataset_dir
:
dataset/coco
annotation
:
annotations/instances_train2017.json
image_dir
:
train2017
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
drop_last
:
false
num_workers
:
2
FasterRCNNEvalFeed
:
batch_size
:
1
dataset
:
dataset_dir
:
dataset/coco
annotation
:
annotations/instances_val2017.json
image_dir
:
val2017
sample_transforms
:
-
!DecodeImage
to_rgb
:
true
-
!NormalizeImage
is_channel_first
:
false
is_scale
:
true
mean
:
-
0.485
-
0.456
-
0.406
std
:
-
0.229
-
0.224
-
0.225
-
!MultiscaleTestResize
origin_target_size
:
800
origin_max_size
:
1333
target_size
:
-
400
-
500
-
600
-
700
-
900
-
1000
-
1100
-
1200
max_size
:
2000
use_flip
:
true
-
!Permute
channel_first
:
true
to_bgr
:
false
batch_transforms
:
-
!PadMSTest
pad_to_stride
:
32
num_scale
:
18
num_workers
:
2
FasterRCNNTestFeed
:
batch_size
:
1
dataset
:
annotation
:
dataset/coco/annotations/instances_val2017.json
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
drop_last
:
false
num_workers
:
2
ppdet/core/workspace.py
浏览文件 @
27ec95d7
...
...
@@ -27,11 +27,29 @@ from .config.schema import SchemaDict, SharedConfig, extract_schema
from
.config.yaml_helpers
import
serializable
__all__
=
[
'global_config'
,
'load_config'
,
'merge_config'
,
'get_registered_modules'
,
'create'
,
'register'
,
'serializable'
'global_config'
,
'load_config'
,
'merge_config'
,
'get_registered_modules'
,
'create'
,
'register'
,
'serializable'
,
'dump_value'
,
]
def dump_value(value):
    """Render a config value as a single-line string.

    Containers (``dict``/``tuple``/``list``) and objects that carry a
    ``__dict__`` are dumped as flow-style YAML, collapsed onto one line and
    wrapped in single quotes. Every other value is returned via ``str()``.

    Args:
        value: the value to render.

    Returns:
        str: the rendered value.
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)):
        dumped = yaml.dump(value, default_flow_style=True)
        pieces = dumped.split('\n')
        # Trailing empty pieces come from the final newline(s); dropping them
        # does not change the joined text but exposes the last real line.
        while pieces and pieces[-1] == '':
            pieces.pop()
        # Only strip the YAML document-end marker when it is the final line.
        # A blanket replace('...', '') would also delete a literal "..."
        # that is part of the dumped data.
        if pieces and pieces[-1] == '...':
            pieces.pop()
        return "'{}'".format(''.join(pieces))
    # primitive types
    return str(value)
class
AttrDict
(
dict
):
"""Single level attribute dict, NOT recursive"""
...
...
@@ -154,9 +172,9 @@ def create(cls_or_name, **kwargs):
target_key
=
config
[
k
]
shared_conf
=
config
.
schema
[
k
].
default
assert
isinstance
(
shared_conf
,
SharedConfig
)
if
target_key
is
not
None
and
not
isinstance
(
target_key
,
SharedConfig
):
continue
# value is given for the module
if
target_key
is
not
None
and
not
isinstance
(
target_key
,
SharedConfig
):
continue
# value is given for the module
elif
shared_conf
.
key
in
global_config
:
# `key` is present in config
kwargs
[
k
]
=
global_config
[
shared_conf
.
key
]
...
...
ppdet/data/data_feed.py
浏览文件 @
27ec95d7
...
...
@@ -27,18 +27,18 @@ from ppdet.data.reader import Reader
from
ppdet.data.transform.operators
import
(
DecodeImage
,
MixupImage
,
NormalizeBox
,
NormalizeImage
,
RandomDistort
,
RandomFlipImage
,
RandomInterpImage
,
ResizeImage
,
ExpandImage
,
CropImage
,
Permute
)
Permute
,
MultiscaleTestResize
)
from
ppdet.data.transform.arrange_sample
import
(
ArrangeRCNN
,
ArrangeEvalRCNN
,
ArrangeTestRCNN
,
ArrangeSSD
,
ArrangeEvalSSD
,
ArrangeTestSSD
,
ArrangeYOLO
,
ArrangeEvalYOLO
,
ArrangeTestYOLO
)
__all__
=
[
'PadBatch'
,
'MultiScale'
,
'RandomShape'
,
'DataSet'
,
'CocoDataSet'
,
'DataFeed'
,
'TrainFeed'
,
'EvalFeed'
,
'FasterRCNNTrainFeed'
,
'MaskRCNNTrainFeed'
,
'FasterRCNNTestFeed'
,
'MaskRCNNTestFeed'
,
'SSDTrainFeed'
,
'SSDEvalFeed'
,
'SSDTestFeed'
,
'YoloTrainFeed'
,
'YoloEvalFeed'
,
'YoloTestFeed'
,
'create_reader'
'PadBatch'
,
'MultiScale'
,
'RandomShape'
,
'PadMSTest'
,
'DataSet'
,
'CocoDataSet'
,
'DataFeed'
,
'TrainFeed'
,
'EvalFeed'
,
'FasterRCNNTrainFeed'
,
'MaskRCNNTrainFeed'
,
'FasterRCNNEvalFeed'
,
'MaskRCNNEvalFeed'
,
'FasterRCNNTestFeed'
,
'MaskRCNNTestFeed'
,
'SSDTrainFeed'
,
'SSDEvalFeed'
,
'SSDTestFeed'
,
'YoloTrainFeed'
,
'YoloEvalFeed'
,
'YoloTestFeed'
,
'create_reader'
]
...
...
@@ -113,6 +113,7 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None):
pad
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
PadBatch
)]
rand_shape
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
RandomShape
)]
multi_scale
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
MultiScale
)]
pad_ms_test
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
PadMSTest
)]
if
any
(
pad
):
transform_config
[
'IS_PADDING'
]
=
True
...
...
@@ -122,6 +123,10 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None):
transform_config
[
'RANDOM_SHAPES'
]
=
rand_shape
[
0
].
sizes
if
any
(
multi_scale
):
transform_config
[
'MULTI_SCALES'
]
=
multi_scale
[
0
].
scales
if
any
(
pad_ms_test
):
transform_config
[
'ENABLE_MULTISCALE_TEST'
]
=
True
transform_config
[
'NUM_SCALE'
]
=
feed
.
num_scale
transform_config
[
'COARSEST_STRIDE'
]
=
pad_ms_test
[
0
].
pad_to_stride
if
hasattr
(
inspect
,
'getfullargspec'
):
argspec
=
inspect
.
getfullargspec
...
...
@@ -186,6 +191,20 @@ class RandomShape(object):
self
.
sizes
=
sizes
@serializable
class PadMSTest(object):
    """
    Batch transform entry that turns on padding for multi-scale test.

    Args:
        pad_to_stride (int): stride that padded image sides are rounded up
            to, e.g., 32.
    """

    def __init__(self, pad_to_stride=0):
        super(PadMSTest, self).__init__()
        # Only stored here; the reader setup reads this attribute back.
        self.pad_to_stride = pad_to_stride
@
serializable
class
DataSet
(
object
):
"""
...
...
@@ -502,7 +521,10 @@ class FasterRCNNEvalFeed(DataFeed):
samples
=-
1
,
drop_last
=
False
,
num_workers
=
2
,
use_padded_im_info
=
True
):
use_padded_im_info
=
True
,
enable_multiscale
=
False
,
num_scale
=
1
,
enable_aug_flip
=
False
):
sample_transforms
.
append
(
ArrangeEvalRCNN
())
super
(
FasterRCNNEvalFeed
,
self
).
__init__
(
dataset
,
...
...
@@ -517,6 +539,9 @@ class FasterRCNNEvalFeed(DataFeed):
num_workers
=
num_workers
,
use_padded_im_info
=
use_padded_im_info
)
self
.
mode
=
'VAL'
self
.
enable_multiscale
=
enable_multiscale
self
.
num_scale
=
num_scale
self
.
enable_aug_flip
=
enable_aug_flip
@
register
...
...
@@ -640,7 +665,10 @@ class MaskRCNNEvalFeed(DataFeed):
drop_last
=
False
,
num_workers
=
2
,
use_process
=
False
,
use_padded_im_info
=
True
):
use_padded_im_info
=
True
,
enable_multiscale
=
False
,
num_scale
=
1
,
enable_aug_flip
=
False
):
sample_transforms
.
append
(
ArrangeTestRCNN
())
super
(
MaskRCNNEvalFeed
,
self
).
__init__
(
dataset
,
...
...
@@ -656,6 +684,9 @@ class MaskRCNNEvalFeed(DataFeed):
use_process
=
use_process
,
use_padded_im_info
=
use_padded_im_info
)
self
.
mode
=
'VAL'
self
.
enable_multiscale
=
enable_multiscale
self
.
num_scale
=
num_scale
self
.
enable_aug_flip
=
enable_aug_flip
@
register
...
...
ppdet/data/reader.py
浏览文件 @
27ec95d7
...
...
@@ -68,8 +68,8 @@ class Reader(object):
mapped_ds
=
map
(
sc
,
mapper
,
worker_args
)
# In VAL mode, gt_bbox, gt_label can be empty, and should
# not be dropped
batched_ds
=
batch
(
mapped_ds
,
batchsize
,
drop_last
,
drop_empty
=
(
mode
!=
"VAL"
))
batched_ds
=
batch
(
mapped_ds
,
batchsize
,
drop_last
,
drop_empty
=
(
mode
!=
"VAL"
))
trans_conf
=
{
k
.
lower
():
v
for
k
,
v
in
self
.
_trans_conf
[
mode
].
items
()}
need_keys
=
{
...
...
@@ -78,6 +78,8 @@ class Reader(object):
'random_shapes'
,
'multi_scales'
,
'use_padded_im_info'
,
'enable_multiscale_test'
,
'num_scale'
,
}
bm_config
=
{
key
:
value
...
...
@@ -125,12 +127,15 @@ class Reader(object):
return
self
.
_make_reader
(
'TEST'
)
@
classmethod
def
create
(
cls
,
mode
,
data_config
,
transform_config
,
max_iter
=-
1
,
my_source
=
None
,
ret_iter
=
True
):
def
create
(
cls
,
mode
,
data_config
,
transform_config
,
max_iter
=-
1
,
my_source
=
None
,
ret_iter
=
True
):
""" create a specific reader """
reader
=
Reader
({
mode
:
data_config
},
{
mode
:
transform_config
},
max_iter
)
reader
=
Reader
({
mode
:
data_config
},
{
mode
:
transform_config
},
max_iter
)
if
ret_iter
:
return
reader
.
_make_reader
(
mode
,
my_source
)
else
:
...
...
ppdet/data/transform/arrange_sample.py
浏览文件 @
27ec95d7
...
...
@@ -110,8 +110,11 @@ class ArrangeEvalRCNN(BaseOperator):
(image, im_info, im_id, im_shape, gt_bbox,
gt_class, difficult)
"""
im
=
sample
[
'image'
]
keys
=
list
(
sample
.
keys
())
ims
=
[]
keys
=
sorted
(
list
(
sample
.
keys
()))
for
k
in
keys
:
if
'image'
in
k
:
ims
.
append
(
sample
[
k
])
if
'im_info'
in
keys
:
im_info
=
sample
[
'im_info'
]
else
:
...
...
@@ -127,7 +130,9 @@ class ArrangeEvalRCNN(BaseOperator):
gt_bbox
=
sample
[
'gt_bbox'
]
gt_class
=
sample
[
'gt_class'
]
difficult
=
sample
[
'difficult'
]
outs
=
(
im
,
im_info
,
im_id
,
im_shape
,
gt_bbox
,
gt_class
,
difficult
)
remain_list
=
[
im_info
,
im_id
,
im_shape
,
gt_bbox
,
gt_class
,
difficult
]
ims
.
extend
(
remain_list
)
outs
=
tuple
(
ims
)
return
outs
...
...
@@ -148,10 +153,13 @@ class ArrangeTestRCNN(BaseOperator):
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id)
(image, im_info, im_id
, im_shape
)
"""
im
=
sample
[
'image'
]
keys
=
list
(
sample
.
keys
())
ims
=
[]
keys
=
sorted
(
list
(
sample
.
keys
()))
for
k
in
keys
:
if
'image'
in
k
:
ims
.
append
(
sample
[
k
])
if
'im_info'
in
keys
:
im_info
=
sample
[
'im_info'
]
else
:
...
...
@@ -164,7 +172,9 @@ class ArrangeTestRCNN(BaseOperator):
# bbox prediction needs im_info as input in format of [N, 3],
# so im_shape is appended by 1 to match dimension.
im_shape
=
np
.
array
((
h
,
w
,
1
),
dtype
=
np
.
float32
)
outs
=
(
im
,
im_info
,
im_id
,
im_shape
)
remain_list
=
[
im_info
,
im_id
,
im_shape
]
ims
.
extend
(
remain_list
)
outs
=
tuple
(
ims
)
return
outs
...
...
ppdet/data/transform/operators.py
浏览文件 @
27ec95d7
...
...
@@ -121,6 +121,105 @@ class DecodeImage(BaseOperator):
return
sample
@register_op
class MultiscaleTestResize(BaseOperator):
    """Build the resized (and optionally flipped) image set for multi-scale test."""

    def __init__(self,
                 origin_target_size=800,
                 origin_max_size=1333,
                 target_size=[],
                 max_size=2000,
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to each size in target_size, capped at max_size.

        Args:
            origin_target_size (int): original target size of image's short side.
            origin_max_size (int): original max size of image.
            target_size (list): A list of target sizes of image's short side.
            max_size (int): the max size of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.

        Raises:
            TypeError: if target_size is not a list.
        """
        super(MultiscaleTestResize, self).__init__()
        # int() already rejects values that cannot be used as integers, so no
        # separate isinstance check is needed after these conversions.
        self.origin_target_size = int(origin_target_size)
        self.origin_max_size = int(origin_max_size)
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_flip = use_flip

        if not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid. Must be List, now is {}".
                format(type(target_size)))
        # Copy so the instance never aliases the shared default list (or a
        # list the caller keeps mutating).
        self.target_size = list(target_size)

    def _resize_short_side(self, image, im_shape, short_target, long_cap):
        """Resize `image` so its short side is `short_target`, long side <= `long_cap`.

        Returns the resized image and its [height, width, scale] entry.
        """
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(short_target) / float(im_size_min)
        # Prevent the biggest axis from being more than long_cap
        if np.round(im_scale * im_size_max) > long_cap:
            im_scale = float(long_cap) / float(im_size_max)
        resize_w = np.round(im_scale * float(im_shape[1]))
        resize_h = np.round(im_scale * float(im_shape[0]))
        im_resize = cv2.resize(
            image,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=self.interp)
        return im_resize, [resize_h, resize_w, im_scale]

    def __call__(self, sample, context=None):
        """ Resize the image numpy for multi-scale test.

        For 'image' (and 'flip_image' when use_flip is set) the sample gets
        one entry resized with the origin sizes plus one '<base>_scale_<i>'
        entry per element of target_size. sample['im_info'] becomes a flat
        float32 array holding [height, width, scale] for every produced
        image, in production order.

        Raises:
            TypeError: if sample['image'] is not a numpy array.
            ImageError: if the image is not 3-dimensional.
            ZeroDivisionError: if the shorter image side is 0.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        if float(np.min(im_shape[0:2])) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))

        origin_ims = [('image', im)]
        if self.use_flip:
            # Reverse axis 1 of the array; the key is registered right away
            # and overwritten with the resized version in the loop below.
            flipped = im[:, ::-1, :]
            sample['flip_image'] = flipped
            origin_ims.append(('flip_image', flipped))

        im_info = []
        for base_name, origin_im in origin_ims:
            im_resize, info = self._resize_short_side(
                origin_im, im_shape, self.origin_target_size,
                self.origin_max_size)
            im_info.extend(info)
            sample[base_name] = im_resize
            for i, size in enumerate(self.target_size):
                im_resize, info = self._resize_short_side(
                    origin_im, im_shape, size, self.max_size)
                im_info.extend(info)
                sample[base_name + '_scale_' + str(i)] = im_resize
        sample['im_info'] = np.array(im_info, dtype=np.float32)
        return sample
@
register_op
class
ResizeImage
(
BaseOperator
):
def
__init__
(
self
,
...
...
@@ -183,9 +282,12 @@ class ResizeImage(BaseOperator):
resize_w
=
np
.
round
(
im_scale_x
*
float
(
im_shape
[
1
]))
resize_h
=
np
.
round
(
im_scale_y
*
float
(
im_shape
[
0
]))
sample
[
'im_info'
]
=
np
.
array
(
[
resize_h
,
resize_w
,
im_scale
],
dtype
=
np
.
float32
)
im_info
=
[
resize_h
,
resize_w
,
im_scale
]
if
'im_info'
in
sample
and
sample
[
'im_info'
][
2
]
!=
1.
:
sample
[
'im_info'
]
=
np
.
append
(
list
(
sample
[
'im_info'
]),
im_info
).
astype
(
np
.
float32
)
else
:
sample
[
'im_info'
]
=
np
.
array
(
im_info
).
astype
(
np
.
float32
)
else
:
im_scale_x
=
float
(
selected_size
)
/
float
(
im_shape
[
1
])
im_scale_y
=
float
(
selected_size
)
/
float
(
im_shape
[
0
])
...
...
@@ -331,19 +433,21 @@ class NormalizeImage(BaseOperator):
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
"""
im
=
sample
[
'image'
]
im
=
im
.
astype
(
np
.
float32
,
copy
=
False
)
if
self
.
is_channel_first
:
mean
=
np
.
array
(
self
.
mean
)[:,
np
.
newaxis
,
np
.
newaxis
]
std
=
np
.
array
(
self
.
std
)[:,
np
.
newaxis
,
np
.
newaxis
]
else
:
mean
=
np
.
array
(
self
.
mean
)[
np
.
newaxis
,
np
.
newaxis
,
:]
std
=
np
.
array
(
self
.
std
)[
np
.
newaxis
,
np
.
newaxis
,
:]
if
self
.
is_scale
:
im
=
im
/
255.0
im
-=
mean
im
/=
std
sample
[
'image'
]
=
im
for
k
in
sample
.
keys
():
if
'image'
in
k
:
im
=
sample
[
k
]
im
=
im
.
astype
(
np
.
float32
,
copy
=
False
)
if
self
.
is_channel_first
:
mean
=
np
.
array
(
self
.
mean
)[:,
np
.
newaxis
,
np
.
newaxis
]
std
=
np
.
array
(
self
.
std
)[:,
np
.
newaxis
,
np
.
newaxis
]
else
:
mean
=
np
.
array
(
self
.
mean
)[
np
.
newaxis
,
np
.
newaxis
,
:]
std
=
np
.
array
(
self
.
std
)[
np
.
newaxis
,
np
.
newaxis
,
:]
if
self
.
is_scale
:
im
=
im
/
255.0
im
-=
mean
im
/=
std
sample
[
k
]
=
im
return
sample
...
...
@@ -785,13 +889,15 @@ class Permute(BaseOperator):
def
__call__
(
self
,
sample
,
context
=
None
):
assert
'image'
in
sample
,
"image data not found"
im
=
sample
[
'image'
]
if
self
.
channel_first
:
im
=
np
.
swapaxes
(
im
,
1
,
2
)
im
=
np
.
swapaxes
(
im
,
1
,
0
)
if
self
.
to_bgr
:
im
=
im
[[
2
,
1
,
0
],
:,
:]
sample
[
'image'
]
=
im
for
k
in
sample
.
keys
():
if
'image'
in
k
:
im
=
sample
[
k
]
if
self
.
channel_first
:
im
=
np
.
swapaxes
(
im
,
1
,
2
)
im
=
np
.
swapaxes
(
im
,
1
,
0
)
if
self
.
to_bgr
:
im
=
im
[[
2
,
1
,
0
],
:,
:]
sample
[
k
]
=
im
return
sample
...
...
ppdet/data/transform/post_map.py
浏览文件 @
27ec95d7
...
...
@@ -27,7 +27,9 @@ def build_post_map(coarsest_stride=1,
is_padding
=
False
,
random_shapes
=
[],
multi_scales
=
[],
use_padded_im_info
=
False
):
use_padded_im_info
=
False
,
enable_multiscale_test
=
False
,
num_scale
=
1
):
"""
Build a mapper for post-processing batches
...
...
@@ -36,10 +38,13 @@ def build_post_map(coarsest_stride=1,
{
coarsest_stride (int): stride of the coarsest FPN level
is_padding (bool): whether to padding in minibatch
random_shapes: (list of int): resize to image to random
shapes, [] for not resize.
multi_scales: (list of int): resize image by random
scales, [] for not resize.
random_shapes (list of int): resize to image to random shapes,
[] for not resize.
multi_scales (list of int): resize image by random scales,
[] for not resize.
use_padded_im_info (bool): whether to update im_info after padding
enable_multiscale_test (bool): whether to use multiscale test.
num_scale (int) : the number of scales for multiscale test.
}
Returns:
a mapper function which accept one argument 'batch' and
...
...
@@ -66,6 +71,33 @@ def build_post_map(coarsest_stride=1,
padding_batch
.
append
((
padding_im
,
)
+
data
[
1
:])
return
padding_batch
def padding_multiscale_test(batch_data):
    """Pad the images of a multi-scale test sample to the coarsest stride.

    The first `num_scale` entries of the single sample are treated as
    (channel, height, width) arrays and zero-padded at the bottom/right so
    both sides become multiples of `coarsest_stride`. The entry at index
    `num_scale` is updated in place: slots 3*i and 3*i+1 receive the padded
    height and width of image i. Remaining entries pass through unchanged.

    Args:
        batch_data (list): batch holding exactly one sample.

    Returns:
        list: a one-element list with the padded sample as a tuple, or
        `batch_data` itself when `coarsest_stride` is not greater than 1.

    Raises:
        NotImplementedError: if the batch does not hold exactly one sample.
    """
    if len(batch_data) != 1:
        raise NotImplementedError(
            "Batch size must be 1 when using multiscale test, but now batch size is {}".
            format(len(batch_data)))
    if coarsest_stride > 1:
        padding_batch = []
        data = batch_data[0]
        # Explicit float so the ceil below also rounds up under Python 2
        # integer division.
        stride = float(coarsest_stride)
        for i, item in enumerate(data):
            if i < num_scale:
                im_c, im_h, im_w = item.shape
                max_h = int(np.ceil(im_h / stride) * coarsest_stride)
                max_w = int(np.ceil(im_w / stride) * coarsest_stride)
                padding_im = np.zeros(
                    (im_c, max_h, max_w), dtype=np.float32)
                padding_im[:, :im_h, :im_w] = item
                # Record the padded size in the entry at index num_scale.
                data[num_scale][3 * i:3 * i + 2] = [max_h, max_w]
                padding_batch.append(padding_im)
            else:
                padding_batch.append(item)
        return [tuple(padding_batch)]
    # no need to padding
    return batch_data
def
random_shape
(
batch_data
):
# For YOLO: gt_bbox is normalized, is scale invariant.
shape
=
np
.
random
.
choice
(
random_shapes
)
...
...
@@ -108,6 +140,8 @@ def build_post_map(coarsest_stride=1,
batch_data
=
random_shape
(
batch_data
)
if
len
(
multi_scales
)
>
0
:
batch_data
=
multi_scale_resize
(
batch_data
)
if
enable_multiscale_test
:
batch_data
=
padding_multiscale_test
(
batch_data
)
except
Exception
as
e
:
errmsg
=
"post-process failed with error: "
+
str
(
e
)
logger
.
warn
(
errmsg
)
...
...
ppdet/modeling/architectures/cascade_mask_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -80,21 +80,15 @@ class CascadeMaskRCNN(object):
self
.
cascade_rcnn_loss_weight
=
[
1.0
,
0.5
,
0.25
]
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
im
=
feed_vars
[
'image'
]
assert
mode
in
[
'train'
,
'test'
],
\
"only 'train' and 'test' mode is supported"
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'gt_mask'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
self
.
_input_check
(
required_fields
,
feed_vars
)
for
var
in
required_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
im
=
feed_vars
[
'image'
]
if
mode
==
'train'
:
gt_box
=
feed_vars
[
'gt_box'
]
is_crowd
=
feed_vars
[
'is_crowd'
]
...
...
@@ -199,55 +193,167 @@ class CascadeMaskRCNN(object):
loss
.
update
({
'loss'
:
total_loss
})
return
loss
else
:
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
roi_feat
=
self
.
roi_extractor
(
last_feat
,
rois
)
else
:
roi_feat
=
self
.
roi_extractor
(
body_feats
,
rois
,
spatial_scale
)
mask_name
=
'mask_pred'
mask_pred
,
bbox_pred
=
self
.
single_scale_eval
(
body_feats
,
spatial_scale
,
im_info
,
mask_name
,
bbox_pred
,
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
feed_vars
[
'im_shape'
])
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
def build_multi_scale(self, feed_vars, mask_branch=False):
    """Build the multi-scale test graph, one pass per image in feed_vars.

    Every entry of `feed_vars` whose key contains 'image' is treated as one
    input image; image number i uses columns [3*i, 3*i+3) of
    feed_vars['im_info'].

    Args:
        feed_vars (dict): input variables. Must contain 'image' and
            'im_info'; 'im_shape' is required when `mask_branch` is False,
            'bbox' (and 'bbox_flip' for flipped images) are read when it
            is True.
        mask_branch (bool): False builds the box branch and stores
            'bbox_<i>' / 'score_<i>' (with a '_flip' suffix for images whose
            name contains 'flip'); True builds the mask branch and stores
            'mask_pred_<i>' (same suffix rule).

    Returns:
        dict: collected outputs keyed as described above, plus the
        backbone features and, for the box branch, all of `feed_vars`.
    """
    required_fields = ['image', 'im_info']
    self._input_check(required_fields, feed_vars)

    ims = [feed_vars[k] for k in feed_vars.keys() if 'image' in k]
    result = {}

    if not mask_branch:
        assert 'im_shape' in feed_vars, \
            "{} has no im_shape field".format(feed_vars)
        result.update(feed_vars)

    for i, im in enumerate(ims):
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        # NOTE(review): spatial_scale is only bound when an FPN is
        # configured, yet it is used unconditionally below - confirm that
        # this path is never built without FPN.
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)
        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
        if not mask_branch:
            im_shape = feed_vars['im_shape']
            proposal_list = []
            roi_feat_list = []
            rcnn_pred_list = []

            proposals = None
            bbox_pred = None
            # The stage counter must not reuse `i`: the image index is
            # needed afterwards to name this image's outputs. Reusing it
            # made every image write to 'bbox_2' / 'score_2'.
            for stage in range(3):
                if stage > 0:
                    refined_bbox = self._decode_box(
                        proposals, bbox_pred, curr_stage=stage - 1)
                else:
                    refined_bbox = rois

                proposals = refined_bbox
                proposal_list.append(proposals)

                # extract roi features
                roi_feat = self.roi_extractor(body_feats, proposals,
                                              spatial_scale)
                roi_feat_list.append(roi_feat)

                # bbox head
                cls_score, bbox_pred = self.bbox_head.get_output(
                    roi_feat,
                    wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
                    name='_' + str(stage + 1) if stage > 0 else '')
                rcnn_pred_list.append((cls_score, bbox_pred))

            pred = self.bbox_head.get_prediction(
                im_info,
                im_shape,
                roi_feat_list,
                rcnn_pred_list,
                proposal_list,
                self.cascade_bbox_reg_weights,
                return_box_score=True)
            bbox_name = 'bbox_' + str(i)
            score_name = 'score_' + str(i)
            if 'flip' in im.name:
                bbox_name += '_flip'
                score_name += '_flip'
            result[bbox_name] = pred['bbox']
            result[score_name] = pred['score']
        else:
            mask_name = 'mask_pred_' + str(i)
            bbox_pred = feed_vars['bbox']
            result.update({im.name: im})
            if 'flip' in im.name:
                mask_name += '_flip'
                bbox_pred = feed_vars['bbox_flip']
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats,
                spatial_scale,
                im_info,
                mask_name,
                bbox_pred=bbox_pred,
                use_multi_test=True)
            result[mask_name] = mask_pred
    return result
def
single_scale_eval
(
self
,
body_feats
,
spatial_scale
,
im_info
,
mask_name
,
bbox_pred
,
roi_feat_list
=
None
,
rcnn_pred_list
=
None
,
proposal_list
=
None
,
im_shape
=
None
,
use_multi_test
=
False
):
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
if
not
use_multi_test
:
bbox_pred
=
self
.
bbox_head
.
get_prediction
(
im_info
,
feed_vars
[
'im_shape'
],
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
self
.
cascade_bbox_reg_weights
,
self
.
cls_agnostic_bbox_reg
)
im_info
,
im_shape
,
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
self
.
cascade_bbox_reg_weights
)
bbox_pred
=
bbox_pred
[
'bbox'
]
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
'mask_pred'
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
mask_name
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
mask_pred
,
bbox_pred
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def
_decode_box
(
self
,
proposals
,
bbox_pred
,
curr_stage
):
rcnn_loc_delta_r
=
fluid
.
layers
.
reshape
(
...
...
@@ -269,7 +375,9 @@ class CascadeMaskRCNN(object):
def train(self, feed_vars):
    """Return the result of ``self.build`` called in ``'train'`` mode."""
    mode = 'train'
    return self.build(feed_vars, mode)
def
eval
(
self
,
feed_vars
):
def eval(self, feed_vars, multi_scale=None, mask_branch=False):
    """Build the evaluation graph.

    A truthy ``multi_scale`` dispatches to ``self.build_multi_scale`` (which
    also receives ``mask_branch``); otherwise the single-scale
    ``self.build(feed_vars, 'test')`` is used.
    """
    if not multi_scale:
        return self.build(feed_vars, 'test')
    return self.build_multi_scale(feed_vars, mask_branch)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/architectures/cascade_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -74,16 +74,13 @@ class CascadeRCNN(object):
self
.
cascade_rcnn_loss_weight
=
[
1.0
,
0.5
,
0.25
]
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
im
=
feed_vars
[
'image'
]
assert
mode
in
[
'train'
,
'test'
],
\
"only 'train' and 'test' mode is supported"
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
for
var
in
required_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
self
.
_input_check
(
required_fields
,
feed_vars
)
im
=
feed_vars
[
'image'
]
im_info
=
feed_vars
[
'im_info'
]
if
mode
==
'train'
:
...
...
@@ -171,6 +168,98 @@ class CascadeRCNN(object):
self
.
cls_agnostic_bbox_reg
)
return
pred
def build_multi_scale(self, feed_vars):
    """Build the multi-scale test graph and collect per-scale box outputs.

    Every entry of ``feed_vars`` whose key contains ``'image'`` is passed
    through the backbone, the optional FPN, the RPN and three cascade
    refinement iterations.  The boxes and scores returned by
    ``self.bbox_head.get_prediction(..., return_box_score=True)`` are stored
    under ``'bbox_<idx>'`` / ``'score_<idx>'`` (suffixed with ``'_flip'``
    when the image variable's name contains ``'flip'``), where ``<idx>`` is
    the position of the image among the collected image inputs.

    Args:
        feed_vars: mapping from input name to variable; must contain
            ``'image'``, ``'im_shape'`` and ``'im_info'``.

    Returns:
        dict: the entries of ``feed_vars`` plus the backbone features and
        the per-scale ``bbox_*`` / ``score_*`` entries.

    Raises:
        AssertionError: from ``self._input_check`` when a required field is
            missing.
    """
    required_fields = ['image', 'im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    ims = [feed_vars[k] for k in feed_vars.keys() if 'image' in k]

    result = {}
    result.update(feed_vars)

    for scale_idx, im in enumerate(ims):
        # Columns [3 * scale_idx, 3 * scale_idx + 3) of 'im_info' belong to
        # the current image.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * scale_idx],
            ends=[3 * scale_idx + 3])
        im_shape = feed_vars['im_shape']

        # backbone
        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # rpn proposals
        rpn_rois = self.rpn_head.get_proposals(
            body_feats, im_info, mode='test')

        proposal_list = []
        roi_feat_list = []
        rcnn_pred_list = []

        proposals = None
        bbox_pred = None
        # The stage counter must NOT reuse the name of the outer index: the
        # outer index is needed after this loop to name the outputs, and a
        # shared name would leave it stuck at 2 for every scale.
        for stage in range(3):
            if stage > 0:
                refined_bbox = self._decode_box(
                    proposals,
                    bbox_pred,
                    curr_stage=stage - 1, )
            else:
                refined_bbox = rpn_rois

            proposals = refined_bbox
            proposal_list.append(proposals)

            # extract roi features
            roi_feat = self.roi_extractor(body_feats, proposals,
                                          spatial_scale)
            roi_feat_list.append(roi_feat)

            # bbox head
            cls_score, bbox_pred = self.bbox_head.get_output(
                roi_feat,
                wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
                name='_' + str(stage + 1) if stage > 0 else '')
            rcnn_pred_list.append((cls_score, bbox_pred))

        # get mask rois
        # NOTE(review): the feature extracted here is not consumed below; the
        # calls are kept so the set of ops added to the program is unchanged.
        rois = proposal_list[2]

        if self.fpn is None:
            last_feat = body_feats[list(body_feats.keys())[-1]]
            roi_feat = self.roi_extractor(last_feat, rois)
        else:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)

        pred = self.bbox_head.get_prediction(
            im_info,
            im_shape,
            roi_feat_list,
            rcnn_pred_list,
            proposal_list,
            self.cascade_bbox_reg_weights,
            self.cls_agnostic_bbox_reg,
            return_box_score=True)

        bbox_name = 'bbox_' + str(scale_idx)
        score_name = 'score_' + str(scale_idx)
        if 'flip' in im.name:
            bbox_name += '_flip'
            score_name += '_flip'
        result[bbox_name] = pred['bbox']
        result[score_name] = pred['score']
    return result
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def
_decode_box
(
self
,
proposals
,
bbox_pred
,
curr_stage
):
rcnn_loc_delta_r
=
fluid
.
layers
.
reshape
(
bbox_pred
,
(
-
1
,
self
.
cls_agnostic_bbox_reg
,
4
))
...
...
@@ -191,7 +280,9 @@ class CascadeRCNN(object):
def train(self, feed_vars):
    """Build the graph in ``'train'`` mode via ``self.build``."""
    built = self.build(feed_vars, 'train')
    return built
def
eval
(
self
,
feed_vars
):
def eval(self, feed_vars, multi_scale=None):
    """Build the evaluation graph.

    A truthy ``multi_scale`` selects ``self.build_multi_scale``; otherwise
    ``self.build`` is called in ``'test'`` mode.
    """
    if not multi_scale:
        return self.build(feed_vars, 'test')
    return self.build_multi_scale(feed_vars)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/architectures/faster_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -63,6 +63,12 @@ class FasterRCNN(object):
self
.
rpn_only
=
rpn_only
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
self
.
_input_check
(
required_fields
,
feed_vars
)
im
=
feed_vars
[
'image'
]
im_info
=
feed_vars
[
'im_info'
]
if
mode
==
'train'
:
...
...
@@ -136,10 +142,62 @@ class FasterRCNN(object):
im_shape
)
return
pred
def build_multi_scale(self, feed_vars):
    """Build the multi-scale test graph and gather per-scale boxes/scores.

    Each entry of ``feed_vars`` whose key contains ``'image'`` goes through
    the backbone, the optional FPN, the RPN, the RoI extractor and the bbox
    head.  The head is asked for raw boxes and scores
    (``return_box_score=True``), which are stored under ``'bbox_<idx>'`` and
    ``'score_<idx>'`` (plus ``'_flip'`` when the image variable's name
    contains ``'flip'``).

    Args:
        feed_vars: mapping from input name to variable; must contain
            ``'image'``, ``'im_info'`` and ``'im_shape'``.

    Returns:
        dict: the entries of ``feed_vars`` plus backbone features and the
        per-scale outputs.
    """
    self._input_check(['image', 'im_info', 'im_shape'], feed_vars)

    images = [feed_vars[key] for key in feed_vars.keys() if 'image' in key]
    outputs = dict(feed_vars)

    for idx, image in enumerate(images):
        # Columns [3 * idx, 3 * idx + 3) of 'im_info' belong to this image.
        scale_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * idx],
            ends=[3 * idx + 3])
        im_shape = feed_vars['im_shape']

        feats = self.backbone(image)
        outputs.update(feats)
        backbone_names = list(feats.keys())

        use_fpn = self.fpn is not None
        if use_fpn:
            feats, spatial_scale = self.fpn.get_output(feats)

        proposals = self.rpn_head.get_proposals(
            feats, scale_info, mode='test')

        if use_fpn:
            roi_feat = self.roi_extractor(feats, proposals, spatial_scale)
        else:
            # Without FPN the RoI extractor only consumes the last feature
            # map, whose name is the last backbone output name.
            roi_feat = self.roi_extractor(feats[backbone_names[-1]],
                                          proposals)

        pred = self.bbox_head.get_prediction(
            roi_feat, proposals, scale_info, im_shape, return_box_score=True)

        suffix = '_flip' if 'flip' in image.name else ''
        outputs['bbox_' + str(idx) + suffix] = pred['bbox']
        outputs['score_' + str(idx) + suffix] = pred['score']
    return outputs
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def train(self, feed_vars):
    """Delegate to ``self.build`` with the mode fixed to ``'train'``."""
    mode = 'train'
    return self.build(feed_vars, mode)
def
eval
(
self
,
feed_vars
):
def eval(self, feed_vars, multi_scale=None):
    """Build the evaluation graph.

    Uses ``self.build_multi_scale`` when ``multi_scale`` is truthy and the
    single-scale ``self.build(feed_vars, 'test')`` otherwise.
    """
    builder = self.build_multi_scale if multi_scale else None
    if builder is not None:
        return builder(feed_vars)
    return self.build(feed_vars, 'test')
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/architectures/mask_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -69,18 +69,14 @@ class MaskRCNN(object):
self
.
fpn
=
fpn
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
im
=
feed_vars
[
'image'
]
assert
mode
in
[
'train'
,
'test'
],
\
"only 'train' and 'test' mode is supported"
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'gt_mask'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
for
var
in
required_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
self
.
_input_check
(
required_fields
,
feed_vars
)
im
=
feed_vars
[
'image'
]
im_info
=
feed_vars
[
'im_info'
]
mixed_precision_enabled
=
mixed_precision_global_state
()
is
not
None
...
...
@@ -153,57 +149,135 @@ class MaskRCNN(object):
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
rois
)
rois
=
rois
/
im_scale
return
{
'proposal'
:
rois
}
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
roi_feat
=
self
.
roi_extractor
(
last_feat
,
rois
)
else
:
roi_feat
=
self
.
roi_extractor
(
body_feats
,
rois
,
spatial_scale
)
mask_name
=
'mask_pred'
mask_pred
,
bbox_pred
=
self
.
single_scale_eval
(
body_feats
,
mask_name
,
rois
,
im_info
,
feed_vars
[
'im_shape'
],
spatial_scale
)
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
def build_multi_scale(self, feed_vars, mask_branch=False):
    """Build the multi-scale test graph for the box pass or the mask pass.

    Every entry of ``feed_vars`` whose key contains ``'image'`` is run
    through the backbone, the optional FPN and the RPN.

    * ``mask_branch=False``: the bbox head is asked for raw boxes and scores
      (``return_box_score=True``); they are stored as ``'bbox_<idx>'`` and
      ``'score_<idx>'`` (plus ``'_flip'`` for flipped images).  The result
      also carries every entry of ``feed_vars``.
    * ``mask_branch=True``: boxes are read from ``feed_vars['bbox']`` (or
      ``feed_vars['bbox_flip']`` for flipped images) and handed to
      ``self.single_scale_eval``; the returned mask is stored as
      ``'mask_pred_<idx>'`` (plus ``'_flip'``), and the image variable is
      stored under its own name.

    Args:
        feed_vars: mapping from input name to variable; must contain
            ``'image'`` and ``'im_info'``.  ``'im_shape'`` is asserted for
            the box pass and read without a prior check in the mask pass.
        mask_branch: selects the mask pass when true.

    Returns:
        dict: backbone features plus the per-scale outputs described above.

    Raises:
        AssertionError: when a required field is missing.
    """
    required_fields = ['image', 'im_info']
    self._input_check(required_fields, feed_vars)

    ims = [feed_vars[k] for k in feed_vars.keys() if 'image' in k]

    result = {}
    if not mask_branch:
        assert 'im_shape' in feed_vars, \
            "{} has no im_shape field".format(feed_vars)
        result.update(feed_vars)

    # Only the FPN assigns a real value.  Pre-binding the name lets the mask
    # pass forward it to single_scale_eval without an UnboundLocalError when
    # the model has no FPN (that callee only reads it on its FPN path).
    spatial_scale = None

    for i, im in enumerate(ims):
        # Columns [3 * i, 3 * i + 3) of 'im_info' belong to the current image.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])

        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')

        if not mask_branch:
            im_shape = feed_vars['im_shape']
            body_feat_names = list(body_feats.keys())
            if self.fpn is None:
                # Without FPN only the last feature map feeds the extractor.
                body_feat = body_feats[body_feat_names[-1]]
                roi_feat = self.roi_extractor(body_feat, rois)
            else:
                roi_feat = self.roi_extractor(body_feats, rois,
                                              spatial_scale)
            pred = self.bbox_head.get_prediction(
                roi_feat, rois, im_info, im_shape, return_box_score=True)
            bbox_name = 'bbox_' + str(i)
            score_name = 'score_' + str(i)
            if 'flip' in im.name:
                bbox_name += '_flip'
                score_name += '_flip'
            result[bbox_name] = pred['bbox']
            result[score_name] = pred['score']
        else:
            mask_name = 'mask_pred_' + str(i)
            bbox_pred = feed_vars['bbox']
            result.update({im.name: im})
            if 'flip' in im.name:
                mask_name += '_flip'
                bbox_pred = feed_vars['bbox_flip']
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats, mask_name, rois, im_info, feed_vars['im_shape'],
                spatial_scale, bbox_pred)
            result[mask_name] = mask_pred
    return result
def
single_scale_eval
(
self
,
body_feats
,
mask_name
,
rois
,
im_info
,
im_shape
,
spatial_scale
,
bbox_pred
=
None
):
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
roi_feat
=
self
.
roi_extractor
(
last_feat
,
rois
)
else
:
roi_feat
=
self
.
roi_extractor
(
body_feats
,
rois
,
spatial_scale
)
if
not
bbox_pred
:
bbox_pred
=
self
.
bbox_head
.
get_prediction
(
roi_feat
,
rois
,
im_info
,
feed_vars
[
'im_shape'
]
)
im_shape
)
bbox_pred
=
bbox_pred
[
'bbox'
]
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
'mask_pred'
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
mask_name
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
mask_pred
,
bbox_pred
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def train(self, feed_vars):
    """Return ``self.build(feed_vars, 'train')``."""
    outputs = self.build(feed_vars, 'train')
    return outputs
def
eval
(
self
,
feed_vars
):
def eval(self, feed_vars, multi_scale=None, mask_branch=False):
    """Build the evaluation graph.

    With a truthy ``multi_scale`` the multi-scale builder is used and
    receives ``mask_branch``; otherwise ``self.build`` runs in ``'test'``
    mode and ``mask_branch`` is ignored.
    """
    if not multi_scale:
        return self.build(feed_vars, 'test')
    return self.build_multi_scale(feed_vars, mask_branch)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/backbones/fpn.py
浏览文件 @
27ec95d7
...
...
@@ -17,7 +17,7 @@ from __future__ import division
from
__future__
import
print_function
from
collections
import
OrderedDict
import
copy
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.initializer
import
Xavier
...
...
@@ -110,6 +110,7 @@ class FPN(object):
their name.
spatial_scale(list): A list of multiplicative spatial scale factor.
"""
spatial_scale
=
copy
.
deepcopy
(
self
.
spatial_scale
)
body_name_list
=
list
(
body_dict
.
keys
())[::
-
1
]
num_backbone_stages
=
len
(
body_name_list
)
self
.
fpn_inner_output
=
[[]
for
_
in
range
(
num_backbone_stages
)]
...
...
@@ -179,7 +180,7 @@ class FPN(object):
fpn_dict
[
fpn_name
]
=
fpn_output
fpn_name_list
.
append
(
fpn_name
)
if
not
self
.
has_extra_convs
and
self
.
max_level
-
self
.
min_level
==
len
(
s
elf
.
s
patial_scale
):
spatial_scale
):
body_top_name
=
fpn_name_list
[
0
]
body_top_extension
=
fluid
.
layers
.
pool2d
(
fpn_dict
[
body_top_name
],
...
...
@@ -189,9 +190,9 @@ class FPN(object):
name
=
body_top_name
+
'_subsampled_2x'
)
fpn_dict
[
body_top_name
+
'_subsampled_2x'
]
=
body_top_extension
fpn_name_list
.
insert
(
0
,
body_top_name
+
'_subsampled_2x'
)
s
elf
.
spatial_scale
.
insert
(
0
,
self
.
spatial_scale
[
0
]
*
0.5
)
s
patial_scale
.
insert
(
0
,
spatial_scale
[
0
]
*
0.5
)
# Coarser FPN levels introduced for RetinaNet
highest_backbone_level
=
self
.
min_level
+
len
(
s
elf
.
s
patial_scale
)
-
1
highest_backbone_level
=
self
.
min_level
+
len
(
spatial_scale
)
-
1
if
self
.
has_extra_convs
and
self
.
max_level
>
highest_backbone_level
:
fpn_blob
=
body_dict
[
body_name_list
[
0
]]
for
i
in
range
(
highest_backbone_level
+
1
,
self
.
max_level
+
1
):
...
...
@@ -215,6 +216,6 @@ class FPN(object):
name
=
fpn_name
)
fpn_dict
[
fpn_name
]
=
fpn_blob
fpn_name_list
.
insert
(
0
,
fpn_name
)
s
elf
.
spatial_scale
.
insert
(
0
,
self
.
spatial_scale
[
0
]
*
0.5
)
s
patial_scale
.
insert
(
0
,
spatial_scale
[
0
]
*
0.5
)
res_dict
=
OrderedDict
([(
k
,
fpn_dict
[
k
])
for
k
in
fpn_name_list
])
return
res_dict
,
s
elf
.
s
patial_scale
return
res_dict
,
spatial_scale
ppdet/modeling/model_input.py
浏览文件 @
27ec95d7
...
...
@@ -17,6 +17,7 @@ from __future__ import print_function
from
__future__
import
division
from
collections
import
OrderedDict
from
ppdet.data.transform.operators
import
*
from
paddle
import
fluid
...
...
@@ -38,7 +39,7 @@ feed_var_def = [
# yapf: enable
def
create_feed
(
feed
,
use_pyreader
=
True
):
def
create_feed
(
feed
,
use_pyreader
=
True
,
sub_prog_feed
=
False
):
image_shape
=
feed
.
image_shape
feed_var_map
=
{
var
[
'name'
]:
var
for
var
in
feed_var_def
}
feed_var_map
[
'image'
]
=
{
...
...
@@ -60,6 +61,58 @@ def create_feed(feed, use_pyreader=True):
feed_var_map
[
'gt_box'
][
'lod_level'
]
=
0
feed_var_map
[
'is_difficult'
][
'lod_level'
]
=
0
base_name_list
=
[
'image'
]
num_scale
=
getattr
(
feed
,
'num_scale'
,
1
)
sample_transform
=
feed
.
sample_transforms
multiscale_test
=
False
aug_flip
=
False
for
t
in
sample_transform
:
if
isinstance
(
t
,
MultiscaleTestResize
):
multiscale_test
=
True
aug_flip
=
t
.
use_flip
assert
(
len
(
t
.
target_size
)
+
1
)
*
(
aug_flip
+
1
)
==
num_scale
,
\
"num_scale: {} is not equal to the actual number of scale: {}."
\
.
format
(
num_scale
,
(
len
(
t
.
target_size
)
+
1
)
*
(
aug_flip
+
1
))
break
if
aug_flip
:
num_scale
//=
2
base_name_list
.
insert
(
0
,
'flip_image'
)
feed_var_map
[
'flip_image'
]
=
{
'name'
:
'flip_image'
,
'shape'
:
image_shape
,
'dtype'
:
'float32'
,
'lod_level'
:
0
}
image_name_list
=
[]
if
multiscale_test
:
for
base_name
in
base_name_list
:
for
i
in
range
(
0
,
num_scale
):
name
=
base_name
if
i
==
0
else
base_name
+
'_scale_'
+
str
(
i
-
1
)
feed_var_map
[
name
]
=
{
'name'
:
name
,
'shape'
:
image_shape
,
'dtype'
:
'float32'
,
'lod_level'
:
0
}
image_name_list
.
append
(
name
)
feed_var_map
[
'im_info'
][
'shape'
]
=
[
feed
.
num_scale
*
3
]
feed
.
fields
=
image_name_list
+
feed
.
fields
[
1
:]
if
sub_prog_feed
:
box_names
=
[
'bbox'
,
'bbox_flip'
]
for
box_name
in
box_names
:
sub_prog_feed
=
{
'name'
:
box_name
,
'shape'
:
[
6
],
'dtype'
:
'float32'
,
'lod_level'
:
1
}
feed
.
fields
=
feed
.
fields
+
[
box_name
]
feed_var_map
[
box_name
]
=
sub_prog_feed
feed_vars
=
OrderedDict
([(
key
,
fluid
.
layers
.
data
(
name
=
feed_var_map
[
key
][
'name'
],
shape
=
feed_var_map
[
key
][
'shape'
],
...
...
ppdet/modeling/roi_heads/bbox_head.py
浏览文件 @
27ec95d7
...
...
@@ -280,7 +280,12 @@ class BBoxHead(object):
loss_bbox
=
fluid
.
layers
.
reduce_mean
(
loss_bbox
)
return
{
'loss_cls'
:
loss_cls
,
'loss_bbox'
:
loss_bbox
}
def
get_prediction
(
self
,
roi_feat
,
rois
,
im_info
,
im_shape
):
def
get_prediction
(
self
,
roi_feat
,
rois
,
im_info
,
im_shape
,
return_box_score
=
False
):
"""
Get prediction bounding box in test stage.
...
...
@@ -308,5 +313,7 @@ class BBoxHead(object):
bbox_pred
=
fluid
.
layers
.
reshape
(
bbox_pred
,
(
-
1
,
self
.
num_classes
,
4
))
decoded_box
=
self
.
box_coder
(
prior_box
=
boxes
,
target_box
=
bbox_pred
)
cliped_box
=
fluid
.
layers
.
box_clip
(
input
=
decoded_box
,
im_info
=
im_shape
)
if
return_box_score
:
return
{
'bbox'
:
cliped_box
,
'score'
:
cls_prob
}
pred_result
=
self
.
nms
(
bboxes
=
cliped_box
,
scores
=
cls_prob
)
return
{
'bbox'
:
pred_result
}
ppdet/modeling/roi_heads/cascade_head.py
浏览文件 @
27ec95d7
...
...
@@ -146,7 +146,8 @@ class CascadeBBoxHead(object):
rcnn_pred_list
,
proposal_list
,
cascade_bbox_reg_weights
,
cls_agnostic_bbox_reg
=
2
):
cls_agnostic_bbox_reg
=
2
,
return_box_score
=
False
):
"""
Get prediction bounding box in test stage.
:
...
...
@@ -214,7 +215,8 @@ class CascadeBBoxHead(object):
axis
=
1
)
box_out
=
fluid
.
layers
.
box_clip
(
input
=
decoded_box
,
im_info
=
im_shape
)
if
return_box_score
:
return
{
'bbox'
:
box_out
,
'score'
:
boxes_cls_prob_mean
}
pred_result
=
self
.
nms
(
bboxes
=
box_out
,
scores
=
boxes_cls_prob_mean
)
return
{
"bbox"
:
pred_result
}
...
...
ppdet/utils/cli.py
浏览文件 @
27ec95d7
...
...
@@ -16,7 +16,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import
yaml
import
re
from
ppdet.core.workspace
import
get_registered_modules
from
ppdet.core.workspace
import
get_registered_modules
,
dump_value
__all__
=
[
'ColorTTY'
,
'ArgsParser'
]
...
...
ppdet/utils/eval_utils.py
浏览文件 @
27ec95d7
...
...
@@ -24,6 +24,7 @@ import time
import
paddle.fluid
as
fluid
from
ppdet.utils.voc_eval
import
bbox_eval
as
voc_bbox_eval
from
ppdet.utils.post_process
import
mstest_box_post_process
,
mstest_mask_post_process
,
box_flip
__all__
=
[
'parse_fetches'
,
'eval_run'
,
'eval_results'
,
'json_eval_results'
]
...
...
@@ -57,7 +58,52 @@ def parse_fetches(fetches, prog=None, extra_keys=None):
return
keys
,
values
,
cls
def
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
):
def length2lod(length_lod):
    """Convert a list of sequence lengths into a single-level offset LoD.

    Returns a one-element list holding the running totals of
    ``length_lod``, starting at 0 (e.g. ``[2, 3]`` -> ``[[0, 2, 5]]``).
    """
    offsets = [0]
    running = 0
    for length in length_lod:
        running = running + length
        offsets.append(running)
    return [offsets]
def get_sub_feed(input, place):
    """Assemble the feed dict for the sub-program from earlier results.

    Keeps the entries named ``'bbox'``, ``'im_info'``, ``'im_id'``,
    ``'im_shape'`` and ``'bbox_flip'`` (when present) plus every entry whose
    key contains ``'image'``.  Each kept value is indexed as
    ``(data, lengths)``: ``data`` is copied into a new ``fluid.LoDTensor``
    on ``place``, and for keys containing ``'bbox'`` the first entry of
    ``lengths`` is converted with ``length2lod`` and set as the tensor's
    LoD.

    Returns:
        dict: key -> ``fluid.LoDTensor``.
    """
    fixed_keys = ['bbox', 'im_info', 'im_id', 'im_shape', 'bbox_flip']

    selected = {}
    for name in fixed_keys:
        if name in input:
            selected[name] = input[name]
    for name in input:
        if 'image' in name:
            selected[name] = input[name]

    feed = {}
    for name, value in selected.items():
        tensor = fluid.LoDTensor()
        tensor.set(value[0], place)
        if 'bbox' in name:
            tensor.set_lod(length2lod(value[1][0]))
        feed[name] = tensor
    return feed
def clean_res(result, keep_name_list):
    """Return the entries of ``result`` named in ``keep_name_list``.

    ``result`` itself is emptied in place before returning, so only the
    returned dict still references the kept values.
    """
    kept = {
        name: value
        for name, value in result.items() if name in keep_name_list
    }
    result.clear()
    return kept
def
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
,
cfg
=
None
,
sub_prog
=
None
,
sub_keys
=
None
,
sub_values
=
None
):
"""
Run evaluation program, return program outputs.
"""
...
...
@@ -84,6 +130,28 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls):
k
:
(
np
.
array
(
v
),
v
.
recursive_sequence_lengths
())
for
k
,
v
in
zip
(
keys
,
outs
)
}
multi_scale_test
=
getattr
(
cfg
,
'MultiScaleTEST'
,
None
)
mask_multi_scale_test
=
multi_scale_test
and
'Mask'
in
cfg
.
architecture
if
multi_scale_test
:
post_res
=
mstest_box_post_process
(
res
,
cfg
)
res
.
update
(
post_res
)
if
mask_multi_scale_test
:
place
=
fluid
.
CUDAPlace
(
0
)
if
cfg
.
use_gpu
else
fluid
.
CPUPlace
()
sub_feed
=
get_sub_feed
(
res
,
place
)
sub_prog_outs
=
exe
.
run
(
sub_prog
,
feed
=
sub_feed
,
fetch_list
=
sub_values
,
return_numpy
=
False
)
sub_prog_res
=
{
k
:
(
np
.
array
(
v
),
v
.
recursive_sequence_lengths
())
for
k
,
v
in
zip
(
sub_keys
,
sub_prog_outs
)
}
post_res
=
mstest_mask_post_process
(
sub_prog_res
,
cfg
)
res
.
update
(
post_res
)
if
multi_scale_test
:
res
=
clean_res
(
res
,
[
'im_info'
,
'bbox'
,
'im_id'
,
'im_shape'
,
'mask'
])
results
.
append
(
res
)
if
iter_id
%
100
==
0
:
logger
.
info
(
'Test iter {}'
.
format
(
iter_id
))
...
...
ppdet/utils/post_process.py
0 → 100644
浏览文件 @
27ec95d7
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
logging
import
numpy
as
np
import
paddle.fluid
as
fluid
__all__
=
[
'nms'
]
logger
=
logging
.
getLogger
(
__name__
)
def box_flip(boxes, im_shape):
    """Return a horizontally mirrored copy of ``boxes``.

    Columns 0, 4, 8, ... and 2, 6, 10, ... of ``boxes`` are treated as the
    left/right x coordinates; each pair is replaced by
    ``width - right - 1`` / ``width - left - 1``.  The width is read from
    ``im_shape[0][1]``.  ``boxes`` itself is not modified.
    """
    width = im_shape[0][1]
    mirrored = boxes.copy()
    left = boxes[:, 0::4]
    right = boxes[:, 2::4]
    mirrored[:, 0::4] = width - right - 1
    mirrored[:, 2::4] = width - left - 1
    return mirrored
def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS.

    Args:
        dets: 2-D array whose rows are ``[score, x1, y1, x2, y2]``.
        thresh: a lower-scoring box is suppressed when its IoU with an
            already kept box is ``>= thresh``.

    Returns:
        ``[]`` when ``dets`` has no rows; otherwise a 1-D integer array with
        the row indices of the kept detections, in ascending index order.
    """
    if dets.shape[0] == 0:
        return []
    scores = dets[:, 0]
    x1 = dets[:, 1]
    y1 = dets[:, 2]
    x2 = dets[:, 3]
    y2 = dets[:, 4]

    # Box sides use the inclusive-pixel convention, hence the "+ 1".
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    ndets = dets.shape[0]
    # A boolean mask replaces the `np.int` dtype, an alias that was
    # deprecated in NumPy 1.20 and removed in 1.24.
    suppressed = np.zeros(ndets, dtype=bool)

    # nominal indices
    # _i, _j
    # sorted indices
    # i, j
    # temp variables for box i's (the box currently under consideration)
    # ix1, iy1, ix2, iy2, iarea

    # variables for computing overlap with box j (lower scoring box)
    # xx1, yy1, xx2, yy2
    # w, h
    # inter, ovr

    for _i in range(ndets):
        i = order[_i]
        if suppressed[i]:
            continue
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j]:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = True

    return np.flatnonzero(~suppressed)
def bbox_area(box):
    """Return the area of ``box`` given as ``[x1, y1, x2, y2]``.

    Both sides are measured inclusively (``x2 - x1 + 1``).
    """
    return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
def bbox_overlaps(x, y):
    """Compute the pairwise IoU between two sets of boxes.

    Args:
        x: array of N boxes, one ``[x1, y1, x2, y2]`` row each.
        y: array of K boxes in the same layout.

    Returns:
        float32 array of shape (N, K); entries of non-overlapping pairs stay
        0.  Widths and heights are measured inclusively (``+ 1``).
    """
    num_x = x.shape[0]
    num_y = y.shape[0]
    iou = np.zeros((num_x, num_y), dtype=np.float32)
    for col in range(num_y):
        area_y = bbox_area(y[col])
        for row in range(num_x):
            inter_w = min(x[row, 2], y[col, 2]) - max(x[row, 0],
                                                      y[col, 0]) + 1
            if not inter_w > 0:
                continue
            inter_h = min(x[row, 3], y[col, 3]) - max(x[row, 1],
                                                      y[col, 1]) + 1
            if not inter_h > 0:
                continue
            area_x = bbox_area(x[row])
            union = area_x + area_y - inter_w * inter_h
            iou[row, col] = inter_w * inter_h / union
    return iou
def box_voting(nms_dets, dets, vote_thresh):
    """Refine the boxes kept by NMS with a score-weighted vote.

    For every row of ``nms_dets`` (``[score, x1, y1, x2, y2]``) the box part
    is replaced by the average of all boxes in ``dets`` whose IoU with it is
    ``>= vote_thresh``, weighted by their scores.  Scores in the returned
    copy are left as they were; the inputs are not modified.
    """
    voted = nms_dets.copy()
    candidate_boxes = dets[:, 1:]
    candidate_scores = dets[:, 0]
    overlaps = bbox_overlaps(nms_dets[:, 1:], candidate_boxes)
    for idx, row in enumerate(overlaps):
        voters = np.where(row >= vote_thresh)[0]
        voted[idx, 1:] = np.average(
            candidate_boxes[voters, :],
            axis=0,
            weights=candidate_scores[voters])
    return voted
def get_nms_result(boxes, scores, cfg):
    """Run per-class NMS (and optional box voting) on merged detections.

    Class 0 is skipped.  For each remaining class, detections scoring above
    ``cfg.MultiScaleTEST['score_thresh']`` go through ``nms`` and, when
    ``cfg.MultiScaleTEST['enable_voting']`` is set, ``box_voting``.  If more
    than ``cfg.MultiScaleTEST['detections_per_im']`` detections survive over
    all classes, only those scoring at least as high as the
    ``detections_per_im``-th best are kept.

    Args:
        boxes: array indexed as ``boxes[:, 4 * j:4 * (j + 1)]`` for class j.
        scores: array indexed as ``scores[:, j]`` for class j.
        cfg: config providing ``num_classes`` and ``MultiScaleTEST``.

    Returns:
        float32 array with rows ``[label, score, x1, y1, x2, y2]``.
    """
    num_classes = cfg.num_classes
    per_class = []
    for cls_id in range(1, num_classes):
        opts = cfg.MultiScaleTEST
        picked = np.where(scores[:, cls_id] > opts['score_thresh'])[0]
        cls_scores = scores[picked, cls_id]
        cls_coords = boxes[picked, cls_id * 4:(cls_id + 1) * 4]
        cls_dets = np.hstack((cls_scores[:, np.newaxis], cls_coords)).astype(
            np.float32, copy=False)

        keep = nms(cls_dets, opts['nms_thresh'])
        kept_dets = cls_dets[keep, :]
        if opts['enable_voting']:
            kept_dets = box_voting(kept_dets, cls_dets, opts['vote_thresh'])

        # Prepend the class label as the first column.
        labels = np.array([cls_id] * len(keep))
        per_class.append(
            np.hstack((labels[:, np.newaxis], kept_dets)).astype(
                np.float32, copy=False))

    # Limit to max_per_image detections **over all classes**
    all_scores = np.hstack([dets[:, 1] for dets in per_class])
    max_dets = cfg.MultiScaleTEST['detections_per_im']
    if len(all_scores) > max_dets:
        cutoff = np.sort(all_scores)[-max_dets]
        per_class = [
            dets[np.where(dets[:, 1] >= cutoff)[0], :] for dets in per_class
        ]

    return np.vstack(per_class)
def mstest_box_post_process(result, cfg):
    """
    Merge the box predictions of all test scales and run NMS on them.

    Every ``result`` entry whose key contains ``'bbox'`` is reshaped to
    ``(-1, 4 * cfg.num_classes)`` and paired with the ``'score...'`` entry
    that shares its key suffix.  Boxes under keys containing ``'flip'`` are
    mirrored back with ``box_flip``.  The merged detections go through
    ``get_nms_result``.

    Returns a dict with ``'bbox'`` and, when any flipped input was seen,
    ``'bbox_flip'`` (the same detections with mirrored coordinates).  Each
    value is ``(array, [[num_rows]])``.

    Only available for batch_size=1 now.
    """
    im_shape = result['im_shape'][0]
    saw_flip = False
    box_parts = []
    score_parts = []
    for key, value in result.items():
        if 'bbox' not in key:
            continue
        boxes = np.reshape(value[0], (-1, 4 * cfg.num_classes))
        # 'bbox<suffix>' pairs with 'score<suffix>'.
        scores = result['score' + key[4:]][0]
        if 'flip' in key:
            boxes = box_flip(boxes, im_shape)
            saw_flip = True
        box_parts.append(boxes)
        score_parts.append(scores)

    merged_boxes = np.concatenate(box_parts)
    merged_scores = np.concatenate(score_parts)
    bbox_pred = get_nms_result(merged_boxes, merged_scores, cfg)

    post_bbox = {'bbox': (bbox_pred, [[len(bbox_pred)]])}
    if saw_flip:
        # Keep the leading two columns, mirror the remaining coordinates.
        mirrored = np.append(
            bbox_pred[:, :2], box_flip(bbox_pred[:, 2:], im_shape), axis=1)
        post_bbox['bbox_flip'] = (mirrored, [[len(mirrored)]])
    return post_bbox
def mstest_mask_post_process(result, cfg):
    """Average the mask predictions of all test scales.

    Every ``result`` entry whose key contains ``'mask'`` is read as
    ``(array, lod)``.  Arrays that are not 4-D are replaced by an empty
    ``(0, M, M)`` array, with ``M = cfg.FPNRoIAlign['mask_resolution']``.
    Arrays under keys containing ``'flip'`` are reversed along their last
    axis.  The collected arrays are then averaged element-wise.

    The function does not need ``result['im_shape']``; the previous unused
    lookup was dropped so a fetch dict without that key no longer fails.

    Args:
        result: dict of ``key -> (array, lod)`` fetched from the mask
            sub-program.
        cfg: config providing ``FPNRoIAlign['mask_resolution']``.

    Returns:
        dict: ``{'mask': (mask_pred, [[len(mask_pred)]])}``.
    """
    resolution = cfg.FPNRoIAlign['mask_resolution']
    mask_list = []
    for key, value in result.items():
        if 'mask' not in key:
            continue
        masks = value[0]
        if len(masks.shape) != 4:
            # Placeholder for a scale that yielded no 4-D mask output.
            masks = np.zeros((0, resolution, resolution))
        elif 'flip' in key:
            # Undo the horizontal flip before averaging.
            masks = masks[:, :, :, ::-1]
        mask_list.append(masks)

    mask_pred = np.mean(mask_list, axis=0)
    return {'mask': (mask_pred, [[len(mask_pred)]])}
tools/configure.py
浏览文件 @
27ec95d7
...
...
@@ -19,7 +19,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import
yaml
from
ppdet.core.workspace
import
get_registered_modules
,
load_config
from
ppdet.core.workspace
import
get_registered_modules
,
load_config
,
dump_value
from
ppdet.utils.cli
import
ColorTTY
,
print_total_cfg
color_tty
=
ColorTTY
()
...
...
@@ -43,18 +43,6 @@ MISC_CONFIG = {
}
def dump_value(value):
    """Render ``value`` as a one-line string.

    Objects with a ``__dict__`` and dicts/tuples/lists are dumped with
    ``yaml.dump`` in flow style, stripped of newlines and of ``'...'``, and
    wrapped in single quotes; anything else is returned as ``str(value)``.
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    is_structured = hasattr(value, '__dict__') or isinstance(
        value, (dict, tuple, list))
    if not is_structured:
        # primitive types
        return str(value)
    dumped = yaml.dump(value, default_flow_style=True)
    dumped = dumped.replace('\n', '').replace('...', '')
    return "'{}'".format(dumped)
def
dump_config
(
module
,
minimal
=
False
):
args
=
module
.
schema
.
values
()
if
minimal
:
...
...
tools/eval.py
浏览文件 @
27ec95d7
...
...
@@ -59,7 +59,6 @@ def main():
raise
ValueError
(
"'architecture' not specified in config file."
)
merge_config
(
FLAGS
.
opt
)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu
(
cfg
.
use_gpu
)
print_total_cfg
(
cfg
)
...
...
@@ -69,6 +68,8 @@ def main():
else
:
eval_feed
=
create
(
cfg
.
eval_feed
)
multi_scale_test
=
getattr
(
cfg
,
'MultiScaleTEST'
,
None
)
# define executor
place
=
fluid
.
CUDAPlace
(
0
)
if
cfg
.
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
...
...
@@ -80,9 +81,8 @@ def main():
with
fluid
.
program_guard
(
eval_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
pyreader
,
feed_vars
=
create_feed
(
eval_feed
)
fetches
=
model
.
eval
(
feed_vars
)
fetches
=
model
.
eval
(
feed_vars
,
multi_scale_test
)
eval_prog
=
eval_prog
.
clone
(
True
)
reader
=
create_reader
(
eval_feed
,
args_path
=
FLAGS
.
dataset_dir
)
pyreader
.
decorate_sample_list_generator
(
reader
,
place
)
...
...
@@ -120,7 +120,32 @@ def main():
callable
(
model
.
is_bbox_normalized
):
is_bbox_normalized
=
model
.
is_bbox_normalized
()
results
=
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
)
sub_eval_prog
=
None
sub_keys
=
None
sub_values
=
None
# build sub-program
if
'Mask'
in
main_arch
and
multi_scale_test
:
sub_eval_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
sub_eval_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
_
,
feed_vars
=
create_feed
(
eval_feed
,
use_pyreader
=
False
,
sub_prog_feed
=
True
)
sub_fetches
=
model
.
eval
(
feed_vars
,
multi_scale_test
,
mask_branch
=
True
)
extra_keys
=
[]
if
cfg
.
metric
==
'COCO'
:
extra_keys
=
[
'im_id'
,
'im_shape'
]
if
cfg
.
metric
==
'VOC'
:
extra_keys
=
[
'gt_box'
,
'gt_label'
,
'is_difficult'
]
sub_keys
,
sub_values
,
_
=
parse_fetches
(
sub_fetches
,
sub_eval_prog
,
extra_keys
)
sub_eval_prog
=
sub_eval_prog
.
clone
(
True
)
if
'weights'
in
cfg
:
checkpoint
.
load_params
(
exe
,
sub_eval_prog
,
cfg
.
weights
)
results
=
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
,
cfg
,
sub_eval_prog
,
sub_keys
,
sub_values
)
# evaluation
resolution
=
None
...
...
tools/train.py
浏览文件 @
27ec95d7
...
...
@@ -73,9 +73,13 @@ def main():
raise
ValueError
(
"'architecture' not specified in config file."
)
merge_config
(
FLAGS
.
opt
)
if
'log_iter'
not
in
cfg
:
cfg
.
log_iter
=
20
if
'multi_scale_test'
not
in
cfg
:
cfg
.
multi_scale_test
=
False
ignore_params
=
cfg
.
finetune_exclude_pretrained_params
\
if
'finetune_exclude_pretrained_params'
in
cfg
else
[]
...
...
@@ -140,7 +144,7 @@ def main():
with
fluid
.
unique_name
.
guard
():
model
=
create
(
main_arch
)
eval_pyreader
,
feed_vars
=
create_feed
(
eval_feed
)
fetches
=
model
.
eval
(
feed_vars
)
fetches
=
model
.
eval
(
feed_vars
,
cfg
.
multi_scale_test
)
eval_prog
=
eval_prog
.
clone
(
True
)
eval_reader
=
create_reader
(
eval_feed
,
args_path
=
FLAGS
.
dataset_dir
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录