Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
27ec95d7
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
27ec95d7
编写于
10月 14, 2019
作者:
W
wangguanzhong
提交者:
GitHub
10月 14, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add multi-scale test (#3376)
* add multi-scale test
上级
bdf0fdc1
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
1523 addition
and
196 deletion
+1523
-196
configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
...s/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
+255
-0
configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
+177
-0
ppdet/core/workspace.py
ppdet/core/workspace.py
+23
-5
ppdet/data/data_feed.py
ppdet/data/data_feed.py
+40
-9
ppdet/data/reader.py
ppdet/data/reader.py
+12
-7
ppdet/data/transform/arrange_sample.py
ppdet/data/transform/arrange_sample.py
+17
-7
ppdet/data/transform/operators.py
ppdet/data/transform/operators.py
+129
-23
ppdet/data/transform/post_map.py
ppdet/data/transform/post_map.py
+39
-5
ppdet/modeling/architectures/cascade_mask_rcnn.py
ppdet/modeling/architectures/cascade_mask_rcnn.py
+161
-53
ppdet/modeling/architectures/cascade_rcnn.py
ppdet/modeling/architectures/cascade_rcnn.py
+98
-7
ppdet/modeling/architectures/faster_rcnn.py
ppdet/modeling/architectures/faster_rcnn.py
+59
-1
ppdet/modeling/architectures/mask_rcnn.py
ppdet/modeling/architectures/mask_rcnn.py
+123
-49
ppdet/modeling/backbones/fpn.py
ppdet/modeling/backbones/fpn.py
+7
-6
ppdet/modeling/model_input.py
ppdet/modeling/model_input.py
+54
-1
ppdet/modeling/roi_heads/bbox_head.py
ppdet/modeling/roi_heads/bbox_head.py
+8
-1
ppdet/modeling/roi_heads/cascade_head.py
ppdet/modeling/roi_heads/cascade_head.py
+4
-2
ppdet/utils/cli.py
ppdet/utils/cli.py
+1
-1
ppdet/utils/eval_utils.py
ppdet/utils/eval_utils.py
+69
-1
ppdet/utils/post_process.py
ppdet/utils/post_process.py
+212
-0
tools/configure.py
tools/configure.py
+1
-13
tools/eval.py
tools/eval.py
+29
-4
tools/train.py
tools/train.py
+5
-1
未找到文件。
configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
0 → 100644
浏览文件 @
27ec95d7
architecture
:
CascadeMaskRCNN
train_feed
:
MaskRCNNTrainFeed
eval_feed
:
MaskRCNNEvalFeed
test_feed
:
MaskRCNNTestFeed
max_iters
:
300000
snapshot_iter
:
10000
use_gpu
:
true
log_iter
:
20
log_smooth_window
:
20
save_dir
:
output
pretrain_weights
:
https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
weights
:
output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
metric
:
COCO
num_classes
:
81
CascadeMaskRCNN
:
backbone
:
SENet
fpn
:
FPN
rpn_head
:
FPNRPNHead
roi_extractor
:
FPNRoIAlign
bbox_head
:
CascadeBBoxHead
bbox_assigner
:
CascadeBBoxAssigner
mask_assigner
:
MaskAssigner
mask_head
:
MaskHead
SENet
:
depth
:
152
feature_maps
:
[
2
,
3
,
4
,
5
]
freeze_at
:
2
group_width
:
4
groups
:
64
norm_type
:
bn
freeze_norm
:
True
variant
:
d
dcn_v2_stages
:
[
3
,
4
,
5
]
std_senet
:
True
FPN
:
max_level
:
6
min_level
:
2
num_chan
:
256
spatial_scale
:
[
0.03125
,
0.0625
,
0.125
,
0.25
]
freeze_norm
:
False
norm_type
:
gn
FPNRPNHead
:
anchor_generator
:
aspect_ratios
:
[
0.5
,
1.0
,
2.0
]
variance
:
[
1.0
,
1.0
,
1.0
,
1.0
]
anchor_start_size
:
32
max_level
:
6
min_level
:
2
num_chan
:
256
rpn_target_assign
:
rpn_batch_size_per_im
:
256
rpn_fg_fraction
:
0.5
rpn_negative_overlap
:
0.3
rpn_positive_overlap
:
0.7
rpn_straddle_thresh
:
0.0
train_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
2000
post_nms_top_n
:
2000
test_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
1000
post_nms_top_n
:
1000
FPNRoIAlign
:
canconical_level
:
4
canonical_size
:
224
max_level
:
5
min_level
:
2
box_resolution
:
7
sampling_ratio
:
2
mask_resolution
:
14
MaskHead
:
dilation
:
1
conv_dim
:
256
num_convs
:
4
resolution
:
28
norm_type
:
gn
CascadeBBoxAssigner
:
batch_size_per_im
:
512
bbox_reg_weights
:
[
10
,
20
,
30
]
bg_thresh_hi
:
[
0.5
,
0.6
,
0.7
]
bg_thresh_lo
:
[
0.0
,
0.0
,
0.0
]
fg_fraction
:
0.25
fg_thresh
:
[
0.5
,
0.6
,
0.7
]
MaskAssigner
:
resolution
:
28
CascadeBBoxHead
:
head
:
CascadeXConvNormHead
nms
:
keep_top_k
:
100
nms_threshold
:
0.5
score_threshold
:
0.05
CascadeXConvNormHead
:
norm_type
:
gn
MultiScaleTEST
:
score_thresh
:
0.05
nms_thresh
:
0.5
detections_per_im
:
100
enable_voting
:
true
vote_thresh
:
0.9
LearningRate
:
base_lr
:
0.01
schedulers
:
-
!PiecewiseDecay
gamma
:
0.1
milestones
:
[
240000
,
280000
]
-
!LinearWarmup
start_factor
:
0.01
steps
:
2000
OptimizerBuilder
:
optimizer
:
momentum
:
0.9
type
:
Momentum
regularizer
:
factor
:
0.0001
type
:
L2
MaskRCNNTrainFeed
:
# batch size per device
batch_size
:
1
dataset
:
dataset_dir
:
dataset/coco
image_dir
:
train2017
annotation
:
annotations/instances_train2017.json
sample_transforms
:
-
!DecodeImage
to_rgb
:
False
with_mixup
:
False
-
!RandomFlipImage
is_mask_flip
:
true
is_normalized
:
false
prob
:
0.5
-
!NormalizeImage
is_channel_first
:
false
is_scale
:
False
mean
:
-
102.9801
-
115.9465
-
122.7717
std
:
-
1.0
-
1.0
-
1.0
-
!ResizeImage
interp
:
1
target_size
:
-
416
-
448
-
480
-
512
-
544
-
576
-
608
-
640
-
672
-
704
-
736
-
768
-
800
-
832
-
864
-
896
-
928
-
960
-
992
-
1024
-
1056
-
1088
-
1120
-
1152
-
1184
-
1216
-
1248
-
1280
-
1312
-
1344
-
1376
-
1408
max_size
:
1600
use_cv2
:
true
-
!Permute
channel_first
:
true
to_bgr
:
false
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
num_workers
:
8
MaskRCNNEvalFeed
:
batch_size
:
1
dataset
:
dataset_dir
:
dataset/coco
annotation
:
annotations/instances_val2017.json
image_dir
:
val2017
sample_transforms
:
-
!DecodeImage
to_rgb
:
False
-
!NormalizeImage
is_channel_first
:
false
is_scale
:
False
mean
:
-
102.9801
-
115.9465
-
122.7717
std
:
-
1.0
-
1.0
-
1.0
-
!MultiscaleTestResize
origin_target_size
:
800
origin_max_size
:
1333
target_size
:
-
400
-
500
-
600
-
700
-
900
-
1000
-
1100
-
1200
max_size
:
2000
use_flip
:
true
-
!Permute
channel_first
:
true
to_bgr
:
false
batch_transforms
:
-
!PadMSTest
pad_to_stride
:
32
# num_scale = (len(target_size) + 1) * (1 + use_flip)
num_scale
:
18
num_workers
:
2
MaskRCNNTestFeed
:
batch_size
:
1
dataset
:
annotation
:
dataset/coco/annotations/instances_val2017.json
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
num_workers
:
2
configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
0 → 100644
浏览文件 @
27ec95d7
architecture
:
CascadeRCNN
train_feed
:
FasterRCNNTrainFeed
eval_feed
:
FasterRCNNEvalFeed
test_feed
:
FasterRCNNTestFeed
max_iters
:
90000
snapshot_iter
:
10000
use_gpu
:
true
log_smooth_window
:
20
save_dir
:
output
pretrain_weights
:
https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights
:
output/cascade_rcnn_r50_fpn_1x/model_final
metric
:
COCO
num_classes
:
81
CascadeRCNN
:
backbone
:
ResNet
fpn
:
FPN
rpn_head
:
FPNRPNHead
roi_extractor
:
FPNRoIAlign
bbox_head
:
CascadeBBoxHead
bbox_assigner
:
CascadeBBoxAssigner
ResNet
:
norm_type
:
affine_channel
depth
:
50
feature_maps
:
[
2
,
3
,
4
,
5
]
freeze_at
:
2
variant
:
b
FPN
:
min_level
:
2
max_level
:
6
num_chan
:
256
spatial_scale
:
[
0.03125
,
0.0625
,
0.125
,
0.25
]
FPNRPNHead
:
anchor_generator
:
anchor_sizes
:
[
32
,
64
,
128
,
256
,
512
]
aspect_ratios
:
[
0.5
,
1.0
,
2.0
]
stride
:
[
16.0
,
16.0
]
variance
:
[
1.0
,
1.0
,
1.0
,
1.0
]
anchor_start_size
:
32
min_level
:
2
max_level
:
6
num_chan
:
256
rpn_target_assign
:
rpn_batch_size_per_im
:
256
rpn_fg_fraction
:
0.5
rpn_positive_overlap
:
0.7
rpn_negative_overlap
:
0.3
rpn_straddle_thresh
:
0.0
train_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
2000
post_nms_top_n
:
2000
test_proposal
:
min_size
:
0.0
nms_thresh
:
0.7
pre_nms_top_n
:
1000
post_nms_top_n
:
1000
FPNRoIAlign
:
canconical_level
:
4
canonical_size
:
224
min_level
:
2
max_level
:
5
box_resolution
:
7
sampling_ratio
:
2
CascadeBBoxAssigner
:
batch_size_per_im
:
512
bbox_reg_weights
:
[
10
,
20
,
30
]
bg_thresh_lo
:
[
0.0
,
0.0
,
0.0
]
bg_thresh_hi
:
[
0.5
,
0.6
,
0.7
]
fg_thresh
:
[
0.5
,
0.6
,
0.7
]
fg_fraction
:
0.25
CascadeBBoxHead
:
head
:
CascadeTwoFCHead
nms
:
keep_top_k
:
100
nms_threshold
:
0.5
score_threshold
:
0.05
CascadeTwoFCHead
:
mlp_dim
:
1024
MultiScaleTEST
:
score_thresh
:
0.05
nms_thresh
:
0.5
detections_per_im
:
100
enable_voting
:
true
vote_thresh
:
0.9
LearningRate
:
base_lr
:
0.02
schedulers
:
-
!PiecewiseDecay
gamma
:
0.1
milestones
:
[
60000
,
80000
]
-
!LinearWarmup
start_factor
:
0.3333333333333333
steps
:
500
OptimizerBuilder
:
optimizer
:
momentum
:
0.9
type
:
Momentum
regularizer
:
factor
:
0.0001
type
:
L2
FasterRCNNTrainFeed
:
batch_size
:
2
dataset
:
dataset_dir
:
dataset/coco
annotation
:
annotations/instances_train2017.json
image_dir
:
train2017
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
drop_last
:
false
num_workers
:
2
FasterRCNNEvalFeed
:
batch_size
:
1
dataset
:
dataset_dir
:
dataset/coco
annotation
:
annotations/instances_val2017.json
image_dir
:
val2017
sample_transforms
:
-
!DecodeImage
to_rgb
:
true
-
!NormalizeImage
is_channel_first
:
false
is_scale
:
true
mean
:
-
0.485
-
0.456
-
0.406
std
:
-
0.229
-
0.224
-
0.225
-
!MultiscaleTestResize
origin_target_size
:
800
origin_max_size
:
1333
target_size
:
-
400
-
500
-
600
-
700
-
900
-
1000
-
1100
-
1200
max_size
:
2000
use_flip
:
true
-
!Permute
channel_first
:
true
to_bgr
:
false
batch_transforms
:
-
!PadMSTest
pad_to_stride
:
32
num_scale
:
18
num_workers
:
2
FasterRCNNTestFeed
:
batch_size
:
1
dataset
:
annotation
:
dataset/coco/annotations/instances_val2017.json
batch_transforms
:
-
!PadBatch
pad_to_stride
:
32
drop_last
:
false
num_workers
:
2
ppdet/core/workspace.py
浏览文件 @
27ec95d7
...
...
@@ -27,11 +27,29 @@ from .config.schema import SchemaDict, SharedConfig, extract_schema
from
.config.yaml_helpers
import
serializable
__all__
=
[
'global_config'
,
'load_config'
,
'merge_config'
,
'get_registered_modules'
,
'create'
,
'register'
,
'serializable'
'global_config'
,
'load_config'
,
'merge_config'
,
'get_registered_modules'
,
'create'
,
'register'
,
'serializable'
,
'dump_value'
,
]
def
dump_value
(
value
):
# XXX this is hackish, but collections.abc is not available in python 2
if
hasattr
(
value
,
'__dict__'
)
or
isinstance
(
value
,
(
dict
,
tuple
,
list
)):
value
=
yaml
.
dump
(
value
,
default_flow_style
=
True
)
value
=
value
.
replace
(
'
\n
'
,
''
)
value
=
value
.
replace
(
'...'
,
''
)
return
"'{}'"
.
format
(
value
)
else
:
# primitive types
return
str
(
value
)
class
AttrDict
(
dict
):
"""Single level attribute dict, NOT recursive"""
...
...
@@ -154,9 +172,9 @@ def create(cls_or_name, **kwargs):
target_key
=
config
[
k
]
shared_conf
=
config
.
schema
[
k
].
default
assert
isinstance
(
shared_conf
,
SharedConfig
)
if
target_key
is
not
None
and
not
isinstance
(
target_key
,
SharedConfig
):
continue
# value is given for the module
if
target_key
is
not
None
and
not
isinstance
(
target_key
,
SharedConfig
):
continue
# value is given for the module
elif
shared_conf
.
key
in
global_config
:
# `key` is present in config
kwargs
[
k
]
=
global_config
[
shared_conf
.
key
]
...
...
ppdet/data/data_feed.py
浏览文件 @
27ec95d7
...
...
@@ -27,18 +27,18 @@ from ppdet.data.reader import Reader
from
ppdet.data.transform.operators
import
(
DecodeImage
,
MixupImage
,
NormalizeBox
,
NormalizeImage
,
RandomDistort
,
RandomFlipImage
,
RandomInterpImage
,
ResizeImage
,
ExpandImage
,
CropImage
,
Permute
)
Permute
,
MultiscaleTestResize
)
from
ppdet.data.transform.arrange_sample
import
(
ArrangeRCNN
,
ArrangeEvalRCNN
,
ArrangeTestRCNN
,
ArrangeSSD
,
ArrangeEvalSSD
,
ArrangeTestSSD
,
ArrangeYOLO
,
ArrangeEvalYOLO
,
ArrangeTestYOLO
)
__all__
=
[
'PadBatch'
,
'MultiScale'
,
'RandomShape'
,
'DataSet'
,
'CocoDataSet'
,
'DataFeed'
,
'TrainFeed'
,
'EvalFeed'
,
'FasterRCNNTrainFeed'
,
'MaskRCNNTrainFeed'
,
'FasterRCNNTestFeed'
,
'MaskRCNNTestFeed'
,
'SSDTrainFeed'
,
'SSDEvalFeed'
,
'SSDTestFeed'
,
'YoloTrainFeed'
,
'YoloEvalFeed'
,
'YoloTestFeed'
,
'create_reader'
'PadBatch'
,
'MultiScale'
,
'RandomShape'
,
'PadMSTest'
,
'DataSet'
,
'CocoDataSet'
,
'DataFeed'
,
'TrainFeed'
,
'EvalFeed'
,
'FasterRCNNTrainFeed'
,
'MaskRCNNTrainFeed'
,
'FasterRCNNEvalFeed'
,
'MaskRCNNEvalFeed'
,
'FasterRCNNTestFeed'
,
'MaskRCNNTestFeed'
,
'SSDTrainFeed'
,
'SSDEvalFeed'
,
'SSDTestFeed'
,
'YoloTrainFeed'
,
'YoloEvalFeed'
,
'YoloTestFeed'
,
'create_reader'
]
...
...
@@ -113,6 +113,7 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None):
pad
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
PadBatch
)]
rand_shape
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
RandomShape
)]
multi_scale
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
MultiScale
)]
pad_ms_test
=
[
t
for
t
in
batch_transforms
if
isinstance
(
t
,
PadMSTest
)]
if
any
(
pad
):
transform_config
[
'IS_PADDING'
]
=
True
...
...
@@ -122,6 +123,10 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None):
transform_config
[
'RANDOM_SHAPES'
]
=
rand_shape
[
0
].
sizes
if
any
(
multi_scale
):
transform_config
[
'MULTI_SCALES'
]
=
multi_scale
[
0
].
scales
if
any
(
pad_ms_test
):
transform_config
[
'ENABLE_MULTISCALE_TEST'
]
=
True
transform_config
[
'NUM_SCALE'
]
=
feed
.
num_scale
transform_config
[
'COARSEST_STRIDE'
]
=
pad_ms_test
[
0
].
pad_to_stride
if
hasattr
(
inspect
,
'getfullargspec'
):
argspec
=
inspect
.
getfullargspec
...
...
@@ -186,6 +191,20 @@ class RandomShape(object):
self
.
sizes
=
sizes
@
serializable
class
PadMSTest
(
object
):
"""
Padding for multi-scale test
Args:
pad_to_stride (int): pad to multiple of strides, e.g., 32
"""
def
__init__
(
self
,
pad_to_stride
=
0
):
super
(
PadMSTest
,
self
).
__init__
()
self
.
pad_to_stride
=
pad_to_stride
@
serializable
class
DataSet
(
object
):
"""
...
...
@@ -502,7 +521,10 @@ class FasterRCNNEvalFeed(DataFeed):
samples
=-
1
,
drop_last
=
False
,
num_workers
=
2
,
use_padded_im_info
=
True
):
use_padded_im_info
=
True
,
enable_multiscale
=
False
,
num_scale
=
1
,
enable_aug_flip
=
False
):
sample_transforms
.
append
(
ArrangeEvalRCNN
())
super
(
FasterRCNNEvalFeed
,
self
).
__init__
(
dataset
,
...
...
@@ -517,6 +539,9 @@ class FasterRCNNEvalFeed(DataFeed):
num_workers
=
num_workers
,
use_padded_im_info
=
use_padded_im_info
)
self
.
mode
=
'VAL'
self
.
enable_multiscale
=
enable_multiscale
self
.
num_scale
=
num_scale
self
.
enable_aug_flip
=
enable_aug_flip
@
register
...
...
@@ -640,7 +665,10 @@ class MaskRCNNEvalFeed(DataFeed):
drop_last
=
False
,
num_workers
=
2
,
use_process
=
False
,
use_padded_im_info
=
True
):
use_padded_im_info
=
True
,
enable_multiscale
=
False
,
num_scale
=
1
,
enable_aug_flip
=
False
):
sample_transforms
.
append
(
ArrangeTestRCNN
())
super
(
MaskRCNNEvalFeed
,
self
).
__init__
(
dataset
,
...
...
@@ -656,6 +684,9 @@ class MaskRCNNEvalFeed(DataFeed):
use_process
=
use_process
,
use_padded_im_info
=
use_padded_im_info
)
self
.
mode
=
'VAL'
self
.
enable_multiscale
=
enable_multiscale
self
.
num_scale
=
num_scale
self
.
enable_aug_flip
=
enable_aug_flip
@
register
...
...
ppdet/data/reader.py
浏览文件 @
27ec95d7
...
...
@@ -68,8 +68,8 @@ class Reader(object):
mapped_ds
=
map
(
sc
,
mapper
,
worker_args
)
# In VAL mode, gt_bbox, gt_label can be empty, and should
# not be dropped
batched_ds
=
batch
(
mapped_ds
,
batchsize
,
drop_last
,
drop_empty
=
(
mode
!=
"VAL"
))
batched_ds
=
batch
(
mapped_ds
,
batchsize
,
drop_last
,
drop_empty
=
(
mode
!=
"VAL"
))
trans_conf
=
{
k
.
lower
():
v
for
k
,
v
in
self
.
_trans_conf
[
mode
].
items
()}
need_keys
=
{
...
...
@@ -78,6 +78,8 @@ class Reader(object):
'random_shapes'
,
'multi_scales'
,
'use_padded_im_info'
,
'enable_multiscale_test'
,
'num_scale'
,
}
bm_config
=
{
key
:
value
...
...
@@ -125,12 +127,15 @@ class Reader(object):
return
self
.
_make_reader
(
'TEST'
)
@
classmethod
def
create
(
cls
,
mode
,
data_config
,
transform_config
,
max_iter
=-
1
,
my_source
=
None
,
ret_iter
=
True
):
def
create
(
cls
,
mode
,
data_config
,
transform_config
,
max_iter
=-
1
,
my_source
=
None
,
ret_iter
=
True
):
""" create a specific reader """
reader
=
Reader
({
mode
:
data_config
},
{
mode
:
transform_config
},
max_iter
)
reader
=
Reader
({
mode
:
data_config
},
{
mode
:
transform_config
},
max_iter
)
if
ret_iter
:
return
reader
.
_make_reader
(
mode
,
my_source
)
else
:
...
...
ppdet/data/transform/arrange_sample.py
浏览文件 @
27ec95d7
...
...
@@ -110,8 +110,11 @@ class ArrangeEvalRCNN(BaseOperator):
(image, im_info, im_id, im_shape, gt_bbox,
gt_class, difficult)
"""
im
=
sample
[
'image'
]
keys
=
list
(
sample
.
keys
())
ims
=
[]
keys
=
sorted
(
list
(
sample
.
keys
()))
for
k
in
keys
:
if
'image'
in
k
:
ims
.
append
(
sample
[
k
])
if
'im_info'
in
keys
:
im_info
=
sample
[
'im_info'
]
else
:
...
...
@@ -127,7 +130,9 @@ class ArrangeEvalRCNN(BaseOperator):
gt_bbox
=
sample
[
'gt_bbox'
]
gt_class
=
sample
[
'gt_class'
]
difficult
=
sample
[
'difficult'
]
outs
=
(
im
,
im_info
,
im_id
,
im_shape
,
gt_bbox
,
gt_class
,
difficult
)
remain_list
=
[
im_info
,
im_id
,
im_shape
,
gt_bbox
,
gt_class
,
difficult
]
ims
.
extend
(
remain_list
)
outs
=
tuple
(
ims
)
return
outs
...
...
@@ -148,10 +153,13 @@ class ArrangeTestRCNN(BaseOperator):
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id)
(image, im_info, im_id
, im_shape
)
"""
im
=
sample
[
'image'
]
keys
=
list
(
sample
.
keys
())
ims
=
[]
keys
=
sorted
(
list
(
sample
.
keys
()))
for
k
in
keys
:
if
'image'
in
k
:
ims
.
append
(
sample
[
k
])
if
'im_info'
in
keys
:
im_info
=
sample
[
'im_info'
]
else
:
...
...
@@ -164,7 +172,9 @@ class ArrangeTestRCNN(BaseOperator):
# bbox prediction needs im_info as input in format of [N, 3],
# so im_shape is appended by 1 to match dimension.
im_shape
=
np
.
array
((
h
,
w
,
1
),
dtype
=
np
.
float32
)
outs
=
(
im
,
im_info
,
im_id
,
im_shape
)
remain_list
=
[
im_info
,
im_id
,
im_shape
]
ims
.
extend
(
remain_list
)
outs
=
tuple
(
ims
)
return
outs
...
...
ppdet/data/transform/operators.py
浏览文件 @
27ec95d7
...
...
@@ -121,6 +121,105 @@ class DecodeImage(BaseOperator):
return
sample
@
register_op
class
MultiscaleTestResize
(
BaseOperator
):
def
__init__
(
self
,
origin_target_size
=
800
,
origin_max_size
=
1333
,
target_size
=
[],
max_size
=
2000
,
interp
=
cv2
.
INTER_LINEAR
,
use_flip
=
True
):
"""
Rescale image to the each size in target size, and capped at max_size.
Args:
origin_target_size(int): original target size of image's short side.
origin_max_size(int): original max size of image.
target_size (list): A list of target sizes of image's short side.
max_size (int): the max size of image.
interp (int): the interpolation method.
use_flip (bool): whether use flip augmentation.
"""
super
(
MultiscaleTestResize
,
self
).
__init__
()
self
.
origin_target_size
=
int
(
origin_target_size
)
self
.
origin_max_size
=
int
(
origin_max_size
)
self
.
max_size
=
int
(
max_size
)
self
.
interp
=
int
(
interp
)
self
.
use_flip
=
use_flip
if
not
isinstance
(
target_size
,
list
):
raise
TypeError
(
"Type of target_size is invalid. Must be List, now is {}"
.
format
(
type
(
target_size
)))
self
.
target_size
=
target_size
if
not
(
isinstance
(
self
.
origin_target_size
,
int
)
and
isinstance
(
self
.
origin_max_size
,
int
)
and
isinstance
(
self
.
max_size
,
int
)
and
isinstance
(
self
.
interp
,
int
)):
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
def
__call__
(
self
,
sample
,
context
=
None
):
""" Resize the image numpy for multi-scale test.
"""
origin_ims
=
{}
im
=
sample
[
'image'
]
if
not
isinstance
(
im
,
np
.
ndarray
):
raise
TypeError
(
"{}: image type is not numpy."
.
format
(
self
))
if
len
(
im
.
shape
)
!=
3
:
raise
ImageError
(
'{}: image is not 3-dimensional.'
.
format
(
self
))
im_shape
=
im
.
shape
im_size_min
=
np
.
min
(
im_shape
[
0
:
2
])
im_size_max
=
np
.
max
(
im_shape
[
0
:
2
])
if
float
(
im_size_min
)
==
0
:
raise
ZeroDivisionError
(
'{}: min size of image is 0'
.
format
(
self
))
base_name_list
=
[
'image'
]
origin_ims
[
'image'
]
=
im
if
self
.
use_flip
:
sample
[
'flip_image'
]
=
im
[:,
::
-
1
,
:]
base_name_list
.
append
(
'flip_image'
)
origin_ims
[
'flip_image'
]
=
sample
[
'flip_image'
]
im_info
=
[]
for
base_name
in
base_name_list
:
im_scale
=
float
(
self
.
origin_target_size
)
/
float
(
im_size_min
)
# Prevent the biggest axis from being more than max_size
if
np
.
round
(
im_scale
*
im_size_max
)
>
self
.
origin_max_size
:
im_scale
=
float
(
self
.
origin_max_size
)
/
float
(
im_size_max
)
im_scale_x
=
im_scale
im_scale_y
=
im_scale
resize_w
=
np
.
round
(
im_scale_x
*
float
(
im_shape
[
1
]))
resize_h
=
np
.
round
(
im_scale_y
*
float
(
im_shape
[
0
]))
im_resize
=
cv2
.
resize
(
origin_ims
[
base_name
],
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
self
.
interp
)
im_info
.
extend
([
resize_h
,
resize_w
,
im_scale
])
sample
[
base_name
]
=
im_resize
for
i
,
size
in
enumerate
(
self
.
target_size
):
im_scale
=
float
(
size
)
/
float
(
im_size_min
)
if
np
.
round
(
im_scale
*
im_size_max
)
>
self
.
max_size
:
im_scale
=
float
(
self
.
max_size
)
/
float
(
im_size_max
)
im_scale_x
=
im_scale
im_scale_y
=
im_scale
resize_w
=
np
.
round
(
im_scale_x
*
float
(
im_shape
[
1
]))
resize_h
=
np
.
round
(
im_scale_y
*
float
(
im_shape
[
0
]))
im_resize
=
cv2
.
resize
(
origin_ims
[
base_name
],
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
self
.
interp
)
im_info
.
extend
([
resize_h
,
resize_w
,
im_scale
])
name
=
base_name
+
'_scale_'
+
str
(
i
)
sample
[
name
]
=
im_resize
sample
[
'im_info'
]
=
np
.
array
(
im_info
,
dtype
=
np
.
float32
)
return
sample
@
register_op
class
ResizeImage
(
BaseOperator
):
def
__init__
(
self
,
...
...
@@ -183,9 +282,12 @@ class ResizeImage(BaseOperator):
resize_w
=
np
.
round
(
im_scale_x
*
float
(
im_shape
[
1
]))
resize_h
=
np
.
round
(
im_scale_y
*
float
(
im_shape
[
0
]))
sample
[
'im_info'
]
=
np
.
array
(
[
resize_h
,
resize_w
,
im_scale
],
dtype
=
np
.
float32
)
im_info
=
[
resize_h
,
resize_w
,
im_scale
]
if
'im_info'
in
sample
and
sample
[
'im_info'
][
2
]
!=
1.
:
sample
[
'im_info'
]
=
np
.
append
(
list
(
sample
[
'im_info'
]),
im_info
).
astype
(
np
.
float32
)
else
:
sample
[
'im_info'
]
=
np
.
array
(
im_info
).
astype
(
np
.
float32
)
else
:
im_scale_x
=
float
(
selected_size
)
/
float
(
im_shape
[
1
])
im_scale_y
=
float
(
selected_size
)
/
float
(
im_shape
[
0
])
...
...
@@ -331,19 +433,21 @@ class NormalizeImage(BaseOperator):
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
"""
im
=
sample
[
'image'
]
im
=
im
.
astype
(
np
.
float32
,
copy
=
False
)
if
self
.
is_channel_first
:
mean
=
np
.
array
(
self
.
mean
)[:,
np
.
newaxis
,
np
.
newaxis
]
std
=
np
.
array
(
self
.
std
)[:,
np
.
newaxis
,
np
.
newaxis
]
else
:
mean
=
np
.
array
(
self
.
mean
)[
np
.
newaxis
,
np
.
newaxis
,
:]
std
=
np
.
array
(
self
.
std
)[
np
.
newaxis
,
np
.
newaxis
,
:]
if
self
.
is_scale
:
im
=
im
/
255.0
im
-=
mean
im
/=
std
sample
[
'image'
]
=
im
for
k
in
sample
.
keys
():
if
'image'
in
k
:
im
=
sample
[
k
]
im
=
im
.
astype
(
np
.
float32
,
copy
=
False
)
if
self
.
is_channel_first
:
mean
=
np
.
array
(
self
.
mean
)[:,
np
.
newaxis
,
np
.
newaxis
]
std
=
np
.
array
(
self
.
std
)[:,
np
.
newaxis
,
np
.
newaxis
]
else
:
mean
=
np
.
array
(
self
.
mean
)[
np
.
newaxis
,
np
.
newaxis
,
:]
std
=
np
.
array
(
self
.
std
)[
np
.
newaxis
,
np
.
newaxis
,
:]
if
self
.
is_scale
:
im
=
im
/
255.0
im
-=
mean
im
/=
std
sample
[
k
]
=
im
return
sample
...
...
@@ -785,13 +889,15 @@ class Permute(BaseOperator):
def
__call__
(
self
,
sample
,
context
=
None
):
assert
'image'
in
sample
,
"image data not found"
im
=
sample
[
'image'
]
if
self
.
channel_first
:
im
=
np
.
swapaxes
(
im
,
1
,
2
)
im
=
np
.
swapaxes
(
im
,
1
,
0
)
if
self
.
to_bgr
:
im
=
im
[[
2
,
1
,
0
],
:,
:]
sample
[
'image'
]
=
im
for
k
in
sample
.
keys
():
if
'image'
in
k
:
im
=
sample
[
k
]
if
self
.
channel_first
:
im
=
np
.
swapaxes
(
im
,
1
,
2
)
im
=
np
.
swapaxes
(
im
,
1
,
0
)
if
self
.
to_bgr
:
im
=
im
[[
2
,
1
,
0
],
:,
:]
sample
[
k
]
=
im
return
sample
...
...
ppdet/data/transform/post_map.py
浏览文件 @
27ec95d7
...
...
@@ -27,7 +27,9 @@ def build_post_map(coarsest_stride=1,
is_padding
=
False
,
random_shapes
=
[],
multi_scales
=
[],
use_padded_im_info
=
False
):
use_padded_im_info
=
False
,
enable_multiscale_test
=
False
,
num_scale
=
1
):
"""
Build a mapper for post-processing batches
...
...
@@ -36,10 +38,13 @@ def build_post_map(coarsest_stride=1,
{
coarsest_stride (int): stride of the coarsest FPN level
is_padding (bool): whether to padding in minibatch
random_shapes: (list of int): resize to image to random
shapes, [] for not resize.
multi_scales: (list of int): resize image by random
scales, [] for not resize.
random_shapes (list of int): resize to image to random shapes,
[] for not resize.
multi_scales (list of int): resize image by random scales,
[] for not resize.
use_padded_im_info (bool): whether to update im_info after padding
enable_multiscale_test (bool): whether to use multiscale test.
num_scale (int) : the number of scales for multiscale test.
}
Returns:
a mapper function which accept one argument 'batch' and
...
...
@@ -66,6 +71,33 @@ def build_post_map(coarsest_stride=1,
padding_batch
.
append
((
padding_im
,
)
+
data
[
1
:])
return
padding_batch
def
padding_multiscale_test
(
batch_data
):
if
len
(
batch_data
)
!=
1
:
raise
NotImplementedError
(
"Batch size must be 1 when using multiscale test, but now batch size is {}"
.
format
(
len
(
batch_data
)))
if
coarsest_stride
>
1
:
padding_batch
=
[]
padding_images
=
[]
data
=
batch_data
[
0
]
for
i
,
input
in
enumerate
(
data
):
if
i
<
num_scale
:
im_c
,
im_h
,
im_w
=
input
.
shape
max_h
=
int
(
np
.
ceil
(
im_h
/
coarsest_stride
)
*
coarsest_stride
)
max_w
=
int
(
np
.
ceil
(
im_w
/
coarsest_stride
)
*
coarsest_stride
)
padding_im
=
np
.
zeros
(
(
im_c
,
max_h
,
max_w
),
dtype
=
np
.
float32
)
padding_im
[:,
:
im_h
,
:
im_w
]
=
input
data
[
num_scale
][
3
*
i
:
3
*
i
+
2
]
=
[
max_h
,
max_w
]
padding_batch
.
append
(
padding_im
)
else
:
padding_batch
.
append
(
input
)
return
[
tuple
(
padding_batch
)]
# no need to padding
return
batch_data
def
random_shape
(
batch_data
):
# For YOLO: gt_bbox is normalized, is scale invariant.
shape
=
np
.
random
.
choice
(
random_shapes
)
...
...
@@ -108,6 +140,8 @@ def build_post_map(coarsest_stride=1,
batch_data
=
random_shape
(
batch_data
)
if
len
(
multi_scales
)
>
0
:
batch_data
=
multi_scale_resize
(
batch_data
)
if
enable_multiscale_test
:
batch_data
=
padding_multiscale_test
(
batch_data
)
except
Exception
as
e
:
errmsg
=
"post-process failed with error: "
+
str
(
e
)
logger
.
warn
(
errmsg
)
...
...
ppdet/modeling/architectures/cascade_mask_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -80,21 +80,15 @@ class CascadeMaskRCNN(object):
self
.
cascade_rcnn_loss_weight
=
[
1.0
,
0.5
,
0.25
]
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
im
=
feed_vars
[
'image'
]
assert
mode
in
[
'train'
,
'test'
],
\
"only 'train' and 'test' mode is supported"
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'gt_mask'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
self
.
_input_check
(
required_fields
,
feed_vars
)
for
var
in
required_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
im
=
feed_vars
[
'image'
]
if
mode
==
'train'
:
gt_box
=
feed_vars
[
'gt_box'
]
is_crowd
=
feed_vars
[
'is_crowd'
]
...
...
@@ -199,55 +193,167 @@ class CascadeMaskRCNN(object):
loss
.
update
({
'loss'
:
total_loss
})
return
loss
else
:
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
roi_feat
=
self
.
roi_extractor
(
last_feat
,
rois
)
else
:
roi_feat
=
self
.
roi_extractor
(
body_feats
,
rois
,
spatial_scale
)
mask_name
=
'mask_pred'
mask_pred
,
bbox_pred
=
self
.
single_scale_eval
(
body_feats
,
spatial_scale
,
im_info
,
mask_name
,
bbox_pred
,
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
feed_vars
[
'im_shape'
])
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
def
build_multi_scale
(
self
,
feed_vars
,
mask_branch
=
False
):
required_fields
=
[
'image'
,
'im_info'
]
self
.
_input_check
(
required_fields
,
feed_vars
)
ims
=
[]
for
k
in
feed_vars
.
keys
():
if
'image'
in
k
:
ims
.
append
(
feed_vars
[
k
])
result
=
{}
if
not
mask_branch
:
assert
'im_shape'
in
feed_vars
,
\
"{} has no im_shape field"
.
format
(
feed_vars
)
result
.
update
(
feed_vars
)
for
i
,
im
in
enumerate
(
ims
):
im_info
=
fluid
.
layers
.
slice
(
input
=
feed_vars
[
'im_info'
],
axes
=
[
1
],
starts
=
[
3
*
i
],
ends
=
[
3
*
i
+
3
])
body_feats
=
self
.
backbone
(
im
)
result
.
update
(
body_feats
)
# FPN
if
self
.
fpn
is
not
None
:
body_feats
,
spatial_scale
=
self
.
fpn
.
get_output
(
body_feats
)
rois
=
self
.
rpn_head
.
get_proposals
(
body_feats
,
im_info
,
mode
=
'test'
)
if
not
mask_branch
:
im_shape
=
feed_vars
[
'im_shape'
]
body_feat_names
=
list
(
body_feats
.
keys
())
proposal_list
=
[]
roi_feat_list
=
[]
rcnn_pred_list
=
[]
proposals
=
None
bbox_pred
=
None
for
i
in
range
(
3
):
if
i
>
0
:
refined_bbox
=
self
.
_decode_box
(
proposals
,
bbox_pred
,
curr_stage
=
i
-
1
,
)
else
:
refined_bbox
=
rois
proposals
=
refined_bbox
proposal_list
.
append
(
proposals
)
# extract roi features
roi_feat
=
self
.
roi_extractor
(
body_feats
,
proposals
,
spatial_scale
)
roi_feat_list
.
append
(
roi_feat
)
# bbox head
cls_score
,
bbox_pred
=
self
.
bbox_head
.
get_output
(
roi_feat
,
wb_scalar
=
1.0
/
self
.
cascade_rcnn_loss_weight
[
i
],
name
=
'_'
+
str
(
i
+
1
)
if
i
>
0
else
''
)
rcnn_pred_list
.
append
((
cls_score
,
bbox_pred
))
# get mask rois
if
self
.
fpn
is
None
:
body_feat
=
body_feats
[
body_feat_names
[
-
1
]]
pred
=
self
.
bbox_head
.
get_prediction
(
im_info
,
im_shape
,
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
self
.
cascade_bbox_reg_weights
,
return_box_score
=
True
)
bbox_name
=
'bbox_'
+
str
(
i
)
score_name
=
'score_'
+
str
(
i
)
if
'flip'
in
im
.
name
:
bbox_name
+=
'_flip'
score_name
+=
'_flip'
result
[
bbox_name
]
=
pred
[
'bbox'
]
result
[
score_name
]
=
pred
[
'score'
]
else
:
mask_name
=
'mask_pred_'
+
str
(
i
)
bbox_pred
=
feed_vars
[
'bbox'
]
result
.
update
({
im
.
name
:
im
})
if
'flip'
in
im
.
name
:
mask_name
+=
'_flip'
bbox_pred
=
feed_vars
[
'bbox_flip'
]
mask_pred
,
bbox_pred
=
self
.
single_scale_eval
(
body_feats
,
spatial_scale
,
im_info
,
mask_name
,
bbox_pred
=
bbox_pred
,
use_multi_test
=
True
)
result
[
mask_name
]
=
mask_pred
return
result
def
single_scale_eval
(
self
,
body_feats
,
spatial_scale
,
im_info
,
mask_name
,
bbox_pred
,
roi_feat_list
=
None
,
rcnn_pred_list
=
None
,
proposal_list
=
None
,
im_shape
=
None
,
use_multi_test
=
False
):
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
if
not
use_multi_test
:
bbox_pred
=
self
.
bbox_head
.
get_prediction
(
im_info
,
feed_vars
[
'im_shape'
],
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
self
.
cascade_bbox_reg_weights
,
self
.
cls_agnostic_bbox_reg
)
im_info
,
im_shape
,
roi_feat_list
,
rcnn_pred_list
,
proposal_list
,
self
.
cascade_bbox_reg_weights
)
bbox_pred
=
bbox_pred
[
'bbox'
]
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
'mask_pred'
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
mask_name
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
mask_pred
,
bbox_pred
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def
_decode_box
(
self
,
proposals
,
bbox_pred
,
curr_stage
):
rcnn_loc_delta_r
=
fluid
.
layers
.
reshape
(
...
...
@@ -269,7 +375,9 @@ class CascadeMaskRCNN(object):
def
train
(
self
,
feed_vars
):
return
self
.
build
(
feed_vars
,
'train'
)
def
eval
(
self
,
feed_vars
):
def
eval
(
self
,
feed_vars
,
multi_scale
=
None
,
mask_branch
=
False
):
if
multi_scale
:
return
self
.
build_multi_scale
(
feed_vars
,
mask_branch
)
return
self
.
build
(
feed_vars
,
'test'
)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/architectures/cascade_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -74,16 +74,13 @@ class CascadeRCNN(object):
self
.
cascade_rcnn_loss_weight
=
[
1.0
,
0.5
,
0.25
]
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
im
=
feed_vars
[
'image'
]
assert
mode
in
[
'train'
,
'test'
],
\
"only 'train' and 'test' mode is supported"
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
for
var
in
required_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
self
.
_input_check
(
required_fields
,
feed_vars
)
im
=
feed_vars
[
'image'
]
im_info
=
feed_vars
[
'im_info'
]
if
mode
==
'train'
:
...
...
@@ -171,6 +168,98 @@ class CascadeRCNN(object):
self
.
cls_agnostic_bbox_reg
)
return
pred
def build_multi_scale(self, feed_vars):
    """Build the multi-scale test graph for every image input.

    Each entry of ``feed_vars`` whose key contains ``'image'`` is run
    through the backbone, the optional FPN, the RPN and the three cascade
    stages. The box and score outputs of the ``idx``-th image input are
    stored under ``'bbox_<idx>'`` and ``'score_<idx>'``; ``'_flip'`` is
    appended when the input variable's name contains ``'flip'``.

    Args:
        feed_vars (dict): feed variables; must contain ``'image'``,
            ``'im_shape'`` and ``'im_info'``.

    Returns:
        dict: a copy of ``feed_vars`` updated with the backbone features
        and the per-input ``bbox_*`` / ``score_*`` outputs.
    """
    required_fields = ['image', 'im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)
    ims = [feed_vars[k] for k in feed_vars.keys() if 'image' in k]
    result = {}
    result.update(feed_vars)
    for i, im in enumerate(ims):
        # im_info packs three values per image input along axis 1; take
        # the triple that belongs to the i-th input.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        im_shape = feed_vars['im_shape']

        # backbone
        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        # NOTE(review): spatial_scale is only bound when an FPN is
        # configured, but it is used unconditionally below -- confirm this
        # architecture is always built with an FPN.
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # rpn proposals
        rpn_rois = self.rpn_head.get_proposals(
            body_feats, im_info, mode='test')

        proposal_list = []
        roi_feat_list = []
        rcnn_pred_list = []

        proposals = None
        bbox_pred = None
        # The stage index must not reuse ``i``: the outer index is needed
        # after this loop to name the outputs of the current image input.
        for stage in range(3):
            if stage > 0:
                refined_bbox = self._decode_box(
                    proposals,
                    bbox_pred,
                    curr_stage=stage - 1, )
            else:
                refined_bbox = rpn_rois

            proposals = refined_bbox
            proposal_list.append(proposals)

            # extract roi features
            roi_feat = self.roi_extractor(body_feats, proposals,
                                          spatial_scale)
            roi_feat_list.append(roi_feat)

            # bbox head
            cls_score, bbox_pred = self.bbox_head.get_output(
                roi_feat,
                wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
                name='_' + str(stage + 1) if stage > 0 else '')
            rcnn_pred_list.append((cls_score, bbox_pred))

        # get mask rois
        # NOTE(review): the roi_feat computed here is not consumed by
        # get_prediction below; it is kept so the set of ops added to the
        # program is unchanged -- confirm whether it can be dropped.
        rois = proposal_list[2]

        if self.fpn is None:
            last_feat = body_feats[list(body_feats.keys())[-1]]
            roi_feat = self.roi_extractor(last_feat, rois)
        else:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)

        pred = self.bbox_head.get_prediction(
            im_info,
            im_shape,
            roi_feat_list,
            rcnn_pred_list,
            proposal_list,
            self.cascade_bbox_reg_weights,
            self.cls_agnostic_bbox_reg,
            return_box_score=True)
        bbox_name = 'bbox_' + str(i)
        score_name = 'score_' + str(i)
        if 'flip' in im.name:
            bbox_name += '_flip'
            score_name += '_flip'
        result[bbox_name] = pred['bbox']
        result[score_name] = pred['score']
    return result
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def
_decode_box
(
self
,
proposals
,
bbox_pred
,
curr_stage
):
rcnn_loc_delta_r
=
fluid
.
layers
.
reshape
(
bbox_pred
,
(
-
1
,
self
.
cls_agnostic_bbox_reg
,
4
))
...
...
@@ -191,7 +280,9 @@ class CascadeRCNN(object):
def
train
(
self
,
feed_vars
):
return
self
.
build
(
feed_vars
,
'train'
)
def
eval
(
self
,
feed_vars
):
def
eval
(
self
,
feed_vars
,
multi_scale
=
None
):
if
multi_scale
:
return
self
.
build_multi_scale
(
feed_vars
)
return
self
.
build
(
feed_vars
,
'test'
)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/architectures/faster_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -63,6 +63,12 @@ class FasterRCNN(object):
self
.
rpn_only
=
rpn_only
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
self
.
_input_check
(
required_fields
,
feed_vars
)
im
=
feed_vars
[
'image'
]
im_info
=
feed_vars
[
'im_info'
]
if
mode
==
'train'
:
...
...
@@ -136,10 +142,62 @@ class FasterRCNN(object):
im_shape
)
return
pred
def build_multi_scale(self, feed_vars):
    """Build the multi-scale test graph for every image input.

    Each entry of ``feed_vars`` whose key contains ``'image'`` is run
    through the backbone, the optional FPN, the RPN, the RoI extractor and
    the bbox head. The outputs of the ``idx``-th image input are stored
    under ``'bbox_<idx>'`` and ``'score_<idx>'``; ``'_flip'`` is appended
    when the input variable's name contains ``'flip'``.

    Args:
        feed_vars (dict): feed variables; must contain ``'image'``,
            ``'im_info'`` and ``'im_shape'``.

    Returns:
        dict: a copy of ``feed_vars`` updated with the backbone features
        and the per-input ``bbox_*`` / ``score_*`` outputs.
    """
    self._input_check(['image', 'im_info', 'im_shape'], feed_vars)
    images = [feed_vars[key] for key in feed_vars.keys() if 'image' in key]
    outputs = dict(feed_vars)
    for idx, image in enumerate(images):
        # im_info packs three values per image input along axis 1.
        first_col = 3 * idx
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[first_col],
            ends=[first_col + 3])
        im_shape = feed_vars['im_shape']

        body_feats = self.backbone(image)
        outputs.update(body_feats)
        # Remember the backbone level names before the FPN (if any)
        # replaces body_feats.
        backbone_names = list(body_feats.keys())

        use_fpn = self.fpn is not None
        if use_fpn:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')

        if use_fpn:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
        else:
            # Without an FPN the RoI extractor only uses the last level of
            # the feature maps; backbone_names[-1] is that level's name.
            roi_feat = self.roi_extractor(body_feats[backbone_names[-1]],
                                          rois)

        pred = self.bbox_head.get_prediction(
            roi_feat, rois, im_info, im_shape, return_box_score=True)

        suffix = '_flip' if 'flip' in image.name else ''
        outputs['bbox_' + str(idx) + suffix] = pred['bbox']
        outputs['score_' + str(idx) + suffix] = pred['score']
    return outputs
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def
train
(
self
,
feed_vars
):
return
self
.
build
(
feed_vars
,
'train'
)
def
eval
(
self
,
feed_vars
):
def
eval
(
self
,
feed_vars
,
multi_scale
=
None
):
if
multi_scale
:
return
self
.
build_multi_scale
(
feed_vars
)
return
self
.
build
(
feed_vars
,
'test'
)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/architectures/mask_rcnn.py
浏览文件 @
27ec95d7
...
...
@@ -69,18 +69,14 @@ class MaskRCNN(object):
self
.
fpn
=
fpn
def
build
(
self
,
feed_vars
,
mode
=
'train'
):
im
=
feed_vars
[
'image'
]
assert
mode
in
[
'train'
,
'test'
],
\
"only 'train' and 'test' mode is supported"
if
mode
==
'train'
:
required_fields
=
[
'gt_label'
,
'gt_box'
,
'gt_mask'
,
'is_crowd'
,
'im_info'
]
else
:
required_fields
=
[
'im_shape'
,
'im_info'
]
for
var
in
required_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
self
.
_input_check
(
required_fields
,
feed_vars
)
im
=
feed_vars
[
'image'
]
im_info
=
feed_vars
[
'im_info'
]
mixed_precision_enabled
=
mixed_precision_global_state
()
is
not
None
...
...
@@ -153,57 +149,135 @@ class MaskRCNN(object):
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
rois
)
rois
=
rois
/
im_scale
return
{
'proposal'
:
rois
}
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
roi_feat
=
self
.
roi_extractor
(
last_feat
,
rois
)
else
:
roi_feat
=
self
.
roi_extractor
(
body_feats
,
rois
,
spatial_scale
)
mask_name
=
'mask_pred'
mask_pred
,
bbox_pred
=
self
.
single_scale_eval
(
body_feats
,
mask_name
,
rois
,
im_info
,
feed_vars
[
'im_shape'
],
spatial_scale
)
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
def build_multi_scale(self, feed_vars, mask_branch=False):
    """Build the multi-scale test graph for the box or the mask branch.

    Each entry of ``feed_vars`` whose key contains ``'image'`` is run
    through the backbone, the optional FPN and the RPN.

    * ``mask_branch=False``: the bbox head produces boxes and scores for
      the ``idx``-th image input, stored under ``'bbox_<idx>'`` and
      ``'score_<idx>'``.
    * ``mask_branch=True``: boxes are read from ``feed_vars['bbox']`` (or
      ``feed_vars['bbox_flip']`` for flipped inputs) and the mask output
      of ``single_scale_eval`` is stored under ``'mask_pred_<idx>'``.

    In both modes ``'_flip'`` is appended to the output name when the
    input variable's name contains ``'flip'``.

    Args:
        feed_vars (dict): feed variables; must contain ``'image'`` and
            ``'im_info'``. ``'im_shape'`` is read in both modes.
        mask_branch (bool): build the mask sub-graph instead of the box
            sub-graph.

    Returns:
        dict: the collected outputs. In box mode it also contains every
        entry of ``feed_vars``; in both modes it contains the backbone
        features.
    """
    required_fields = ['image', 'im_info']
    self._input_check(required_fields, feed_vars)

    ims = [feed_vars[k] for k in feed_vars.keys() if 'image' in k]
    result = {}

    if not mask_branch:
        assert 'im_shape' in feed_vars, \
            "{} has no im_shape field".format(feed_vars)
        result.update(feed_vars)

    for i, im in enumerate(ims):
        # im_info packs three values per image input along axis 1.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        # Bind spatial_scale even without an FPN: the mask branch passes
        # it to single_scale_eval unconditionally, and an unbound name
        # would raise NameError there.
        spatial_scale = None
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)
        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
        if not mask_branch:
            im_shape = feed_vars['im_shape']
            body_feat_names = list(body_feats.keys())
            if self.fpn is None:
                body_feat = body_feats[body_feat_names[-1]]
                roi_feat = self.roi_extractor(body_feat, rois)
            else:
                roi_feat = self.roi_extractor(body_feats, rois,
                                              spatial_scale)
            pred = self.bbox_head.get_prediction(
                roi_feat, rois, im_info, im_shape, return_box_score=True)
            bbox_name = 'bbox_' + str(i)
            score_name = 'score_' + str(i)
            if 'flip' in im.name:
                bbox_name += '_flip'
                score_name += '_flip'
            result[bbox_name] = pred['bbox']
            result[score_name] = pred['score']
        else:
            mask_name = 'mask_pred_' + str(i)
            bbox_pred = feed_vars['bbox']
            result.update({im.name: im})
            if 'flip' in im.name:
                mask_name += '_flip'
                bbox_pred = feed_vars['bbox_flip']
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats, mask_name, rois, im_info,
                feed_vars['im_shape'], spatial_scale, bbox_pred)
            result[mask_name] = mask_pred
    return result
def
single_scale_eval
(
self
,
body_feats
,
mask_name
,
rois
,
im_info
,
im_shape
,
spatial_scale
,
bbox_pred
=
None
):
if
self
.
fpn
is
None
:
last_feat
=
body_feats
[
list
(
body_feats
.
keys
())[
-
1
]]
roi_feat
=
self
.
roi_extractor
(
last_feat
,
rois
)
else
:
roi_feat
=
self
.
roi_extractor
(
body_feats
,
rois
,
spatial_scale
)
if
not
bbox_pred
:
bbox_pred
=
self
.
bbox_head
.
get_prediction
(
roi_feat
,
rois
,
im_info
,
feed_vars
[
'im_shape'
]
)
im_shape
)
bbox_pred
=
bbox_pred
[
'bbox'
]
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
'mask_pred'
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
{
'bbox'
:
bbox_pred
,
'mask'
:
mask_pred
}
# share weight
bbox_shape
=
fluid
.
layers
.
shape
(
bbox_pred
)
bbox_size
=
fluid
.
layers
.
reduce_prod
(
bbox_shape
)
bbox_size
=
fluid
.
layers
.
reshape
(
bbox_size
,
[
1
,
1
])
size
=
fluid
.
layers
.
fill_constant
([
1
,
1
],
value
=
6
,
dtype
=
'int32'
)
cond
=
fluid
.
layers
.
less_than
(
x
=
bbox_size
,
y
=
size
)
mask_pred
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
False
,
name
=
mask_name
)
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
cond
):
fluid
.
layers
.
assign
(
input
=
bbox_pred
,
output
=
mask_pred
)
with
switch
.
default
():
bbox
=
fluid
.
layers
.
slice
(
bbox_pred
,
[
1
],
starts
=
[
2
],
ends
=
[
6
])
im_scale
=
fluid
.
layers
.
slice
(
im_info
,
[
1
],
starts
=
[
2
],
ends
=
[
3
])
im_scale
=
fluid
.
layers
.
sequence_expand
(
im_scale
,
bbox
)
mask_rois
=
bbox
*
im_scale
if
self
.
fpn
is
None
:
mask_feat
=
self
.
roi_extractor
(
last_feat
,
mask_rois
)
mask_feat
=
self
.
bbox_head
.
get_head_feat
(
mask_feat
)
else
:
mask_feat
=
self
.
roi_extractor
(
body_feats
,
mask_rois
,
spatial_scale
,
is_mask
=
True
)
mask_out
=
self
.
mask_head
.
get_prediction
(
mask_feat
,
bbox
)
fluid
.
layers
.
assign
(
input
=
mask_out
,
output
=
mask_pred
)
return
mask_pred
,
bbox_pred
def
_input_check
(
self
,
require_fields
,
feed_vars
):
for
var
in
require_fields
:
assert
var
in
feed_vars
,
\
"{} has no {} field"
.
format
(
feed_vars
,
var
)
def
train
(
self
,
feed_vars
):
return
self
.
build
(
feed_vars
,
'train'
)
def
eval
(
self
,
feed_vars
):
def
eval
(
self
,
feed_vars
,
multi_scale
=
None
,
mask_branch
=
False
):
if
multi_scale
:
return
self
.
build_multi_scale
(
feed_vars
,
mask_branch
)
return
self
.
build
(
feed_vars
,
'test'
)
def
test
(
self
,
feed_vars
):
...
...
ppdet/modeling/backbones/fpn.py
浏览文件 @
27ec95d7
...
...
@@ -17,7 +17,7 @@ from __future__ import division
from
__future__
import
print_function
from
collections
import
OrderedDict
import
copy
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.initializer
import
Xavier
...
...
@@ -110,6 +110,7 @@ class FPN(object):
their name.
spatial_scale(list): A list of multiplicative spatial scale factor.
"""
spatial_scale
=
copy
.
deepcopy
(
self
.
spatial_scale
)
body_name_list
=
list
(
body_dict
.
keys
())[::
-
1
]
num_backbone_stages
=
len
(
body_name_list
)
self
.
fpn_inner_output
=
[[]
for
_
in
range
(
num_backbone_stages
)]
...
...
@@ -179,7 +180,7 @@ class FPN(object):
fpn_dict
[
fpn_name
]
=
fpn_output
fpn_name_list
.
append
(
fpn_name
)
if
not
self
.
has_extra_convs
and
self
.
max_level
-
self
.
min_level
==
len
(
s
elf
.
s
patial_scale
):
spatial_scale
):
body_top_name
=
fpn_name_list
[
0
]
body_top_extension
=
fluid
.
layers
.
pool2d
(
fpn_dict
[
body_top_name
],
...
...
@@ -189,9 +190,9 @@ class FPN(object):
name
=
body_top_name
+
'_subsampled_2x'
)
fpn_dict
[
body_top_name
+
'_subsampled_2x'
]
=
body_top_extension
fpn_name_list
.
insert
(
0
,
body_top_name
+
'_subsampled_2x'
)
s
elf
.
spatial_scale
.
insert
(
0
,
self
.
spatial_scale
[
0
]
*
0.5
)
s
patial_scale
.
insert
(
0
,
spatial_scale
[
0
]
*
0.5
)
# Coarser FPN levels introduced for RetinaNet
highest_backbone_level
=
self
.
min_level
+
len
(
s
elf
.
s
patial_scale
)
-
1
highest_backbone_level
=
self
.
min_level
+
len
(
spatial_scale
)
-
1
if
self
.
has_extra_convs
and
self
.
max_level
>
highest_backbone_level
:
fpn_blob
=
body_dict
[
body_name_list
[
0
]]
for
i
in
range
(
highest_backbone_level
+
1
,
self
.
max_level
+
1
):
...
...
@@ -215,6 +216,6 @@ class FPN(object):
name
=
fpn_name
)
fpn_dict
[
fpn_name
]
=
fpn_blob
fpn_name_list
.
insert
(
0
,
fpn_name
)
s
elf
.
spatial_scale
.
insert
(
0
,
self
.
spatial_scale
[
0
]
*
0.5
)
s
patial_scale
.
insert
(
0
,
spatial_scale
[
0
]
*
0.5
)
res_dict
=
OrderedDict
([(
k
,
fpn_dict
[
k
])
for
k
in
fpn_name_list
])
return
res_dict
,
s
elf
.
s
patial_scale
return
res_dict
,
spatial_scale
ppdet/modeling/model_input.py
浏览文件 @
27ec95d7
...
...
@@ -17,6 +17,7 @@ from __future__ import print_function
from
__future__
import
division
from
collections
import
OrderedDict
from
ppdet.data.transform.operators
import
*
from
paddle
import
fluid
...
...
@@ -38,7 +39,7 @@ feed_var_def = [
# yapf: enable
def
create_feed
(
feed
,
use_pyreader
=
True
):
def
create_feed
(
feed
,
use_pyreader
=
True
,
sub_prog_feed
=
False
):
image_shape
=
feed
.
image_shape
feed_var_map
=
{
var
[
'name'
]:
var
for
var
in
feed_var_def
}
feed_var_map
[
'image'
]
=
{
...
...
@@ -60,6 +61,58 @@ def create_feed(feed, use_pyreader=True):
feed_var_map
[
'gt_box'
][
'lod_level'
]
=
0
feed_var_map
[
'is_difficult'
][
'lod_level'
]
=
0
base_name_list
=
[
'image'
]
num_scale
=
getattr
(
feed
,
'num_scale'
,
1
)
sample_transform
=
feed
.
sample_transforms
multiscale_test
=
False
aug_flip
=
False
for
t
in
sample_transform
:
if
isinstance
(
t
,
MultiscaleTestResize
):
multiscale_test
=
True
aug_flip
=
t
.
use_flip
assert
(
len
(
t
.
target_size
)
+
1
)
*
(
aug_flip
+
1
)
==
num_scale
,
\
"num_scale: {} is not equal to the actual number of scale: {}."
\
.
format
(
num_scale
,
(
len
(
t
.
target_size
)
+
1
)
*
(
aug_flip
+
1
))
break
if
aug_flip
:
num_scale
//=
2
base_name_list
.
insert
(
0
,
'flip_image'
)
feed_var_map
[
'flip_image'
]
=
{
'name'
:
'flip_image'
,
'shape'
:
image_shape
,
'dtype'
:
'float32'
,
'lod_level'
:
0
}
image_name_list
=
[]
if
multiscale_test
:
for
base_name
in
base_name_list
:
for
i
in
range
(
0
,
num_scale
):
name
=
base_name
if
i
==
0
else
base_name
+
'_scale_'
+
str
(
i
-
1
)
feed_var_map
[
name
]
=
{
'name'
:
name
,
'shape'
:
image_shape
,
'dtype'
:
'float32'
,
'lod_level'
:
0
}
image_name_list
.
append
(
name
)
feed_var_map
[
'im_info'
][
'shape'
]
=
[
feed
.
num_scale
*
3
]
feed
.
fields
=
image_name_list
+
feed
.
fields
[
1
:]
if
sub_prog_feed
:
box_names
=
[
'bbox'
,
'bbox_flip'
]
for
box_name
in
box_names
:
sub_prog_feed
=
{
'name'
:
box_name
,
'shape'
:
[
6
],
'dtype'
:
'float32'
,
'lod_level'
:
1
}
feed
.
fields
=
feed
.
fields
+
[
box_name
]
feed_var_map
[
box_name
]
=
sub_prog_feed
feed_vars
=
OrderedDict
([(
key
,
fluid
.
layers
.
data
(
name
=
feed_var_map
[
key
][
'name'
],
shape
=
feed_var_map
[
key
][
'shape'
],
...
...
ppdet/modeling/roi_heads/bbox_head.py
浏览文件 @
27ec95d7
...
...
@@ -280,7 +280,12 @@ class BBoxHead(object):
loss_bbox
=
fluid
.
layers
.
reduce_mean
(
loss_bbox
)
return
{
'loss_cls'
:
loss_cls
,
'loss_bbox'
:
loss_bbox
}
def
get_prediction
(
self
,
roi_feat
,
rois
,
im_info
,
im_shape
):
def
get_prediction
(
self
,
roi_feat
,
rois
,
im_info
,
im_shape
,
return_box_score
=
False
):
"""
Get prediction bounding box in test stage.
...
...
@@ -308,5 +313,7 @@ class BBoxHead(object):
bbox_pred
=
fluid
.
layers
.
reshape
(
bbox_pred
,
(
-
1
,
self
.
num_classes
,
4
))
decoded_box
=
self
.
box_coder
(
prior_box
=
boxes
,
target_box
=
bbox_pred
)
cliped_box
=
fluid
.
layers
.
box_clip
(
input
=
decoded_box
,
im_info
=
im_shape
)
if
return_box_score
:
return
{
'bbox'
:
cliped_box
,
'score'
:
cls_prob
}
pred_result
=
self
.
nms
(
bboxes
=
cliped_box
,
scores
=
cls_prob
)
return
{
'bbox'
:
pred_result
}
ppdet/modeling/roi_heads/cascade_head.py
浏览文件 @
27ec95d7
...
...
@@ -146,7 +146,8 @@ class CascadeBBoxHead(object):
rcnn_pred_list
,
proposal_list
,
cascade_bbox_reg_weights
,
cls_agnostic_bbox_reg
=
2
):
cls_agnostic_bbox_reg
=
2
,
return_box_score
=
False
):
"""
Get prediction bounding box in test stage.
:
...
...
@@ -214,7 +215,8 @@ class CascadeBBoxHead(object):
axis
=
1
)
box_out
=
fluid
.
layers
.
box_clip
(
input
=
decoded_box
,
im_info
=
im_shape
)
if
return_box_score
:
return
{
'bbox'
:
box_out
,
'score'
:
boxes_cls_prob_mean
}
pred_result
=
self
.
nms
(
bboxes
=
box_out
,
scores
=
boxes_cls_prob_mean
)
return
{
"bbox"
:
pred_result
}
...
...
ppdet/utils/cli.py
浏览文件 @
27ec95d7
...
...
@@ -16,7 +16,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import
yaml
import
re
from
ppdet.core.workspace
import
get_registered_modules
from
ppdet.core.workspace
import
get_registered_modules
,
dump_value
__all__
=
[
'ColorTTY'
,
'ArgsParser'
]
...
...
ppdet/utils/eval_utils.py
浏览文件 @
27ec95d7
...
...
@@ -24,6 +24,7 @@ import time
import
paddle.fluid
as
fluid
from
ppdet.utils.voc_eval
import
bbox_eval
as
voc_bbox_eval
from
ppdet.utils.post_process
import
mstest_box_post_process
,
mstest_mask_post_process
,
box_flip
__all__
=
[
'parse_fetches'
,
'eval_run'
,
'eval_results'
,
'json_eval_results'
]
...
...
@@ -57,7 +58,52 @@ def parse_fetches(fetches, prog=None, extra_keys=None):
return
keys
,
values
,
cls
def
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
):
def length2lod(length_lod):
    """Convert a sequence of lengths into a single level of LoD offsets.

    The offsets start at 0 and each following entry is the running sum of
    the lengths, e.g. ``[2, 3, 1]`` becomes ``[[0, 2, 5, 6]]``.

    Args:
        length_lod: iterable of sequence lengths.

    Returns:
        list: a one-element list holding the list of cumulative offsets.
    """
    offsets = [0]
    total = 0
    for length in length_lod:
        total = total + length
        offsets.append(total)
    return [offsets]
def get_sub_feed(input, place):
    """Build the feed dict of LoDTensors for the mask sub-program.

    The entries kept from ``input`` are the fixed keys ``'bbox'``,
    ``'im_info'``, ``'im_id'``, ``'im_shape'`` and ``'bbox_flip'`` (when
    present), followed by every key containing ``'image'``. Each kept
    value is indexed as ``value[0]`` for the tensor data; for keys
    containing ``'bbox'`` the lengths in ``value[1][0]`` are converted to
    offsets and set as the tensor's LoD.

    Args:
        input (dict): results of the main program, keyed by name.
        place: the place passed to ``LoDTensor.set``.

    Returns:
        dict: name -> ``fluid.LoDTensor`` ready to be fed.
    """
    fixed_keys = ['bbox', 'im_info', 'im_id', 'im_shape', 'bbox_flip']
    selected = {}
    for name in fixed_keys:
        if name in input:
            selected[name] = input[name]
    for name in input:
        if 'image' in name:
            selected[name] = input[name]

    res_feed = {}
    for name, value in selected.items():
        tensor = fluid.LoDTensor()
        tensor.set(value[0], place)
        if 'bbox' in name:
            tensor.set_lod(length2lod(value[1][0]))
        res_feed[name] = tensor
    return res_feed
def clean_res(result, keep_name_list):
    """Keep only the whitelisted entries of ``result``.

    Note that ``result`` is emptied in place after the wanted entries are
    copied out.

    Args:
        result (dict): dictionary to filter; cleared as a side effect.
        keep_name_list: names of the entries to keep.

    Returns:
        dict: a new dictionary with the entries of ``result`` whose key is
        in ``keep_name_list``.
    """
    kept = {
        name: value
        for name, value in result.items() if name in keep_name_list
    }
    result.clear()
    return kept
def
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
,
cfg
=
None
,
sub_prog
=
None
,
sub_keys
=
None
,
sub_values
=
None
):
"""
Run evaluation program, return program outputs.
"""
...
...
@@ -84,6 +130,28 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls):
k
:
(
np
.
array
(
v
),
v
.
recursive_sequence_lengths
())
for
k
,
v
in
zip
(
keys
,
outs
)
}
multi_scale_test
=
getattr
(
cfg
,
'MultiScaleTEST'
,
None
)
mask_multi_scale_test
=
multi_scale_test
and
'Mask'
in
cfg
.
architecture
if
multi_scale_test
:
post_res
=
mstest_box_post_process
(
res
,
cfg
)
res
.
update
(
post_res
)
if
mask_multi_scale_test
:
place
=
fluid
.
CUDAPlace
(
0
)
if
cfg
.
use_gpu
else
fluid
.
CPUPlace
()
sub_feed
=
get_sub_feed
(
res
,
place
)
sub_prog_outs
=
exe
.
run
(
sub_prog
,
feed
=
sub_feed
,
fetch_list
=
sub_values
,
return_numpy
=
False
)
sub_prog_res
=
{
k
:
(
np
.
array
(
v
),
v
.
recursive_sequence_lengths
())
for
k
,
v
in
zip
(
sub_keys
,
sub_prog_outs
)
}
post_res
=
mstest_mask_post_process
(
sub_prog_res
,
cfg
)
res
.
update
(
post_res
)
if
multi_scale_test
:
res
=
clean_res
(
res
,
[
'im_info'
,
'bbox'
,
'im_id'
,
'im_shape'
,
'mask'
])
results
.
append
(
res
)
if
iter_id
%
100
==
0
:
logger
.
info
(
'Test iter {}'
.
format
(
iter_id
))
...
...
ppdet/utils/post_process.py
0 → 100644
浏览文件 @
27ec95d7
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
logging
import
numpy
as
np
import
paddle.fluid
as
fluid
__all__
=
[
'nms'
]
logger
=
logging
.
getLogger
(
__name__
)
def box_flip(boxes, im_shape):
    """Mirror boxes horizontally inside the image.

    Columns ``0::4`` and ``2::4`` of ``boxes`` are swapped and reflected
    as ``width - x - 1``; all other columns are copied unchanged. The
    input array is not modified.

    Args:
        boxes: 2-D array whose columns repeat in groups of four
            (presumably x1, y1, x2, y2 -- confirm against the caller).
        im_shape: indexable whose ``[0][1]`` entry is used as the image
            width.

    Returns:
        A flipped copy of ``boxes``.
    """
    width = im_shape[0][1]
    mirrored = boxes.copy()
    # Both views read from the untouched input, so the assignment order
    # below does not matter.
    left, right = boxes[:, 0::4], boxes[:, 2::4]
    mirrored[:, 2::4] = width - left - 1
    mirrored[:, 0::4] = width - right - 1
    return mirrored
def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS.

    Boxes are visited in descending score order. Each box that is still
    alive suppresses every lower-scoring box whose IoU with it is at least
    ``thresh``. Widths and heights use the inclusive ``+ 1`` pixel
    convention.

    Args:
        dets: 2-D array with one detection per row, laid out as
            ``[score, x1, y1, x2, y2]``.
        thresh: IoU threshold at or above which a box is suppressed.

    Returns:
        Indices (in ascending order) of the rows of ``dets`` that are
        kept. An empty list is returned when ``dets`` has no rows.
    """
    if dets.shape[0] == 0:
        return []
    scores = dets[:, 0]
    x1 = dets[:, 1]
    y1 = dets[:, 2]
    x2 = dets[:, 3]
    y2 = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    ndets = dets.shape[0]
    # np.int was removed in NumPy 1.24; a fixed-width integer dtype keeps
    # the 0/1 flag semantics on every NumPy version.
    suppressed = np.zeros((ndets), dtype=np.int32)

    # _i, _j index into the score-sorted order; i, j are the
    # corresponding row indices of dets.
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        # box currently under consideration
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            # overlap with the lower-scoring box j
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return np.where(suppressed == 0)[0]
def bbox_area(box):
    """Return the area of one box using the inclusive ``+ 1`` convention.

    Args:
        box: indexable whose entries 0..3 are read as x1, y1, x2, y2.

    Returns:
        ``(x2 - x1 + 1) * (y2 - y1 + 1)``.
    """
    width = box[2] - box[0] + 1
    height = box[3] - box[1] + 1
    area = width * height
    return area
def bbox_overlaps(x, y):
    """Compute the pairwise IoU between two sets of boxes.

    Intersections and areas use the inclusive ``+ 1`` pixel convention.
    Pairs that do not overlap keep an IoU of 0.

    Args:
        x: array of shape (N, 4+) with rows read as x1, y1, x2, y2.
        y: array of shape (K, 4+) with rows read as x1, y1, x2, y2.

    Returns:
        float32 array of shape (N, K) where entry ``[n, k]`` is the IoU of
        ``x[n]`` and ``y[k]``.
    """
    num_x, num_y = x.shape[0], y.shape[0]
    iou = np.zeros((num_x, num_y), dtype=np.float32)
    for col in range(num_y):
        ref_area = bbox_area(y[col])
        for row in range(num_x):
            inter_w = min(x[row, 2], y[col, 2]) - max(x[row, 0],
                                                      y[col, 0]) + 1
            if not inter_w > 0:
                continue
            inter_h = min(x[row, 3], y[col, 3]) - max(x[row, 1],
                                                      y[col, 1]) + 1
            if not inter_h > 0:
                continue
            union = bbox_area(x[row]) + ref_area - inter_w * inter_h
            iou[row, col] = inter_w * inter_h / union
    return iou
def box_voting(nms_dets, dets, vote_thresh):
    """Refine kept detections by score-weighted box voting.

    For each row of ``nms_dets`` the coordinates are replaced by the
    average of all boxes in ``dets`` whose IoU with it is at least
    ``vote_thresh``, weighted by their scores. Column 0 (the score) of
    each kept detection is left unchanged.

    Args:
        nms_dets: array of kept detections, rows ``[score, box...]``.
        dets: array of all candidate detections, rows ``[score, box...]``.
        vote_thresh: minimum IoU for a candidate to take part in the vote.

    Returns:
        A copy of ``nms_dets`` with voted box coordinates.
    """
    voted = nms_dets.copy()
    candidate_boxes = dets[:, 1:]
    candidate_scores = dets[:, 0]
    overlaps = bbox_overlaps(nms_dets[:, 1:], candidate_boxes)
    for row in range(nms_dets.shape[0]):
        voters = np.where(overlaps[row] >= vote_thresh)[0]
        voted[row, 1:] = np.average(
            candidate_boxes[voters, :],
            axis=0,
            weights=candidate_scores[voters])
    return voted
def get_nms_result(boxes, scores, cfg):
    """Run per-class NMS (and optional box voting) over merged detections.

    Class 0 is skipped. For every other class ``j`` the detections whose
    score exceeds ``score_thresh`` go through ``nms`` and, when
    ``enable_voting`` is set, ``box_voting``. The class index is prepended
    as a label column. Finally the detections are limited to
    ``detections_per_im`` over all classes by score.

    Args:
        boxes: array whose columns ``4*j:4*(j+1)`` hold the box of class
            ``j`` for each candidate.
        scores: array whose column ``j`` holds the class-``j`` score.
        cfg: config providing ``num_classes`` and the ``MultiScaleTEST``
            dict (``score_thresh``, ``nms_thresh``, ``enable_voting``,
            ``vote_thresh``, ``detections_per_im``).

    Returns:
        float32 array with rows ``[label, score, box...]`` stacked over
        all foreground classes.
    """
    ms_cfg = cfg.MultiScaleTEST
    num_classes = cfg.num_classes
    foreground = range(1, num_classes)
    per_class = [[] for _ in range(num_classes)]
    for cls_id in foreground:
        picked = np.where(scores[:, cls_id] > ms_cfg['score_thresh'])[0]
        cls_scores = scores[picked, cls_id]
        cls_coords = boxes[picked, cls_id * 4:(cls_id + 1) * 4]
        cls_dets = np.hstack((cls_scores[:, np.newaxis], cls_coords)).astype(
            np.float32, copy=False)
        keep = nms(cls_dets, ms_cfg['nms_thresh'])
        kept_dets = cls_dets[keep, :]
        if ms_cfg['enable_voting']:
            kept_dets = box_voting(kept_dets, cls_dets,
                                   ms_cfg['vote_thresh'])
        # Prepend the class label as the first column.
        labels = np.array([cls_id] * len(keep))
        per_class[cls_id] = np.hstack(
            (labels[:, np.newaxis], kept_dets)).astype(
                np.float32, copy=False)

    # Limit to max_per_image detections **over all classes**
    all_scores = np.hstack([per_class[c][:, 1] for c in foreground])
    max_dets = ms_cfg['detections_per_im']
    if len(all_scores) > max_dets:
        cutoff = np.sort(all_scores)[-max_dets]
        for cls_id in foreground:
            survivors = np.where(per_class[cls_id][:, 1] >= cutoff)[0]
            per_class[cls_id] = per_class[cls_id][survivors, :]

    return np.vstack([per_class[c] for c in foreground])
def mstest_box_post_process(result, cfg):
    """
    Merge the box outputs of a multi-scale test and run NMS on them.

    Only available for batch_size=1 now.

    Every entry of ``result`` whose key contains ``'bbox'`` is reshaped to
    ``(-1, 4 * cfg.num_classes)`` and paired with the matching
    ``'score...'`` entry. Boxes under keys containing ``'flip'`` are
    flipped back first. All boxes and scores are then concatenated and
    passed to ``get_nms_result``.

    Args:
        result (dict): program outputs; each value is indexed with
            ``[0]`` to get the array. Must contain ``'im_shape'``.
        cfg: config providing ``num_classes`` plus whatever
            ``get_nms_result`` reads.

    Returns:
        dict: ``{'bbox': (dets, [[len(dets)]])}``, plus a ``'bbox_flip'``
        entry with horizontally flipped boxes when any flipped input was
        seen.
    """
    im_shape = result['im_shape'][0]
    saw_flip = False
    merged_boxes = []
    merged_scores = []
    for name in result.keys():
        if 'bbox' not in name:
            continue
        boxes = np.reshape(result[name][0], (-1, 4 * cfg.num_classes))
        # 'bbox<suffix>' pairs with 'score<suffix>'.
        scores = result['score' + name[4:]][0]
        if 'flip' in name:
            boxes = box_flip(boxes, im_shape)
            saw_flip = True
        merged_boxes.append(boxes)
        merged_scores.append(scores)

    merged_boxes = np.concatenate(merged_boxes)
    merged_scores = np.concatenate(merged_scores)
    bbox_pred = get_nms_result(merged_boxes, merged_scores, cfg)

    post_bbox = {'bbox': (bbox_pred, [[len(bbox_pred)]])}
    if saw_flip:
        # The first two columns are kept as-is; the remaining box columns
        # are flipped.
        flipped = np.append(
            bbox_pred[:, :2], box_flip(bbox_pred[:, 2:], im_shape), axis=1)
        post_bbox['bbox_flip'] = (flipped, [[len(flipped)]])
    return post_bbox
def mstest_mask_post_process(result, cfg):
    """Average the mask outputs of a multi-scale test.

    Every entry of ``result`` whose key contains ``'mask'`` is collected.
    Masks that are not 4-D are replaced by an empty ``(0, M, M)`` array,
    where ``M`` is ``cfg.FPNRoIAlign['mask_resolution']``. 4-D masks under
    keys containing ``'flip'`` are reversed along the last axis. The
    collected masks are then averaged element-wise.

    Args:
        result (dict): sub-program outputs; each value is indexed with
            ``[0]`` to get the array. Must contain ``'im_shape'``.
        cfg: config providing ``FPNRoIAlign['mask_resolution']``.

    Returns:
        dict: ``{'mask': (mask_pred, [[len(mask_pred)]])}``.
    """
    # Value is unused; the lookup is kept so a missing 'im_shape' still
    # raises.
    result['im_shape'][0]
    resolution = cfg.FPNRoIAlign['mask_resolution']
    collected = []
    for name in result.keys():
        if 'mask' not in name:
            continue
        masks = result[name][0]
        if len(masks.shape) != 4:
            masks = np.zeros((0, resolution, resolution))
        elif 'flip' in name:
            masks = masks[:, :, :, ::-1]
        collected.append(masks)

    mask_pred = np.mean(collected, axis=0)
    return {'mask': (mask_pred, [[len(mask_pred)]])}
tools/configure.py
浏览文件 @
27ec95d7
...
...
@@ -19,7 +19,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import
yaml
from
ppdet.core.workspace
import
get_registered_modules
,
load_config
from
ppdet.core.workspace
import
get_registered_modules
,
load_config
,
dump_value
from
ppdet.utils.cli
import
ColorTTY
,
print_total_cfg
color_tty
=
ColorTTY
()
...
...
@@ -43,18 +43,6 @@ MISC_CONFIG = {
}
def
dump_value
(
value
):
# XXX this is hackish, but collections.abc is not available in python 2
if
hasattr
(
value
,
'__dict__'
)
or
isinstance
(
value
,
(
dict
,
tuple
,
list
)):
value
=
yaml
.
dump
(
value
,
default_flow_style
=
True
)
value
=
value
.
replace
(
'
\n
'
,
''
)
value
=
value
.
replace
(
'...'
,
''
)
return
"'{}'"
.
format
(
value
)
else
:
# primitive types
return
str
(
value
)
def
dump_config
(
module
,
minimal
=
False
):
args
=
module
.
schema
.
values
()
if
minimal
:
...
...
tools/eval.py
浏览文件 @
27ec95d7
...
...
@@ -59,7 +59,6 @@ def main():
raise
ValueError
(
"'architecture' not specified in config file."
)
merge_config
(
FLAGS
.
opt
)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu
(
cfg
.
use_gpu
)
print_total_cfg
(
cfg
)
...
...
@@ -69,6 +68,8 @@ def main():
else
:
eval_feed
=
create
(
cfg
.
eval_feed
)
multi_scale_test
=
getattr
(
cfg
,
'MultiScaleTEST'
,
None
)
# define executor
place
=
fluid
.
CUDAPlace
(
0
)
if
cfg
.
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
...
...
@@ -80,9 +81,8 @@ def main():
with
fluid
.
program_guard
(
eval_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
pyreader
,
feed_vars
=
create_feed
(
eval_feed
)
fetches
=
model
.
eval
(
feed_vars
)
fetches
=
model
.
eval
(
feed_vars
,
multi_scale_test
)
eval_prog
=
eval_prog
.
clone
(
True
)
reader
=
create_reader
(
eval_feed
,
args_path
=
FLAGS
.
dataset_dir
)
pyreader
.
decorate_sample_list_generator
(
reader
,
place
)
...
...
@@ -120,7 +120,32 @@ def main():
callable
(
model
.
is_bbox_normalized
):
is_bbox_normalized
=
model
.
is_bbox_normalized
()
results
=
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
)
sub_eval_prog
=
None
sub_keys
=
None
sub_values
=
None
# build sub-program
if
'Mask'
in
main_arch
and
multi_scale_test
:
sub_eval_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
sub_eval_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
_
,
feed_vars
=
create_feed
(
eval_feed
,
use_pyreader
=
False
,
sub_prog_feed
=
True
)
sub_fetches
=
model
.
eval
(
feed_vars
,
multi_scale_test
,
mask_branch
=
True
)
extra_keys
=
[]
if
cfg
.
metric
==
'COCO'
:
extra_keys
=
[
'im_id'
,
'im_shape'
]
if
cfg
.
metric
==
'VOC'
:
extra_keys
=
[
'gt_box'
,
'gt_label'
,
'is_difficult'
]
sub_keys
,
sub_values
,
_
=
parse_fetches
(
sub_fetches
,
sub_eval_prog
,
extra_keys
)
sub_eval_prog
=
sub_eval_prog
.
clone
(
True
)
if
'weights'
in
cfg
:
checkpoint
.
load_params
(
exe
,
sub_eval_prog
,
cfg
.
weights
)
results
=
eval_run
(
exe
,
compile_program
,
pyreader
,
keys
,
values
,
cls
,
cfg
,
sub_eval_prog
,
sub_keys
,
sub_values
)
# evaluation
resolution
=
None
...
...
tools/train.py
浏览文件 @
27ec95d7
...
...
@@ -73,9 +73,13 @@ def main():
raise
ValueError
(
"'architecture' not specified in config file."
)
merge_config
(
FLAGS
.
opt
)
if
'log_iter'
not
in
cfg
:
cfg
.
log_iter
=
20
if
'multi_scale_test'
not
in
cfg
:
cfg
.
multi_scale_test
=
False
ignore_params
=
cfg
.
finetune_exclude_pretrained_params
\
if
'finetune_exclude_pretrained_params'
in
cfg
else
[]
...
...
@@ -140,7 +144,7 @@ def main():
with
fluid
.
unique_name
.
guard
():
model
=
create
(
main_arch
)
eval_pyreader
,
feed_vars
=
create_feed
(
eval_feed
)
fetches
=
model
.
eval
(
feed_vars
)
fetches
=
model
.
eval
(
feed_vars
,
cfg
.
multi_scale_test
)
eval_prog
=
eval_prog
.
clone
(
True
)
eval_reader
=
create_reader
(
eval_feed
,
args_path
=
FLAGS
.
dataset_dir
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录