Commit 27ec95d7 authored by: W wangguanzhong, committed by: GitHub

add multi-scale test (#3376)

* add multi-scale test
Parent commit: bdf0fdc1
architecture: CascadeMaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 300000
snapshot_iter: 10000
use_gpu: true
log_iter: 20
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
metric: COCO
num_classes: 81
CascadeMaskRCNN:
backbone: SENet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: CascadeBBoxHead
bbox_assigner: CascadeBBoxAssigner
mask_assigner: MaskAssigner
mask_head: MaskHead
SENet:
depth: 152
feature_maps: [2, 3, 4, 5]
freeze_at: 2
group_width: 4
groups: 64
norm_type: bn
freeze_norm: True
variant: d
dcn_v2_stages: [3, 4, 5]
std_senet: True
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
freeze_norm: False
norm_type: gn
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
mask_resolution: 14
MaskHead:
dilation: 1
conv_dim: 256
num_convs: 4
resolution: 28
norm_type: gn
CascadeBBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [10, 20, 30]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_fraction: 0.25
fg_thresh: [0.5, 0.6, 0.7]
MaskAssigner:
resolution: 28
CascadeBBoxHead:
head: CascadeXConvNormHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
CascadeXConvNormHead:
norm_type: gn
MultiScaleTEST:
score_thresh: 0.05
nms_thresh: 0.5
detections_per_im: 100
enable_voting: true
vote_thresh: 0.9
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 280000]
- !LinearWarmup
start_factor: 0.01
steps: 2000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
sample_transforms:
- !DecodeImage
to_rgb: False
with_mixup: False
- !RandomFlipImage
is_mask_flip: true
is_normalized: false
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: False
mean:
- 102.9801
- 115.9465
- 122.7717
std:
- 1.0
- 1.0
- 1.0
- !ResizeImage
interp: 1
target_size:
- 416
- 448
- 480
- 512
- 544
- 576
- 608
- 640
- 672
- 704
- 736
- 768
- 800
- 832
- 864
- 896
- 928
- 960
- 992
- 1024
- 1056
- 1088
- 1120
- 1152
- 1184
- 1216
- 1248
- 1280
- 1312
- 1344
- 1376
- 1408
max_size: 1600
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 8
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
sample_transforms:
- !DecodeImage
to_rgb: False
- !NormalizeImage
is_channel_first: false
is_scale: False
mean:
- 102.9801
- 115.9465
- 122.7717
std:
- 1.0
- 1.0
- 1.0
- !MultiscaleTestResize
origin_target_size: 800
origin_max_size: 1333
target_size:
- 400
- 500
- 600
- 700
- 900
- 1000
- 1100
- 1200
max_size: 2000
use_flip: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadMSTest
pad_to_stride: 32
# num_scale = (len(target_size) + 1) * (1 + use_flip)
num_scale: 18
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: CascadeRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000
snapshot_iter: 10000
use_gpu: true
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/cascade_rcnn_r50_fpn_1x/model_final
metric: COCO
num_classes: 81
CascadeRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: CascadeBBoxHead
bbox_assigner: CascadeBBoxAssigner
ResNet:
norm_type: affine_channel
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
variant: b
FPN:
min_level: 2
max_level: 6
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
min_level: 2
max_level: 6
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_positive_overlap: 0.7
rpn_negative_overlap: 0.3
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
min_level: 2
max_level: 5
box_resolution: 7
sampling_ratio: 2
CascadeBBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [10, 20, 30]
bg_thresh_lo: [0.0, 0.0, 0.0]
bg_thresh_hi: [0.5, 0.6, 0.7]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
CascadeBBoxHead:
head: CascadeTwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
CascadeTwoFCHead:
mlp_dim: 1024
MultiScaleTEST:
score_thresh: 0.05
nms_thresh: 0.5
detections_per_im: 100
enable_voting: true
vote_thresh: 0.9
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [60000, 80000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
sample_transforms:
- !DecodeImage
to_rgb: true
- !NormalizeImage
is_channel_first: false
is_scale: true
mean:
- 0.485
- 0.456
- 0.406
std:
- 0.229
- 0.224
- 0.225
- !MultiscaleTestResize
origin_target_size: 800
origin_max_size: 1333
target_size:
- 400
- 500
- 600
- 700
- 900
- 1000
- 1100
- 1200
max_size: 2000
use_flip: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadMSTest
pad_to_stride: 32
num_scale: 18
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
......@@ -27,11 +27,29 @@ from .config.schema import SchemaDict, SharedConfig, extract_schema
from .config.yaml_helpers import serializable
__all__ = [
'global_config', 'load_config', 'merge_config', 'get_registered_modules',
'create', 'register', 'serializable'
'global_config',
'load_config',
'merge_config',
'get_registered_modules',
'create',
'register',
'serializable',
'dump_value',
]
def dump_value(value):
    """Format a config value as a single-line string.

    Containers (and objects exposing ``__dict__``) are rendered via
    YAML flow style and wrapped in single quotes; anything else is
    simply ``str()``-ed.
    """
    # XXX hackish membership test, but collections.abc is not available
    # in python 2
    composite = isinstance(value, (dict, tuple, list)) or hasattr(
        value, '__dict__')
    if not composite:
        # primitive types
        return str(value)
    text = yaml.dump(value, default_flow_style=True)
    text = text.replace('\n', '').replace('...', '')
    return "'{}'".format(text)
class AttrDict(dict):
"""Single level attribute dict, NOT recursive"""
......@@ -154,9 +172,9 @@ def create(cls_or_name, **kwargs):
target_key = config[k]
shared_conf = config.schema[k].default
assert isinstance(shared_conf, SharedConfig)
if target_key is not None and not isinstance(
target_key, SharedConfig):
continue # value is given for the module
if target_key is not None and not isinstance(target_key,
SharedConfig):
continue # value is given for the module
elif shared_conf.key in global_config:
# `key` is present in config
kwargs[k] = global_config[shared_conf.key]
......
......@@ -27,18 +27,18 @@ from ppdet.data.reader import Reader
from ppdet.data.transform.operators import (
DecodeImage, MixupImage, NormalizeBox, NormalizeImage, RandomDistort,
RandomFlipImage, RandomInterpImage, ResizeImage, ExpandImage, CropImage,
Permute)
Permute, MultiscaleTestResize)
from ppdet.data.transform.arrange_sample import (
ArrangeRCNN, ArrangeEvalRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD,
ArrangeTestSSD, ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO)
__all__ = [
'PadBatch', 'MultiScale', 'RandomShape', 'DataSet', 'CocoDataSet',
'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed',
'MaskRCNNTrainFeed', 'FasterRCNNTestFeed', 'MaskRCNNTestFeed',
'SSDTrainFeed', 'SSDEvalFeed', 'SSDTestFeed', 'YoloTrainFeed',
'YoloEvalFeed', 'YoloTestFeed', 'create_reader'
'PadBatch', 'MultiScale', 'RandomShape', 'PadMSTest', 'DataSet',
'CocoDataSet', 'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed',
'MaskRCNNTrainFeed', 'FasterRCNNEvalFeed', 'MaskRCNNEvalFeed',
'FasterRCNNTestFeed', 'MaskRCNNTestFeed', 'SSDTrainFeed', 'SSDEvalFeed',
'SSDTestFeed', 'YoloTrainFeed', 'YoloEvalFeed', 'YoloTestFeed',
'create_reader'
]
......@@ -113,6 +113,7 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None):
pad = [t for t in batch_transforms if isinstance(t, PadBatch)]
rand_shape = [t for t in batch_transforms if isinstance(t, RandomShape)]
multi_scale = [t for t in batch_transforms if isinstance(t, MultiScale)]
pad_ms_test = [t for t in batch_transforms if isinstance(t, PadMSTest)]
if any(pad):
transform_config['IS_PADDING'] = True
......@@ -122,6 +123,10 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None):
transform_config['RANDOM_SHAPES'] = rand_shape[0].sizes
if any(multi_scale):
transform_config['MULTI_SCALES'] = multi_scale[0].scales
if any(pad_ms_test):
transform_config['ENABLE_MULTISCALE_TEST'] = True
transform_config['NUM_SCALE'] = feed.num_scale
transform_config['COARSEST_STRIDE'] = pad_ms_test[0].pad_to_stride
if hasattr(inspect, 'getfullargspec'):
argspec = inspect.getfullargspec
......@@ -186,6 +191,20 @@ class RandomShape(object):
self.sizes = sizes
@serializable
class PadMSTest(object):
    """
    Padding for multi-scale test.

    Marker batch-transform consumed by the reader: each image of a
    multi-scale test batch is zero-padded so its spatial dimensions become
    multiples of ``pad_to_stride`` (a stride <= 1 leaves images unpadded).

    Args:
        pad_to_stride (int): pad to multiple of strides, e.g., 32
    """

    def __init__(self, pad_to_stride=0):
        # type: (int) -> None
        super(PadMSTest, self).__init__()
        self.pad_to_stride = pad_to_stride
@serializable
class DataSet(object):
"""
......@@ -502,7 +521,10 @@ class FasterRCNNEvalFeed(DataFeed):
samples=-1,
drop_last=False,
num_workers=2,
use_padded_im_info=True):
use_padded_im_info=True,
enable_multiscale=False,
num_scale=1,
enable_aug_flip=False):
sample_transforms.append(ArrangeEvalRCNN())
super(FasterRCNNEvalFeed, self).__init__(
dataset,
......@@ -517,6 +539,9 @@ class FasterRCNNEvalFeed(DataFeed):
num_workers=num_workers,
use_padded_im_info=use_padded_im_info)
self.mode = 'VAL'
self.enable_multiscale = enable_multiscale
self.num_scale = num_scale
self.enable_aug_flip = enable_aug_flip
@register
......@@ -640,7 +665,10 @@ class MaskRCNNEvalFeed(DataFeed):
drop_last=False,
num_workers=2,
use_process=False,
use_padded_im_info=True):
use_padded_im_info=True,
enable_multiscale=False,
num_scale=1,
enable_aug_flip=False):
sample_transforms.append(ArrangeTestRCNN())
super(MaskRCNNEvalFeed, self).__init__(
dataset,
......@@ -656,6 +684,9 @@ class MaskRCNNEvalFeed(DataFeed):
use_process=use_process,
use_padded_im_info=use_padded_im_info)
self.mode = 'VAL'
self.enable_multiscale = enable_multiscale
self.num_scale = num_scale
self.enable_aug_flip = enable_aug_flip
@register
......
......@@ -68,8 +68,8 @@ class Reader(object):
mapped_ds = map(sc, mapper, worker_args)
# In VAL mode, gt_bbox, gt_label can be empty, and should
# not be dropped
batched_ds = batch(mapped_ds, batchsize, drop_last,
drop_empty=(mode!="VAL"))
batched_ds = batch(
mapped_ds, batchsize, drop_last, drop_empty=(mode != "VAL"))
trans_conf = {k.lower(): v for k, v in self._trans_conf[mode].items()}
need_keys = {
......@@ -78,6 +78,8 @@ class Reader(object):
'random_shapes',
'multi_scales',
'use_padded_im_info',
'enable_multiscale_test',
'num_scale',
}
bm_config = {
key: value
......@@ -125,12 +127,15 @@ class Reader(object):
return self._make_reader('TEST')
@classmethod
def create(cls, mode, data_config,
transform_config, max_iter=-1,
my_source=None, ret_iter=True):
def create(cls,
mode,
data_config,
transform_config,
max_iter=-1,
my_source=None,
ret_iter=True):
""" create a specific reader """
reader = Reader({mode: data_config},
{mode: transform_config}, max_iter)
reader = Reader({mode: data_config}, {mode: transform_config}, max_iter)
if ret_iter:
return reader._make_reader(mode, my_source)
else:
......
......@@ -110,8 +110,11 @@ class ArrangeEvalRCNN(BaseOperator):
(image, im_info, im_id, im_shape, gt_bbox,
gt_class, difficult)
"""
im = sample['image']
keys = list(sample.keys())
ims = []
keys = sorted(list(sample.keys()))
for k in keys:
if 'image' in k:
ims.append(sample[k])
if 'im_info' in keys:
im_info = sample['im_info']
else:
......@@ -127,7 +130,9 @@ class ArrangeEvalRCNN(BaseOperator):
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
difficult = sample['difficult']
outs = (im, im_info, im_id, im_shape, gt_bbox, gt_class, difficult)
remain_list = [im_info, im_id, im_shape, gt_bbox, gt_class, difficult]
ims.extend(remain_list)
outs = tuple(ims)
return outs
......@@ -148,10 +153,13 @@ class ArrangeTestRCNN(BaseOperator):
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id)
(image, im_info, im_id, im_shape)
"""
im = sample['image']
keys = list(sample.keys())
ims = []
keys = sorted(list(sample.keys()))
for k in keys:
if 'image' in k:
ims.append(sample[k])
if 'im_info' in keys:
im_info = sample['im_info']
else:
......@@ -164,7 +172,9 @@ class ArrangeTestRCNN(BaseOperator):
# bbox prediction needs im_info as input in format of [N, 3],
# so im_shape is appended by 1 to match dimension.
im_shape = np.array((h, w, 1), dtype=np.float32)
outs = (im, im_info, im_id, im_shape)
remain_list = [im_info, im_id, im_shape]
ims.extend(remain_list)
outs = tuple(ims)
return outs
......
......@@ -121,6 +121,105 @@ class DecodeImage(BaseOperator):
return sample
@register_op
class MultiscaleTestResize(BaseOperator):
    """Rescale the input image to a set of test scales.

    Produces one resized copy per scale (plus horizontally flipped copies
    when ``use_flip`` is on) and records a ``[resize_h, resize_w, scale]``
    triple for every generated image in ``sample['im_info']``.
    """

    def __init__(self,
                 origin_target_size=800,
                 origin_max_size=1333,
                 target_size=[],
                 max_size=2000,
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to each size in target_size, capped at max_size.
        Args:
            origin_target_size(int): original target size of image's short side.
            origin_max_size(int): original max size of image.
            target_size (list): A list of target sizes of image's short side.
            max_size (int): the max size of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.
        """
        super(MultiscaleTestResize, self).__init__()
        self.origin_target_size = int(origin_target_size)
        self.origin_max_size = int(origin_max_size)
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_flip = use_flip

        if not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid. Must be List, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        if not (isinstance(self.origin_target_size, int) and isinstance(
                self.origin_max_size, int) and isinstance(self.max_size, int)
                and isinstance(self.interp, int)):
            raise TypeError("{}: input type is invalid.".format(self))

    def _compute_scale(self, short_target, long_cap, im_size_min, im_size_max):
        # Scale factor mapping the short side to `short_target`, reduced so
        # the long side never exceeds `long_cap`.
        im_scale = float(short_target) / float(im_size_min)
        if np.round(im_scale * im_size_max) > long_cap:
            im_scale = float(long_cap) / float(im_size_max)
        return im_scale

    def _resize_one(self, im, im_shape, im_scale):
        # Resize `im` by `im_scale` on both axes; returns the resized image
        # and its [resize_h, resize_w, scale] im_info entry.
        resize_w = np.round(im_scale * float(im_shape[1]))
        resize_h = np.round(im_scale * float(im_shape[0]))
        im_resize = cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=self.interp)
        return im_resize, [resize_h, resize_w, im_scale]

    def __call__(self, sample, context=None):
        """ Resize the image numpy for multi-scale test.
        """
        origin_ims = {}
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if float(im_size_min) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))
        base_name_list = ['image']
        origin_ims['image'] = im
        if self.use_flip:
            # Horizontal flip; the flipped copy goes through the same scales.
            sample['flip_image'] = im[:, ::-1, :]
            base_name_list.append('flip_image')
            origin_ims['flip_image'] = sample['flip_image']
        im_info = []
        for base_name in base_name_list:
            # Pass 1: the "origin" scale replaces the base image in-place.
            im_scale = self._compute_scale(self.origin_target_size,
                                           self.origin_max_size, im_size_min,
                                           im_size_max)
            im_resize, info = self._resize_one(origin_ims[base_name],
                                               im_shape, im_scale)
            im_info.extend(info)
            sample[base_name] = im_resize
            # Pass 2: one extra image per configured test scale.
            for i, size in enumerate(self.target_size):
                im_scale = self._compute_scale(size, self.max_size,
                                               im_size_min, im_size_max)
                im_resize, info = self._resize_one(origin_ims[base_name],
                                                   im_shape, im_scale)
                im_info.extend(info)
                sample[base_name + '_scale_' + str(i)] = im_resize
        sample['im_info'] = np.array(im_info, dtype=np.float32)
        return sample
@register_op
class ResizeImage(BaseOperator):
def __init__(self,
......@@ -183,9 +282,12 @@ class ResizeImage(BaseOperator):
resize_w = np.round(im_scale_x * float(im_shape[1]))
resize_h = np.round(im_scale_y * float(im_shape[0]))
sample['im_info'] = np.array(
[resize_h, resize_w, im_scale], dtype=np.float32)
im_info = [resize_h, resize_w, im_scale]
if 'im_info' in sample and sample['im_info'][2] != 1.:
sample['im_info'] = np.append(
list(sample['im_info']), im_info).astype(np.float32)
else:
sample['im_info'] = np.array(im_info).astype(np.float32)
else:
im_scale_x = float(selected_size) / float(im_shape[1])
im_scale_y = float(selected_size) / float(im_shape[0])
......@@ -331,19 +433,21 @@ class NormalizeImage(BaseOperator):
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
"""
im = sample['image']
im = im.astype(np.float32, copy=False)
if self.is_channel_first:
mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
std = np.array(self.std)[:, np.newaxis, np.newaxis]
else:
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
sample['image'] = im
for k in sample.keys():
if 'image' in k:
im = sample[k]
im = im.astype(np.float32, copy=False)
if self.is_channel_first:
mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
std = np.array(self.std)[:, np.newaxis, np.newaxis]
else:
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
sample[k] = im
return sample
......@@ -785,13 +889,15 @@ class Permute(BaseOperator):
def __call__(self, sample, context=None):
assert 'image' in sample, "image data not found"
im = sample['image']
if self.channel_first:
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if self.to_bgr:
im = im[[2, 1, 0], :, :]
sample['image'] = im
for k in sample.keys():
if 'image' in k:
im = sample[k]
if self.channel_first:
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if self.to_bgr:
im = im[[2, 1, 0], :, :]
sample[k] = im
return sample
......
......@@ -27,7 +27,9 @@ def build_post_map(coarsest_stride=1,
is_padding=False,
random_shapes=[],
multi_scales=[],
use_padded_im_info=False):
use_padded_im_info=False,
enable_multiscale_test=False,
num_scale=1):
"""
Build a mapper for post-processing batches
......@@ -36,10 +38,13 @@ def build_post_map(coarsest_stride=1,
{
coarsest_stride (int): stride of the coarsest FPN level
is_padding (bool): whether to padding in minibatch
random_shapes: (list of int): resize to image to random
shapes, [] for not resize.
multi_scales: (list of int): resize image by random
scales, [] for not resize.
random_shapes (list of int): resize to image to random shapes,
[] for not resize.
multi_scales (list of int): resize image by random scales,
[] for not resize.
use_padded_im_info (bool): whether to update im_info after padding
enable_multiscale_test (bool): whether to use multiscale test.
num_scale (int) : the number of scales for multiscale test.
}
Returns:
a mapper function which accept one argument 'batch' and
......@@ -66,6 +71,33 @@ def build_post_map(coarsest_stride=1,
padding_batch.append((padding_im, ) + data[1:])
return padding_batch
def padding_multiscale_test(batch_data):
if len(batch_data) != 1:
raise NotImplementedError(
"Batch size must be 1 when using multiscale test, but now batch size is {}".
format(len(batch_data)))
if coarsest_stride > 1:
padding_batch = []
padding_images = []
data = batch_data[0]
for i, input in enumerate(data):
if i < num_scale:
im_c, im_h, im_w = input.shape
max_h = int(
np.ceil(im_h / coarsest_stride) * coarsest_stride)
max_w = int(
np.ceil(im_w / coarsest_stride) * coarsest_stride)
padding_im = np.zeros(
(im_c, max_h, max_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = input
data[num_scale][3 * i:3 * i + 2] = [max_h, max_w]
padding_batch.append(padding_im)
else:
padding_batch.append(input)
return [tuple(padding_batch)]
# no need to padding
return batch_data
def random_shape(batch_data):
# For YOLO: gt_bbox is normalized, is scale invariant.
shape = np.random.choice(random_shapes)
......@@ -108,6 +140,8 @@ def build_post_map(coarsest_stride=1,
batch_data = random_shape(batch_data)
if len(multi_scales) > 0:
batch_data = multi_scale_resize(batch_data)
if enable_multiscale_test:
batch_data = padding_multiscale_test(batch_data)
except Exception as e:
errmsg = "post-process failed with error: " + str(e)
logger.warn(errmsg)
......
......@@ -80,21 +80,15 @@ class CascadeMaskRCNN(object):
self.cascade_rcnn_loss_weight = [1.0, 0.5, 0.25]
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
assert mode in ['train', 'test'], \
"only 'train' and 'test' mode is supported"
if mode == 'train':
required_fields = [
'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info'
]
else:
required_fields = ['im_shape', 'im_info']
self._input_check(required_fields, feed_vars)
for var in required_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
im = feed_vars['image']
if mode == 'train':
gt_box = feed_vars['gt_box']
is_crowd = feed_vars['is_crowd']
......@@ -199,55 +193,167 @@ class CascadeMaskRCNN(object):
loss.update({'loss': total_loss})
return loss
else:
if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois)
else:
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
mask_name = 'mask_pred'
mask_pred, bbox_pred = self.single_scale_eval(
body_feats, spatial_scale, im_info, mask_name, bbox_pred,
roi_feat_list, rcnn_pred_list, proposal_list,
feed_vars['im_shape'])
return {'bbox': bbox_pred, 'mask': mask_pred}
def build_multi_scale(self, feed_vars, mask_branch=False):
    """Build the multi-scale test graph.

    One forward pass is built per `image*` feed var; `im_info` holds one
    [h, w, scale] triple per scale, sliced out per pass. When
    `mask_branch` is False the box branch is built and per-scale
    'bbox_<i>'/'score_<i>' (plus '_flip' variants) outputs are collected;
    otherwise the mask branch is built on top of externally supplied
    'bbox'/'bbox_flip' predictions.

    Returns:
        dict of output variables keyed by per-scale names.
    """
    required_fields = ['image', 'im_info']
    self._input_check(required_fields, feed_vars)

    ims = []
    for k in feed_vars.keys():
        if 'image' in k:
            ims.append(feed_vars[k])

    result = {}
    if not mask_branch:
        assert 'im_shape' in feed_vars, \
            "{} has no im_shape field".format(feed_vars)
        result.update(feed_vars)

    for i, im in enumerate(ims):
        # im_info stores 3 values per scale; take this scale's triple.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])

        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
        if not mask_branch:
            im_shape = feed_vars['im_shape']
            body_feat_names = list(body_feats.keys())
            proposal_list = []
            roi_feat_list = []
            rcnn_pred_list = []

            proposals = None
            bbox_pred = None
            # BUGFIX: the cascade stage loop used `i`, shadowing the outer
            # scale index so every scale wrote the same 'bbox_2' key.
            for stage in range(3):
                if stage > 0:
                    refined_bbox = self._decode_box(
                        proposals,
                        bbox_pred,
                        curr_stage=stage - 1, )
                else:
                    refined_bbox = rois

                proposals = refined_bbox
                proposal_list.append(proposals)

                # extract roi features
                roi_feat = self.roi_extractor(body_feats, proposals,
                                              spatial_scale)
                roi_feat_list.append(roi_feat)

                # bbox head
                cls_score, bbox_pred = self.bbox_head.get_output(
                    roi_feat,
                    wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
                    name='_' + str(stage + 1) if stage > 0 else '')
                rcnn_pred_list.append((cls_score, bbox_pred))

            # NOTE(review): kept for parity with the original; this value is
            # not used below.
            if self.fpn is None:
                body_feat = body_feats[body_feat_names[-1]]
            pred = self.bbox_head.get_prediction(
                im_info,
                im_shape,
                roi_feat_list,
                rcnn_pred_list,
                proposal_list,
                self.cascade_bbox_reg_weights,
                return_box_score=True)
            bbox_name = 'bbox_' + str(i)
            score_name = 'score_' + str(i)
            if 'flip' in im.name:
                bbox_name += '_flip'
                score_name += '_flip'
            result[bbox_name] = pred['bbox']
            result[score_name] = pred['score']
        else:
            mask_name = 'mask_pred_' + str(i)
            bbox_pred = feed_vars['bbox']
            result.update({im.name: im})
            if 'flip' in im.name:
                mask_name += '_flip'
                bbox_pred = feed_vars['bbox_flip']
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats,
                spatial_scale,
                im_info,
                mask_name,
                bbox_pred=bbox_pred,
                use_multi_test=True)
            result[mask_name] = mask_pred
    return result
def single_scale_eval(self,
body_feats,
spatial_scale,
im_info,
mask_name,
bbox_pred,
roi_feat_list=None,
rcnn_pred_list=None,
proposal_list=None,
im_shape=None,
use_multi_test=False):
if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
if not use_multi_test:
bbox_pred = self.bbox_head.get_prediction(
im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
proposal_list, self.cascade_bbox_reg_weights,
self.cls_agnostic_bbox_reg)
im_info, im_shape, roi_feat_list, rcnn_pred_list, proposal_list,
self.cascade_bbox_reg_weights)
bbox_pred = bbox_pred['bbox']
# share weight
bbox_shape = fluid.layers.shape(bbox_pred)
bbox_size = fluid.layers.reduce_prod(bbox_shape)
bbox_size = fluid.layers.reshape(bbox_size, [1, 1])
size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32')
cond = fluid.layers.less_than(x=bbox_size, y=size)
mask_pred = fluid.layers.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=False,
name='mask_pred')
with fluid.layers.control_flow.Switch() as switch:
with switch.case(cond):
fluid.layers.assign(input=bbox_pred, output=mask_pred)
with switch.default():
bbox = fluid.layers.slice(
bbox_pred, [1], starts=[2], ends=[6])
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, bbox)
mask_rois = bbox * im_scale
if self.fpn is None:
mask_feat = self.roi_extractor(last_feat, mask_rois)
mask_feat = self.bbox_head.get_head_feat(mask_feat)
else:
mask_feat = self.roi_extractor(
body_feats, mask_rois, spatial_scale, is_mask=True)
# share weight
bbox_shape = fluid.layers.shape(bbox_pred)
bbox_size = fluid.layers.reduce_prod(bbox_shape)
bbox_size = fluid.layers.reshape(bbox_size, [1, 1])
size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32')
cond = fluid.layers.less_than(x=bbox_size, y=size)
mask_pred = fluid.layers.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=False,
name=mask_name)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(cond):
fluid.layers.assign(input=bbox_pred, output=mask_pred)
with switch.default():
bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6])
mask_out = self.mask_head.get_prediction(mask_feat, bbox)
fluid.layers.assign(input=mask_out, output=mask_pred)
return {'bbox': bbox_pred, 'mask': mask_pred}
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, bbox)
mask_rois = bbox * im_scale
if self.fpn is None:
mask_feat = self.roi_extractor(last_feat, mask_rois)
mask_feat = self.bbox_head.get_head_feat(mask_feat)
else:
mask_feat = self.roi_extractor(
body_feats, mask_rois, spatial_scale, is_mask=True)
mask_out = self.mask_head.get_prediction(mask_feat, bbox)
fluid.layers.assign(input=mask_out, output=mask_pred)
return mask_pred, bbox_pred
def _input_check(self, require_fields, feed_vars):
for var in require_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
def _decode_box(self, proposals, bbox_pred, curr_stage):
rcnn_loc_delta_r = fluid.layers.reshape(
......@@ -269,7 +375,9 @@ class CascadeMaskRCNN(object):
def train(self, feed_vars):
return self.build(feed_vars, 'train')
def eval(self, feed_vars):
def eval(self, feed_vars, multi_scale=None, mask_branch=False):
    """Build the evaluation graph; dispatches to the multi-scale graph
    when `multi_scale` is truthy, otherwise to the single-scale test
    graph."""
    if not multi_scale:
        return self.build(feed_vars, 'test')
    return self.build_multi_scale(feed_vars, mask_branch)
def test(self, feed_vars):
......
......@@ -74,16 +74,13 @@ class CascadeRCNN(object):
self.cascade_rcnn_loss_weight = [1.0, 0.5, 0.25]
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
assert mode in ['train', 'test'], \
"only 'train' and 'test' mode is supported"
if mode == 'train':
required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info']
else:
required_fields = ['im_shape', 'im_info']
for var in required_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
self._input_check(required_fields, feed_vars)
im = feed_vars['image']
im_info = feed_vars['im_info']
if mode == 'train':
......@@ -171,6 +168,98 @@ class CascadeRCNN(object):
self.cls_agnostic_bbox_reg)
return pred
def build_multi_scale(self, feed_vars):
    """Build the multi-scale test graph.

    One forward pass is built per `image*` feed var; `im_info` holds one
    [h, w, scale] triple per scale, sliced out per pass. Per-scale box
    predictions are collected under 'bbox_<i>'/'score_<i>' keys (with a
    '_flip' suffix for flipped inputs).

    Returns:
        dict of output variables keyed by per-scale names.
    """
    required_fields = ['image', 'im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    ims = []
    for k in feed_vars.keys():
        if 'image' in k:
            ims.append(feed_vars[k])
    result = {}
    result.update(feed_vars)
    for i, im in enumerate(ims):
        # im_info stores 3 values per scale; take this scale's triple.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        im_shape = feed_vars['im_shape']

        # backbone
        body_feats = self.backbone(im)
        result.update(body_feats)
        body_feat_names = list(body_feats.keys())

        # FPN
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # rpn proposals
        rpn_rois = self.rpn_head.get_proposals(
            body_feats, im_info, mode='test')

        proposal_list = []
        roi_feat_list = []
        rcnn_pred_list = []

        proposals = None
        bbox_pred = None
        # BUGFIX: the cascade stage loop used `i`, shadowing the outer scale
        # index so every scale wrote the same 'bbox_2' key.
        for stage in range(3):
            if stage > 0:
                refined_bbox = self._decode_box(
                    proposals,
                    bbox_pred,
                    curr_stage=stage - 1, )
            else:
                refined_bbox = rpn_rois

            proposals = refined_bbox
            proposal_list.append(proposals)

            # extract roi features
            roi_feat = self.roi_extractor(body_feats, proposals,
                                          spatial_scale)
            roi_feat_list.append(roi_feat)

            # bbox head
            cls_score, bbox_pred = self.bbox_head.get_output(
                roi_feat,
                wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
                name='_' + str(stage + 1) if stage > 0 else '')
            rcnn_pred_list.append((cls_score, bbox_pred))

        # roi features of the last cascade stage feed the prediction head
        rois = proposal_list[2]
        if self.fpn is None:
            last_feat = body_feats[list(body_feats.keys())[-1]]
            roi_feat = self.roi_extractor(last_feat, rois)
        else:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)

        pred = self.bbox_head.get_prediction(
            im_info,
            im_shape,
            roi_feat_list,
            rcnn_pred_list,
            proposal_list,
            self.cascade_bbox_reg_weights,
            self.cls_agnostic_bbox_reg,
            return_box_score=True)
        bbox_name = 'bbox_' + str(i)
        score_name = 'score_' + str(i)
        if 'flip' in im.name:
            bbox_name += '_flip'
            score_name += '_flip'
        result[bbox_name] = pred['bbox']
        result[score_name] = pred['score']
    return result
def _input_check(self, require_fields, feed_vars):
    # Verify that the reader supplied every feed field this build requires;
    # fail fast with the missing field's name.
    for var in require_fields:
        assert var in feed_vars, \
            "{} has no {} field".format(feed_vars, var)
def _decode_box(self, proposals, bbox_pred, curr_stage):
rcnn_loc_delta_r = fluid.layers.reshape(
bbox_pred, (-1, self.cls_agnostic_bbox_reg, 4))
......@@ -191,7 +280,9 @@ class CascadeRCNN(object):
def train(self, feed_vars):
    # Thin wrapper: build the detection graph in 'train' mode.
    return self.build(feed_vars, 'train')
def eval(self, feed_vars, multi_scale=None):
    """Build the evaluation graph.

    Args:
        feed_vars (dict): input variables of the network.
        multi_scale: if truthy, build the multi-scale test graph via
            build_multi_scale(); otherwise build the single-scale
            'test' graph. Defaults to None for backward compatibility.

    Returns:
        dict: output variables to fetch during evaluation.
    """
    # NOTE: the diff rendering had left a stale duplicate
    # `def eval(self, feed_vars):` header here, which is a syntax
    # error (empty def body); only the extended signature is kept.
    if multi_scale:
        return self.build_multi_scale(feed_vars)
    return self.build(feed_vars, 'test')
def test(self, feed_vars):
......
......@@ -63,6 +63,12 @@ class FasterRCNN(object):
self.rpn_only = rpn_only
def build(self, feed_vars, mode='train'):
if mode == 'train':
required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info']
else:
required_fields = ['im_shape', 'im_info']
self._input_check(required_fields, feed_vars)
im = feed_vars['image']
im_info = feed_vars['im_info']
if mode == 'train':
......@@ -136,10 +142,62 @@ class FasterRCNN(object):
im_shape)
return pred
def build_multi_scale(self, feed_vars):
    """Build the multi-scale test graph for FasterRCNN.

    Runs backbone/FPN/RPN/bbox-head once per scaled (and optionally
    flipped) input image and stores the raw, un-NMSed boxes and scores
    per scale in the result dict; fusion across scales happens later in
    post-processing (mstest_box_post_process).

    Args:
        feed_vars (dict): must contain 'image', 'im_info', 'im_shape';
            extra keys containing 'image' (e.g. 'image_scale_0',
            'flip_image') are treated as additional scales.

    Returns:
        dict: feed_vars plus backbone features and, per scale i,
        'bbox_i'/'score_i' (suffixed '_flip' for flipped inputs).
    """
    required_fields = ['image', 'im_info', 'im_shape']
    self._input_check(required_fields, feed_vars)
    # Every feed var whose name contains 'image' is one test scale.
    ims = []
    for k in feed_vars.keys():
        if 'image' in k:
            ims.append(feed_vars[k])
    result = {}
    result.update(feed_vars)
    for i, im in enumerate(ims):
        # 'im_info' packs 3 values (h, w, scale) per scale; slice out
        # the triplet belonging to scale i.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        im_shape = feed_vars['im_shape']
        body_feats = self.backbone(im)
        result.update(body_feats)
        body_feat_names = list(body_feats.keys())
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)
        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
        if self.fpn is None:
            # in models without FPN, roi extractor only uses the last level of
            # feature maps. And body_feat_names[-1] represents the name of
            # last feature map.
            body_feat = body_feats[body_feat_names[-1]]
            roi_feat = self.roi_extractor(body_feat, rois)
        else:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
        # return_box_score=True skips NMS so scales can be merged later.
        pred = self.bbox_head.get_prediction(
            roi_feat, rois, im_info, im_shape, return_box_score=True)
        bbox_name = 'bbox_' + str(i)
        score_name = 'score_' + str(i)
        # Flipped inputs are tagged so post-processing can un-flip boxes.
        if 'flip' in im.name:
            bbox_name += '_flip'
            score_name += '_flip'
        result[bbox_name] = pred['bbox']
        result[score_name] = pred['score']
    return result
def _input_check(self, require_fields, feed_vars):
for var in require_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
def train(self, feed_vars):
    # Thin wrapper: build the detection graph in 'train' mode.
    return self.build(feed_vars, 'train')
def eval(self, feed_vars, multi_scale=None):
    """Build the evaluation graph.

    Args:
        feed_vars (dict): input variables of the network.
        multi_scale: if truthy, build the multi-scale test graph via
            build_multi_scale(); otherwise build the single-scale
            'test' graph. Defaults to None so existing callers that
            pass only feed_vars keep working.

    Returns:
        dict: output variables to fetch during evaluation.
    """
    # The rendered diff duplicated the old one-argument `def eval`
    # header above this one (an empty def body — a syntax error);
    # only the extended signature is kept here.
    if multi_scale:
        return self.build_multi_scale(feed_vars)
    return self.build(feed_vars, 'test')
def test(self, feed_vars):
......
......@@ -69,18 +69,14 @@ class MaskRCNN(object):
self.fpn = fpn
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
assert mode in ['train', 'test'], \
"only 'train' and 'test' mode is supported"
if mode == 'train':
required_fields = [
'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info'
]
else:
required_fields = ['im_shape', 'im_info']
for var in required_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
self._input_check(required_fields, feed_vars)
im = feed_vars['image']
im_info = feed_vars['im_info']
mixed_precision_enabled = mixed_precision_global_state() is not None
......@@ -153,57 +149,135 @@ class MaskRCNN(object):
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois)
else:
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
mask_name = 'mask_pred'
mask_pred, bbox_pred = self.single_scale_eval(
body_feats, mask_name, rois, im_info, feed_vars['im_shape'],
spatial_scale)
return {'bbox': bbox_pred, 'mask': mask_pred}
def build_multi_scale(self, feed_vars, mask_branch=False):
    """Build the multi-scale test graph for MaskRCNN.

    Two modes:
      * mask_branch=False — main program: per scale, emit raw boxes and
        scores ('bbox_i'/'score_i', '_flip' suffix for flipped inputs)
        for later cross-scale fusion.
      * mask_branch=True — sub-program: the fused boxes are fed back in
        ('bbox'/'bbox_flip') and only the mask head is run per scale,
        emitting 'mask_pred_i' outputs.

    Args:
        feed_vars (dict): must contain 'image' and 'im_info'; also
            'im_shape' (asserted when mask_branch is False, and read in
            the mask branch as well), plus 'bbox'/'bbox_flip' when
            mask_branch is True.
        mask_branch (bool): select the mask-only sub-program path.

    Returns:
        dict: per-scale prediction variables as described above.
    """
    required_fields = ['image', 'im_info']
    self._input_check(required_fields, feed_vars)
    # Every feed var whose name contains 'image' is one test scale.
    ims = []
    for k in feed_vars.keys():
        if 'image' in k:
            ims.append(feed_vars[k])
    result = {}
    if not mask_branch:
        assert 'im_shape' in feed_vars, \
            "{} has no im_shape field".format(feed_vars)
        result.update(feed_vars)
    for i, im in enumerate(ims):
        # 'im_info' packs 3 values per scale; slice out scale i's triplet.
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        body_feats = self.backbone(im)
        result.update(body_feats)
        # FPN
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)
        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
        if not mask_branch:
            im_shape = feed_vars['im_shape']
            body_feat_names = list(body_feats.keys())
            if self.fpn is None:
                # without FPN only the last backbone level feeds RoI extraction
                body_feat = body_feats[body_feat_names[-1]]
                roi_feat = self.roi_extractor(body_feat, rois)
            else:
                roi_feat = self.roi_extractor(body_feats, rois,
                                              spatial_scale)
            # return_box_score=True: raw boxes/scores, NMS deferred to
            # cross-scale post-processing.
            pred = self.bbox_head.get_prediction(
                roi_feat, rois, im_info, im_shape, return_box_score=True)
            bbox_name = 'bbox_' + str(i)
            score_name = 'score_' + str(i)
            if 'flip' in im.name:
                bbox_name += '_flip'
                score_name += '_flip'
            result[bbox_name] = pred['bbox']
            result[score_name] = pred['score']
        else:
            # Mask-only pass: boxes come from the fused feed, flipped
            # scales read the pre-flipped 'bbox_flip' variant.
            mask_name = 'mask_pred_' + str(i)
            bbox_pred = feed_vars['bbox']
            result.update({im.name: im})
            if 'flip' in im.name:
                mask_name += '_flip'
                bbox_pred = feed_vars['bbox_flip']
            # NOTE(review): `spatial_scale` is only bound when self.fpn is
            # set — this path looks FPN-only; confirm non-FPN mask
            # multi-scale test is unsupported.
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats, mask_name, rois, im_info, feed_vars['im_shape'],
                spatial_scale, bbox_pred)
            result[mask_name] = mask_pred
    return result
def single_scale_eval(self,
                      body_feats,
                      mask_name,
                      rois,
                      im_info,
                      im_shape,
                      spatial_scale,
                      bbox_pred=None):
    """Run bbox (optionally) and mask prediction for a single scale.

    Args:
        body_feats (OrderedDict): backbone/FPN feature maps.
        mask_name (str): name for the mask output variable (must be
            unique per scale/flip so multi-scale results don't clash).
        rois: RPN proposals for this scale.
        im_info: (h, w, scale) info for this scale.
        im_shape: original image shape variable.
        spatial_scale (list): FPN spatial scales for RoI extraction.
        bbox_pred: precomputed boxes (mask-branch sub-program); when
            None, boxes are predicted here by the bbox head.

    Returns:
        tuple: (mask_pred, bbox_pred) variables.

    NOTE: the rendered diff had merged the old and new versions of this
    method, duplicating the whole mask-prediction body (one copy with a
    hard-coded 'mask_pred' name and a dict return, plus a dangling
    reference to an undefined `feed_vars`); this is the reconciled
    version using `mask_name`/`im_shape` parameters.
    """
    if self.fpn is None:
        last_feat = body_feats[list(body_feats.keys())[-1]]
        roi_feat = self.roi_extractor(last_feat, rois)
    else:
        roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
    if not bbox_pred:
        bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info,
                                                  im_shape)
        bbox_pred = bbox_pred['bbox']

    # share weight
    bbox_shape = fluid.layers.shape(bbox_pred)
    bbox_size = fluid.layers.reduce_prod(bbox_shape)
    bbox_size = fluid.layers.reshape(bbox_size, [1, 1])
    size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32')
    # fewer than 6 elements means "no detections" sentinel — skip mask head
    cond = fluid.layers.less_than(x=bbox_size, y=size)

    mask_pred = fluid.layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=False,
        name=mask_name)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(cond):
            fluid.layers.assign(input=bbox_pred, output=mask_pred)
        with switch.default():
            # bbox rows are [label, score, x1, y1, x2, y2]; keep coords only
            bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6])

            # rescale boxes back to the resized-image coordinate space
            im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
            im_scale = fluid.layers.sequence_expand(im_scale, bbox)
            mask_rois = bbox * im_scale

            if self.fpn is None:
                mask_feat = self.roi_extractor(last_feat, mask_rois)
                mask_feat = self.bbox_head.get_head_feat(mask_feat)
            else:
                mask_feat = self.roi_extractor(
                    body_feats, mask_rois, spatial_scale, is_mask=True)

            mask_out = self.mask_head.get_prediction(mask_feat, bbox)
            fluid.layers.assign(input=mask_out, output=mask_pred)
    return mask_pred, bbox_pred
def _input_check(self, require_fields, feed_vars):
for var in require_fields:
assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var)
def train(self, feed_vars):
    # Thin wrapper: build the detection graph in 'train' mode.
    return self.build(feed_vars, 'train')
def eval(self, feed_vars, multi_scale=None, mask_branch=False):
    """Build the evaluation graph for MaskRCNN.

    Args:
        feed_vars (dict): input variables of the network.
        multi_scale: if truthy, build the multi-scale test graph.
        mask_branch (bool): forwarded to build_multi_scale(); when True
            only the mask sub-program is built (boxes are fed in).

    Returns:
        dict: output variables to fetch during evaluation.
    """
    # The rendered diff duplicated the old one-argument `def eval`
    # header above this one (empty def body — a syntax error); only
    # the extended signature is kept.
    if multi_scale:
        return self.build_multi_scale(feed_vars, mask_branch)
    return self.build(feed_vars, 'test')
def test(self, feed_vars):
......
......@@ -17,7 +17,7 @@ from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import copy
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
......@@ -110,6 +110,7 @@ class FPN(object):
their name.
spatial_scale(list): A list of multiplicative spatial scale factor.
"""
spatial_scale = copy.deepcopy(self.spatial_scale)
body_name_list = list(body_dict.keys())[::-1]
num_backbone_stages = len(body_name_list)
self.fpn_inner_output = [[] for _ in range(num_backbone_stages)]
......@@ -179,7 +180,7 @@ class FPN(object):
fpn_dict[fpn_name] = fpn_output
fpn_name_list.append(fpn_name)
if not self.has_extra_convs and self.max_level - self.min_level == len(
self.spatial_scale):
spatial_scale):
body_top_name = fpn_name_list[0]
body_top_extension = fluid.layers.pool2d(
fpn_dict[body_top_name],
......@@ -189,9 +190,9 @@ class FPN(object):
name=body_top_name + '_subsampled_2x')
fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension
fpn_name_list.insert(0, body_top_name + '_subsampled_2x')
self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)
spatial_scale.insert(0, spatial_scale[0] * 0.5)
# Coarser FPN levels introduced for RetinaNet
highest_backbone_level = self.min_level + len(self.spatial_scale) - 1
highest_backbone_level = self.min_level + len(spatial_scale) - 1
if self.has_extra_convs and self.max_level > highest_backbone_level:
fpn_blob = body_dict[body_name_list[0]]
for i in range(highest_backbone_level + 1, self.max_level + 1):
......@@ -215,6 +216,6 @@ class FPN(object):
name=fpn_name)
fpn_dict[fpn_name] = fpn_blob
fpn_name_list.insert(0, fpn_name)
self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)
spatial_scale.insert(0, spatial_scale[0] * 0.5)
res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
return res_dict, self.spatial_scale
return res_dict, spatial_scale
......@@ -17,6 +17,7 @@ from __future__ import print_function
from __future__ import division
from collections import OrderedDict
from ppdet.data.transform.operators import *
from paddle import fluid
......@@ -38,7 +39,7 @@ feed_var_def = [
# yapf: enable
def create_feed(feed, use_pyreader=True):
def create_feed(feed, use_pyreader=True, sub_prog_feed=False):
image_shape = feed.image_shape
feed_var_map = {var['name']: var for var in feed_var_def}
feed_var_map['image'] = {
......@@ -60,6 +61,58 @@ def create_feed(feed, use_pyreader=True):
feed_var_map['gt_box']['lod_level'] = 0
feed_var_map['is_difficult']['lod_level'] = 0
base_name_list = ['image']
num_scale = getattr(feed, 'num_scale', 1)
sample_transform = feed.sample_transforms
multiscale_test = False
aug_flip = False
for t in sample_transform:
if isinstance(t, MultiscaleTestResize):
multiscale_test = True
aug_flip = t.use_flip
assert (len(t.target_size)+1)*(aug_flip+1) == num_scale, \
"num_scale: {} is not equal to the actual number of scale: {}."\
.format(num_scale, (len(t.target_size)+1)*(aug_flip+1))
break
if aug_flip:
num_scale //= 2
base_name_list.insert(0, 'flip_image')
feed_var_map['flip_image'] = {
'name': 'flip_image',
'shape': image_shape,
'dtype': 'float32',
'lod_level': 0
}
image_name_list = []
if multiscale_test:
for base_name in base_name_list:
for i in range(0, num_scale):
name = base_name if i == 0 else base_name + '_scale_' + str(i -
1)
feed_var_map[name] = {
'name': name,
'shape': image_shape,
'dtype': 'float32',
'lod_level': 0
}
image_name_list.append(name)
feed_var_map['im_info']['shape'] = [feed.num_scale * 3]
feed.fields = image_name_list + feed.fields[1:]
if sub_prog_feed:
box_names = ['bbox', 'bbox_flip']
for box_name in box_names:
sub_prog_feed = {
'name': box_name,
'shape': [6],
'dtype': 'float32',
'lod_level': 1
}
feed.fields = feed.fields + [box_name]
feed_var_map[box_name] = sub_prog_feed
feed_vars = OrderedDict([(key, fluid.layers.data(
name=feed_var_map[key]['name'],
shape=feed_var_map[key]['shape'],
......
......@@ -280,7 +280,12 @@ class BBoxHead(object):
loss_bbox = fluid.layers.reduce_mean(loss_bbox)
return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}
def get_prediction(self, roi_feat, rois, im_info, im_shape):
def get_prediction(self,
roi_feat,
rois,
im_info,
im_shape,
return_box_score=False):
"""
Get prediction bounding box in test stage.
......@@ -308,5 +313,7 @@ class BBoxHead(object):
bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))
decoded_box = self.box_coder(prior_box=boxes, target_box=bbox_pred)
cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
if return_box_score:
return {'bbox': cliped_box, 'score': cls_prob}
pred_result = self.nms(bboxes=cliped_box, scores=cls_prob)
return {'bbox': pred_result}
......@@ -146,7 +146,8 @@ class CascadeBBoxHead(object):
rcnn_pred_list,
proposal_list,
cascade_bbox_reg_weights,
cls_agnostic_bbox_reg=2):
cls_agnostic_bbox_reg=2,
return_box_score=False):
"""
Get prediction bounding box in test stage.
:
......@@ -214,7 +215,8 @@ class CascadeBBoxHead(object):
axis=1)
box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
if return_box_score:
return {'bbox': box_out, 'score': boxes_cls_prob_mean}
pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
return {"bbox": pred_result}
......
......@@ -16,7 +16,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import yaml
import re
from ppdet.core.workspace import get_registered_modules
from ppdet.core.workspace import get_registered_modules, dump_value
__all__ = ['ColorTTY', 'ArgsParser']
......
......@@ -24,6 +24,7 @@ import time
import paddle.fluid as fluid
from ppdet.utils.voc_eval import bbox_eval as voc_bbox_eval
from ppdet.utils.post_process import mstest_box_post_process, mstest_mask_post_process, box_flip
__all__ = ['parse_fetches', 'eval_run', 'eval_results', 'json_eval_results']
......@@ -57,7 +58,52 @@ def parse_fetches(fetches, prog=None, extra_keys=None):
return keys, values, cls
def eval_run(exe, compile_program, pyreader, keys, values, cls):
def length2lod(length_lod):
    """Convert per-sequence lengths into an offset-based LoD.

    E.g. [1, 2, 3] -> [[0, 1, 3, 6]] (cumulative offsets, wrapped in a
    single-level LoD list).
    """
    total = 0
    offsets = [0]
    for length in length_lod:
        total += length
        offsets.append(total)
    return [offsets]
def get_sub_feed(input, place):
    """Build the feed dict for the mask sub-program.

    Picks the box/meta outputs of the main program plus every image
    variable, wraps each as a fluid.LoDTensor on `place`, and restores
    the LoD for box tensors from their recorded sequence lengths.
    """
    picked = {}
    for k in ['bbox', 'im_info', 'im_id', 'im_shape', 'bbox_flip']:
        if k in input.keys():
            picked[k] = input[k]
    for k in input.keys():
        if 'image' in k:
            picked[k] = input[k]

    res_feed = {}
    for name, value in picked.items():
        tensor = fluid.LoDTensor()
        tensor.set(value[0], place)
        if 'bbox' in name:
            # value[1][0] holds per-sequence lengths; convert to offsets
            tensor.set_lod(length2lod(value[1][0]))
        res_feed[name] = tensor
    return res_feed
def clean_res(result, keep_name_list):
    """Return the entries of `result` whose keys are in keep_name_list.

    The original dict is emptied in place (its tensors are no longer
    needed once the kept entries are copied out).
    """
    kept = {k: v for k, v in result.items() if k in keep_name_list}
    result.clear()
    return kept
def eval_run(exe,
compile_program,
pyreader,
keys,
values,
cls,
cfg=None,
sub_prog=None,
sub_keys=None,
sub_values=None):
"""
Run evaluation program, return program outputs.
"""
......@@ -84,6 +130,28 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls):
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(keys, outs)
}
multi_scale_test = getattr(cfg, 'MultiScaleTEST', None)
mask_multi_scale_test = multi_scale_test and 'Mask' in cfg.architecture
if multi_scale_test:
post_res = mstest_box_post_process(res, cfg)
res.update(post_res)
if mask_multi_scale_test:
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
sub_feed = get_sub_feed(res, place)
sub_prog_outs = exe.run(sub_prog,
feed=sub_feed,
fetch_list=sub_values,
return_numpy=False)
sub_prog_res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(sub_keys, sub_prog_outs)
}
post_res = mstest_mask_post_process(sub_prog_res, cfg)
res.update(post_res)
if multi_scale_test:
res = clean_res(
res, ['im_info', 'bbox', 'im_id', 'im_shape', 'mask'])
results.append(res)
if iter_id % 100 == 0:
logger.info('Test iter {}'.format(iter_id))
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import paddle.fluid as fluid
__all__ = ['nms']
logger = logging.getLogger(__name__)
def box_flip(boxes, im_shape):
    """Mirror boxes horizontally within an image of width im_shape[0][1].

    Works on (N, 4k) arrays: every 4-column group [x1, y1, x2, y2] has
    its x coordinates reflected (inclusive-pixel convention, hence -1).
    """
    width = im_shape[0][1]
    flipped = boxes.copy()
    flipped[:, 0::4] = width - boxes[:, 2::4] - 1
    flipped[:, 2::4] = width - boxes[:, 0::4] - 1
    return flipped
def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS.

    Args:
        dets (np.ndarray): (N, 5) detections, one row per box laid out
            as [score, x1, y1, x2, y2].
        thresh (float): IoU threshold; any lower-scoring box overlapping
            a kept box by >= thresh is suppressed.

    Returns:
        Indices (into dets) of the kept boxes in ascending order; a
        plain empty list when dets is empty.
    """
    if dets.shape[0] == 0:
        return []

    scores = dets[:, 0]
    x1 = dets[:, 1]
    y1 = dets[:, 2]
    x2 = dets[:, 3]
    y2 = dets[:, 4]
    # inclusive-pixel convention for width/height (hence the +1)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    ndets = dets.shape[0]
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; use a
    # concrete dtype instead.
    suppressed = np.zeros(ndets, dtype=np.int64)

    for rank, i in enumerate(order):
        if suppressed[i] == 1:
            continue
        # Vectorized overlap of box i against all lower-scoring boxes
        # (replaces the original scalar inner loop; suppressing an
        # already-suppressed index is idempotent, so results match).
        rest = order[rank + 1:]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)
        suppressed[rest[ovr >= thresh]] = 1
    return np.where(suppressed == 0)[0]
def bbox_area(box):
    """Area of one [x1, y1, x2, y2] box (inclusive-pixel convention)."""
    return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
def bbox_overlaps(x, y):
    """Pairwise IoU between two sets of [x1, y1, x2, y2] boxes.

    Args:
        x (np.ndarray): (N, >=4) boxes.
        y (np.ndarray): (K, >=4) boxes.

    Returns:
        np.ndarray: (N, K) float32 IoU matrix; pairs with no positive
        intersection are 0 (matching the original loop, which only
        wrote a value when iw > 0 and ih > 0).

    The original double Python loop was O(N*K) in interpreted code;
    this broadcasts the same arithmetic in NumPy with identical values.
    """
    x_area = (x[:, 2] - x[:, 0] + 1) * (x[:, 3] - x[:, 1] + 1)
    y_area = (y[:, 2] - y[:, 0] + 1) * (y[:, 3] - y[:, 1] + 1)
    # (N, K) intersection extents, clamped at zero
    iw = (np.minimum(x[:, None, 2], y[None, :, 2]) -
          np.maximum(x[:, None, 0], y[None, :, 0]) + 1)
    ih = (np.minimum(x[:, None, 3], y[None, :, 3]) -
          np.maximum(x[:, None, 1], y[None, :, 1]) + 1)
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    ua = x_area[:, None] + y_area[None, :] - inter
    # where() keeps zero for non-overlapping pairs; errstate silences
    # the speculative division evaluated on those entries
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = np.where(inter > 0, inter / ua, 0.0)
    return iou.astype(np.float32)
def box_voting(nms_dets, dets, vote_thresh):
    """Refine NMS survivors by score-weighted box voting.

    Each kept box's coordinates are replaced by the score-weighted
    average of all original boxes overlapping it by >= vote_thresh IoU.
    Rows are [score, x1, y1, x2, y2]; scores are left untouched.
    """
    top_dets = nms_dets.copy()
    top_boxes = nms_dets[:, 1:]
    all_boxes = dets[:, 1:]
    all_scores = dets[:, 0]
    overlaps = bbox_overlaps(top_boxes, all_boxes)
    for idx in range(top_dets.shape[0]):
        voters = np.where(overlaps[idx] >= vote_thresh)[0]
        top_dets[idx, 1:] = np.average(
            all_boxes[voters, :], axis=0, weights=all_scores[voters])
    return top_dets
def get_nms_result(boxes, scores, cfg):
    """Per-class NMS (plus optional box voting) over merged detections.

    Args:
        boxes (np.ndarray): (R, 4*num_classes) boxes, 4 columns per class.
        scores (np.ndarray): (R, num_classes) class scores.
        cfg: config with num_classes and a MultiScaleTEST dict providing
            score_thresh, nms_thresh, enable_voting, vote_thresh and
            detections_per_im.

    Returns:
        np.ndarray: (M, 6) rows of [label, score, x1, y1, x2, y2].
    """
    # index 0 is background and is never emitted
    cls_boxes = [[] for _ in range(cfg.num_classes)]
    for j in range(1, cfg.num_classes):
        inds = np.where(scores[:, j] > cfg.MultiScaleTEST['score_thresh'])[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        # dets rows: [score, x1, y1, x2, y2] as expected by nms()
        dets_j = np.hstack((scores_j[:, np.newaxis], boxes_j)).astype(
            np.float32, copy=False)
        keep = nms(dets_j, cfg.MultiScaleTEST['nms_thresh'])
        nms_dets = dets_j[keep, :]
        if cfg.MultiScaleTEST['enable_voting']:
            # voting keeps the row count, only coordinates change
            nms_dets = box_voting(nms_dets, dets_j,
                                  cfg.MultiScaleTEST['vote_thresh'])
        #add labels
        label = np.array([j for _ in range(len(keep))])
        nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype(
            np.float32, copy=False)
        cls_boxes[j] = nms_dets
    # Limit to max_per_image detections **over all classes**
    # (column 1 is the score after the label was prepended)
    image_scores = np.hstack(
        [cls_boxes[j][:, 1] for j in range(1, cfg.num_classes)])
    if len(image_scores) > cfg.MultiScaleTEST['detections_per_im']:
        image_thresh = np.sort(image_scores)[-cfg.MultiScaleTEST[
            'detections_per_im']]
        for j in range(1, cfg.num_classes):
            keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
            cls_boxes[j] = cls_boxes[j][keep, :]
    im_results = np.vstack([cls_boxes[j] for j in range(1, cfg.num_classes)])
    return im_results
def mstest_box_post_process(result, cfg):
    """
    Multi-scale Test
    Only available for batch_size=1 now.

    Merges the per-scale raw boxes/scores fetched from the main program
    ('bbox_i'/'score_i' keys, '_flip' suffix for flipped scales),
    un-flips the flipped ones, runs cross-scale NMS, and returns fused
    'bbox' (and, when any scale was flipped, the mirrored 'bbox_flip'
    used by the mask sub-program) as (array, lod) pairs.
    """
    post_bbox = {}
    use_flip = False
    ms_boxes = []
    ms_scores = []
    # each result value is (ndarray, recursive_sequence_lengths)
    im_shape = result['im_shape'][0]
    for k in result.keys():
        if 'bbox' in k:
            boxes = result[k][0]
            boxes = np.reshape(boxes, (-1, 4 * cfg.num_classes))
            # 'score_...' key shares the suffix of the 'bbox_...' key
            scores = result['score' + k[4:]][0]
            if 'flip' in k:
                # map flipped-image boxes back to original coordinates
                boxes = box_flip(boxes, im_shape)
                use_flip = True
            ms_boxes.append(boxes)
            ms_scores.append(scores)
    ms_boxes = np.concatenate(ms_boxes)
    ms_scores = np.concatenate(ms_scores)
    bbox_pred = get_nms_result(ms_boxes, ms_scores, cfg)
    post_bbox.update({'bbox': (bbox_pred, [[len(bbox_pred)]])})
    if use_flip:
        # rows are [label, score, x1, y1, x2, y2]; flip only the coords
        bbox = bbox_pred[:, 2:]
        bbox_flip = np.append(
            bbox_pred[:, :2], box_flip(bbox, im_shape), axis=1)
        post_bbox.update({'bbox_flip': (bbox_flip, [[len(bbox_flip)]])})
    return post_bbox
def mstest_mask_post_process(result, cfg):
    """Fuse per-scale mask predictions by averaging.

    Args:
        result (dict): fetch results of the mask sub-program; every key
            containing 'mask' maps to (mask_array, lod). Keys containing
            'flip' hold predictions on horizontally flipped inputs.
        cfg: config whose FPNRoIAlign['mask_resolution'] gives the mask
            side length used for empty placeholders.

    Returns:
        dict: {'mask': (averaged_masks, [[num_masks]])}.

    Note: the previous version also read result['im_shape'] into a
    local that was never used; that dead read (and the spurious
    'im_shape' key requirement it imposed) is removed.
    """
    mask_list = []
    M = cfg.FPNRoIAlign['mask_resolution']
    for k in result.keys():
        if 'mask' in k:
            masks = result[k][0]
            if len(masks.shape) != 4:
                # a scale with no detections yields a non-4D sentinel;
                # NOTE(review): placeholder is 3-D while real masks are
                # 4-D — confirm np.mean over this mix is intended
                masks = np.zeros((0, M, M))
                mask_list.append(masks)
                continue
            if 'flip' in k:
                # undo the horizontal flip so all scales align
                masks = masks[:, :, :, ::-1]
            mask_list.append(masks)
    mask_pred = np.mean(mask_list, axis=0)
    return {'mask': (mask_pred, [[len(mask_pred)]])}
......@@ -19,7 +19,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import yaml
from ppdet.core.workspace import get_registered_modules, load_config
from ppdet.core.workspace import get_registered_modules, load_config, dump_value
from ppdet.utils.cli import ColorTTY, print_total_cfg
color_tty = ColorTTY()
......@@ -43,18 +43,6 @@ MISC_CONFIG = {
}
def dump_value(value):
    """Render a config value as a string for dumping.

    Containers (and objects carrying a __dict__) are serialized as
    single-line flow-style YAML wrapped in quotes; primitives fall back
    to plain str().
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    is_container = hasattr(value, '__dict__') or isinstance(
        value, (dict, tuple, list))
    if not is_container:
        # primitive types
        return str(value)
    dumped = yaml.dump(value, default_flow_style=True)
    dumped = dumped.replace('\n', '').replace('...', '')
    return "'{}'".format(dumped)
def dump_config(module, minimal=False):
args = module.schema.values()
if minimal:
......
......@@ -59,7 +59,6 @@ def main():
raise ValueError("'architecture' not specified in config file.")
merge_config(FLAGS.opt)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu)
print_total_cfg(cfg)
......@@ -69,6 +68,8 @@ def main():
else:
eval_feed = create(cfg.eval_feed)
multi_scale_test = getattr(cfg, 'MultiScaleTEST', None)
# define executor
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
......@@ -80,9 +81,8 @@ def main():
with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard():
pyreader, feed_vars = create_feed(eval_feed)
fetches = model.eval(feed_vars)
fetches = model.eval(feed_vars, multi_scale_test)
eval_prog = eval_prog.clone(True)
reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir)
pyreader.decorate_sample_list_generator(reader, place)
......@@ -120,7 +120,32 @@ def main():
callable(model.is_bbox_normalized):
is_bbox_normalized = model.is_bbox_normalized()
results = eval_run(exe, compile_program, pyreader, keys, values, cls)
sub_eval_prog = None
sub_keys = None
sub_values = None
# build sub-program
if 'Mask' in main_arch and multi_scale_test:
sub_eval_prog = fluid.Program()
with fluid.program_guard(sub_eval_prog, startup_prog):
with fluid.unique_name.guard():
_, feed_vars = create_feed(
eval_feed, use_pyreader=False, sub_prog_feed=True)
sub_fetches = model.eval(
feed_vars, multi_scale_test, mask_branch=True)
extra_keys = []
if cfg.metric == 'COCO':
extra_keys = ['im_id', 'im_shape']
if cfg.metric == 'VOC':
extra_keys = ['gt_box', 'gt_label', 'is_difficult']
sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog,
extra_keys)
sub_eval_prog = sub_eval_prog.clone(True)
if 'weights' in cfg:
checkpoint.load_params(exe, sub_eval_prog, cfg.weights)
results = eval_run(exe, compile_program, pyreader, keys, values, cls, cfg,
sub_eval_prog, sub_keys, sub_values)
# evaluation
resolution = None
......
......@@ -73,9 +73,13 @@ def main():
raise ValueError("'architecture' not specified in config file.")
merge_config(FLAGS.opt)
if 'log_iter' not in cfg:
cfg.log_iter = 20
if 'multi_scale_test' not in cfg:
cfg.multi_scale_test = False
ignore_params = cfg.finetune_exclude_pretrained_params \
if 'finetune_exclude_pretrained_params' in cfg else []
......@@ -140,7 +144,7 @@ def main():
with fluid.unique_name.guard():
model = create(main_arch)
eval_pyreader, feed_vars = create_feed(eval_feed)
fetches = model.eval(feed_vars)
fetches = model.eval(feed_vars, cfg.multi_scale_test)
eval_prog = eval_prog.clone(True)
eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册