diff --git a/PaddleCV/PaddleDetection/configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml b/PaddleCV/PaddleDetection/configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml new file mode 100644 index 0000000000000000000000000000000000000000..3c552bf9c5eb2b628850e89c05d7a22986c91223 --- /dev/null +++ b/PaddleCV/PaddleDetection/configs/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml @@ -0,0 +1,255 @@ +architecture: CascadeMaskRCNN +train_feed: MaskRCNNTrainFeed +eval_feed: MaskRCNNEvalFeed +test_feed: MaskRCNNTestFeed +max_iters: 300000 +snapshot_iter: 10000 +use_gpu: true +log_iter: 20 +log_smooth_window: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar +weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/ +metric: COCO +num_classes: 81 + +CascadeMaskRCNN: + backbone: SENet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + mask_assigner: MaskAssigner + mask_head: MaskHead + +SENet: + depth: 152 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + group_width: 4 + groups: 64 + norm_type: bn + freeze_norm: True + variant: d + dcn_v2_stages: [3, 4, 5] + std_senet: True + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + freeze_norm: False + norm_type: gn + +FPNRPNHead: + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + 
max_level: 5 + min_level: 2 + box_resolution: 7 + sampling_ratio: 2 + mask_resolution: 14 + +MaskHead: + dilation: 1 + conv_dim: 256 + num_convs: 4 + resolution: 28 + norm_type: gn + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_hi: [0.5, 0.6, 0.7] + bg_thresh_lo: [0.0, 0.0, 0.0] + fg_fraction: 0.25 + fg_thresh: [0.5, 0.6, 0.7] + +MaskAssigner: + resolution: 28 + +CascadeBBoxHead: + head: CascadeXConvNormHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +CascadeXConvNormHead: + norm_type: gn + +MultiScaleTEST: + score_thresh: 0.05 + nms_thresh: 0.5 + detections_per_im: 100 + enable_voting: true + vote_thresh: 0.9 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [240000, 280000] + - !LinearWarmup + start_factor: 0.01 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +MaskRCNNTrainFeed: + # batch size per device + batch_size: 1 + dataset: + dataset_dir: dataset/coco + image_dir: train2017 + annotation: annotations/instances_train2017.json + sample_transforms: + - !DecodeImage + to_rgb: False + with_mixup: False + - !RandomFlipImage + is_mask_flip: true + is_normalized: false + prob: 0.5 + - !NormalizeImage + is_channel_first: false + is_scale: False + mean: + - 102.9801 + - 115.9465 + - 122.7717 + std: + - 1.0 + - 1.0 + - 1.0 + - !ResizeImage + interp: 1 + target_size: + - 416 + - 448 + - 480 + - 512 + - 544 + - 576 + - 608 + - 640 + - 672 + - 704 + - 736 + - 768 + - 800 + - 832 + - 864 + - 896 + - 928 + - 960 + - 992 + - 1024 + - 1056 + - 1088 + - 1120 + - 1152 + - 1184 + - 1216 + - 1248 + - 1280 + - 1312 + - 1344 + - 1376 + - 1408 + max_size: 1600 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 8 + +MaskRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: 
annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: False + - !NormalizeImage + is_channel_first: false + is_scale: False + mean: + - 102.9801 + - 115.9465 + - 122.7717 + std: + - 1.0 + - 1.0 + - 1.0 + - !MultiscaleTestResize + origin_target_size: 800 + origin_max_size: 1333 + target_size: + - 400 + - 500 + - 600 + - 700 + - 900 + - 1000 + - 1100 + - 1200 + max_size: 2000 + use_flip: true + - !Permute + channel_first: true + to_bgr: false + batch_transforms: + - !PadMSTest + pad_to_stride: 32 + # num_scale = (len(target_size) + 1) * (1 + use_flip) + num_scale: 18 + num_workers: 2 + +MaskRCNNTestFeed: + batch_size: 1 + dataset: + annotation: dataset/coco/annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml b/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml new file mode 100644 index 0000000000000000000000000000000000000000..c345aeedbe6f65fbe19aaf87ddbadf5ed567c38d --- /dev/null +++ b/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml @@ -0,0 +1,177 @@ +architecture: CascadeRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 90000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +weights: output/cascade_rcnn_r50_fpn_1x/model_final +metric: COCO +num_classes: 81 + +CascadeRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + +ResNet: + norm_type: affine_channel + depth: 50 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + variant: b + +FPN: + min_level: 2 + max_level: 6 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + 
anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + min_level: 2 + max_level: 6 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_positive_overlap: 0.7 + rpn_negative_overlap: 0.3 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + min_level: 2 + max_level: 5 + box_resolution: 7 + sampling_ratio: 2 + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_lo: [0.0, 0.0, 0.0] + bg_thresh_hi: [0.5, 0.6, 0.7] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + +CascadeBBoxHead: + head: CascadeTwoFCHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +CascadeTwoFCHead: + mlp_dim: 1024 + +MultiScaleTEST: + score_thresh: 0.05 + nms_thresh: 0.5 + detections_per_im: 100 + enable_voting: true + vote_thresh: 0.9 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + batch_size: 2 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: + - 0.485 + - 0.456 + - 0.406 + std: + - 0.229 + - 
def dump_value(value):
    """
    Render a config value as a single-line string.

    Args:
        value: a primitive, a dict/tuple/list, or any object exposing
            ``__dict__``.

    Returns:
        str: for containers and objects, the YAML flow-style dump with every
        newline and every ``...`` sequence removed, wrapped in single quotes;
        for anything else, ``str(value)``.
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    needs_yaml = hasattr(value, '__dict__') or isinstance(value,
                                                          (dict, tuple, list))
    if not needs_yaml:
        # primitive types
        return str(value)

    dumped = yaml.dump(value, default_flow_style=True)
    # Strip line breaks first, then the '...' sequences, in that order.
    for token in ('\n', '...'):
        dumped = dumped.replace(token, '')
    return "'{}'".format(dumped)
module + if target_key is not None and not isinstance(target_key, + SharedConfig): + continue # value is given for the module elif shared_conf.key in global_config: # `key` is present in config kwargs[k] = global_config[shared_conf.key] diff --git a/PaddleCV/PaddleDetection/ppdet/data/data_feed.py b/PaddleCV/PaddleDetection/ppdet/data/data_feed.py index 7fab7915441e18a17fe751e114693b5ce556e67c..b70f4be2067cd789a52b553eb7d8beb96bc4be94 100644 --- a/PaddleCV/PaddleDetection/ppdet/data/data_feed.py +++ b/PaddleCV/PaddleDetection/ppdet/data/data_feed.py @@ -27,18 +27,18 @@ from ppdet.data.reader import Reader from ppdet.data.transform.operators import ( DecodeImage, MixupImage, NormalizeBox, NormalizeImage, RandomDistort, RandomFlipImage, RandomInterpImage, ResizeImage, ExpandImage, CropImage, - Permute) - + Permute, MultiscaleTestResize) from ppdet.data.transform.arrange_sample import ( ArrangeRCNN, ArrangeEvalRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD, ArrangeTestSSD, ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO) __all__ = [ - 'PadBatch', 'MultiScale', 'RandomShape', 'DataSet', 'CocoDataSet', - 'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed', - 'MaskRCNNTrainFeed', 'FasterRCNNTestFeed', 'MaskRCNNTestFeed', - 'SSDTrainFeed', 'SSDEvalFeed', 'SSDTestFeed', 'YoloTrainFeed', - 'YoloEvalFeed', 'YoloTestFeed', 'create_reader' + 'PadBatch', 'MultiScale', 'RandomShape', 'PadMSTest', 'DataSet', + 'CocoDataSet', 'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed', + 'MaskRCNNTrainFeed', 'FasterRCNNEvalFeed', 'MaskRCNNEvalFeed', + 'FasterRCNNTestFeed', 'MaskRCNNTestFeed', 'SSDTrainFeed', 'SSDEvalFeed', + 'SSDTestFeed', 'YoloTrainFeed', 'YoloEvalFeed', 'YoloTestFeed', + 'create_reader' ] @@ -113,6 +113,7 @@ def create_reader(feed, max_iter=0, args_path=None, my_source=None): pad = [t for t in batch_transforms if isinstance(t, PadBatch)] rand_shape = [t for t in batch_transforms if isinstance(t, RandomShape)] multi_scale = [t for t in batch_transforms if 
@serializable
class PadMSTest(object):
    """
    Batch transform describing the padding used for multi-scale test.

    The object only carries configuration; it stores the stride it was
    constructed with and performs no padding itself.

    Args:
        pad_to_stride (int): pad images to a multiple of this stride,
            e.g., 32. Defaults to 0.
    """

    def __init__(self, pad_to_stride=0):
        self.pad_to_stride = pad_to_stride
        super(PadMSTest, self).__init__()
use_process=use_process, use_padded_im_info=use_padded_im_info) self.mode = 'VAL' + self.enable_multiscale = enable_multiscale + self.num_scale = num_scale + self.enable_aug_flip = enable_aug_flip @register diff --git a/PaddleCV/PaddleDetection/ppdet/data/reader.py b/PaddleCV/PaddleDetection/ppdet/data/reader.py index 5370bb9e42ef34909d8b41f1ec5c7b475f13357e..b2d4d07850601fb047b7b17fcb7c33ce6f35f6ea 100644 --- a/PaddleCV/PaddleDetection/ppdet/data/reader.py +++ b/PaddleCV/PaddleDetection/ppdet/data/reader.py @@ -68,8 +68,8 @@ class Reader(object): mapped_ds = map(sc, mapper, worker_args) # In VAL mode, gt_bbox, gt_label can be empty, and should # not be dropped - batched_ds = batch(mapped_ds, batchsize, drop_last, - drop_empty=(mode!="VAL")) + batched_ds = batch( + mapped_ds, batchsize, drop_last, drop_empty=(mode != "VAL")) trans_conf = {k.lower(): v for k, v in self._trans_conf[mode].items()} need_keys = { @@ -78,6 +78,8 @@ class Reader(object): 'random_shapes', 'multi_scales', 'use_padded_im_info', + 'enable_multiscale_test', + 'num_scale', } bm_config = { key: value @@ -125,12 +127,15 @@ class Reader(object): return self._make_reader('TEST') @classmethod - def create(cls, mode, data_config, - transform_config, max_iter=-1, - my_source=None, ret_iter=True): + def create(cls, + mode, + data_config, + transform_config, + max_iter=-1, + my_source=None, + ret_iter=True): """ create a specific reader """ - reader = Reader({mode: data_config}, - {mode: transform_config}, max_iter) + reader = Reader({mode: data_config}, {mode: transform_config}, max_iter) if ret_iter: return reader._make_reader(mode, my_source) else: diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py b/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py index e082c2dd72b53bf2ea4601584f1b9428e8ef617f..bebce691d36ddb12141dd7bfdf81030ff8ed2d1f 100644 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py +++ 
b/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py @@ -110,8 +110,11 @@ class ArrangeEvalRCNN(BaseOperator): (image, im_info, im_id, im_shape, gt_bbox, gt_class, difficult) """ - im = sample['image'] - keys = list(sample.keys()) + ims = [] + keys = sorted(list(sample.keys())) + for k in keys: + if 'image' in k: + ims.append(sample[k]) if 'im_info' in keys: im_info = sample['im_info'] else: @@ -127,7 +130,9 @@ class ArrangeEvalRCNN(BaseOperator): gt_bbox = sample['gt_bbox'] gt_class = sample['gt_class'] difficult = sample['difficult'] - outs = (im, im_info, im_id, im_shape, gt_bbox, gt_class, difficult) + remain_list = [im_info, im_id, im_shape, gt_bbox, gt_class, difficult] + ims.extend(remain_list) + outs = tuple(ims) return outs @@ -148,10 +153,13 @@ class ArrangeTestRCNN(BaseOperator): context: a dict which contains additional info. Returns: sample: a tuple containing the following items: - (image, im_info, im_id) + (image, im_info, im_id, im_shape) """ - im = sample['image'] - keys = list(sample.keys()) + ims = [] + keys = sorted(list(sample.keys())) + for k in keys: + if 'image' in k: + ims.append(sample[k]) if 'im_info' in keys: im_info = sample['im_info'] else: @@ -164,7 +172,9 @@ class ArrangeTestRCNN(BaseOperator): # bbox prediction needs im_info as input in format of [N, 3], # so im_shape is appended by 1 to match dimension. 
@register_op
class MultiscaleTestResize(BaseOperator):
    """
    Build the set of resized images used by multi-scale test.

    For the input image, and for its horizontal flip when ``use_flip`` is
    set, the operator stores one copy resized with the original test setting
    plus one copy per entry of ``target_size``.
    """

    def __init__(self,
                 origin_target_size=800,
                 origin_max_size=1333,
                 target_size=[],
                 max_size=2000,
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to the each size in target size, and capped at max_size.

        Args:
            origin_target_size(int): original target size of image's short side.
            origin_max_size(int): original max size of image.
            target_size (list): A list of target sizes of image's short side.
            max_size (int): the max size of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.

        Raises:
            TypeError: if ``target_size`` is not a list.
        """
        super(MultiscaleTestResize, self).__init__()
        # The numeric options are coerced with int(); a value that cannot be
        # coerced raises from int() itself, so no later type check is needed.
        self.origin_target_size = int(origin_target_size)
        self.origin_max_size = int(origin_max_size)
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_flip = use_flip

        if not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid. Must be List, now is {}".
                format(type(target_size)))
        # Keep a private copy so instances never alias the shared default
        # list (or a list the caller goes on to modify).
        self.target_size = list(target_size)

    @staticmethod
    def _compute_scale(short_target, long_cap, im_size_min, im_size_max):
        """Return the scale that maps the short side to ``short_target``,
        reduced so that the long side does not exceed ``long_cap``."""
        im_scale = float(short_target) / float(im_size_min)
        # Prevent the biggest axis from being more than the cap
        if np.round(im_scale * im_size_max) > long_cap:
            im_scale = float(long_cap) / float(im_size_max)
        return im_scale

    def _resize(self, im, im_scale):
        """Resize ``im`` by ``im_scale`` along both axes."""
        return cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=self.interp)

    def __call__(self, sample, context=None):
        """
        Resize the image numpy for multi-scale test.

        Args:
            sample (dict): must hold a 3-dimensional ``np.ndarray`` under
                ``'image'``.
            context: unused.

        Returns:
            dict: ``sample`` with ``'image'`` (and ``'flip_image'`` when
            flipping is enabled) replaced by the copy resized with the
            original setting, one ``'<name>_scale_<i>'`` entry per target
            size, and ``'im_info'`` holding ``[resize_h, resize_w, im_scale]``
            for every produced image, in production order, as float32.

        Raises:
            TypeError: if the image is not a numpy array.
            ImageError: if the image is not 3-dimensional.
            ZeroDivisionError: if the shorter image side is 0.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if float(im_size_min) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))

        # The flipped copy has the same shape as the original, so the scales
        # are computed once and shared by both.
        origin_scale = self._compute_scale(self.origin_target_size,
                                           self.origin_max_size, im_size_min,
                                           im_size_max)
        extra_scales = [
            self._compute_scale(size, self.max_size, im_size_min, im_size_max)
            for size in self.target_size
        ]

        # Resizing always starts from the untouched source arrays.
        origin_ims = [('image', im)]
        if self.use_flip:
            origin_ims.append(('flip_image', im[:, ::-1, :]))

        im_info = []
        for base_name, origin_im in origin_ims:
            sample[base_name] = self._resize(origin_im, origin_scale)
            im_info.extend([
                np.round(origin_scale * float(im_shape[0])),
                np.round(origin_scale * float(im_shape[1])), origin_scale
            ])
            for i, im_scale in enumerate(extra_scales):
                name = base_name + '_scale_' + str(i)
                sample[name] = self._resize(origin_im, im_scale)
                im_info.extend([
                    np.round(im_scale * float(im_shape[0])),
                    np.round(im_scale * float(im_shape[1])), im_scale
                ])
        sample['im_info'] = np.array(im_info, dtype=np.float32)
        return sample
@register_op class ResizeImage(BaseOperator): def __init__(self, @@ -183,9 +282,12 @@ class ResizeImage(BaseOperator): resize_w = np.round(im_scale_x * float(im_shape[1])) resize_h = np.round(im_scale_y * float(im_shape[0])) - - sample['im_info'] = np.array( - [resize_h, resize_w, im_scale], dtype=np.float32) + im_info = [resize_h, resize_w, im_scale] + if 'im_info' in sample and sample['im_info'][2] != 1.: + sample['im_info'] = np.append( + list(sample['im_info']), im_info).astype(np.float32) + else: + sample['im_info'] = np.array(im_info).astype(np.float32) else: im_scale_x = float(selected_size) / float(im_shape[1]) im_scale_y = float(selected_size) / float(im_shape[0]) @@ -331,19 +433,21 @@ class NormalizeImage(BaseOperator): 1.(optional) Scale the image to [0,1] 2. Each pixel minus mean and is divided by std """ - im = sample['image'] - im = im.astype(np.float32, copy=False) - if self.is_channel_first: - mean = np.array(self.mean)[:, np.newaxis, np.newaxis] - std = np.array(self.std)[:, np.newaxis, np.newaxis] - else: - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - if self.is_scale: - im = im / 255.0 - im -= mean - im /= std - sample['image'] = im + for k in sample.keys(): + if 'image' in k: + im = sample[k] + im = im.astype(np.float32, copy=False) + if self.is_channel_first: + mean = np.array(self.mean)[:, np.newaxis, np.newaxis] + std = np.array(self.std)[:, np.newaxis, np.newaxis] + else: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + if self.is_scale: + im = im / 255.0 + im -= mean + im /= std + sample[k] = im return sample @@ -785,13 +889,15 @@ class Permute(BaseOperator): def __call__(self, sample, context=None): assert 'image' in sample, "image data not found" - im = sample['image'] - if self.channel_first: - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - if self.to_bgr: - im = im[[2, 1, 0], :, :] - sample['image'] 
def padding_multiscale_test(batch_data):
    """
    Pad every multi-scale test image up to a multiple of `coarsest_stride`.

    `coarsest_stride` and `num_scale` are read from the enclosing scope.
    The first `num_scale` fields of the single sample are treated as
    channel-first images; the field at index `num_scale` is updated in
    place so that, for image i, entries [3 * i, 3 * i + 1] hold the padded
    height and width. Remaining fields are passed through unchanged.

    Args:
        batch_data (list): a batch that must contain exactly one sample.

    Returns:
        list: a one-element list holding the padded sample as a tuple, or
        `batch_data` itself when `coarsest_stride` is not greater than 1.

    Raises:
        NotImplementedError: if the batch does not hold exactly one sample.
    """
    batch_size = len(batch_data)
    if batch_size != 1:
        raise NotImplementedError(
            "Batch size must be 1 when using multiscale test, but now batch size is {}".
            format(batch_size))
    if not coarsest_stride > 1:
        # no need to padding
        return batch_data

    def _round_up(length):
        # smallest multiple of the stride that is >= length
        return int(np.ceil(length / coarsest_stride) * coarsest_stride)

    fields = batch_data[0]
    padded_fields = []
    for idx, field in enumerate(fields):
        if idx >= num_scale:
            padded_fields.append(field)
            continue
        channels, height, width = field.shape
        padded_h = _round_up(height)
        padded_w = _round_up(width)
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        canvas[:, :height, :width] = field
        # record the padded size for this image
        fields[num_scale][3 * idx:3 * idx + 2] = [padded_h, padded_w]
        padded_fields.append(canvas)
    return [tuple(padded_fields)]
def build_multi_scale(self, feed_vars, mask_branch=False):
    """
    Build the multi-scale test graph, one sub-graph per input image.

    Every entry of `feed_vars` whose key contains 'image' is treated as one
    input image; image number i uses columns [3 * i, 3 * i + 3) of
    `feed_vars['im_info']`.

    Args:
        feed_vars (dict): must contain 'image' and 'im_info'. When
            `mask_branch` is False it must also contain 'im_shape'. When
            `mask_branch` is True it must contain 'bbox', plus 'bbox_flip'
            if any image variable has 'flip' in its name.
        mask_branch (bool): False builds the cascade box heads and stores
            'bbox_<i>' / 'score_<i>' (with a '_flip' suffix for flipped
            images). True builds the mask head on the given boxes and
            stores 'mask_pred_<i>' (same suffix rule).

    Returns:
        dict: the backbone features of every image plus the outputs named
        above; in box mode it also contains all of `feed_vars`.
    """
    required_fields = ['image', 'im_info']
    self._input_check(required_fields, feed_vars)

    # NOTE(review): the image order comes from the dict's key order and
    # must match the layout of 'im_info' -- confirm against the feeder.
    ims = [feed_vars[k] for k in feed_vars.keys() if 'image' in k]
    result = {}

    if not mask_branch:
        assert 'im_shape' in feed_vars, \
            "{} has no im_shape field".format(feed_vars)
        result.update(feed_vars)

    for i, im in enumerate(ims):
        im_info = fluid.layers.slice(
            input=feed_vars['im_info'],
            axes=[1],
            starts=[3 * i],
            ends=[3 * i + 3])
        body_feats = self.backbone(im)
        result.update(body_feats)

        # FPN
        # NOTE(review): spatial_scale is only bound when self.fpn is set,
        # so this path appears to require an FPN -- confirm.
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)
        rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test')
        if not mask_branch:
            im_shape = feed_vars['im_shape']
            proposal_list = []
            roi_feat_list = []
            rcnn_pred_list = []

            proposals = None
            bbox_pred = None
            # The stage counter must not reuse `i`: the image index is
            # still needed below to give each image its own output names.
            for stage in range(3):
                if stage > 0:
                    refined_bbox = self._decode_box(
                        proposals,
                        bbox_pred,
                        curr_stage=stage - 1, )
                else:
                    refined_bbox = rois

                proposals = refined_bbox
                proposal_list.append(proposals)

                # extract roi features
                roi_feat = self.roi_extractor(body_feats, proposals,
                                              spatial_scale)
                roi_feat_list.append(roi_feat)

                # bbox head
                cls_score, bbox_pred = self.bbox_head.get_output(
                    roi_feat,
                    wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
                    name='_' + str(stage + 1) if stage > 0 else '')
                rcnn_pred_list.append((cls_score, bbox_pred))

            pred = self.bbox_head.get_prediction(
                im_info,
                im_shape,
                roi_feat_list,
                rcnn_pred_list,
                proposal_list,
                self.cascade_bbox_reg_weights,
                return_box_score=True)
            bbox_name = 'bbox_' + str(i)
            score_name = 'score_' + str(i)
            if 'flip' in im.name:
                bbox_name += '_flip'
                score_name += '_flip'
            result[bbox_name] = pred['bbox']
            result[score_name] = pred['score']
        else:
            mask_name = 'mask_pred_' + str(i)
            bbox_pred = feed_vars['bbox']
            result.update({im.name: im})
            if 'flip' in im.name:
                mask_name += '_flip'
                bbox_pred = feed_vars['bbox_flip']
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats,
                spatial_scale,
                im_info,
                mask_name,
                bbox_pred=bbox_pred,
                use_multi_test=True)
            result[mask_name] = mask_pred
    return result
fluid.layers.fill_constant([1, 1], value=6, dtype='int32') - cond = fluid.layers.less_than(x=bbox_size, y=size) - - mask_pred = fluid.layers.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=False, - name='mask_pred') - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(cond): - fluid.layers.assign(input=bbox_pred, output=mask_pred) - with switch.default(): - bbox = fluid.layers.slice( - bbox_pred, [1], starts=[2], ends=[6]) - - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, bbox) - - mask_rois = bbox * im_scale - if self.fpn is None: - mask_feat = self.roi_extractor(last_feat, mask_rois) - mask_feat = self.bbox_head.get_head_feat(mask_feat) - else: - mask_feat = self.roi_extractor( - body_feats, mask_rois, spatial_scale, is_mask=True) + # share weight + bbox_shape = fluid.layers.shape(bbox_pred) + bbox_size = fluid.layers.reduce_prod(bbox_shape) + bbox_size = fluid.layers.reshape(bbox_size, [1, 1]) + size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32') + cond = fluid.layers.less_than(x=bbox_size, y=size) + + mask_pred = fluid.layers.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=False, + name=mask_name) + with fluid.layers.control_flow.Switch() as switch: + with switch.case(cond): + fluid.layers.assign(input=bbox_pred, output=mask_pred) + with switch.default(): + bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6]) - mask_out = self.mask_head.get_prediction(mask_feat, bbox) - fluid.layers.assign(input=mask_out, output=mask_pred) - return {'bbox': bbox_pred, 'mask': mask_pred} + im_scale = fluid.layers.slice( + im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, bbox) + + mask_rois = bbox * im_scale + if self.fpn is None: + mask_feat = self.roi_extractor(last_feat, mask_rois) + mask_feat = self.bbox_head.get_head_feat(mask_feat) + else: + mask_feat = 
self.roi_extractor( + body_feats, mask_rois, spatial_scale, is_mask=True) + + mask_out = self.mask_head.get_prediction(mask_feat, bbox) + fluid.layers.assign(input=mask_out, output=mask_pred) + return mask_pred, bbox_pred + + def _input_check(self, require_fields, feed_vars): + for var in require_fields: + assert var in feed_vars, \ + "{} has no {} field".format(feed_vars, var) def _decode_box(self, proposals, bbox_pred, curr_stage): rcnn_loc_delta_r = fluid.layers.reshape( @@ -269,7 +375,9 @@ class CascadeMaskRCNN(object): def train(self, feed_vars): return self.build(feed_vars, 'train') - def eval(self, feed_vars): + def eval(self, feed_vars, multi_scale=None, mask_branch=False): + if multi_scale: + return self.build_multi_scale(feed_vars, mask_branch) return self.build(feed_vars, 'test') def test(self, feed_vars): diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py index 647d8bbc58b02782f96f0d5e2bcf63046b406d32..b80a8d7f62ccd13f632fe8124372156d656d2abc 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py @@ -74,16 +74,13 @@ class CascadeRCNN(object): self.cascade_rcnn_loss_weight = [1.0, 0.5, 0.25] def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - assert mode in ['train', 'test'], \ - "only 'train' and 'test' mode is supported" if mode == 'train': required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info'] else: required_fields = ['im_shape', 'im_info'] - for var in required_fields: - assert var in feed_vars, \ - "{} has no {} field".format(feed_vars, var) + self._input_check(required_fields, feed_vars) + + im = feed_vars['image'] im_info = feed_vars['im_info'] if mode == 'train': @@ -171,6 +168,98 @@ class CascadeRCNN(object): self.cls_agnostic_bbox_reg) return pred + def build_multi_scale(self, feed_vars): + required_fields = 
['image', 'im_shape', 'im_info'] + self._input_check(required_fields, feed_vars) + ims = [] + for k in feed_vars.keys(): + if 'image' in k: + ims.append(feed_vars[k]) + result = {} + result.update(feed_vars) + for i, im in enumerate(ims): + im_info = fluid.layers.slice( + input=feed_vars['im_info'], + axes=[1], + starts=[3 * i], + ends=[3 * i + 3]) + im_shape = feed_vars['im_shape'] + + # backbone + body_feats = self.backbone(im) + result.update(body_feats) + body_feat_names = list(body_feats.keys()) + + # FPN + if self.fpn is not None: + body_feats, spatial_scale = self.fpn.get_output(body_feats) + + # rpn proposals + rpn_rois = self.rpn_head.get_proposals( + body_feats, im_info, mode='test') + + proposal_list = [] + roi_feat_list = [] + rcnn_pred_list = [] + + proposals = None + bbox_pred = None + for i in range(3): + if i > 0: + refined_bbox = self._decode_box( + proposals, + bbox_pred, + curr_stage=i - 1, ) + else: + refined_bbox = rpn_rois + + proposals = refined_bbox + proposal_list.append(proposals) + + # extract roi features + roi_feat = self.roi_extractor(body_feats, proposals, + spatial_scale) + roi_feat_list.append(roi_feat) + + # bbox head + cls_score, bbox_pred = self.bbox_head.get_output( + roi_feat, + wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i], + name='_' + str(i + 1) if i > 0 else '') + rcnn_pred_list.append((cls_score, bbox_pred)) + + # get mask rois + rois = proposal_list[2] + + if self.fpn is None: + last_feat = body_feats[list(body_feats.keys())[-1]] + roi_feat = self.roi_extractor(last_feat, rois) + else: + roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) + + pred = self.bbox_head.get_prediction( + im_info, + im_shape, + roi_feat_list, + rcnn_pred_list, + proposal_list, + self.cascade_bbox_reg_weights, + self.cls_agnostic_bbox_reg, + return_box_score=True) + bbox_name = 'bbox_' + str(i) + score_name = 'score_' + str(i) + if 'flip' in im.name: + bbox_name += '_flip' + score_name += '_flip' + result[bbox_name] = pred['bbox'] + 
result[score_name] = pred['score'] + return result + + def _input_check(self, require_fields, feed_vars): + for var in require_fields: + assert var in feed_vars, \ + "{} has no {} field".format(feed_vars, var) + def _decode_box(self, proposals, bbox_pred, curr_stage): rcnn_loc_delta_r = fluid.layers.reshape( bbox_pred, (-1, self.cls_agnostic_bbox_reg, 4)) @@ -191,7 +280,9 @@ class CascadeRCNN(object): def train(self, feed_vars): return self.build(feed_vars, 'train') - def eval(self, feed_vars): + def eval(self, feed_vars, multi_scale=None): + if multi_scale: + return self.build_multi_scale(feed_vars) return self.build(feed_vars, 'test') def test(self, feed_vars): diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py index 6985598670717df9f9fea580ee7922c2cb981acf..e0ef7355c0d358d7b409ce3080f2416cd38de0b3 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py @@ -63,6 +63,12 @@ class FasterRCNN(object): self.rpn_only = rpn_only def build(self, feed_vars, mode='train'): + if mode == 'train': + required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info'] + else: + required_fields = ['im_shape', 'im_info'] + self._input_check(required_fields, feed_vars) + im = feed_vars['image'] im_info = feed_vars['im_info'] if mode == 'train': @@ -136,10 +142,62 @@ class FasterRCNN(object): im_shape) return pred + def build_multi_scale(self, feed_vars): + required_fields = ['image', 'im_info', 'im_shape'] + self._input_check(required_fields, feed_vars) + ims = [] + for k in feed_vars.keys(): + if 'image' in k: + ims.append(feed_vars[k]) + result = {} + result.update(feed_vars) + for i, im in enumerate(ims): + im_info = fluid.layers.slice( + input=feed_vars['im_info'], + axes=[1], + starts=[3 * i], + ends=[3 * i + 3]) + im_shape = feed_vars['im_shape'] + body_feats = self.backbone(im) 
+ result.update(body_feats) + body_feat_names = list(body_feats.keys()) + + if self.fpn is not None: + body_feats, spatial_scale = self.fpn.get_output(body_feats) + + rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test') + + if self.fpn is None: + # in models without FPN, roi extractor only uses the last level of + # feature maps. And body_feat_names[-1] represents the name of + # last feature map. + body_feat = body_feats[body_feat_names[-1]] + roi_feat = self.roi_extractor(body_feat, rois) + else: + roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) + + pred = self.bbox_head.get_prediction( + roi_feat, rois, im_info, im_shape, return_box_score=True) + bbox_name = 'bbox_' + str(i) + score_name = 'score_' + str(i) + if 'flip' in im.name: + bbox_name += '_flip' + score_name += '_flip' + result[bbox_name] = pred['bbox'] + result[score_name] = pred['score'] + return result + + def _input_check(self, require_fields, feed_vars): + for var in require_fields: + assert var in feed_vars, \ + "{} has no {} field".format(feed_vars, var) + def train(self, feed_vars): return self.build(feed_vars, 'train') - def eval(self, feed_vars): + def eval(self, feed_vars, multi_scale=None): + if multi_scale: + return self.build_multi_scale(feed_vars) return self.build(feed_vars, 'test') def test(self, feed_vars): diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py index 97eacbf07be7cb0fe3624fad9c7f90bb8721d26e..74631b95eedeb679a528dc12b753d8f5df96d54e 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py @@ -69,18 +69,14 @@ class MaskRCNN(object): self.fpn = fpn def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - assert mode in ['train', 'test'], \ - "only 'train' and 'test' mode is supported" if mode == 'train': required_fields = [ 'gt_label', 
'gt_box', 'gt_mask', 'is_crowd', 'im_info' ] else: required_fields = ['im_shape', 'im_info'] - for var in required_fields: - assert var in feed_vars, \ - "{} has no {} field".format(feed_vars, var) + self._input_check(required_fields, feed_vars) + im = feed_vars['image'] im_info = feed_vars['im_info'] mixed_precision_enabled = mixed_precision_global_state() is not None @@ -153,57 +149,135 @@ class MaskRCNN(object): im_scale = fluid.layers.sequence_expand(im_scale, rois) rois = rois / im_scale return {'proposal': rois} - if self.fpn is None: - last_feat = body_feats[list(body_feats.keys())[-1]] - roi_feat = self.roi_extractor(last_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) + mask_name = 'mask_pred' + mask_pred, bbox_pred = self.single_scale_eval( + body_feats, mask_name, rois, im_info, feed_vars['im_shape'], + spatial_scale) + return {'bbox': bbox_pred, 'mask': mask_pred} + def build_multi_scale(self, feed_vars, mask_branch=False): + required_fields = ['image', 'im_info'] + self._input_check(required_fields, feed_vars) + + ims = [] + for k in feed_vars.keys(): + if 'image' in k: + ims.append(feed_vars[k]) + result = {} + + if not mask_branch: + assert 'im_shape' in feed_vars, \ + "{} has no im_shape field".format(feed_vars) + result.update(feed_vars) + + for i, im in enumerate(ims): + im_info = fluid.layers.slice( + input=feed_vars['im_info'], + axes=[1], + starts=[3 * i], + ends=[3 * i + 3]) + body_feats = self.backbone(im) + result.update(body_feats) + + # FPN + if self.fpn is not None: + body_feats, spatial_scale = self.fpn.get_output(body_feats) + rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test') + if not mask_branch: + im_shape = feed_vars['im_shape'] + body_feat_names = list(body_feats.keys()) + if self.fpn is None: + body_feat = body_feats[body_feat_names[-1]] + roi_feat = self.roi_extractor(body_feat, rois) + else: + roi_feat = self.roi_extractor(body_feats, rois, + spatial_scale) + pred = 
self.bbox_head.get_prediction( + roi_feat, rois, im_info, im_shape, return_box_score=True) + bbox_name = 'bbox_' + str(i) + score_name = 'score_' + str(i) + if 'flip' in im.name: + bbox_name += '_flip' + score_name += '_flip' + result[bbox_name] = pred['bbox'] + result[score_name] = pred['score'] + else: + mask_name = 'mask_pred_' + str(i) + bbox_pred = feed_vars['bbox'] + result.update({im.name: im}) + if 'flip' in im.name: + mask_name += '_flip' + bbox_pred = feed_vars['bbox_flip'] + mask_pred, bbox_pred = self.single_scale_eval( + body_feats, mask_name, rois, im_info, feed_vars['im_shape'], + spatial_scale, bbox_pred) + result[mask_name] = mask_pred + return result + + def single_scale_eval(self, + body_feats, + mask_name, + rois, + im_info, + im_shape, + spatial_scale, + bbox_pred=None): + if self.fpn is None: + last_feat = body_feats[list(body_feats.keys())[-1]] + roi_feat = self.roi_extractor(last_feat, rois) + else: + roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) + if not bbox_pred: bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info, - feed_vars['im_shape']) + im_shape) bbox_pred = bbox_pred['bbox'] - # share weight - bbox_shape = fluid.layers.shape(bbox_pred) - bbox_size = fluid.layers.reduce_prod(bbox_shape) - bbox_size = fluid.layers.reshape(bbox_size, [1, 1]) - size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32') - cond = fluid.layers.less_than(x=bbox_size, y=size) - - mask_pred = fluid.layers.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=False, - name='mask_pred') - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(cond): - fluid.layers.assign(input=bbox_pred, output=mask_pred) - with switch.default(): - bbox = fluid.layers.slice( - bbox_pred, [1], starts=[2], ends=[6]) - - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, bbox) - - mask_rois = bbox * im_scale - if self.fpn is None: - 
mask_feat = self.roi_extractor(last_feat, mask_rois) - mask_feat = self.bbox_head.get_head_feat(mask_feat) - else: - mask_feat = self.roi_extractor( - body_feats, mask_rois, spatial_scale, is_mask=True) - - mask_out = self.mask_head.get_prediction(mask_feat, bbox) - fluid.layers.assign(input=mask_out, output=mask_pred) - return {'bbox': bbox_pred, 'mask': mask_pred} + # share weight + bbox_shape = fluid.layers.shape(bbox_pred) + bbox_size = fluid.layers.reduce_prod(bbox_shape) + bbox_size = fluid.layers.reshape(bbox_size, [1, 1]) + size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32') + cond = fluid.layers.less_than(x=bbox_size, y=size) + + mask_pred = fluid.layers.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=False, + name=mask_name) + with fluid.layers.control_flow.Switch() as switch: + with switch.case(cond): + fluid.layers.assign(input=bbox_pred, output=mask_pred) + with switch.default(): + bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6]) + + im_scale = fluid.layers.slice( + im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, bbox) + + mask_rois = bbox * im_scale + if self.fpn is None: + mask_feat = self.roi_extractor(last_feat, mask_rois) + mask_feat = self.bbox_head.get_head_feat(mask_feat) + else: + mask_feat = self.roi_extractor( + body_feats, mask_rois, spatial_scale, is_mask=True) + + mask_out = self.mask_head.get_prediction(mask_feat, bbox) + fluid.layers.assign(input=mask_out, output=mask_pred) + return mask_pred, bbox_pred + + def _input_check(self, require_fields, feed_vars): + for var in require_fields: + assert var in feed_vars, \ + "{} has no {} field".format(feed_vars, var) def train(self, feed_vars): return self.build(feed_vars, 'train') - def eval(self, feed_vars): + def eval(self, feed_vars, multi_scale=None, mask_branch=False): + if multi_scale: + return self.build_multi_scale(feed_vars, mask_branch) return self.build(feed_vars, 'test') def 
test(self, feed_vars): diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/fpn.py b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/fpn.py index 823f7c2857622718281e7dda23f64144aeae2102..762df42d1c8e44588a50f4826638e6d218a3d14c 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/fpn.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/fpn.py @@ -17,7 +17,7 @@ from __future__ import division from __future__ import print_function from collections import OrderedDict - +import copy from paddle import fluid from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Xavier @@ -110,6 +110,7 @@ class FPN(object): their name. spatial_scale(list): A list of multiplicative spatial scale factor. """ + spatial_scale = copy.deepcopy(self.spatial_scale) body_name_list = list(body_dict.keys())[::-1] num_backbone_stages = len(body_name_list) self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] @@ -179,7 +180,7 @@ class FPN(object): fpn_dict[fpn_name] = fpn_output fpn_name_list.append(fpn_name) if not self.has_extra_convs and self.max_level - self.min_level == len( - self.spatial_scale): + spatial_scale): body_top_name = fpn_name_list[0] body_top_extension = fluid.layers.pool2d( fpn_dict[body_top_name], @@ -189,9 +190,9 @@ class FPN(object): name=body_top_name + '_subsampled_2x') fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5) + spatial_scale.insert(0, spatial_scale[0] * 0.5) # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(self.spatial_scale) - 1 + highest_backbone_level = self.min_level + len(spatial_scale) - 1 if self.has_extra_convs and self.max_level > highest_backbone_level: fpn_blob = body_dict[body_name_list[0]] for i in range(highest_backbone_level + 1, self.max_level + 1): @@ -215,6 +216,6 @@ class FPN(object): name=fpn_name) 
fpn_dict[fpn_name] = fpn_blob fpn_name_list.insert(0, fpn_name) - self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5) + spatial_scale.insert(0, spatial_scale[0] * 0.5) res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, self.spatial_scale + return res_dict, spatial_scale diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py b/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py index 1f16efe3f887eaf98e2318f5bc18a111fa5edefc..6b407cfbd82d1588cbc020b3e8bf5e7d5f4b930c 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py @@ -17,6 +17,7 @@ from __future__ import print_function from __future__ import division from collections import OrderedDict +from ppdet.data.transform.operators import * from paddle import fluid @@ -38,7 +39,7 @@ feed_var_def = [ # yapf: enable -def create_feed(feed, use_pyreader=True): +def create_feed(feed, use_pyreader=True, sub_prog_feed=False): image_shape = feed.image_shape feed_var_map = {var['name']: var for var in feed_var_def} feed_var_map['image'] = { @@ -60,6 +61,58 @@ def create_feed(feed, use_pyreader=True): feed_var_map['gt_box']['lod_level'] = 0 feed_var_map['is_difficult']['lod_level'] = 0 + base_name_list = ['image'] + num_scale = getattr(feed, 'num_scale', 1) + sample_transform = feed.sample_transforms + multiscale_test = False + aug_flip = False + for t in sample_transform: + if isinstance(t, MultiscaleTestResize): + multiscale_test = True + aug_flip = t.use_flip + assert (len(t.target_size)+1)*(aug_flip+1) == num_scale, \ + "num_scale: {} is not equal to the actual number of scale: {}."\ + .format(num_scale, (len(t.target_size)+1)*(aug_flip+1)) + break + + if aug_flip: + num_scale //= 2 + base_name_list.insert(0, 'flip_image') + feed_var_map['flip_image'] = { + 'name': 'flip_image', + 'shape': image_shape, + 'dtype': 'float32', + 'lod_level': 0 + } + + image_name_list = [] + if multiscale_test: + for 
base_name in base_name_list: + for i in range(0, num_scale): + name = base_name if i == 0 else base_name + '_scale_' + str(i - + 1) + feed_var_map[name] = { + 'name': name, + 'shape': image_shape, + 'dtype': 'float32', + 'lod_level': 0 + } + image_name_list.append(name) + feed_var_map['im_info']['shape'] = [feed.num_scale * 3] + feed.fields = image_name_list + feed.fields[1:] + if sub_prog_feed: + box_names = ['bbox', 'bbox_flip'] + for box_name in box_names: + sub_prog_feed = { + 'name': box_name, + 'shape': [6], + 'dtype': 'float32', + 'lod_level': 1 + } + + feed.fields = feed.fields + [box_name] + feed_var_map[box_name] = sub_prog_feed + feed_vars = OrderedDict([(key, fluid.layers.data( name=feed_var_map[key]['name'], shape=feed_var_map[key]['shape'], diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py index 16a7db529dfd8f0740ceae04210a86a9b3e6ac35..314aeb6087e05aa1c33d26b6b838075523ff2b10 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py @@ -280,7 +280,12 @@ class BBoxHead(object): loss_bbox = fluid.layers.reduce_mean(loss_bbox) return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - def get_prediction(self, roi_feat, rois, im_info, im_shape): + def get_prediction(self, + roi_feat, + rois, + im_info, + im_shape, + return_box_score=False): """ Get prediction bounding box in test stage. 
@@ -308,5 +313,7 @@ class BBoxHead(object): bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) decoded_box = self.box_coder(prior_box=boxes, target_box=bbox_pred) cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) + if return_box_score: + return {'bbox': cliped_box, 'score': cls_prob} pred_result = self.nms(bboxes=cliped_box, scores=cls_prob) return {'bbox': pred_result} diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py index 2068b1d8d73c2e4a98143ce38bfecfbee79a7c35..d36ff4c7541d8825e9491e696f787e10ef95b97e 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py @@ -146,7 +146,8 @@ class CascadeBBoxHead(object): rcnn_pred_list, proposal_list, cascade_bbox_reg_weights, - cls_agnostic_bbox_reg=2): + cls_agnostic_bbox_reg=2, + return_box_score=False): """ Get prediction bounding box in test stage. 
: @@ -214,7 +215,8 @@ class CascadeBBoxHead(object): axis=1) box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - + if return_box_score: + return {'bbox': box_out, 'score': boxes_cls_prob_mean} pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean) return {"bbox": pred_result} diff --git a/PaddleCV/PaddleDetection/ppdet/utils/cli.py b/PaddleCV/PaddleDetection/ppdet/utils/cli.py index 1bec22894bbca2e87a6de75fc03c699ef31e89ce..b8ba59d78f1ddf606012fd0cf6d71a71d79eea05 100644 --- a/PaddleCV/PaddleDetection/ppdet/utils/cli.py +++ b/PaddleCV/PaddleDetection/ppdet/utils/cli.py @@ -16,7 +16,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter import yaml import re -from ppdet.core.workspace import get_registered_modules +from ppdet.core.workspace import get_registered_modules, dump_value __all__ = ['ColorTTY', 'ArgsParser'] diff --git a/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py index 2af875e45ca19f7ac5af901eb4733321b52299fe..dbded30197b46e12282d50991406a6585c0cb572 100644 --- a/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py +++ b/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py @@ -24,6 +24,7 @@ import time import paddle.fluid as fluid from ppdet.utils.voc_eval import bbox_eval as voc_bbox_eval +from ppdet.utils.post_process import mstest_box_post_process, mstest_mask_post_process, box_flip __all__ = ['parse_fetches', 'eval_run', 'eval_results', 'json_eval_results'] @@ -57,7 +58,52 @@ def parse_fetches(fetches, prog=None, extra_keys=None): return keys, values, cls -def eval_run(exe, compile_program, pyreader, keys, values, cls): +def length2lod(length_lod): + offset_lod = [0] + for i in length_lod: + offset_lod.append(offset_lod[-1] + i) + return [offset_lod] + + +def get_sub_feed(input, place): + new_dict = {} + res_feed = {} + key_name = ['bbox', 'im_info', 'im_id', 'im_shape', 'bbox_flip'] + for k in key_name: + if k in input.keys(): + new_dict[k] = 
input[k] + for k in input.keys(): + if 'image' in k: + new_dict[k] = input[k] + for k, v in new_dict.items(): + data_t = fluid.LoDTensor() + data_t.set(v[0], place) + if 'bbox' in k: + lod = length2lod(v[1][0]) + data_t.set_lod(lod) + res_feed[k] = data_t + return res_feed + + +def clean_res(result, keep_name_list): + clean_result = {} + for k in result.keys(): + if k in keep_name_list: + clean_result[k] = result[k] + result.clear() + return clean_result + + +def eval_run(exe, + compile_program, + pyreader, + keys, + values, + cls, + cfg=None, + sub_prog=None, + sub_keys=None, + sub_values=None): """ Run evaluation program, return program outputs. """ @@ -84,6 +130,28 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls): k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(keys, outs) } + multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) + mask_multi_scale_test = multi_scale_test and 'Mask' in cfg.architecture + + if multi_scale_test: + post_res = mstest_box_post_process(res, cfg) + res.update(post_res) + if mask_multi_scale_test: + place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() + sub_feed = get_sub_feed(res, place) + sub_prog_outs = exe.run(sub_prog, + feed=sub_feed, + fetch_list=sub_values, + return_numpy=False) + sub_prog_res = { + k: (np.array(v), v.recursive_sequence_lengths()) + for k, v in zip(sub_keys, sub_prog_outs) + } + post_res = mstest_mask_post_process(sub_prog_res, cfg) + res.update(post_res) + if multi_scale_test: + res = clean_res( + res, ['im_info', 'bbox', 'im_id', 'im_shape', 'mask']) results.append(res) if iter_id % 100 == 0: logger.info('Test iter {}'.format(iter_id)) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/post_process.py b/PaddleCV/PaddleDetection/ppdet/utils/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..cc80bc186626c4e983328ffd53f53d217230a244 --- /dev/null +++ b/PaddleCV/PaddleDetection/ppdet/utils/post_process.py @@ -0,0 +1,212 @@ +# 
Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import numpy as np + +import paddle.fluid as fluid + +__all__ = ['nms'] + +logger = logging.getLogger(__name__) + + +def box_flip(boxes, im_shape): + im_width = im_shape[0][1] + flipped_boxes = boxes.copy() + + flipped_boxes[:, 0::4] = im_width - boxes[:, 2::4] - 1 + flipped_boxes[:, 2::4] = im_width - boxes[:, 0::4] - 1 + return flipped_boxes + + +def nms(dets, thresh): + """Apply classic DPM-style greedy NMS.""" + if dets.shape[0] == 0: + return [] + scores = dets[:, 0] + x1 = dets[:, 1] + y1 = dets[:, 2] + x2 = dets[:, 3] + y2 = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + ndets = dets.shape[0] + suppressed = np.zeros((ndets), dtype=np.int) + + # nominal indices + # _i, _j + # sorted indices + # i, j + # temp variables for box i's (the box currently under consideration) + # ix1, iy1, ix2, iy2, iarea + + # variables for computing overlap with box j (lower scoring box) + # xx1, yy1, xx2, yy2 + # w, h + # inter, ovr + + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = 
max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return np.where(suppressed == 0)[0] + + +def bbox_area(box): + w = box[2] - box[0] + 1 + h = box[3] - box[1] + 1 + return w * h + + +def bbox_overlaps(x, y): + N = x.shape[0] + K = y.shape[0] + overlaps = np.zeros((N, K), dtype=np.float32) + for k in range(K): + y_area = bbox_area(y[k]) + for n in range(N): + iw = min(x[n, 2], y[k, 2]) - max(x[n, 0], y[k, 0]) + 1 + if iw > 0: + ih = min(x[n, 3], y[k, 3]) - max(x[n, 1], y[k, 1]) + 1 + if ih > 0: + x_area = bbox_area(x[n]) + ua = x_area + y_area - iw * ih + overlaps[n, k] = iw * ih / ua + return overlaps + + +def box_voting(nms_dets, dets, vote_thresh): + top_dets = nms_dets.copy() + top_boxes = nms_dets[:, 1:] + all_boxes = dets[:, 1:] + all_scores = dets[:, 0] + top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes) + for k in range(nms_dets.shape[0]): + inds_to_vote = np.where(top_to_all_overlaps[k] >= vote_thresh)[0] + boxes_to_vote = all_boxes[inds_to_vote, :] + ws = all_scores[inds_to_vote] + top_dets[k, 1:] = np.average(boxes_to_vote, axis=0, weights=ws) + + return top_dets + + +def get_nms_result(boxes, scores, cfg): + cls_boxes = [[] for _ in range(cfg.num_classes)] + for j in range(1, cfg.num_classes): + inds = np.where(scores[:, j] > cfg.MultiScaleTEST['score_thresh'])[0] + scores_j = scores[inds, j] + boxes_j = boxes[inds, j * 4:(j + 1) * 4] + dets_j = np.hstack((scores_j[:, np.newaxis], boxes_j)).astype( + np.float32, copy=False) + keep = nms(dets_j, cfg.MultiScaleTEST['nms_thresh']) + nms_dets = dets_j[keep, :] + if cfg.MultiScaleTEST['enable_voting']: + nms_dets = box_voting(nms_dets, dets_j, + cfg.MultiScaleTEST['vote_thresh']) + #add labels + label = np.array([j for _ in range(len(keep))]) + nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( + np.float32, 
copy=False) + cls_boxes[j] = nms_dets + # Limit to max_per_image detections **over all classes** + image_scores = np.hstack( + [cls_boxes[j][:, 1] for j in range(1, cfg.num_classes)]) + if len(image_scores) > cfg.MultiScaleTEST['detections_per_im']: + image_thresh = np.sort(image_scores)[-cfg.MultiScaleTEST[ + 'detections_per_im']] + for j in range(1, cfg.num_classes): + keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] + cls_boxes[j] = cls_boxes[j][keep, :] + + im_results = np.vstack([cls_boxes[j] for j in range(1, cfg.num_classes)]) + return im_results + + +def mstest_box_post_process(result, cfg): + """ + Multi-scale Test + Only available for batch_size=1 now. + """ + post_bbox = {} + use_flip = False + ms_boxes = [] + ms_scores = [] + im_shape = result['im_shape'][0] + for k in result.keys(): + if 'bbox' in k: + boxes = result[k][0] + boxes = np.reshape(boxes, (-1, 4 * cfg.num_classes)) + scores = result['score' + k[4:]][0] + if 'flip' in k: + boxes = box_flip(boxes, im_shape) + use_flip = True + ms_boxes.append(boxes) + ms_scores.append(scores) + + ms_boxes = np.concatenate(ms_boxes) + ms_scores = np.concatenate(ms_scores) + bbox_pred = get_nms_result(ms_boxes, ms_scores, cfg) + post_bbox.update({'bbox': (bbox_pred, [[len(bbox_pred)]])}) + if use_flip: + bbox = bbox_pred[:, 2:] + bbox_flip = np.append( + bbox_pred[:, :2], box_flip(bbox, im_shape), axis=1) + post_bbox.update({'bbox_flip': (bbox_flip, [[len(bbox_flip)]])}) + return post_bbox + + +def mstest_mask_post_process(result, cfg): + mask_list = [] + im_shape = result['im_shape'][0] + M = cfg.FPNRoIAlign['mask_resolution'] + for k in result.keys(): + if 'mask' in k: + masks = result[k][0] + if len(masks.shape) != 4: + masks = np.zeros((0, M, M)) + mask_list.append(masks) + continue + if 'flip' in k: + masks = masks[:, :, :, ::-1] + mask_list.append(masks) + + mask_pred = np.mean(mask_list, axis=0) + return {'mask': (mask_pred, [[len(mask_pred)]])} diff --git 
a/PaddleCV/PaddleDetection/tools/configure.py b/PaddleCV/PaddleDetection/tools/configure.py index 45b297116a1eaa787e72b8d191245ff70dfef3dd..560d161513ae8f0115d8d3d5f97f6a0695642015 100644 --- a/PaddleCV/PaddleDetection/tools/configure.py +++ b/PaddleCV/PaddleDetection/tools/configure.py @@ -19,7 +19,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter import yaml -from ppdet.core.workspace import get_registered_modules, load_config +from ppdet.core.workspace import get_registered_modules, load_config, dump_value from ppdet.utils.cli import ColorTTY, print_total_cfg color_tty = ColorTTY() @@ -43,18 +43,6 @@ MISC_CONFIG = { } -def dump_value(value): - # XXX this is hackish, but collections.abc is not available in python 2 - if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)): - value = yaml.dump(value, default_flow_style=True) - value = value.replace('\n', '') - value = value.replace('...', '') - return "'{}'".format(value) - else: - # primitive types - return str(value) - - def dump_config(module, minimal=False): args = module.schema.values() if minimal: diff --git a/PaddleCV/PaddleDetection/tools/eval.py b/PaddleCV/PaddleDetection/tools/eval.py index 4c941863decf7ce23383d5de03b179aae523769a..fabce5abeecaf9f64549edf1c06e3df2eae4f704 100644 --- a/PaddleCV/PaddleDetection/tools/eval.py +++ b/PaddleCV/PaddleDetection/tools/eval.py @@ -59,7 +59,6 @@ def main(): raise ValueError("'architecture' not specified in config file.") merge_config(FLAGS.opt) - # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) print_total_cfg(cfg) @@ -69,6 +68,8 @@ def main(): else: eval_feed = create(cfg.eval_feed) + multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) + # define executor place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) @@ -80,9 +81,8 @@ def main(): with fluid.program_guard(eval_prog, startup_prog): with fluid.unique_name.guard(): pyreader, feed_vars = 
create_feed(eval_feed) - fetches = model.eval(feed_vars) + fetches = model.eval(feed_vars, multi_scale_test) eval_prog = eval_prog.clone(True) - reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) pyreader.decorate_sample_list_generator(reader, place) @@ -120,7 +120,32 @@ def main(): callable(model.is_bbox_normalized): is_bbox_normalized = model.is_bbox_normalized() - results = eval_run(exe, compile_program, pyreader, keys, values, cls) + sub_eval_prog = None + sub_keys = None + sub_values = None + # build sub-program + if 'Mask' in main_arch and multi_scale_test: + sub_eval_prog = fluid.Program() + with fluid.program_guard(sub_eval_prog, startup_prog): + with fluid.unique_name.guard(): + _, feed_vars = create_feed( + eval_feed, use_pyreader=False, sub_prog_feed=True) + sub_fetches = model.eval( + feed_vars, multi_scale_test, mask_branch=True) + extra_keys = [] + if cfg.metric == 'COCO': + extra_keys = ['im_id', 'im_shape'] + if cfg.metric == 'VOC': + extra_keys = ['gt_box', 'gt_label', 'is_difficult'] + sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog, + extra_keys) + sub_eval_prog = sub_eval_prog.clone(True) + + if 'weights' in cfg: + checkpoint.load_params(exe, sub_eval_prog, cfg.weights) + + results = eval_run(exe, compile_program, pyreader, keys, values, cls, cfg, + sub_eval_prog, sub_keys, sub_values) # evaluation resolution = None diff --git a/PaddleCV/PaddleDetection/tools/train.py b/PaddleCV/PaddleDetection/tools/train.py index 76caebdb0ae9a128908511fc5840d9669a230c4f..1a6250901d0db1a09cc098aa0fa79edcb5bc3726 100644 --- a/PaddleCV/PaddleDetection/tools/train.py +++ b/PaddleCV/PaddleDetection/tools/train.py @@ -73,9 +73,13 @@ def main(): raise ValueError("'architecture' not specified in config file.") merge_config(FLAGS.opt) + if 'log_iter' not in cfg: cfg.log_iter = 20 + if 'multi_scale_test' not in cfg: + cfg.multi_scale_test = False + ignore_params = cfg.finetune_exclude_pretrained_params \ if 
'finetune_exclude_pretrained_params' in cfg else [] @@ -140,7 +144,7 @@ def main(): with fluid.unique_name.guard(): model = create(main_arch) eval_pyreader, feed_vars = create_feed(eval_feed) - fetches = model.eval(feed_vars) + fetches = model.eval(feed_vars, cfg.multi_scale_test) eval_prog = eval_prog.clone(True) eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir)