From 26e7c73edc660d722e2ac2b042879b9185f17d82 Mon Sep 17 00:00:00 2001
From: Kaipeng Deng
Date: Fri, 18 Dec 2020 14:56:30 +0800
Subject: [PATCH] remove fields config in input_def (#1921)

* remove fields config in input_def
---
 .../datasets/{coco.yml => coco_detection.yml} |   1 +
 configs/_base_/datasets/coco_instance.yml     |  19 +++
 configs/_base_/datasets/voc.yml               |   2 +
 configs/_base_/optimizers/ssd_240e.yml        |  21 ++++
 configs/_base_/readers/faster_fpn_reader.yml  |   8 +-
 configs/_base_/readers/faster_reader.yml      |   8 +-
 configs/_base_/readers/mask_fpn_reader.yml    |   8 +-
 configs/_base_/readers/mask_reader.yml        |   8 +-
 configs/_base_/readers/ssd_reader.yml         |   4 -
 configs/_base_/readers/yolov3_reader.yml      |   4 -
 configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml |   2 +-
 configs/cascade_rcnn_r50_fpn_1x_coco.yml      |   2 +-
 configs/faster_rcnn_r50_1x_coco.yml           |   2 +-
 configs/faster_rcnn_r50_fpn_1x_coco.yml       |   2 +-
 configs/mask_rcnn_r50_1x_coco.yml             |   2 +-
 configs/mask_rcnn_r50_fpn_1x_coco.yml         |   2 +-
 configs/ssd_vgg16_300_120e_coco.yml           |   7 --
 configs/yolov3_darknet53_270e_coco.yml        |   2 +-
 ppdet/data/reader.py                          | 115 +++++++++---
 ppdet/data/source/coco.py                     |  29 +++--
 ppdet/data/source/dataset.py                  |  17 +--
 ppdet/data/source/voc.py                      |  16 ++-
 ppdet/data/transform/batch_operator.py        |   5 +
 ppdet/data/transform/operator.py              |  12 +-
 tools/infer.py                                |   8 +-
 25 files changed, 161 insertions(+), 145 deletions(-)
 rename configs/_base_/datasets/{coco.yml => coco_detection.yml} (85%)
 create mode 100644 configs/_base_/datasets/coco_instance.yml
 create mode 100644 configs/_base_/optimizers/ssd_240e.yml
 delete mode 100644 configs/ssd_vgg16_300_120e_coco.yml

diff --git a/configs/_base_/datasets/coco.yml b/configs/_base_/datasets/coco_detection.yml
similarity index 85%
rename from configs/_base_/datasets/coco.yml
rename to configs/_base_/datasets/coco_detection.yml
index 6f8af0739..7a62c3b0b 100644
--- a/configs/_base_/datasets/coco.yml
+++ b/configs/_base_/datasets/coco_detection.yml
@@ -6,6 +6,7 @@ TrainDataset:
     image_dir: train2017
     anno_path: annotations/instances_train2017.json
     dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
 
 EvalDataset:
   !COCODataSet
diff --git a/configs/_base_/datasets/coco_instance.yml b/configs/_base_/datasets/coco_instance.yml
new file mode 100644
index 000000000..5eaf76791
--- /dev/null
+++ b/configs/_base_/datasets/coco_instance.yml
@@ -0,0 +1,19 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/instances_val2017.json
diff --git a/configs/_base_/datasets/voc.yml b/configs/_base_/datasets/voc.yml
index de4d78eda..1240c80d7 100644
--- a/configs/_base_/datasets/voc.yml
+++ b/configs/_base_/datasets/voc.yml
@@ -6,12 +6,14 @@ TrainDataset:
     dataset_dir: dataset/voc
     anno_path: trainval.txt
     label_list: label_list.txt
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
 
 EvalDataset:
   !VOCDataSet
     dataset_dir: dataset/voc
     anno_path: test.txt
     label_list: label_list.txt
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
 
 TestDataset:
   !ImageFolder
diff --git a/configs/_base_/optimizers/ssd_240e.yml b/configs/_base_/optimizers/ssd_240e.yml
new file mode 100644
index 000000000..de31eac3d
--- /dev/null
+++ b/configs/_base_/optimizers/ssd_240e.yml
@@ -0,0 +1,21 @@
+epoch: 240
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 160
+    - 200
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
diff --git a/configs/_base_/readers/faster_fpn_reader.yml b/configs/_base_/readers/faster_fpn_reader.yml
index 348df9f97..8a8fb1ebb 100644
--- a/configs/_base_/readers/faster_fpn_reader.yml
+++ b/configs/_base_/readers/faster_fpn_reader.yml
@@ -1,9 +1,7 @@
 worker_num: 2
 TrainReader:
-  inputs_def:
-    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
   sample_transforms:
-  - DecodeImage: {to_rgb: true}
+  - DecodeOp: { }
   - RandomFlipImage: {prob: 0.5}
   - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
   - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
@@ -16,8 +14,6 @@
 
 
 EvalReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: { }
   - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
@@ -32,8 +28,6 @@ EvalReader:
 
 
 TestReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: { }
   - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
diff --git a/configs/_base_/readers/faster_reader.yml b/configs/_base_/readers/faster_reader.yml
index 0778729d7..5131d0360 100644
--- a/configs/_base_/readers/faster_reader.yml
+++ b/configs/_base_/readers/faster_reader.yml
@@ -1,9 +1,7 @@
 worker_num: 2
 TrainReader:
-  inputs_def:
-    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
   sample_transforms:
-  - DecodeImage: {to_rgb: true}
+  - DecodeOp: { }
   - RandomFlipImage: {prob: 0.5}
   - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
   - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
@@ -16,8 +14,6 @@
 
 
 EvalReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: { }
   - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
@@ -32,8 +28,6 @@ EvalReader:
 
 
 TestReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: { }
   - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
diff --git a/configs/_base_/readers/mask_fpn_reader.yml b/configs/_base_/readers/mask_fpn_reader.yml
index 3dcd8464c..1d73c7f31 100644
--- a/configs/_base_/readers/mask_fpn_reader.yml
+++ b/configs/_base_/readers/mask_fpn_reader.yml
@@ -1,9 +1,7 @@
 worker_num: 2
 TrainReader:
-  inputs_def:
-    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
   sample_transforms:
-  - DecodeImage: {to_rgb: true}
+  - DecodeOp: {}
   - RandomFlipImage: {prob: 0.5, is_mask_flip: true}
   - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
   - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
@@ -16,8 +14,6 @@
 
 
 EvalReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: {}
   - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
@@ -32,8 +28,6 @@ EvalReader:
 
 
 TestReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: {}
   - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
diff --git a/configs/_base_/readers/mask_reader.yml b/configs/_base_/readers/mask_reader.yml
index 7281edd6d..9ae4ef98e 100644
--- a/configs/_base_/readers/mask_reader.yml
+++ b/configs/_base_/readers/mask_reader.yml
@@ -1,9 +1,7 @@
 worker_num: 2
 TrainReader:
-  inputs_def:
-    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
   sample_transforms:
-  - DecodeImage: {to_rgb: true}
+  - DecodeOp: {}
   - RandomFlipImage: {prob: 0.5, is_mask_flip: true}
   - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
   - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
@@ -16,8 +14,6 @@
 
 
 EvalReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: {}
   - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
@@ -32,8 +28,6 @@ EvalReader:
 
 
 TestReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: {}
   - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
diff --git a/configs/_base_/readers/ssd_reader.yml b/configs/_base_/readers/ssd_reader.yml
index f493c4022..5f692e955 100644
--- a/configs/_base_/readers/ssd_reader.yml
+++ b/configs/_base_/readers/ssd_reader.yml
@@ -1,7 +1,6 @@
 worker_num: 2
 TrainReader:
   inputs_def:
-    fields: ['image', 'gt_bbox', 'gt_class']
     num_max_boxes: 90
 
   sample_transforms:
@@ -24,8 +23,6 @@
 
 
 EvalReader:
-  inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id', 'gt_bbox', 'gt_class', 'difficult']
   sample_transforms:
   - DecodeOp: {}
   - ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
@@ -37,7 +34,6 @@ EvalReader:
 TestReader:
   inputs_def:
     image_shape: [3, 300, 300]
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: {}
   - ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
diff --git a/configs/_base_/readers/yolov3_reader.yml b/configs/_base_/readers/yolov3_reader.yml
index 594ee2ad7..0668b9cb9 100644
--- a/configs/_base_/readers/yolov3_reader.yml
+++ b/configs/_base_/readers/yolov3_reader.yml
@@ -1,7 +1,6 @@
 worker_num: 2
 TrainReader:
   inputs_def:
-    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_shape', 'scale_factor']
     num_max_boxes: 50
   sample_transforms:
   - DecodeOp: {}
@@ -26,13 +25,11 @@
 
 EvalReader:
   inputs_def:
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
     num_max_boxes: 50
   sample_transforms:
   - DecodeOp: {}
   - ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2}
   - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
-  - PadBoxOp: {num_max_boxes: 50}
   - PermuteOp: {}
   batch_size: 1
   drop_empty: false
@@ -40,7 +37,6 @@ EvalReader:
 TestReader:
   inputs_def:
     image_shape: [3, 608, 608]
-    fields: ['image', 'im_shape', 'scale_factor', 'im_id']
   sample_transforms:
   - DecodeOp: {}
   - ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2}
diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml
index 5995d04b5..b38bd32f5 100644
--- a/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml
+++ b/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/cascade_mask_rcnn_r50_fpn.yml',
   './_base_/optimizers/rcnn_1x.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_instance.yml',
   './_base_/readers/mask_fpn_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/configs/cascade_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_rcnn_r50_fpn_1x_coco.yml
index adcd2230e..1f7b471d3 100644
--- a/configs/cascade_rcnn_r50_fpn_1x_coco.yml
+++ b/configs/cascade_rcnn_r50_fpn_1x_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/cascade_rcnn_r50_fpn.yml',
   './_base_/optimizers/rcnn_1x.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_detection.yml',
   './_base_/readers/faster_fpn_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/configs/faster_rcnn_r50_1x_coco.yml b/configs/faster_rcnn_r50_1x_coco.yml
index 1e97e7e63..f44dc9a87 100644
--- a/configs/faster_rcnn_r50_1x_coco.yml
+++ b/configs/faster_rcnn_r50_1x_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/faster_rcnn_r50.yml',
   './_base_/optimizers/rcnn_1x.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_detection.yml',
   './_base_/readers/faster_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/configs/faster_rcnn_r50_fpn_1x_coco.yml b/configs/faster_rcnn_r50_fpn_1x_coco.yml
index 2120358f5..7ae27da7f 100644
--- a/configs/faster_rcnn_r50_fpn_1x_coco.yml
+++ b/configs/faster_rcnn_r50_fpn_1x_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/faster_rcnn_r50_fpn.yml',
   './_base_/optimizers/rcnn_1x.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_detection.yml',
   './_base_/readers/faster_fpn_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/configs/mask_rcnn_r50_1x_coco.yml b/configs/mask_rcnn_r50_1x_coco.yml
index 4d7eaaae9..50e48272a 100644
--- a/configs/mask_rcnn_r50_1x_coco.yml
+++ b/configs/mask_rcnn_r50_1x_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/mask_rcnn_r50.yml',
   './_base_/optimizers/rcnn_1x.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_instance.yml',
   './_base_/readers/mask_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/configs/mask_rcnn_r50_fpn_1x_coco.yml b/configs/mask_rcnn_r50_fpn_1x_coco.yml
index 3332b43f3..b881e14cd 100644
--- a/configs/mask_rcnn_r50_fpn_1x_coco.yml
+++ b/configs/mask_rcnn_r50_fpn_1x_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/mask_rcnn_r50_fpn.yml',
   './_base_/optimizers/rcnn_1x.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_instance.yml',
   './_base_/readers/mask_fpn_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/configs/ssd_vgg16_300_120e_coco.yml b/configs/ssd_vgg16_300_120e_coco.yml
deleted file mode 100644
index a94e3cf3e..000000000
--- a/configs/ssd_vgg16_300_120e_coco.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-_BASE_: [
-  './_base_/models/ssd_vgg16_300.yml',
-  './_base_/optimizers/ssd_120e.yml',
-  './_base_/datasets/coco.yml',
-  './_base_/readers/ssd_reader.yml',
-  './_base_/runtime.yml',
-]
diff --git a/configs/yolov3_darknet53_270e_coco.yml b/configs/yolov3_darknet53_270e_coco.yml
index b31e7e8af..08ea8bad7 100644
--- a/configs/yolov3_darknet53_270e_coco.yml
+++ b/configs/yolov3_darknet53_270e_coco.yml
@@ -1,7 +1,7 @@
 _BASE_: [
   './_base_/models/yolov3_darknet53.yml',
   './_base_/optimizers/yolov3_270e.yml',
-  './_base_/datasets/coco.yml',
+  './_base_/datasets/coco_detection.yml',
   './_base_/readers/yolov3_reader.yml',
   './_base_/runtime.yml',
 ]
diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py
index 54eabce7a..f267d8aef 100644
--- a/ppdet/data/reader.py
+++ b/ppdet/data/reader.py
@@ -16,6 +16,7 @@ import copy
 import traceback
 import six
 import sys
+import multiprocessing as mp
 if sys.version_info >= (3, 0):
     import queue as Queue
 else:
@@ -27,45 +28,42 @@ from paddle.io import DistributedBatchSampler
 
 from ppdet.core.workspace import register, serializable, create
 from . import transform
-from .transform import operator, batch_operator
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('reader')
 
 
 class Compose(object):
-    def __init__(self, transforms, fields=None, from_=transform,
-                 num_classes=81):
+    def __init__(self, transforms, num_classes=81):
         self.transforms = transforms
         self.transforms_cls = []
-        output_fields = None
         for t in self.transforms:
             for k, v in t.items():
-                op_cls = getattr(from_, k)
+                op_cls = getattr(transform, k)
                 self.transforms_cls.append(op_cls(**v))
                 if hasattr(op_cls, 'num_classes'):
                     op_cls.num_classes = num_classes
-                # TODO: should be refined in the future
-                if op_cls in [
-                        transform.Gt2YoloTargetOp, transform.Gt2YoloTarget
-                ]:
-                    output_fields = ['image', 'gt_bbox']
-                    output_fields.extend([
-                        'target{}'.format(i)
-                        for i in range(len(v['anchor_masks']))
-                    ])
+    def __call__(self, data):
+        for f in self.transforms_cls:
+            try:
+                data = f(data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warn("fail to map op [{}] with error: {} and stack:\n{}".
+                            format(f, e, str(stack_info)))
+                raise e
+
+        return data
 
-        self.fields = fields
-        self.output_fields = output_fields if output_fields else fields
-
-    def __call__(self, data):
-        if self.fields is not None:
-            data_new = []
-            for item in data:
-                data_new.append(dict(zip(self.fields, item)))
-            data = data_new
+
+class BatchCompose(Compose):
+    def __init__(self, transforms, num_classes=81):
+        super(BatchCompose, self).__init__(transforms, num_classes)
+        self.output_fields = mp.Manager().list([])
+        self.lock = mp.Lock()
 
+    def __call__(self, data):
         for f in self.transforms_cls:
             try:
                 data = f(data)
@@ -75,23 +73,27 @@
                             format(f, e, str(stack_info)))
                 raise e
 
-        if self.output_fields is not None:
-            data_new = []
-            for item in data:
-                batch = []
-                for k in self.output_fields:
-                    batch.append(item[k])
-                data_new.append(batch)
-            batch_size = len(data_new)
-            data_new = list(zip(*data_new))
-            if batch_size > 1:
-                data = [
-                    np.array(item).astype(item[0].dtype) for item in data_new
-                ]
-            else:
-                data = data_new
-
-        return data
+        # parse output fields from the first sample
+        # **this should be fixed once paddle.io.DataLoader supports dict**
+        # Since paddle.io.DataLoader does not support dict currently,
+        # we need to parse the keys from the first sample;
+        # BatchCompose.__call__ will be called in each worker
+        # process, so a lock is needed here.
+        if len(self.output_fields) == 0:
+            self.lock.acquire()
+            if len(self.output_fields) == 0:
+                for k, v in data[0].items():
+                    # FIXME(dkp): find a more elegant way to skip these keys
+                    if k not in ['flipped', 'h', 'w']:
+                        self.output_fields.append(k)
+            self.lock.release()
+
+        data = [[data[i][k] for k in self.output_fields]
+                for i in range(len(data))]
+        data = list(zip(*data))
+
+        batch_data = [np.stack(d, axis=0) for d in data]
+        return batch_data
 
 
 class BaseDataLoader(object):
@@ -99,8 +101,8 @@
 
     def __init__(self,
                  inputs_def=None,
-                 sample_transforms=None,
-                 batch_transforms=None,
+                 sample_transforms=[],
+                 batch_transforms=[],
                  batch_size=1,
                  shuffle=False,
                  drop_last=False,
@@ -108,21 +110,12 @@
                  num_classes=81,
                  with_background=True,
                  **kwargs):
-        # out fields
-        self._fields = inputs_def['fields'] if inputs_def else None
         # sample transform
         self._sample_transforms = Compose(
             sample_transforms, num_classes=num_classes)
 
         # batch transform
-        self._batch_transforms = None
-        if batch_transforms:
-            self._batch_transforms = Compose(batch_transforms,
-                                             copy.deepcopy(self._fields),
-                                             transform, num_classes)
-            self.output_fields = self._batch_transforms.output_fields
-        else:
-            self.output_fields = self._fields
+        self._batch_transforms = BatchCompose(batch_transforms, num_classes)
 
         self.batch_size = batch_size
         self.shuffle = shuffle
@@ -139,8 +132,7 @@
         self.dataset = dataset
         self.dataset.parse_dataset(self.with_background)
         # get data
-        self.dataset.set_out(self._sample_transforms,
-                             copy.deepcopy(self._fields))
+        self.dataset.set_transform(self._sample_transforms)
         # set kwargs
         self.dataset.set_kwargs(**self.kwargs)
         # batch sampler
@@ -177,7 +169,10 @@
         # data structure in paddle.io.DataLoader
         try:
             data = next(self.loader)
-            return {k: v for k, v in zip(self.output_fields, data)}
+            return {
+                k: v
+                for k, v in zip(self._batch_transforms.output_fields, data)
+            }
         except StopIteration:
             self.loader = iter(self.dataloader)
             six.reraise(*sys.exc_info())
@@ -191,8 +186,8 @@
 class TrainReader(BaseDataLoader):
     def __init__(self,
                  inputs_def=None,
-                 sample_transforms=None,
-                 batch_transforms=None,
+                 sample_transforms=[],
+                 batch_transforms=[],
                  batch_size=1,
                  shuffle=True,
                  drop_last=True,
@@ -210,8 +205,8 @@
 class EvalReader(BaseDataLoader):
     def __init__(self,
                  inputs_def=None,
-                 sample_transforms=None,
-                 batch_transforms=None,
+                 sample_transforms=[],
+                 batch_transforms=[],
                  batch_size=1,
                  shuffle=False,
                  drop_last=True,
@@ -229,8 +224,8 @@
 class TestReader(BaseDataLoader):
     def __init__(self,
                  inputs_def=None,
-                 sample_transforms=None,
-                 batch_transforms=None,
+                 sample_transforms=[],
+                 batch_transforms=[],
                  batch_size=1,
                  shuffle=False,
                  drop_last=False,
diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py
index f21ee46ab..394ffdc86 100644
--- a/ppdet/data/source/coco.py
+++ b/ppdet/data/source/coco.py
@@ -28,9 +28,10 @@ class COCODataSet(DetDataset):
                  dataset_dir=None,
                  image_dir=None,
                  anno_path=None,
+                 data_fields=['image'],
                  sample_num=-1):
         super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
-                                          sample_num)
+                                          data_fields, sample_num)
         self.load_image_only = False
         self.load_semantic = False
 
@@ -82,13 +83,6 @@
                         im_w, im_h, img_id))
                 continue
 
-            coco_rec = {
-                'im_file': im_path,
-                'im_id': np.array([img_id]),
-                'h': im_h,
-                'w': im_w,
-            }
-
             if not self.load_image_only:
                 ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
                 instances = coco.loadAnns(ins_anno_ids)
@@ -121,7 +115,6 @@
 
                 gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                 gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
-                gt_score = np.ones((num_bbox, 1), dtype=np.float32)
                 is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                 difficult = np.zeros((num_bbox, 1), dtype=np.int32)
                 gt_poly = [None] * num_bbox
@@ -142,15 +135,25 @@
                 if has_segmentation and not any(gt_poly):
                     continue
 
-            coco_rec.update({
+            coco_rec = {
+                'im_file': im_path,
+                'im_id': np.array([img_id]),
+                'h': im_h,
+                'w': im_w,
+            } if 'image' in self.data_fields else {}
+
+            gt_rec = {
                 'is_crowd': is_crowd,
                 'gt_class': gt_class,
                 'gt_bbox': gt_bbox,
-                'gt_score': gt_score,
                 'gt_poly': gt_poly,
-            })
+            }
+            for k, v in gt_rec.items():
+                if k in self.data_fields:
+                    coco_rec[k] = v
+
             # TODO: remove load_semantic
-            if self.load_semantic:
+            if self.load_semantic and 'semantic' in self.data_fields:
                 seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
                                         'train2017', im_fname[:-3] + 'png')
                 coco_rec.update({'semantic': seg_path})
diff --git a/ppdet/data/source/dataset.py b/ppdet/data/source/dataset.py
index 0b1aa42c2..e948f176b 100644
--- a/ppdet/data/source/dataset.py
+++ b/ppdet/data/source/dataset.py
@@ -31,6 +31,7 @@ class DetDataset(Dataset):
                  dataset_dir=None,
                  image_dir=None,
                  anno_path=None,
+                 data_fields=['image'],
                  sample_num=-1,
                  use_default_label=None,
                  **kwargs):
@@ -38,6 +39,7 @@ class DetDataset(Dataset):
         self.dataset_dir = dataset_dir if dataset_dir is not None else ''
         self.anno_path = anno_path
         self.image_dir = image_dir if image_dir is not None else ''
+        self.data_fields = data_fields
         self.sample_num = sample_num
         self.use_default_label = use_default_label
         self._epoch = 0
@@ -63,26 +65,19 @@ class DetDataset(Dataset):
             for _ in range(3)
         ]
 
-        # data augment
-        roidb = self.transform(roidb)
-        # data item
-        out = OrderedDict()
-        for k in self.fields:
-            out[k] = roidb[k]
-        return out.values()
+        return self.transform(roidb)
 
     def set_kwargs(self, **kwargs):
        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
 
+    def set_transform(self, transform):
+        self.transform = transform
+
     def set_epoch(self, epoch_id):
         self._epoch = epoch_id
 
-    def set_out(self, sample_transform, fields):
-        self.transform = sample_transform
-        self.fields = fields
-
     def parse_dataset(self, with_background=True):
         raise NotImplemented(
             "Need to implement parse_dataset method of Dataset")
diff --git a/ppdet/data/source/voc.py b/ppdet/data/source/voc.py
index 41c5b4c33..fa28b12df 100644
--- a/ppdet/data/source/voc.py
+++ b/ppdet/data/source/voc.py
@@ -46,12 +46,14 @@ class VOCDataSet(DetDataset):
                  dataset_dir=None,
                  image_dir=None,
                  anno_path=None,
+                 data_fields=['image'],
                  sample_num=-1,
                  label_list=None):
         super(VOCDataSet, self).__init__(
             dataset_dir=dataset_dir,
             image_dir=image_dir,
             anno_path=anno_path,
+            data_fields=data_fields,
             sample_num=sample_num)
         self.label_list = label_list
 
@@ -113,7 +115,6 @@
             gt_bbox = []
             gt_class = []
             gt_score = []
-            is_crowd = []
             difficult = []
             for i, obj in enumerate(objs):
                 cname = obj.find('name').text
@@ -130,7 +131,6 @@
                     gt_bbox.append([x1, y1, x2, y2])
                     gt_class.append([cname2cid[cname]])
                     gt_score.append([1.])
-                    is_crowd.append([0])
                     difficult.append([_difficult])
                 else:
                     logger.warn(
@@ -140,19 +140,25 @@ class VOCDataSet(DetDataset):
                     gt_bbox = np.array(gt_bbox).astype('float32')
                     gt_class = np.array(gt_class).astype('int32')
                     gt_score = np.array(gt_score).astype('float32')
-                    is_crowd = np.array(is_crowd).astype('int32')
                     difficult = np.array(difficult).astype('int32')
+
                     voc_rec = {
                         'im_file': img_file,
                         'im_id': im_id,
                         'h': im_h,
-                        'w': im_w,
-                        'is_crowd': is_crowd,
+                        'w': im_w
+                    } if 'image' in self.data_fields else {}
+
+                    gt_rec = {
                         'gt_class': gt_class,
                         'gt_score': gt_score,
                         'gt_bbox': gt_bbox,
                         'difficult': difficult
                     }
+                    for k, v in gt_rec.items():
+                        if k in self.data_fields:
+                            voc_rec[k] = v
+
                     if len(objs) != 0:
                         records.append(voc_rec)
diff --git a/ppdet/data/transform/batch_operator.py b/ppdet/data/transform/batch_operator.py
index d94ae6a14..acfc326bb 100644
--- a/ppdet/data/transform/batch_operator.py
+++ b/ppdet/data/transform/batch_operator.py
@@ -303,6 +303,11 @@ class Gt2YoloTargetOp(BaseOperator):
                             # classification
                             target[idx, 6 + cls, gj, gi] = 1.
             sample['target{}'.format(i)] = target
+
+        # remove the now-useless gt_class and gt_score after targets are calculated
+        sample.pop('gt_class')
+        sample.pop('gt_score')
+
         return samples
 
 
diff --git a/ppdet/data/transform/operator.py b/ppdet/data/transform/operator.py
index f98e95f06..6c39ed6e9 100644
--- a/ppdet/data/transform/operator.py
+++ b/ppdet/data/transform/operator.py
@@ -116,6 +116,7 @@ class DecodeOp(BaseOperator):
         if 'image' not in sample:
             with open(sample['im_file'], 'rb') as f:
                 sample['image'] = f.read()
+            sample.pop('im_file')
 
         im = sample['image']
         data = np.frombuffer(im, dtype='uint8')
@@ -1570,9 +1571,9 @@ class MixupOp(BaseOperator):
             gt_class2 = sample[1]['gt_class']
             gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
             result['gt_class'] = gt_class
-            if 'gt_score' in sample[0]:
-                gt_score1 = sample[0]['gt_score']
-                gt_score2 = sample[1]['gt_score']
+
+            gt_score1 = np.ones_like(sample[0]['gt_class'])
+            gt_score2 = np.ones_like(sample[1]['gt_class'])
             gt_score = np.concatenate(
                 (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
             result['gt_score'] = gt_score
@@ -1673,6 +1674,11 @@ class PadBoxOp(BaseOperator):
             if gt_num > 0:
                 pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
             sample['difficult'] = pad_diff
+        if 'is_crowd' in sample:
+            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
+            if gt_num > 0:
+                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
+            sample['is_crowd'] = pad_crowd
         return sample
 
 
diff --git a/tools/infer.py b/tools/infer.py
index 65fbc3cef..b593321f1 100755
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -131,16 +131,18 @@ def run(FLAGS, cfg, place):
     dataset.set_images(test_images)
     test_loader = create('TestReader')(dataset, cfg['worker_num'])
     extra_key = ['im_shape', 'scale_factor', 'im_id']
-    if cfg.metric == 'VOC':
-        extra_key += ['gt_bbox', 'gt_class', 'difficult']
     # TODO: support other metrics
     imid2path = dataset.get_imid2path()
 
-    from ppdet.utils.coco_eval import get_category_info
     anno_file = dataset.get_anno()
     with_background = cfg.with_background
     use_default_label = dataset.use_default_label
+
+    if cfg.metric == 'COCO':
+        from ppdet.utils.coco_eval import get_category_info
+    if cfg.metric == 'VOC':
+        from ppdet.utils.voc_eval import get_category_info
     clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                 use_default_label)
-- 
GitLab
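
For context on the batching scheme this patch introduces: datasets now declare the ground-truth keys they emit via `data_fields`, and `BatchCompose` infers the reader's output fields at runtime from the keys of the first transformed sample (skipping bookkeeping keys such as 'flipped', 'h' and 'w') instead of reading a `fields` list from `inputs_def`. The following is a minimal standalone sketch of that collation logic, not part of the patch itself; the helper name `batch_samples` and the toy sample dicts are illustrative only.

import numpy as np

# keys dropped before batching, mirroring BatchCompose.__call__ above
SKIP_KEYS = ['flipped', 'h', 'w']

def batch_samples(samples):
    """Collate a list of per-sample dicts into a dict of stacked arrays."""
    # infer the output fields from the first sample rather than a config list
    output_fields = [k for k in samples[0] if k not in SKIP_KEYS]
    # transpose list-of-dicts into field-major columns, then stack each column
    columns = [[s[k] for s in samples] for k in output_fields]
    batched = [np.stack(c, axis=0) for c in columns]
    return dict(zip(output_fields, batched))

if __name__ == '__main__':
    samples = [
        {'image': np.zeros((3, 4, 4), np.float32),
         'im_id': np.array([i]), 'h': 4, 'w': 4}
        for i in range(2)
    ]
    batch = batch_samples(samples)
    print({k: v.shape for k, v in batch.items()})
    # -> {'image': (2, 3, 4, 4), 'im_id': (2, 1)}

This mirrors BatchCompose minus the multiprocessing bookkeeping: in the real reader the inferred field list lives in an mp.Manager().list guarded by an mp.Lock, so that every DataLoader worker process agrees on the same field order.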