diff --git a/configs/_base_/datasets/coco.yml b/configs/_base_/datasets/coco_detection.yml similarity index 85% rename from configs/_base_/datasets/coco.yml rename to configs/_base_/datasets/coco_detection.yml index 6f8af073925f10f94c64cad83b7876f17553a226..7a62c3b0b57a5d76c8ed519d3a3940c1b4532c15 100644 --- a/configs/_base_/datasets/coco.yml +++ b/configs/_base_/datasets/coco_detection.yml @@ -6,6 +6,7 @@ TrainDataset: image_dir: train2017 anno_path: annotations/instances_train2017.json dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] EvalDataset: !COCODataSet diff --git a/configs/_base_/datasets/coco_instance.yml b/configs/_base_/datasets/coco_instance.yml new file mode 100644 index 0000000000000000000000000000000000000000..5eaf76791a94bfd2819ba6dab610fae54b69f26e --- /dev/null +++ b/configs/_base_/datasets/coco_instance.yml @@ -0,0 +1,19 @@ +metric: COCO +num_classes: 80 + +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd'] + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + +TestDataset: + !ImageFolder + anno_path: annotations/instances_val2017.json diff --git a/configs/_base_/datasets/voc.yml b/configs/_base_/datasets/voc.yml index de4d78eda57792f857eac95141cd28b5c34a6175..1240c80d7df4a53693136ff0e8885434d9765035 100644 --- a/configs/_base_/datasets/voc.yml +++ b/configs/_base_/datasets/voc.yml @@ -6,12 +6,14 @@ TrainDataset: dataset_dir: dataset/voc anno_path: trainval.txt label_list: label_list.txt + data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] EvalDataset: !VOCDataSet dataset_dir: dataset/voc anno_path: test.txt label_list: label_list.txt + data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] TestDataset: !ImageFolder diff --git a/configs/_base_/optimizers/ssd_240e.yml 
b/configs/_base_/optimizers/ssd_240e.yml new file mode 100644 index 0000000000000000000000000000000000000000..de31eac3d22c97b2b72083a79342b880f4be9b8a --- /dev/null +++ b/configs/_base_/optimizers/ssd_240e.yml @@ -0,0 +1,21 @@ +epoch: 240 + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 160 + - 200 + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/_base_/readers/faster_fpn_reader.yml b/configs/_base_/readers/faster_fpn_reader.yml index 348df9f97f0a3d4f1cb9ee9d7a343cd31751e571..8a8fb1ebb8f6d20c8e8fec681f7a7b0ce0d45069 100644 --- a/configs/_base_/readers/faster_fpn_reader.yml +++ b/configs/_base_/readers/faster_fpn_reader.yml @@ -1,9 +1,7 @@ worker_num: 2 TrainReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] sample_transforms: - - DecodeImage: {to_rgb: true} + - DecodeOp: { } - RandomFlipImage: {prob: 0.5} - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} @@ -16,8 +14,6 @@ TrainReader: EvalReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: { } - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } @@ -32,8 +28,6 @@ EvalReader: TestReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: { } - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } diff --git a/configs/_base_/readers/faster_reader.yml b/configs/_base_/readers/faster_reader.yml index 0778729d78e8952289d12f2c93c6636110df0584..5131d0360d4d1b833f85fed45e1b38d0723bb2f4 100644 --- a/configs/_base_/readers/faster_reader.yml +++ 
b/configs/_base_/readers/faster_reader.yml @@ -1,9 +1,7 @@ worker_num: 2 TrainReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] sample_transforms: - - DecodeImage: {to_rgb: true} + - DecodeOp: { } - RandomFlipImage: {prob: 0.5} - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} @@ -16,8 +14,6 @@ TrainReader: EvalReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: { } - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } @@ -32,8 +28,6 @@ EvalReader: TestReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: { } - NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] } diff --git a/configs/_base_/readers/mask_fpn_reader.yml b/configs/_base_/readers/mask_fpn_reader.yml index 3dcd8464cf2efb7395b489319d0678773d5ba76d..1d73c7f31ea6d1e0babb548b91604c1d3492dd83 100644 --- a/configs/_base_/readers/mask_fpn_reader.yml +++ b/configs/_base_/readers/mask_fpn_reader.yml @@ -1,9 +1,7 @@ worker_num: 2 TrainReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly'] sample_transforms: - - DecodeImage: {to_rgb: true} + - DecodeOp: {} - RandomFlipImage: {prob: 0.5, is_mask_flip: true} - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} @@ -16,8 +14,6 @@ TrainReader: EvalReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} @@ -32,8 +28,6 @@ EvalReader: TestReader: - inputs_def: - 
fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} diff --git a/configs/_base_/readers/mask_reader.yml b/configs/_base_/readers/mask_reader.yml index 7281edd6d856ce00b7f25a95729f323cca5f1de2..9ae4ef98ee444d61bf790f8c135ffbe9e4d561c4 100644 --- a/configs/_base_/readers/mask_reader.yml +++ b/configs/_base_/readers/mask_reader.yml @@ -1,9 +1,7 @@ worker_num: 2 TrainReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly'] sample_transforms: - - DecodeImage: {to_rgb: true} + - DecodeOp: {} - RandomFlipImage: {prob: 0.5, is_mask_flip: true} - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} @@ -16,8 +14,6 @@ TrainReader: EvalReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} @@ -32,8 +28,6 @@ EvalReader: TestReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} diff --git a/configs/_base_/readers/ssd_reader.yml b/configs/_base_/readers/ssd_reader.yml index f493c402214de60302b438628db2af5cd2ba83e0..5f692e955f28ae83c6e1a51d104f4f2d140c1810 100644 --- a/configs/_base_/readers/ssd_reader.yml +++ b/configs/_base_/readers/ssd_reader.yml @@ -1,7 +1,6 @@ worker_num: 2 TrainReader: inputs_def: - fields: ['image', 'gt_bbox', 'gt_class'] num_max_boxes: 90 sample_transforms: @@ -24,8 +23,6 @@ TrainReader: EvalReader: - inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id', 'gt_bbox', 'gt_class', 'difficult'] sample_transforms: - DecodeOp: {} - ResizeOp: 
{target_size: [300, 300], keep_ratio: False, interp: 1} @@ -37,7 +34,6 @@ EvalReader: TestReader: inputs_def: image_shape: [3, 300, 300] - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1} diff --git a/configs/_base_/readers/yolov3_reader.yml b/configs/_base_/readers/yolov3_reader.yml index 594ee2ad75e3ae0b518e7fd39433cfbd1aba4a5b..0668b9cb960420b1ec9a3a845a72ddacc18d8274 100644 --- a/configs/_base_/readers/yolov3_reader.yml +++ b/configs/_base_/readers/yolov3_reader.yml @@ -1,7 +1,6 @@ worker_num: 2 TrainReader: inputs_def: - fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_shape', 'scale_factor'] num_max_boxes: 50 sample_transforms: - DecodeOp: {} @@ -26,13 +25,11 @@ TrainReader: EvalReader: inputs_def: - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] num_max_boxes: 50 sample_transforms: - DecodeOp: {} - ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2} - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} - - PadBoxOp: {num_max_boxes: 50} - PermuteOp: {} batch_size: 1 drop_empty: false @@ -40,7 +37,6 @@ EvalReader: TestReader: inputs_def: image_shape: [3, 608, 608] - fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2} diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml index 5995d04b501d1f9ceb8d488799c2a0bf75c18272..b38bd32f53844b16897e64bf12836ff3fb4efae2 100644 --- a/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml +++ b/configs/cascade_mask_rcnn_r50_fpn_1x_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/cascade_mask_rcnn_r50_fpn.yml', './_base_/optimizers/rcnn_1x.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_instance.yml', './_base_/readers/mask_fpn_reader.yml', './_base_/runtime.yml', ] diff --git 
a/configs/cascade_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_rcnn_r50_fpn_1x_coco.yml index adcd2230e5d1b9e5d575d51ece8e7a3204af9d74..1f7b471d36717a6a7539b8be11d8931dbdf49a2d 100644 --- a/configs/cascade_rcnn_r50_fpn_1x_coco.yml +++ b/configs/cascade_rcnn_r50_fpn_1x_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/cascade_rcnn_r50_fpn.yml', './_base_/optimizers/rcnn_1x.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_detection.yml', './_base_/readers/faster_fpn_reader.yml', './_base_/runtime.yml', ] diff --git a/configs/faster_rcnn_r50_1x_coco.yml b/configs/faster_rcnn_r50_1x_coco.yml index 1e97e7e6364cab840e0d07f49b37f201c9d1913e..f44dc9a87aa65fe67b35790da6fbf429a83d7d17 100644 --- a/configs/faster_rcnn_r50_1x_coco.yml +++ b/configs/faster_rcnn_r50_1x_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/faster_rcnn_r50.yml', './_base_/optimizers/rcnn_1x.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_detection.yml', './_base_/readers/faster_reader.yml', './_base_/runtime.yml', ] diff --git a/configs/faster_rcnn_r50_fpn_1x_coco.yml b/configs/faster_rcnn_r50_fpn_1x_coco.yml index 2120358f5767990c4892dedeff40dd183e531f05..7ae27da7fb7d736f756403a5e1954652fe00fde8 100644 --- a/configs/faster_rcnn_r50_fpn_1x_coco.yml +++ b/configs/faster_rcnn_r50_fpn_1x_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/faster_rcnn_r50_fpn.yml', './_base_/optimizers/rcnn_1x.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_detection.yml', './_base_/readers/faster_fpn_reader.yml', './_base_/runtime.yml', ] diff --git a/configs/mask_rcnn_r50_1x_coco.yml b/configs/mask_rcnn_r50_1x_coco.yml index 4d7eaaae9804d4e573d4cb718978b149b45e1f00..50e48272a0b0efec8c00834051bbcc80e3b4be5d 100644 --- a/configs/mask_rcnn_r50_1x_coco.yml +++ b/configs/mask_rcnn_r50_1x_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/mask_rcnn_r50.yml', './_base_/optimizers/rcnn_1x.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_instance.yml', 
'./_base_/readers/mask_reader.yml', './_base_/runtime.yml', ] diff --git a/configs/mask_rcnn_r50_fpn_1x_coco.yml b/configs/mask_rcnn_r50_fpn_1x_coco.yml index 3332b43f30515f20792cd91f5992f3384aeafb94..b881e14cd537fc568ba68d31dce689717e862868 100644 --- a/configs/mask_rcnn_r50_fpn_1x_coco.yml +++ b/configs/mask_rcnn_r50_fpn_1x_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/mask_rcnn_r50_fpn.yml', './_base_/optimizers/rcnn_1x.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_instance.yml', './_base_/readers/mask_fpn_reader.yml', './_base_/runtime.yml', ] diff --git a/configs/ssd_vgg16_300_120e_coco.yml b/configs/ssd_vgg16_300_120e_coco.yml deleted file mode 100644 index a94e3cf3e70b91033678e8d874eaeebaf0c0a907..0000000000000000000000000000000000000000 --- a/configs/ssd_vgg16_300_120e_coco.yml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: [ - './_base_/models/ssd_vgg16_300.yml', - './_base_/optimizers/ssd_120e.yml', - './_base_/datasets/coco.yml', - './_base_/readers/ssd_reader.yml', - './_base_/runtime.yml', -] diff --git a/configs/yolov3_darknet53_270e_coco.yml b/configs/yolov3_darknet53_270e_coco.yml index b31e7e8af3be01e2e1ebc353e688e856ef595d7c..08ea8bad7ad19480fca8d0f51c40b94734b7e28b 100644 --- a/configs/yolov3_darknet53_270e_coco.yml +++ b/configs/yolov3_darknet53_270e_coco.yml @@ -1,7 +1,7 @@ _BASE_: [ './_base_/models/yolov3_darknet53.yml', './_base_/optimizers/yolov3_270e.yml', - './_base_/datasets/coco.yml', + './_base_/datasets/coco_detection.yml', './_base_/readers/yolov3_reader.yml', './_base_/runtime.yml', ] diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py index 54eabce7a5014212a416543e22f8401fb7a253af..f267d8aeff7c7de6c3d9e30bd685ab160470125a 100644 --- a/ppdet/data/reader.py +++ b/ppdet/data/reader.py @@ -16,6 +16,7 @@ import copy import traceback import six import sys +import multiprocessing as mp if sys.version_info >= (3, 0): import queue as Queue else: @@ -27,45 +28,42 @@ from paddle.io import DistributedBatchSampler from 
ppdet.core.workspace import register, serializable, create from . import transform -from .transform import operator, batch_operator from ppdet.utils.logger import setup_logger logger = setup_logger('reader') class Compose(object): - def __init__(self, transforms, fields=None, from_=transform, - num_classes=81): + def __init__(self, transforms, num_classes=81): self.transforms = transforms self.transforms_cls = [] - output_fields = None for t in self.transforms: for k, v in t.items(): - op_cls = getattr(from_, k) + op_cls = getattr(transform, k) self.transforms_cls.append(op_cls(**v)) if hasattr(op_cls, 'num_classes'): op_cls.num_classes = num_classes - # TODO: should be refined in the future - if op_cls in [ - transform.Gt2YoloTargetOp, transform.Gt2YoloTarget - ]: - output_fields = ['image', 'gt_bbox'] - output_fields.extend([ - 'target{}'.format(i) - for i in range(len(v['anchor_masks'])) - ]) + def __call__(self, data): + for f in self.transforms_cls: + try: + data = f(data) + except Exception as e: + stack_info = traceback.format_exc() + logger.warn("fail to map op [{}] with error: {} and stack:\n{}". 
+ format(f, e, str(stack_info))) + raise e + + return data - self.fields = fields - self.output_fields = output_fields if output_fields else fields - def __call__(self, data): - if self.fields is not None: - data_new = [] - for item in data: - data_new.append(dict(zip(self.fields, item))) - data = data_new +class BatchCompose(Compose): + def __init__(self, transforms, num_classes=81): + super(BatchCompose, self).__init__(transforms, num_classes) + self.output_fields = mp.Manager().list([]) + self.lock = mp.Lock() + def __call__(self, data): for f in self.transforms_cls: try: data = f(data) @@ -75,23 +73,27 @@ class Compose(object): format(f, e, str(stack_info))) raise e - if self.output_fields is not None: - data_new = [] - for item in data: - batch = [] - for k in self.output_fields: - batch.append(item[k]) - data_new.append(batch) - batch_size = len(data_new) - data_new = list(zip(*data_new)) - if batch_size > 1: - data = [ - np.array(item).astype(item[0].dtype) for item in data_new - ] - else: - data = data_new - - return data + # parse output fields from the first sample + # **this should be fixed once paddle.io.DataLoader supports dict** + # paddle.io.DataLoader does not support dict currently, so + # we need to parse the keys from the first sample. + # BatchCompose.__call__ will be called in each worker + # process, so a lock is needed here. 
+ if len(self.output_fields) == 0: + self.lock.acquire() + if len(self.output_fields) == 0: + for k, v in data[0].items(): + # FIXME(dkp): for more elegent coding + if k not in ['flipped', 'h', 'w']: + self.output_fields.append(k) + self.lock.release() + + data = [[data[i][k] for k in self.output_fields] + for i in range(len(data))] + data = list(zip(*data)) + + batch_data = [np.stack(d, axis=0) for d in data] + return batch_data class BaseDataLoader(object): @@ -99,8 +101,8 @@ class BaseDataLoader(object): def __init__(self, inputs_def=None, - sample_transforms=None, - batch_transforms=None, + sample_transforms=[], + batch_transforms=[], batch_size=1, shuffle=False, drop_last=False, @@ -108,21 +110,12 @@ class BaseDataLoader(object): num_classes=81, with_background=True, **kwargs): - # out fields - self._fields = inputs_def['fields'] if inputs_def else None # sample transform self._sample_transforms = Compose( sample_transforms, num_classes=num_classes) # batch transfrom - self._batch_transforms = None - if batch_transforms: - self._batch_transforms = Compose(batch_transforms, - copy.deepcopy(self._fields), - transform, num_classes) - self.output_fields = self._batch_transforms.output_fields - else: - self.output_fields = self._fields + self._batch_transforms = BatchCompose(batch_transforms, num_classes) self.batch_size = batch_size self.shuffle = shuffle @@ -139,8 +132,7 @@ class BaseDataLoader(object): self.dataset = dataset self.dataset.parse_dataset(self.with_background) # get data - self.dataset.set_out(self._sample_transforms, - copy.deepcopy(self._fields)) + self.dataset.set_transform(self._sample_transforms) # set kwargs self.dataset.set_kwargs(**self.kwargs) # batch sampler @@ -177,7 +169,10 @@ class BaseDataLoader(object): # data structure in paddle.io.DataLoader try: data = next(self.loader) - return {k: v for k, v in zip(self.output_fields, data)} + return { + k: v + for k, v in zip(self._batch_transforms.output_fields, data) + } except StopIteration: 
self.loader = iter(self.dataloader) six.reraise(*sys.exc_info()) @@ -191,8 +186,8 @@ class BaseDataLoader(object): class TrainReader(BaseDataLoader): def __init__(self, inputs_def=None, - sample_transforms=None, - batch_transforms=None, + sample_transforms=[], + batch_transforms=[], batch_size=1, shuffle=True, drop_last=True, @@ -210,8 +205,8 @@ class TrainReader(BaseDataLoader): class EvalReader(BaseDataLoader): def __init__(self, inputs_def=None, - sample_transforms=None, - batch_transforms=None, + sample_transforms=[], + batch_transforms=[], batch_size=1, shuffle=False, drop_last=True, @@ -229,8 +224,8 @@ class EvalReader(BaseDataLoader): class TestReader(BaseDataLoader): def __init__(self, inputs_def=None, - sample_transforms=None, - batch_transforms=None, + sample_transforms=[], + batch_transforms=[], batch_size=1, shuffle=False, drop_last=False, diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py index f21ee46ab2e6a73da62b2e8d9f8ce17e4f7c0bca..394ffdc86cd9aff3e45e731b27128cb3a369bcc6 100644 --- a/ppdet/data/source/coco.py +++ b/ppdet/data/source/coco.py @@ -28,9 +28,10 @@ class COCODataSet(DetDataset): dataset_dir=None, image_dir=None, anno_path=None, + data_fields=['image'], sample_num=-1): super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path, - sample_num) + data_fields, sample_num) self.load_image_only = False self.load_semantic = False @@ -82,13 +83,6 @@ class COCODataSet(DetDataset): im_w, im_h, img_id)) continue - coco_rec = { - 'im_file': im_path, - 'im_id': np.array([img_id]), - 'h': im_h, - 'w': im_w, - } - if not self.load_image_only: ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) instances = coco.loadAnns(ins_anno_ids) @@ -121,7 +115,6 @@ class COCODataSet(DetDataset): gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) gt_class = np.zeros((num_bbox, 1), dtype=np.int32) - gt_score = np.ones((num_bbox, 1), dtype=np.float32) is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) difficult = np.zeros((num_bbox, 
1), dtype=np.int32) gt_poly = [None] * num_bbox @@ -142,15 +135,25 @@ class COCODataSet(DetDataset): if has_segmentation and not any(gt_poly): continue - coco_rec.update({ + coco_rec = { + 'im_file': im_path, + 'im_id': np.array([img_id]), + 'h': im_h, + 'w': im_w, + } if 'image' in self.data_fields else {} + + gt_rec = { 'is_crowd': is_crowd, 'gt_class': gt_class, 'gt_bbox': gt_bbox, - 'gt_score': gt_score, 'gt_poly': gt_poly, - }) + } + for k, v in gt_rec.items(): + if k in self.data_fields: + coco_rec[k] = v + # TODO: remove load_semantic - if self.load_semantic: + if self.load_semantic and 'semantic' in self.data_fields: seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps', 'train2017', im_fname[:-3] + 'png') coco_rec.update({'semantic': seg_path}) diff --git a/ppdet/data/source/dataset.py b/ppdet/data/source/dataset.py index 0b1aa42c2f8348a1353287a47ac19e6e37fc9b0f..e948f176b61d286e9ceeabeb5d4a50f11a5fa2d9 100644 --- a/ppdet/data/source/dataset.py +++ b/ppdet/data/source/dataset.py @@ -31,6 +31,7 @@ class DetDataset(Dataset): dataset_dir=None, image_dir=None, anno_path=None, + data_fields=['image'], sample_num=-1, use_default_label=None, **kwargs): @@ -38,6 +39,7 @@ class DetDataset(Dataset): self.dataset_dir = dataset_dir if dataset_dir is not None else '' self.anno_path = anno_path self.image_dir = image_dir if image_dir is not None else '' + self.data_fields = data_fields self.sample_num = sample_num self.use_default_label = use_default_label self._epoch = 0 @@ -63,26 +65,19 @@ class DetDataset(Dataset): for _ in range(3) ] - # data augment - roidb = self.transform(roidb) - # data item - out = OrderedDict() - for k in self.fields: - out[k] = roidb[k] - return out.values() + return self.transform(roidb) def set_kwargs(self, **kwargs): self.mixup_epoch = kwargs.get('mixup_epoch', -1) self.cutmix_epoch = kwargs.get('cutmix_epoch', -1) self.mosaic_epoch = kwargs.get('mosaic_epoch', -1) + def set_transform(self, transform): + self.transform = transform + 
def set_epoch(self, epoch_id): self._epoch = epoch_id - def set_out(self, sample_transform, fields): - self.transform = sample_transform - self.fields = fields - def parse_dataset(self, with_background=True): raise NotImplemented( "Need to implement parse_dataset method of Dataset") diff --git a/ppdet/data/source/voc.py b/ppdet/data/source/voc.py index 41c5b4c33c7eadcce56a918bd07c4ea53a25c7f5..fa28b12df19e44d2bc102bfb0d75739ce71073f9 100644 --- a/ppdet/data/source/voc.py +++ b/ppdet/data/source/voc.py @@ -46,12 +46,14 @@ class VOCDataSet(DetDataset): dataset_dir=None, image_dir=None, anno_path=None, + data_fields=['image'], sample_num=-1, label_list=None): super(VOCDataSet, self).__init__( dataset_dir=dataset_dir, image_dir=image_dir, anno_path=anno_path, + data_fields=data_fields, sample_num=sample_num) self.label_list = label_list @@ -113,7 +115,6 @@ class VOCDataSet(DetDataset): gt_bbox = [] gt_class = [] gt_score = [] - is_crowd = [] difficult = [] for i, obj in enumerate(objs): cname = obj.find('name').text @@ -130,7 +131,6 @@ class VOCDataSet(DetDataset): gt_bbox.append([x1, y1, x2, y2]) gt_class.append([cname2cid[cname]]) gt_score.append([1.]) - is_crowd.append([0]) difficult.append([_difficult]) else: logger.warn( @@ -140,19 +140,25 @@ class VOCDataSet(DetDataset): gt_bbox = np.array(gt_bbox).astype('float32') gt_class = np.array(gt_class).astype('int32') gt_score = np.array(gt_score).astype('float32') - is_crowd = np.array(is_crowd).astype('int32') difficult = np.array(difficult).astype('int32') + voc_rec = { 'im_file': img_file, 'im_id': im_id, 'h': im_h, - 'w': im_w, - 'is_crowd': is_crowd, + 'w': im_w + } if 'image' in self.data_fields else {} + + gt_rec = { 'gt_class': gt_class, 'gt_score': gt_score, 'gt_bbox': gt_bbox, 'difficult': difficult } + for k, v in gt_rec.items(): + if k in self.data_fields: + voc_rec[k] = v + if len(objs) != 0: records.append(voc_rec) diff --git a/ppdet/data/transform/batch_operator.py 
b/ppdet/data/transform/batch_operator.py index d94ae6a14c161c939ab7ea52ad49a73b69a8c947..acfc326bb8252b3e2c62de9126ff965f2434bbe1 100644 --- a/ppdet/data/transform/batch_operator.py +++ b/ppdet/data/transform/batch_operator.py @@ -303,6 +303,11 @@ class Gt2YoloTargetOp(BaseOperator): # classification target[idx, 6 + cls, gj, gi] = 1. sample['target{}'.format(i)] = target + + # remove useless gt_class and gt_score after target calculated + sample.pop('gt_class') + sample.pop('gt_score') + return samples diff --git a/ppdet/data/transform/operator.py b/ppdet/data/transform/operator.py index f98e95f06b7846d1760d2e184ce1659560d79902..6c39ed6e94080eca5bb69c9058ab25883df297ae 100644 --- a/ppdet/data/transform/operator.py +++ b/ppdet/data/transform/operator.py @@ -116,6 +116,7 @@ class DecodeOp(BaseOperator): if 'image' not in sample: with open(sample['im_file'], 'rb') as f: sample['image'] = f.read() + sample.pop('im_file') im = sample['image'] data = np.frombuffer(im, dtype='uint8') @@ -1570,9 +1571,9 @@ class MixupOp(BaseOperator): gt_class2 = sample[1]['gt_class'] gt_class = np.concatenate((gt_class1, gt_class2), axis=0) result['gt_class'] = gt_class - if 'gt_score' in sample[0]: - gt_score1 = sample[0]['gt_score'] - gt_score2 = sample[1]['gt_score'] + + gt_score1 = np.ones_like(sample[0]['gt_class']) + gt_score2 = np.ones_like(sample[1]['gt_class']) gt_score = np.concatenate( (gt_score1 * factor, gt_score2 * (1. 
- factor)), axis=0) result['gt_score'] = gt_score @@ -1673,6 +1674,11 @@ class PadBoxOp(BaseOperator): if gt_num > 0: pad_diff[:gt_num] = sample['difficult'][:gt_num, 0] sample['difficult'] = pad_diff + if 'is_crowd' in sample: + pad_crowd = np.zeros((num_max, ), dtype=np.int32) + if gt_num > 0: + pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0] + sample['is_crowd'] = pad_crowd return sample diff --git a/tools/infer.py b/tools/infer.py index 65fbc3cefe3419e504e0b6b1db743f6747ba959c..b593321f11f25386b5340068231ce6f6379dd267 100755 --- a/tools/infer.py +++ b/tools/infer.py @@ -131,16 +131,18 @@ def run(FLAGS, cfg, place): dataset.set_images(test_images) test_loader = create('TestReader')(dataset, cfg['worker_num']) extra_key = ['im_shape', 'scale_factor', 'im_id'] - if cfg.metric == 'VOC': - extra_key += ['gt_bbox', 'gt_class', 'difficult'] # TODO: support other metrics imid2path = dataset.get_imid2path() - from ppdet.utils.coco_eval import get_category_info anno_file = dataset.get_anno() with_background = cfg.with_background use_default_label = dataset.use_default_label + + if cfg.metric == 'COCO': + from ppdet.utils.coco_eval import get_category_info + if cfg.metric == 'VOC': + from ppdet.utils.voc_eval import get_category_info clsid2catid, catid2name = get_category_info(anno_file, with_background, use_default_label)