# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import division from __future__ import print_function import os import cv2 import numpy as np from pycocotools.coco import COCO from paddle.fluid.io import Dataset import logging logger = logging.getLogger(__name__) __all__ = ['COCODataset'] class COCODataset(Dataset): """ Load dataset with MS-COCO format. Args: dataset_dir (str): root directory for dataset. image_dir (str): directory for images. anno_path (str): voc annotation file path. sample_num (int): number of samples to load, -1 means all. use_default_label (bool): whether use the default mapping of label to integer index. Default True. with_background (bool): whether load background as a class, default True. transform (callable): callable transform to perform on samples, default None. mixup (bool): whether return image mixup samples, default False. alpha (float): alpha factor of beta distribution to generate mixup score, used only when mixup is True, default 1.5 beta (float): beta factor of beta distribution to generate mixup score, used only when mixup is True, default 1.5 """ def __init__(self, dataset_dir='', image_dir='', anno_path='', sample_num=-1, with_background=True, transform=None, mixup=False, alpha=1.5, beta=1.5): # roidbs is list of dict whose structure is: # { # 'im_file': im_fname, # image file name # 'im_id': im_id, # image id # 'h': im_h, # height of image # 'w': im_w, # width # 'is_crowd': is_crowd, # 'gt_class': gt_class, # 'gt_bbox': gt_bbox, # 'gt_score': gt_score, # 'difficult': difficult # } self._anno_path = os.path.join(dataset_dir, anno_path) self._image_dir = os.path.join(dataset_dir, image_dir) assert os.path.exists(self._anno_path), \ "anno_path {} not exists".format(anno_path) assert os.path.exists(self._image_dir), \ "image_dir {} not exists".format(image_dir) self._sample_num = sample_num self._with_background = with_background self._transform = transform self._mixup = mixup self._alpha = alpha self._beta = beta # load in dataset roidbs self._load_roidb_and_cname2cid() def _load_roidb_and_cname2cid(self): assert self._anno_path.endswith('.json'), \ 'invalid coco annotation file: ' + anno_path coco = COCO(self._anno_path) img_ids = coco.getImgIds() cat_ids = coco.getCatIds() records = [] ct = 0 # when with_background = True, mapping category to classid, like: # background:0, first_class:1, second_class:2, ... catid2clsid = dict({ catid: i + int(self._with_background) for i, catid in enumerate(cat_ids) }) cname2cid = dict({ coco.loadCats(catid)[0]['name']: clsid for catid, clsid in catid2clsid.items() }) for img_id in img_ids: img_anno = coco.loadImgs(img_id)[0] im_fname = img_anno['file_name'] im_w = float(img_anno['width']) im_h = float(img_anno['height']) ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) instances = coco.loadAnns(ins_anno_ids) bboxes = [] for inst in instances: x, y, box_w, box_h = inst['bbox'] x1 = max(0, x) y1 = max(0, y) x2 = min(im_w - 1, x1 + max(0, box_w - 1)) y2 = min(im_h - 1, y1 + max(0, box_h - 1)) if inst['area'] > 0 and x2 >= x1 and y2 >= y1: inst['clean_bbox'] = [x1, y1, x2, y2] bboxes.append(inst) else: logger.warn( 'Found an invalid bbox in annotations: im_id: {}, ' 'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format( img_id, float(inst['area']), x1, y1, x2, y2)) num_bbox = len(bboxes) gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) gt_class = np.zeros((num_bbox, 1), dtype=np.int32) gt_score = np.ones((num_bbox, 1), dtype=np.float32) is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) difficult = np.zeros((num_bbox, 1), dtype=np.int32) gt_poly = [None] * num_bbox for i, box in enumerate(bboxes): catid = box['category_id'] gt_class[i][0] = catid2clsid[catid] gt_bbox[i, :] = box['clean_bbox'] is_crowd[i][0] = box['iscrowd'] if 'segmentation' in box: gt_poly[i] = box['segmentation'] im_fname = os.path.join(self._image_dir, im_fname) if self._image_dir else im_fname coco_rec = { 'im_file': im_fname, 'im_id': np.array([img_id]), 'h': im_h, 'w': im_w, 'is_crowd': is_crowd, 'gt_class': gt_class, 'gt_bbox': gt_bbox, 'gt_score': gt_score, 'gt_poly': gt_poly, } records.append(coco_rec) ct += 1 if self._sample_num > 0 and ct >= self._sample_num: break assert len(records) > 0, 'not found any coco record in %s' % (self._anno_path) logger.info('{} samples in file {}'.format(ct, self._anno_path)) self._roidbs, self._cname2cid = records, cname2cid @property def num_classes(self): return len(self._cname2cid) def __len__(self): return len(self._roidbs) def _getitem_by_index(self, idx): roidb = self._roidbs[idx] with open(roidb['im_file'], 'rb') as f: data = np.frombuffer(f.read(), dtype='uint8') im = cv2.imdecode(data, 1) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im_id = roidb['im_id'] im_shape = np.array([roidb['h'], roidb['w']], dtype='int32') gt_bbox = roidb['gt_bbox'] gt_class = roidb['gt_class'] gt_score = roidb['gt_score'] return im_id, im_shape, im, gt_bbox, gt_class, gt_score def __getitem__(self, idx): im_id, im_shape, im, gt_bbox, gt_class, gt_score = self._getitem_by_index(idx) if self._mixup: mixup_idx = idx + np.random.randint(1, self.__len__()) mixup_idx %= self.__len__() _, _, mixup_im, mixup_bbox, mixup_class, _ = \ self._getitem_by_index(mixup_idx) im_shape, im, gt_bbox, gt_class, gt_score = \ self._mixup_image(im, gt_bbox, gt_class, mixup_im, mixup_bbox, mixup_class) if self._transform: im_id, im_shape, im, gt_bbox, gt_class, gt_score = \ self._transform(im_id, im_shape, im, gt_bbox, gt_class, gt_score) return [im_id, im_shape, im, gt_bbox, gt_class, gt_score] def _mixup_image(self, img1, bbox1, class1, img2, bbox2, class2): factor = np.random.beta(self._alpha, self._beta) factor = max(0.0, min(1.0, factor)) if factor >= 1.0: return img1, bbox1, class1, np.ones_like(class1, dtype="float32") if factor <= 0.0: return img2, bbox2, class2, np.ones_like(class2, dtype="float32") h = max(img1.shape[0], img2.shape[0]) w = max(img1.shape[1], img2.shape[1]) img = np.zeros((h, w, img1.shape[2]), 'float32') img[:img1.shape[0], :img1.shape[1], :] = \ img1.astype('float32') * factor img[:img2.shape[0], :img2.shape[1], :] += \ img2.astype('float32') * (1.0 - factor) gt_bbox = np.concatenate((bbox1, bbox2), axis=0) gt_class = np.concatenate((class1, class2), axis=0) score1 = np.ones_like(class1, dtype="float32") * factor score2 = np.ones_like(class2, dtype="float32") * (1.0 - factor) gt_score = np.concatenate((score1, score2), axis=0) im_shape = np.array([h, w], dtype='int32') return im_shape, img, gt_bbox, gt_class, gt_score @property def mixup(self): return self._mixup @mixup.setter def mixup(self, value): if not isinstance(value, bool): raise ValueError("mixup should be a boolean number") logger.info("{} set mixup to {}".format(self, value)) self._mixup = value def pascalvoc_label(with_background=True): labels_map = { 'aeroplane': 1, 'bicycle': 2, 'bird': 3, 'boat': 4, 'bottle': 5, 'bus': 6, 'car': 7, 'cat': 8, 'chair': 9, 'cow': 10, 'diningtable': 11, 'dog': 12, 'horse': 13, 'motorbike': 14, 'person': 15, 'pottedplant': 16, 'sheep': 17, 'sofa': 18, 'train': 19, 'tvmonitor': 20 } if not with_background: labels_map = {k: v - 1 for k, v in labels_map.items()} return labels_map