diff --git a/paddlex/cv/datasets/coco.py b/paddlex/cv/datasets/coco.py index ff7c2b2d2438fb88d359c94f9ede65d900d9216e..97e791be5ed3cac1656fba4429d90f1653bfe1be 100644 --- a/paddlex/cv/datasets/coco.py +++ b/paddlex/cv/datasets/coco.py @@ -128,7 +128,6 @@ class CocoDetection(VOCDetection): coco_rec = (im_info, label_info) self.file_list.append([im_fname, coco_rec]) - if not len(self.file_list) > 0: raise Exception('not found any coco record in %s' % (ann_file)) logging.info("{} samples in file {}".format( diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py index 45335084a9f8ddd9b850b52f3a6db9fe6886a153..56104a4aa267b94afacfb1d455c978484b4796a8 100644 --- a/paddlex/cv/datasets/voc.py +++ b/paddlex/cv/datasets/voc.py @@ -14,6 +14,7 @@ from __future__ import absolute_import import copy +import os import os.path as osp import random import numpy as np @@ -170,6 +171,43 @@ class VOCDetection(Dataset): self.coco_gt.dataset = annotations self.coco_gt.createIndex() + def append_backgrounds(self, image_dir): + import cv2 + if not osp.exists(image_dir): + raise Exception("{} background images directory does not exist.".format(image_dir)) + image_list = os.listdir(image_dir) + max_img_id = max(self.coco_gt.getImgIds()) + for image in image_list: + if not is_pic(image): + continue + # False ground truth + gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32) + gt_class = np.array([[0]], dtype=np.int32) + gt_score = np.ones((1, 1), dtype=np.float32) + is_crowd = np.array([[0]], dtype=np.int32) + difficult = np.zeros((1, 1), dtype=np.int32) + gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]] + + max_img_id += 1 + im_fname = osp.join(image_dir, image) + img_data = cv2.imread(im_fname) + im_h, im_w, im_c = img_data.shape + im_info = { + 'im_id': np.array([max_img_id]).astype('int32'), + 'image_shape': np.array([im_h, im_w]).astype('int32'), + } + label_info = { + 'is_crowd': is_crowd, + 'gt_class': gt_class, + 'gt_bbox': gt_bbox, + 'gt_score': gt_score, + 'difficult': difficult, + 'gt_poly': gt_poly + } + coco_rec = (im_info, label_info) + self.file_list.append([im_fname, coco_rec]) + self.num_samples = len(self.file_list) + def iterator(self): self._epoch += 1 self._pos = 0 diff --git a/paddlex/cv/nets/detection/mask_rcnn.py b/paddlex/cv/nets/detection/mask_rcnn.py index a5dd018a4f44e6d354c0d452c06d2cdafa75a77a..b67f44a61db87fd858c032617df2f191bbbda69f 100644 --- a/paddlex/cv/nets/detection/mask_rcnn.py +++ b/paddlex/cv/nets/detection/mask_rcnn.py @@ -204,7 +204,7 @@ class MaskRCNN(object): bg_thresh_hi=self.bg_thresh_hi, bg_thresh_lo=self.bg_thresh_lo, bbox_reg_weights=self.bbox_reg_weights, - calass_nums=self.num_classes, + class_nums=self.num_classes, use_random=self.rpn_head.use_random) rois = outputs[0] diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index 19db33173b87b7cc20b87054cfbc1241176abc58..b8ce67f7339b5ad4adf8e4384015810be73a8770 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -722,22 +722,38 @@ class MixupImage(DetTransform): 'Becasuse gt_bbox/gt_class/gt_score is not in label_info!') gt_bbox1 = label_info['gt_bbox'] gt_bbox2 = im_info['mixup'][2]['gt_bbox'] - gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) gt_class1 = label_info['gt_class'] gt_class2 = im_info['mixup'][2]['gt_class'] - gt_class = np.concatenate((gt_class1, gt_class2), axis=0) - gt_score1 = label_info['gt_score'] gt_score2 = im_info['mixup'][2]['gt_score'] - gt_score = np.concatenate( - (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) if 'gt_poly' in label_info: gt_poly1 = label_info['gt_poly'] gt_poly2 = im_info['mixup'][2]['gt_poly'] - label_info['gt_poly'] = gt_poly1 + gt_poly2 is_crowd1 = label_info['is_crowd'] is_crowd2 = im_info['mixup'][2]['is_crowd'] - is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + + if 0 not in gt_class1 and 0 not in gt_class2: + gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) + gt_class = np.concatenate((gt_class1, gt_class2), axis=0) + gt_score = np.concatenate( + (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + gt_poly2 + is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + elif 0 in gt_class1: + gt_bbox = gt_bbox2 + gt_class = gt_class2 + gt_score = gt_score2 * (1. - factor) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly2 + is_crowd = is_crowd2 + else: + gt_bbox = gt_bbox1 + gt_class = gt_class1 + gt_score = gt_score1 * factor + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + is_crowd = is_crowd1 label_info['gt_bbox'] = gt_bbox label_info['gt_score'] = gt_score label_info['gt_class'] = gt_class @@ -809,6 +825,8 @@ class RandomExpand(DetTransform): if np.random.uniform(0., 1.) < self.prob: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] height = int(image_shape[0]) width = int(image_shape[1]) @@ -904,6 +922,8 @@ class RandomCrop(DetTransform): if len(label_info['gt_bbox']) == 0: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] w = image_shape[1] @@ -1199,9 +1219,10 @@ class ArrangeYOLOv3(DetTransform): if gt_num > 0: label_info['gt_class'][:gt_num, 0] = label_info[ 'gt_class'][:gt_num, 0] - 1 - gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] - gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] + if -1 not in label_info['gt_class']: + gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] + gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] + gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] # parse [x1, y1, x2, y2] to [x, y, w, h] gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2.