diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py index b5f1749e31a356879265ca66d580283158166afc..8633fe5305929c5238523a067af172720156d3d5 100644 --- a/paddlex/cv/datasets/voc.py +++ b/paddlex/cv/datasets/voc.py @@ -95,8 +95,8 @@ class VOCDetection(Dataset): if not osp.isfile(xml_file): continue if not osp.exists(img_file): - raise IOError( - 'The image file {} is not exist!'.format(img_file)) + raise IOError('The image file {} is not exist!'.format( + img_file)) tree = ET.parse(xml_file) if tree.find('id') is None: im_id = np.array([ct]) @@ -122,25 +122,20 @@ class VOCDetection(Dataset): y2 = float(obj.find('bndbox').find('ymax').text) x1 = max(0, x1) y1 = max(0, y1) - x2 = min(im_w - 1, x2) - y2 = min(im_h - 1, y2) + if im_w > 0.5 and im_h > 0.5: + x2 = min(im_w - 1, x2) + y2 = min(im_h - 1, y2) gt_bbox[i] = [x1, y1, x2, y2] is_crowd[i][0] = 0 difficult[i][0] = _difficult annotations['annotations'].append({ - 'iscrowd': - 0, - 'image_id': - int(im_id[0]), + 'iscrowd': 0, + 'image_id': int(im_id[0]), 'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1], - 'area': - float((x2 - x1 + 1) * (y2 - y1 + 1)), - 'category_id': - cname2cid[cname], - 'id': - ann_ct, - 'difficult': - _difficult + 'area': float((x2 - x1 + 1) * (y2 - y1 + 1)), + 'category_id': cname2cid[cname], + 'id': ann_ct, + 'difficult': _difficult }) ann_ct += 1 @@ -160,14 +155,10 @@ class VOCDetection(Dataset): self.file_list.append([img_file, voc_rec]) ct += 1 annotations['images'].append({ - 'height': - im_h, - 'width': - im_w, - 'id': - int(im_id[0]), - 'file_name': - osp.split(img_file)[1] + 'height': im_h, + 'width': im_w, + 'id': int(im_id[0]), + 'file_name': osp.split(img_file)[1] }) if not len(self.file_list) > 0: @@ -198,8 +189,7 @@ class VOCDetection(Dataset): else: mix_pos = 0 im_info['mixup'] = [ - files[mix_pos][0], - copy.deepcopy(files[mix_pos][1][0]), + files[mix_pos][0], copy.deepcopy(files[mix_pos][1][0]), copy.deepcopy(files[mix_pos][1][1]) ] self._pos += 1 diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index b9aee9e3bc8e432b575fbc0f9fc11d86a73ecb88..ca892d75bc7913cd1e238bb96e82e76f9a4fd716 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -111,8 +111,8 @@ class Compose(DetTransform): try: im = cv2.imread(im_file).astype('float32') except: - raise TypeError( - 'Can\'t read The image file {}!'.format(im_file)) + raise TypeError('Can\'t read The image file {}!'.format( + im_file)) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) # make default im_info with [h, w, 1] im_info['im_resize_info'] = np.array( @@ -145,19 +145,10 @@ class Compose(DetTransform): outputs = op(im, im_info, label_info) im = outputs[0] else: + im = execute_imgaug(op, im) if label_info is not None: - gt_poly = label_info.get('gt_poly', None) - gt_bbox = label_info['gt_bbox'] - if gt_poly is None: - im, aug_bbox = execute_imgaug(op, im, bboxes=gt_bbox) - else: - im, aug_bbox, aug_poly = execute_imgaug( - op, im, bboxes=gt_bbox, polygons=gt_poly) - label_info['gt_poly'] = aug_poly - label_info['gt_bbox'] = aug_bbox outputs = (im, im_info, label_info) else: - im, = execute_imgaug(op, im) outputs = (im, im_info) return outputs @@ -218,8 +209,8 @@ class ResizeByShort(DetTransform): im_short_size = min(im.shape[0], im.shape[1]) im_long_size = max(im.shape[0], im.shape[1]) scale = float(self.short_size) / im_short_size - if self.max_size > 0 and np.round( - scale * im_long_size) > self.max_size: + if self.max_size > 0 and np.round(scale * + im_long_size) > self.max_size: scale = float(self.max_size) / float(im_long_size) resized_width = int(round(im.shape[1] * scale)) resized_height = int(round(im.shape[0] * scale)) @@ -302,8 +293,8 @@ class Padding(DetTransform): if isinstance(self.target_size, int): padding_im_h = self.target_size padding_im_w = self.target_size - elif isinstance(self.target_size, list) or isinstance( - self.target_size, tuple): + elif isinstance(self.target_size, list) or isinstance(self.target_size, + tuple): padding_im_w = self.target_size[0] padding_im_h = self.target_size[1] elif self.coarsest_stride > 0: @@ -321,8 +312,8 @@ class Padding(DetTransform): raise ValueError( 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' .format(im_w, im_h, padding_im_w, padding_im_h)) - padding_im = np.zeros((padding_im_h, padding_im_w, im_c), - dtype=np.float32) + padding_im = np.zeros( + (padding_im_h, padding_im_w, im_c), dtype=np.float32) padding_im[:im_h, :im_w, :] = im if label_info is None: return (padding_im, im_info) @@ -932,8 +923,9 @@ class RandomCrop(DetTransform): crop_y = np.random.randint(0, h - crop_h) crop_x = np.random.randint(0, w - crop_w) crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h] - iou = iou_matrix(gt_bbox, np.array([crop_box], - dtype=np.float32)) + iou = iou_matrix( + gt_bbox, np.array( + [crop_box], dtype=np.float32)) if iou.max() < thresh: continue @@ -941,16 +933,21 @@ class RandomCrop(DetTransform): continue cropped_box, valid_ids = crop_box_with_center_constraint( - gt_bbox, np.array(crop_box, dtype=np.float32)) + gt_bbox, np.array( + crop_box, dtype=np.float32)) if valid_ids.size > 0: found = True break if found: if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0: - crop_polys = crop_segms(label_info['gt_poly'], valid_ids, - np.array(crop_box, dtype=np.int64), - h, w) + crop_polys = crop_segms( + label_info['gt_poly'], + valid_ids, + np.array( + crop_box, dtype=np.int64), + h, + w) if [] in crop_polys: delete_id = list() valid_polys = list() diff --git a/paddlex/cv/transforms/imgaug_support.py b/paddlex/cv/transforms/imgaug_support.py index ba8d808fd8759c3d07d17509d0bdba41c3e9bd2f..3924acd1821fc9fc4f85b8c486e4f2a20f4e8543 100644 --- a/paddlex/cv/transforms/imgaug_support.py +++ b/paddlex/cv/transforms/imgaug_support.py @@ -13,36 +13,41 @@ # limitations under the License. import numpy as np +import copy def execute_imgaug(augmenter, im, bboxes=None, polygons=None, segment_map=None): # 预处理,将bboxes, polygons转换成imgaug格式 - import imgaug.augmentables.polys as polys + import imgaug.augmentables.kps as kps import imgaug.augmentables.bbs as bbs aug_im = im.astype('uint8') + aug_im = augmenter.augment(image=aug_im) + return aug_im + # TODO imgaug的标注处理逻辑与paddlex已存的transform存在部分差异 + # 目前仅支持对原图进行处理,因此只能使用pixlevel的imgaug增强操作 + # 以下代码暂不会执行 aug_bboxes = None if bboxes is not None: aug_bboxes = list() for i in range(len(bboxes)): - x1 = bboxes[i, 0] - 1 + x1 = bboxes[i, 0] y1 = bboxes[i, 1] x2 = bboxes[i, 2] y2 = bboxes[i, 3] aug_bboxes.append(bbs.BoundingBox(x1, y1, x2, y2)) - aug_polygons = None - lod_info = list() + aug_points = None if polygons is not None: - aug_polygons = list() + aug_points = list() for i in range(len(polygons)): num = len(polygons[i]) - lod_info.append(num) for j in range(num): - points = np.reshape(polygons[i][j], (-1, 2)) - aug_polygons.append(polys.Polygon(points)) + tmp = np.reshape(polygons[i][j], (-1, 2)) + for k in range(len(tmp)): + aug_points.append(kps.Keypoint(tmp[k, 0], tmp[k, 1])) aug_segment_map = None if segment_map is not None: @@ -56,72 +61,47 @@ def execute_imgaug(augmenter, im, bboxes=None, polygons=None, raise Exception( "Only support 2-dimensions for 3-dimensions for segment_map") - aug_im, aug_bboxes, aug_polygons, aug_seg_map = augmenter.augment( + unnormalized_batch = augmenter.augment( image=aug_im, bounding_boxes=aug_bboxes, - polygons=aug_polygons, - segmentation_maps=aug_segment_map) + keypoints=aug_points, + segmentation_maps=aug_segment_map, + return_batch=True) + aug_im = unnormalized_batch.images_aug[0] + aug_bboxes = unnormalized_batch.bounding_boxes_aug + aug_points = unnormalized_batch.keypoints_aug + aug_seg_map = unnormalized_batch.segmentation_maps_aug aug_im = aug_im.astype('float32') - if aug_polygons is not None: - assert len(aug_bboxes) == len( - lod_info - ), "Number of aug_bboxes should be equal to number of aug_polygons" - if aug_bboxes is not None: - # 裁剪掉在图像之外的bbox和polygon - for i in range(len(aug_bboxes)): - aug_bboxes[i] = aug_bboxes[i].clip_out_of_image(aug_im) - if aug_polygons is not None: - for i in range(len(aug_polygons)): - aug_polygons[i] = aug_polygons[i].clip_out_of_image(aug_im) - - # 过滤掉无效的bbox和polygon,并转换为训练数据格式 converted_bboxes = list() - converted_polygons = list() - poly_index = 0 for i in range(len(aug_bboxes)): - # 过滤width或height不足1像素的框 - if aug_bboxes[i].width < 1 or aug_bboxes[i].height < 1: - continue - if aug_polygons is None: - converted_bboxes.append([ - aug_bboxes[i].x1, aug_bboxes[i].y1, aug_bboxes[i].x2, - aug_bboxes[i].y2 - ]) - continue - - # 如若有polygons,将会继续执行下面代码 - polygons_this_box = list() - for ps in aug_polygons[poly_index:poly_index + lod_info[i]]: - if len(ps) == 0: - continue - for p in ps: - # 没有3个point的polygon被过滤 - if len(p.exterior) < 3: - continue - polygons_this_box.append(p.exterior.flatten().tolist()) - poly_index += lod_info[i] - - if len(polygons_this_box) == 0: - continue converted_bboxes.append([ aug_bboxes[i].x1, aug_bboxes[i].y1, aug_bboxes[i].x2, aug_bboxes[i].y2 ]) - converted_polygons.append(polygons_this_box) - if len(converted_bboxes) == 0: - aug_im = im - converted_bboxes = bboxes - converted_polygons = polygons + aug_bboxes = converted_bboxes + + aug_polygons = None + if aug_points is not None: + aug_polygons = copy.deepcopy(polygons) + idx = 0 + for i in range(len(aug_polygons)): + num = len(aug_polygons[i]) + for j in range(num): + num_points = len(aug_polygons[i][j]) // 2 + for k in range(num_points): + aug_polygons[i][j][k * 2] = aug_points[idx].x + aug_polygons[i][j][k * 2 + 1] = aug_points[idx].y + idx += 1 result = [aug_im] - if bboxes is not None: - result.append(np.array(converted_bboxes)) - if polygons is not None: - result.append(converted_polygons) - if segment_map is not None: + if aug_bboxes is not None: + result.append(np.array(aug_bboxes)) + if aug_polygons is not None: + result.append(aug_polygons) + if aug_seg_map is not None: n, h, w, c = aug_seg_map.shape if len(segment_map.shape) == 2: aug_seg_map = np.reshape(aug_seg_map, (h, w)) diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py index 9eb1222d02e74d8144c813c9f1398c5367c413a7..e562ba2601677085fdef23c57a6779ba77143f8d 100644 --- a/paddlex/cv/transforms/seg_transforms.py +++ b/paddlex/cv/transforms/seg_transforms.py @@ -101,11 +101,10 @@ class Compose(SegTransform): if len(outputs) == 3: label = outputs[2] else: + im = execute_imgaug(op, im) if label is not None: - im, label = execute_imgaug(op, im, segment_map=label) outputs = (im, im_info, label) else: - im, = execute_imgaug(op, im) outputs = (im, im_info) return outputs @@ -391,8 +390,8 @@ class ResizeByShort(SegTransform): im_short_size = min(im.shape[0], im.shape[1]) im_long_size = max(im.shape[0], im.shape[1]) scale = float(self.short_size) / im_short_size - if self.max_size > 0 and np.round( - scale * im_long_size) > self.max_size: + if self.max_size > 0 and np.round(scale * + im_long_size) > self.max_size: scale = float(self.max_size) / float(im_long_size) resized_width = int(round(im.shape[1] * scale)) resized_height = int(round(im.shape[0] * scale)) @@ -423,8 +422,8 @@ class ResizeRangeScaling(SegTransform): def __init__(self, min_value=400, max_value=600): if min_value > max_value: raise ValueError('min_value must be less than max_value, ' - 'but they are {} and {}.'.format( - min_value, max_value)) + 'but they are {} and {}.'.format(min_value, + max_value)) self.min_value = min_value self.max_value = max_value @@ -761,8 +760,8 @@ class RandomPaddingCrop(SegTransform): h_off = np.random.randint(img_height - crop_height + 1) w_off = np.random.randint(img_width - crop_width + 1) - im = im[h_off:(crop_height + h_off), w_off:( - w_off + crop_width), :] + im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width + ), :] if label is not None: label = label[h_off:(crop_height + h_off), w_off:( w_off + crop_width)] diff --git a/setup.py b/setup.py index f8d54dc6b8100c544decaf0af9f8698c8640ff81..be173407ee5056e3b237fe8f7136349420d64526 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ setuptools.setup( long_description_content_type="text/plain", url="https://github.com/PaddlePaddle/PaddleX", packages=setuptools.find_packages(), - setup_requires=['cython', 'numpy', 'sklearn'], + setup_requires=['cython', 'numpy'], install_requires=[ "pycocotools;platform_system!='Windows'", 'pyyaml', 'colorama', 'tqdm', 'visualdl==1.3.0', 'paddleslim==1.0.1', 'visualdl==2.0.0a2' @@ -38,6 +38,4 @@ setuptools.setup( "Operating System :: OS Independent", ], license='Apache 2.0', - entry_points={'console_scripts': [ - 'paddlex=paddlex.command:main', - ]}) + entry_points={'console_scripts': ['paddlex=paddlex.command:main', ]})