diff --git a/deploy/raspberry/python/__init__.py b/deploy/raspberry/python/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..131e0650f5c8db6885ebab5cd342b37630f13be8 100644 --- a/deploy/raspberry/python/__init__.py +++ b/deploy/raspberry/python/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/deploy/raspberry/python/demo.py b/deploy/raspberry/python/demo.py index c99dc6c05920ebda4ea38b2abac74afd025bcea6..8102760f2ce8f6bfffd3dbe051933adbcfbd2a63 100644 --- a/deploy/raspberry/python/demo.py +++ b/deploy/raspberry/python/demo.py @@ -1,4 +1,4 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deploy/raspberry/python/transforms/__init__.py b/deploy/raspberry/python/transforms/__init__.py index 37c14e75f72f8c6b76a608116419d58437fab99e..9ec4809004549b5d564e7d69feb5d3a32fbebc98 100644 --- a/deploy/raspberry/python/transforms/__init__.py +++ b/deploy/raspberry/python/transforms/__init__.py @@ -1,10 +1,10 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/deploy/raspberry/python/transforms/box_utils.py b/deploy/raspberry/python/transforms/box_utils.py deleted file mode 100644 index 02f3c4d4c12af392ffde26e9a783d6ca9122e865..0000000000000000000000000000000000000000 --- a/deploy/raspberry/python/transforms/box_utils.py +++ /dev/null @@ -1,223 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import random -import math -import cv2 -import scipy - - -def bbox_area(src_bbox): - if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]: - return 0. - else: - width = src_bbox[2] - src_bbox[0] - height = src_bbox[3] - src_bbox[1] - return width * height - - -def jaccard_overlap(sample_bbox, object_bbox): - if sample_bbox[0] >= object_bbox[2] or \ - sample_bbox[2] <= object_bbox[0] or \ - sample_bbox[1] >= object_bbox[3] or \ - sample_bbox[3] <= object_bbox[1]: - return 0 - intersect_xmin = max(sample_bbox[0], object_bbox[0]) - intersect_ymin = max(sample_bbox[1], object_bbox[1]) - intersect_xmax = min(sample_bbox[2], object_bbox[2]) - intersect_ymax = min(sample_bbox[3], object_bbox[3]) - intersect_size = (intersect_xmax - intersect_xmin) * ( - intersect_ymax - intersect_ymin) - sample_bbox_size = bbox_area(sample_bbox) - object_bbox_size = bbox_area(object_bbox) - overlap = intersect_size / ( - sample_bbox_size + object_bbox_size - intersect_size) - return overlap - - -def iou_matrix(a, b): - tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2]) - br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) - - area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2) - area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) - area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) - area_o = (area_a[:, np.newaxis] + area_b - area_i) - return area_i / (area_o + 1e-10) - - -def crop_box_with_center_constraint(box, crop): - cropped_box = box.copy() - - cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2]) - cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:]) - cropped_box[:, :2] -= crop[:2] - cropped_box[:, 2:] -= crop[:2] - - centers = (box[:, :2] + box[:, 2:]) / 2 - valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1) - valid = np.logical_and( - valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1)) - - return cropped_box, np.where(valid)[0] - - -def is_poly(segm): - if not isinstance(segm, (list, dict)): - raise Exception("Invalid segm type: {}".format(type(segm))) - return isinstance(segm, list) - - -def crop_image(img, crop): - x1, y1, x2, y2 = crop - return img[y1:y2, x1:x2, :] - - -def crop_segms(segms, valid_ids, crop, height, width): - def _crop_poly(segm, crop): - xmin, ymin, xmax, ymax = crop - crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin] - crop_p = np.array(crop_coord).reshape(4, 2) - crop_p = Polygon(crop_p) - - crop_segm = list() - for poly in segm: - poly = np.array(poly).reshape(len(poly) // 2, 2) - polygon = Polygon(poly) - if not polygon.is_valid: - exterior = polygon.exterior - multi_lines = exterior.intersection(exterior) - polygons = shapely.ops.polygonize(multi_lines) - polygon = MultiPolygon(polygons) - multi_polygon = list() - if isinstance(polygon, MultiPolygon): - multi_polygon = copy.deepcopy(polygon) - else: - multi_polygon.append(copy.deepcopy(polygon)) - for per_polygon in multi_polygon: - inter = per_polygon.intersection(crop_p) - if not inter: - continue - if isinstance(inter, (MultiPolygon, GeometryCollection)): - for part in inter: - if not isinstance(part, Polygon): - continue - part = np.squeeze( - np.array(part.exterior.coords[:-1]).reshape(1, -1)) - part[0::2] -= xmin - part[1::2] -= ymin - crop_segm.append(part.tolist()) - elif isinstance(inter, Polygon): - crop_poly = np.squeeze( - np.array(inter.exterior.coords[:-1]).reshape(1, -1)) - crop_poly[0::2] -= xmin - crop_poly[1::2] -= ymin - crop_segm.append(crop_poly.tolist()) - else: - continue - return crop_segm - - def _crop_rle(rle, crop, height, width): - if 'counts' in rle and type(rle['counts']) == list: - rle = mask_util.frPyObjects(rle, height, width) - mask = mask_util.decode(rle) - mask = mask[crop[1]:crop[3], crop[0]:crop[2]] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - crop_segms = [] - for id in valid_ids: - segm = segms[id] - if is_poly(segm): - import copy - import shapely.ops - import logging - from shapely.geometry import Polygon, MultiPolygon, GeometryCollection - logging.getLogger("shapely").setLevel(logging.WARNING) - # Polygon format - crop_segms.append(_crop_poly(segm, crop)) - else: - # RLE format - import pycocotools.mask as mask_util - crop_segms.append(_crop_rle(segm, crop, height, width)) - return crop_segms - - -def expand_segms(segms, x, y, height, width, ratio): - def _expand_poly(poly, x, y): - expanded_poly = np.array(poly) - expanded_poly[0::2] += x - expanded_poly[1::2] += y - return expanded_poly.tolist() - - def _expand_rle(rle, x, y, height, width, ratio): - if 'counts' in rle and type(rle['counts']) == list: - rle = mask_util.frPyObjects(rle, height, width) - mask = mask_util.decode(rle) - expanded_mask = np.full((int(height * ratio), int(width * ratio)), - 0).astype(mask.dtype) - expanded_mask[y:y + height, x:x + width] = mask - rle = mask_util.encode( - np.array(expanded_mask, order='F', dtype=np.uint8)) - return rle - - expanded_segms = [] - for segm in segms: - if is_poly(segm): - # Polygon format - expanded_segms.append([_expand_poly(poly, x, y) for poly in segm]) - else: - # RLE format - import pycocotools.mask as mask_util - expanded_segms.append( - _expand_rle(segm, x, y, height, width, ratio)) - return expanded_segms - - -def box_horizontal_flip(bboxes, width): - oldx1 = bboxes[:, 0].copy() - oldx2 = bboxes[:, 2].copy() - bboxes[:, 0] = width - oldx2 - 1 - bboxes[:, 2] = width - oldx1 - 1 - if bboxes.shape[0] != 0 and (bboxes[:, 2] < bboxes[:, 0]).all(): - raise ValueError( - "RandomHorizontalFlip: invalid box, x2 should be greater than x1") - return bboxes - - -def segms_horizontal_flip(segms, height, width): - def _flip_poly(poly, width): - flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 - return flipped_poly.tolist() - - def _flip_rle(rle, height, width): - if 'counts' in rle and type(rle['counts']) == list: - rle = mask_util.frPyObjects([rle], height, width) - mask = mask_util.decode(rle) - mask = mask[:, ::-1] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - flipped_segms = [] - for segm in segms: - if is_poly(segm): - # Polygon format - flipped_segms.append([_flip_poly(poly, width) for poly in segm]) - else: - # RLE format - import pycocotools.mask as mask_util - flipped_segms.append(_flip_rle(segm, height, width)) - return flipped_segms diff --git a/deploy/raspberry/python/transforms/cls_transforms.py b/deploy/raspberry/python/transforms/cls_transforms.py index d6c7faa22d5fa44ea9537192bb00ea7302cbd680..6d64d3878356d8f302e9c7dcbf8981b627e77675 100644 --- a/deploy/raspberry/python/transforms/cls_transforms.py +++ b/deploy/raspberry/python/transforms/cls_transforms.py @@ -1,10 +1,10 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -18,7 +18,6 @@ import random import os.path as osp import numpy as np from PIL import Image, ImageEnhance -import utils.logging as logging class ClsTransform: @@ -49,14 +48,7 @@ class Compose(ClsTransform): 'must be equal or larger than 1!') self.transforms = transforms - # 检查transforms里面的操作,目前支持PaddleX定义的或者是imgaug操作 - for op in self.transforms: - if not isinstance(op, ClsTransform): - import imgaug.augmenters as iaa - if not isinstance(op, iaa.Augmenter): - raise Exception( - "Elements in transforms should be defined in 'paddlex.cls.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/" - ) + def __call__(self, im, label=None): """ @@ -79,18 +71,10 @@ class Compose(ClsTransform): raise TypeError('Can\'t read The image file {}!'.format(im)) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) for op in self.transforms: - if isinstance(op, ClsTransform): - outputs = op(im, label) - im = outputs[0] - if len(outputs) == 2: - label = outputs[1] - else: - import imgaug.augmenters as iaa - if isinstance(op, iaa.Augmenter): - im = execute_imgaug(op, im) - outputs = (im, ) - if label is not None: - outputs = (im, label) + outputs = op(im, label) + im = outputs[0] + if len(outputs) == 2: + label = outputs[1] return outputs def add_augmenters(self, augmenters): @@ -100,109 +84,10 @@ class Compose(ClsTransform): transform_names = [type(x).__name__ for x in self.transforms] for aug in augmenters: if type(aug).__name__ in transform_names: - logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) self.transforms = augmenters + self.transforms -class RandomCrop(ClsTransform): - """对图像进行随机剪裁,模型训练时的数据增强操作。 - - 1. 根据lower_scale、lower_ratio、upper_ratio计算随机剪裁的高、宽。 - 2. 根据随机剪裁的高、宽随机选取剪裁的起始点。 - 3. 剪裁图像。 - 4. 调整剪裁后的图像的大小到crop_size*crop_size。 - - Args: - crop_size (int): 随机裁剪后重新调整的目标边长。默认为224。 - lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。 - lower_ratio (float): 宽变换比例的最小限制。默认为3. / 4。 - upper_ratio (float): 宽变换比例的最大限制。默认为4. / 3。 - """ - - def __init__(self, - crop_size=224, - lower_scale=0.08, - lower_ratio=3. / 4, - upper_ratio=4. / 3): - self.crop_size = crop_size - self.lower_scale = lower_scale - self.lower_ratio = lower_ratio - self.upper_ratio = upper_ratio - - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - label (int): 每张图像所对应的类别序号。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; - 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 - """ - im = random_crop(im, self.crop_size, self.lower_scale, - self.lower_ratio, self.upper_ratio) - if label is None: - return (im, ) - else: - return (im, label) - - -class RandomHorizontalFlip(ClsTransform): - """以一定的概率对图像进行随机水平翻转,模型训练时的数据增强操作。 - - Args: - prob (float): 随机水平翻转的概率。默认为0.5。 - """ - - def __init__(self, prob=0.5): - self.prob = prob - - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - label (int): 每张图像所对应的类别序号。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; - 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 - """ - if random.random() < self.prob: - im = horizontal_flip(im) - if label is None: - return (im, ) - else: - return (im, label) - - -class RandomVerticalFlip(ClsTransform): - """以一定的概率对图像进行随机垂直翻转,模型训练时的数据增强操作。 - - Args: - prob (float): 随机垂直翻转的概率。默认为0.5。 - """ - - def __init__(self, prob=0.5): - self.prob = prob - - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - label (int): 每张图像所对应的类别序号。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; - 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 - """ - if random.random() < self.prob: - im = vertical_flip(im) - if label is None: - return (im, ) - else: - return (im, label) - - class Normalize(ClsTransform): """对图像进行标准化。 @@ -315,131 +200,6 @@ class CenterCrop(ClsTransform): return (im, label) -class RandomRotate(ClsTransform): - def __init__(self, rotate_range=30, prob=0.5): - """以一定的概率对图像在[-rotate_range, rotaterange]角度范围内进行旋转,模型训练时的数据增强操作。 - - Args: - rotate_range (int): 旋转度数的范围。默认为30。 - prob (float): 随机旋转的概率。默认为0.5。 - """ - self.rotate_range = rotate_range - self.prob = prob - - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - label (int): 每张图像所对应的类别序号。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; - 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 - """ - rotate_lower = -self.rotate_range - rotate_upper = self.rotate_range - im = im.astype('uint8') - im = Image.fromarray(im) - if np.random.uniform(0, 1) < self.prob: - im = rotate(im, rotate_lower, rotate_upper) - im = np.asarray(im).astype('float32') - if label is None: - return (im, ) - else: - return (im, label) - - -class RandomDistort(ClsTransform): - """以一定的概率对图像进行随机像素内容变换,模型训练时的数据增强操作。 - - 1. 对变换的操作顺序进行随机化操作。 - 2. 按照1中的顺序以一定的概率对图像在范围[-range, range]内进行随机像素内容变换。 - - Args: - brightness_range (float): 明亮度因子的范围。默认为0.9。 - brightness_prob (float): 随机调整明亮度的概率。默认为0.5。 - contrast_range (float): 对比度因子的范围。默认为0.9。 - contrast_prob (float): 随机调整对比度的概率。默认为0.5。 - saturation_range (float): 饱和度因子的范围。默认为0.9。 - saturation_prob (float): 随机调整饱和度的概率。默认为0.5。 - hue_range (int): 色调因子的范围。默认为18。 - hue_prob (float): 随机调整色调的概率。默认为0.5。 - """ - - def __init__(self, - brightness_range=0.9, - brightness_prob=0.5, - contrast_range=0.9, - contrast_prob=0.5, - saturation_range=0.9, - saturation_prob=0.5, - hue_range=18, - hue_prob=0.5): - self.brightness_range = brightness_range - self.brightness_prob = brightness_prob - self.contrast_range = contrast_range - self.contrast_prob = contrast_prob - self.saturation_range = saturation_range - self.saturation_prob = saturation_prob - self.hue_range = hue_range - self.hue_prob = hue_prob - - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - label (int): 每张图像所对应的类别序号。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; - 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 - """ - brightness_lower = 1 - self.brightness_range - brightness_upper = 1 + self.brightness_range - contrast_lower = 1 - self.contrast_range - contrast_upper = 1 + self.contrast_range - saturation_lower = 1 - self.saturation_range - saturation_upper = 1 + self.saturation_range - hue_lower = -self.hue_range - hue_upper = self.hue_range - ops = [brightness, contrast, saturation, hue] - random.shuffle(ops) - params_dict = { - 'brightness': { - 'brightness_lower': brightness_lower, - 'brightness_upper': brightness_upper - }, - 'contrast': { - 'contrast_lower': contrast_lower, - 'contrast_upper': contrast_upper - }, - 'saturation': { - 'saturation_lower': saturation_lower, - 'saturation_upper': saturation_upper - }, - 'hue': { - 'hue_lower': hue_lower, - 'hue_upper': hue_upper - } - } - prob_dict = { - 'brightness': self.brightness_prob, - 'contrast': self.contrast_prob, - 'saturation': self.saturation_prob, - 'hue': self.hue_prob, - } - for id in range(len(ops)): - params = params_dict[ops[id].__name__] - prob = prob_dict[ops[id].__name__] - params['im'] = im - if np.random.uniform(0, 1) < prob: - im = ops[id](**params) - if label is None: - return (im, ) - else: - return (im, label) - - class ArrangeClassifier(ClsTransform): """获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用 @@ -510,12 +270,7 @@ class ComposedClsTransforms(Compose): ) if mode == 'train': - # 训练时的transforms,包含数据增强 - transforms = [ - RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5), - Normalize( - mean=mean, std=std) - ] + pass else: # 验证/预测时的transforms transforms = [ diff --git a/deploy/raspberry/python/transforms/det_transforms.py b/deploy/raspberry/python/transforms/det_transforms.py index a2eecae88eedb1af907d5609a723c46698e76d9c..6cc5c7f7ebe7e324596327d568cd2d07b51b7c37 100644 --- a/deploy/raspberry/python/transforms/det_transforms.py +++ b/deploy/raspberry/python/transforms/det_transforms.py @@ -1,10 +1,10 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -24,10 +24,8 @@ import numpy as np import cv2 from PIL import Image, ImageEnhance -from .imgaug_support import execute_imgaug from .ops import * from .box_utils import * -import utils.logging as logging class DetTransform: @@ -61,14 +59,6 @@ class Compose(DetTransform): for t in self.transforms: if type(t).__name__ == 'MixupImage': self.use_mixup = True - # 检查transforms里面的操作,目前支持PaddleX定义的或者是imgaug操作 - for op in self.transforms: - if not isinstance(op, DetTransform): - import imgaug.augmenters as iaa - if not isinstance(op, iaa.Augmenter): - raise Exception( - "Elements in transforms should be defined in 'paddlex.det.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/" - ) def __call__(self, im, im_info=None, label_info=None): """ @@ -142,15 +132,8 @@ class Compose(DetTransform): for op in self.transforms: if im is None: return None - if isinstance(op, DetTransform): - outputs = op(im, im_info, label_info) - im = outputs[0] - else: - im = execute_imgaug(op, im) - if label_info is not None: - outputs = (im, im_info, label_info) - else: - outputs = (im, im_info) + outputs = op(im, im_info, label_info) + im = outputs[0] return outputs def add_augmenters(self, augmenters): @@ -160,7 +143,7 @@ class Compose(DetTransform): transform_names = [type(x).__name__ for x in self.transforms] for aug in augmenters: if type(aug).__name__ in transform_names: - logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) self.transforms = augmenters + self.transforms @@ -411,82 +394,6 @@ class Resize(DetTransform): return (im, im_info, label_info) -class RandomHorizontalFlip(DetTransform): - """随机翻转图像、标注框、分割信息,模型训练时的数据增强操作。 - - 1. 随机采样一个0-1之间的小数,当小数小于水平翻转概率时, - 执行2-4步操作,否则直接返回。 - 2. 水平翻转图像。 - 3. 计算翻转后的真实标注框的坐标,更新label_info中的gt_bbox信息。 - 4. 计算翻转后的真实分割区域的坐标,更新label_info中的gt_poly信息。 - - Args: - prob (float): 随机水平翻转的概率。默认为0.5。 - - Raises: - TypeError: 形参数据类型不满足需求。 - """ - - def __init__(self, prob=0.5): - self.prob = prob - if not isinstance(self.prob, float): - raise TypeError("RandomHorizontalFlip: input type is invalid.") - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - - Returns: - tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 - 存储与标注框相关信息的字典。 - 其中,im_info更新字段为: - - gt_bbox (np.ndarray): 水平翻转后的标注框坐标[x1, y1, x2, y2],形状为(n, 4), - 其中n代表真实标注框的个数。 - - gt_poly (list): 水平翻转后的多边形分割区域的x、y坐标,长度为n, - 其中n代表真实标注框的个数。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 数据长度不匹配。 - """ - if not isinstance(im, np.ndarray): - raise TypeError( - "RandomHorizontalFlip: image is not a numpy array.") - if len(im.shape) != 3: - raise ValueError( - "RandomHorizontalFlip: image is not 3-dimensional.") - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do RandomHorizontalFlip! ' + - 'Becasuse the im_info and label_info can not be None!') - if 'gt_bbox' not in label_info: - raise TypeError('Cannot do RandomHorizontalFlip! ' + \ - 'Becasuse gt_bbox is not in label_info!') - image_shape = im_info['image_shape'] - gt_bbox = label_info['gt_bbox'] - height = image_shape[0] - width = image_shape[1] - - if np.random.uniform(0, 1) < self.prob: - im = horizontal_flip(im) - if gt_bbox.shape[0] == 0: - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) - label_info['gt_bbox'] = box_horizontal_flip(gt_bbox, width) - if 'gt_poly' in label_info and \ - len(label_info['gt_poly']) != 0: - label_info['gt_poly'] = segms_horizontal_flip( - label_info['gt_poly'], height, width) - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) class Normalize(DetTransform): @@ -533,642 +440,6 @@ class Normalize(DetTransform): return (im, im_info, label_info) -class RandomDistort(DetTransform): - """以一定的概率对图像进行随机像素内容变换,模型训练时的数据增强操作 - - 1. 对变换的操作顺序进行随机化操作。 - 2. 按照1中的顺序以一定的概率在范围[-range, range]对图像进行随机像素内容变换。 - - Args: - brightness_range (float): 明亮度因子的范围。默认为0.5。 - brightness_prob (float): 随机调整明亮度的概率。默认为0.5。 - contrast_range (float): 对比度因子的范围。默认为0.5。 - contrast_prob (float): 随机调整对比度的概率。默认为0.5。 - saturation_range (float): 饱和度因子的范围。默认为0.5。 - saturation_prob (float): 随机调整饱和度的概率。默认为0.5。 - hue_range (int): 色调因子的范围。默认为18。 - hue_prob (float): 随机调整色调的概率。默认为0.5。 - """ - - def __init__(self, - brightness_range=0.5, - brightness_prob=0.5, - contrast_range=0.5, - contrast_prob=0.5, - saturation_range=0.5, - saturation_prob=0.5, - hue_range=18, - hue_prob=0.5): - self.brightness_range = brightness_range - self.brightness_prob = brightness_prob - self.contrast_range = contrast_range - self.contrast_prob = contrast_prob - self.saturation_range = saturation_range - self.saturation_prob = saturation_prob - self.hue_range = hue_range - self.hue_prob = hue_prob - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - - Returns: - tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 - 存储与标注框相关信息的字典。 - """ - brightness_lower = 1 - self.brightness_range - brightness_upper = 1 + self.brightness_range - contrast_lower = 1 - self.contrast_range - contrast_upper = 1 + self.contrast_range - saturation_lower = 1 - self.saturation_range - saturation_upper = 1 + self.saturation_range - hue_lower = -self.hue_range - hue_upper = self.hue_range - ops = [brightness, contrast, saturation, hue] - random.shuffle(ops) - params_dict = { - 'brightness': { - 'brightness_lower': brightness_lower, - 'brightness_upper': brightness_upper - }, - 'contrast': { - 'contrast_lower': contrast_lower, - 'contrast_upper': contrast_upper - }, - 'saturation': { - 'saturation_lower': saturation_lower, - 'saturation_upper': saturation_upper - }, - 'hue': { - 'hue_lower': hue_lower, - 'hue_upper': hue_upper - } - } - prob_dict = { - 'brightness': self.brightness_prob, - 'contrast': self.contrast_prob, - 'saturation': self.saturation_prob, - 'hue': self.hue_prob - } - for id in range(4): - params = params_dict[ops[id].__name__] - prob = prob_dict[ops[id].__name__] - params['im'] = im - - if np.random.uniform(0, 1) < prob: - im = ops[id](**params) - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) - - -class MixupImage(DetTransform): - """对图像进行mixup操作,模型训练时的数据增强操作,目前仅YOLOv3模型支持该transform。 - - 当label_info中不存在mixup字段时,直接返回,否则进行下述操作: - 1. 从随机beta分布中抽取出随机因子factor。 - 2. - - 当factor>=1.0时,去除label_info中的mixup字段,直接返回。 - - 当factor<=0.0时,直接返回label_info中的mixup字段,并在label_info中去除该字段。 - - 其余情况,执行下述操作: - (1)原图像乘以factor,mixup图像乘以(1-factor),叠加2个结果。 - (2)拼接原图像标注框和mixup图像标注框。 - (3)拼接原图像标注框类别和mixup图像标注框类别。 - (4)原图像标注框混合得分乘以factor,mixup图像标注框混合得分乘以(1-factor),叠加2个结果。 - 3. 更新im_info中的image_shape信息。 - - Args: - alpha (float): 随机beta分布的下限。默认为1.5。 - beta (float): 随机beta分布的上限。默认为1.5。 - mixup_epoch (int): 在前mixup_epoch轮使用mixup增强操作;当该参数为-1时,该策略不会生效。 - 默认为-1。 - - Raises: - ValueError: 数据长度不匹配。 - """ - - def __init__(self, alpha=1.5, beta=1.5, mixup_epoch=-1): - self.alpha = alpha - self.beta = beta - if self.alpha <= 0.0: - raise ValueError("alpha shold be positive in MixupImage") - if self.beta <= 0.0: - raise ValueError("beta shold be positive in MixupImage") - self.mixup_epoch = mixup_epoch - - def _mixup_img(self, img1, img2, factor): - h = max(img1.shape[0], img2.shape[0]) - w = max(img1.shape[1], img2.shape[1]) - img = np.zeros((h, w, img1.shape[2]), 'float32') - img[:img1.shape[0], :img1.shape[1], :] = \ - img1.astype('float32') * factor - img[:img2.shape[0], :img2.shape[1], :] += \ - img2.astype('float32') * (1.0 - factor) - return img.astype('float32') - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - - Returns: - tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 - 存储与标注框相关信息的字典。 - 其中,im_info更新字段为: - - image_shape (np.ndarray): mixup后的图像高、宽二者组成的np.ndarray,形状为(2,)。 - im_info删除的字段: - - mixup (list): 与当前字段进行mixup的图像相关信息。 - label_info更新字段为: - - gt_bbox (np.ndarray): mixup后真实标注框坐标,形状为(n, 4), - 其中n代表真实标注框的个数。 - - gt_class (np.ndarray): mixup后每个真实标注框对应的类别序号,形状为(n, 1), - 其中n代表真实标注框的个数。 - - gt_score (np.ndarray): mixup后每个真实标注框对应的混合得分,形状为(n, 1), - 其中n代表真实标注框的个数。 - - Raises: - TypeError: 形参数据类型不满足需求。 - """ - if im_info is None: - raise TypeError('Cannot do MixupImage! ' + - 'Becasuse the im_info can not be None!') - if 'mixup' not in im_info: - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) - factor = np.random.beta(self.alpha, self.beta) - factor = max(0.0, min(1.0, factor)) - if im_info['epoch'] > self.mixup_epoch \ - or factor >= 1.0: - im_info.pop('mixup') - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) - if factor <= 0.0: - return im_info.pop('mixup') - im = self._mixup_img(im, im_info['mixup'][0], factor) - if label_info is None: - raise TypeError('Cannot do MixupImage! ' + - 'Becasuse the label_info can not be None!') - if 'gt_bbox' not in label_info or \ - 'gt_class' not in label_info or \ - 'gt_score' not in label_info: - raise TypeError('Cannot do MixupImage! ' + \ - 'Becasuse gt_bbox/gt_class/gt_score is not in label_info!') - gt_bbox1 = label_info['gt_bbox'] - gt_bbox2 = im_info['mixup'][2]['gt_bbox'] - gt_class1 = label_info['gt_class'] - gt_class2 = im_info['mixup'][2]['gt_class'] - gt_score1 = label_info['gt_score'] - gt_score2 = im_info['mixup'][2]['gt_score'] - if 'gt_poly' in label_info: - gt_poly1 = label_info['gt_poly'] - gt_poly2 = im_info['mixup'][2]['gt_poly'] - is_crowd1 = label_info['is_crowd'] - is_crowd2 = im_info['mixup'][2]['is_crowd'] - - if 0 not in gt_class1 and 0 not in gt_class2: - gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) - gt_class = np.concatenate((gt_class1, gt_class2), axis=0) - gt_score = np.concatenate( - (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) - if 'gt_poly' in label_info: - label_info['gt_poly'] = gt_poly1 + gt_poly2 - is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) - elif 0 in gt_class1: - gt_bbox = gt_bbox2 - gt_class = gt_class2 - gt_score = gt_score2 * (1. - factor) - if 'gt_poly' in label_info: - label_info['gt_poly'] = gt_poly2 - is_crowd = is_crowd2 - else: - gt_bbox = gt_bbox1 - gt_class = gt_class1 - gt_score = gt_score1 * factor - if 'gt_poly' in label_info: - label_info['gt_poly'] = gt_poly1 - is_crowd = is_crowd1 - label_info['gt_bbox'] = gt_bbox - label_info['gt_score'] = gt_score - label_info['gt_class'] = gt_class - label_info['is_crowd'] = is_crowd - im_info['image_shape'] = np.array([im.shape[0], - im.shape[1]]).astype('int32') - im_info.pop('mixup') - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) - - -class RandomExpand(DetTransform): - """随机扩张图像,模型训练时的数据增强操作。 - 1. 随机选取扩张比例(扩张比例大于1时才进行扩张)。 - 2. 计算扩张后图像大小。 - 3. 初始化像素值为输入填充值的图像,并将原图像随机粘贴于该图像上。 - 4. 根据原图像粘贴位置换算出扩张后真实标注框的位置坐标。 - 5. 根据原图像粘贴位置换算出扩张后真实分割区域的位置坐标。 - Args: - ratio (float): 图像扩张的最大比例。默认为4.0。 - prob (float): 随机扩张的概率。默认为0.5。 - fill_value (list): 扩张图像的初始填充值(0-255)。默认为[123.675, 116.28, 103.53]。 - """ - - def __init__(self, - ratio=4., - prob=0.5, - fill_value=[123.675, 116.28, 103.53]): - super(RandomExpand, self).__init__() - assert ratio > 1.01, "expand ratio must be larger than 1.01" - self.ratio = ratio - self.prob = prob - assert isinstance(fill_value, Sequence), \ - "fill value must be sequence" - if not isinstance(fill_value, tuple): - fill_value = tuple(fill_value) - self.fill_value = fill_value - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - Returns: - tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 - 存储与标注框相关信息的字典。 - 其中,im_info更新字段为: - - image_shape (np.ndarray): 扩张后的图像高、宽二者组成的np.ndarray,形状为(2,)。 - label_info更新字段为: - - gt_bbox (np.ndarray): 随机扩张后真实标注框坐标,形状为(n, 4), - 其中n代表真实标注框的个数。 - - gt_class (np.ndarray): 随机扩张后每个真实标注框对应的类别序号,形状为(n, 1), - 其中n代表真实标注框的个数。 - Raises: - TypeError: 形参数据类型不满足需求。 - """ - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do RandomExpand! ' + - 'Becasuse the im_info and label_info can not be None!') - if 'gt_bbox' not in label_info or \ - 'gt_class' not in label_info: - raise TypeError('Cannot do RandomExpand! ' + \ - 'Becasuse gt_bbox/gt_class is not in label_info!') - if np.random.uniform(0., 1.) < self.prob: - return (im, im_info, label_info) - - if 'gt_class' in label_info and 0 in label_info['gt_class']: - return (im, im_info, label_info) - image_shape = im_info['image_shape'] - height = int(image_shape[0]) - width = int(image_shape[1]) - - expand_ratio = np.random.uniform(1., self.ratio) - h = int(height * expand_ratio) - w = int(width * expand_ratio) - if not h > height or not w > width: - return (im, im_info, label_info) - y = np.random.randint(0, h - height) - x = np.random.randint(0, w - width) - canvas = np.ones((h, w, 3), dtype=np.float32) - canvas *= np.array(self.fill_value, dtype=np.float32) - canvas[y:y + height, x:x + width, :] = im - - im_info['image_shape'] = np.array([h, w]).astype('int32') - if 'gt_bbox' in label_info and len(label_info['gt_bbox']) > 0: - label_info['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32) - if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0: - label_info['gt_poly'] = expand_segms(label_info['gt_poly'], x, y, - height, width, expand_ratio) - return (canvas, im_info, label_info) - - -class RandomCrop(DetTransform): - """随机裁剪图像。 - 1. 若allow_no_crop为True,则在thresholds加入’no_crop’。 - 2. 随机打乱thresholds。 - 3. 遍历thresholds中各元素: - (1) 如果当前thresh为’no_crop’,则返回原始图像和标注信息。 - (2) 随机取出aspect_ratio和scaling中的值并由此计算出候选裁剪区域的高、宽、起始点。 - (3) 计算真实标注框与候选裁剪区域IoU,若全部真实标注框的IoU都小于thresh,则继续第3步。 - (4) 如果cover_all_box为True且存在真实标注框的IoU小于thresh,则继续第3步。 - (5) 筛选出位于候选裁剪区域内的真实标注框,若有效框的个数为0,则继续第3步,否则进行第4步。 - 4. 换算有效真值标注框相对候选裁剪区域的位置坐标。 - 5. 换算有效分割区域相对候选裁剪区域的位置坐标。 - - Args: - aspect_ratio (list): 裁剪后短边缩放比例的取值范围,以[min, max]形式表示。默认值为[.5, 2.]。 - thresholds (list): 判断裁剪候选区域是否有效所需的IoU阈值取值列表。默认值为[.0, .1, .3, .5, .7, .9]。 - scaling (list): 裁剪面积相对原面积的取值范围,以[min, max]形式表示。默认值为[.3, 1.]。 - num_attempts (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。 - allow_no_crop (bool): 是否允许未进行裁剪。默认值为True。 - cover_all_box (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。 - """ - - def __init__(self, - aspect_ratio=[.5, 2.], - thresholds=[.0, .1, .3, .5, .7, .9], - scaling=[.3, 1.], - num_attempts=50, - allow_no_crop=True, - cover_all_box=False): - self.aspect_ratio = aspect_ratio - self.thresholds = thresholds - self.scaling = scaling - self.num_attempts = num_attempts - self.allow_no_crop = allow_no_crop - self.cover_all_box = cover_all_box - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - - Returns: - tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 - 存储与标注框相关信息的字典。 - 其中,im_info更新字段为: - - image_shape (np.ndarray): 扩裁剪的图像高、宽二者组成的np.ndarray,形状为(2,)。 - label_info更新字段为: - - gt_bbox (np.ndarray): 随机裁剪后真实标注框坐标,形状为(n, 4), - 其中n代表真实标注框的个数。 - - gt_class (np.ndarray): 随机裁剪后每个真实标注框对应的类别序号,形状为(n, 1), - 其中n代表真实标注框的个数。 - - gt_score (np.ndarray): 随机裁剪后每个真实标注框对应的混合得分,形状为(n, 1), - 其中n代表真实标注框的个数。 - - Raises: - TypeError: 形参数据类型不满足需求。 - """ - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do RandomCrop! ' + - 'Becasuse the im_info and label_info can not be None!') - if 'gt_bbox' not in label_info or \ - 'gt_class' not in label_info: - raise TypeError('Cannot do RandomCrop! ' + \ - 'Becasuse gt_bbox/gt_class is not in label_info!') - - if len(label_info['gt_bbox']) == 0: - return (im, im_info, label_info) - if 'gt_class' in label_info and 0 in label_info['gt_class']: - return (im, im_info, label_info) - - image_shape = im_info['image_shape'] - w = image_shape[1] - h = image_shape[0] - gt_bbox = label_info['gt_bbox'] - thresholds = list(self.thresholds) - if self.allow_no_crop: - thresholds.append('no_crop') - np.random.shuffle(thresholds) - - for thresh in thresholds: - if thresh == 'no_crop': - return (im, im_info, label_info) - - found = False - for i in range(self.num_attempts): - scale = np.random.uniform(*self.scaling) - min_ar, max_ar = self.aspect_ratio - aspect_ratio = np.random.uniform( - max(min_ar, scale**2), min(max_ar, scale**-2)) - crop_h = int(h * scale / np.sqrt(aspect_ratio)) - crop_w = int(w * scale * np.sqrt(aspect_ratio)) - crop_y = np.random.randint(0, h - crop_h) - crop_x = np.random.randint(0, w - crop_w) - crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h] - iou = iou_matrix( - gt_bbox, np.array( - [crop_box], dtype=np.float32)) - if iou.max() < thresh: - continue - - if self.cover_all_box and iou.min() < thresh: - continue - - cropped_box, valid_ids = crop_box_with_center_constraint( - gt_bbox, np.array( - crop_box, dtype=np.float32)) - if valid_ids.size > 0: - found = True - break - - if found: - if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0: - crop_polys = crop_segms( - label_info['gt_poly'], - valid_ids, - np.array( - crop_box, dtype=np.int64), - h, - w) - if [] in crop_polys: - delete_id = list() - valid_polys = list() - for id, crop_poly in enumerate(crop_polys): - if crop_poly == []: - delete_id.append(id) - else: - valid_polys.append(crop_poly) - valid_ids = np.delete(valid_ids, delete_id) - if len(valid_polys) == 0: - return (im, im_info, label_info) - label_info['gt_poly'] = valid_polys - else: - label_info['gt_poly'] = crop_polys - im = crop_image(im, crop_box) - label_info['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0) - label_info['gt_class'] = np.take( - label_info['gt_class'], valid_ids, axis=0) - im_info['image_shape'] = np.array( - [crop_box[3] - crop_box[1], - crop_box[2] - crop_box[0]]).astype('int32') - if 'gt_score' in label_info: - label_info['gt_score'] = np.take( - label_info['gt_score'], valid_ids, axis=0) - - if 'is_crowd' in label_info: - label_info['is_crowd'] = np.take( - label_info['is_crowd'], valid_ids, axis=0) - return (im, im_info, label_info) - - return (im, im_info, label_info) - - -class ArrangeFasterRCNN(DetTransform): - """获取FasterRCNN模型训练/验证/预测所需信息。 - - Args: - mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 - - Raises: - ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。 - """ - - def __init__(self, mode=None): - if mode not in ['train', 'eval', 'test', 'quant']: - raise ValueError( - "mode must be in ['train', 'eval', 'test', 'quant']!") - self.mode = mode - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - - Returns: - tuple: 当mode为'train'时,返回(im, im_resize_info, gt_bbox, gt_class, is_crowd),分别对应 - 图像np.ndarray数据、图像相当对于原图的resize信息、真实标注框、真实标注框对应的类别、真实标注框内是否是一组对象; - 当mode为'eval'时,返回(im, im_resize_info, im_id, im_shape, gt_bbox, gt_class, is_difficult), - 分别对应图像np.ndarray数据、图像相当对于原图的resize信息、图像id、图像大小信息、真实标注框、真实标注框对应的类别、 - 真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_resize_info, im_shape),分别对应图像np.ndarray数据、 - 图像相当对于原图的resize信息、图像大小信息。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 数据长度不匹配。 - """ - im = permute(im, False) - if self.mode == 'train': - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do ArrangeFasterRCNN! ' + - 'Becasuse the im_info and label_info can not be None!') - if len(label_info['gt_bbox']) != len(label_info['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - im_resize_info = im_info['im_resize_info'] - gt_bbox = label_info['gt_bbox'] - gt_class = label_info['gt_class'] - is_crowd = label_info['is_crowd'] - outputs = (im, im_resize_info, gt_bbox, gt_class, is_crowd) - elif self.mode == 'eval': - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do ArrangeFasterRCNN! ' + - 'Becasuse the im_info and label_info can not be None!') - im_resize_info = im_info['im_resize_info'] - im_id = im_info['im_id'] - im_shape = np.array( - (im_info['image_shape'][0], im_info['image_shape'][1], 1), - dtype=np.float32) - gt_bbox = label_info['gt_bbox'] - gt_class = label_info['gt_class'] - is_difficult = label_info['difficult'] - outputs = (im, im_resize_info, im_id, im_shape, gt_bbox, gt_class, - is_difficult) - else: - if im_info is None: - raise TypeError('Cannot do ArrangeFasterRCNN! ' + - 'Becasuse the im_info can not be None!') - im_resize_info = im_info['im_resize_info'] - im_shape = np.array( - (im_info['image_shape'][0], im_info['image_shape'][1], 1), - dtype=np.float32) - outputs = (im, im_resize_info, im_shape) - return outputs - - -class ArrangeMaskRCNN(DetTransform): - """获取MaskRCNN模型训练/验证/预测所需信息。 - - Args: - mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 - - Raises: - ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。 - """ - - def __init__(self, mode=None): - if mode not in ['train', 'eval', 'test', 'quant']: - raise ValueError( - "mode must be in ['train', 'eval', 'test', 'quant']!") - self.mode = mode - - def __call__(self, im, im_info=None, label_info=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label_info (dict, 可选): 存储与标注框相关的信息。 - - Returns: - tuple: 当mode为'train'时,返回(im, im_resize_info, gt_bbox, gt_class, is_crowd, gt_masks),分别对应 - 图像np.ndarray数据、图像相当对于原图的resize信息、真实标注框、真实标注框对应的类别、真实标注框内是否是一组对象、 - 真实分割区域;当mode为'eval'时,返回(im, im_resize_info, im_id, im_shape),分别对应图像np.ndarray数据、 - 图像相当对于原图的resize信息、图像id、图像大小信息;当mode为'test'或'quant'时,返回(im, im_resize_info, im_shape), - 分别对应图像np.ndarray数据、图像相当对于原图的resize信息、图像大小信息。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 数据长度不匹配。 - """ - im = permute(im, False) - if self.mode == 'train': - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do ArrangeTrainMaskRCNN! ' + - 'Becasuse the im_info and label_info can not be None!') - if len(label_info['gt_bbox']) != len(label_info['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - im_resize_info = im_info['im_resize_info'] - gt_bbox = label_info['gt_bbox'] - gt_class = label_info['gt_class'] - is_crowd = label_info['is_crowd'] - assert 'gt_poly' in label_info - segms = label_info['gt_poly'] - if len(segms) != 0: - assert len(segms) == is_crowd.shape[0] - gt_masks = [] - valid = True - for i in range(len(segms)): - segm = segms[i] - gt_segm = [] - if is_crowd[i]: - gt_segm.append([[0, 0]]) - else: - for poly in segm: - if len(poly) == 0: - valid = False - break - gt_segm.append(np.array(poly).reshape(-1, 2)) - if (not valid) or len(gt_segm) == 0: - break - gt_masks.append(gt_segm) - outputs = (im, im_resize_info, gt_bbox, gt_class, is_crowd, - gt_masks) - else: - if im_info is None: - raise TypeError('Cannot do ArrangeMaskRCNN! ' + - 'Becasuse the im_info can not be None!') - im_resize_info = im_info['im_resize_info'] - im_shape = np.array( - (im_info['image_shape'][0], im_info['image_shape'][1], 1), - dtype=np.float32) - if self.mode == 'eval': - im_id = im_info['im_id'] - outputs = (im, im_resize_info, im_id, im_shape) - else: - outputs = (im, im_resize_info, im_shape) - return outputs class ArrangeYOLOv3(DetTransform): @@ -1208,50 +479,9 @@ class ArrangeYOLOv3(DetTransform): """ im = permute(im, False) if self.mode == 'train': - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do ArrangeYolov3! ' + - 'Becasuse the im_info and label_info can not be None!') - im_shape = im_info['image_shape'] - if len(label_info['gt_bbox']) != len(label_info['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - if len(label_info['gt_bbox']) != len(label_info['gt_score']): - raise ValueError("gt num mismatch: bbox and score.") - gt_bbox = np.zeros((50, 4), dtype=im.dtype) - gt_class = np.zeros((50, ), dtype=np.int32) - gt_score = np.zeros((50, ), dtype=im.dtype) - gt_num = min(50, len(label_info['gt_bbox'])) - if gt_num > 0: - label_info['gt_class'][:gt_num, 0] = label_info[ - 'gt_class'][:gt_num, 0] - 1 - if -1 not in label_info['gt_class']: - gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] - gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] - # parse [x1, y1, x2, y2] to [x, y, w, h] - gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] - gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2. - outputs = (im, gt_bbox, gt_class, gt_score, im_shape) + pass elif self.mode == 'eval': - if im_info is None or label_info is None: - raise TypeError( - 'Cannot do ArrangeYolov3! ' + - 'Becasuse the im_info and label_info can not be None!') - im_shape = im_info['image_shape'] - if len(label_info['gt_bbox']) != len(label_info['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - im_id = im_info['im_id'] - gt_bbox = np.zeros((50, 4), dtype=im.dtype) - gt_class = np.zeros((50, ), dtype=np.int32) - difficult = np.zeros((50, ), dtype=np.int32) - gt_num = min(50, len(label_info['gt_bbox'])) - if gt_num > 0: - label_info['gt_class'][:gt_num, 0] = label_info[ - 'gt_class'][:gt_num, 0] - 1 - gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] - difficult[:gt_num] = label_info['difficult'][:gt_num, 0] - outputs = (im, im_shape, im_id, gt_bbox, gt_class, difficult) + pass else: if im_info is None: raise TypeError('Cannot do ArrangeYolov3! ' + @@ -1261,51 +491,6 @@ class ArrangeYOLOv3(DetTransform): return outputs -class ComposedRCNNTransforms(Compose): - """ RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下, - 训练阶段: - 1. 随机以0.5的概率将图像水平翻转 - 2. 图像归一化 - 3. 图像按比例Resize,scale计算方式如下 - scale = min_max_size[0] / short_size_of_image - if max_size_of_image * scale > min_max_size[1]: - scale = min_max_size[1] / max_size_of_image - 4. 将3步骤的长宽进行padding,使得长宽为32的倍数 - 验证阶段: - 1. 图像归一化 - 2. 图像按比例Resize,scale计算方式同上训练阶段 - 3. 将2步骤的长宽进行padding,使得长宽为32的倍数 - - Args: - mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' - min_max_size(list): 图像在缩放时,最小边和最大边的约束条件 - mean(list): 图像均值 - std(list): 图像方差 - """ - - def __init__(self, - mode, - min_max_size=[800, 1333], - mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]): - if mode == 'train': - # 训练时的transforms,包含数据增强 - transforms = [ - RandomHorizontalFlip(prob=0.5), Normalize( - mean=mean, std=std), ResizeByShort( - short_size=min_max_size[0], max_size=min_max_size[1]), - Padding(coarsest_stride=32) - ] - else: - # 验证/预测时的transforms - transforms = [ - Normalize( - mean=mean, std=std), ResizeByShort( - short_size=min_max_size[0], max_size=min_max_size[1]), - Padding(coarsest_stride=32) - ] - - super(ComposedRCNNTransforms, self).__init__(transforms) class ComposedYOLOv3Transforms(Compose): @@ -1349,13 +534,7 @@ class ComposedYOLOv3Transforms(Compose): if mode == 'train': # 训练时的transforms,包含数据增强 - transforms = [ - MixupImage(mixup_epoch=mixup_epoch), RandomDistort(), - RandomExpand(), RandomCrop(), Resize( - target_size=width, - interp='RANDOM'), RandomHorizontalFlip(), Normalize( - mean=mean, std=std) - ] + pass else: # 验证/预测时的transforms transforms = [ diff --git a/deploy/raspberry/python/transforms/imgaug_support.py b/deploy/raspberry/python/transforms/imgaug_support.py deleted file mode 100644 index edaaba958d7501861ae36eac3dab8900af1ddb8f..0000000000000000000000000000000000000000 --- a/deploy/raspberry/python/transforms/imgaug_support.py +++ /dev/null @@ -1,111 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import copy - - -def execute_imgaug(augmenter, im, bboxes=None, polygons=None, - segment_map=None): - # 预处理,将bboxes, polygons转换成imgaug格式 - import imgaug.augmentables.kps as kps - import imgaug.augmentables.bbs as bbs - - aug_im = im.astype('uint8') - aug_im = augmenter.augment(image=aug_im).astype('float32') - return aug_im - - # TODO imgaug的标注处理逻辑与paddlex已存的transform存在部分差异 - # 目前仅支持对原图进行处理,因此只能使用pixlevel的imgaug增强操作 - # 以下代码暂不会执行 - aug_bboxes = None - if bboxes is not None: - aug_bboxes = list() - for i in range(len(bboxes)): - x1 = bboxes[i, 0] - y1 = bboxes[i, 1] - x2 = bboxes[i, 2] - y2 = bboxes[i, 3] - aug_bboxes.append(bbs.BoundingBox(x1, y1, x2, y2)) - - aug_points = None - if polygons is not None: - aug_points = list() - for i in range(len(polygons)): - num = len(polygons[i]) - for j in range(num): - tmp = np.reshape(polygons[i][j], (-1, 2)) - for k in range(len(tmp)): - aug_points.append(kps.Keypoint(tmp[k, 0], tmp[k, 1])) - - aug_segment_map = None - if segment_map is not None: - if len(segment_map.shape) == 2: - h, w = segment_map.shape - aug_segment_map = np.reshape(segment_map, (1, h, w, 1)) - elif len(segment_map.shape) == 3: - h, w, c = segment_map.shape - aug_segment_map = np.reshape(segment_map, (1, h, w, c)) - else: - raise Exception( - "Only support 2-dimensions for 3-dimensions for segment_map") - - unnormalized_batch = augmenter.augment( - image=aug_im, - bounding_boxes=aug_bboxes, - keypoints=aug_points, - segmentation_maps=aug_segment_map, - return_batch=True) - aug_im = unnormalized_batch.images_aug[0] - aug_bboxes = unnormalized_batch.bounding_boxes_aug - aug_points = unnormalized_batch.keypoints_aug - aug_seg_map = unnormalized_batch.segmentation_maps_aug - - aug_im = aug_im.astype('float32') - - if aug_bboxes is not None: - converted_bboxes = list() - for i in range(len(aug_bboxes)): - converted_bboxes.append([ - aug_bboxes[i].x1, aug_bboxes[i].y1, aug_bboxes[i].x2, - aug_bboxes[i].y2 - ]) - aug_bboxes = converted_bboxes - - aug_polygons = None - if aug_points is not None: - aug_polygons = copy.deepcopy(polygons) - idx = 0 - for i in range(len(aug_polygons)): - num = len(aug_polygons[i]) - for j in range(num): - num_points = len(aug_polygons[i][j]) // 2 - for k in range(num_points): - aug_polygons[i][j][k * 2] = aug_points[idx].x - aug_polygons[i][j][k * 2 + 1] = aug_points[idx].y - idx += 1 - - result = [aug_im] - if aug_bboxes is not None: - result.append(np.array(aug_bboxes)) - if aug_polygons is not None: - result.append(aug_polygons) - if aug_seg_map is not None: - n, h, w, c = aug_seg_map.shape - if len(segment_map.shape) == 2: - aug_seg_map = np.reshape(aug_seg_map, (h, w)) - elif len(segment_map.shape) == 3: - aug_seg_map = np.reshape(aug_seg_map, (h, w, c)) - result.append(aug_seg_map) - return result diff --git a/deploy/raspberry/python/transforms/ops.py b/deploy/raspberry/python/transforms/ops.py index dd517d4ccb7c113cfd00460e5df27125248bb602..3f298d7824be48355b69973a1e14486172efcb08 100644 --- a/deploy/raspberry/python/transforms/ops.py +++ b/deploy/raspberry/python/transforms/ops.py @@ -1,10 +1,10 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/deploy/raspberry/python/transforms/seg_transforms.py b/deploy/raspberry/python/transforms/seg_transforms.py index dca64e90948e7fd3c798e29c8c17f8d056b69cc5..a3fb6241d415939a33f73a29b843f9ed45976463 100644 --- a/deploy/raspberry/python/transforms/seg_transforms.py +++ b/deploy/raspberry/python/transforms/seg_transforms.py @@ -1,11 +1,11 @@ # coding: utf8 -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -14,14 +14,12 @@ # limitations under the License. from .ops import * -from .imgaug_support import execute_imgaug import random import os.path as osp import numpy as np from PIL import Image import cv2 from collections import OrderedDict -import utils.logging as logging class SegTransform: @@ -53,14 +51,7 @@ class Compose(SegTransform): 'must be equal or larger than 1!') self.transforms = transforms self.to_rgb = False - # 检查transforms里面的操作,目前支持PaddleX定义的或者是imgaug操作 - for op in self.transforms: - if not isinstance(op, SegTransform): - import imgaug.augmenters as iaa - if not isinstance(op, iaa.Augmenter): - raise Exception( - "Elements in transforms should be defined in 'paddlex.seg.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/" - ) + def __call__(self, im, im_info=None, label=None): """ @@ -116,7 +107,7 @@ class Compose(SegTransform): transform_names = [type(x).__name__ for x in self.transforms] for aug in augmenters: if type(aug).__name__ in transform_names: - logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) self.transforms = augmenters + self.transforms @@ -828,76 +819,6 @@ class RandomBlur(SegTransform): return (im, im_info, label) -class RandomRotate(SegTransform): - """对图像进行随机旋转, 模型训练时的数据增强操作。 - 在旋转区间[-rotate_range, rotate_range]内,对图像进行随机旋转,当存在标注图像时,同步进行, - 并对旋转后的图像和标注图像进行相应的padding。 - - Args: - rotate_range (float): 最大旋转角度。默认为15度。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认为255。 - - """ - - def __init__(self, - rotate_range=15, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - self.rotate_range = rotate_range - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (list): 存储图像reisze或padding前的shape信息,如 - [('resize', [200, 300]), ('padding', [400, 600])]表示 - 图像在过resize前shape为(200, 300), 过padding前shape为 - (400, 600) - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.rotate_range > 0: - (h, w) = im.shape[:2] - do_rotation = np.random.uniform(-self.rotate_range, - self.rotate_range) - pc = (w // 2, h // 2) - r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0) - cos = np.abs(r[0, 0]) - sin = np.abs(r[0, 1]) - - nw = int((h * sin) + (w * cos)) - nh = int((h * cos) + (w * sin)) - - (cx, cy) = pc - r[0, 2] += (nw / 2) - cx - r[1, 2] += (nh / 2) - cy - dsize = (nw, nh) - im = cv2.warpAffine( - im, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - label = cv2.warpAffine( - label, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.label_padding_value) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) class RandomScaleAspect(SegTransform): @@ -1125,11 +1046,7 @@ class ComposedSegTransforms(Compose): std=[0.5, 0.5, 0.5]): if mode == 'train': # 训练时的transforms,包含数据增强 - transforms = [ - RandomHorizontalFlip(prob=0.5), ResizeStepScaling(), - RandomPaddingCrop(crop_size=train_crop_size), Normalize( - mean=mean, std=std) - ] + pass else: # 验证/预测时的transforms transforms = [Normalize(mean=mean, std=std)] diff --git a/deploy/raspberry/python/utils/__init__.py b/deploy/raspberry/python/utils/__init__.py deleted file mode 100644 index 8b4da380ba487e25dce661f684b035aa3a4f3690..0000000000000000000000000000000000000000 --- a/deploy/raspberry/python/utils/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - diff --git a/deploy/raspberry/python/utils/logging.py b/deploy/raspberry/python/utils/logging.py deleted file mode 100755 index ab4046bcf9e0a461f55eaf31cf4c10c28fd5cc73..0000000000000000000000000000000000000000 --- a/deploy/raspberry/python/utils/logging.py +++ /dev/null @@ -1,54 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import os -import sys -import colorama -from colorama import init - -init(autoreset=True) -levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} -log_level = 2 - -def log(level=2, message="", use_color=False): - current_time = time.time() - time_array = time.localtime(current_time) - current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) - if log_level >= level: - if use_color: - print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[ - level], message).encode("utf-8").decode("latin1")) - else: - print("{} [{}]\t{}".format(current_time, levels[level], message) - .encode("utf-8").decode("latin1")) - sys.stdout.flush() - - -def debug(message="", use_color=False): - log(level=3, message=message, use_color=use_color) - - -def info(message="", use_color=False): - log(level=2, message=message, use_color=use_color) - - -def warning(message="", use_color=True): - log(level=1, message=message, use_color=use_color) - - -def error(message="", use_color=True, exit=True): - log(level=0, message=message, use_color=use_color) - if exit: - sys.exit(-1)