Commit 2af6a45f authored by syyxsxx

fix python openvino

Parent 152d0e81
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from six import text_type as _text_type
import argparse
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ import numpy as np
import yaml
from six import text_type as _text_type
from openvino.inference_engine import IECore
from utils import logging
@@ -33,11 +33,11 @@ class Predictor:
                 device="CPU"):
        self.device = device
        if not osp.exists(model_xml):
            print("model xml file does not exist in {}".format(model_xml))
        self.model_xml = model_xml
        self.model_bin = osp.splitext(model_xml)[0] + ".bin"
        if not osp.exists(model_yaml):
            print("model yaml file does not exist in {}".format(model_yaml))
        with open(model_yaml) as f:
            self.info = yaml.load(f.read(), Loader=yaml.Loader)
        self.model_type = self.info['_Attributes']['model_type']
@@ -65,9 +65,9 @@ class Predictor:
    def create_predictor(self):
        # initialization for specified device
        print("Creating Inference Engine")
        ie = IECore()
        print("Loading network files:\n\t{}\n\t{}".format(self.model_xml, self.model_bin))
        net = ie.read_network(model=self.model_xml, weights=self.model_bin)
        net.batch_size = 1
        network_config = {}
@@ -135,11 +135,11 @@ class Predictor:
        input_blob = next(iter(self.net.inputs))
        feed_dict[input_blob] = preprocessed_input['image']
        # Start sync inference
        print("Starting inference in synchronous mode")
        res = self.predictor.infer(inputs=feed_dict)
        # Processing output blob
        print("Processing output blob")
        return res
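# For context, the synchronous OpenVINO flow used by this predictor reduces to a
# few IECore calls. A minimal sketch, assuming the 2020-era Python API
# (openvino.inference_engine) and hypothetical model paths:
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # hypothetical paths
net.batch_size = 1
exec_net = ie.load_network(network=net, device_name="CPU")
input_blob = next(iter(net.inputs))
feed = np.zeros((1, 3, 224, 224), dtype=np.float32)  # stand-in for a preprocessed image
res = exec_net.infer(inputs={input_blob: feed})  # dict: output name -> np.ndarray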
@@ -167,7 +167,6 @@ class Predictor:
        elif self.model_type == "segmenter":
            im, im_info = self.transforms(image)
            im = np.expand_dims(im, axis=0).copy()
            #np.savetxt('./input_data.txt',im.flatten())
            res['image'] = im
            res['im_info'] = im_info
        return res
@@ -193,11 +192,9 @@ class Predictor:
        it = iter(self.net.outputs)
        next(it)
        score_name = next(it)
        #np.savetxt('./score_map.txt',preds[score_name].flatten())
        score_map = np.squeeze(preds[score_name])
        score_map = np.transpose(score_map, (1, 2, 0))
        label_name = next(it)
        #np.savetxt('./label_map.txt',preds[label_name].flatten())
        label_map = np.squeeze(preds[label_name]).astype('uint8')
        im_info = preprocessed_inputs['im_info']
        for info in im_info[::-1]:
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import random
import math
import cv2
import scipy
def bbox_area(src_bbox):
if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
return 0.
else:
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox[0] >= object_bbox[2] or \
sample_bbox[2] <= object_bbox[0] or \
sample_bbox[1] >= object_bbox[3] or \
sample_bbox[3] <= object_bbox[1]:
return 0
intersect_xmin = max(sample_bbox[0], object_bbox[0])
intersect_ymin = max(sample_bbox[1], object_bbox[1])
intersect_xmax = min(sample_bbox[2], object_bbox[2])
intersect_ymax = min(sample_bbox[3], object_bbox[3])
intersect_size = (intersect_xmax - intersect_xmin) * (
intersect_ymax - intersect_ymin)
sample_bbox_size = bbox_area(sample_bbox)
object_bbox_size = bbox_area(object_bbox)
overlap = intersect_size / (
sample_bbox_size + object_bbox_size - intersect_size)
return overlap
def iou_matrix(a, b):
tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
area_o = (area_a[:, np.newaxis] + area_b - area_i)
return area_i / (area_o + 1e-10)
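# A quick numeric check of iou_matrix (a sketch; values worked out by hand):
# box 2 of `a` overlaps `b` on [5, 5]x[10, 10], area 25, so IoU = 25 / (100 + 100 - 25).
import numpy as np

a = np.array([[0., 0., 10., 10.], [5., 5., 15., 15.]], dtype=np.float32)
b = np.array([[0., 0., 10., 10.]], dtype=np.float32)
print(iou_matrix(a, b))  # approx [[1.0], [0.142857]]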
def crop_box_with_center_constraint(box, crop):
cropped_box = box.copy()
cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0]
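# Example: crop_box_with_center_constraint keeps only boxes whose centers fall
# inside the crop window and re-expresses them in crop-local coordinates:
import numpy as np

boxes = np.array([[0., 0., 10., 10.], [40., 40., 60., 60.]], dtype=np.float32)
crop = np.array([30., 30., 70., 70.], dtype=np.float32)
cropped, keep = crop_box_with_center_constraint(boxes, crop)
# keep == [1]: only the second box's center (50, 50) lies inside the window;
# cropped[1] == [10., 10., 30., 30.] relative to the crop origin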
def is_poly(segm):
if not isinstance(segm, (list, dict)):
raise Exception("Invalid segm type: {}".format(type(segm)))
return isinstance(segm, list)
def crop_image(img, crop):
x1, y1, x2, y2 = crop
return img[y1:y2, x1:x2, :]
def crop_segms(segms, valid_ids, crop, height, width):
def _crop_poly(segm, crop):
xmin, ymin, xmax, ymax = crop
crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
crop_p = np.array(crop_coord).reshape(4, 2)
crop_p = Polygon(crop_p)
crop_segm = list()
for poly in segm:
poly = np.array(poly).reshape(len(poly) // 2, 2)
polygon = Polygon(poly)
if not polygon.is_valid:
exterior = polygon.exterior
multi_lines = exterior.intersection(exterior)
polygons = shapely.ops.polygonize(multi_lines)
polygon = MultiPolygon(polygons)
multi_polygon = list()
if isinstance(polygon, MultiPolygon):
multi_polygon = copy.deepcopy(polygon)
else:
multi_polygon.append(copy.deepcopy(polygon))
for per_polygon in multi_polygon:
inter = per_polygon.intersection(crop_p)
if not inter:
continue
if isinstance(inter, (MultiPolygon, GeometryCollection)):
for part in inter:
if not isinstance(part, Polygon):
continue
part = np.squeeze(
np.array(part.exterior.coords[:-1]).reshape(1, -1))
part[0::2] -= xmin
part[1::2] -= ymin
crop_segm.append(part.tolist())
elif isinstance(inter, Polygon):
crop_poly = np.squeeze(
np.array(inter.exterior.coords[:-1]).reshape(1, -1))
crop_poly[0::2] -= xmin
crop_poly[1::2] -= ymin
crop_segm.append(crop_poly.tolist())
else:
continue
return crop_segm
def _crop_rle(rle, crop, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
crop_segms = []
for id in valid_ids:
segm = segms[id]
if is_poly(segm):
import copy
import shapely.ops
import logging
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
logging.getLogger("shapely").setLevel(logging.WARNING)
# Polygon format
crop_segms.append(_crop_poly(segm, crop))
else:
# RLE format
import pycocotools.mask as mask_util
crop_segms.append(_crop_rle(segm, crop, height, width))
return crop_segms
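# A small polygon-format example of crop_segms (assuming shapely and
# pycocotools are installed): a 4x4 square cropped to its left half.
segms = [[[0., 0., 4., 0., 4., 4., 0., 4.]]]  # one instance, one flat x/y polygon
print(crop_segms(segms, valid_ids=[0], crop=[0, 0, 2, 4], height=4, width=4))
# -> one cropped polygon covering x in [0, 2], y in [0, 4]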
def expand_segms(segms, x, y, height, width, ratio):
def _expand_poly(poly, x, y):
expanded_poly = np.array(poly)
expanded_poly[0::2] += x
expanded_poly[1::2] += y
return expanded_poly.tolist()
def _expand_rle(rle, x, y, height, width, ratio):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
expanded_mask = np.full((int(height * ratio), int(width * ratio)),
0).astype(mask.dtype)
expanded_mask[y:y + height, x:x + width] = mask
rle = mask_util.encode(
np.array(expanded_mask, order='F', dtype=np.uint8))
return rle
expanded_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
expanded_segms.append([_expand_poly(poly, x, y) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
expanded_segms.append(
_expand_rle(segm, x, y, height, width, ratio))
return expanded_segms
def box_horizontal_flip(bboxes, width):
oldx1 = bboxes[:, 0].copy()
oldx2 = bboxes[:, 2].copy()
bboxes[:, 0] = width - oldx2 - 1
bboxes[:, 2] = width - oldx1 - 1
if bboxes.shape[0] != 0 and (bboxes[:, 2] < bboxes[:, 0]).all():
raise ValueError(
"RandomHorizontalFlip: invalid box, x2 should be greater than x1")
return bboxes
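# Minimal check of box_horizontal_flip with image width 100:
import numpy as np

boxes = np.array([[10., 20., 30., 40.]], dtype=np.float32)
print(box_horizontal_flip(boxes, width=100))  # [[69., 20., 89., 40.]]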
def segms_horizontal_flip(segms, height, width):
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects([rle], height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,7 +18,6 @@ import random
import os.path as osp
import numpy as np
from PIL import Image, ImageEnhance
import utils.logging as logging
class ClsTransform:
@@ -49,14 +48,7 @@ class Compose(ClsTransform):
            'must be equal or larger than 1!')
        self.transforms = transforms
# Check the ops in transforms; only PaddleX-defined ops or imgaug ops are supported
for op in self.transforms:
if not isinstance(op, ClsTransform):
import imgaug.augmenters as iaa
if not isinstance(op, iaa.Augmenter):
raise Exception(
"Elements in transforms should be defined in 'paddlex.cls.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
)
    def __call__(self, im, label=None):
        """
@@ -79,18 +71,10 @@ class Compose(ClsTransform):
            raise TypeError('Can\'t read the image file {}!'.format(im))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        for op in self.transforms:
            if isinstance(op, ClsTransform):
                outputs = op(im, label)
                im = outputs[0]
                if len(outputs) == 2:
                    label = outputs[1]
else:
import imgaug.augmenters as iaa
if isinstance(op, iaa.Augmenter):
im = execute_imgaug(op, im)
outputs = (im, )
if label is not None:
outputs = (im, label)
        return outputs

    def add_augmenters(self, augmenters):
@@ -100,109 +84,10 @@ class Compose(ClsTransform):
        transform_names = [type(x).__name__ for x in self.transforms]
        for aug in augmenters:
            if type(aug).__name__ in transform_names:
                print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
        self.transforms = augmenters + self.transforms
class RandomCrop(ClsTransform):
"""对图像进行随机剪裁,模型训练时的数据增强操作。
1. 根据lower_scale、lower_ratio、upper_ratio计算随机剪裁的高、宽。
2. 根据随机剪裁的高、宽随机选取剪裁的起始点。
3. 剪裁图像。
4. 调整剪裁后的图像的大小到crop_size*crop_size。
Args:
crop_size (int): 随机裁剪后重新调整的目标边长。默认为224。
lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
lower_ratio (float): 宽变换比例的最小限制。默认为3. / 4。
upper_ratio (float): 宽变换比例的最大限制。默认为4. / 3。
"""
def __init__(self,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
self.crop_size = crop_size
self.lower_scale = lower_scale
self.lower_ratio = lower_ratio
self.upper_ratio = upper_ratio
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
im = random_crop(im, self.crop_size, self.lower_scale,
self.lower_ratio, self.upper_ratio)
if label is None:
return (im, )
else:
return (im, label)
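# Typical usage of the RandomCrop op removed above (a sketch, assuming the
# random_crop helper from the ops module is in scope):
import numpy as np

op = RandomCrop(crop_size=224)
im = np.random.uniform(0, 255, (480, 640, 3)).astype('float32')
out_im, = op(im)  # no label: a 1-tuple holding the 224x224 crop
out_im, label = op(im, 3)  # with a label, the class id is passed through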
class RandomHorizontalFlip(ClsTransform):
"""以一定的概率对图像进行随机水平翻转,模型训练时的数据增强操作。
Args:
prob (float): 随机水平翻转的概率。默认为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is None:
return (im, )
else:
return (im, label)
class RandomVerticalFlip(ClsTransform):
"""以一定的概率对图像进行随机垂直翻转,模型训练时的数据增强操作。
Args:
prob (float): 随机垂直翻转的概率。默认为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is None:
return (im, )
else:
return (im, label)
class Normalize(ClsTransform):
    """Normalize the image.
@@ -315,131 +200,6 @@ class CenterCrop(ClsTransform):
        return (im, label)
class RandomRotate(ClsTransform):
def __init__(self, rotate_range=30, prob=0.5):
"""以一定的概率对图像在[-rotate_range, rotaterange]角度范围内进行旋转,模型训练时的数据增强操作。
Args:
rotate_range (int): 旋转度数的范围。默认为30。
prob (float): 随机旋转的概率。默认为0.5。
"""
self.rotate_range = rotate_range
self.prob = prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
rotate_lower = -self.rotate_range
rotate_upper = self.rotate_range
im = im.astype('uint8')
im = Image.fromarray(im)
if np.random.uniform(0, 1) < self.prob:
im = rotate(im, rotate_lower, rotate_upper)
im = np.asarray(im).astype('float32')
if label is None:
return (im, )
else:
return (im, label)
class RandomDistort(ClsTransform):
"""以一定的概率对图像进行随机像素内容变换,模型训练时的数据增强操作。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像在范围[-range, range]内进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.9。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.9。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.9。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.9,
brightness_prob=0.5,
contrast_range=0.9,
contrast_prob=0.5,
saturation_range=0.9,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob,
}
for id in range(len(ops)):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label is None:
return (im, )
else:
return (im, label)
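# Sketch of how the distortions are gated by their probabilities (assuming the
# brightness/contrast/saturation/hue helpers from the ops module are in scope):
import numpy as np

op = RandomDistort(brightness_prob=1.0, contrast_prob=0.0,
                   saturation_prob=0.0, hue_prob=0.0)
im = np.random.uniform(0, 255, (224, 224, 3)).astype('float32')
out_im, = op(im)  # only brightness is perturbed, with a factor drawn from [0.1, 1.9]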
class ArrangeClassifier(ClsTransform):
    """Collect the information needed for training/evaluation/prediction. Note: users do not need to call this op explicitly.
@@ -510,12 +270,6 @@ class ComposedClsTransforms(Compose):
            )
        if mode == 'train':
            # transforms used during training, including data augmentation
            pass
transforms = [
RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5),
Normalize(
mean=mean, std=std)
]
        else:
            # transforms used during evaluation/prediction
            transforms = [
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -24,10 +24,8 @@ import numpy as np
import cv2
from PIL import Image, ImageEnhance
from .imgaug_support import execute_imgaug
from .ops import *
from .box_utils import *
import utils.logging as logging
class DetTransform:
@@ -61,14 +59,6 @@ class Compose(DetTransform):
        for t in self.transforms:
            if type(t).__name__ == 'MixupImage':
                self.use_mixup = True
# Check the ops in transforms; only PaddleX-defined ops or imgaug ops are supported
for op in self.transforms:
if not isinstance(op, DetTransform):
import imgaug.augmenters as iaa
if not isinstance(op, iaa.Augmenter):
raise Exception(
"Elements in transforms should be defined in 'paddlex.det.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
)
    def __call__(self, im, im_info=None, label_info=None):
        """
@@ -142,15 +132,8 @@ class Compose(DetTransform):
        for op in self.transforms:
            if im is None:
                return None
            if isinstance(op, DetTransform):
                outputs = op(im, im_info, label_info)
                im = outputs[0]
else:
im = execute_imgaug(op, im)
if label_info is not None:
outputs = (im, im_info, label_info)
else:
outputs = (im, im_info)
        return outputs

    def add_augmenters(self, augmenters):
@@ -160,7 +143,7 @@ class Compose(DetTransform):
        transform_names = [type(x).__name__ for x in self.transforms]
        for aug in augmenters:
            if type(aug).__name__ in transform_names:
                print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
        self.transforms = augmenters + self.transforms
@@ -411,82 +394,6 @@ class Resize(DetTransform):
        return (im, im_info, label_info)
class RandomHorizontalFlip(DetTransform):
"""随机翻转图像、标注框、分割信息,模型训练时的数据增强操作。
1. 随机采样一个0-1之间的小数,当小数小于水平翻转概率时,
执行2-4步操作,否则直接返回。
2. 水平翻转图像。
3. 计算翻转后的真实标注框的坐标,更新label_info中的gt_bbox信息。
4. 计算翻转后的真实分割区域的坐标,更新label_info中的gt_poly信息。
Args:
prob (float): 随机水平翻转的概率。默认为0.5。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, prob=0.5):
self.prob = prob
if not isinstance(self.prob, float):
raise TypeError("RandomHorizontalFlip: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- gt_bbox (np.ndarray): 水平翻转后的标注框坐标[x1, y1, x2, y2],形状为(n, 4),
其中n代表真实标注框的个数。
- gt_poly (list): 水平翻转后的多边形分割区域的x、y坐标,长度为n,
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if not isinstance(im, np.ndarray):
raise TypeError(
"RandomHorizontalFlip: image is not a numpy array.")
if len(im.shape) != 3:
raise ValueError(
"RandomHorizontalFlip: image is not 3-dimensional.")
        if im_info is None or label_info is None:
            raise TypeError(
                'Cannot do RandomHorizontalFlip! ' +
                'Because im_info and label_info cannot be None!')
        if 'gt_bbox' not in label_info:
            raise TypeError('Cannot do RandomHorizontalFlip! ' + \
                            'Because gt_bbox is not in label_info!')
image_shape = im_info['image_shape']
gt_bbox = label_info['gt_bbox']
height = image_shape[0]
width = image_shape[1]
if np.random.uniform(0, 1) < self.prob:
im = horizontal_flip(im)
if gt_bbox.shape[0] == 0:
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
label_info['gt_bbox'] = box_horizontal_flip(gt_bbox, width)
if 'gt_poly' in label_info and \
len(label_info['gt_poly']) != 0:
label_info['gt_poly'] = segms_horizontal_flip(
label_info['gt_poly'], height, width)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Normalize(DetTransform):
@@ -533,642 +440,6 @@ class Normalize(DetTransform):
        return (im, im_info, label_info)
class RandomDistort(DetTransform):
"""以一定的概率对图像进行随机像素内容变换,模型训练时的数据增强操作
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率在范围[-range, range]对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
for id in range(4):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class MixupImage(DetTransform):
"""对图像进行mixup操作,模型训练时的数据增强操作,目前仅YOLOv3模型支持该transform。
当label_info中不存在mixup字段时,直接返回,否则进行下述操作:
1. 从随机beta分布中抽取出随机因子factor。
2.
- 当factor>=1.0时,去除label_info中的mixup字段,直接返回。
- 当factor<=0.0时,直接返回label_info中的mixup字段,并在label_info中去除该字段。
- 其余情况,执行下述操作:
(1)原图像乘以factor,mixup图像乘以(1-factor),叠加2个结果。
(2)拼接原图像标注框和mixup图像标注框。
(3)拼接原图像标注框类别和mixup图像标注框类别。
(4)原图像标注框混合得分乘以factor,mixup图像标注框混合得分乘以(1-factor),叠加2个结果。
3. 更新im_info中的image_shape信息。
Args:
alpha (float): 随机beta分布的下限。默认为1.5。
beta (float): 随机beta分布的上限。默认为1.5。
mixup_epoch (int): 在前mixup_epoch轮使用mixup增强操作;当该参数为-1时,该策略不会生效。
默认为-1。
Raises:
ValueError: 数据长度不匹配。
"""
def __init__(self, alpha=1.5, beta=1.5, mixup_epoch=-1):
self.alpha = alpha
self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha should be positive in MixupImage")
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in MixupImage")
self.mixup_epoch = mixup_epoch
def _mixup_img(self, img1, img2, factor):
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
img = np.zeros((h, w, img1.shape[2]), 'float32')
img[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32') * factor
img[:img2.shape[0], :img2.shape[1], :] += \
img2.astype('float32') * (1.0 - factor)
return img.astype('float32')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- image_shape (np.ndarray): mixup后的图像高、宽二者组成的np.ndarray,形状为(2,)。
im_info删除的字段:
- mixup (list): 与当前字段进行mixup的图像相关信息。
label_info更新字段为:
- gt_bbox (np.ndarray): mixup后真实标注框坐标,形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): mixup后每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): mixup后每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
"""
if im_info is None:
            raise TypeError('Cannot do MixupImage! ' +
                            'Because im_info cannot be None!')
if 'mixup' not in im_info:
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if im_info['epoch'] > self.mixup_epoch \
or factor >= 1.0:
im_info.pop('mixup')
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
if factor <= 0.0:
return im_info.pop('mixup')
im = self._mixup_img(im, im_info['mixup'][0], factor)
if label_info is None:
            raise TypeError('Cannot do MixupImage! ' +
                            'Because label_info cannot be None!')
if 'gt_bbox' not in label_info or \
'gt_class' not in label_info or \
'gt_score' not in label_info:
            raise TypeError('Cannot do MixupImage! ' + \
                            'Because gt_bbox/gt_class/gt_score is not in label_info!')
gt_bbox1 = label_info['gt_bbox']
gt_bbox2 = im_info['mixup'][2]['gt_bbox']
gt_class1 = label_info['gt_class']
gt_class2 = im_info['mixup'][2]['gt_class']
gt_score1 = label_info['gt_score']
gt_score2 = im_info['mixup'][2]['gt_score']
if 'gt_poly' in label_info:
gt_poly1 = label_info['gt_poly']
gt_poly2 = im_info['mixup'][2]['gt_poly']
is_crowd1 = label_info['is_crowd']
is_crowd2 = im_info['mixup'][2]['is_crowd']
if 0 not in gt_class1 and 0 not in gt_class2:
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1 + gt_poly2
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
elif 0 in gt_class1:
gt_bbox = gt_bbox2
gt_class = gt_class2
gt_score = gt_score2 * (1. - factor)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly2
is_crowd = is_crowd2
else:
gt_bbox = gt_bbox1
gt_class = gt_class1
gt_score = gt_score1 * factor
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1
is_crowd = is_crowd1
label_info['gt_bbox'] = gt_bbox
label_info['gt_score'] = gt_score
label_info['gt_class'] = gt_class
label_info['is_crowd'] = is_crowd
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
im_info.pop('mixup')
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
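# The blending step is easy to verify in isolation; with a hand-picked factor of
# 0.6 the overlapping region becomes a weighted average (a small sketch):
import numpy as np

mix = MixupImage(alpha=1.5, beta=1.5, mixup_epoch=10)
img1 = np.full((2, 2, 3), 200, dtype='float32')
img2 = np.full((4, 2, 3), 100, dtype='float32')
out = mix._mixup_img(img1, img2, factor=0.6)
# out is 4x2 (the max of both shapes); the overlap equals 200*0.6 + 100*0.4 = 160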
class RandomExpand(DetTransform):
"""随机扩张图像,模型训练时的数据增强操作。
1. 随机选取扩张比例(扩张比例大于1时才进行扩张)。
2. 计算扩张后图像大小。
3. 初始化像素值为输入填充值的图像,并将原图像随机粘贴于该图像上。
4. 根据原图像粘贴位置换算出扩张后真实标注框的位置坐标。
5. 根据原图像粘贴位置换算出扩张后真实分割区域的位置坐标。
Args:
ratio (float): 图像扩张的最大比例。默认为4.0。
prob (float): 随机扩张的概率。默认为0.5。
fill_value (list): 扩张图像的初始填充值(0-255)。默认为[123.675, 116.28, 103.53]。
"""
def __init__(self,
ratio=4.,
prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
super(RandomExpand, self).__init__()
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
self.prob = prob
assert isinstance(fill_value, Sequence), \
"fill value must be sequence"
if not isinstance(fill_value, tuple):
fill_value = tuple(fill_value)
self.fill_value = fill_value
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- image_shape (np.ndarray): 扩张后的图像高、宽二者组成的np.ndarray,形状为(2,)。
label_info更新字段为:
- gt_bbox (np.ndarray): 随机扩张后真实标注框坐标,形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 随机扩张后每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
"""
        if im_info is None or label_info is None:
            raise TypeError(
                'Cannot do RandomExpand! ' +
                'Because im_info and label_info cannot be None!')
        if 'gt_bbox' not in label_info or \
                'gt_class' not in label_info:
            raise TypeError('Cannot do RandomExpand! ' + \
                            'Because gt_bbox/gt_class is not in label_info!')
if np.random.uniform(0., 1.) < self.prob:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
height = int(image_shape[0])
width = int(image_shape[1])
expand_ratio = np.random.uniform(1., self.ratio)
h = int(height * expand_ratio)
w = int(width * expand_ratio)
if not h > height or not w > width:
return (im, im_info, label_info)
y = np.random.randint(0, h - height)
x = np.random.randint(0, w - width)
canvas = np.ones((h, w, 3), dtype=np.float32)
canvas *= np.array(self.fill_value, dtype=np.float32)
canvas[y:y + height, x:x + width, :] = im
im_info['image_shape'] = np.array([h, w]).astype('int32')
if 'gt_bbox' in label_info and len(label_info['gt_bbox']) > 0:
label_info['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32)
if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0:
label_info['gt_poly'] = expand_segms(label_info['gt_poly'], x, y,
height, width, expand_ratio)
return (canvas, im_info, label_info)
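# A hand-picked (non-random) instance of the expand step above: paste a 100x200
# image into a 2x canvas at offset (x=50, y=30) and shift boxes by the same offset.
import numpy as np

height, width, ratio = 100, 200, 2.0
h, w = int(height * ratio), int(width * ratio)
canvas = np.ones((h, w, 3), dtype=np.float32) * np.array(
    [123.675, 116.28, 103.53], dtype=np.float32)
x, y = 50, 30
canvas[y:y + height, x:x + width, :] = np.zeros((height, width, 3), np.float32)
gt_bbox = np.array([[10., 10., 60., 40.]], dtype=np.float32)
gt_bbox += np.array([x, y] * 2, dtype=np.float32)  # -> [[60., 40., 110., 70.]]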
class RandomCrop(DetTransform):
"""随机裁剪图像。
1. 若allow_no_crop为True,则在thresholds加入’no_crop’。
2. 随机打乱thresholds。
3. 遍历thresholds中各元素:
(1) 如果当前thresh为’no_crop’,则返回原始图像和标注信息。
(2) 随机取出aspect_ratio和scaling中的值并由此计算出候选裁剪区域的高、宽、起始点。
(3) 计算真实标注框与候选裁剪区域IoU,若全部真实标注框的IoU都小于thresh,则继续第3步。
(4) 如果cover_all_box为True且存在真实标注框的IoU小于thresh,则继续第3步。
(5) 筛选出位于候选裁剪区域内的真实标注框,若有效框的个数为0,则继续第3步,否则进行第4步。
4. 换算有效真值标注框相对候选裁剪区域的位置坐标。
5. 换算有效分割区域相对候选裁剪区域的位置坐标。
Args:
aspect_ratio (list): 裁剪后短边缩放比例的取值范围,以[min, max]形式表示。默认值为[.5, 2.]。
thresholds (list): 判断裁剪候选区域是否有效所需的IoU阈值取值列表。默认值为[.0, .1, .3, .5, .7, .9]。
scaling (list): 裁剪面积相对原面积的取值范围,以[min, max]形式表示。默认值为[.3, 1.]。
num_attempts (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。
allow_no_crop (bool): 是否允许未进行裁剪。默认值为True。
cover_all_box (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
"""
def __init__(self,
aspect_ratio=[.5, 2.],
thresholds=[.0, .1, .3, .5, .7, .9],
scaling=[.3, 1.],
num_attempts=50,
allow_no_crop=True,
cover_all_box=False):
self.aspect_ratio = aspect_ratio
self.thresholds = thresholds
self.scaling = scaling
self.num_attempts = num_attempts
self.allow_no_crop = allow_no_crop
self.cover_all_box = cover_all_box
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- image_shape (np.ndarray): 扩裁剪的图像高、宽二者组成的np.ndarray,形状为(2,)。
label_info更新字段为:
- gt_bbox (np.ndarray): 随机裁剪后真实标注框坐标,形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 随机裁剪后每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): 随机裁剪后每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
"""
        if im_info is None or label_info is None:
            raise TypeError(
                'Cannot do RandomCrop! ' +
                'Because im_info and label_info cannot be None!')
        if 'gt_bbox' not in label_info or \
                'gt_class' not in label_info:
            raise TypeError('Cannot do RandomCrop! ' + \
                            'Because gt_bbox/gt_class is not in label_info!')
if len(label_info['gt_bbox']) == 0:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
w = image_shape[1]
h = image_shape[0]
gt_bbox = label_info['gt_bbox']
thresholds = list(self.thresholds)
if self.allow_no_crop:
thresholds.append('no_crop')
np.random.shuffle(thresholds)
for thresh in thresholds:
if thresh == 'no_crop':
return (im, im_info, label_info)
found = False
for i in range(self.num_attempts):
scale = np.random.uniform(*self.scaling)
min_ar, max_ar = self.aspect_ratio
aspect_ratio = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(aspect_ratio))
crop_w = int(w * scale * np.sqrt(aspect_ratio))
crop_y = np.random.randint(0, h - crop_h)
crop_x = np.random.randint(0, w - crop_w)
crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
iou = iou_matrix(
gt_bbox, np.array(
[crop_box], dtype=np.float32))
if iou.max() < thresh:
continue
if self.cover_all_box and iou.min() < thresh:
continue
cropped_box, valid_ids = crop_box_with_center_constraint(
gt_bbox, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
break
if found:
if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0:
crop_polys = crop_segms(
label_info['gt_poly'],
valid_ids,
np.array(
crop_box, dtype=np.int64),
h,
w)
if [] in crop_polys:
delete_id = list()
valid_polys = list()
for id, crop_poly in enumerate(crop_polys):
if crop_poly == []:
delete_id.append(id)
else:
valid_polys.append(crop_poly)
valid_ids = np.delete(valid_ids, delete_id)
if len(valid_polys) == 0:
return (im, im_info, label_info)
label_info['gt_poly'] = valid_polys
else:
label_info['gt_poly'] = crop_polys
im = crop_image(im, crop_box)
label_info['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
label_info['gt_class'] = np.take(
label_info['gt_class'], valid_ids, axis=0)
im_info['image_shape'] = np.array(
[crop_box[3] - crop_box[1],
crop_box[2] - crop_box[0]]).astype('int32')
if 'gt_score' in label_info:
label_info['gt_score'] = np.take(
label_info['gt_score'], valid_ids, axis=0)
if 'is_crowd' in label_info:
label_info['is_crowd'] = np.take(
label_info['is_crowd'], valid_ids, axis=0)
return (im, im_info, label_info)
return (im, im_info, label_info)
class ArrangeFasterRCNN(DetTransform):
"""获取FasterRCNN模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, im_resize_info, gt_bbox, gt_class, is_crowd),分别对应
图像np.ndarray数据、图像相当对于原图的resize信息、真实标注框、真实标注框对应的类别、真实标注框内是否是一组对象;
当mode为'eval'时,返回(im, im_resize_info, im_id, im_shape, gt_bbox, gt_class, is_difficult),
分别对应图像np.ndarray数据、图像相当对于原图的resize信息、图像id、图像大小信息、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_resize_info, im_shape),分别对应图像np.ndarray数据、
图像相当对于原图的resize信息、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
            if im_info is None or label_info is None:
                raise TypeError(
                    'Cannot do ArrangeFasterRCNN! ' +
                    'Because im_info and label_info cannot be None!')
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_resize_info = im_info['im_resize_info']
gt_bbox = label_info['gt_bbox']
gt_class = label_info['gt_class']
is_crowd = label_info['is_crowd']
outputs = (im, im_resize_info, gt_bbox, gt_class, is_crowd)
elif self.mode == 'eval':
            if im_info is None or label_info is None:
                raise TypeError(
                    'Cannot do ArrangeFasterRCNN! ' +
                    'Because im_info and label_info cannot be None!')
im_resize_info = im_info['im_resize_info']
im_id = im_info['im_id']
im_shape = np.array(
(im_info['image_shape'][0], im_info['image_shape'][1], 1),
dtype=np.float32)
gt_bbox = label_info['gt_bbox']
gt_class = label_info['gt_class']
is_difficult = label_info['difficult']
outputs = (im, im_resize_info, im_id, im_shape, gt_bbox, gt_class,
is_difficult)
else:
            if im_info is None:
                raise TypeError('Cannot do ArrangeFasterRCNN! ' +
                                'Because im_info cannot be None!')
im_resize_info = im_info['im_resize_info']
im_shape = np.array(
(im_info['image_shape'][0], im_info['image_shape'][1], 1),
dtype=np.float32)
outputs = (im, im_resize_info, im_shape)
return outputs
class ArrangeMaskRCNN(DetTransform):
"""获取MaskRCNN模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, im_resize_info, gt_bbox, gt_class, is_crowd, gt_masks),分别对应
图像np.ndarray数据、图像相当对于原图的resize信息、真实标注框、真实标注框对应的类别、真实标注框内是否是一组对象、
真实分割区域;当mode为'eval'时,返回(im, im_resize_info, im_id, im_shape),分别对应图像np.ndarray数据、
图像相当对于原图的resize信息、图像id、图像大小信息;当mode为'test'或'quant'时,返回(im, im_resize_info, im_shape),
分别对应图像np.ndarray数据、图像相当对于原图的resize信息、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
            if im_info is None or label_info is None:
                raise TypeError(
                    'Cannot do ArrangeTrainMaskRCNN! ' +
                    'Because im_info and label_info cannot be None!')
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_resize_info = im_info['im_resize_info']
gt_bbox = label_info['gt_bbox']
gt_class = label_info['gt_class']
is_crowd = label_info['is_crowd']
assert 'gt_poly' in label_info
segms = label_info['gt_poly']
if len(segms) != 0:
assert len(segms) == is_crowd.shape[0]
gt_masks = []
valid = True
for i in range(len(segms)):
segm = segms[i]
gt_segm = []
if is_crowd[i]:
gt_segm.append([[0, 0]])
else:
for poly in segm:
if len(poly) == 0:
valid = False
break
gt_segm.append(np.array(poly).reshape(-1, 2))
if (not valid) or len(gt_segm) == 0:
break
gt_masks.append(gt_segm)
outputs = (im, im_resize_info, gt_bbox, gt_class, is_crowd,
gt_masks)
else:
            if im_info is None:
                raise TypeError('Cannot do ArrangeMaskRCNN! ' +
                                'Because im_info cannot be None!')
im_resize_info = im_info['im_resize_info']
im_shape = np.array(
(im_info['image_shape'][0], im_info['image_shape'][1], 1),
dtype=np.float32)
if self.mode == 'eval':
im_id = im_info['im_id']
outputs = (im, im_resize_info, im_id, im_shape)
else:
outputs = (im, im_resize_info, im_shape)
return outputs
class ArrangeYOLOv3(DetTransform):
@@ -1208,50 +479,9 @@ class ArrangeYOLOv3(DetTransform):
        """
        im = permute(im, False)
        if self.mode == 'train':
            pass
            if im_info is None or label_info is None:
                raise TypeError(
                    'Cannot do ArrangeYolov3! ' +
                    'Because im_info and label_info cannot be None!')
im_shape = im_info['image_shape']
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
if len(label_info['gt_bbox']) != len(label_info['gt_score']):
raise ValueError("gt num mismatch: bbox and score.")
gt_bbox = np.zeros((50, 4), dtype=im.dtype)
gt_class = np.zeros((50, ), dtype=np.int32)
gt_score = np.zeros((50, ), dtype=im.dtype)
gt_num = min(50, len(label_info['gt_bbox']))
if gt_num > 0:
label_info['gt_class'][:gt_num, 0] = label_info[
'gt_class'][:gt_num, 0] - 1
if -1 not in label_info['gt_class']:
gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0]
# parse [x1, y1, x2, y2] to [x, y, w, h]
gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2]
gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2.
outputs = (im, gt_bbox, gt_class, gt_score, im_shape)
        elif self.mode == 'eval':
            pass
            if im_info is None or label_info is None:
                raise TypeError(
                    'Cannot do ArrangeYolov3! ' +
                    'Because im_info and label_info cannot be None!')
im_shape = im_info['image_shape']
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_id = im_info['im_id']
gt_bbox = np.zeros((50, 4), dtype=im.dtype)
gt_class = np.zeros((50, ), dtype=np.int32)
difficult = np.zeros((50, ), dtype=np.int32)
gt_num = min(50, len(label_info['gt_bbox']))
if gt_num > 0:
label_info['gt_class'][:gt_num, 0] = label_info[
'gt_class'][:gt_num, 0] - 1
gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
difficult[:gt_num] = label_info['difficult'][:gt_num, 0]
outputs = (im, im_shape, im_id, gt_bbox, gt_class, difficult)
        else:
            if im_info is None:
                raise TypeError('Cannot do ArrangeYolov3! ' +
@@ -1261,51 +491,6 @@ class ArrangeYOLOv3(DetTransform):
        return outputs
class ComposedRCNNTransforms(Compose):
""" RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下,
训练阶段:
1. 随机以0.5的概率将图像水平翻转
2. 图像归一化
3. 图像按比例Resize,scale计算方式如下
scale = min_max_size[0] / short_size_of_image
if max_size_of_image * scale > min_max_size[1]:
scale = min_max_size[1] / max_size_of_image
4. 将3步骤的长宽进行padding,使得长宽为32的倍数
验证阶段:
1. 图像归一化
2. 图像按比例Resize,scale计算方式同上训练阶段
3. 将2步骤的长宽进行padding,使得长宽为32的倍数
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
min_max_size(list): 图像在缩放时,最小边和最大边的约束条件
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
min_max_size=[800, 1333],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
if mode == 'train':
            # transforms used during training, including data augmentation
transforms = [
RandomHorizontalFlip(prob=0.5), Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
else:
            # transforms used during evaluation/prediction
transforms = [
Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
super(ComposedRCNNTransforms, self).__init__(transforms)
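# A worked instance of the scale rule in the docstring above, for a 600x1600
# image with min_max_size=[800, 1333]:
short_side, long_side = 600., 1600.
scale = 800. / short_side  # 1.333..., driven by the short side
if long_side * scale > 1333.:
    scale = 1333. / long_side  # 0.833125, the value actually used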
class ComposedYOLOv3Transforms(Compose):
@@ -1349,13 +534,7 @@ class ComposedYOLOv3Transforms(Compose):
        if mode == 'train':
            # transforms used during training, including data augmentation
            pass
            transforms = [
MixupImage(mixup_epoch=mixup_epoch), RandomDistort(),
RandomExpand(), RandomCrop(), Resize(
target_size=width,
interp='RANDOM'), RandomHorizontalFlip(), Normalize(
mean=mean, std=std)
]
        else:
            # transforms used during evaluation/prediction
            transforms = [
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import copy
def execute_imgaug(augmenter, im, bboxes=None, polygons=None,
segment_map=None):
    # Preprocess: convert bboxes and polygons to imgaug format
import imgaug.augmentables.kps as kps
import imgaug.augmentables.bbs as bbs
aug_im = im.astype('uint8')
aug_im = augmenter.augment(image=aug_im).astype('float32')
return aug_im
    # TODO: imgaug's annotation handling differs in places from PaddleX's existing transforms.
    # For now only the raw image is processed, so only pixel-level imgaug augmenters can be used.
    # The code below is not executed yet.
aug_bboxes = None
if bboxes is not None:
aug_bboxes = list()
for i in range(len(bboxes)):
x1 = bboxes[i, 0]
y1 = bboxes[i, 1]
x2 = bboxes[i, 2]
y2 = bboxes[i, 3]
aug_bboxes.append(bbs.BoundingBox(x1, y1, x2, y2))
aug_points = None
if polygons is not None:
aug_points = list()
for i in range(len(polygons)):
num = len(polygons[i])
for j in range(num):
tmp = np.reshape(polygons[i][j], (-1, 2))
for k in range(len(tmp)):
aug_points.append(kps.Keypoint(tmp[k, 0], tmp[k, 1]))
aug_segment_map = None
if segment_map is not None:
if len(segment_map.shape) == 2:
h, w = segment_map.shape
aug_segment_map = np.reshape(segment_map, (1, h, w, 1))
elif len(segment_map.shape) == 3:
h, w, c = segment_map.shape
aug_segment_map = np.reshape(segment_map, (1, h, w, c))
        else:
            raise Exception(
                "Only 2-dimensional or 3-dimensional segment_map is supported")
unnormalized_batch = augmenter.augment(
image=aug_im,
bounding_boxes=aug_bboxes,
keypoints=aug_points,
segmentation_maps=aug_segment_map,
return_batch=True)
aug_im = unnormalized_batch.images_aug[0]
aug_bboxes = unnormalized_batch.bounding_boxes_aug
aug_points = unnormalized_batch.keypoints_aug
aug_seg_map = unnormalized_batch.segmentation_maps_aug
aug_im = aug_im.astype('float32')
if aug_bboxes is not None:
converted_bboxes = list()
for i in range(len(aug_bboxes)):
converted_bboxes.append([
aug_bboxes[i].x1, aug_bboxes[i].y1, aug_bboxes[i].x2,
aug_bboxes[i].y2
])
aug_bboxes = converted_bboxes
aug_polygons = None
if aug_points is not None:
aug_polygons = copy.deepcopy(polygons)
idx = 0
for i in range(len(aug_polygons)):
num = len(aug_polygons[i])
for j in range(num):
num_points = len(aug_polygons[i][j]) // 2
for k in range(num_points):
aug_polygons[i][j][k * 2] = aug_points[idx].x
aug_polygons[i][j][k * 2 + 1] = aug_points[idx].y
idx += 1
result = [aug_im]
if aug_bboxes is not None:
result.append(np.array(aug_bboxes))
if aug_polygons is not None:
result.append(aug_polygons)
if aug_seg_map is not None:
n, h, w, c = aug_seg_map.shape
if len(segment_map.shape) == 2:
aug_seg_map = np.reshape(aug_seg_map, (h, w))
elif len(segment_map.shape) == 3:
aug_seg_map = np.reshape(aug_seg_map, (h, w, c))
result.append(aug_seg_map)
return result
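# As the TODO above notes, only pixel-level augmenters take effect here. A usage
# sketch, assuming imgaug is installed:
import numpy as np
import imgaug.augmenters as iaa

aug = iaa.GaussianBlur(sigma=(0.0, 1.5))
im = np.random.uniform(0, 255, (224, 224, 3)).astype('float32')
aug_im = execute_imgaug(aug, im)  # float32 image back; boxes/polygons untouched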
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,14 +14,12 @@
# limitations under the License.
from .ops import *
from .imgaug_support import execute_imgaug
import random
import os.path as osp
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
import utils.logging as logging
class SegTransform:
@@ -53,14 +51,7 @@ class Compose(SegTransform):
            'must be equal or larger than 1!')
        self.transforms = transforms
        self.to_rgb = False
# Check the ops in transforms; only PaddleX-defined ops or imgaug ops are supported
for op in self.transforms:
if not isinstance(op, SegTransform):
import imgaug.augmenters as iaa
if not isinstance(op, iaa.Augmenter):
raise Exception(
"Elements in transforms should be defined in 'paddlex.seg.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
)
    def __call__(self, im, im_info=None, label=None):
        """
@@ -116,7 +107,7 @@ class Compose(SegTransform):
        transform_names = [type(x).__name__ for x in self.transforms]
        for aug in augmenters:
            if type(aug).__name__ in transform_names:
                print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
        self.transforms = augmenters + self.transforms
@@ -828,76 +819,6 @@ class RandomBlur(SegTransform):
        return (im, im_info, label)
class RandomRotate(SegTransform):
"""对图像进行随机旋转, 模型训练时的数据增强操作。
在旋转区间[-rotate_range, rotate_range]内,对图像进行随机旋转,当存在标注图像时,同步进行,
并对旋转后的图像和标注图像进行相应的padding。
Args:
rotate_range (float): 最大旋转角度。默认为15度。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认为255。
"""
def __init__(self,
rotate_range=15,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
self.rotate_range = rotate_range
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.rotate_range > 0:
(h, w) = im.shape[:2]
do_rotation = np.random.uniform(-self.rotate_range,
self.rotate_range)
pc = (w // 2, h // 2)
r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
cos = np.abs(r[0, 0])
sin = np.abs(r[0, 1])
nw = int((h * sin) + (w * cos))
nh = int((h * cos) + (w * sin))
(cx, cy) = pc
r[0, 2] += (nw / 2) - cx
r[1, 2] += (nh / 2) - cy
dsize = (nw, nh)
im = cv2.warpAffine(
im,
r,
dsize=dsize,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.im_padding_value)
label = cv2.warpAffine(
label,
r,
dsize=dsize,
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
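# Geometry check (editor's illustration, not part of the original file): a
# w x h image rotated by theta needs a canvas of width h*|sin(theta)| + w*|cos(theta)|
# and height h*|cos(theta)| + w*|sin(theta)|; the translation column of the
# affine matrix is then shifted so the rotated content stays centred.
import cv2
import numpy as np

toy = np.zeros((200, 300, 3), dtype=np.uint8)          # h=200, w=300
h, w = toy.shape[:2]
r = cv2.getRotationMatrix2D((w // 2, h // 2), 15.0, 1.0)
cos, sin = np.abs(r[0, 0]), np.abs(r[0, 1])
nw, nh = int(h * sin + w * cos), int(h * cos + w * sin)
r[0, 2] += nw / 2 - w // 2                             # recentre on the larger canvas
r[1, 2] += nh / 2 - h // 2
rotated = cv2.warpAffine(toy, r, (nw, nh), flags=cv2.INTER_LINEAR,
                         borderMode=cv2.BORDER_CONSTANT,
                         borderValue=(127.5, 127.5, 127.5))
print(rotated.shape)                                   # (270, 341, 3) for 15 degrees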
class RandomScaleAspect(SegTransform):
...@@ -1125,11 +1046,7 @@ class ComposedSegTransforms(Compose):
                 std=[0.5, 0.5, 0.5]):
        if mode == 'train':
            # transforms used during training, including data augmentation
            transforms = [
                RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
                RandomPaddingCrop(crop_size=train_crop_size), Normalize(
                    mean=mean, std=std)
            ]
        else:
            # transforms used for evaluation/prediction
            transforms = [Normalize(mean=mean, std=std)]
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import sys
from colorama import init

# colorama makes the ANSI colour codes below work on Windows consoles too
init(autoreset=True)

levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
log_level = 2  # messages with a level above this threshold are suppressed


def log(level=2, message="", use_color=False):
    current_time = time.time()
    time_array = time.localtime(current_time)
    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
    if log_level >= level:
        if use_color:
            # wrap the line in red ANSI escape codes
            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(
                current_time, levels[level], message).encode("utf-8").decode(
                    "latin1"))
        else:
            print("{} [{}]\t{}".format(current_time, levels[level], message)
                  .encode("utf-8").decode("latin1"))
        sys.stdout.flush()


def debug(message="", use_color=False):
    log(level=3, message=message, use_color=use_color)


def info(message="", use_color=False):
    log(level=2, message=message, use_color=use_color)


def warning(message="", use_color=True):
    log(level=1, message=message, use_color=use_color)


def error(message="", use_color=True, exit=True):
    log(level=0, message=message, use_color=use_color)
    if exit:
        # `exit` shadows the builtin but is kept for API compatibility
        sys.exit(-1)
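# Usage sketch for the helpers above (editor's illustration; assumes the module
# is importable as utils.logging, matching the import used by the transforms
# module earlier in this diff).
import utils.logging as logging

logging.info("Creating Inference Engine")          # printed: INFO (2) <= log_level (2)
logging.debug("preprocess done")                   # suppressed: DEBUG (3) > log_level (2)
logging.warning("falling back to CPU")             # printed in red (use_color=True)
logging.error("model xml file not found", exit=False)  # logged without calling sys.exit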
...@@ -6,11 +6,11 @@ PaddleX supports accelerating inference of trained Paddle models via OpenVINO
|Hardware|Linux|Windows|Raspbian OS|C++|Python|Classification|Detection|Segmentation|
| ----| ---- | ---- | ----| ---- | ---- |---- | ---- |---- |
|CPU|Supported|Supported|Not supported|Supported|Supported|Supported|Supported|Supported|
|VPU|Supported|Supported|Supported|Supported|Supported|Supported|Not supported|Not supported|

**Note**: Raspbian OS is the Raspberry Pi operating system. Detection models only support YOLOV3, and because OpenVINO does not support the ONNX resize-11 OP, Paddle segmentation models are not supported yet.

## Deployment Workflow

**Deploying a PaddleX model to OpenVINO takes the following two steps**
...@@ -29,4 +29,4 @@ use OpenVINO for inference acceleration on CPU or VPU
**[Windows](./windows.md)**: how to use OpenVINO to accelerate PaddleX inference on Windows, with C++, on CPU or VPU
**[Python](./python.md)**: how to use OpenVINO to accelerate PaddleX inference from Python
\ No newline at end of file
...@@ -63,7 +63,7 @@ ARCH=x86
### Step4: Prediction

After a successful build, the prediction executable for classification tasks is `classifier` and the one for detection tasks is `detector`. Their main command-line arguments are described below:

| Argument | Description |
| ---- | ---- |
...@@ -72,7 +72,7 @@ ARCH=x86
| --image_list | A .txt file that stores image paths, one per line |
| --device | Target device, one of {"CPU","MYRIAD"}; defaults to "CPU". Use "MYRIAD" when running on a VPU |
| --cfg_dir | The .yml config file of the PaddleX model |
| --save_dir | Directory for saving visualized result images; applies to detection tasks only. Defaults to " ", i.e. results are not saved |

### Examples

`Example 1`
...@@ -85,7 +85,7 @@ classify a single image on CPU on a Linux system
`Example 2`:
Run detection on multiple images on CPU on a Linux system and save the visualized results.
The images to predict are listed in `/path/to/image_list.txt`; the content of image_list.txt is formatted as follows:
```
/path/to/images/test_img1.jpeg
...@@ -95,7 +95,7 @@ run segmentation on multiple images on CPU on a Linux system and save the visualized
```
```shell
./build/detector --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_dir=/path/to/PaddleX_model.yml --save_dir ./output
```
`Example 3`:
...@@ -118,9 +118,6 @@ run segmentation on multiple images on CPU on a Linux system and save the visualized
|---|---|---|---|
|resnet-50 | 20.56 | 16.12 | 224*224 |
|mobilenet-V2 | 5.16 | 2.31 |224*224|
|hrnet | None | 63.35 |512*512|
|unet | 276.40 | 211.49 |512*512|
|deeplabv3 | None | 25.91 |512*512|
|yolov3-mobilnetv1 |76.63| 46.26|608*608 |

`Test 2`:
...
...@@ -73,7 +73,7 @@ D:
cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release
```
* After a successful build, the entry programs of the image prediction demo are `detector.exe` and `classifier.exe`; choose the one that matches your model type. Their main command-line arguments are described below:

| Argument | Description |
| ---- | ---- |
...@@ -82,7 +82,7 @@ cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release
| --image_list | A .txt file that stores image paths, one per line |
| --device | Target device, one of {"CPU","MYRIAD"}; defaults to "CPU". Use "MYRIAD" when running on a VPU |
| --cfg_dir | The .yml config file of the PaddleX model |
| --save_dir | Directory for saving visualized result images; applies to detection tasks only. Defaults to " ", i.e. results are not saved |

### Examples

`Example 1`
...@@ -94,7 +94,7 @@ cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release
```
`Example 2`:
Run detection on multiple images on CPU and save the visualized results.
To predict multiple images, pass `/path/to/image_list.txt`; the content of image_list.txt is formatted as follows:
```
/path/to/images/test_img1.jpeg
...@@ -104,7 +104,7 @@ cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release
```
```shell
./detector.exe --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_dir=/path/to/PaddleX_model.yml --save_dir ./output
```
`Example 3`:
...
...@@ -23,7 +23,7 @@ sudo apt-get upgrade
```
## Paddle-Lite Deployment

Paddle-Lite-based deployment currently supports PaddleX classification, segmentation, and detection models; among detection models, only YOLOV3 is supported.
The workflow consists of two parts: converting the PaddleX model, then deploying the converted model.

**Note**: for PaddleX installation see [PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html); for detailed Paddle-Lite documentation see [Paddle-Lite](https://paddle-lite.readthedocs.io/zh/latest/index.html)
...