未验证 提交 c2548389 编写于 作者: J Jason 提交者: GitHub

Merge pull request #106 from PaddlePaddle/develop_jason

Develop jason
...@@ -10,7 +10,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了 ...@@ -10,7 +10,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了
| :------- | :------------| | :------- | :------------|
| 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)[RandomVerticalFlip](cls_transforms.html#randomverticalflip)<br> [RandomRotate](cls_transforms.html#randomrotate)[RandomDistort](cls_transforms.html#randomdistort) | | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)[RandomVerticalFlip](cls_transforms.html#randomverticalflip)<br> [RandomRotate](cls_transforms.html#randomrotate)[RandomDistort](cls_transforms.html#randomdistort) |
|目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)[RandomDistort](det_transforms.html#randomdistort)[RandomCrop](det_transforms.html#randomcrop)<br> [MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)[RandomExpand](det_transforms.html#randomexpand) | |目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)[RandomDistort](det_transforms.html#randomdistort)[RandomCrop](det_transforms.html#randomcrop)<br> [MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)[RandomExpand](det_transforms.html#randomexpand) |
|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)[RandomVerticalFlip](seg_transforms.html#randomverticalflip)[RandomRangeScaling](seg_transforms.html#randomrangescaling)<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)[RandomBlur](seg_transforms.html#randomblur)<br> [RandomRotation](seg_transforms.html#randomrotation)[RandomScaleAspect](seg_transforms.html#randomscaleaspect)[RandomDistort](seg_transforms.html#randomdistort) | |语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)[RandomVerticalFlip](seg_transforms.html#randomverticalflip)[RandomRangeScaling](seg_transforms.html#randomrangescaling)<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)[RandomBlur](seg_transforms.html#randomblur)<br> [RandomRotate](seg_transforms.html#randomrotate)[RandomScaleAspect](seg_transforms.html#randomscaleaspect)[RandomDistort](seg_transforms.html#randomdistort) |
## imgaug增强库的支持 ## imgaug增强库的支持
......
...@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1) ...@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1)
* **prob** (float): 图像模糊概率。默认为0.1。 * **prob** (float): 图像模糊概率。默认为0.1。
## RandomRotation ## RandomRotate
```python ```python
paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255) paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
``` ```
......
...@@ -53,4 +53,4 @@ log_level = 2 ...@@ -53,4 +53,4 @@ log_level = 2
from . import interpret from . import interpret
__version__ = '1.0.2.github' __version__ = '1.0.4'
...@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data): ...@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data):
padding_batch = [] padding_batch = []
for data in batch_data: for data in batch_data:
im_c, im_h, im_w = data[0].shape[:] im_c, im_h, im_w = data[0].shape[:]
padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), padding_im = np.zeros(
dtype=np.float32) (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = data[0] padding_im[:, :im_h, :im_w] = data[0]
padding_batch.append((padding_im, ) + data[1:]) padding_batch.append((padding_im, ) + data[1:])
return padding_batch return padding_batch
...@@ -226,8 +226,8 @@ class Dataset: ...@@ -226,8 +226,8 @@ class Dataset:
if num_workers == 'auto': if num_workers == 'auto':
import multiprocessing as mp import multiprocessing as mp
num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8 num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
if platform.platform().startswith( if platform.platform().startswith("Darwin") or platform.platform(
"Darwin") or platform.platform().startswith("Windows"): ).startswith("Windows"):
parallel_method = 'thread' parallel_method = 'thread'
if transforms is None: if transforms is None:
raise Exception("transform should be defined.") raise Exception("transform should be defined.")
......
...@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI): ...@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI):
if mode == 'train': if mode == 'train':
self.optimizer.minimize(model_out) self.optimizer.minimize(model_out)
outputs['loss'] = model_out outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else: else:
outputs['pred'] = model_out[0] outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1] outputs['logit'] = model_out[1]
...@@ -336,18 +331,26 @@ class DeepLabv3p(BaseAPI): ...@@ -336,18 +331,26 @@ class DeepLabv3p(BaseAPI):
for step, data in tqdm.tqdm( for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps): enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]) images = np.array([d[0] for d in data])
labels = np.array([d[1] for d in data])
_, _, im_h, im_w = images.shape
labels = list()
for d in data:
padding_label = np.zeros(
(1, im_h, im_w)).astype('int64') + self.ignore_index
padding_label[:, :im_h, :im_w] = d[1]
labels.append(padding_label)
labels = np.array(labels)
num_samples = images.shape[0] num_samples = images.shape[0]
if num_samples < batch_size: if num_samples < batch_size:
num_pad_samples = batch_size - num_samples num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images]) images = np.concatenate([images, pad_images])
feed_data = {'image': images} feed_data = {'image': images}
outputs = self.exe.run( outputs = self.exe.run(self.parallel_test_prog,
self.parallel_test_prog, feed=feed_data,
feed=feed_data, fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=True)
return_numpy=True)
pred = outputs[0] pred = outputs[0]
if num_samples < batch_size: if num_samples < batch_size:
pred = pred[0:num_samples] pred = pred[0:num_samples]
...@@ -364,8 +367,7 @@ class DeepLabv3p(BaseAPI): ...@@ -364,8 +367,7 @@ class DeepLabv3p(BaseAPI):
metrics = OrderedDict( metrics = OrderedDict(
zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'],
[miou, category_iou, macc, category_acc, [miou, category_iou, macc, category_acc, conf_mat.kappa()]))
conf_mat.kappa()]))
if return_details: if return_details:
eval_details = { eval_details = {
'confusion_matrix': conf_mat.confusion_matrix.tolist() 'confusion_matrix': conf_mat.confusion_matrix.tolist()
...@@ -394,10 +396,9 @@ class DeepLabv3p(BaseAPI): ...@@ -394,10 +396,9 @@ class DeepLabv3p(BaseAPI):
transforms=self.test_transforms, mode='test') transforms=self.test_transforms, mode='test')
im, im_info = self.test_transforms(im_file) im, im_info = self.test_transforms(im_file)
im = np.expand_dims(im, axis=0) im = np.expand_dims(im, axis=0)
result = self.exe.run( result = self.exe.run(self.test_prog,
self.test_prog, feed={'image': im},
feed={'image': im}, fetch_list=list(self.test_outputs.values()))
fetch_list=list(self.test_outputs.values()))
pred = result[0] pred = result[0]
pred = np.squeeze(pred).astype('uint8') pred = np.squeeze(pred).astype('uint8')
logit = result[1] logit = result[1]
...@@ -413,6 +414,6 @@ class DeepLabv3p(BaseAPI): ...@@ -413,6 +414,6 @@ class DeepLabv3p(BaseAPI):
pred = pred[0:h, 0:w] pred = pred[0:h, 0:w]
logit = logit[0:h, 0:w, :] logit = logit[0:h, 0:w, :]
else: else:
raise Exception("Unexpected info '{}' in im_info".format( raise Exception("Unexpected info '{}' in im_info".format(info[
info[0])) 0]))
return {'label_map': pred, 'score_map': logit} return {'label_map': pred, 'score_map': logit}
...@@ -135,7 +135,8 @@ class DeepLabv3p(object): ...@@ -135,7 +135,8 @@ class DeepLabv3p(object):
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
name=name_scope + 'weights', name=name_scope + 'weights',
regularizer=None, regularizer=None,
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.06))
with scope('encoder'): with scope('encoder'):
channel = 256 channel = 256
with scope("image_pool"): with scope("image_pool"):
...@@ -151,8 +152,8 @@ class DeepLabv3p(object): ...@@ -151,8 +152,8 @@ class DeepLabv3p(object):
padding=0, padding=0,
param_attr=param_attr)) param_attr=param_attr))
input_shape = fluid.layers.shape(input) input_shape = fluid.layers.shape(input)
image_avg = fluid.layers.resize_bilinear( image_avg = fluid.layers.resize_bilinear(image_avg,
image_avg, input_shape[2:]) input_shape[2:])
with scope("aspp0"): with scope("aspp0"):
aspp0 = bn_relu( aspp0 = bn_relu(
...@@ -244,7 +245,8 @@ class DeepLabv3p(object): ...@@ -244,7 +245,8 @@ class DeepLabv3p(object):
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
name=name_scope + 'weights', name=name_scope + 'weights',
regularizer=None, regularizer=None,
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.06))
with scope('decoder'): with scope('decoder'):
with scope('concat'): with scope('concat'):
decode_shortcut = bn_relu( decode_shortcut = bn_relu(
...@@ -326,9 +328,6 @@ class DeepLabv3p(object): ...@@ -326,9 +328,6 @@ class DeepLabv3p(object):
if self.mode == 'train': if self.mode == 'train':
inputs['label'] = fluid.data( inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label') dtype='int32', shape=[None, 1, None, None], name='label')
elif self.mode == 'eval':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
return inputs return inputs
def build_net(self, inputs): def build_net(self, inputs):
...@@ -351,7 +350,8 @@ class DeepLabv3p(object): ...@@ -351,7 +350,8 @@ class DeepLabv3p(object):
name=name_scope + 'weights', name=name_scope + 'weights',
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0), regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.01))
with scope('logit'): with scope('logit'):
with fluid.name_scope('last_conv'): with fluid.name_scope('last_conv'):
logit = conv( logit = conv(
......
...@@ -92,6 +92,12 @@ class Compose(ClsTransform): ...@@ -92,6 +92,12 @@ class Compose(ClsTransform):
outputs = (im, label) outputs = (im, label)
return outputs return outputs
def add_augmenters(self, augmenters):
    """Prepend extra augmentation operators to this pipeline.

    Args:
        augmenters (list): Operators to run before the transforms
            already stored in ``self.transforms``.

    Raises:
        Exception: If ``augmenters`` is not a list.
    """
    if not isinstance(augmenters, list):
        raise Exception(
            "augmenters should be list type in func add_augmenters()")
    # The previous code read ``self.transforms.transforms`` and then stored
    # a plain list back into ``self.transforms``, so a second call always
    # failed with AttributeError. Unwrap a nested ``.transforms`` only when
    # it exists; otherwise use the stored sequence directly.
    current = getattr(self.transforms, 'transforms', self.transforms)
    self.transforms = augmenters + list(current)
class RandomCrop(ClsTransform): class RandomCrop(ClsTransform):
"""对图像进行随机剪裁,模型训练时的数据增强操作。 """对图像进行随机剪裁,模型训练时的数据增强操作。
...@@ -461,3 +467,56 @@ class ArrangeClassifier(ClsTransform): ...@@ -461,3 +467,56 @@ class ArrangeClassifier(ClsTransform):
else: else:
outputs = (im, ) outputs = (im, )
return outputs return outputs
class ComposedClsTransforms(Compose):
    """Default preprocessing pipeline for classification models.

    Training:
        1. Randomly crop a sub-image and resize it to ``crop_size``.
        2. Horizontally flip the result with probability 0.5.
        3. Normalize the image.
    Evaluation / prediction:
        1. Resize proportionally so that the short side equals
           ``int(crop_size * 1.14)``.
        2. Center-crop an image of size ``crop_size``.
        3. Normalize the image.

    Args:
        mode (str): Processing stage: 'train', 'eval' or 'test'. Any value
            other than 'train' selects the evaluation/prediction pipeline.
        crop_size (int|list|tuple): Size of the image fed to the model.
            A two-element sequence must hold two equal values.
        mean (list): Per-channel mean forwarded to ``Normalize``.
        std (list): Per-channel std forwarded to ``Normalize``.

    Raises:
        Exception: If the two entries of ``crop_size`` differ, or if the
            side length is not a multiple of 32.
    """

    def __init__(self,
                 mode,
                 crop_size=[224, 224],
                 mean=[0.485, 0.456, 0.406],
                 std=[0.229, 0.224, 0.225]):
        # NOTE: the list defaults are kept for interface compatibility;
        # they are only read here, never mutated.
        width = crop_size
        # Accept tuples as well as lists; previously a tuple skipped this
        # branch and crashed on ``tuple % 32`` with a TypeError.
        if isinstance(crop_size, (list, tuple)):
            if crop_size[0] != crop_size[1]:
                raise Exception(
                    "In classifier model, width and height should be equal, please modify your parameter `crop_size`"
                )
            width = crop_size[0]
        if width % 32 != 0:
            raise Exception(
                "In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
            )

        if mode == 'train':
            # Training pipeline, including data augmentation.
            transforms = [
                RandomCrop(crop_size=width),
                RandomHorizontalFlip(prob=0.5),
                Normalize(mean=mean, std=std),
            ]
        else:
            # Evaluation / prediction pipeline.
            transforms = [
                ResizeByShort(short_size=int(width * 1.14)),
                CenterCrop(crop_size=width),
                Normalize(mean=mean, std=std),
            ]

        super(ComposedClsTransforms, self).__init__(transforms)
...@@ -152,6 +152,12 @@ class Compose(DetTransform): ...@@ -152,6 +152,12 @@ class Compose(DetTransform):
outputs = (im, im_info) outputs = (im, im_info)
return outputs return outputs
def add_augmenters(self, augmenters):
    """Prepend extra augmentation operators to this pipeline.

    Args:
        augmenters (list): Operators to run before the transforms
            already stored in ``self.transforms``.

    Raises:
        Exception: If ``augmenters`` is not a list.
    """
    if not isinstance(augmenters, list):
        raise Exception(
            "augmenters should be list type in func add_augmenters()")
    # The previous code read ``self.transforms.transforms`` and then stored
    # a plain list back into ``self.transforms``, so a second call always
    # failed with AttributeError. Unwrap a nested ``.transforms`` only when
    # it exists; otherwise use the stored sequence directly.
    current = getattr(self.transforms, 'transforms', self.transforms)
    self.transforms = augmenters + list(current)
class ResizeByShort(DetTransform): class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。 """根据图像的短边调整图像大小(resize)。
...@@ -1227,3 +1233,108 @@ class ArrangeYOLOv3(DetTransform): ...@@ -1227,3 +1233,108 @@ class ArrangeYOLOv3(DetTransform):
im_shape = im_info['image_shape'] im_shape = im_info['image_shape']
outputs = (im, im_shape) outputs = (im, im_shape)
return outputs return outputs
class ComposedRCNNTransforms(Compose):
    """Default preprocessing pipeline for RCNN models (Faster R-CNN / Mask R-CNN).

    Training:
        1. Horizontally flip the image with probability 0.5.
        2. Normalize the image.
        3. Resize the image proportionally, with the scale computed as
               scale = min_max_size[0] / short_size_of_image
               if max_size_of_image * scale > min_max_size[1]:
                   scale = min_max_size[1] / max_size_of_image
        4. Pad the result of step 3 so height and width are multiples of 32.
    Evaluation / prediction:
        1. Normalize the image.
        2. Resize the image proportionally, same scale rule as in training.
        3. Pad the result of step 2 so height and width are multiples of 32.

    Args:
        mode (str): Processing stage: 'train', 'eval' or 'test'. Any value
            other than 'train' selects the evaluation/prediction pipeline.
        min_max_size (list): Short-side and long-side constraints used when
            resizing, forwarded to ``ResizeByShort`` as ``short_size`` and
            ``max_size``.
        mean (list): Per-channel mean forwarded to ``Normalize``.
        std (list): Per-channel std forwarded to ``Normalize``.
    """

    def __init__(self,
                 mode,
                 min_max_size=[800, 1333],
                 mean=[0.485, 0.456, 0.406],
                 std=[0.229, 0.224, 0.225]):
        # Only the training stage puts an augmentation step in front of the
        # steps shared by every mode.
        pipeline = [RandomHorizontalFlip(prob=0.5)] if mode == 'train' else []
        pipeline.append(Normalize(mean=mean, std=std))
        pipeline.append(
            ResizeByShort(
                short_size=min_max_size[0], max_size=min_max_size[1]))
        pipeline.append(Padding(coarsest_stride=32))

        super(ComposedRCNNTransforms, self).__init__(pipeline)
class ComposedYOLOTransforms(Compose):
    """Default preprocessing pipeline for YOLOv3 models.

    Training:
        1. Apply the MixupImage strategy during the first ``mixup_epoch``
           epochs, see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
        2. Randomly distort the image (brightness, contrast, saturation, hue).
        3. Randomly expand the image, see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
        4. Randomly crop the image.
        5. Resize the output of step 4 to the size given by ``shape``.
        6. Horizontally flip the image with probability 0.5.
        7. Normalize the image.
    Evaluation / prediction:
        1. Resize the image to the size given by ``shape``.
        2. Normalize the image.

    Args:
        mode (str): Processing stage: 'train', 'eval' or 'test'. Any value
            other than 'train' selects the evaluation/prediction pipeline.
        shape (int|list|tuple): Size of the image fed to the model. A
            two-element sequence must hold two equal values.
        mixup_epoch (int): Number of initial training epochs that use mixup.
        mean (list): Per-channel mean forwarded to ``Normalize``.
        std (list): Per-channel std forwarded to ``Normalize``.

    Raises:
        Exception: If the two entries of ``shape`` differ, or if the side
            length is not a multiple of 32.
    """

    def __init__(self,
                 mode,
                 shape=[608, 608],
                 mixup_epoch=250,
                 mean=[0.485, 0.456, 0.406],
                 std=[0.229, 0.224, 0.225]):
        # NOTE: the list defaults are kept for interface compatibility;
        # they are only read here, never mutated.
        width = shape
        # Accept tuples as well as lists; previously a tuple skipped this
        # branch and crashed on ``tuple % 32`` with a TypeError.
        if isinstance(shape, (list, tuple)):
            if shape[0] != shape[1]:
                raise Exception(
                    "In YOLOv3 model, width and height should be equal")
            width = shape[0]
        if width % 32 != 0:
            raise Exception(
                "In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...."
            )

        if mode == 'train':
            # Training pipeline, including data augmentation.
            transforms = [
                MixupImage(mixup_epoch=mixup_epoch),
                RandomDistort(),
                RandomExpand(),
                RandomCrop(),
                Resize(target_size=width, interp='RANDOM'),
                RandomHorizontalFlip(),
                Normalize(mean=mean, std=std),
            ]
        else:
            # Evaluation / prediction pipeline.
            transforms = [
                Resize(target_size=width, interp='CUBIC'),
                Normalize(mean=mean, std=std),
            ]
        super(ComposedYOLOTransforms, self).__init__(transforms)
...@@ -108,6 +108,12 @@ class Compose(SegTransform): ...@@ -108,6 +108,12 @@ class Compose(SegTransform):
outputs = (im, im_info) outputs = (im, im_info)
return outputs return outputs
def add_augmenters(self, augmenters):
    """Prepend extra augmentation operators to this pipeline.

    Args:
        augmenters (list): Operators to run before the transforms
            already stored in ``self.transforms``.

    Raises:
        Exception: If ``augmenters`` is not a list.
    """
    if not isinstance(augmenters, list):
        raise Exception(
            "augmenters should be list type in func add_augmenters()")
    # The previous code read ``self.transforms.transforms`` and then stored
    # a plain list back into ``self.transforms``, so a second call always
    # failed with AttributeError. Unwrap a nested ``.transforms`` only when
    # it exists; otherwise use the stored sequence directly.
    current = getattr(self.transforms, 'transforms', self.transforms)
    self.transforms = augmenters + list(current)
class RandomHorizontalFlip(SegTransform): class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
...@@ -1088,3 +1094,39 @@ class ArrangeSegmenter(SegTransform): ...@@ -1088,3 +1094,39 @@ class ArrangeSegmenter(SegTransform):
return (im, im_info) return (im, im_info)
else: else:
return (im, ) return (im, )
class ComposedSegTransforms(Compose):
    """Default preprocessing pipeline for semantic segmentation models (UNet / DeepLabv3p).

    Training:
        1. Horizontally flip the image with probability 0.5.
        2. Rescale the image with ``ResizeStepScaling`` (the original notes
           describe this as a random resize by varying ratios).
        3. Randomly crop a ``train_crop_size`` sub-image with
           ``RandomPaddingCrop``; per the original notes, a crop smaller
           than ``train_crop_size`` is padded up to that size.
        4. Normalize the image.
    Evaluation / prediction:
        1. Resize the image with ``Resize(512)``.
        2. Normalize the image.

    NOTE(review): the original docstring listed only normalization for the
    prediction stage, but the code also applies ``Resize(512)`` first.
    Confirm which one is intended.

    Args:
        mode (str): Processing stage: 'train', 'eval' or 'test'. Any value
            other than 'train' selects the evaluation/prediction pipeline.
        train_crop_size (list): Crop size used during training, forwarded
            to ``RandomPaddingCrop``.
        mean (list): Per-channel mean forwarded to ``Normalize``.
        std (list): Per-channel std forwarded to ``Normalize``.
    """

    def __init__(self,
                 mode,
                 train_crop_size=[769, 769],
                 mean=[0.5, 0.5, 0.5],
                 std=[0.5, 0.5, 0.5]):
        if mode != 'train':
            # Evaluation / prediction: fixed resize followed by normalization.
            pipeline = [Resize(512), Normalize(mean=mean, std=std)]
        else:
            # Training: augmentation steps first, normalization last.
            pipeline = [
                RandomHorizontalFlip(prob=0.5),
                ResizeStepScaling(),
                RandomPaddingCrop(crop_size=train_crop_size),
                Normalize(mean=mean, std=std),
            ]

        super(ComposedSegTransforms, self).__init__(pipeline)
...@@ -97,8 +97,6 @@ class Predictor: ...@@ -97,8 +97,6 @@ class Predictor:
config.disable_glog_info() config.disable_glog_info()
if memory_optimize: if memory_optimize:
config.enable_memory_optim() config.enable_memory_optim()
else:
config.diable_memory_optim()
# 开启计算图分析优化,包括OP融合等 # 开启计算图分析优化,包括OP融合等
config.switch_ir_optim(True) config.switch_ir_optim(True)
......
...@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit ...@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit
setuptools.setup( setuptools.setup(
name="paddlex", name="paddlex",
version='1.0.2', version='1.0.4',
author="paddlex", author="paddlex",
author_email="paddlex@baidu.com", author_email="paddlex@baidu.com",
description=long_description, description=long_description,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册