diff --git a/docs/apis/transforms/augment.md b/docs/apis/transforms/augment.md
index 4ed04ca064cad113a2375dc3375d651572b374c1..f8c66b45d2d853fde57c520b079f9974e5fa4d76 100644
--- a/docs/apis/transforms/augment.md
+++ b/docs/apis/transforms/augment.md
@@ -10,7 +10,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了
| :------- | :------------|
| 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、
[RandomRotate](cls_transforms.html#randomratate)、 [RandomDistort](cls_transforms.html#randomdistort) |
|目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、[RandomExpand](det_transforms.html#randomexpand) |
-|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) |
+|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotate](seg_transforms.html#randomrotate)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) |
## imgaug增强库的支持
diff --git a/docs/apis/transforms/seg_transforms.md b/docs/apis/transforms/seg_transforms.md
index 1708290680e767b3a06615f0b789279b716433ea..d95d8a4d9a35723b0f489fa972ed28fcadd1d211 100755
--- a/docs/apis/transforms/seg_transforms.md
+++ b/docs/apis/transforms/seg_transforms.md
@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1)
* **prob** (float): 图像模糊概率。默认为0.1。
-## RandomRotation类
+## RandomRotate类
```python
paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
```
diff --git a/paddlex/__init__.py b/paddlex/__init__.py
index de762df7ad7dc01670e795b93f709bb23a08f1c8..972210bdb80c445e59d4a8ed10418ee988bd353c 100644
--- a/paddlex/__init__.py
+++ b/paddlex/__init__.py
@@ -53,4 +53,4 @@ log_level = 2
from . import interpret
-__version__ = '1.0.2.github'
+__version__ = '1.0.4'
diff --git a/paddlex/cv/datasets/dataset.py b/paddlex/cv/datasets/dataset.py
index c3bec8997cb0a04590f4946dc87b0eb8bd3a0c43..3cc8b52c10f41ce17bbe8da7fd5289b96aac409e 100644
--- a/paddlex/cv/datasets/dataset.py
+++ b/paddlex/cv/datasets/dataset.py
@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data):
padding_batch = []
for data in batch_data:
im_c, im_h, im_w = data[0].shape[:]
- padding_im = np.zeros((im_c, max_shape[1], max_shape[2]),
- dtype=np.float32)
+ padding_im = np.zeros(
+ (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = data[0]
padding_batch.append((padding_im, ) + data[1:])
return padding_batch
@@ -226,8 +226,8 @@ class Dataset:
if num_workers == 'auto':
import multiprocessing as mp
num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
- if platform.platform().startswith(
- "Darwin") or platform.platform().startswith("Windows"):
+ if platform.platform().startswith("Darwin") or platform.platform(
+ ).startswith("Windows"):
parallel_method = 'thread'
if transforms is None:
raise Exception("transform should be defined.")
diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py
index a253aa5d1d8c005c7903b57a9b9b36da45982d78..3127bd8549ae221f7f7604613bba2e1437b93605 100644
--- a/paddlex/cv/models/deeplabv3p.py
+++ b/paddlex/cv/models/deeplabv3p.py
@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI):
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
- elif mode == 'eval':
- outputs['loss'] = model_out[0]
- outputs['pred'] = model_out[1]
- outputs['label'] = model_out[2]
- outputs['mask'] = model_out[3]
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
@@ -336,18 +331,26 @@ class DeepLabv3p(BaseAPI):
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data])
- labels = np.array([d[1] for d in data])
+
+            _, _, im_h, im_w = images.shape
+            labels = list()
+            for d in data:
+                # Each label is copied into the top-left corner of a canvas
+                # filled with ignore_index that has the batch image size.
+                # The destination slice must use the label's own height and
+                # width: slicing by im_h/im_w selects the whole canvas and
+                # raises a shape-mismatch error for any label that is
+                # smaller than the (presumably padded) batch images.
+                label = np.asarray(d[1])
+                label_h, label_w = label.shape[-2:]
+                padding_label = np.zeros(
+                    (1, im_h, im_w)).astype('int64') + self.ignore_index
+                padding_label[:, :label_h, :label_w] = label
+                labels.append(padding_label)
+            labels = np.array(labels)
+
num_samples = images.shape[0]
if num_samples < batch_size:
num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images])
feed_data = {'image': images}
- outputs = self.exe.run(
- self.parallel_test_prog,
- feed=feed_data,
- fetch_list=list(self.test_outputs.values()),
- return_numpy=True)
+ outputs = self.exe.run(self.parallel_test_prog,
+ feed=feed_data,
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=True)
pred = outputs[0]
if num_samples < batch_size:
pred = pred[0:num_samples]
@@ -364,8 +367,7 @@ class DeepLabv3p(BaseAPI):
metrics = OrderedDict(
zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'],
- [miou, category_iou, macc, category_acc,
- conf_mat.kappa()]))
+ [miou, category_iou, macc, category_acc, conf_mat.kappa()]))
if return_details:
eval_details = {
'confusion_matrix': conf_mat.confusion_matrix.tolist()
@@ -394,10 +396,9 @@ class DeepLabv3p(BaseAPI):
transforms=self.test_transforms, mode='test')
im, im_info = self.test_transforms(im_file)
im = np.expand_dims(im, axis=0)
- result = self.exe.run(
- self.test_prog,
- feed={'image': im},
- fetch_list=list(self.test_outputs.values()))
+ result = self.exe.run(self.test_prog,
+ feed={'image': im},
+ fetch_list=list(self.test_outputs.values()))
pred = result[0]
pred = np.squeeze(pred).astype('uint8')
logit = result[1]
@@ -413,6 +414,6 @@ class DeepLabv3p(BaseAPI):
pred = pred[0:h, 0:w]
logit = logit[0:h, 0:w, :]
else:
- raise Exception("Unexpected info '{}' in im_info".format(
- info[0]))
+ raise Exception("Unexpected info '{}' in im_info".format(info[
+ 0]))
return {'label_map': pred, 'score_map': logit}
diff --git a/paddlex/cv/nets/segmentation/deeplabv3p.py b/paddlex/cv/nets/segmentation/deeplabv3p.py
index 08dad240c0b28d6e6e13845dcc0c9148c442014f..60a34d0128bf271d42fa8658100318ce05e31812 100644
--- a/paddlex/cv/nets/segmentation/deeplabv3p.py
+++ b/paddlex/cv/nets/segmentation/deeplabv3p.py
@@ -135,7 +135,8 @@ class DeepLabv3p(object):
param_attr = fluid.ParamAttr(
name=name_scope + 'weights',
regularizer=None,
- initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
+ initializer=fluid.initializer.TruncatedNormal(
+ loc=0.0, scale=0.06))
with scope('encoder'):
channel = 256
with scope("image_pool"):
@@ -151,8 +152,8 @@ class DeepLabv3p(object):
padding=0,
param_attr=param_attr))
input_shape = fluid.layers.shape(input)
- image_avg = fluid.layers.resize_bilinear(
- image_avg, input_shape[2:])
+ image_avg = fluid.layers.resize_bilinear(image_avg,
+ input_shape[2:])
with scope("aspp0"):
aspp0 = bn_relu(
@@ -244,7 +245,8 @@ class DeepLabv3p(object):
param_attr = fluid.ParamAttr(
name=name_scope + 'weights',
regularizer=None,
- initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
+ initializer=fluid.initializer.TruncatedNormal(
+ loc=0.0, scale=0.06))
with scope('decoder'):
with scope('concat'):
decode_shortcut = bn_relu(
@@ -326,9 +328,6 @@ class DeepLabv3p(object):
if self.mode == 'train':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
- elif self.mode == 'eval':
- inputs['label'] = fluid.data(
- dtype='int32', shape=[None, 1, None, None], name='label')
return inputs
def build_net(self, inputs):
@@ -351,7 +350,8 @@ class DeepLabv3p(object):
name=name_scope + 'weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
- initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
+ initializer=fluid.initializer.TruncatedNormal(
+ loc=0.0, scale=0.01))
with scope('logit'):
with fluid.name_scope('last_conv'):
logit = conv(
diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py
index bcb8e6d38de9becacce4d80e2ff54588c15352f4..6dc4ea7b95d876ae896c77395ab155bec1727a8a 100644
--- a/paddlex/cv/transforms/cls_transforms.py
+++ b/paddlex/cv/transforms/cls_transforms.py
@@ -92,6 +92,12 @@ class Compose(ClsTransform):
outputs = (im, label)
return outputs
+    def add_augmenters(self, augmenters):
+        """Prepend extra augmentation operators to this pipeline.
+
+        Args:
+            augmenters (list): Transform operators to run before the
+                operators already held by this Compose instance.
+
+        Raises:
+            Exception: If `augmenters` is not a list.
+        """
+        if not isinstance(augmenters, list):
+            raise Exception(
+                "augmenters should be list type in func add_augmenters()")
+        # self.transforms is reassigned to a plain list here, so it has to be
+        # read as one too; the former `self.transforms.transforms` lookup
+        # raises AttributeError on a list.
+        # NOTE(review): assumes Compose.__init__ stores the operator list
+        # directly in self.transforms -- confirm against the constructor.
+        self.transforms = augmenters + self.transforms
+
class RandomCrop(ClsTransform):
"""对图像进行随机剪裁,模型训练时的数据增强操作。
@@ -461,3 +467,56 @@ class ArrangeClassifier(ClsTransform):
else:
outputs = (im, )
return outputs
+
+
+class ComposedClsTransforms(Compose):
+    """Default preprocessing pipeline for image classification models.
+
+    Training stage:
+        1. Randomly crop a sub-image and resize it to `crop_size`.
+        2. Flip the result of step 1 horizontally with probability 0.5.
+        3. Normalize the image.
+    Evaluation / prediction stage:
+        1. Resize the image, keeping its aspect ratio, so that the short
+           side becomes int(crop_size * 1.14).
+        2. Crop a `crop_size` patch from the image center.
+        3. Normalize the image.
+
+    Args:
+        mode (str): Pipeline stage. 'train' selects the training pipeline;
+            any other value (e.g. 'eval', 'test') selects the evaluation /
+            prediction pipeline.
+        crop_size (int|list|tuple): Side length of the square model input,
+            given either as one int or as a pair of two equal values.
+        mean (list): Per-channel mean passed to Normalize.
+        std (list): Per-channel std passed to Normalize.
+
+    Raises:
+        Exception: If the two entries of `crop_size` differ, or if the side
+            length is not a multiple of 32.
+    """
+
+    def __init__(self,
+                 mode,
+                 crop_size=[224, 224],
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        width = crop_size
+        # Tuples are accepted alongside lists: a tuple used to skip this
+        # branch and then fail with a TypeError on the modulo check below.
+        if isinstance(crop_size, (list, tuple)):
+            if crop_size[0] != crop_size[1]:
+                raise Exception(
+                    "In classifier model, width and height should be equal, please modify your parameter `crop_size`"
+                )
+            width = crop_size[0]
+        if width % 32 != 0:
+            raise Exception(
+                "In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
+            )
+
+        if mode == 'train':
+            # Training pipeline, including data augmentation.
+            transforms = [
+                RandomCrop(crop_size=width),
+                RandomHorizontalFlip(prob=0.5),
+                Normalize(mean=mean, std=std),
+            ]
+        else:
+            # Evaluation / prediction pipeline.
+            transforms = [
+                ResizeByShort(short_size=int(width * 1.14)),
+                CenterCrop(crop_size=width),
+                Normalize(mean=mean, std=std),
+            ]
+
+        super(ComposedClsTransforms, self).__init__(transforms)
diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py
index ca892d75bc7913cd1e238bb96e82e76f9a4fd716..19db33173b87b7cc20b87054cfbc1241176abc58 100644
--- a/paddlex/cv/transforms/det_transforms.py
+++ b/paddlex/cv/transforms/det_transforms.py
@@ -152,6 +152,12 @@ class Compose(DetTransform):
outputs = (im, im_info)
return outputs
+    def add_augmenters(self, augmenters):
+        """Prepend extra augmentation operators to this detection pipeline.
+
+        Args:
+            augmenters (list): Transform operators to run before the
+                operators already held by this Compose instance.
+
+        Raises:
+            Exception: If `augmenters` is not a list.
+        """
+        if not isinstance(augmenters, list):
+            raise Exception(
+                "augmenters should be list type in func add_augmenters()")
+        # self.transforms is reassigned to a plain list here, so it has to be
+        # read as one too; the former `self.transforms.transforms` lookup
+        # raises AttributeError on a list.
+        # NOTE(review): assumes Compose.__init__ stores the operator list
+        # directly in self.transforms -- confirm against the constructor.
+        self.transforms = augmenters + self.transforms
+
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
@@ -1227,3 +1233,108 @@ class ArrangeYOLOv3(DetTransform):
im_shape = im_info['image_shape']
outputs = (im, im_shape)
return outputs
+
+
+class ComposedRCNNTransforms(Compose):
+    """Default preprocessing pipeline for RCNN models (Faster R-CNN / Mask R-CNN).
+
+    Training stage:
+        1. Flip the image horizontally with probability 0.5.
+        2. Normalize the image.
+        3. Resize the image keeping its aspect ratio, with the scale chosen as
+               scale = min_max_size[0] / short_size_of_image
+               if max_size_of_image * scale > min_max_size[1]:
+                   scale = min_max_size[1] / max_size_of_image
+        4. Pad the result of step 3 so that height and width are multiples
+           of 32.
+    Evaluation / prediction stage:
+        Steps 2-4 of the training stage, i.e. the same pipeline without the
+        random flip.
+
+    Args:
+        mode (str): Pipeline stage. 'train' selects the training pipeline;
+            any other value (e.g. 'eval', 'test') selects the evaluation /
+            prediction pipeline.
+        min_max_size (list): Short-side target and long-side limit used
+            when resizing, as [min, max].
+        mean (list): Per-channel mean passed to Normalize.
+        std (list): Per-channel std passed to Normalize.
+    """
+
+    def __init__(self,
+                 mode,
+                 min_max_size=[800, 1333],
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        # Only training adds augmentation (the random flip); every stage
+        # shares the normalize -> resize -> pad tail.
+        pipeline = [RandomHorizontalFlip(prob=0.5)] if mode == 'train' else []
+        pipeline.append(Normalize(mean=mean, std=std))
+        pipeline.append(
+            ResizeByShort(
+                short_size=min_max_size[0], max_size=min_max_size[1]))
+        pipeline.append(Padding(coarsest_stride=32))
+
+        super(ComposedRCNNTransforms, self).__init__(pipeline)
+
+
+class ComposedYOLOTransforms(Compose):
+    """Default preprocessing pipeline for YOLOv3 models.
+
+    Training stage:
+        1. Apply the MixupImage strategy during the first `mixup_epoch`
+           epochs, see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
+        2. Randomly distort the image (brightness, contrast, saturation
+           and hue).
+        3. Randomly expand the image, see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
+        4. Randomly crop the image.
+        5. Resize the output of step 4 to the size given by `shape`.
+        6. Randomly flip the image horizontally (RandomHorizontalFlip with
+           its default probability; documented as 0.5 by the author).
+        7. Normalize the image.
+    Evaluation / prediction stage:
+        1. Resize the image to the size given by `shape`.
+        2. Normalize the image.
+
+    Args:
+        mode (str): Pipeline stage. 'train' selects the training pipeline;
+            any other value (e.g. 'eval', 'test') selects the evaluation /
+            prediction pipeline.
+        shape (int|list|tuple): Side length of the square model input,
+            given either as one int or as a pair of two equal values.
+        mixup_epoch (int): Number of leading training epochs that use the
+            mixup strategy.
+        mean (list): Per-channel mean passed to Normalize.
+        std (list): Per-channel std passed to Normalize.
+
+    Raises:
+        Exception: If the two entries of `shape` differ, or if the side
+            length is not a multiple of 32.
+    """
+
+    def __init__(self,
+                 mode,
+                 shape=[608, 608],
+                 mixup_epoch=250,
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        width = shape
+        # Tuples are accepted alongside lists: a tuple used to skip this
+        # branch and then fail with a TypeError on the modulo check below.
+        if isinstance(shape, (list, tuple)):
+            if shape[0] != shape[1]:
+                raise Exception(
+                    "In YOLOv3 model, width and height should be equal")
+            width = shape[0]
+        if width % 32 != 0:
+            raise Exception(
+                "In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...."
+            )
+
+        if mode == 'train':
+            # Training pipeline, including data augmentation.
+            transforms = [
+                MixupImage(mixup_epoch=mixup_epoch),
+                RandomDistort(),
+                RandomExpand(),
+                RandomCrop(),
+                Resize(target_size=width, interp='RANDOM'),
+                RandomHorizontalFlip(),
+                Normalize(mean=mean, std=std),
+            ]
+        else:
+            # Evaluation / prediction pipeline.
+            transforms = [
+                Resize(target_size=width, interp='CUBIC'),
+                Normalize(mean=mean, std=std),
+            ]
+        super(ComposedYOLOTransforms, self).__init__(transforms)
diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py
index e562ba2601677085fdef23c57a6779ba77143f8d..d3c67648d500d915315c5607cfc5c2f5538a9090 100644
--- a/paddlex/cv/transforms/seg_transforms.py
+++ b/paddlex/cv/transforms/seg_transforms.py
@@ -108,6 +108,12 @@ class Compose(SegTransform):
outputs = (im, im_info)
return outputs
+    def add_augmenters(self, augmenters):
+        """Prepend extra augmentation operators to this segmentation pipeline.
+
+        Args:
+            augmenters (list): Transform operators to run before the
+                operators already held by this Compose instance.
+
+        Raises:
+            Exception: If `augmenters` is not a list.
+        """
+        if not isinstance(augmenters, list):
+            raise Exception(
+                "augmenters should be list type in func add_augmenters()")
+        # self.transforms is reassigned to a plain list here, so it has to be
+        # read as one too; the former `self.transforms.transforms` lookup
+        # raises AttributeError on a list.
+        # NOTE(review): assumes Compose.__init__ stores the operator list
+        # directly in self.transforms -- confirm against the constructor.
+        self.transforms = augmenters + self.transforms
+
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
@@ -1088,3 +1094,39 @@ class ArrangeSegmenter(SegTransform):
return (im, im_info)
else:
return (im, )
+
+
+class ComposedSegTransforms(Compose):
+    """Default preprocessing pipeline for semantic segmentation models
+    (UNet / DeepLabv3p).
+
+    Training stage:
+        1. Flip the image horizontally with probability 0.5.
+        2. Randomly rescale the image (ResizeStepScaling with its default
+           arguments).
+        3. Randomly crop a `train_crop_size` patch; according to the
+           original notes, a crop smaller than `train_crop_size` is padded
+           up to that size.
+        4. Normalize the image.
+    Evaluation / prediction stage:
+        1. Apply Resize(512).
+        2. Normalize the image.
+
+    Args:
+        mode (str): Pipeline stage. 'train' selects the training pipeline;
+            any other value (e.g. 'eval', 'test') selects the evaluation /
+            prediction pipeline.
+        train_crop_size (list): Crop size used by RandomPaddingCrop during
+            training.
+        mean (list): Per-channel mean passed to Normalize.
+        std (list): Per-channel std passed to Normalize.
+    """
+
+    def __init__(self,
+                 mode,
+                 train_crop_size=[769, 769],
+                 mean=[0.5, 0.5, 0.5],
+                 std=[0.5, 0.5, 0.5]):
+        if mode == 'train':
+            # Augmentation steps that only run during training.
+            pipeline = [
+                RandomHorizontalFlip(prob=0.5),
+                ResizeStepScaling(),
+                RandomPaddingCrop(crop_size=train_crop_size),
+            ]
+        else:
+            # Evaluation / prediction only resizes before normalizing.
+            pipeline = [Resize(512)]
+        # Normalization always comes last, whatever the stage.
+        pipeline.append(Normalize(mean=mean, std=std))
+
+        super(ComposedSegTransforms, self).__init__(pipeline)
diff --git a/paddlex/deploy.py b/paddlex/deploy.py
index bb2618c1d844836a4884d93218f7d67434103b8e..0aee491ecdda1609b8827f94d0412a26bf053650 100644
--- a/paddlex/deploy.py
+++ b/paddlex/deploy.py
@@ -97,8 +97,6 @@ class Predictor:
config.disable_glog_info()
if memory_optimize:
config.enable_memory_optim()
- else:
- config.diable_memory_optim()
# 开启计算图分析优化,包括OP融合等
config.switch_ir_optim(True)
diff --git a/setup.py b/setup.py
index a044495c902f6b754a69265c5020d7dbda992b14..bba199719ce65075f8a61b965bca49f026406c91 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit
setuptools.setup(
name="paddlex",
- version='1.0.2',
+ version='1.0.4',
author="paddlex",
author_email="paddlex@baidu.com",
description=long_description,