diff --git a/docs/apis/transforms/augment.md b/docs/apis/transforms/augment.md index 4ed04ca064cad113a2375dc3375d651572b374c1..f8c66b45d2d853fde57c520b079f9974e5fa4d76 100644 --- a/docs/apis/transforms/augment.md +++ b/docs/apis/transforms/augment.md @@ -10,7 +10,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了 | :------- | :------------| | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、
[RandomRotate](cls_transforms.html#randomratate)、 [RandomDistort](cls_transforms.html#randomdistort) | |目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、[RandomExpand](det_transforms.html#randomexpand) | -|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | +|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotate](seg_transforms.html#randomrotate)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | ## imgaug增强库的支持 diff --git a/docs/apis/transforms/seg_transforms.md b/docs/apis/transforms/seg_transforms.md index 1708290680e767b3a06615f0b789279b716433ea..d95d8a4d9a35723b0f489fa972ed28fcadd1d211 100755 --- a/docs/apis/transforms/seg_transforms.md +++ b/docs/apis/transforms/seg_transforms.md @@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1) * **prob** (float): 图像模糊概率。默认为0.1。 -## RandomRotation类 +## RandomRotate类 ```python paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255) ``` diff --git a/paddlex/__init__.py b/paddlex/__init__.py index de762df7ad7dc01670e795b93f709bb23a08f1c8..972210bdb80c445e59d4a8ed10418ee988bd353c 100644 --- a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -53,4 +53,4 @@ log_level = 2 from . import interpret -__version__ = '1.0.2.github' +__version__ = '1.0.4' diff --git a/paddlex/cv/datasets/dataset.py b/paddlex/cv/datasets/dataset.py index c3bec8997cb0a04590f4946dc87b0eb8bd3a0c43..3cc8b52c10f41ce17bbe8da7fd5289b96aac409e 100644 --- a/paddlex/cv/datasets/dataset.py +++ b/paddlex/cv/datasets/dataset.py @@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data): padding_batch = [] for data in batch_data: im_c, im_h, im_w = data[0].shape[:] - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) + padding_im = np.zeros( + (im_c, max_shape[1], max_shape[2]), dtype=np.float32) padding_im[:, :im_h, :im_w] = data[0] padding_batch.append((padding_im, ) + data[1:]) return padding_batch @@ -226,8 +226,8 @@ class Dataset: if num_workers == 'auto': import multiprocessing as mp num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8 - if platform.platform().startswith( - "Darwin") or platform.platform().startswith("Windows"): + if platform.platform().startswith("Darwin") or platform.platform( + ).startswith("Windows"): parallel_method = 'thread' if transforms is None: raise Exception("transform should be defined.") diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py index a253aa5d1d8c005c7903b57a9b9b36da45982d78..3127bd8549ae221f7f7604613bba2e1437b93605 100644 --- a/paddlex/cv/models/deeplabv3p.py +++ b/paddlex/cv/models/deeplabv3p.py @@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI): if mode == 'train': self.optimizer.minimize(model_out) outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] else: outputs['pred'] = model_out[0] outputs['logit'] = model_out[1] @@ -336,18 +331,26 @@ class DeepLabv3p(BaseAPI): for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]) - labels = np.array([d[1] for d in data]) + + _, _, im_h, im_w = images.shape + labels = list() + for d in data: + padding_label = np.zeros( + (1, im_h, im_w)).astype('int64') + self.ignore_index + padding_label[:, :im_h, :im_w] = d[1] + labels.append(padding_label) + labels = np.array(labels) + num_samples = images.shape[0] if num_samples < batch_size: num_pad_samples = batch_size - num_samples pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) images = np.concatenate([images, pad_images]) feed_data = {'image': images} - outputs = self.exe.run( - self.parallel_test_prog, - feed=feed_data, - fetch_list=list(self.test_outputs.values()), - return_numpy=True) + outputs = self.exe.run(self.parallel_test_prog, + feed=feed_data, + fetch_list=list(self.test_outputs.values()), + return_numpy=True) pred = outputs[0] if num_samples < batch_size: pred = pred[0:num_samples] @@ -364,8 +367,7 @@ class DeepLabv3p(BaseAPI): metrics = OrderedDict( zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], - [miou, category_iou, macc, category_acc, - conf_mat.kappa()])) + [miou, category_iou, macc, category_acc, conf_mat.kappa()])) if return_details: eval_details = { 'confusion_matrix': conf_mat.confusion_matrix.tolist() @@ -394,10 +396,9 @@ class DeepLabv3p(BaseAPI): transforms=self.test_transforms, mode='test') im, im_info = self.test_transforms(im_file) im = np.expand_dims(im, axis=0) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) + result = self.exe.run(self.test_prog, + feed={'image': im}, + fetch_list=list(self.test_outputs.values())) pred = result[0] pred = np.squeeze(pred).astype('uint8') logit = result[1] @@ -413,6 +414,6 @@ class DeepLabv3p(BaseAPI): pred = pred[0:h, 0:w] logit = logit[0:h, 0:w, :] else: - raise Exception("Unexpected info '{}' in im_info".format( - info[0])) + raise Exception("Unexpected info '{}' in im_info".format(info[ + 0])) return {'label_map': pred, 'score_map': logit} diff --git a/paddlex/cv/nets/segmentation/deeplabv3p.py b/paddlex/cv/nets/segmentation/deeplabv3p.py index 08dad240c0b28d6e6e13845dcc0c9148c442014f..60a34d0128bf271d42fa8658100318ce05e31812 100644 --- a/paddlex/cv/nets/segmentation/deeplabv3p.py +++ b/paddlex/cv/nets/segmentation/deeplabv3p.py @@ -135,7 +135,8 @@ class DeepLabv3p(object): param_attr = fluid.ParamAttr( name=name_scope + 'weights', regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) with scope('encoder'): channel = 256 with scope("image_pool"): @@ -151,8 +152,8 @@ class DeepLabv3p(object): padding=0, param_attr=param_attr)) input_shape = fluid.layers.shape(input) - image_avg = fluid.layers.resize_bilinear( - image_avg, input_shape[2:]) + image_avg = fluid.layers.resize_bilinear(image_avg, + input_shape[2:]) with scope("aspp0"): aspp0 = bn_relu( @@ -244,7 +245,8 @@ class DeepLabv3p(object): param_attr = fluid.ParamAttr( name=name_scope + 'weights', regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) with scope('decoder'): with scope('concat'): decode_shortcut = bn_relu( @@ -326,9 +328,6 @@ class DeepLabv3p(object): if self.mode == 'train': inputs['label'] = fluid.data( dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') return inputs def build_net(self, inputs): @@ -351,7 +350,8 @@ class DeepLabv3p(object): name=name_scope + 'weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.01)) with scope('logit'): with fluid.name_scope('last_conv'): logit = conv( diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py index bcb8e6d38de9becacce4d80e2ff54588c15352f4..6dc4ea7b95d876ae896c77395ab155bec1727a8a 100644 --- a/paddlex/cv/transforms/cls_transforms.py +++ b/paddlex/cv/transforms/cls_transforms.py @@ -92,6 +92,12 @@ class Compose(ClsTransform): outputs = (im, label) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + self.transforms = augmenters + self.transforms.transforms + class RandomCrop(ClsTransform): """对图像进行随机剪裁,模型训练时的数据增强操作。 @@ -461,3 +467,56 @@ class ArrangeClassifier(ClsTransform): else: outputs = (im, ) return outputs + + +class ComposedClsTransforms(Compose): + """ 分类模型的基础Transforms流程,具体如下 + 训练阶段: + 1. 随机从图像中crop一块子图,并resize成crop_size大小 + 2. 将1的输出按0.5的概率随机进行水平翻转 + 3. 将图像进行归一化 + 验证/预测阶段: + 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14 + 2. 从图像中心crop出一个大小为crop_size的图像 + 3. 将图像进行归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + crop_size(int|list): 输入模型里的图像大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + crop_size=[224, 224], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = crop_size + if isinstance(crop_size, list): + if crop_size[0] != crop_size[1]: + raise Exception( + "In classifier model, width and height should be equal, please modify your parameter `crop_size`" + ) + width = crop_size[0] + if width % 32 != 0: + raise Exception( + "In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`" + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5), + Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [ + ResizeByShort(short_size=int(width * 1.14)), + CenterCrop(crop_size=width), Normalize( + mean=mean, std=std) + ] + + super(ComposedClsTransforms, self).__init__(transforms) diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index ca892d75bc7913cd1e238bb96e82e76f9a4fd716..19db33173b87b7cc20b87054cfbc1241176abc58 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -152,6 +152,12 @@ class Compose(DetTransform): outputs = (im, im_info) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + self.transforms = augmenters + self.transforms.transforms + class ResizeByShort(DetTransform): """根据图像的短边调整图像大小(resize)。 @@ -1227,3 +1233,108 @@ class ArrangeYOLOv3(DetTransform): im_shape = im_info['image_shape'] outputs = (im, im_shape) return outputs + + +class ComposedRCNNTransforms(Compose): + """ RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下, + 训练阶段: + 1. 随机以0.5的概率将图像水平翻转 + 2. 图像归一化 + 3. 图像按比例Resize,scale计算方式如下 + scale = min_max_size[0] / short_size_of_image + if max_size_of_image * scale > min_max_size[1]: + scale = min_max_size[1] / max_size_of_image + 4. 将3步骤的长宽进行padding,使得长宽为32的倍数 + 验证阶段: + 1. 图像归一化 + 2. 图像按比例Resize,scale计算方式同上训练阶段 + 3. 将2步骤的长宽进行padding,使得长宽为32的倍数 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + min_max_size(list): 图像在缩放时,最小边和最大边的约束条件 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + min_max_size=[800, 1333], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomHorizontalFlip(prob=0.5), Normalize( + mean=mean, std=std), ResizeByShort( + short_size=min_max_size[0], max_size=min_max_size[1]), + Padding(coarsest_stride=32) + ] + else: + # 验证/预测时的transforms + transforms = [ + Normalize( + mean=mean, std=std), ResizeByShort( + short_size=min_max_size[0], max_size=min_max_size[1]), + Padding(coarsest_stride=32) + ] + + super(ComposedRCNNTransforms, self).__init__(transforms) + + +class ComposedYOLOTransforms(Compose): + """YOLOv3模型的图像预处理流程,具体如下, + 训练阶段: + 1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage + 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调 + 3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand + 4. 随机裁剪图像 + 5. 将4步骤的输出图像Resize成shape参数的大小 + 6. 随机0.5的概率水平翻转图像 + 7. 图像归一化 + 验证/预测阶段: + 1. 将图像Resize成shape参数大小 + 2. 图像归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小 + mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + shape=[608, 608], + mixup_epoch=250, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = shape + if isinstance(shape, list): + if shape[0] != shape[1]: + raise Exception( + "In YOLOv3 model, width and height should be equal") + width = shape[0] + if width % 32 != 0: + raise Exception( + "In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...." + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + MixupImage(mixup_epoch=mixup_epoch), RandomDistort(), + RandomExpand(), RandomCrop(), Resize( + target_size=width, + interp='RANDOM'), RandomHorizontalFlip(), Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [ + Resize( + target_size=width, interp='CUBIC'), Normalize( + mean=mean, std=std) + ] + super(ComposedYOLOTransforms, self).__init__(transforms) diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py index e562ba2601677085fdef23c57a6779ba77143f8d..d3c67648d500d915315c5607cfc5c2f5538a9090 100644 --- a/paddlex/cv/transforms/seg_transforms.py +++ b/paddlex/cv/transforms/seg_transforms.py @@ -108,6 +108,12 @@ class Compose(SegTransform): outputs = (im, im_info) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + self.transforms = augmenters + self.transforms.transforms + class RandomHorizontalFlip(SegTransform): """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 @@ -1088,3 +1094,39 @@ class ArrangeSegmenter(SegTransform): return (im, im_info) else: return (im, ) + + +class ComposedSegTransforms(Compose): + """ 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下 + 训练阶段: + 1. 随机对图像以0.5的概率水平翻转 + 2. 按不同的比例随机Resize原图 + 3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小 + 4. 图像归一化 + 预测阶段: + 1. 图像归一化 + + Args: + mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + train_crop_size(list): 模型训练阶段,随机从原图crop的大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + train_crop_size=[769, 769], + mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]): + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomHorizontalFlip(prob=0.5), ResizeStepScaling(), + RandomPaddingCrop(crop_size=train_crop_size), Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [Resize(512), Normalize(mean=mean, std=std)] + + super(ComposedSegTransforms, self).__init__(transforms) diff --git a/paddlex/deploy.py b/paddlex/deploy.py index bb2618c1d844836a4884d93218f7d67434103b8e..0aee491ecdda1609b8827f94d0412a26bf053650 100644 --- a/paddlex/deploy.py +++ b/paddlex/deploy.py @@ -97,8 +97,6 @@ class Predictor: config.disable_glog_info() if memory_optimize: config.enable_memory_optim() - else: - config.diable_memory_optim() # 开启计算图分析优化,包括OP融合等 config.switch_ir_optim(True) diff --git a/setup.py b/setup.py index a044495c902f6b754a69265c5020d7dbda992b14..bba199719ce65075f8a61b965bca49f026406c91 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit setuptools.setup( name="paddlex", - version='1.0.2', + version='1.0.4', author="paddlex", author_email="paddlex@baidu.com", description=long_description,