diff --git a/demo/colorization/README.md b/demo/colorization/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..77555dbd5fb536221d8a74d0e4d1d7ad43bb9659
--- /dev/null
+++ b/demo/colorization/README.md
@@ -0,0 +1,106 @@
+# PaddleHub Image Colorization
+
+This example shows how to use PaddleHub to fine-tune a pretrained model and run prediction.
+
+## How to start Fine-tune
+
+After installing PaddlePaddle and PaddleHub, run `python train.py` to fine-tune the user_guided_colorization model on datasets such as [Canvas](../../docs/reference/dataset.md#class-hubdatasetsCanvas).
+
+## Code steps
+
+Fine-tuning with the PaddleHub Fine-tune API takes four steps.
+
+### Step1: Define the data preprocessing pipeline
+```python
+import paddlehub.vision.transforms as T
+
+transform = T.Compose([T.Resize((176, 176), interpolation='NEAREST'),
+                       T.RGB2LAB()], to_rgb=True)
+```
+
+The `transforms` module provides a rich set of data preprocessing operators; replace them as your task requires.
+
+**NOTE:** `to_rgb` must be set to True in `T.Compose`.
+
+### Step2: Download and load the dataset
+```python
+from paddlehub.datasets import Canvas
+
+color_set = Canvas(transform=transform, mode='train')
+```
+* `transform`: the preprocessing pipeline.
+* `mode`: dataset split to use; one of `train` and `test`, default is `train`.
+
+See [canvas.py](../../paddlehub/datasets/canvas.py) for how the dataset is prepared. `hub.datasets.Canvas()` automatically downloads the dataset and unpacks it into `$HOME/.paddlehub/dataset` in the user directory.
+
+### Step3: Load the pretrained model
+
+```python
+model = hub.Module(name='user_guided_colorization', classification=True, prob=1, num_point=None, load_checkpoint=None)
+```
+* `name`: name of the pretrained model.
+* `classification`: colorization is trained in two stages. In the first stage, set `classification` to True to train the network without color hint blocks; in the second stage, set it to False to train with color hint blocks added to the input image.
+* `prob`: probability that no color hint block is added; the default is 1, i.e. no hint blocks.
+* `num_point`: number of color hint blocks, default is None.
+* `load_checkpoint`: path of a checkpoint you trained yourself; if None, the default pretrained parameters are loaded.
+
+### Step4: Choose the optimization strategy and run configuration
+
+```python
+optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters())
+trainer = Trainer(model, optimizer, checkpoint_dir='img_colorization_ckpt')
+trainer.train(color_set, epochs=201, batch_size=25, eval_dataset=color_set, log_interval=10, save_interval=10)
+```
+
+#### Optimization strategy
+
+Paddle 2.0-rc provides a range of optimizers, such as `SGD`, `Adam` and `Adamax`; see the [optimizer documentation](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html) for details.
+
+For `Adam`:
+
+* `learning_rate`: global learning rate, default 1e-4;
+* `parameters`: the model parameters to optimize.
+
+#### Run configuration
+`Trainer` controls the fine-tuning run with the following parameters:
+
+* `model`: the model to optimize;
+* `optimizer`: the optimizer to use;
+* `use_vdl`: whether to visualize training with VisualDL;
+* `checkpoint_dir`: directory where model parameters are saved;
+* `compare_metrics`: metric used to select the best model;
+
+`trainer.train` controls the training loop with the following parameters:
+
+* `train_dataset`: dataset used for training;
+* `epochs`: number of training epochs;
+* `batch_size`: training batch size; when using a GPU, adjust it to the available memory;
+* `num_workers`: number of workers, default 0;
+* `eval_dataset`: dataset used for validation;
+* `log_interval`: logging interval, measured in training steps;
+* `save_interval`: checkpointing interval, measured in epochs.
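+
+Since Step3 describes two training stages, a complete run needs a second pass with `classification=False` once the first stage has converged. A minimal sketch of that second stage, reusing the pipeline from Step1/Step2 above (the `prob` value and the checkpoint path are illustrative, not values prescribed by the module):
+
+```python
+# Stage 2: train with color hint blocks. classification=False switches to the
+# hint-based branch; a small prob means hint blocks are added most of the time.
+model = hub.Module(name='user_guided_colorization', classification=False, prob=0.125,
+                   load_checkpoint='/PATH/TO/STAGE1/CHECKPOINT')
+optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters())
+trainer = Trainer(model, optimizer, checkpoint_dir='img_colorization_ckpt_stage2')
+trainer.train(color_set, epochs=201, batch_size=25, eval_dataset=color_set, log_interval=10, save_interval=10)
+```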
+
+## Prediction
+
+After fine-tuning, the model that performed best on the validation set is saved under `${CHECKPOINT_DIR}/best_model`, where `${CHECKPOINT_DIR}` is the checkpoint directory chosen for fine-tuning.
+
+We use this model for prediction. The predict.py script looks as follows:
+
+```python
+import paddle
+import paddlehub as hub
+
+if __name__ == '__main__':
+    model = hub.Module(name='user_guided_colorization', load_checkpoint='/PATH/TO/CHECKPOINT')
+    result = model.predict(images='house.png', visualization=True, save_path='result')
+```
+
+Once the parameters are configured, run `python predict.py`. See [paddle.load](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load) for details on model loading.
+
+**NOTE:** For prediction, the module, checkpoint_dir and dataset must match the ones used for fine-tuning. For the oil-painting colorization style, download the parameter file [oil-painting colorization](https://paddlehub.bj.bcebos.com/dygraph/models/canvas_rc.pdparams).
+
+**Args**
+* `images`: path of the input image;
+* `visualization`: whether to save visualizations, default True;
+* `save_path`: path for saving results, default 'result'.
diff --git a/demo/colorization/predict.py b/demo/colorization/predict.py
index f24dab7995a223c7bb272ba29ef0e287704af6c5..ff705d8500aeb7cd56861ab1aaaab3c2db3e52f9 100644
--- a/demo/colorization/predict.py
+++ b/demo/colorization/predict.py
@@ -2,5 +2,5 @@ import paddle
 import paddlehub as hub
 
 if __name__ == '__main__':
-    model = hub.Module(name='user_guided_colorization', load_checkpoint='/PATH/TO/CHECKPOINT')
+    model = hub.Module(name='user_guided_colorization', load_checkpoint='/PATH/TO/CHECKPOINT', prob=0.01)
     result = model.predict(images='house.png')
diff --git a/demo/colorization/train.py b/demo/colorization/train.py
index 343aa11857bcc3f9edf9815e8a3e0c7132670d81..3808f52d7f4e23fb650692b868231ee828aa4927 100644
--- a/demo/colorization/train.py
+++ b/demo/colorization/train.py
@@ -6,12 +6,12 @@ from paddlehub.datasets import Canvas
 
 
 if __name__ == '__main__':
-    model = hub.Module(name='user_guided_colorization', classification=True, prob= 0.125)
+    model = hub.Module(name='user_guided_colorization', classification=True, prob=1)
     transform = T.Compose([T.Resize((256, 256), interpolation='NEAREST'),
                            T.RandomPaddingCrop(crop_size=176),
-                           T.RGB2LAB()])
+                           T.RGB2LAB()], to_rgb=True)
     color_set = Canvas(transform=transform, mode='train')
     optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters())
     trainer = Trainer(model, optimizer, checkpoint_dir='img_colorization_ckpt')
-    trainer.train(color_set, epochs=101, batch_size=2, eval_dataset=color_set, log_interval=10, save_interval=10)
+    trainer.train(color_set, epochs=201, batch_size=25, eval_dataset=color_set, log_interval=10, save_interval=10)
diff --git a/demo/image_classification/README.md b/demo/image_classification/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5388a74b8cd97d24958242a308f0430124810e34
--- /dev/null
+++ b/demo/image_classification/README.md
@@ -0,0 +1,108 @@
+# PaddleHub Image Classification
+
+This example shows how to use PaddleHub to fine-tune a pretrained model and run prediction.
+
+## How to start Fine-tune
+
+After installing PaddlePaddle and PaddleHub, run `python train.py` to fine-tune resnet50_vd_imagenet_ssld on datasets such as [Flowers](../../docs/reference/dataset.md#class-hubdatasetsflowers).
+
+## Code steps
+
+Fine-tuning with the PaddleHub Fine-tune API takes four steps.
+
+### Step1: Define the data preprocessing pipeline
+```python
+import paddlehub.vision.transforms as T
+
+transforms = T.Compose([T.Resize((224, 224)), T.Normalize()])
+```
+
+The `transforms` module provides a rich set of data preprocessing operators; replace them as your task requires.
+
+### Step2: Download and load the dataset
+```python
+from paddlehub.datasets import Flowers
+
+flowers = Flowers(transforms)
+
+flowers_validate = Flowers(transforms, mode='val')
+```
+* `transforms`: the preprocessing pipeline.
+* `mode`: dataset split to use; one of `train`, `test` and `val`, default is `train`.
+
+See [flowers.py](../../paddlehub/datasets/flowers.py) for how the dataset is prepared. `hub.datasets.Flowers()` automatically downloads the dataset and unpacks it into `$HOME/.paddlehub/dataset` in the user directory.
+
+
+### Step3: Load the pretrained model
+
+```python
+model = hub.Module(name="resnet50_vd_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"])
+```
+* `name`: name of the pretrained model.
+* `label_list`: the class labels of the target dataset, which determine the model's output dimension.
+
+PaddleHub provides many image classification models, such as xception, mobilenet and efficientnet; see [image classification models](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification) for details.
+Some of these models have not yet been fully upgraded to the 2.0 API; stay tuned.
+
+To try an efficientnet model, just change the `name` argument of Module, as the snippet below shows.
+```python
+# Changing the name argument is all it takes to switch to an efficientnet model:
+model = hub.Module(name="efficientnetb7_imagenet")
+```
+
+### Step4: Choose the optimization strategy and run configuration
+
+```python
+optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters())
+trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt')
+
+trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1)
+```
+
+#### Optimization strategy
+
+Paddle 2.0-rc provides a range of optimizers, such as `SGD`, `Adam` and `Adamax`; see the [optimizer documentation](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html) for details.
+
+For `Adam`:
+
+* `learning_rate`: global learning rate, default 1e-3;
+* `parameters`: the model parameters to optimize.
+
+#### Run configuration
+`Trainer` controls the fine-tuning run with the following parameters:
+
+* `model`: the model to optimize;
+* `optimizer`: the optimizer to use;
+* `use_vdl`: whether to visualize training with VisualDL;
+* `checkpoint_dir`: directory where model parameters are saved;
+* `compare_metrics`: metric used to select the best model;
+
+`trainer.train` controls the training loop with the following parameters:
+
+* `train_dataset`: dataset used for training;
+* `epochs`: number of training epochs;
+* `batch_size`: training batch size; when using a GPU, adjust it to the available memory;
+* `num_workers`: number of workers, default 0;
+* `eval_dataset`: dataset used for validation;
+* `log_interval`: logging interval, measured in training steps;
+* `save_interval`: checkpointing interval, measured in epochs.
+
+## Prediction
+
+After fine-tuning, the model that performed best on the validation set is saved under `${CHECKPOINT_DIR}/best_model`, where `${CHECKPOINT_DIR}` is the checkpoint directory chosen for fine-tuning.
+
+We use this model for prediction. The predict.py script looks as follows:
+
+```python
+import paddle
+import paddlehub as hub
+
+if __name__ == '__main__':
+
+    model = hub.Module(name='resnet50_vd_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT')
+    result = model.predict('flower.jpg')
+```
+
+Once the parameters are configured, run `python predict.py`. See [paddle.load](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load) for details on model loading.
+
+**NOTE:** For prediction, the module, checkpoint_dir and dataset must match the ones used for fine-tuning.
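+
+Based on the module code changed elsewhere in this PR (a softmax over the logits, with the top-k labels collected per image), `result` is expected to hold one mapping from label name to probability per input image. A minimal sketch of inspecting it (the return layout is an assumption drawn from that code, not a documented API):
+
+```python
+# Assuming result is a list with one {label: probability} dict per image,
+# print the most likely label for the single input image.
+top_label, top_prob = max(result[0].items(), key=lambda kv: kv[1])
+print('predicted: {} ({:.3f})'.format(top_label, top_prob))
+```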
"daisy", "sunflowers", "dandelion"], load_checkpoint=None) optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') - - trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=10) \ No newline at end of file diff --git a/demo/style_transfer/README.md b/demo/style_transfer/README.md new file mode 100644 index 0000000000000000000000000000000000000000..70ab1017301b27b9f3a44e3b17b68554c705c490 --- /dev/null +++ b/demo/style_transfer/README.md @@ -0,0 +1,107 @@ +# PaddleHub 图像风格迁移 + +本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用msgnet模型对[MiniCOCO](../../docs/reference/dataset.md#class-hubdatasetsMiniCOCO)等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +import paddlehub.vision.transforms as T + +transform = T.Compose([T.Resize((256, 256), interpolation='LINEAR')]) +``` + +`transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets.minicoco import MiniCOCO + +color_set = MiniCOCO(transform=transform, mode='train') + +``` +* `transforms`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, 默认为`train`。 + +数据集的准备代码可以参考 [minicoco.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.MiniCOCO()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='msgnet', load_checkpoint=None) +``` +* `name`: 选择预训练模型的名字。 +* `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_style_ckpt') +trainer.train(styledata, epochs=101, batch_size=4, eval_dataset=styledata, log_interval=10, save_interval=10) +``` + +#### 优化策略 + +Paddle2.0-rc提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + +其中`PolynomialDecay`: + +* `learning_rate`: 初始学习率,数据类型为Python float; +* `power`: 多项式的幂,默认值为1.0; +* `decay_steps`: 衰减步数。必须是正整数,该参数确定衰减周期。 + +其中`Adam`: + +* `learning_rate`: 全局学习率。默认为1e-4; +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='msgnet', load_checkpoint=/PATH/TO/CHECKPOINT) + result = model.predict(origin="venice-boat.jpg", style="candy.jpg", visualization=True, save_path ='result') +``` + +参数配置正确后,请执行脚本`python predict.py`, 
+
+For `Adam`:
+
+* `learning_rate`: global learning rate, default 1e-4;
+* `parameters`: the model parameters to optimize.
+
+#### Run configuration
+`Trainer` controls the fine-tuning run with the following parameters:
+
+* `model`: the model to optimize;
+* `optimizer`: the optimizer to use;
+* `use_vdl`: whether to visualize training with VisualDL;
+* `checkpoint_dir`: directory where model parameters are saved;
+* `compare_metrics`: metric used to select the best model;
+
+`trainer.train` controls the training loop with the following parameters:
+
+* `train_dataset`: dataset used for training;
+* `epochs`: number of training epochs;
+* `batch_size`: training batch size; when using a GPU, adjust it to the available memory;
+* `num_workers`: number of workers, default 0;
+* `eval_dataset`: dataset used for validation;
+* `log_interval`: logging interval, measured in training steps;
+* `save_interval`: checkpointing interval, measured in epochs.
+
+## Prediction
+
+After fine-tuning, the model that performed best on the validation set is saved under `${CHECKPOINT_DIR}/best_model`, where `${CHECKPOINT_DIR}` is the checkpoint directory chosen for fine-tuning.
+
+We use this model for prediction. The predict.py script looks as follows:
+
+```python
+import paddle
+import paddlehub as hub
+
+if __name__ == '__main__':
+    model = hub.Module(name='msgnet', load_checkpoint='/PATH/TO/CHECKPOINT')
+    result = model.predict(origin="venice-boat.jpg", style="candy.jpg", visualization=True, save_path='result')
+```
+
+Once the parameters are configured, run `python predict.py`. See [paddle.load](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load) for details on model loading.
+
+**Args**
+* `origin`: path of the content image;
+* `style`: path of the style image;
+* `visualization`: whether to save visualizations, default True;
+* `save_path`: path for saving results, default 'result'.
+
+**NOTE:** For prediction, the module, checkpoint_dir and dataset must match the ones used for fine-tuning.
diff --git a/docs/reference/vision.md b/docs/reference/vision.md
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a6dda422c1670cafcbe454a8f8f557b96ec13065 100644
--- a/docs/reference/vision.md
+++ b/docs/reference/vision.md
@@ -0,0 +1,239 @@
+# Class `hub.vision.transforms.Compose`
+
+```python
+hub.vision.transforms.Compose(
+    transforms: Callable,
+    to_rgb: bool = False)
+```
+
+Compose preprocessing operators for obtaining preprocessed data. The shape of the input image for all operations is [H, W, C], where H is the image height, W is the image width, and C is the number of image channels.
+
+**Args**
+* transforms(list[Callable]): The preprocessing operators to apply.
+* to_rgb(bool): Whether to convert the input from BGR mode to RGB mode, default is False.
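+
+A typical use is to chain several of the operators documented below and apply them to an image path or array. A minimal sketch (the image file name is hypothetical):
+
+```python
+import paddlehub.vision.transforms as T
+
+transforms = T.Compose([T.Resize((256, 256)),
+                        T.RandomHorizontalFlip(),
+                        T.Normalize()])
+# Accepts a file path (read with cv2) or an [H, W, C] numpy array
+# and returns the preprocessed image as a float32 array.
+img = transforms('flower.jpg')
+```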
+
+
+# Class `hub.vision.transforms.RandomHorizontalFlip`
+
+```python
+hub.vision.transforms.RandomHorizontalFlip(prob: float = 0.5)
+```
+
+Randomly flip the image horizontally according to the given probability.
+
+**Args**
+
+* prob(float): The probability of flipping the image horizontally, default is 0.5.
+
+
+# Class `hub.vision.transforms.RandomVerticalFlip`
+
+```python
+hub.vision.transforms.RandomVerticalFlip(
+    prob: float = 0.5)
+```
+
+Randomly flip the image vertically according to the given probability.
+
+**Args**
+
+* prob(float): The probability of flipping the image vertically, default is 0.5.
+
+
+# Class `hub.vision.transforms.Resize`
+
+```python
+hub.vision.transforms.Resize(
+    target_size: Union[List[int], int],
+    interpolation: str = 'LINEAR')
+```
+
+Resize the input image to the target size.
+
+**Args**
+
+* target_size(List[int]|int): Target image size.
+* interpolation(str): Interpolation mode, default is 'LINEAR'. It supports 6 modes: 'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4' and 'RANDOM'.
+
+
+# Class `hub.vision.transforms.ResizeByLong`
+
+```python
+hub.vision.transforms.ResizeByLong(long_size: int)
+```
+
+Resize the long side of the input image to the target size.
+
+**Args**
+
+* long_size(int|list[int]): The target size of the long side.
+
+
+# Class `hub.vision.transforms.ResizeRangeScaling`
+
+```python
+hub.vision.transforms.ResizeRangeScaling(
+    min_value: int = 400,
+    max_value: int = 600)
+```
+
+Randomly select a target size within the given range and resize the image to it.
+
+**Args**
+
+* min_value(int): The minimum value of the target size.
+* max_value(int): The maximum value of the target size.
+
+
+# Class `hub.vision.transforms.ResizeStepScaling`
+
+```python
+hub.vision.transforms.ResizeStepScaling(
+    min_scale_factor: float = 0.75,
+    max_scale_factor: float = 1.25,
+    scale_step_size: float = 0.25)
+```
+
+Randomly select a scale factor within the given range and resize the image by it.
+
+**Args**
+
+* min_scale_factor(float): The minimum scale factor.
+* max_scale_factor(float): The maximum scale factor.
+* scale_step_size(float): Scale interval.
+
+
+# Class `hub.vision.transforms.Normalize`
+
+```python
+hub.vision.transforms.Normalize(
+    mean: list = [0.5, 0.5, 0.5],
+    std: list = [0.5, 0.5, 0.5])
+```
+
+Normalize the input image.
+
+**Args**
+
+* mean(list): Mean value for normalization.
+* std(list): Standard deviation for normalization.
+
+
+# Class `hub.vision.transforms.Padding`
+
+```python
+hub.vision.transforms.Padding(
+    target_size: Union[List[int], Tuple[int], int],
+    im_padding_value: list = [127.5, 127.5, 127.5])
+```
+
+Pad the input image to the target size with the given padding value.
+
+**Args**
+
+* target_size(Union[List[int], Tuple[int], int]): Target image size.
+* im_padding_value(list): Border value for the 3 channels, default is [127.5, 127.5, 127.5].
+
+
+# Class `hub.vision.transforms.RandomPaddingCrop`
+
+```python
+hub.vision.transforms.RandomPaddingCrop(
+    crop_size: Union[List[int], Tuple[int], int],
+    im_padding_value: list = [127.5, 127.5, 127.5])
+```
+
+Pad the input image if the crop size is greater than the image size; otherwise, randomly crop the input image to the given size.
+
+**Args**
+
+* crop_size(Union[List[int], Tuple[int], int]): Target image size.
+* im_padding_value(list): Border value for the 3 channels, default is [127.5, 127.5, 127.5].
+
+
+# Class `hub.vision.transforms.RandomBlur`
+
+```python
+hub.vision.transforms.RandomBlur(prob: float = 0.1)
+```
+
+Randomly blur the input image with a Gaussian filter according to the given probability.
+
+**Args**
+
+* prob(float): The probability of blurring the image, default is 0.1.
+
+
+# Class `hub.vision.transforms.RandomRotation`
+
+```python
+hub.vision.transforms.RandomRotation(
+    max_rotation: float = 15.,
+    im_padding_value: list = [127.5, 127.5, 127.5])
+```
+
+Rotate the input image by a random angle that does not exceed max_rotation.
+
+**Args**
+
+* max_rotation(float): Upper bound of the rotation angle.
+* im_padding_value(list): Border value for the 3 channels, default is [127.5, 127.5, 127.5].
+
+
+# Class `hub.vision.transforms.RandomDistort`
+
+```python
+hub.vision.transforms.RandomDistort(
+    brightness_range: float = 0.5,
+    brightness_prob: float = 0.5,
+    contrast_range: float = 0.5,
+    contrast_prob: float = 0.5,
+    saturation_range: float = 0.5,
+    saturation_prob: float = 0.5,
+    hue_range: float = 18.,
+    hue_prob: float = 0.5)
+```
+
+Randomly adjust the brightness, contrast, saturation and hue of the image according to the given ranges and probabilities.
+
+**Args**
+
+* brightness_range(float): Range of the brightness disturbance.
+* brightness_prob(float): Probability of disturbing the brightness.
+* contrast_range(float): Range of the contrast disturbance.
+* contrast_prob(float): Probability of disturbing the contrast.
+* saturation_range(float): Range of the saturation disturbance.
+* saturation_prob(float): Probability of disturbing the saturation.
+* hue_range(float): Range of the hue disturbance.
+* hue_prob(float): Probability of disturbing the hue.
+
+
+# Class `hub.vision.transforms.RGB2LAB`
+
+```python
+hub.vision.transforms.RGB2LAB()
+```
+
+Convert the color space from RGB to LAB.
+
+
+# Class `hub.vision.transforms.LAB2RGB`
+
+```python
+hub.vision.transforms.LAB2RGB()
+```
+
+Convert the color space from LAB to RGB.
+
+
+# Class `hub.vision.transforms.CenterCrop`
+
+```python
+hub.vision.transforms.CenterCrop(crop_size: int)
+```
+
+Crop the middle part of the image to the specified size.
+
+**Args**
+
+* crop_size(int): Target size of the cropped image.
\ No newline at end of file
diff --git a/docs/tutorial/how_to_load_data.md b/docs/tutorial/how_to_load_data.md
index 0067e1d8aff68f5f66625efdae3c1b0ef3f40678..712006b68faeeea38fd3b6d6bbeb351f36b31cfe 100644
--- a/docs/tutorial/how_to_load_data.md
+++ b/docs/tutorial/how_to_load_data.md
@@ -2,9 +2,177 @@
 Training a new task from scratch is time-consuming and may not reach the desired accuracy. Instead, you can fine-tune a pretrained model provided by PaddleHub: preprocess your custom data as required, feed it into the pretrained model, and you get the corresponding results. Organize your dataset as described below.
 
-
 ## 1. Image classification datasets
 
+When fine-tuning a classification task on custom data with PaddleHub, split the dataset into a training set, a validation set and a test set.
+
+### Data preparation
+
+Three text files record the image paths and labels of the splits, and one label file records the label names.
+```
+├─data: data directory
+  ├─train_list.txt: training set list
+  ├─test_list.txt: test set list
+  ├─validate_list.txt: validation set list
+  ├─label_list.txt: label list
+  └─……
+```
+Each training/validation/test list file has the format
+```
+image 1 path  image 1 label
+image 2 path  image 2 label
+...
+```
+label_list.txt has the format
+```
+class 1 name
+class 2 name
+...
+```
+
+Example:
+Taking the [Flowers dataset](../reference/dataset.md) as an example, train_list.txt/test_list.txt/validate_list.txt look like
+```
+roses/8050213579_48e1e7109f.jpg 0
+sunflowers/45045003_30bbd0a142_m.jpg 3
+daisy/3415180846_d7b5cced14_m.jpg 2
+```
+
+label_list.txt contains:
+```
+roses
+tulips
+daisy
+sunflowers
+dandelion
+```
+
+### Loading the dataset
+
+See [flowers.py](../../paddlehub/datasets/flowers.py) for how the dataset is prepared. `hub.datasets.Flowers()` automatically downloads the dataset and unpacks it into `$HOME/.paddlehub/dataset` in the user directory. Usage:
+
+```python
+from paddlehub.datasets import Flowers
+
+flowers = Flowers(transforms)
+
+flowers_validate = Flowers(transforms, mode='val')
+```
+* `transforms`: the preprocessing pipeline.
+* `mode`: dataset split to use; one of `train`, `test` and `val`, default is `train`.
+
 ## 2. Image colorization datasets
 
-## 3. Style transfer datasets
+When fine-tuning a colorization task on custom data with PaddleHub, split the dataset into a training set and a test set.
+
+### Data preparation
+
+Split the color images to be used for colorization training and testing into training data and test data.
+```
+├─data: data directory
+  ├─train: training set
+   |-image folder 1
+   |-image folder 2
+   |-……
+     |-image 1
+     |-image 2
+     |-……
+
+  ├─test: test set
+   |-image folder 1
+   |-image folder 2
+   |-……
+     |-image 1
+     |-image 2
+     |-……
+  └─……
+```
+
+Example:
+PaddleHub provides the `Canvas` dataset for colorization; it consists of 1193 Monet-style images and 400 Van Gogh-style images. Taking the [Canvas dataset](../reference/dataset.md) as an example, the train folder contains:
+
+```
+├─train: training set
+ |-monet
+  |-image 1
+  |-image 2
+  |-……
+ |-vango
+  |-image 1
+  |-image 2
+  |-……
+```
+
+### Loading the dataset
+
+See [canvas.py](../../paddlehub/datasets/canvas.py) for how the dataset is prepared. `hub.datasets.Canvas()` automatically downloads the dataset and unpacks it into `$HOME/.paddlehub/dataset` in the user directory. Usage:
+
+```python
+from paddlehub.datasets import Canvas
+
+color_set = Canvas(transforms, mode='train')
+```
+* `transforms`: the preprocessing pipeline.
+* `mode`: dataset split to use; one of `train` and `test`, default is `train`.
+
+## 3. Style transfer datasets
+
+When fine-tuning a style transfer task on custom data with PaddleHub, split the dataset into a training set and a test set.
+
+### Data preparation
+
+Split the color images to be used for style transfer training and testing into training data and test data, and place the style images in a `21styles` folder.
+
+```
+├─data: data directory
+  ├─train: training set
+   |-image folder 1
+   |-image folder 2
+   |-……
+     |-image 1
+     |-image 2
+     |-……
+
+  ├─test: test set
+   |-image folder 1
+   |-image folder 2
+   |-……
+     |-image 1
+     |-image 2
+     |-……
+  ├─21styles
+   |-image 1
+   |-image 2
+   |-……
+  └─……
+```
+
+Example:
+PaddleHub provides the `MiniCOCO` dataset for style transfer. Its training and test data come from COCO2014, with 2001 training images and 200 test images. The `21styles` folder holds 21 images in different styles, which you can replace with styles of your own. Taking the [MiniCOCO dataset](../reference/dataset.md) as an example, the train folder contains:
+
+```
+├─train: training set
+ |-train
+  |-image 1
+  |-image 2
+  |-……
+ |-test
+  |-image 1
+  |-image 2
+  |-……
+ |-21styles
+  |-image 1
+  |-image 2
+  |-……
+```
+
+### Loading the dataset
+
+See [minicoco.py](../../paddlehub/datasets/minicoco.py) for how the dataset is prepared. `hub.datasets.MiniCOCO()` automatically downloads the dataset and unpacks it into `$HOME/.paddlehub/dataset` in the user directory. Usage:
+
+```python
+from paddlehub.datasets import MiniCOCO
+
+color_set = MiniCOCO(transforms, mode='train')
+```
+* `transforms`: the preprocessing pipeline.
+* `mode`: dataset split to use; one of `train` and `test`, default is `train`.
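+
+If your own data already follows the layout of section 1, a custom loader can be sketched directly on top of `paddle.io.Dataset` and passed to `trainer.train` like the built-in datasets. Everything below (class name, argument names) is illustrative, not part of the PaddleHub API:
+
+```python
+import os
+
+import paddle
+
+
+class MyImageDataset(paddle.io.Dataset):
+    """Read 'image_path label' pairs from a list file, as described in section 1."""
+
+    def __init__(self, data_dir, list_file, transforms):
+        self.data_dir = data_dir
+        self.transforms = transforms
+        with open(os.path.join(data_dir, list_file)) as f:
+            pairs = [line.strip().split() for line in f if line.strip()]
+        self.records = [(path, int(label)) for path, label in pairs]
+
+    def __getitem__(self, idx):
+        path, label = self.records[idx]
+        # Compose accepts a file path and returns the preprocessed array.
+        return self.transforms(os.path.join(self.data_dir, path)), label
+
+    def __len__(self):
+        return len(self.records)
+```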
diff --git a/paddlehub/module/cv_module.py b/paddlehub/module/cv_module.py
index 05d14a61b28d81c2c1f434fc348af1381a262618..ad73f15aaa35b5b7197ef6e57173942e35a2fac0 100644
--- a/paddlehub/module/cv_module.py
+++ b/paddlehub/module/cv_module.py
@@ -69,7 +69,8 @@ class ImageClassifierModule(RunModule, ImageServing):
         images = batch[0]
         labels = paddle.unsqueeze(batch[1], axis=-1)
 
-        preds = self(images)
+        preds, feature = self(images)
+
         loss, _ = F.softmax_with_cross_entropy(preds, labels, return_softmax=True, axis=1)
         loss = paddle.mean(loss)
         acc = paddle.metric.accuracy(preds, labels)
@@ -89,10 +90,11 @@ class ImageClassifierModule(RunModule, ImageServing):
         images = self.transforms(images)
         if len(images.shape) == 3:
             images = images[np.newaxis, :]
-        preds = self(paddle.to_tensor(images))
+        preds, feature = self(paddle.to_tensor(images))
         preds = F.softmax(preds, axis=1).numpy()
         pred_idxs = np.argsort(preds)[::-1][:, :top_k]
         res = []
+
         for i, pred in enumerate(pred_idxs):
             res_dict = {}
             for k in pred:
diff --git a/paddlehub/vision/functional.py b/paddlehub/vision/functional.py
index 09d65c8c557dd975a11e8232db54bc4cf78b2368..69e79677f2e270c364b925c78c909e59fa343f73 100644
--- a/paddlehub/vision/functional.py
+++ b/paddlehub/vision/functional.py
@@ -73,7 +73,7 @@ def resize_long(im: np.ndarray, long_size: int, interpolation: int = cv2.INTER_L
 
     Args:
         im(np.ndarray): Input image.
-        target_size(int|list[int]): The target size of long side.
+        long_size(int|list[int]): The target size of long side.
         interpolation(int): Interpolation method. Default to cv2.INTER_LINEAR.
     '''
     value = max(im.shape[0], im.shape[1])
diff --git a/paddlehub/vision/transforms.py b/paddlehub/vision/transforms.py
index bc6266cb64e6458318b898d118081c90987de7fd..91f0e305e5ec09b4ad0a354c4e8f6baa4dd364fa 100644
--- a/paddlehub/vision/transforms.py
+++ b/paddlehub/vision/transforms.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 import random
-from typing import Callable
+from typing import Callable, Union, List, Tuple
 
 import cv2
 import PIL
@@ -23,7 +23,14 @@ import paddlehub.vision.functional as F
 
 
 class Compose:
-    def __init__(self, transforms, to_rgb=False):
+    """
+    Compose preprocessing operators for obtaining preprocessed data. The shape of the input image for all operations is [H, W, C], where H is the image height, W is the image width, and C is the number of image channels.
+
+    Args:
+        transforms(list[Callable]): The preprocessing operators to apply.
+        to_rgb(bool): Whether to convert the input from BGR mode to RGB mode, default is False.
+    """
+    def __init__(self, transforms: Callable, to_rgb: bool = False):
         if not isinstance(transforms, list):
             raise TypeError('The transforms must be a list!')
         if len(transforms) < 1:
@@ -32,7 +39,7 @@ class Compose:
         self.transforms = transforms
         self.to_rgb = to_rgb
 
-    def __call__(self, im):
+    def __call__(self, im: Union[np.ndarray, str]):
         if isinstance(im, str):
             im = cv2.imread(im).astype('float32')
 
@@ -50,26 +57,45 @@ class Compose:
 
 
 class RandomHorizontalFlip:
-    def __init__(self, prob=0.5):
+    """
+    Randomly flip the image horizontally according to the given probability.
+
+    Args:
+        prob(float): The probability of flipping the image horizontally, default is 0.5.
+    """
+    def __init__(self, prob: float = 0.5):
         self.prob = prob
 
-    def __call__(self, im):
+    def __call__(self, im: np.ndarray):
         if random.random() < self.prob:
             im = F.horizontal_flip(im)
         return im
 
 
 class RandomVerticalFlip:
-    def __init__(self, prob=0.5):
+    """
+    Randomly flip the image vertically according to the given probability.
+
+    Args:
+        prob(float): The probability of flipping the image vertically, default is 0.5.
+ """ + def __init__(self, prob: float = 0.5): self.prob = prob - def __call__(self, im): + def __call__(self, im: np.ndarray): if random.random() < self.prob: im = F.vertical_flip(im) return im class Resize: + """ + Resize input image to target size. + + Args: + target_size(List[int]|int]): Target image size. + interpolation(str): Interpolation mode, default is 'LINEAR'. It support 6 modes: 'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4' and 'RANDOM'. + """ # The interpolation mode interpolation_dict = { 'NEAREST': cv2.INTER_NEAREST, @@ -79,7 +105,7 @@ class Resize: 'LANCZOS4': cv2.INTER_LANCZOS4 } - def __init__(self, target_size, interpolation='LINEAR'): + def __init__(self, target_size: Union[List[int], int], interpolation: str = 'LINEAR'): self.interpolation = interpolation if not (interpolation == "RANDOM" or interpolation in self.interpolation_dict): raise ValueError("interpolation should be one of {}".format(self.interpolation_dict.keys())) @@ -93,7 +119,7 @@ class Resize: self.target_size = target_size - def __call__(self, im): + def __call__(self, im: np.ndarray): if self.interpolation == "RANDOM": interpolation = random.choice(list(self.interpolation_dict.keys())) else: @@ -103,7 +129,13 @@ class Resize: class ResizeByLong: - def __init__(self, long_size): + """ + Resize the long side of the input image to the target size. + + Args: + long_size(int|list[int]): The target size of long side. + """ + def __init__(self, long_size: Union[List[int], int]): self.long_size = long_size def __call__(self, im): @@ -112,14 +144,21 @@ class ResizeByLong: class ResizeRangeScaling: - def __init__(self, min_value=400, max_value=600): + """ + Randomly select a targeted size to resize the image according to given range. + + Args: + min_value(int): The minimum value for targeted size. + max_value(int): The maximum value for targeted size. + """ + def __init__(self, min_value: int = 400, max_value: int = 600): if min_value > max_value: raise ValueError('min_value must be less than max_value, ' 'but they are {} and {}.'.format(min_value, max_value)) self.min_value = min_value self.max_value = max_value - def __call__(self, im): + def __call__(self, im: np.ndarray): if self.min_value == self.max_value: random_size = self.max_value else: @@ -129,7 +168,16 @@ class ResizeRangeScaling: class ResizeStepScaling: - def __init__(self, min_scale_factor=0.75, max_scale_factor=1.25, scale_step_size=0.25): + """ + Randomly select a scale factor to resize the image according to given range. + + Args: + min_scale_factor(float): The minimum scale factor for targeted scale. + max_scale_factor(float): The maximum scale factor for targeted scale. + scale_step_size(float): Scale interval. + + """ + def __init__(self, min_scale_factor: float = 0.75, max_scale_factor: float = 1.25, scale_step_size: float = 0.25): if min_scale_factor > max_scale_factor: raise ValueError('min_scale_factor must be less than max_scale_factor, ' 'but they are {} and {}.'.format(min_scale_factor, max_scale_factor)) @@ -137,7 +185,7 @@ class ResizeStepScaling: self.max_scale_factor = max_scale_factor self.scale_step_size = scale_step_size - def __call__(self, im): + def __call__(self, im: np.ndarray): if self.min_scale_factor == self.max_scale_factor: scale_factor = self.min_scale_factor @@ -157,7 +205,14 @@ class ResizeStepScaling: class Normalize: - def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): + """ + Normalize the input image. + + Args: + mean(list): Mean value for normalization. 
+        std(list): Standard deviation for normalization.
+    """
+    def __init__(self, mean: list = [0.5, 0.5, 0.5], std: list = [0.5, 0.5, 0.5]):
         self.mean = mean
         self.std = std
         if not (isinstance(self.mean, list) and isinstance(self.std, list)):
@@ -174,7 +229,14 @@ class Normalize:
 
 
 class Padding:
-    def __init__(self, target_size, im_padding_value=[127.5, 127.5, 127.5]):
+    """
+    Pad the input image to the target size with the given padding value.
+
+    Args:
+        target_size(Union[List[int], Tuple[int], int]): Target image size.
+        im_padding_value(list): Border value for the 3 channels, default is [127.5, 127.5, 127.5].
+    """
+    def __init__(self, target_size: Union[List[int], Tuple[int], int], im_padding_value: list = [127.5, 127.5, 127.5]):
         if isinstance(target_size, list) or isinstance(target_size, tuple):
             if len(target_size) != 2:
                 raise ValueError(
@@ -185,7 +247,7 @@ class Padding:
         self.target_size = target_size
         self.im_padding_value = im_padding_value
 
-    def __call__(self, im):
+    def __call__(self, im: np.ndarray):
         im_height, im_width = im.shape[0], im.shape[1]
         if isinstance(self.target_size, int):
             target_height = self.target_size
@@ -206,6 +268,13 @@ class Padding:
 
 
 class RandomPaddingCrop:
+    """
+    Pad the input image if the crop size is greater than the image size. Otherwise, randomly crop the input image to the given size.
+
+    Args:
+        crop_size(Union[List[int], Tuple[int], int]): Target image size.
+        im_padding_value(list): Border value for the 3 channels, default is [127.5, 127.5, 127.5].
+    """
     def __init__(self, crop_size, im_padding_value=[127.5, 127.5, 127.5]):
         if isinstance(crop_size, list) or isinstance(crop_size, tuple):
             if len(crop_size) != 2:
@@ -247,10 +316,16 @@ class RandomPaddingCrop:
 
 
 class RandomBlur:
-    def __init__(self, prob=0.1):
+    """
+    Randomly blur the input image with a Gaussian filter according to the given probability.
+
+    Args:
+        prob(float): The probability of blurring the image, default is 0.1.
+    """
+    def __init__(self, prob: float = 0.1):
         self.prob = prob
 
-    def __call__(self, im):
+    def __call__(self, im: np.ndarray):
         if self.prob <= 0:
             n = 0
         elif self.prob >= 1:
@@ -270,7 +345,15 @@ class RandomBlur:
 
 
 class RandomRotation:
-    def __init__(self, max_rotation=15, im_padding_value=[127.5, 127.5, 127.5]):
+    """
+    Rotate the input image by a random angle that does not exceed max_rotation.
+
+    Args:
+        max_rotation(float): Upper bound of the rotation angle.
+        im_padding_value(list): Border value for the 3 channels, default is [127.5, 127.5, 127.5].
+ """ + def __init__(self, max_rotation: float = 15, im_padding_value: list = [127.5, 127.5, 127.5]): self.max_rotation = max_rotation self.im_padding_value = im_padding_value @@ -301,47 +384,32 @@ class RandomRotation: return im -class RandomScaleAspect: - def __init__(self, min_scale=0.5, aspect_ratio=0.33): - self.min_scale = min_scale - self.aspect_ratio = aspect_ratio - def __call__(self, im): - if self.min_scale != 0 and self.aspect_ratio != 0: - img_height = im.shape[0] - img_width = im.shape[1] - for i in range(0, 10): - area = img_height * img_width - target_area = area * np.random.uniform(self.min_scale, 1.0) - aspectRatio = np.random.uniform(self.aspect_ratio, 1.0 / self.aspect_ratio) - - dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) - dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) - if (np.random.randint(10) < 5): - tmp = dw - dw = dh - dh = tmp - - if (dh < img_height and dw < img_width): - h1 = np.random.randint(0, img_height - dh) - w1 = np.random.randint(0, img_width - dw) - - im = im[h1:(h1 + dh), w1:(w1 + dw), :] - im = cv2.resize(im, (img_width, img_height), interpolation=cv2.INTER_LINEAR) - return im +class RandomDistort: + """ + Random adjust brightness, contrast, saturation and hue according to the given random range and probability, respectively. + Args: -class RandomDistort: + brightness_range(float): Boundary of brightness. + brightness_prob(float): Probability for disturb the brightness of image. + contrast_range(float): Boundary of contrast. + contrast_prob(float): Probability for disturb the contrast of image. + saturation_range(float): Boundary of saturation. + saturation_prob(float): Probability for disturb the saturation of image. + hue_range(float): Boundary of hue. + hue_prob(float): Probability for disturb the hue of image. + """ def __init__(self, - brightness_range=0.5, - brightness_prob=0.5, - contrast_range=0.5, - contrast_prob=0.5, - saturation_range=0.5, - saturation_prob=0.5, - hue_range=18, - hue_prob=0.5): + brightness_range: float = 0.5, + brightness_prob: float = 0.5, + contrast_range: float = 0.5, + contrast_prob: float = 0.5, + saturation_range: float = 0.5, + saturation_prob: float = 0.5, + hue_range: float = 18, + hue_prob: float = 0.5): self.brightness_range = brightness_range self.brightness_prob = brightness_prob self.contrast_range = contrast_range @@ -351,7 +419,7 @@ class RandomDistort: self.hue_range = hue_range self.hue_prob = hue_prob - def __call__(self, im): + def __call__(self, im: np.ndarray): brightness_lower = 1 - self.brightness_range brightness_upper = 1 + self.brightness_range contrast_lower = 1 - self.contrast_range @@ -360,7 +428,7 @@ class RandomDistort: saturation_upper = 1 + self.saturation_range hue_lower = -self.hue_range hue_upper = self.hue_range - ops = ['brightness', 'contrast', 'saturation', 'hue'] + ops = [F.brightness, F.contrast, F.saturation, F.hue] random.shuffle(ops) params_dict = { 'brightness': { @@ -539,27 +607,6 @@ class LAB2RGB: return self.lab2rgb(img) -class ColorPostprocess: - """ - Transform images from [0, 1] to [0, 255] - - Args: - type(type): Type of Image value. - - Return: - img(np.ndarray): Image in range of 0-255. - """ - - def __init__(self, type: type = np.uint8): - self.type = type - - def __call__(self, img: np.ndarray): - img = np.transpose(img, (1, 2, 0)) - img = np.clip(img, 0, 1) * 255 - img = img.astype(self.type) - return img - - class CenterCrop: """ Crop the middle part of the image to the specified size.